Thanks to @GZ0's answer, this code snippet now runs in roughly 0.0344 s per call on a GPU and roughly 0.2511 s on a CPU. My implementation of @GZ0's algorithm is attached below. Please don't hesitate to suggest any modifications that would make the code more Pythonic :)
import argparse
import timeit

import numpy as np
import torch

USE_CUDA = torch.cuda.is_available()


def find_optimal_threshold(pred_mask, groundtruth_masks):
    n_patch = groundtruth_masks.shape[0]

    groundtruth_masks_tensor = torch.from_numpy(groundtruth_masks)
    pred_mask_tensor = torch.from_numpy(pred_mask)
    if USE_CUDA:
        groundtruth_masks_tensor = groundtruth_masks_tensor.cuda()
        pred_mask_tensor = pred_mask_tensor.cuda()

    # Flatten each patch, sort the predictions in descending order and
    # reorder the ground truth with the same permutation.
    vector_pred = pred_mask_tensor.view(n_patch, -1)
    vector_gt = groundtruth_masks_tensor.view(n_patch, -1)
    vector_pred, sort_pred_idx = torch.sort(vector_pred, descending=True)
    vector_gt = vector_gt[torch.arange(vector_gt.shape[0])[:, None], sort_pred_idx]

    # With the threshold set to the k-th largest prediction, k pixels are
    # predicted positive and the true positives are the cumulative sum of
    # the sorted ground truth up to position k.
    gt_cumsum = torch.cumsum(vector_gt, dim=1)
    gt_total = gt_cumsum[:, -1].reshape(n_patch, 1)
    predicted = torch.arange(start=1, end=vector_pred.shape[1] + 1)
    if USE_CUDA:
        predicted = predicted.cuda()

    gt_cumsum = gt_cumsum.type(torch.float)
    gt_total = gt_total.type(torch.float)
    predicted = predicted.type(torch.float)

    # Jaccard index = TP / (TP + FP + FN) = TP / (predicted + gt_total - TP),
    # evaluated for every candidate threshold of every patch at once.
    jaccard_idx = gt_cumsum / (gt_total + predicted - gt_cumsum)
    max_jaccard_idx, max_indices = torch.max(jaccard_idx, dim=1)
    max_indices = max_indices.reshape(-1, 1)

    # The optimal threshold per patch is the prediction value at which the
    # Jaccard index peaks.
    best_threshold = vector_pred[torch.arange(vector_pred.shape[0])[:, None], max_indices]
    best_threshold = best_threshold.reshape(-1)
    return best_threshold
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pred_mask_path', type=str,
                        required=False, default='./pred_mask.npy')
    parser.add_argument('--groundtruth_mask_path', type=str,
                        required=False, default='./masks.npy')
    parser.add_argument('--run_times', type=int,
                        required=False, default=10000)
    args = parser.parse_args()

    groundtruth_masks = np.load(args.groundtruth_mask_path)
    pred_mask = np.load(args.pred_mask_path)

    t = timeit.timeit(lambda: find_optimal_threshold(pred_mask, groundtruth_masks),
                      number=args.run_times)
    print(t / args.run_times, 'seconds')
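
For anyone who wants to try the function without my .npy files, here is a small self-contained sketch with synthetic data. The shapes and value ranges are assumptions on my part (both arrays of shape (n_patch, H, W), float scores for the predictions and binary ground-truth masks); adjust them to match your own data.

    # Synthetic-data sketch: generate random prediction scores and binary
    # masks, then call find_optimal_threshold() as defined above.
    # The shapes below are illustrative assumptions, not part of the dataset.
    import numpy as np

    n_patch, height, width = 4, 64, 64
    rng = np.random.default_rng(0)
    pred_mask = rng.random((n_patch, height, width)).astype(np.float32)
    groundtruth_masks = (rng.random((n_patch, height, width)) > 0.5).astype(np.float32)

    best_thresholds = find_optimal_threshold(pred_mask, groundtruth_masks)
    print(best_thresholds)  # one optimal threshold per patch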