flow_estimated = nn.functional.interpolate( flow_estimated, size=(h_g, w_g), mode='bilinear', align_corners=False) flow_estimated[:, 0, :, :] *= ratio_w flow_estimated[:, 1, :, :] *= ratio_h assert flow_estimated.shape[2] == h_g and flow_estimated.shape[ 3] == w_g flow_est_x = flow_estimated.permute(0, 2, 3, 1)[:, :, :, 0] flow_est_y = flow_estimated.permute(0, 2, 3, 1)[:, :, :, 1] writeFlow( np.dstack([ flow_est_x[0].cpu().numpy(), flow_est_y[0].cpu().numpy() ]), 'batch_{}'.format(i_batch), path_to_save) else: # Datasets with ground-truth flow fields available # HPATCHES dataset threshold_range = np.linspace(0.002, 0.2, num=50) if args.datasets == 'HPatchesdataset': number_of_scenes = 5 + 1 list_of_outputs = [] # loop over scenes (1-2, 1-3, 1-4, 1-5, 1-6) for id, k in enumerate(range(2, number_of_scenes + 2)): if id == 5:
def calculate_epe_and_pck_per_dataset(test_dataloader, network, device,
                                      threshold_range, path_to_save=None,
                                      compute_F1=False, save=False):
    """Evaluate a flow-estimation network over a dataset: AEPE, PCK, optional F1.

    Iterates over ``test_dataloader``, estimates a dense flow field for each
    source/target image pair and compares it to the ground-truth flow on the
    pixels selected by the ground-truth correspondence mask.

    Args:
        test_dataloader: iterable of mini-batches; each batch is a dict with
            keys ``'source_image'``, ``'target_image'``,
            ``'correspondence_mask'``, ``'flow_map'`` and
            ``'source_image_size'``.
        network: model exposing
            ``estimate_flow(source, target, device, mode='channel_first')``
            returning a Bx2xHxW flow tensor.
        device: torch device the ground-truth tensors are moved to.
        threshold_range: 1-D numpy array of alpha thresholds for the PCK curve.
        path_to_save: directory passed to ``writeFlow`` when ``save`` is True.
        compute_F1: also accumulate the per-image KITTI-2015 F1 measure.
        save: write the first estimated flow of each batch to disk as a .flo.

    Returns:
        dict with the mean AEPE (key ``'final_eape'`` — spelling kept for
        backward compatibility), per-image PCK averages, alpha/pixel threshold
        lists, the dataset-level PCK curve and, when ``compute_F1`` is set,
        ``'kitti2015-F1'``.

    Raises:
        ValueError: if ``test_dataloader`` yields no batches (the metrics
            would otherwise be undefined; previously this crashed with a
            ``NameError`` on ``img_size``).
    """
    aepe_array = []
    pck_alpha_0_05_over_image = []
    pck_thresh_1_over_image = []
    pck_thresh_5_over_image = []
    F1 = []
    n_registered_pxs = 0.0
    # One accumulator per PCK threshold, summed over the whole dataset.
    array_n_correct_correspondences = np.zeros(threshold_range.shape,
                                               dtype=np.float32)
    img_size = None  # set on the first batch; needed after the loop
    pbar = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
    for i_batch, mini_batch in pbar:
        source_img = mini_batch['source_image']
        target_img = mini_batch['target_image']
        mask_gt = mini_batch['correspondence_mask'].to(device)
        flow_gt = mini_batch['flow_map'].to(device)
        if flow_gt.shape[1] != 2:
            # Ground truth arrived as BxHxWx2 -> convert to channel-first.
            flow_gt = flow_gt.permute(0, 3, 1, 2)
        bs, ch_g, h_g, w_g = flow_gt.shape

        # Bx2xH_xW_ tensor on the same device as requested.  H_ and W_ can be
        # smaller than the ground-truth flow (e.g. DCG Net only takes
        # 240x240 images).
        flow_estimated = network.estimate_flow(source_img, target_img, device,
                                               mode='channel_first')
        if flow_estimated.shape[2] != h_g or flow_estimated.shape[3] != w_g:
            # The estimated flow is downscaled (the original images were
            # downscaled before passing through the network).  Upsample it to
            # the ground-truth resolution and rescale the flow values by the
            # same spatial ratios.
            ratio_h = float(h_g) / float(flow_estimated.shape[2])
            ratio_w = float(w_g) / float(flow_estimated.shape[3])
            flow_estimated = nn.functional.interpolate(
                flow_estimated, size=(h_g, w_g), mode='bilinear',
                align_corners=False)
            flow_estimated[:, 0, :, :] *= ratio_w
            flow_estimated[:, 1, :, :] *= ratio_h
        assert flow_estimated.shape == flow_gt.shape

        flow_target_x = flow_gt.permute(0, 2, 3, 1)[:, :, :, 0]
        flow_target_y = flow_gt.permute(0, 2, 3, 1)[:, :, :, 1]
        flow_est_x = flow_estimated.permute(0, 2, 3, 1)[:, :, :, 0]  # B x h_g x w_g
        flow_est_y = flow_estimated.permute(0, 2, 3, 1)[:, :, :, 1]

        # Keep only the pixels registered according to the ground-truth mask.
        # flow_target_x[mask_gt] has shape (n_pixels,); unsqueeze(1) makes it
        # (n_pixels, 1), so the concatenation is (n_valid_pixels, 2).
        flow_target = torch.cat((flow_target_x[mask_gt].unsqueeze(1),
                                 flow_target_y[mask_gt].unsqueeze(1)), dim=1)
        flow_est = torch.cat((flow_est_x[mask_gt].unsqueeze(1),
                              flow_est_y[mask_gt].unsqueeze(1)), dim=1)

        # EPE for this batch, averaged per valid pixel.
        aepe = epe(flow_est, flow_target)
        aepe_array.append(aepe.item())

        # PCK: fraction of valid pixels with error below alpha * img_size.
        img_size = max(mini_batch['source_image_size'][0],
                       mini_batch['source_image_size'][1]).float().to(device)
        alpha_0_05 = correct_correspondences(flow_est, flow_target,
                                             alpha=0.05, img_size=img_size)
        px_1 = correct_correspondences(flow_est, flow_target,
                                       alpha=1.0 / float(img_size),
                                       img_size=img_size)  # 1-px threshold
        px_5 = correct_correspondences(flow_est, flow_target,
                                       alpha=5.0 / float(img_size),
                                       img_size=img_size)  # 5-px threshold
        # Percentage is computed per image.
        pck_alpha_0_05_over_image.append(alpha_0_05 / flow_target.shape[0])
        pck_thresh_1_over_image.append(px_1 / flow_target.shape[0])
        pck_thresh_5_over_image.append(px_5 / flow_target.shape[0])

        # PCK curve for different thresholds.  ATTENTION: this one is
        # accumulated over the whole dataset, not per image.
        # n_registered_pxs is the number of correspondences that should be
        # correct according to the ground-truth mask.
        n_registered_pxs += flow_target.shape[0]
        for t_id, threshold in enumerate(threshold_range):
            # Correct correspondences below this threshold, added per batch.
            array_n_correct_correspondences[t_id] += correct_correspondences(
                flow_est, flow_target, alpha=threshold, img_size=img_size)

        if compute_F1:
            F1.append(F1_kitti_2015(flow_est, flow_target) /
                      flow_target.shape[0])
        if save:
            writeFlow(np.dstack([flow_est_x[0].cpu().numpy(),
                                 flow_est_y[0].cpu().numpy()]),
                      'batch_{}'.format(i_batch), path_to_save)

    if img_size is None:
        # Fail explicitly instead of the NameError / nan means the original
        # code produced when the loader was empty.
        raise ValueError('test_dataloader produced no batches; cannot '
                         'compute EPE/PCK statistics.')

    output = {
        # NOTE: 'final_eape' is a historical typo for 'final_aepe'; the key
        # is kept unchanged because downstream consumers read it by name.
        'final_eape': np.mean(aepe_array),
        'pck_alpha_0_05_average_per_image': np.mean(pck_alpha_0_05_over_image),
        'pck_thresh_1_average_per_image': np.mean(pck_thresh_1_over_image),
        'pck_thresh_5_average_per_image': np.mean(pck_thresh_5_over_image),
        'alpha_threshold': threshold_range.tolist(),
        # Pixel thresholds derived from the last batch's image size.
        'pixel_threshold': np.round(threshold_range * img_size.cpu().numpy(),
                                    2).tolist(),
        'pck_per_threshold_over_dataset': np.float32(
            array_n_correct_correspondences /
            (n_registered_pxs + 1e-6)).tolist(),
    }
    if compute_F1:
        output['kitti2015-F1'] = np.mean(F1)
    return output
else: image_source = image_source[0].numpy().astype(np.uint8) if image_target.shape[1] == 3: image_target = image_target.permute(0, 2, 3, 1)[0].numpy().astype(np.uint8) else: image_target = image_target[0].numpy().astype(np.uint8) flow_gt = minibatch['flow_map'][0].permute( 1, 2, 0).numpy() # now shape is HxWx2 # save the flow file and the images files base_name = 'image_{}'.format(i) name_flow = base_name + '_flow.flo' writeFlow(flow_gt, name_flow, flow_dir) imageio.imwrite( os.path.join(save_dir, 'images/', base_name + '_img_1.jpg'), image_source) imageio.imwrite( os.path.join(save_dir, 'images/', base_name + '_img_2.jpg'), image_target) # plotting to make sure that eevrything is working if plot and i < 4: # just for now fig, axis = plt.subplots(1, 3, figsize=(20, 20)) axis[0].imshow(image_source) axis[0].set_title("Image source") axis[1].imshow(image_target) axis[1].set_title("Image target")