def accuracy_pixel(output, meta_data, cfgs=None, image_size=(256.0, 256.0),
                   arg_max='hard'):
    """
    Pixel-wise distance computed from predicted heatmaps.

    Joint locations are decoded from the heatmaps, rescaled by
    ``image_size[0] / output.shape[3]``, mapped back to the original image
    with the inverse affine transform built from ``meta_data`` and compared
    against ``meta_data['original_joints']``.

    Args:
        output: predicted heatmaps, either a numpy array or a torch tensor.
            ``output.shape[3]`` is used as the heatmap width, so a
            (batch, joints, height, width) layout is assumed -- TODO confirm.
        meta_data: dict providing 'center', 'scale', 'rotation' and
            'original_joints'. Every entry is converted with
            ``.data.cpu().numpy()``, so torch tensors are expected.
        cfgs: optional configuration; when given,
            ``cfgs['heatmapModel']['input_size']`` replaces ``image_size``.
        image_size: size handed to the affine transform; its first element
            is used to compute the up-sampling ratio.
        arg_max: 'soft' or 'hard', selecting how heatmaps are decoded.

    Returns:
        Tuple ``(avg_acc, cnt, others)``: the mean of the collected
        distances, how many distances were collected, and a dict with
        'src_coord' (coordinates in the original image), 'joints_pred'
        (rescaled predictions before the inverse transform) and 'max_vals'.

    Raises:
        NotImplementedError: if ``arg_max`` is neither 'soft' nor 'hard'.
    """
    # report distance in terms of pixel in the original image
    if arg_max == 'soft':
        if isinstance(output, np.ndarray):
            pred, max_vals = lip.get_max_preds_soft(output)
        else:
            pred, max_vals = lip.get_max_preds_soft_pt(output)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            # the hard arg-max helper is fed a numpy array
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError(
            'unsupported arg_max: {!r}'.format(arg_max))

    if cfgs is not None:
        image_size = cfgs['heatmapModel']['input_size']

    # TODO: check the target generation and coordinate mapping
    if not isinstance(pred, np.ndarray):
        # the soft arg-max on tensors returns tensors; bring them to numpy
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    # multiply by down-sample ratio
    pred *= image_size[0] / output.shape[3]

    # inverse transform and compare pixel distance
    centers = meta_data['center'].data.cpu().numpy()
    scales = meta_data['scale'].data.cpu().numpy()
    rots = meta_data['rotation'].data.cpu().numpy()
    joints_original_batch = meta_data['original_joints'].data.cpu().numpy()

    distance_list = []
    all_src_coordinates = []
    for sample_idx, sample_pred in enumerate(pred):
        trans_inv = lip.get_affine_transform(centers[sample_idx],
                                             scales[sample_idx],
                                             rots[sample_idx],
                                             image_size,
                                             inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = lip.affine_transform_modified(sample_pred,
                                                             trans_inv)
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        # get_distance is expected to return a list that can be concatenated
        distance_list += get_distance(joints_original, pred_src_coordinates)

    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0),
        'joints_pred': pred,
        'max_vals': max_vals
    }
    return avg_acc, cnt, others
def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True):
    """
    Write a grid visualisation of a batch of heatmaps to ``file_name``.

    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    normalize: when True, a copy of ``batch_image`` is min-max rescaled
        before being converted to 8-bit.

    The grid has one row per sample. The first tile of a row is the input
    image resized to the heatmap resolution with the arg-max location of
    every joint marked; it is followed by one tile per joint showing the
    JET-coloured heatmap blended 0.7 / 0.3 with that image.
    """
    if normalize:
        # work on a copy so the caller's tensor is left untouched
        batch_image = batch_image.clone()
        min_val = float(batch_image.min())
        max_val = float(batch_image.max())
        # the small epsilon avoids dividing by zero on a constant image
        batch_image.add_(-min_val).div_(max_val - min_val + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)

    # one extra column per row holds the resized input image
    grid_image = np.zeros(
        (batch_size * heatmap_height, (num_joints + 1) * heatmap_width, 3),
        dtype=np.uint8)

    preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for i in range(batch_size):
        image = batch_image[i].mul(255)\
                              .clamp(0, 255)\
                              .byte()\
                              .permute(1, 2, 0)\
                              .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
                                    .clamp(0, 255)\
                                    .byte()\
                                    .cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))

        height_begin = heatmap_height * i
        height_end = heatmap_height * (i + 1)
        for j in range(num_joints):
            joint_xy = (int(preds[i][j][0]), int(preds[i][j][1]))
            # mark the predicted location on the shared image tile
            cv2.circle(resized_image, joint_xy, 1, [0, 0, 255], 1)
            heatmap = heatmaps[j, :, :]
            colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            masked_image = colored_heatmap * 0.7 + resized_image * 0.3
            cv2.circle(masked_image, joint_xy, 1, [0, 0, 255], 1)

            width_begin = heatmap_width * (j + 1)
            width_end = heatmap_width * (j + 2)
            grid_image[height_begin:height_end, width_begin:width_end, :] = \
                masked_image

        grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image

    cv2.imwrite(file_name, grid_image)
def get_keypoints(instances,
                  records,
                  model,
                  image_size=(256, 256),
                  arg_max='hard',
                  is_cuda=True
                  ):
    """
    Forward pass to obtain the screen coordinates.

    Args:
        instances: model input batch; moved to the GPU when ``is_cuda``.
        records: sequence of dicts, one per instance, each providing
            'center', 'scale', 'rotation', 'path', 'bbox_resize', 'label'
            and 'score'. A 'kpts' entry is written into every record.
        model: callable returning either a tuple, whose second element holds
            the coordinates, or heatmaps decoded with a hard arg-max.
        image_size: size handed to the affine transform; its first element
            scales the predictions.
        arg_max: only 'hard' is supported for heatmap outputs.
        is_cuda: whether to move ``instances`` to the GPU.

    Returns:
        Dict keyed by image path. Each value is a dict of lists ('center',
        'scale', 'rotation', 'bbox_resize', 'kpts_2d_pred', 'label',
        'score') with one entry per record that belongs to that image.

    Raises:
        NotImplementedError: if the model returns heatmaps and ``arg_max``
            is not 'hard'.
    """
    if is_cuda:
        instances = instances.cuda()
    output = model(instances)

    is_tuple_output = isinstance(output, tuple)
    if is_tuple_output:
        pred = output[1].data.cpu().numpy()
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            # the hard arg-max helper is fed a numpy array
            output = output.data.cpu().numpy()
        pred, _ = get_max_preds(output)
    else:
        raise NotImplementedError(
            'unsupported arg_max: {!r}'.format(arg_max))

    if is_tuple_output:
        pred *= image_size[0]
    else:
        # scale heatmap coordinates by the up-sampling ratio
        pred *= image_size[0] / output.shape[3]

    for sample_idx, sample_pred in enumerate(pred):
        record = records[sample_idx]
        trans_inv = get_affine_transform(record['center'],
                                         record['scale'],
                                         record['rotation'],
                                         image_size,
                                         inv=1)
        record['kpts'] = affine_transform_modified(sample_pred, trans_inv)

    # assemble a dictionary where each key corresponds to one image
    ret = {}
    for record in records:
        path = record['path']
        if path not in ret:
            ret[path] = {'center': [],
                         'scale': [],
                         'rotation': [],
                         'bbox_resize': [],  # resized bounding box
                         'kpts_2d_pred': [],
                         'label': [],
                         'score': []
                         }
        entry = ret[path]
        entry['kpts_2d_pred'].append(record['kpts'].reshape(1, -1))
        entry['center'].append(record['center'])
        entry['scale'].append(record['scale'])
        entry['bbox_resize'].append(record['bbox_resize'])
        entry['label'].append(record['label'])
        entry['score'].append(record['score'])
        entry['rotation'].append(record['rotation'])
    return ret
def get_distance_src(output,
                     meta_data,
                     cfgs=None,
                     image_size=(256.0, 256.0),
                     arg_max='hard'
                     ):
    r"""
    From predicted heatmaps, obtain local coordinates (\phi_l in the paper)
    and transform them back to the source images based on metadata. Error is
    then evaluated on the source image for the screen coordinates
    (\phi_g in the paper).

    Args:
        output: model output. A tuple is treated as a direct prediction
            whose second element holds the coordinates; a numpy array or
            torch tensor is treated as heatmaps and decoded according to
            ``arg_max``. ``output.shape[3]`` is used as the heatmap width,
            so a (batch, joints, height, width) layout is assumed -- TODO
            confirm.
        meta_data: dict with 'center', 'scale' and 'original_joints', and
            optionally 'rotation' (zero rotation is used when absent).
        cfgs: optional configuration; when given,
            ``cfgs['heatmapModel']['input_size']`` replaces ``image_size``.
        image_size: pair unpacked as ``(width, height)``.
        arg_max: 'soft' or 'hard' decoding for heatmap outputs.

    Returns:
        Tuple ``(avg_acc, cnt, others)``: the mean of the collected
        distances, how many distances were collected, and a dict with
        'src_coord', 'joints_pred', 'max_vals', 'correct_cnt' and
        'PCK_batch' (``correct_cnt / cnt``).

    Raises:
        NotImplementedError: if ``output`` / ``arg_max`` match no supported
            combination.
    """
    # the error is reported as distance in terms of pixels in the source image
    is_tuple_output = isinstance(output, tuple)
    if is_tuple_output:
        pred, max_vals = output[1].data.cpu().numpy(), None
    elif isinstance(output, np.ndarray) and arg_max == 'soft':
        pred, max_vals = lip.soft_arg_max_np(output)
    elif isinstance(output, torch.Tensor) and arg_max == 'soft':
        pred, max_vals = lip.soft_arg_max(output)
    elif isinstance(output, (np.ndarray, torch.Tensor)) and arg_max == 'hard':
        # the type test is grouped so that an unsupported arg_max reaches
        # the error branch for arrays as well as for tensors
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError

    if cfgs is not None:
        image_size = cfgs['heatmapModel']['input_size']
    width, height = image_size

    # bring tensor results to numpy before the in-place rescaling below
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
    if (max_vals is not None) and (not isinstance(max_vals, np.ndarray)):
        max_vals = max_vals.data.cpu().numpy()

    # the coordinates need to be rescaled for different cases
    if is_tuple_output:
        pred *= np.array(image_size).reshape(1, 1, 2)
    else:
        # multiply by down-sample ratio
        pred *= image_size[0] / output.shape[3]

    # inverse transform and compare pixel distance
    centers, scales = meta_data['center'], meta_data['scale']
    # some predictions are generated for unlabeled data; keep the labeled part
    pred_used = pred[:len(centers)]
    if 'rotation' in meta_data:
        rots = meta_data['rotation']
    else:
        rots = [0.] * len(centers)
    joints_original_batch = meta_data['original_joints']

    distance_list = []
    correct_cnt_sum = np.zeros(len(PCK_THRES))
    all_src_coordinates = []
    for sample_idx, sample_pred in enumerate(pred_used):
        # NOTE(review): the size is passed as (height, width) here although
        # it was unpacked as (width, height) -- confirm this is intended
        trans_inv = lip.get_affine_transform(centers[sample_idx],
                                             scales[sample_idx],
                                             rots[sample_idx],
                                             (height, width),
                                             inv=1
                                             )
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = lip.affine_transform_modified(sample_pred,
                                                             trans_inv
                                                             )
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        # get_distance is expected to return a list that can be concatenated
        distance_list += get_distance(joints_original, pred_src_coordinates)
        correct_cnt_sum += get_PCK(pred_src_coordinates, joints_original)

    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0),  # screen coordinates
        'joints_pred': pred,  # predicted local coordinates
        'max_vals': max_vals,
        'correct_cnt': correct_cnt_sum,
        'PCK_batch': correct_cnt_sum / cnt
    }
    return avg_acc, cnt, others