Example #1
def accuracy_pixel(output,
                   meta_data,
                   cfgs=None,
                   image_size=(256.0, 256.0),
                   arg_max='hard'):
    """
    pixel-wise distance computed from predicted heatmaps
    """
    # report distance in terms of pixel in the original image
    if arg_max == 'soft':
        if isinstance(output, np.ndarray):
            pred, max_vals = lip.get_max_preds_soft(output)
        else:
            pred, max_vals = lip.get_max_preds_soft_pt(output)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError
    image_size = image_size if cfgs is None else cfgs['heatmapModel'][
        'input_size']
    # TODO: check the target generation and coordinate mapping
    # multiply by down-sample ratio
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    pred *= image_size[0] / output.shape[3]
    # inverse transform and compare pixel distances
    centers, scales, rots = meta_data['center'], meta_data['scale'], meta_data[
        'rotation']
    centers = centers.data.cpu().numpy()
    scales = scales.data.cpu().numpy()
    rots = rots.data.cpu().numpy()
    joints_original_batch = meta_data['original_joints'].data.cpu().numpy()
    distance_list = []
    all_src_coordinates = []
    for sample_idx in range(len(pred)):
        trans_inv = lip.get_affine_transform(centers[sample_idx],
                                             scales[sample_idx],
                                             rots[sample_idx],
                                             image_size,
                                             inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = lip.affine_transform_modified(
            pred[sample_idx], trans_inv)
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list += get_distance(joints_original, pred_src_coordinates)
    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0),
        'joints_pred': pred,
        'max_vals': max_vals
    }
    return avg_acc, cnt, others
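The hard arg-max decoding above is delegated to lip.get_max_preds. As a point of reference, a minimal NumPy sketch of such a decoder might look like the following (hard_arg_max_np is a hypothetical re-implementation for illustration, not the repository's helper):

import numpy as np

def hard_arg_max_np(heatmaps):
    """Decode the per-joint (x, y) peak from (N, K, H, W) heatmaps."""
    n, k, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, k, -1)
    idx = flat.argmax(axis=2)                   # (N, K) flat peak indices
    max_vals = flat.max(axis=2, keepdims=True)  # (N, K, 1) peak responses
    # convert flat indices to column (x) and row (y) in heatmap space
    coords = np.stack([idx % w, idx // w], axis=2).astype(np.float32)
    return coords, max_vals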
Example #2
def get_keypoints(self, instances, records, is_cuda=True):
    """
    Forward pass to obtain the screen coordinates.
    """
    if is_cuda:
        instances = instances.cuda()
    output = self.HC(instances)
    # local part coordinates
    width, height = self.resolution
    local_coord = output[1].data.cpu().numpy()
    local_coord *= np.array(self.resolution).reshape(1, 1, 2)
    # transform local part coordinates to screen coordinates
    centers = [record['center'] for record in records]
    scales = [record['scale'] for record in records]
    rots = [record['rotation'] for record in records]
    for instance_idx in range(len(local_coord)):
        trans_inv = get_affine_transform(centers[instance_idx],
                                         scales[instance_idx],
                                         rots[instance_idx],
                                         (height, width),
                                         inv=1)
        screen_coord = affine_transform_modified(local_coord[instance_idx],
                                                 trans_inv)
        records[instance_idx]['kpts'] = screen_coord
    # assemble a dictionary where each key corresponds to one image
    ret = {}
    for record in records:
        path = record['path']
        if path not in ret:
            ret[path] = self.new_img_dict()
        ret[path]['kpts_2d_pred'].append(record['kpts'].reshape(1, -1))
        ret[path]['center'].append(record['center'])
        ret[path]['scale'].append(record['scale'])
        ret[path]['bbox_resize'].append(record['bbox_resize'])
        ret[path]['label'].append(record['label'])
        ret[path]['score'].append(record['score'])
        ret[path]['rotation'].append(record['rotation'])
    return ret
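Both examples undo the crop-and-resize with a 2x3 inverse affine matrix before comparing coordinates. Assuming affine_transform_modified applies such a matrix to an array of points, a minimal sketch of that step (apply_affine is a hypothetical name, for illustration only):

import numpy as np

def apply_affine(points, trans):
    """Apply a 2x3 affine matrix to a (K, 2) array of (x, y) points."""
    ones = np.ones((len(points), 1), dtype=points.dtype)
    # lift to homogeneous coordinates: [x, y, 1] @ trans^T -> (K, 2)
    return np.concatenate([points, ones], axis=1) @ trans.T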
Example #3
def get_keypoints(instances,
                  records,
                  model,
                  image_size=(256, 256),
                  arg_max='hard',
                  is_cuda=True):
    """
    Forward pass to obtain the screen coordinates.
    """
    if is_cuda:
        instances = instances.cuda()
    output = model(instances)
    if isinstance(output, tuple):
        # the model directly regresses normalized local coordinates
        pred, max_vals = output[1].data.cpu().numpy(), None
        # scale the normalized coordinates per-axis by the input resolution
        pred *= np.array(image_size).reshape(1, 1, 2)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = get_max_preds(output)
        # multiply by the down-sample ratio between input and heatmaps
        pred *= image_size[0] / output.shape[3]
    else:
        raise NotImplementedError
    centers = [record['center'] for record in records]
    scales = [record['scale'] for record in records]
    rots = [record['rotation'] for record in records]
    for sample_idx in range(len(pred)):
        trans_inv = get_affine_transform(centers[sample_idx],
                                         scales[sample_idx], 
                                         rots[sample_idx], 
                                         image_size, 
                                         inv=1)
        pred_src_coordinates = affine_transform_modified(pred[sample_idx],
                                                         trans_inv)
        record = records[sample_idx]
        # pred_src_coordinates += np.array([[record['bbox'][0], record['bbox'][1]]])
        records[sample_idx]['kpts'] = pred_src_coordinates
    # assemble a dictionary where each key corresponds to one image
    ret = {}
    for record in records:
        path = record['path']
        if path not in ret:
            ret[path] = {'center':[], 
                         'scale':[], 
                         'rotation':[], 
                         'bbox_resize':[], # resized bounding box
                         'kpts_2d_pred':[], 
                         'label':[], 
                         'score':[]
                         }
        ret[path]['kpts_2d_pred'].append(record['kpts'].reshape(1, -1))
        ret[path]['center'].append(record['center'])
        ret[path]['scale'].append(record['scale'])
        ret[path]['bbox_resize'].append(record['bbox_resize'])
        ret[path]['label'].append(record['label'])
        ret[path]['score'].append(record['score'])
        ret[path]['rotation'].append(record['rotation'])
    return ret
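The per-image grouping at the end of the function can also be written with collections.defaultdict, which removes the explicit `if path not in ret` check; a sketch of that variant (group_by_image and FIELDS are illustrative names, not the repository's):

from collections import defaultdict

FIELDS = ('center', 'scale', 'rotation', 'bbox_resize', 'label', 'score')

def group_by_image(records):
    """Group per-instance records into one entry of lists per image path."""
    ret = defaultdict(lambda: {f: [] for f in FIELDS + ('kpts_2d_pred',)})
    for record in records:
        entry = ret[record['path']]
        entry['kpts_2d_pred'].append(record['kpts'].reshape(1, -1))
        for field in FIELDS:
            entry[field].append(record[field])
    return dict(ret)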
Example #4
def get_distance_src(output,
                     meta_data,
                     cfgs=None,
                     image_size=(256.0, 256.0),
                     arg_max='hard'):
    """
    From predicted heatmaps, obtain local coordinates (\phi_l in the paper) 
    and transform them back to the source images based on metadata. 
    Error is then evaluated on the source image for the screen coordinates 
    (\phi_g in the paper).
    """
    # the error is reported as distance in terms of pixels in the source image
    if isinstance(output, tuple):
        pred, max_vals = output[1].data.cpu().numpy(), None
    elif isinstance(output, np.ndarray) and arg_max == 'soft':
        pred, max_vals = lip.soft_arg_max_np(output)
    elif isinstance(output, torch.Tensor) and arg_max == 'soft':
        pred, max_vals = lip.soft_arg_max(output)
    elif isinstance(output, (np.ndarray, torch.Tensor)) and arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError
    image_size = image_size if cfgs is None else cfgs['heatmapModel']['input_size']
    width, height = image_size
    # multiply by down-sample ratio
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
    if (max_vals is not None) and (not isinstance(max_vals, np.ndarray)):
        max_vals = max_vals.data.cpu().numpy()
    # the coordinates need to be rescaled for different cases
    if type(output) is tuple:
        pred *= np.array(image_size).reshape(1, 1, 2)
    else:
        pred *= image_size[0] / output.shape[3]
    # inverse transform and compare pixel distances
    centers, scales = meta_data['center'], meta_data['scale']
    # some predictions are generated for unlabeled data
    if len(pred) != len(centers):
        pred_used = pred[:len(centers)]
    else:
        pred_used = pred
    if 'rotation' in meta_data:
        rots = meta_data['rotation']
    else:
        rots = [0. for i in range(len(centers))]
    joints_original_batch = meta_data['original_joints']
    distance_list = []
    correct_cnt_sum = np.zeros((len(PCK_THRES)))
    all_src_coordinates = []
    for sample_idx in range(len(pred_used)):
        trans_inv = lip.get_affine_transform(centers[sample_idx],
                                             scales[sample_idx],
                                             rots[sample_idx],
                                             (height, width),
                                             inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = lip.affine_transform_modified(
            pred_used[sample_idx], trans_inv)
        all_src_coordinates.append(pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list += get_distance(joints_original, pred_src_coordinates)
        correct_cnt_sum += get_PCK(pred_src_coordinates, joints_original)
    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0), # screen coordinates
        'joints_pred': pred, # predicted local coordinates
        'max_vals': max_vals, 
        'correct_cnt': correct_cnt_sum,
        'PCK_batch': correct_cnt_sum / cnt
        }
    return avg_acc, cnt, others
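get_PCK and PCK_THRES are not shown above; they presumably implement the usual Percentage-of-Correct-Keypoints count over a set of pixel thresholds. A minimal sketch under that assumption (the threshold values here are illustrative, not taken from the repository):

import numpy as np

PCK_THRES = (5.0, 10.0, 20.0)  # assumed pixel thresholds, for illustration

def get_PCK(pred, gt, thresholds=PCK_THRES):
    """Count predicted joints within each pixel threshold of the ground truth."""
    dists = np.linalg.norm(pred - gt, axis=1)  # (K,) per-joint pixel errors
    return np.array([(dists <= t).sum() for t in thresholds], dtype=np.float64)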