示例#1
0
def accuracy_pixel(output,
                   meta_data,
                   cfgs=None,
                   image_size=(256.0, 256.0),
                   arg_max='hard'):
    """
    Average pixel distance between predicted and annotated joints.

    Joint locations are decoded from the predicted heatmaps, rescaled to the
    network input resolution, mapped back to the source image with the
    inverse affine transform of every sample and compared against
    ``meta_data['original_joints']``.

    output: heatmaps, either a numpy array or a tensor; ``shape[3]`` is used
        as the heatmap width
    meta_data: mapping holding the tensors 'center', 'scale', 'rotation' and
        'original_joints'
    cfgs: optional configuration; when given,
        ``cfgs['heatmapModel']['input_size']`` replaces ``image_size``
    image_size: network input size used when ``cfgs`` is None
    arg_max: 'hard' or 'soft', selects how the heatmaps are decoded

    Returns ``(avg_acc, cnt, others)``: the mean distance, the number of
    distances that were averaged, and a dict with the source-image
    coordinates ('src_coord'), the rescaled predictions ('joints_pred') and
    the heatmap peak values ('max_vals').

    Raises NotImplementedError for any other ``arg_max``.
    """
    if arg_max not in ('soft', 'hard'):
        raise NotImplementedError

    output_is_array = isinstance(output, np.ndarray)
    if arg_max == 'soft':
        # the soft arg-max has one implementation per container type
        if output_is_array:
            decode = lip.get_max_preds_soft
        else:
            decode = lip.get_max_preds_soft_pt
        pred, max_vals = decode(output)
    else:
        if not output_is_array:
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)

    if cfgs is not None:
        image_size = cfgs['heatmapModel']['input_size']

    # TODO: check the target generation and coordinate mapping
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    # heatmap coordinates -> input-image coordinates (down-sample ratio)
    pred *= image_size[0] / output.shape[3]

    centers, scales, rots = meta_data['center'], meta_data['scale'], meta_data[
        'rotation']
    centers = centers.data.cpu().numpy()
    scales = scales.data.cpu().numpy()
    rots = rots.data.cpu().numpy()
    gt_joints_batch = meta_data['original_joints'].data.cpu().numpy()

    distance_list = []
    src_coords_per_sample = []
    for idx, sample_pred in enumerate(pred):
        # inverse transform: network input frame -> source image frame
        inverse_affine = lip.get_affine_transform(centers[idx],
                                                  scales[idx],
                                                  rots[idx],
                                                  image_size,
                                                  inv=1)
        gt_joints = gt_joints_batch[idx]
        src_coords = lip.affine_transform_modified(sample_pred,
                                                   inverse_affine)
        src_coords_per_sample.append(
            src_coords.reshape(1, len(src_coords), 2))
        distance_list.extend(get_distance(gt_joints, src_coords))

    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(src_coords_per_sample, axis=0),
        'joints_pred': pred,
        'max_vals': max_vals
    }
    return avg_acc, cnt, others
示例#2
0
def save_batch_heatmaps(batch_image,
                        batch_heatmaps,
                        file_name,
                        normalize=True):
    '''
    Write a grid visualisation of a batch of heatmaps to an image file.

    Every row of the grid is one sample. The first cell holds the image
    resized to the heatmap resolution; it is followed by one cell per joint
    showing the colour-mapped heatmap blended over that image. The location
    returned by get_max_preds is marked with a small circle in each cell.

    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    normalize: when true, a copy of batch_image is min-max rescaled first
    '''
    if normalize:
        # rescale a clone so the caller's tensor is left untouched
        batch_image = batch_image.clone()
        lowest = float(batch_image.min())
        highest = float(batch_image.max())
        batch_image.add_(-lowest).div_(highest - lowest + 1e-5)

    batch_size, num_joints, heatmap_height, heatmap_width = (
        batch_heatmaps.size(dim) for dim in range(4))

    grid_shape = (batch_size * heatmap_height,
                  (num_joints + 1) * heatmap_width,
                  3)
    grid_image = np.zeros(grid_shape, dtype=np.uint8)

    preds, _ = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for row in range(batch_size):
        # tensors -> 8-bit arrays; the image is also moved to channel-last
        image_bytes = batch_image[row].mul(255).clamp(0, 255).byte()
        image = image_bytes.permute(1, 2, 0).cpu().numpy()
        heatmap_bytes = batch_heatmaps[row].mul(255).clamp(0, 255).byte()
        heatmaps = heatmap_bytes.cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))

        top = heatmap_height * row
        bottom = top + heatmap_height
        for col in range(num_joints):
            peak = (int(preds[row][col][0]), int(preds[row][col][1]))
            # markers accumulate on resized_image, so later cells also show
            # the (faded) markers of the joints drawn before them
            cv2.circle(resized_image, peak, 1, [0, 0, 255], 1)
            colored_heatmap = cv2.applyColorMap(heatmaps[col, :, :],
                                                cv2.COLORMAP_JET)
            blended = colored_heatmap * 0.7 + resized_image * 0.3
            cv2.circle(blended, peak, 1, [0, 0, 255], 1)

            # column 0 is reserved for the plain image, joints start at 1
            left = heatmap_width * (col + 1)
            right = left + heatmap_width
            grid_image[top:bottom, left:right, :] = blended

        grid_image[top:bottom, 0:heatmap_width, :] = resized_image

    cv2.imwrite(file_name, grid_image)
示例#3
0
def get_keypoints(instances, 
                  records, 
                  model, 
                  image_size=(256,256), 
                  arg_max='hard',
                  is_cuda=True
                  ):
    """
    Forward pass to obtain the screen coordinates, grouped by image.

    instances: model input; moved to the GPU first when is_cuda is true
    records: one dict per instance with the keys 'center', 'scale',
        'rotation', 'path', 'bbox_resize', 'label' and 'score'; every record
        with a prediction receives a new 'kpts' entry (modified in place)
    model: callable returning either a tuple whose second element holds the
        predicted coordinates, or heatmaps
    image_size: network input size used to rescale the predictions
    arg_max: heatmap decoding mode, only 'hard' is supported
    is_cuda: whether to call .cuda() on instances

    Returns a dict keyed by record['path']; each value is a dict of lists
    ('center', 'scale', 'rotation', 'bbox_resize', 'kpts_2d_pred', 'label',
    'score') with one element per record of that image.

    Raises NotImplementedError when the model returns heatmaps and arg_max
    is not 'hard'.
    """
    if is_cuda:
        instances = instances.cuda()
    output = model(instances)

    # decode the predictions and pick the matching scale factor
    if type(output) is tuple:
        pred = output[1].data.cpu().numpy()
        rescale = image_size[0]
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, _ = get_max_preds(output)
        # heatmap coordinates -> input-image coordinates
        rescale = image_size[0]/output.shape[3]
    else:
        raise NotImplementedError
    pred *= rescale

    centers = [rec['center'] for rec in records]
    scales = [rec['scale'] for rec in records]
    rots = [rec['rotation'] for rec in records]
    for idx, sample_pred in enumerate(pred):
        # inverse transform: network input frame -> source image frame
        inverse_affine = get_affine_transform(centers[idx],
                                              scales[idx],
                                              rots[idx],
                                              image_size,
                                              inv=1)
        records[idx]['kpts'] = affine_transform_modified(sample_pred,
                                                         inverse_affine)

    # assemble a dictionary where each key corresponds to one image
    list_fields = ('center', 'scale', 'rotation', 'bbox_resize',
                   'kpts_2d_pred', 'label', 'score')
    ret = {}
    for rec in records:
        image_path = rec['path']
        if image_path not in ret:
            ret[image_path] = {field: [] for field in list_fields}
        per_image = ret[image_path]
        per_image['kpts_2d_pred'].append(rec['kpts'].reshape(1, -1))
        per_image['center'].append(rec['center'])
        per_image['scale'].append(rec['scale'])
        per_image['bbox_resize'].append(rec['bbox_resize'])
        per_image['label'].append(rec['label'])
        per_image['score'].append(rec['score'])
        per_image['rotation'].append(rec['rotation'])
    return ret
示例#4
0
def get_distance_src(output,
                     meta_data,
                     cfgs=None,
                     image_size=(256.0, 256.0),
                     arg_max='hard'
                     ):
    r"""
    From predicted heatmaps, obtain local coordinates (\phi_l in the paper)
    and transform them back to the source images based on metadata.
    Error is then evaluated on the source image for the screen coordinates
    (\phi_g in the paper).

    output: the model prediction, one of
        * a tuple whose second element is a tensor of coordinates; they are
          multiplied by the image size, so presumably normalized - confirm
          against the model
        * heatmaps as a numpy array or a torch tensor (``shape[3]`` is used
          as the heatmap width), decoded according to ``arg_max``
    meta_data: mapping with 'center', 'scale' and 'original_joints' indexed
        per sample, and optionally 'rotation' (0 is used when absent)
    cfgs: optional configuration; when given,
        ``cfgs['heatmapModel']['input_size']`` replaces ``image_size``
    image_size: network input size, unpacked as (width, height)
    arg_max: 'hard' or 'soft', only used for heatmap outputs

    Returns ``(avg_acc, cnt, others)``: the mean pixel distance, the number
    of distances averaged, and a dict with the screen coordinates
    ('src_coord'), the rescaled local coordinates ('joints_pred'), the peak
    values ('max_vals', None for tuple outputs), the accumulated PCK counts
    ('correct_cnt') and those counts divided by ``cnt`` ('PCK_batch').

    Raises NotImplementedError for an unsupported output type or ``arg_max``.
    """
    # the error is reported as distance in terms of pixels in the source image
    if type(output) is tuple:
        pred, max_vals = output[1].data.cpu().numpy(), None
    elif arg_max == 'soft' and isinstance(output, np.ndarray):
        pred, max_vals = lip.soft_arg_max_np(output)
    elif arg_max == 'soft' and isinstance(output, torch.Tensor):
        pred, max_vals = lip.soft_arg_max(output)
    elif arg_max == 'hard' and isinstance(output, (np.ndarray, torch.Tensor)):
        # the mode is tested first so that it applies to arrays as well as
        # tensors; written as `A or B and C` the check was skipped for arrays
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError(
            'unsupported combination of output type and arg_max')
    if cfgs is not None:
        image_size = cfgs['heatmapModel']['input_size']
    width, height = image_size
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
    if (max_vals is not None) and (not isinstance(max_vals, np.ndarray)):
        max_vals = max_vals.data.cpu().numpy()
    # the coordinates need to be rescaled for different cases
    # NOTE(review): the scaling is in place; for a tuple output living on
    # the CPU the array may share memory with output[1] - confirm callers
    # do not reuse that tensor afterwards
    if type(output) is tuple:
        pred *= np.array(image_size).reshape(1, 1, 2)
    else:
        # multiply by down-sample ratio
        pred *= image_size[0] / output.shape[3]
    # inverse transform and compare pixel distance
    centers, scales = meta_data['center'], meta_data['scale']
    # some predictions are generated for unlabeled data, only the leading
    # len(centers) predictions are evaluated
    pred_used = pred[:len(centers)]
    if 'rotation' in meta_data:
        rots = meta_data['rotation']
    else:
        rots = [0.] * len(centers)
    joints_original_batch = meta_data['original_joints']
    distance_list = []
    correct_cnt_sum = np.zeros((len(PCK_THRES)))
    all_src_coordinates = []
    for sample_idx, sample_pred in enumerate(pred_used):
        # NOTE(review): the size is handed over as (height, width) although
        # image_size was unpacked as (width, height) - confirm the order
        # get_affine_transform expects for non-square inputs
        trans_inv = lip.get_affine_transform(centers[sample_idx],
                                             scales[sample_idx],
                                             rots[sample_idx],
                                             (height, width),
                                             inv=1
                                             )
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = lip.affine_transform_modified(sample_pred,
                                                             trans_inv
                                                             )
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list += get_distance(joints_original, pred_src_coordinates)
        correct_cnt_sum += get_PCK(pred_src_coordinates, joints_original)
    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0), # screen coordinates
        'joints_pred': pred, # predicted local coordinates
        'max_vals': max_vals,
        'correct_cnt': correct_cnt_sum,
        'PCK_batch': correct_cnt_sum / cnt
        }
    return avg_acc, cnt, others