Example #1
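The snippet below calls several libraries without showing their imports. A hedged reconstruction of the imports Example #1 appears to rely on is given here; the module paths for the project-specific helpers (augmentation_cropped, vis_mppe_results) are assumptions, since those come from the surrounding repository rather than a public package.

# Assumed imports, reconstructed from the calls made in the function body.
import math

import cv2
import numpy as np
import torch
from numpy.core.records import fromarrays
from scipy.ndimage import gaussian_filter
import scipy.cluster.hierarchy as hcluster

# Project-specific helpers (assumed locations within the repository):
# from utils.data_augmentation import augmentation_cropped
# from utils.vis_utils import vis_mppe_results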
def single_image_testing_on_mpi_mp_dataset(net, im, objpos=None, \
                                                    scale_provided=None, \
                                                    center_box=None, \
                                                    center_box_extend_pixels=50, \
                                                    transform=None, \
                                                    stride=4, \
                                                    crop_size=256, \
                                                    training_crop_size=256, \
                                                    scale_multiplier=[1], \
                                                    num_of_joints=16, \
                                                    conf_th=0.1, \
                                                    dist_th=120, \
                                                    visualization=False, \
                                                    vis_im_path='./exps/preds/vis_results/mppe_vis_result.jpg'):
    
         
    # Get the original image size
    im_height = im.shape[0]
    im_width = im.shape[1]
    long_edge = max(im_height, im_width) 

    # Get the group center
    if objpos is not None and scale_provided is not None and center_box is not None:
        ori_center = np.array([[objpos[0], objpos[1]]])
        base_scale = 1.1714 / scale_provided
    else:
        ori_center = np.array([[im_width / 2.0, im_height / 2.0]])
        scale_provided = long_edge * 1.0 / crop_size
        base_scale = 1 / scale_provided

    # Variables to store multi-scale test images and their crop parameters
    cropped_im_list = []
    cropped_param_list = []
    flipped_cropped_im_list = []
    flipped_cropped_param_list = []

    for sm in scale_multiplier:
        # Resized image to base scales
        scale = base_scale * sm
        resized_im = cv2.resize(im, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        scaled_center = np.zeros([1, 2])
        scaled_center[0, 0] = int(ori_center[0, 0] * scale)
        scaled_center[0, 1] = int(ori_center[0, 1] * scale)

        # Get flipped images
        flipped_resized_im = cv2.flip(resized_im, 1)

        # Crop image for testing
        cropped_im, cropped_param = augmentation_cropped(resized_im, scaled_center, crop_x=crop_size, crop_y=crop_size, max_center_trans=0)
        cropped_im_list.append(cropped_im)
        cropped_param_list.append(cropped_param)

        scaled_flipped_center = np.zeros([1,2])
        scaled_flipped_center[0,0] = resized_im.shape[1] - scaled_center[0,0]
        scaled_flipped_center[0,1] = scaled_center[0,1]

        # Crop flipped image for testing
        flipped_cropped_im, flipped_cropped_param = augmentation_cropped(flipped_resized_im, scaled_flipped_center, crop_x=crop_size, crop_y=crop_size, max_center_trans=0)
        flipped_cropped_im_list.append(flipped_cropped_im)
        flipped_cropped_param_list.append(flipped_cropped_param)

    # Transform image
    input_im_list = []
    flipped_input_im_list = []
    if transform is not None:
        for cropped_im in cropped_im_list:
            input_im = transform(cropped_im)
            input_im_list.append(input_im)
        for flipped_cropped_im in flipped_cropped_im_list:
            flipped_input_im = transform(flipped_cropped_im)
            flipped_input_im_list.append(flipped_input_im)
    else:
        for cropped_im in cropped_im_list:
            input_im = cropped_im.copy()
            input_im_list.append(input_im)
        for flipped_cropped_im in flipped_cropped_im_list:
            flipped_input_im = flipped_cropped_im.copy()
            flipped_input_im_list.append(flipped_input_im)

    # Preparing input variable
    batch_input_im = input_im_list[0].view(-1, 3, crop_size, crop_size)
    for smi in range(1, len(input_im_list)):
        batch_input_im = torch.cat((batch_input_im, input_im_list[smi].view(-1, 3, crop_size, crop_size)), 0)
    # 'async' is a reserved word in Python 3.7+; non_blocking is the current PyTorch keyword.
    batch_input_im = batch_input_im.cuda(non_blocking=True)
    # Note: Variable/volatile is deprecated in PyTorch >= 0.4; inference can be wrapped in torch.no_grad() instead.
    batch_input_var = torch.autograd.Variable(batch_input_im, volatile=True)

    # Preparing flipped input variable
    batch_flipped_input_im = flipped_input_im_list[0].view(-1, 3, crop_size, crop_size)
    for smi in range(1, len(flipped_input_im_list)):
        batch_flipped_input_im = torch.cat((batch_flipped_input_im, flipped_input_im_list[smi].view(-1, 3, crop_size, crop_size)), 0)
    batch_flipped_input_im = batch_flipped_input_im.cuda(non_blocking=True)
    batch_flipped_input_var = torch.autograd.Variable(batch_flipped_input_im, volatile=True)

    # Get predicted heatmaps and convert them to numpy array
    pose_outputs, orie_outputs = net(batch_input_var)
    pose_output = pose_outputs[-1]
    pose_output = pose_output.data
    pose_output = pose_output.cpu().numpy()
    orie_output = orie_outputs[-1]
    orie_output = orie_output.data
    orie_output = orie_output.cpu().numpy()
    
    # Get predicted flipped heatmaps and convert them to numpy array
    flipped_pose_outputs, flipped_orie_outputs = net(batch_flipped_input_var)
    flipped_pose_output = flipped_pose_outputs[-1]
    flipped_pose_output = flipped_pose_output.data
    flipped_pose_output = flipped_pose_output.cpu().numpy()
    flipped_orie_output = flipped_orie_outputs[-1]
    flipped_orie_output = flipped_orie_output.data
    flipped_orie_output = flipped_orie_output.cpu().numpy()

    # First fuse the original prediction with flipped prediction
    fused_pose_output = np.zeros((pose_output.shape[0], pose_output.shape[1] - 1, crop_size, crop_size))
    flipped_idx = [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 14, 15]
    for smi in range(0, len(scale_multiplier)):
        # Get single scale output
        single_scale_output = pose_output[smi, :, :, :].copy()
        single_scale_flipped_output = flipped_pose_output[smi, :, :, :].copy()

        # fuse each joint's heatmap
        for ji in range(0, 16):
            # Get the original heatmap
            heatmap = single_scale_output[ji, :, :].copy()
            heatmap = cv2.resize(heatmap, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR)

            # Get the flipped heatmap
            flipped_heatmap = single_scale_flipped_output[flipped_idx[ji], :, :].copy()
            flipped_heatmap = cv2.resize(flipped_heatmap, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR)
            flipped_heatmap = cv2.flip(flipped_heatmap, 1)

            # Average the original heatmap with flipped heatmap
            heatmap += flipped_heatmap
            heatmap *= 0.5

            fused_pose_output[smi, ji, :, :] = heatmap

    # Second fuse multi-scale predictions
    base_pose_output_list = []
    base_crop_param_list = []
    for smi in range(0, len(scale_multiplier)):
        single_scale_output = fused_pose_output[smi, :, :, :]
        crop_param = cropped_param_list[smi]

        # Crop the heatmaps without padding
        cropped_single_scale_output = single_scale_output[:, crop_param[0, 3]:crop_param[0, 7], crop_param[0, 2]:crop_param[0, 6]]

        # Resize the cropped heatmaps to base scale
        cropped_single_scale_output = cropped_single_scale_output.transpose((1, 2, 0))
        base_single_scale_output = cv2.resize(cropped_single_scale_output, None, fx=1.0/scale_multiplier[smi], fy=1.0/scale_multiplier[smi], interpolation=cv2.INTER_LINEAR)
        base_single_scale_output = base_single_scale_output.transpose((2, 0, 1))

        # Resize the cropping parameters
        base_crop_param = crop_param * (1.0 / scale_multiplier[smi])

        # Add to list
        base_pose_output_list.append(base_single_scale_output)
        base_crop_param_list.append(base_crop_param)

    # Multi-scale fusion results
    ms_fused_pose_output = np.zeros((base_pose_output_list[0].shape))

    # Accumulate map for division
    accumulate_map = np.zeros((base_pose_output_list[0].shape)) + 1

    # Use the smallest image as reference
    base_start_x = int(base_crop_param_list[0][0, 0])
    base_start_y = int(base_crop_param_list[0][0, 1])
    for smi in range(0, len(scale_multiplier)):
        # Get base parameters and pose output
        base_crop_param = base_crop_param_list[smi]
        base_pose_output = base_pose_output_list[smi]

        # Temporary pose heatmaps
        temp_pose_output = np.zeros_like(ms_fused_pose_output)

        # Relative location for reference image
        store_start_x = int(base_crop_param[0, 0]) - base_start_x
        store_start_y = int(base_crop_param[0, 1]) - base_start_y
        store_end_x = int(min(store_start_x + base_pose_output.shape[2], ms_fused_pose_output.shape[2]))
        store_end_y = int(min(store_start_y + base_pose_output.shape[1], ms_fused_pose_output.shape[1]))

        temp_pose_output[:, store_start_y:store_end_y, store_start_x:store_end_x] = base_pose_output[:, 0:(store_end_y - store_start_y), 0:(store_end_x - store_start_x)]
        ms_fused_pose_output += temp_pose_output

        # Update the accumulate map
        if smi >= 1:
            accumulate_map[:, store_start_y:store_end_y, store_start_x:store_end_x] += 1
    
    # Average by the accumulate map
    # Every position should be counted at least once to avoid division by zero; also avoid being dominated by the center crop
    accumulate_map[accumulate_map == 0] = len(scale_multiplier)
    ms_fused_pose_output = np.divide(ms_fused_pose_output, accumulate_map)
    
    # Get the final prediction results
    pred_joints = np.zeros((num_of_joints, 3))

    # Perform NMS to find joint candidates
    all_peaks = []
    peak_counter = 0
    for ji in range(0, num_of_joints):
        heatmap_ori = ms_fused_pose_output[ji, :, :]
        heatmap = gaussian_filter(heatmap_ori, sigma=3)

        heatmap_left = np.zeros(heatmap.shape)
        heatmap_left[1:, :] = heatmap[:-1, :]
        heatmap_right = np.zeros(heatmap.shape)
        heatmap_right[:-1, :] = heatmap[1:, :]
        heatmap_up = np.zeros(heatmap.shape)
        heatmap_up[:, 1:] = heatmap[:, :-1]
        heatmap_down = np.zeros(heatmap.shape)
        heatmap_down[:, :-1] = heatmap[:, 1:]
    
        peaks_binary = np.logical_and.reduce((heatmap >= heatmap_left, heatmap >= heatmap_right, heatmap >= heatmap_up, heatmap >= heatmap_down, heatmap > conf_th))
        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))
        peaks_with_score = [x + (heatmap_ori[x[1], x[0]], ) for x in peaks]
        peak_ids = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [peaks_with_score[i] + (peak_ids[i], ) for i in range(len(peak_ids))]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # Recover the peaks to locations in original image
    cropped_param = base_crop_param_list[0]
    all_joint_candi_list = []
    for ji in range(0, len(all_peaks)):
        joint_candi_list = []
        peaks_base = all_peaks[ji]
        for ci in range(0, len(peaks_base)):
            joint_candi = np.zeros((1, 4))
            joint_candi[0, :] = np.array(peaks_base[ci])
            joint_candi[0, 0] = (joint_candi[0, 0] + cropped_param[0, 0]) / base_scale
            joint_candi[0, 1] = (joint_candi[0, 1] + cropped_param[0, 1]) / base_scale
            joint_candi_list.append(joint_candi)
        all_joint_candi_list.append(joint_candi_list)

    # Get the center embedding results
    start = stride / 2.0 - 0.5
    all_embedding_list = []
    for ji in range(0, len(all_joint_candi_list)):
        joint_candi_list = all_joint_candi_list[ji]
        embedding_list = []
        for ci in range(0, len(joint_candi_list)):
            joint_candi = joint_candi_list[ci][0, 0:2]
            offset_x_avg = 0.0
            offset_y_avg = 0.0
            valid_offset_count = 0.0
            embedding = np.zeros((1, 2))
            for si in range(0, len(scale_multiplier)):
                orie_maps = orie_output[si, :, :, :]

                flipped_orie_maps = flipped_orie_output[si, :, :, :]

                joint_candi_scaled = joint_candi * scale_multiplier[si] * base_scale
                joint_candi_scaled[0] = joint_candi_scaled[0] - cropped_param_list[si][0, 0] + cropped_param_list[si][0, 2]
                joint_candi_scaled[1] = joint_candi_scaled[1] - cropped_param_list[si][0, 1] + cropped_param_list[si][0, 3]
                g_x = int((joint_candi_scaled[0] - start) / stride)
                g_y = int((joint_candi_scaled[1] - start) / stride)
                if g_x >= 0 and g_x < crop_size / stride and g_y >= 0 and g_y < crop_size / stride:
                    offset_x = orie_maps[ji * 2, g_y, g_x]
                    offset_y = orie_maps[ji * 2 + 1, g_y, g_x]

                    # Integer division: NumPy array indices must be ints.
                    flipped_offset_x = flipped_orie_maps[flipped_idx[ji] * 2, g_y, crop_size // stride - g_x - 1]
                    flipped_offset_y = flipped_orie_maps[flipped_idx[ji] * 2 + 1, g_y, crop_size // stride - g_x - 1]
                    offset_x = (offset_x - flipped_offset_x) / 2.0
                    offset_y = (offset_y + flipped_offset_y) / 2.0

                    offset_x *= training_crop_size / 2.0  
                    offset_y *= training_crop_size / 2.0 
                    offset_x = offset_x / (scale_multiplier[si] * base_scale)
                    offset_y = offset_y / (scale_multiplier[si] * base_scale)
                    offset_x_avg += offset_x
                    offset_y_avg += offset_y
                    valid_offset_count += 1

            if valid_offset_count > 0:
                offset_x_avg /= valid_offset_count
                offset_y_avg /= valid_offset_count
            embedding[0, 0] = joint_candi[0] + offset_x_avg
            embedding[0, 1] = joint_candi[1] + offset_y_avg
            embedding_list.append(embedding)
        all_embedding_list.append(embedding_list)
        
    # Convert to np array
    all_embedding_np_array = np.empty((0, 2))
    for ji in range(0, len(all_embedding_list)):
        embedding_list = all_embedding_list[ji]
        for ci in range(0, len(embedding_list)):
            embedding = embedding_list[ci]
            all_embedding_np_array = np.vstack((all_embedding_np_array, embedding))

    all_joint_candi_np_array = np.empty((0, 5))
    for ji in range(0, len(all_joint_candi_list)):
        joint_candi_list = all_joint_candi_list[ji]
        for ci in range(0, len(joint_candi_list)):
            joint_candi_with_type = np.zeros((1, 5))
            joint_candi = joint_candi_list[ci]
            joint_candi_with_type[0, 0:4] = joint_candi[0, :]
            joint_candi_with_type[0, 4] = ji
            all_joint_candi_np_array = np.vstack((all_joint_candi_np_array, joint_candi_with_type))

    # Cluster the embeddings
    if all_embedding_np_array.shape[0] < 2:
        clusters = [-1]
    else:
        Z = hcluster.linkage(all_embedding_np_array, method='centroid')
        clusters = hcluster.fcluster(Z, dist_th, criterion='distance')
        clusters = clusters - 1
    
    # Get people structure by greedy search
    num_of_people = max(clusters) + 1
    joint_idx_list = [1, 
                      0, 2, 5, 8, 11,
                         3, 6, 9, 12,
                         4, 7, 10, 13]

    people = []
    for pi in range(0, num_of_people):
        joint_of_person_idx = np.where(clusters == pi)[0]
        joint_candi_cur_persons = all_joint_candi_np_array[joint_of_person_idx, 0:3]
        end_candi_cur_persons = all_embedding_np_array[joint_of_person_idx, :]
        joint_type_cur_person = all_joint_candi_np_array[joint_of_person_idx, 4]
        if len(joint_type_cur_person) > len(np.unique(joint_type_cur_person)):
            persons = []    
            persons_ends_list = []
            for joint_idx in joint_idx_list:
                # If the joint is neck, do initialization
                if joint_idx == 1:
                    neck_candi = np.where(joint_type_cur_person == joint_idx)[0]
                    for ni in range(len(neck_candi)):
                        person = {}
                        person[str(joint_idx)] = joint_candi_cur_persons[neck_candi[ni], :]
                        persons.append(person)
                        persons_ends = np.zeros((1, 2))
                        persons_ends[0, :] = end_candi_cur_persons[neck_candi[ni], :]
                        persons_ends_list.append(persons_ends)
                # For other joints, do connection
                else:
                    other_candi = np.where(joint_type_cur_person == joint_idx)[0]
                    other_pos = end_candi_cur_persons[other_candi]
                    person_centers = np.zeros((len(persons), 2))
                    person_idx = np.zeros((len(persons), 1), dtype=int)  # np.int was removed from NumPy; use the builtin int
                    for mi in range(len(persons_ends_list)):
                        person_centers[mi, :] = np.mean(persons_ends_list[mi], axis=0)
                        person_idx[mi] = mi
                    while (other_candi.shape[0] > 0 and person_centers.shape[0] > 0):
                        dist_matrix = np.zeros((other_candi.shape[0], person_centers.shape[0]))
                        for hi in range(other_candi.shape[0]):
                            for ci in range(person_centers.shape[0]):
                                offset_vec = other_pos[hi, :] - person_centers[ci, :]
                                dist = math.sqrt(offset_vec[0] * offset_vec[0] + offset_vec[1] * offset_vec[1])
                                dist_matrix[hi, ci] = dist
                        connection = np.where(dist_matrix == dist_matrix.min())
                        persons[person_idx[connection[1][0], 0]][str(joint_idx)] = joint_candi_cur_persons[other_candi[connection[0][0]], :]
                        persons_ends_list[person_idx[connection[1][0], 0]] = np.vstack((persons_ends_list[person_idx[connection[1][0], 0]], 
                                                                                        end_candi_cur_persons[other_candi[connection[0][0]], :]))

                        other_candi = np.delete(other_candi, connection[0][0], axis=0)
                        other_pos = np.delete(other_pos, connection[0][0], axis=0)
                        person_centers = np.delete(person_centers, connection[1][0], axis=0)
                        person_idx = np.delete(person_idx, connection[1][0], axis=0)
                    if other_candi.shape[0] > 0 and joint_idx < 2:
                        # Add new person to list
                        for hi in range(other_candi.shape[0]):
                            person = {}
                            person[str(joint_idx)] = joint_candi_cur_persons[other_candi[hi], :]
                            persons.append(person)
                            persons_ends = np.zeros((1, 2))
                            persons_ends[0, :] = end_candi_cur_persons[other_candi[hi], :]
                            persons_ends_list.append(persons_ends)
            for person in persons:
                people.append(person)
        else:
            person = {}
            for ji in range(0, len(joint_of_person_idx)):
                person[str(int(all_joint_candi_np_array[joint_of_person_idx[ji], 4]))] = all_joint_candi_np_array[joint_of_person_idx[ji], :]
            people.append(person)

    
    if objpos is not None and scale_provided is not None and center_box is not None:
        # Exclude out of group persons
        extend_pixels = center_box_extend_pixels
        extend_pixels = extend_pixels / base_scale
        extend_center_box = np.zeros((4, 1)) 
        extend_center_box[0] = max(0, int(center_box[0] - extend_pixels))
        extend_center_box[1] = max(0, int(center_box[1] - extend_pixels))
        extend_center_box[2] = min(im_width, int(center_box[2] + extend_pixels))
        extend_center_box[3] = min(im_height, int(center_box[3] + extend_pixels))

        num_of_people = len(people)
        center_of_mass = np.zeros((num_of_people, 2))

        for pi in range(0, num_of_people):
            person = people[pi]
            point = {}
            point['x'] = []
            point['y'] = []
            for ji in range(0, num_of_joints):
                if str(ji) in person:
                    point['x'].append(person[str(ji)][0])
                    point['y'].append(person[str(ji)][1])
            if len(point['x']) > 0 and len(point['y']) > 0:
                center_of_mass[pi, 0] = np.mean(point['x'])
                center_of_mass[pi, 1] = np.mean(point['y'])

        isInExtendedBBox = np.zeros((num_of_people, 1))
        for pi in range(0, num_of_people):
            com = center_of_mass[pi, :]
            if (com[0] >= extend_center_box[0] and com[1] >= extend_center_box[1]) and (com[0] <= extend_center_box[2] and com[1] <= extend_center_box[3]):
                isInExtendedBBox[pi] = 1

        people_in_center_box = []
        for pi in range(0, num_of_people):
            if isInExtendedBBox[pi] == 1:
                people_in_center_box.append(people[pi])
    else:
        people_in_center_box = people

    # Return prediction results
    joint_idx_mapping = [9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5]
    annopoints_array = []
    
    for pi in range(0, len(people_in_center_box)):
        person = people_in_center_box[pi]
        point = {}
        point['x'] = []
        point['y'] = []
        point['score'] = []
        point['id'] = []
        for ji in range(0, 14):
            if str(ji) in person:
                point['x'].append(person[str(ji)][0])
                point['y'].append(person[str(ji)][1])
                point['score'].append(person[str(ji)][2])
                point['id'].append(joint_idx_mapping[ji])
        points_struct = fromarrays([point['x'], point['y'], point['id'], point['score']], names=['x', 'y', 'id', 'score'])
        if len(points_struct) < 4:
            continue
        annopoints = {}
        annopoints['point'] = points_struct
        annopoints_array.append(annopoints)    
    
    # If there are no detected points, add random dummy persons
    dummy_joint_id = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    if len(annopoints_array) == 0:
        for pi in range(0, np.random.randint(2, 5)):
            point = {}
            point['x'] = []
            point['y'] = []
            point['score'] = []
            point['id'] = []
            for ji in range(0, np.random.randint(2, len(dummy_joint_id))):
                point['x'].append(np.float64(crop_size / 2.0))
                point['y'].append(np.float64(crop_size / 2.0))
                point['score'].append(np.float64(0.5))
                point['id'].append(int(dummy_joint_id[ji]))
            points_struct = fromarrays([point['x'], point['y'], point['id'], point['score']], names=['x', 'y', 'id', 'score'])
            annopoints = {}
            annopoints['point'] = points_struct
            annopoints_array.append(annopoints)
    
    mp_pose = fromarrays([annopoints_array], names=['annopoints'])

    if visualization:
        vis_mppe_results(im, people_in_center_box, save_im=True, save_path=vis_im_path)    

    return mp_pose
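The four shifted comparisons used above to find joint candidates form a self-contained non-maximum suppression; a minimal runnable sketch of that step on a toy heatmap, using only NumPy and SciPy, is shown below.

import numpy as np
from scipy.ndimage import gaussian_filter

def find_heatmap_peaks(heatmap_ori, conf_th=0.1, sigma=3):
    # Smooth before comparing neighbours, as in the example above.
    heatmap = gaussian_filter(heatmap_ori, sigma=sigma)

    # Shift the map by one pixel in each of the four directions.
    left = np.zeros_like(heatmap)
    left[1:, :] = heatmap[:-1, :]
    right = np.zeros_like(heatmap)
    right[:-1, :] = heatmap[1:, :]
    up = np.zeros_like(heatmap)
    up[:, 1:] = heatmap[:, :-1]
    down = np.zeros_like(heatmap)
    down[:, :-1] = heatmap[:, 1:]

    # A peak is >= all four neighbours and above the confidence threshold.
    peaks_binary = np.logical_and.reduce(
        (heatmap >= left, heatmap >= right, heatmap >= up, heatmap >= down, heatmap > conf_th))
    ys, xs = np.nonzero(peaks_binary)
    # Score each peak on the unsmoothed map, as the example does.
    return [(int(x), int(y), float(heatmap_ori[y, x])) for x, y in zip(xs, ys)]

# Toy usage: a single bright pixel gives one peak at (x=40, y=24) with score 100.0.
toy = np.zeros((64, 64))
toy[24, 40] = 100.0
print(find_heatmap_peaks(toy, conf_th=0.1))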
Example #2
    def __getitem__(self, index):
        # Select a training sample
        train_item = self.pose_anno_list[index]

        # Load training image
        im_name = train_item['im_name']
        im = self.loader(os.path.join(self.im_root, im_name))

        # Get parsing annotation 
        name_prefix = im_name.split('.')[0]
        parsing_anno_name = name_prefix + '.png'
        parsing_anno_path = os.path.join(self.parsing_anno_root, parsing_anno_name)
        parsing_anno = cv2.imread(parsing_anno_path, 0)

        # Get pose annotation 
        joints_all_info = np.array(train_item['joints'])
        joints_loc = np.zeros((joints_all_info.shape[0], 2))
        joints_loc[:, :] = joints_all_info[:, 0:2]

        # Reorder joints from MPI to ours
        joints_loc = joint_trans.transform_mpi_to_ours(joints_loc)

        # Get visibility of joints (The visibility information provided by the annotation is not accurate)
        coord_sum = np.sum(joints_loc, axis=1)
        visibility = coord_sum != 0

        # Get person center and scale
        person_center = np.array([train_item['objpos']])
        scale_provided = train_item['scale_provided']

        # Random scaling
        scaled_im, scale_param = data_aug.augmentation_scale(im, scale_provided, target_dist=self.target_dist, scale_min=self.scale_min, scale_max=self.scale_max)
        scaled_joints, scaled_center = joint_trans.scale_coords(joints_loc, person_center, scale_param)

        # Random rotating
        rotated_im, rotate_param = data_aug.augmentation_rotate(scaled_im, max_rotate_degree=self.max_rotate_degree)
        rotated_joints, rotated_center = joint_trans.rotate_coords(scaled_joints, scaled_center, rotate_param)

        # Random cropping
        cropped_im, crop_param = data_aug.augmentation_cropped(rotated_im, rotated_center, crop_x=self.crop_size, crop_y=self.crop_size, max_center_trans=self.max_center_trans)
        cropped_joints, cropped_center = joint_trans.crop_coords(rotated_joints, rotated_center, crop_param)
        
        # Random flipping
        flipped_im, flip_param = data_aug.augmentation_flip(cropped_im, flip_prob=self.flip_prob)
        flipped_joints, flipped_center = joint_trans.flip_coords(cropped_joints, cropped_center, flip_param, flipped_im.shape[1])

        # If flip, then swap the visibility of left and right joints
        if flip_param:
            right_idx = [2, 3, 4, 8, 9, 10]
            left_idx = [5, 6, 7, 11, 12, 13]
            for i in range(0, 6):
                temp_visibility = visibility[right_idx[i]]
                visibility[right_idx[i]] = visibility[left_idx[i]]
                visibility[left_idx[i]] = temp_visibility

        # Generate pose target maps
        # Integer division so the grid size stays an int under Python 3
        grid_x = flipped_im.shape[1] // self.pose_net_stride
        grid_y = flipped_im.shape[0] // self.pose_net_stride
        pose_target = target_gen.gen_pose_target(flipped_joints, visibility, self.pose_net_stride, grid_x, grid_y, self.sigma)

        # Generate parsing target maps 
        parsing_target = target_gen.gen_parsing_target(parsing_anno, 
                                                       scale_param=scale_param, 
                                                       rotate_param=[rotate_param, rotated_im.shape[1], rotated_im.shape[0]],
                                                       crop_param=[crop_param, cropped_im.shape[1], cropped_im.shape[0]],
                                                       flip_param=flip_param,
                                                       stride=self.parsing_net_stride)

        # Transform
        if self.transform is not None:
            aug_im = self.transform(flipped_im)
        else:
            aug_im = flipped_im
        
        # Visualize target maps
        if self.is_visualization:
            print('Visualize pose targets')
            vis_utils.vis_gaussian_maps(flipped_im, pose_target, self.pose_net_stride, save_im=True)
            vis_utils.vis_parsing_maps(flipped_im, parsing_target, self.parsing_net_stride, save_im=True)

        return aug_im, pose_target, parsing_target
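target_gen.gen_pose_target is project code and is not shown in this listing. As a rough illustration of the usual technique (one Gaussian confidence map per joint on the stride-reduced grid, using the same stride / 2 - 0.5 grid-centre convention as Example #1), the sketch below is a plausible stand-in, not the repository's exact implementation.

import numpy as np

def gen_gaussian_target(joints, visibility, stride, grid_x, grid_y, sigma):
    # joints: (J, 2) array of (x, y) locations in input-image pixels.
    # Returns a (J, grid_y, grid_x) array of Gaussian confidence maps.
    start = stride / 2.0 - 0.5                    # centre of the first grid cell
    xs = start + np.arange(grid_x) * stride
    ys = start + np.arange(grid_y) * stride
    xx, yy = np.meshgrid(xs, ys)                  # each has shape (grid_y, grid_x)
    target = np.zeros((joints.shape[0], grid_y, grid_x))
    for ji in range(joints.shape[0]):
        if not visibility[ji]:
            continue                              # invisible joints stay all-zero
        d2 = (xx - joints[ji, 0]) ** 2 + (yy - joints[ji, 1]) ** 2
        target[ji] = np.exp(-d2 / (2.0 * sigma ** 2))
    return target

# Toy usage on a 256x256 crop with stride 4: a 64x64 map per joint.
joints = np.array([[128.0, 96.0], [0.0, 0.0]])
visibility = np.array([1, 0])
maps = gen_gaussian_target(joints, visibility, stride=4, grid_x=64, grid_y=64, sigma=7)
print(maps.shape, maps[0].max())                  # (2, 64, 64) and a peak close to 1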
Example #3
def single_image_testing_for_parsing_on_lip_dataset(net, \
                                                    im, \
                                                    transform=None,
                                                    crop_size=256, \
                                                    num_of_parts=20, \
                                                    visualization=False, \
                                                    vis_im_path='exps/preds/vis_results/parsing_vis_result.jpg'):

    # height, width and long edge of image
    im_height = im.shape[0]
    im_width = im.shape[1]
    long_edge = max(im_height, im_width)

    # Use the image center as the person center
    center = np.array([[im_width / 2.0, im_height / 2.0]])

    # Resize the long edge of image to crop_size
    scale_provided = long_edge * 1.0 / crop_size
    scale = 1 / scale_provided
    resized_im = cv2.resize(im,
                            None,
                            fx=scale,
                            fy=scale,
                            interpolation=cv2.INTER_CUBIC)
    center[0, 0] = int(center[0, 0] * scale)
    center[0, 1] = int(center[0, 1] * scale)

    # Crop image for testing
    cropped_im, cropped_param = augmentation_cropped(resized_im,
                                                     center,
                                                     crop_x=crop_size,
                                                     crop_y=crop_size,
                                                     max_center_trans=0)

    # Transform image
    if transform is not None:
        input_im = transform(cropped_im)
    else:
        input_im = cropped_im.copy()

    # Preparing input variable
    input_im = input_im.view(-1, 3, input_im.size(1), input_im.size(2))
    input_im = input_im.cuda(non_blocking=True)
    input_var = torch.autograd.Variable(input_im, volatile=True)

    # Get predicted heatmaps and convert them to numpy array
    pose_output, parsing_output = net(input_var)
    if isinstance(parsing_output, list):
        parsing_output = parsing_output[-1]
    parsing_output = parsing_output.data
    parsing_output = parsing_output.view(parsing_output.size(1),
                                         parsing_output.size(2),
                                         parsing_output.size(3))
    parsing_output = parsing_output.cpu().numpy()
    output_argmax = parsing_output.argmax(0)

    parsing = np.zeros((resized_im.shape[0], resized_im.shape[1]))
    parsing[cropped_param[0, 1]:cropped_param[0, 5],
            cropped_param[0, 0]:cropped_param[
                0, 4]] = output_argmax[cropped_param[0, 3]:cropped_param[0, 7],
                                       cropped_param[0, 2]:cropped_param[0, 6]]

    parsing = cv2.resize(parsing,
                         dsize=(im_width, im_height),
                         interpolation=cv2.INTER_NEAREST)

    if visualization:
        vis_parsing_results(im,
                            parsing,
                            stride=1,
                            save_im=True,
                            save_path=vis_im_path)

    return parsing
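The final steps above, taking the per-pixel argmax over the class channel and resizing the label map back to the original resolution with nearest-neighbour interpolation, can be exercised on dummy data. The sketch below covers only those two steps; the crop-parameter bookkeeping is left out because it depends on the project's augmentation_cropped helper.

import numpy as np
import cv2

num_of_parts, crop_size = 20, 256
im_height, im_width = 300, 400

# Stand-in for the network's parsing scores on a 256x256 test crop.
parsing_scores = np.random.rand(num_of_parts, crop_size, crop_size).astype(np.float32)

# Per-pixel class labels.
label_map = parsing_scores.argmax(0).astype(np.uint8)

# Resize back to the original image size; INTER_NEAREST keeps the labels
# discrete instead of blending neighbouring class indices.
parsing = cv2.resize(label_map, dsize=(im_width, im_height),
                     interpolation=cv2.INTER_NEAREST)
print(parsing.shape)   # (300, 400)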
Example #4
def single_image_testing_for_pose_on_lip_dataset(net, \
                                                 im, \
                                                 transform=None, \
                                                 stride=4, \
                                                 crop_size=256, \
                                                 scale_multiplier=[1], \
                                                 num_of_joints=16, \
                                                 visualization=False, \
                                                 vis_im_path='exps/preds/vis_results/hpe_vis_result.jpg'):

    # Get the original image size
    im_height = im.shape[0]
    im_width = im.shape[1]
    long_edge = max(im_height, im_width)

    # Use the image center as the person center
    ori_center = np.array([[im_width / 2.0, im_height / 2.0]])

    # Resize the long edge of image to crop_size
    scale_provided = long_edge * 1.0 / crop_size
    base_scale = 1.0 / scale_provided

    # Variables to store multi-scale test images and their crop parameters
    cropped_im_list = []
    cropped_param_list = []
    flipped_cropped_im_list = []

    for sm in scale_multiplier:
        # Resized image to base scales
        scale = base_scale * sm
        resized_im = cv2.resize(im,
                                None,
                                fx=scale,
                                fy=scale,
                                interpolation=cv2.INTER_CUBIC)
        scaled_center = np.zeros([1, 2])
        scaled_center[0, 0] = int(ori_center[0, 0] * scale)
        scaled_center[0, 1] = int(ori_center[0, 1] * scale)

        # Get flipped images
        flipped_resized_im = cv2.flip(resized_im, 1)

        # Crop image for testing
        cropped_im, cropped_param = augmentation_cropped(resized_im,
                                                         scaled_center,
                                                         crop_x=crop_size,
                                                         crop_y=crop_size,
                                                         max_center_trans=0)
        cropped_im_list.append(cropped_im)
        cropped_param_list.append(cropped_param)

        # Get flipped cropped image for testing
        flipped_cropped_im = cv2.flip(cropped_im, 1)
        flipped_cropped_im_list.append(flipped_cropped_im)

    # Transform image
    input_im_list = []
    flipped_input_im_list = []
    if transform is not None:
        for cropped_im in cropped_im_list:
            input_im = transform(cropped_im)
            input_im_list.append(input_im)
        for flipped_cropped_im in flipped_cropped_im_list:
            flipped_input_im = transform(flipped_cropped_im)
            flipped_input_im_list.append(flipped_input_im)
    else:
        for cropped_im in cropped_im_list:
            input_im = cropped_im.copy()
            input_im_list.append(input_im)
        for flipped_cropped_im in flipped_cropped_im_list:
            flipped_input_im = flipped_cropped_im.copy()
            flipped_input_im_list.append(flipped_input_im)

    # Preparing input variable
    batch_input_im = input_im_list[0].view(-1, 3, crop_size, crop_size)
    for smi in range(1, len(input_im_list)):
        batch_input_im = torch.cat((batch_input_im, input_im_list[smi].view(
            -1, 3, crop_size, crop_size)), 0)
    batch_input_im = batch_input_im.cuda(non_blocking=True)
    batch_input_var = torch.autograd.Variable(batch_input_im, volatile=True)

    # Preparing flipped input variable
    batch_flipped_input_im = flipped_input_im_list[0].view(
        -1, 3, crop_size, crop_size)
    for smi in range(1, len(flipped_input_im_list)):
        batch_flipped_input_im = torch.cat(
            (batch_flipped_input_im, flipped_input_im_list[smi].view(
                -1, 3, crop_size, crop_size)), 0)
    batch_flipped_input_im = batch_flipped_input_im.cuda(non_blocking=True)
    batch_flipped_input_var = torch.autograd.Variable(batch_flipped_input_im,
                                                      volatile=True)

    # Get predicted heatmaps and convert them to numpy array
    pose_output, parsing_output = net(batch_input_var)
    if isinstance(pose_output, list):
        pose_output = pose_output[-1]
    pose_output = pose_output.data
    pose_output = pose_output.cpu().numpy()

    # Get predicted flipped heatmaps and convert them to numpy array
    flipped_pose_output, flipped_parsing_output = net(batch_flipped_input_var)
    if isinstance(flipped_pose_output, list):
        flipped_pose_output = flipped_pose_output[-1]
    flipped_pose_output = flipped_pose_output.data
    flipped_pose_output = flipped_pose_output.cpu().numpy()

    # First fuse the original prediction with flipped prediction
    fused_pose_output = np.zeros(
        (pose_output.shape[0], pose_output.shape[1] - 1, crop_size, crop_size))
    flipped_idx = [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 14, 15]
    for smi in range(0, len(scale_multiplier)):
        # Get single scale output
        single_scale_output = pose_output[smi, :, :, :].copy()
        single_scale_flipped_output = flipped_pose_output[smi, :, :, :].copy()

        # fuse each joint's heatmap
        for ji in range(0, num_of_joints):
            # Get the original heatmap
            heatmap = single_scale_output[ji, :, :].copy()
            heatmap = cv2.resize(heatmap, (crop_size, crop_size),
                                 interpolation=cv2.INTER_LINEAR)

            # Get the flipped heatmap
            flipped_heatmap = single_scale_flipped_output[
                flipped_idx[ji], :, :].copy()
            flipped_heatmap = cv2.resize(flipped_heatmap,
                                         (crop_size, crop_size),
                                         interpolation=cv2.INTER_LINEAR)
            flipped_heatmap = cv2.flip(flipped_heatmap, 1)

            # Average the original heatmap with flipped heatmap
            heatmap += flipped_heatmap
            heatmap *= 0.5

            fused_pose_output[smi, ji, :, :] = heatmap

    # Second fuse multi-scale predictions
    ms_fused_pose_output = np.zeros(
        (fused_pose_output.shape[1], crop_size, crop_size))
    for smi in range(0, len(scale_multiplier)):
        single_scale_output = fused_pose_output[smi, :, :, :]
        crop_param = cropped_param_list[smi]

        # Crop the heatmaps without padding
        cropped_single_scale_output = single_scale_output[:, crop_param[
            0, 3]:crop_param[0, 7], crop_param[0, 2]:crop_param[0, 6]]

        # Resize the cropped heatmaps to base scale
        scaled_single_scale_output = cropped_single_scale_output.transpose(
            (1, 2, 0))
        scaled_single_scale_output = cv2.resize(scaled_single_scale_output,
                                                None,
                                                fx=1.0 / scale_multiplier[smi],
                                                fy=1.0 / scale_multiplier[smi],
                                                interpolation=cv2.INTER_LINEAR)
        scaled_single_scale_output = scaled_single_scale_output.transpose(
            (2, 0, 1))

        # Cropping position
        ul_x = int((crop_size - scaled_single_scale_output.shape[2]) / 2.0)
        ul_y = int((crop_size - scaled_single_scale_output.shape[1]) / 2.0)
        br_x = ul_x + scaled_single_scale_output.shape[2]
        br_y = ul_y + scaled_single_scale_output.shape[1]

        # Paste to base-scale heatmaps
        ms_fused_pose_output[:, ul_y:br_y,
                             ul_x:br_x] += scaled_single_scale_output

    # Normalize with number of scales
    ms_fused_pose_output = ms_fused_pose_output / len(scale_multiplier)

    pose = np.zeros((num_of_joints, 3))
    cropped_param = cropped_param_list[scale_multiplier.index(1)]
    for ji in range(0, num_of_joints):
        heatmap = ms_fused_pose_output[ji, :, :]
        heatmap = gaussian_filter(heatmap, sigma=3)

        pred_pos = np.unravel_index(heatmap.argmax(), np.shape(heatmap))
        pred_x = (pred_pos[1] - cropped_param[0, 2] +
                  cropped_param[0, 0]) / base_scale
        pred_y = (pred_pos[0] - cropped_param[0, 3] +
                  cropped_param[0, 1]) / base_scale

        pose[ji, 0] = pred_x
        pose[ji, 1] = pred_y
        pose[ji, 2] = heatmap[pred_pos[0], pred_pos[1]]

    if visualization:
        vis_hpe_results(im, pose, save_im=True, save_path=vis_im_path)

    return pose
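In the multi-scale fusion above, each scale's fused heatmaps are cropped, resized back to the base scale, pasted centred into a crop_size x crop_size canvas, and finally averaged over the number of scales. A minimal runnable sketch of that paste-and-average, assuming scale multipliers >= 1 so the rescaled maps fit inside the canvas:

import numpy as np
import cv2

crop_size = 256
scale_multiplier = [1.0, 1.25, 1.5]
ms_fused = np.zeros((crop_size, crop_size), dtype=np.float32)

for sm in scale_multiplier:
    # Stand-in for a single joint's heatmap predicted at this scale.
    heatmap = np.random.rand(crop_size, crop_size).astype(np.float32)

    # Resize back to the base scale (factor 1 / sm), as in the example above.
    resized = cv2.resize(heatmap, None, fx=1.0 / sm, fy=1.0 / sm,
                         interpolation=cv2.INTER_LINEAR)

    # Paste centred into the base-scale canvas.
    h, w = resized.shape
    ul_x = (crop_size - w) // 2
    ul_y = (crop_size - h) // 2
    ms_fused[ul_y:ul_y + h, ul_x:ul_x + w] += resized

# Normalize with the number of scales, as in the example above.
ms_fused /= len(scale_multiplier)
print(ms_fused.shape)   # (256, 256)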
Example #5
    def __getitem__(self, index):

        # Select a training sample
        train_item = self.train_list[index]

        # Load training image
        im_name = train_item['img_paths']
        im = self.loader(os.path.join(self.root, im_name))

        # Get joint info
        joints_all_info = np.array(train_item['joint_self'])
        joints_loc = np.zeros((joints_all_info.shape[0], 2))
        joints_loc[:, :] = joints_all_info[:, 0:2]
        
        # Reorder joints from MPI to ours
        joints_loc = joint_trans.transform_mpi_to_ours(joints_loc)

        # Get visibility of joints (joints with all-zero coordinates are treated as never visible)
        coord_sum = np.sum(joints_loc, axis=1)
        self_ori_visibility = coord_sum != 0
        self_ori_visibility = self_ori_visibility.astype(int)

        # Get person center and scale
        person_center = np.array([train_item['objpos']])
        scale_provided = train_item['scale_provided']

        # Random scaling
        scaled_im, scale_param = data_aug.augmentation_scale(im, scale_provided, target_dist=self.target_dist, scale_min=self.scale_min, scale_max=self.scale_max)
        scaled_joints, scaled_center = joint_trans.scale_coords(joints_loc, person_center, scale_param)
           
        # Random rotating
        rotated_im, rotate_param = data_aug.augmentation_rotate(scaled_im, max_rotate_degree=self.max_rotate_degree)
        rotated_joints, rotated_center = joint_trans.rotate_coords(scaled_joints, scaled_center, rotate_param)

        # Random cropping
        cropped_im, crop_param = data_aug.augmentation_cropped(rotated_im, rotated_center, crop_x=self.crop_size, crop_y=self.crop_size, max_center_trans=self.max_center_trans)
        cropped_joints, cropped_center = joint_trans.crop_coords(rotated_joints, rotated_center, crop_param)
        
        # Random flipping
        flipped_im, flip_param = data_aug.augmentation_flip(cropped_im, flip_prob=self.flip_prob)
        flipped_joints, flipped_center = joint_trans.flip_coords(cropped_joints, cropped_center, flip_param, flipped_im.shape[1])

        # If flip, then swap the visibility of left and right joints
        if flip_param:
            right_idx = [2, 3, 4, 8, 9, 10]
            left_idx = [5, 6, 7, 11, 12, 13]
            for i in range(0, 6):
                temp_visibility = self_ori_visibility[right_idx[i]]
                self_ori_visibility[right_idx[i]] = self_ori_visibility[left_idx[i]]
                self_ori_visibility[left_idx[i]] = temp_visibility

        onplane = np.logical_and(flipped_joints >= 0, flipped_joints < self.crop_size) 
        self_visibility = np.logical_and(onplane[:, 0], onplane[:, 1]).astype(int)
        self_visibility = self_ori_visibility * self_visibility

        # Generate target maps
        # Integer division so the grid size stays an int under Python 3
        grid_x = flipped_im.shape[1] // self.stride
        grid_y = flipped_im.shape[0] // self.stride

        conf_target = target_gen.gen_gaussian_maps(flipped_joints, self_visibility, self.stride, grid_x, grid_y, self.sigma)

        embedding_center = np.zeros((1, 2))
        if self_ori_visibility[15]:
            embedding_center[0, :] = flipped_joints[15, :]
        else:
            embedding_center[0, :] = flipped_center[0, :]
        
        # For recovering the points, remember that x = start + g_x * stride        
        orie_target, orie_target_weight = target_gen.gen_orientation_maps(flipped_joints, self_visibility, embedding_center, self.stride, grid_x, grid_y, self.sigma)

        # The number of other people in the image
        num_other_people = int(train_item['numOtherPeople'])

        # If there are other people in the image, then...
        if num_other_people > 0:

            # The joints of all other people
            joint_others = train_item['joint_others']
            other_joints_all_info = []
            
            # The centers and scales of other people
            other_objpos = train_item['objpos_other']
            other_objpos_list = []

            if num_other_people == 1:
                other_joints_all_info.append(np.array(joint_others)) 
                np_other_objpos = np.zeros((1, 2))
                np_other_objpos[0, :] = np.array(other_objpos)
                other_objpos_list.append(np_other_objpos)    
            else:
                for oi in range(0, num_other_people):
                    other_joints_all_info.append(np.array(joint_others[oi]))
                    np_other_objpos = np.zeros((1, 2))
                    np_other_objpos[0, :] = np.array(other_objpos[oi])
                    other_objpos_list.append(np_other_objpos)
        
            # Reorder joints of other people from MPI to ours
            other_joints_loc_list = []  
            other_ori_visibility_list = []
            for oi in range(0, num_other_people):
            
                other_joints_all_info[oi] = joint_trans.transform_mpi_to_ours(other_joints_all_info[oi])    

                # Get the joint location of other joints
                other_joints_loc = other_joints_all_info[oi][:, 0:2]
                other_joints_loc_list.append(other_joints_loc)

                # Get visibility of joints (joints with all-zero coordinates are treated as never visible)
                coord_sum = np.sum(other_joints_loc, axis=1)
                other_ori_visibility = coord_sum != 0
                other_ori_visibility = other_ori_visibility.astype(int)   
                other_ori_visibility_list.append(other_ori_visibility)

            # Random scaling
            scaled_other_joints_list = []
            scaled_other_objpos_list = []
            for oi in range(0, num_other_people):
                scaled_other_joints, temp_scaled_center = joint_trans.scale_coords(other_joints_loc_list[oi], person_center, scale_param)
                scaled_other_joints_list.append(scaled_other_joints)
            
                scaled_other_objpos, temp_scaled_center = joint_trans.scale_coords(other_objpos_list[oi], person_center, scale_param)
                scaled_other_objpos_list.append(scaled_other_objpos)

            # Random rotating
            rotated_other_joints_list = []
            rotated_other_objpos_list = []
            for oi in range(0, num_other_people):
                rotated_other_joints, temp_rotated_center = joint_trans.rotate_coords(scaled_other_joints_list[oi], scaled_center, rotate_param) 
                rotated_other_joints_list.append(rotated_other_joints)

                rotated_other_objpos, temp_rotated_center = joint_trans.rotate_coords(scaled_other_objpos_list[oi], scaled_center, rotate_param) 
                rotated_other_objpos_list.append(rotated_other_objpos)
                
            # Random cropping
            cropped_other_joints_list = []
            cropped_other_objpos_list = []
            for oi in range(0, num_other_people):
                cropped_other_joints, temp_cropped_center = joint_trans.crop_coords(rotated_other_joints_list[oi], rotated_center, crop_param)
                cropped_other_joints_list.append(cropped_other_joints)
            
                cropped_other_objpos, temp_cropped_center = joint_trans.crop_coords(rotated_other_objpos_list[oi], rotated_center, crop_param)
                cropped_other_objpos_list.append(cropped_other_objpos)

            # Random flipping
            flipped_other_joints_list = []
            flipped_other_objpos_list = []
            other_visibility_list = []
            for oi in range(0, num_other_people):
                flipped_other_joints, temp_flipped_center = joint_trans.flip_coords(cropped_other_joints_list[oi], cropped_center, flip_param, flipped_im.shape[1]) 
                flipped_other_joints_list.append(flipped_other_joints)

                onplane = np.logical_and(flipped_other_joints >= 0, flipped_other_joints < self.crop_size) 
                other_visibility = np.logical_and(onplane[:, 0], onplane[:, 1]).astype(int)

                other_ori_visibility = other_ori_visibility_list[oi]

                # If flip, then swap the visibility of left and right joints
                if flip_param:
                    right_idx = [2, 3, 4, 8, 9, 10]
                    left_idx = [5, 6, 7, 11, 12, 13]
                    for i in range(0, 6):
                        temp_visibility = other_ori_visibility[right_idx[i]]
                        other_ori_visibility[right_idx[i]] = other_ori_visibility[left_idx[i]]
                        other_ori_visibility[left_idx[i]] = temp_visibility

                other_visibility = other_visibility * other_ori_visibility
                other_visibility_list.append(other_visibility)

                flipped_other_objpos = cropped_other_objpos_list[oi].copy()
                if flip_param:
                    flipped_other_objpos[:, 0] = flipped_im.shape[1] - 1 - flipped_other_objpos[:, 0]
                flipped_other_objpos_list.append(flipped_other_objpos)

            # Generate target maps for other people
            for oi in range(0, num_other_people):
                other_conf_target = target_gen.gen_gaussian_maps(flipped_other_joints_list[oi], other_visibility_list[oi], self.stride, grid_x, grid_y, self.sigma)
                conf_target += other_conf_target

            conf_target[conf_target > 1] = 1 
            max_target_map = conf_target[0:joints_all_info.shape[0], :, :].max(0)
            conf_target[joints_all_info.shape[0], :, :] = 1 - max_target_map            

            other_embedding_center_list = []
            for oi in range(0, num_other_people):
                other_embedding_center = np.zeros((1, 2))
                if other_ori_visibility_list[oi][15]:
                    other_embedding_center[0, :] = flipped_other_joints_list[oi][15, :]
                else:
                    other_embedding_center[0, :] = flipped_other_objpos_list[oi][0, :]
                other_embedding_center_list.append(other_embedding_center)

            # Generate orientation target maps for all people
            flipped_other_joints_list.append(flipped_joints)
            other_visibility_list.append(self_visibility)
            other_embedding_center_list.append(embedding_center)

            orie_target, orie_target_weight = target_gen.gen_orientation_maps_from_list(flipped_other_joints_list, other_visibility_list, other_embedding_center_list, self.stride, grid_x, grid_y, self.sigma)

        # Transform
        if self.transform is not None:
            aug_im = self.transform(flipped_im)
        else:
            aug_im = flipped_im
        
        # Visualize target maps
        if self.is_visualization:
            print('Visualize joint gaussian maps and orientation maps')
            vis_utils.vis_gaussian_maps(flipped_im, conf_target, self.stride, save_im=True)
            vis_utils.vis_orientation_maps(flipped_im, orie_target, self.stride, grid_x, grid_y, save_im=True)

        return aug_im, conf_target, orie_target, orie_target_weight
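The element-by-element left/right visibility swap used after flipping (here and in Example #2) can also be written with NumPy fancy indexing, since indexing with a list on the right-hand side yields copies before either assignment happens; a minimal equivalent sketch:

import numpy as np

visibility = np.array([1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1])
right_idx = [2, 3, 4, 8, 9, 10]
left_idx = [5, 6, 7, 11, 12, 13]

# Both fancy-indexed reads are evaluated (and copied) before the assignments,
# so the swap does not need a temporary variable.
visibility[right_idx], visibility[left_idx] = visibility[left_idx], visibility[right_idx]
print(visibility)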