Example #1
    def __getitem__(self, index):
        # Load training image
        im_name = self.im_list[index]

        im_path = os.path.join(self.root, self.dataset + '_images',
                               im_name + '.jpg')
        parsing_anno_path = os.path.join(self.root,
                                         self.dataset + '_segmentations',
                                         im_name + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        parsing_anno = np.zeros((h, w), dtype=np.int64)

        # Get center and scale
        center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

            if self.dataset == 'train' or self.dataset == 'trainval':

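                # Randomly jitter the scale and, about 60% of the time, the rotation.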
                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                    if random.random() <= 0.6 else 0

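                # Random horizontal flip: mirror the image and annotation, move the
                # crop center, and swap the paired left/right part labels.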
                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]

                    center[0] = im.shape[1] - center[0] - 1
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

        trans = get_affine_transform(center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans, (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': im_name,
            'center': center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset != 'train':
            return input, meta
        else:

            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans, (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255))

            label_edge = generate_edge(label_parsing)

            label_parsing = torch.from_numpy(label_parsing)
            label_edge = torch.from_numpy(label_edge)

            return input, label_parsing, label_edge, meta
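Several helpers used above (notably _box2cs and generate_edge) are defined elsewhere in the repository and are not part of this excerpt. Below is a minimal sketch of what they might look like, inferred only from the call sites: _box2cs is assumed to take an [x, y, w, h] box and return a center plus a crop-aspect-matched scale, and generate_edge is assumed to mark pixels whose parsing label differs from a neighbour. Names, signatures, and details are assumptions, not the original implementations.

import numpy as np

def _box2cs(self, box):
    # Assumed to be a method of the same dataset class. Interprets box as
    # [x, y, w, h] and returns a center point plus a scale whose aspect ratio
    # matches the target crop (crop_size is (height, width) in the code above).
    x, y, w, h = box
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    aspect_ratio = self.crop_size[1] / self.crop_size[0]  # width / height
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    return center, np.array([w, h], dtype=np.float32)

def generate_edge(label, ignore_index=255):
    # Assumed behaviour: mark a pixel as edge when its parsing label differs
    # from the pixel below or to the right, skipping the ignore label (255).
    h, w = label.shape
    edge = np.zeros((h, w), dtype=np.uint8)
    down = ((label[1:, :] != label[:-1, :])
            & (label[1:, :] != ignore_index)
            & (label[:-1, :] != ignore_index))
    right = ((label[:, 1:] != label[:, :-1])
             & (label[:, 1:] != ignore_index)
             & (label[:, :-1] != ignore_index))
    edge[1:, :][down] = 1
    edge[:-1, :][down] = 1
    edge[:, 1:][right] = 1
    edge[:, :-1][right] = 1
    return edge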
Example #2
    def __getitem__(self, index):
        im_name = self.im_list[index]
        im_path = os.path.join(self.root, self.dataset + '_images',
                               im_name + '.jpg')
        parsing_anno_path = os.path.join(self.root,
                                         self.dataset + '_segmentations',
                                         im_name + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        parsing_anno = np.zeros((h, w), dtype=np.int64)

        # get pose anno
        if self.dataset == 'train' or self.dataset == 'val':
            joints_all_info = np.array(self.pose_info[im_name])
            joints_loc = np.zeros((joints_all_info.shape[0], 2))
            joints_loc[:, :] = joints_all_info[:, 0:2]  # 1st and 2nd column

            # get visibility of joints
            coord_sum = np.sum(joints_loc, axis=1)
            visibility = coord_sum != 0

        # Get center and scale
        center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)

            if self.dataset == 'train' or self.dataset == 'trainval':

                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                    if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]

                    center[0] = im.shape[1] - center[0] - 1
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

                    # flip the joints
                    joints_loc = flip_joints(joints_loc, w)

                    # swap the visibility of left and right joints
                    r_joint = [0, 1, 2, 10, 11, 12]
                    l_joint = [3, 4, 5, 13, 14, 15]
                    for i in range(0, 6):
                        temp_visibility = visibility[r_joint[i]]
                        visibility[r_joint[i]] = visibility[l_joint[i]]
                        visibility[l_joint[i]] = temp_visibility

        trans = get_affine_transform(center, s, r, self.crop_size)

        input = cv2.warpAffine(
            im,
            trans, (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': im_name,
            'center': center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'test':
            return input, meta
        else:

            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans, (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255))

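            # Heatmap grid size after downsampling by the pose network stride.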
            grid_x = int(self.crop_size[1] / self.pose_net_stride)
            grid_y = int(self.crop_size[0] / self.pose_net_stride)

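            # Map visible joints into the cropped, augmented image frame.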
            for i in range(joints_all_info.shape[0]):
                if visibility[i] > 0:
                    joints_loc[i, 0:2] = self.affine_trans(
                        joints_loc[i, 0:2], trans)

            label_pose = generate_pose(joints_loc, visibility, trans, grid_x,
                                       grid_y, self.pose_net_stride,
                                       self.sigma)
            label_edge = generate_edge(label_parsing)

            return input, label_parsing, label_pose, label_edge, meta
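Example #2 additionally calls flip_joints, self.affine_trans, and generate_pose, which are also outside this excerpt. A minimal sketch of the two free functions under stated assumptions: flip_joints is assumed to mirror joint x-coordinates and swap the same left/right joint pairs the caller later uses for the visibility swap, and generate_pose is assumed to render one Gaussian heatmap of width sigma per visible joint on a grid downsampled by the pose network stride. These are illustrative only; the actual repository implementations may differ.

import numpy as np

def flip_joints(joints, width,
                right_idx=(0, 1, 2, 10, 11, 12),
                left_idx=(3, 4, 5, 13, 14, 15)):
    # Assumed behaviour: mirror the x-coordinate of every annotated joint and
    # swap left/right joint rows with the same pairing the caller applies to
    # the visibility flags. Joints stored as (0, 0) are treated as missing.
    flipped = joints.copy()
    annotated = flipped.sum(axis=1) != 0
    flipped[annotated, 0] = width - 1 - flipped[annotated, 0]
    for r, l in zip(right_idx, left_idx):
        flipped[[r, l], :] = flipped[[l, r], :]
    return flipped

def generate_pose(joints, visibility, trans, grid_x, grid_y, stride, sigma):
    # Assumed behaviour: one Gaussian heatmap per joint on a (grid_y, grid_x)
    # grid downsampled by stride. trans is accepted only to match the call
    # site; the caller already mapped visible joints into the cropped frame.
    num_joints = joints.shape[0]
    heatmaps = np.zeros((num_joints, grid_y, grid_x), dtype=np.float32)
    ys, xs = np.mgrid[0:grid_y, 0:grid_x]
    for j in range(num_joints):
        if not visibility[j]:
            continue
        cx, cy = joints[j, 0] / stride, joints[j, 1] / stride
        heatmaps[j] = np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2)
                             / (2.0 * sigma ** 2))
    return heatmaps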