Example #1
    def _get_pre_dets(self, anns, trans_input, trans_output):
        hm_h, hm_w = self.opt.height, self.opt.width
        down_ratio = self.opt.down_ratio
        trans = trans_input
        return_hm = self.opt.pre_hm
        pre_hm = np.zeros(
            (1, hm_h, hm_w), dtype=np.float32) if return_hm else None
        pre_cts, track_ids = [], []
        for ann in anns:
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id > self.opt.num_classes or cls_id <= -99 or \
                    ('iscrowd' in ann and ann['iscrowd'] > 0):
                continue
            bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox[:2] = affine_transform(bbox[:2], trans)
            bbox[2:] = affine_transform(bbox[2:], trans)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            max_rad = 1
            if (h > 0 and w > 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                max_rad = max(max_rad, radius)
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct0 = ct.copy()
                conf = 1

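                # jitter the previous-frame center to mimic detector drift
                # (magnitude scaled by box size via opt.hm_disturb)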
                ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
                ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
                conf = 1 if np.random.random() > self.opt.lost_disturb else 0

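                # lost_disturb occasionally drops the detection (conf = 0);
                # the jittered center is then kept so the model learns to
                # re-localize lost tracks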
                ct_int = ct.astype(np.int32)
                if conf == 0:
                    pre_cts.append(ct / down_ratio)
                else:
                    pre_cts.append(ct0 / down_ratio)

                track_ids.append(ann['track_id'] if 'track_id' in ann else -1)
                if return_hm:
                    draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

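                # fp_disturb occasionally renders an extra, slightly shifted
                # peak to simulate a false positive from the previous frame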
                if np.random.random() < self.opt.fp_disturb and return_hm:
                    ct2 = ct0.copy()
                    # Hard code heatmap disturb ratio, haven't tried other numbers.
                    ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                    ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                    ct2_int = ct2.astype(np.int32)
                    draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)

        return pre_hm, pre_cts, track_ids
Example #2
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()
        joints_vis = db_rec['joints_vis'].copy()

        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                if (np.min(joints[i, :2]) < 0
                        or joints[i, 0] >= self.image_size[0]
                        or joints[i, 1] >= self.image_size[1]):
                    joints_vis[i, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source']
        }
        return input, target, target_weight, meta
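
A note on the jitter above: nearly every example in this list augments with a scale multiplier drawn from a clipped Gaussian around 1 and a rotation drawn from a clipped Gaussian that is applied only 60% of the time. A self-contained sketch of just that sampling, with sf and rf as assumed hyperparameters (e.g. 0.25 and 30 degrees):

import random
import numpy as np

def sample_scale_rotation(sf=0.25, rf=30.0):
    # scale multiplier clipped to [1 - sf, 1 + sf], centered at 1
    scale_mult = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
    # rotate only 60% of the time, clipped to [-2*rf, 2*rf] degrees
    rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
        if random.random() <= 0.6 else 0.0
    return scale_mult, rotation

print(sample_scale_rotation())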
Example #3
    def get_image_info(self, index):

        info = self.gt_db[index]
        imgpath = info['image']
        image = cv2.imread(imgpath)[:, :, ::-1]
        joints = info['joints_3d']
        joints_vis = info['joints_3d_vis'][:, 0]

        c = info['center']
        s = info['scale']
        r = 0
        if self.train_flag:
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(c, s, r, (self.crop_size, self.crop_size))
        dst_image = cv2.warpAffine(image,
                                   trans, (self.crop_size, self.crop_size),
                                   flags=cv2.INTER_LINEAR)

        for i in range(self.num_joints):
            if joints_vis[i] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        kp2d = np.concatenate([joints[:, 0:2], joints_vis[:, None]],
                              1)[self.mpii_2_lsp14]

        result_dir = '{}/{}'.format(self.save_dir, os.path.basename(imgpath))
        metas = ('mpii', imgpath, result_dir, self.empty_kp3d, self.empty_kp3d,
                 self.empty_param, self.empty_gr)

        return dst_image, kp2d, self.const_box, metas
Example #4
def data_augmentation(sample, is_train):
    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    joints = sample['joints_3d']
    joints_vis = sample['joints_3d_vis']
    c = sample['center']
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    # imgnum = sample['imgnum'] if 'imgnum' in sample else ''
    r = 0

    # used for CE mode: fix the seeds for reproducibility
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(image_file,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    input = cv2.warpAffine(data_numpy,
                           trans,
                           (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
                           flags=cv2.INTER_LINEAR)

    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, input.copy(), joints, target)

    # Normalization
    input = input.astype('float32').transpose((2, 0, 1)) / 255
    input -= np.array(cfg.MEAN).reshape((3, 1, 1))
    input /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return input, target, target_weight
    else:
        return input, target, target_weight, c, s, score, image_file
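
The final normalization converts the HWC uint8 crop to CHW float in [0, 1], then standardizes per channel with cfg.MEAN and cfg.STD. The same three lines in isolation, with hypothetical ImageNet statistics standing in for the config values:

import numpy as np

img = np.random.randint(0, 256, (256, 192, 3), dtype=np.uint8)  # HWC, uint8
mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

x = img.astype('float32').transpose((2, 0, 1)) / 255  # CHW, [0, 1]
x = (x - mean) / std                                  # per-channel standardize
print(x.shape)  # (3, 256, 192)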
Example #5
    def __getitem__(self, idx: int):
        data = self.data[idx]
        frame_idx = data["image_id"]
        x, y, w, h = data['bbox']
        # x1,y1,x2,y2 = data['orig_bbox']

        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        _, img = self.cap.read()
        
        aspect_ratio = self.cfg.MODEL.IMAGE_SIZE[1] / self.cfg.MODEL.IMAGE_SIZE[0]
        centre = np.array([x+w*.5, y+h*.5])

        if w > aspect_ratio * h:
            h = w / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array([w, h]) * 1.25
        rotation = 0
        
        trans = get_affine_transform(
            centre, scale, rotation,
            (self.cfg.MODEL.IMAGE_SIZE[1], self.cfg.MODEL.IMAGE_SIZE[0]))
        cropped_img = cv2.warpAffine(
            img, trans,
            (self.cfg.MODEL.IMAGE_SIZE[1], self.cfg.MODEL.IMAGE_SIZE[0]),
            flags=cv2.INTER_LINEAR)
        cropped_img = normalize_input(cropped_img, self.cfg)
        
        # cv2.imshow("orig", img)
        # cropped_show = denormalize_input(cropped_img, self.cfg).copy().astype(np.uint8)
        # cv2.imshow("crop", cropped_show)
        # cv2.waitKey()
        # cv2.destroyAllWindows()
        
        estimated_joints = np.zeros((self.cfg.MODEL.NUM_JOINTS, 3), dtype=np.float32)
        offsets = np.zeros((self.cfg.MODEL.NUM_JOINTS, 2), dtype=np.float32)
        offsets[:, 0] = self.frame_area[0]
        offsets[:, 1] = self.frame_area[1]
        
        estimated_joints[:, :2] = np.array(data['joints']).reshape(self.cfg.MODEL.NUM_JOINTS, 2)
        estimated_joints[:, :2] += offsets
        estimated_joints[:,  2] = np.array(data['score'])
        
        for j in range(self.cfg.MODEL.NUM_JOINTS):
            if estimated_joints[j, 2] > 0:
                estimated_joints[j, :2] = affine_transform(estimated_joints[j, :2], trans)
                estimated_joints[j, 2] *= ((estimated_joints[j, 0] >= 0) &
                                           (estimated_joints[j, 0] < self.cfg.MODEL.IMAGE_SIZE[1]) &
                                           (estimated_joints[j, 1] >= 0) &
                                           (estimated_joints[j, 1] < self.cfg.MODEL.IMAGE_SIZE[0]))

        input_pose_coord = estimated_joints[:, :2]
        input_pose_valid = np.array([1 if i not in self.cfg.ignore_kps else 0
                                     for i in range(self.cfg.MODEL.NUM_JOINTS)])
        input_pose_score = estimated_joints[:, 2]

        crop_info = np.asarray([centre[0] - scale[0] * 0.5, centre[1] - scale[1] * 0.5,
                                centre[0] + scale[0] * 0.5, centre[1] + scale[1] * 0.5])
        
        
        return [torch.from_numpy(cropped_img).float().permute(2, 0, 1), 
                input_pose_coord,
                input_pose_valid, 
                input_pose_score,
                crop_info,
                frame_idx,
        ]
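
The box handling above first pads the detection box to the network's input aspect ratio, then enlarges it by 1.25 to keep context around the person. A standalone sketch of that step, assuming cfg.MODEL.IMAGE_SIZE is (height, width) = (256, 192):

import numpy as np

def box_to_centre_scale(x, y, w, h, input_h=256, input_w=192, padding=1.25):
    # expand the (x, y, w, h) box to the input aspect ratio, then pad
    aspect_ratio = input_w / input_h
    centre = np.array([x + w * 0.5, y + h * 0.5])
    if w > aspect_ratio * h:
        h = w / aspect_ratio      # box too wide: grow height
    elif w < aspect_ratio * h:
        w = h * aspect_ratio      # box too tall: grow width
    return centre, np.array([w, h]) * padding

print(box_to_centre_scale(100, 50, 80, 40))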
Example #6
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            print('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        target_map = torch.from_numpy(self.generate_paf(joints, joints_vis))

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, target_map, meta
Example #7
    def _get_bbox_output(self, bbox, trans_output, height, width):
        bbox = self._coco_box_to_bbox(bbox).copy()

        rect = np.array([[bbox[0], bbox[1]], [bbox[0], bbox[3]],
                         [bbox[2], bbox[3]], [bbox[2], bbox[1]]],
                        dtype=np.float32)
        for t in range(4):
            rect[t] = affine_transform(rect[t], trans_output)
        bbox[:2] = rect[:, 0].min(), rect[:, 1].min()
        bbox[2:] = rect[:, 0].max(), rect[:, 1].max()

        bbox_amodal = copy.deepcopy(bbox)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        return bbox, bbox_amodal
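
Example #7 transforms all four box corners and refits an axis-aligned box, which is the correct way to map a box through an affine transform that may include rotation. A minimal standalone version over a plain 2x3 matrix (hypothetical helper, not the project's own):

import numpy as np

def transform_bbox(bbox, M):
    # map [x1, y1, x2, y2] through a 2x3 affine matrix M and refit
    x1, y1, x2, y2 = bbox
    corners = np.array([[x1, y1], [x1, y2], [x2, y2], [x2, y1]], np.float32)
    ones = np.ones((4, 1), np.float32)
    warped = np.hstack([corners, ones]) @ M.T        # (4, 2)
    return np.array([warped[:, 0].min(), warped[:, 1].min(),
                     warped[:, 0].max(), warped[:, 1].max()])

M = np.array([[0.5, 0.0, 10.0],   # scale 0.5 + translation, for illustration
              [0.0, 0.5, 20.0]], np.float32)
print(transform_bbox([0, 0, 100, 50], M))   # -> [10. 20. 60. 45.]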
Example #8
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """
    Args:
        heatmap: array of size (n * k * h * w)
                -n: number of views,  -k: number of joints
                -h: heatmap height,   -w: heatmap width
        grid: list of k ndarrays of size (nbins * 3)
                    -k: number of joints; 1 when the grid is shared in PSM
                    -nbins: number of bins in the grid
        bbox2D: bounding box on which heatmap is computed
    Returns:
        unary_of_all_joints: a list of ndarray of size nbins, same order as heatmaps
    """

    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]
    nbins = grid[0].shape[0]

    unary_of_all_joints = []
    for j in range(k):
        unary = np.zeros(nbins)
        for c in range(n):

            grid_id = 0 if len(grid) == 1 else j
            xy = cameras.project_pose(grid[grid_id], cam[c])
            trans = get_affine_transform(bbox2D[c]['center'],
                                         bbox2D[c]['scale'], 0, imgSize)

            xy = affine_transform(xy, trans) * np.array([w, h]) / imgSize
            # for i in range(nbins):
            #     xy[i] = affine_transform(xy[i], trans) * np.array([w, h]) / imgSize

            hmap = heatmap[c, j, :, :]
            # interpolator axes: x spans the heatmap width, y the height,
            # matching the transposed values below
            point_x, point_y = np.arange(w), np.arange(h)
            rgi = RegularGridInterpolator(points=[point_x, point_y],
                                          values=hmap.transpose(),
                                          bounds_error=False,
                                          fill_value=0)
            score = rgi(xy)
            unary = unary + np.reshape(score, newshape=unary.shape)
        unary_of_all_joints.append(unary)

    return unary_of_all_joints
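
The per-view lookup above samples a heatmap at fractional projected coordinates with SciPy's RegularGridInterpolator. A self-contained sketch of just that sampling step (dummy heatmap, arbitrary sizes), with the axis bookkeeping made explicit:

import numpy as np
from scipy.interpolate import RegularGridInterpolator

h, w = 64, 48
hmap = np.random.rand(h, w).astype(np.float32)
# x indexes width, y indexes height, so interpolate hmap.T (shape (w, h))
rgi = RegularGridInterpolator((np.arange(w), np.arange(h)), hmap.T,
                              bounds_error=False, fill_value=0)
xy = np.array([[10.3, 20.7], [47.0, 63.0], [500.0, 500.0]])  # last is out of bounds -> 0
print(rgi(xy))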
Example #9
    def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
        num_joints = self.num_joints
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3) \
            if 'keypoints' in ann else np.zeros((self.num_joints, 3), np.float32)
        if self.opt.simple_radius > 0:
            hp_radius = int(
                simple_radius(h, w, min_overlap=self.opt.simple_radius))
        else:
            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = max(0, int(hp_radius))

        for j in range(num_joints):
            pts[j, :2] = affine_transform(pts[j, :2], trans_output)
            if pts[j, 2] > 0:
                if pts[j, 0] >= 0 and pts[j, 0] < self.opt.output_w and \
                        pts[j, 1] >= 0 and pts[j, 1] < self.opt.output_h:
                    ret['hps'][k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                    ret['hps_mask'][k, j * 2:j * 2 + 2] = 1
                    pt_int = pts[j, :2].astype(np.int32)
                    ret['hp_offset'][k * num_joints + j] = pts[j, :2] - pt_int
                    ret['hp_ind'][k * num_joints + j] = \
                        pt_int[1] * self.opt.output_w + pt_int[0]
                    ret['hp_offset_mask'][k * num_joints + j] = 1
                    ret['hm_hp_mask'][k * num_joints + j] = 1
                    ret['joint'][k * num_joints + j] = j
                    draw_umich_gaussian(ret['hm_hp'][j], pt_int, hp_radius)
                    if pts[j, 2] == 1:
                        ret['hm_hp'][j, pt_int[1], pt_int[0]] = self.ignore_val
                        ret['hp_offset_mask'][k * num_joints + j] = 0
                        ret['hm_hp_mask'][k * num_joints + j] = 0
                else:
                    pts[j, :2] *= 0
            else:
                pts[j, :2] *= 0
                self._ignore_region(
                    ret['hm_hp'][j, int(bbox[1]):int(bbox[3]) + 1,
                                 int(bbox[0]):int(bbox[2]) + 1])
        gt_det['hps'].append(pts[:, :2].reshape(num_joints * 2))
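
The keypoint targets above store each joint's position as a flattened heatmap index (y * output_w + x) plus a sub-pixel offset. A tiny sketch of that encode/decode round trip:

import numpy as np

output_w = 128
pt = np.array([37.6, 52.2], np.float32)      # (x, y) on the output grid
pt_int = pt.astype(np.int32)                 # integer cell
ind = pt_int[1] * output_w + pt_int[0]       # flattened index, as in hp_ind
offset = pt - pt_int                         # sub-pixel residual, as in hp_offset

# decode: recover the continuous position from (ind, offset)
y, x = divmod(int(ind), output_w)
print(np.array([x, y]) + offset)             # -> [37.6 52.2]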
Example #10
def get_pose_estimation_prediction(pose_model, image, centers, scales, transform):
    rotation = 0

    # pose estimation transformation
    model_inputs = []
    center_maps = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE)
        # Crop smaller image of people

        c = affine_transform(center, trans)
        center_map = gaussian(np.zeros(cfg.MODEL.IMAGE_SIZE), c, cfg.MODEL.SIGMA)
        center_map = torch.from_numpy(center_map)
        center_maps.append(center_map)

        model_input = cv2.warpAffine(
            image,
            trans,
            (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        # hwc -> chw
        model_input = transform(model_input)  # .unsqueeze(0)
        model_inputs.append(model_input)


    # n * chw -> nchw
    center_maps = torch.stack(center_maps)
    model_inputs = torch.stack(model_inputs)

    # compute output heatmap
    output = pose_model(model_inputs.to(CTX), center_maps.to(CTX))
    coords, _ = get_final_preds(
        cfg,
        output.cpu().detach().numpy(),
        np.asarray(centers),
        np.asarray(scales))

    return coords
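
The center map fed to the model is a single 2-D Gaussian rendered at the transformed person center. A minimal stand-in for the gaussian helper used above (a sketch, not the project's exact implementation):

import numpy as np

def render_gaussian(shape, center, sigma):
    # render exp(-d^2 / (2 sigma^2)) over an (h, w) grid, center = (x, y)
    h, w = shape
    ys, xs = np.mgrid[0:h, 0:w]
    d2 = (xs - center[0]) ** 2 + (ys - center[1]) ** 2
    return np.exp(-d2 / (2.0 * sigma ** 2)).astype(np.float32)

cm = render_gaussian((256, 192), center=(96.0, 128.0), sigma=3.0)
print(cm.shape, cm.max())   # (256, 192) 1.0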
Example #11
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            # logger.error('=> fail to read {}'.format(image_file))
            # raise ValueError('Fail to read {}'.format(image_file))
            return None, None, None, None, None, None

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_2d']
        joints_vis = db_rec['joints_2d_vis']
        joints_3d = db_rec['joints_3d']
        joints_3d_vis = db_rec['joints_3d_vis']

        nposes = len(joints)
        assert nposes <= self.maximum_person, 'too many persons'

        height, width, _ = data_numpy.shape
        c = np.array([width / 2.0, height / 2.0])
        s = get_scale((width, height), self.image_size)
        r = 0

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for n in range(nposes):
            for i in range(len(joints[0])):
                if joints_vis[n][i, 0] > 0.0:
                    joints[n][i, 0:2] = affine_transform(joints[n][i, 0:2], trans)
                    if (np.min(joints[n][i, :2]) < 0
                            or joints[n][i, 0] >= self.image_size[0]
                            or joints[n][i, 1] >= self.image_size[1]):
                        joints_vis[n][i, :] = 0

        if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] is not None:
            # For convenience, we use the predicted poses and the corresponding
            # values at the original heatmaps to generate 2D input heatmaps for
            # the Campus and Shelf datasets. A 2D backbone trained on COCO could
            # also be used to generate the heatmaps directly.
            pred_pose2d = db_rec['pred_pose2d']
            for n in range(len(pred_pose2d)):
                for i in range(len(pred_pose2d[n])):
                    pred_pose2d[n][i, 0:2] = affine_transform(
                        pred_pose2d[n][i, 0:2], trans)

            input_heatmap = self.generate_input_heatmap(pred_pose2d)
            input_heatmap = torch.from_numpy(input_heatmap)
        else:
            input_heatmap = torch.zeros(self.cfg.NETWORK.NUM_JOINTS,
                                        self.heatmap_size[1],
                                        self.heatmap_size[0])

        target_heatmap, target_weight = self.generate_target_heatmap(
            joints, joints_vis)
        target_heatmap = torch.from_numpy(target_heatmap)
        target_weight = torch.from_numpy(target_weight)

        # make joints and joints_vis having same shape
        joints_u = np.zeros((self.maximum_person, self.num_joints, 2))
        joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2))
        for i in range(nposes):
            joints_u[i] = joints[i]
            joints_vis_u[i] = joints_vis[i]

        joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3))
        joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3))
        for i in range(nposes):
            joints_3d_u[i] = joints_3d[i][:, 0:3]
            joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

        target_3d = self.generate_3d_target(joints_3d)
        target_3d = torch.from_numpy(target_3d)

        if isinstance(self.root_id, int):
            roots_3d = joints_3d_u[:, self.root_id]
        elif isinstance(self.root_id, list):
            roots_3d = np.mean([joints_3d_u[:, j] for j in self.root_id],
                               axis=0)
        meta = {
            'image': image_file,
            'num_person': nposes,
            'joints_3d': joints_3d_u,
            'joints_3d_vis': joints_3d_vis_u,
            'roots_3d': roots_3d,
            'joints': joints_u,
            'joints_vis': joints_vis_u,
            'center': c,
            'scale': s,
            'rotation': r,
            'camera': db_rec['camera']
        }

        return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
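
Because the number of people varies per frame, the joints, visibilities and 3-D poses are zero-padded to maximum_person so that samples collate into fixed-size batches. The pattern in isolation:

import numpy as np

maximum_person, num_joints = 10, 15
poses = [np.random.rand(num_joints, 2) for _ in range(3)]  # 3 people this frame

joints_u = np.zeros((maximum_person, num_joints, 2))
for i, p in enumerate(poses):
    joints_u[i] = p          # rows past the real person count stay zero
print(joints_u.shape)        # (10, 15, 2)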
Example #12
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec["image"]
        filename = db_rec["filename"] if "filename" in db_rec else ""
        imgnum = db_rec["imgnum"] if "imgnum" in db_rec else ""

        if self.data_format == "zip":
            from utils import zipreader

            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error("=> fail to read {}".format(image_file))
            raise ValueError("Fail to read {}".format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec["joints_3d"]
        joints_vis = db_rec["joints_3d_vis"]

        c = db_rec["center"]
        s = db_rec["scale"]
        score = db_rec["score"] if "score" in db_rec else 1
        r = 0

        if self.is_train:
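            # half-body augmentation: occasionally crop to only the upper or
            # lower body when enough joints are visible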
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        joints_heatmap = joints.copy()
        trans = get_affine_transform(c, s, r, self.image_size)
        trans_heatmap = get_affine_transform(c, s, r, self.heatmap_size)

        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                joints_heatmap[i, 0:2] = affine_transform(
                    joints_heatmap[i, 0:2], trans_heatmap)

        target, target_weight = self.generate_target(joints_heatmap,
                                                     joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            "image": image_file,
            "filename": filename,
            "imgnum": imgnum,
            "joints": joints,
            "joints_vis": joints_vis,
            "center": c,
            "scale": s,
            "rotation": r,
            "score": score,
        }

        return input, target, target_weight, meta
Example #13
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        from boxx import cf
        if cf.args.task == 'ssm':
            feat_stride = self.image_size / self.heatmap_size
            joints_h = copy.deepcopy(joints)
            # TODO: reduce the quantization loss here
            joints_h[:, 0] = (joints_h[:, 0] / feat_stride[0] + 0.5)
            joints_h[:, 1] = (joints_h[:, 1] / feat_stride[1] + 0.5)
            joints_h = joints_h.astype(np.int32)
            meta['joints_h'] = joints_h

        return input, target, target_weight, meta
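
The ssm branch above maps image-space joints to heatmap cells by dividing by the feature stride and rounding via a +0.5 shift. The quantization in isolation, assuming a 4x stride:

import numpy as np

image_size = np.array([256.0, 256.0])
heatmap_size = np.array([64.0, 64.0])
feat_stride = image_size / heatmap_size      # (4, 4)

joints = np.array([[123.4, 37.9], [10.0, 250.0]])
joints_h = (joints / feat_stride + 0.5).astype(np.int32)   # nearest heatmap cell
print(joints_h)    # [[31  9] [ 3 63]]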
Example #14
    def _get_single_view_item(self, joints_3d, joints_3d_vis, cam):
        joints_3d = copy.deepcopy(joints_3d)
        joints_3d_vis = copy.deepcopy(joints_3d_vis)
        nposes = len(joints_3d)

        width = 360
        height = 288
        c = np.array([width / 2.0, height / 2.0], dtype=np.float32)
        # s = np.array(
        #     [width / self.pixel_std, height / self.pixel_std], dtype=np.float32)
        s = get_scale((width, height), self.image_size)
        r = 0

        joints = []
        joints_vis = []
        for n in range(nposes):
            pose2d = project_pose(joints_3d[n], cam)

            x_check = np.bitwise_and(pose2d[:, 0] >= 0,
                                     pose2d[:, 0] <= width - 1)
            y_check = np.bitwise_and(pose2d[:, 1] >= 0,
                                     pose2d[:, 1] <= height - 1)
            check = np.bitwise_and(x_check, y_check)
            vis = joints_3d_vis[n][:, 0] > 0
            vis[np.logical_not(check)] = 0

            joints.append(pose2d)
            joints_vis.append(np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1))

        trans = get_affine_transform(c, s, r, self.image_size)
        input = np.ones((height, width, 3), dtype=np.float32)
        input = cv2.warpAffine(
            input,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for n in range(nposes):
            for i in range(len(joints[0])):
                if joints_vis[n][i, 0] > 0.0:
                    joints[n][i, 0:2] = affine_transform(joints[n][i, 0:2], trans)
                    if (np.min(joints[n][i, :2]) < 0
                            or joints[n][i, 0] >= self.image_size[0]
                            or joints[n][i, 1] >= self.image_size[1]):
                        joints_vis[n][i, :] = 0

        input_heatmap, _ = self.generate_input_heatmap(joints, joints_vis)
        input_heatmap = torch.from_numpy(input_heatmap)
        target_heatmap = torch.zeros_like(input_heatmap)
        target_weight = torch.zeros(len(target_heatmap), 1)

        # make joints and joints_vis having same shape
        joints_u = np.zeros((self.maximum_person, len(joints[0]), 2))
        joints_vis_u = np.zeros((self.maximum_person, len(joints[0]), 2))
        for i in range(nposes):
            joints_u[i] = joints[i]
            joints_vis_u[i] = joints_vis[i]

        joints_3d_u = np.zeros((self.maximum_person, len(joints[0]), 3))
        joints_3d_vis_u = np.zeros((self.maximum_person, len(joints[0]), 3))
        for i in range(nposes):
            joints_3d_u[i] = joints_3d[i][:, 0:3]
            joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

        target_3d = self.generate_3d_target(joints_3d)
        target_3d = torch.from_numpy(target_3d)

        meta = {
            'image': '',
            'num_person': nposes,
            'joints_3d': joints_3d_u,
            'roots_3d': (joints_3d_u[:, 11] + joints_3d_u[:, 12]) / 2.0,
            'joints_3d_vis': joints_3d_vis_u,
            'joints': joints_u,
            'joints_vis': joints_vis_u,
            'center': c,
            'scale': s,
            'rotation': r,
            'camera': cam
        }

        return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
Example #15
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        ############################################## data augmentation
        if self.is_train:

            # scale and rotation augmentation
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            # flips images
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

            # brighten/darken image by shifting all pixels. not sure if this actually helps
#             if self.brighten and random.random() <= 0.5:
#                 shift = 2 * np.random.randn()
#                 data_numpy = np.clip(data_numpy + shift, 0, 255).astype(np.uint8)

        trans = get_affine_transform(c, s, r, self.image_size)

        # NOTE: This scales images and crops them to be 256*256. During eval, replace with input = data_numpy
        input = data_numpy
        if 'TEST_MODE' not in self.cfg:
            input = cv2.warpAffine(
                data_numpy,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
Example #16
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec.keys():
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            interference_joints = [joints]
            interference_joints_vis = [joints_vis]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
                for i in range(len(interference_joints)):
                    interference_joints[i], interference_joints_vis[i] = \
                        fliplr_joints(interference_joints[i],
                                      interference_joints_vis[i],
                                      data_numpy.shape[1], self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        # cv2.imwrite('img.jpg',input[:,:,::-1])
        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        target, target_weight = self.generate_target(joints, joints_vis)

        # interference joints heatmaps
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for i in range(len(interference_joints)):
            inter_joints = interference_joints[i]
            inter_joints_vis = interference_joints_vis[i]
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)
            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)
            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)
        all_ins_target = np.maximum(inter_target, target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

        # AE labels
        All_joints = [joints] + interference_joints
        ae_targets = self.generate_joints_ae_targets(All_joints)

        # convert to torch tensors
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
        ae_targets = torch.from_numpy(ae_targets)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            'interference_maps': inter_target,
        }
        return input, all_ins_target, all_ins_target_weight, ae_targets, meta
Example #17
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = self.read_image(image_file)

            ##### supporting frame
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                T = self.timestep_delta_range
                prev_nm = image_file.split('/')[-1]
                ref_idx = int(prev_nm.replace('.jpg', ''))

                if self.timestep_delta_rand:
                    delta = -T + np.random.randint(T * 2 + 1)
                else:
                    delta = self.timestep_delta

                sup_idx = ref_idx + delta
                ########

                if 'nframes' in db_rec:
                    nframes = db_rec['nframes']
                    if not self.is_posetrack18:
                        sup_idx = np.clip(sup_idx, 1, nframes)
                    else:
                        sup_idx = np.clip(sup_idx, 0, nframes - 1)

                if not self.is_posetrack18:
                    new_sup_image_file = image_file.replace(
                        prev_nm,
                        str(sup_idx).zfill(8) + '.jpg')
                else:
                    new_sup_image_file = image_file.replace(
                        prev_nm,
                        str(sup_idx).zfill(6) + '.jpg')

                if os.path.exists(new_sup_image_file):
                    sup_image_file = new_sup_image_file
                else:
                    sup_image_file = image_file
                ##########

                data_numpy_sup = self.read_image(sup_image_file)
            ###########

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                data_numpy_sup = cv2.cvtColor(data_numpy_sup,
                                              cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            if data_numpy_sup is None:
                logger.error('=> SUP: fail to read {}'.format(sup_image_file))
                raise ValueError('SUP: Fail to read {}'.format(sup_image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                if (self.is_train and self.use_warping_train) or (
                        not self.is_train and self.use_warping_test):
                    data_numpy_sup = data_numpy_sup[:, ::-1, :]

                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        ##### supportingimage
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            input_sup = cv2.warpAffine(
                data_numpy_sup,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
        #########

        if self.transform:
            input = self.transform(input)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                input_sup = self.transform(input_sup)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):

            meta = {
                'image': image_file,
                'sup_image': sup_image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, input_sup, target, target_weight, meta

        else:
            meta = {
                'image': image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, target, target_weight, meta
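
The supporting frame above is picked by offsetting the reference frame index and rebuilding the zero-padded filename. The naming step in isolation (hypothetical path; PoseTrack18-style 6-digit names assumed):

import numpy as np

image_file = '/data/posetrack18/images/val/000342/000017.jpg'
prev_nm = image_file.split('/')[-1]
ref_idx = int(prev_nm.replace('.jpg', ''))          # 17

T = 3
delta = -T + np.random.randint(T * 2 + 1)           # uniform in [-T, T]
sup_idx = int(np.clip(ref_idx + delta, 0, 99))      # clamp to the clip's range

sup_image_file = image_file.replace(prev_nm, str(sup_idx).zfill(6) + '.jpg')
print(sup_image_file)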
Example #18
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        if db_rec['source'] == 'h36m' and self.no_distortion:
            image_dir_zip = 'images_nodistortion.zip@'
        else:
            image_dir_zip = 'images.zip@'

        image_dir = image_dir_zip if self.data_format == 'zip' else ''
        # special process for coco dataset
        if db_rec['source'] == 'coco':
            image_dir = ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if db_rec['source'] == 'h36m' and self.pseudo_label:
            joints = db_rec['joints_2d_pseudo'].copy()  # [union_joints, 2]
            joints_vis = db_rec['joints_vis_pseudo'].copy()[:, :2]  # [union_joints, 2]
        else:
            joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
            joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
        assert len(joints) == self.num_joints
        assert len(joints_vis) == self.num_joints

        # crop and scale according to ground truth
        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train and db_rec['source'] != 'h36m':
            sf = self.aug_param_dict[db_rec['source']]['scale_factor']
            rf = self.aug_param_dict[db_rec['source']]['rotation_factor']
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.aug_param_dict[db_rec['source']]['flip'] and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                center[0] = data_numpy.shape[1] - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            if self.color_jitter:
                input = input[:, :, ::-1]  # BGR -> RGB
                input = self.color_jitter(input)
                r, g, b = input.split()
                input = Image.merge("RGB", (b, g, r))  # RGB -> BGR
            input = self.transform(input)

        visible_joints = joints_vis[:, 0] > 0
        if np.any(visible_joints):
            joints[visible_joints, :2] = affine_transform(joints[visible_joints, :2], trans)
            # zero_indices = np.any(
            #         np.concatenate((joints[:, :2]<0, 
            #         joints[:, [0]] >= self.image_size[0],
            #         joints[:, [1]] >= self.image_size[1]), 
            #         axis=1), 
            #         axis=1)
            # joints_vis[zero_indices, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis, db_rec['source'])

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source'],
            'subject': db_rec['subject'] if db_rec['source'] == 'h36m' else -1
        }
        return input, target, target_weight, meta
Example #19
    def __getitem__(self, idx, source='h36m', **kwargs):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()
        joints_vis = db_rec['joints_vis'].copy()

        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                if (np.min(joints[i, :2]) < 0
                        or joints[i, 0] >= self.image_size[0]
                        or joints[i, 1] >= self.image_size[1]):
                    joints_vis[i, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source'],
            'heatmap_size': self.heatmap_size
        }
        if source == 'totalcapture':
            imubone_mapping = kwargs['tc_imubone_map']
            meta['joints_gt'] = db_rec['joints_gt']
            meta['bone_vec'] = db_rec['bone_vec']
            meta['camera'] = db_rec['camera']
            bone_vec_tc = meta['bone_vec']
            bone_vectors = dict()
            for bone_name in imubone_mapping:
                bone_vectors[
                    imubone_mapping[bone_name]] = bone_vec_tc[bone_name]
            meta['bone_vectors'] = bone_vectors
            # if self.totalcapture_template_meta is None:
            #     self.totalcapture_template_meta = meta
        elif source == 'h36m':
            meta['camera'] = db_rec['camera']
            meta['joints_gt'] = cam_utils.camera_to_world_frame(
                db_rec['joints_3d'], db_rec['camera']['R'],
                db_rec['camera']['T'])
        else:
            # since tc is mixed with mpii, they should have same keys in meta,
            # otherwise will lead to error when collate data in dataloader
            meta['joints_gt'] = self.totalcapture_template_meta['joints_gt']
            # meta['joints_gt'] = np.zeros((16,3))
            meta['bone_vec'] = self.totalcapture_template_meta['bone_vec']
            meta['camera'] = self.totalcapture_template_meta['camera']
            meta['bone_vectors'] = self.totalcapture_template_meta[
                'bone_vectors']
        return input, target, target_weight, meta
Example #20
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        # # sharpening
        # kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        # input = cv2.filter2D(input, -1, kernel)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }
        onehot_heatmap = self.render_onehot_heatmap(meta['joints'],
                                                    input.shape[1])

        return input, target, target_weight, meta, onehot_heatmap
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        # print(joints)
        joints_copy = db_rec['joints_3d_copy']
        joints_vis = db_rec['joints_3d_vis']
        # body = db_rec['body_3d']
        # body_vis = db_rec['body_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis
                )

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)  # random scale factor
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0  # random rotation factor

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                # add our symmetric counterpart here
                c[0] = data_numpy.shape[1] - c[0] - 1  # recompute the center point after the horizontal flip

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        body = np.zeros((self.num_body, 3), dtype=np.float64)  # np.float is removed in modern NumPy
        body_vis = np.zeros((self.num_body, 3), dtype=np.float64)
        for idbody, skeleton in enumerate(self.skeletons):
            point_a = joints[skeleton[0]]
            # print(point_a)
            point_b = joints[skeleton[1]]
            # if point_a[2] == 0 or point_b[2] == 0:
            if joints_copy[skeleton[0]][2] == 0 or joints_copy[skeleton[1]][2] == 0:
                continue
            axis_x = (point_b - point_a)[:-1]
            # print(x)
            lx = np.sqrt(axis_x.dot(axis_x))
            if lx == 0:
                continue
            ly = 1
            cos_angle = axis_x.dot(self.axis_y) / (lx * ly)
            angle = np.arccos(cos_angle)
            angle = angle / np.pi
            # angle2 = angle * 180 / np.pi

            if axis_x[1] < 0:
                angle = - angle
            # print(angle2)
            # print(lx,angle2)
            body[idbody] = [lx/332.55, angle, 1]
            body_vis[idbody] = [1, 1, 0]

        joint_target, joint_target_weight = self.generate_target(joints, joints_vis)
        body_target, body_target_weight = self.generate_body_target(joints, joints_copy, body_vis)
        # for i in range(19):
        #     # print(image_file)
        #     cv2.imwrite('image/'+image_file.split('/')[-1][:-4]+'_'+str(i)+'.jpg', np.uint8(body_target[i][:,:,np.newaxis]*255))
        # for i in range(17):
        #     # print(image_file)
        #     cv2.imwrite('image/'+image_file.split('/')[-1][:-4]+'_'+str(i)+'_point.jpg', np.uint8(joint_target[i][:,:,np.newaxis]*255))
        joint_target = torch.from_numpy(joint_target)
        joint_target_weight = torch.from_numpy(joint_target_weight)
        body_target = torch.from_numpy(body_target)
        body_target_weight = torch.from_numpy(body_target_weight)
        body = torch.from_numpy(body)
        body_vis = torch.from_numpy(body_vis)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'body': body,
            'body_vis': body_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, joint_target, joint_target_weight, body_target, body_target_weight, body, body_vis, meta
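The body-vector block in the example above encodes each limb as a normalized length plus a signed angle against a fixed reference axis. Here is that computation as a standalone sketch; the reference axis and the 332.55 length normalizer mirror the code above but are otherwise assumptions:

import numpy as np

def limb_descriptor(point_a, point_b, ref_axis=np.array([1.0, 0.0]),
                    length_norm=332.55):
    """Return (normalized length, signed angle in units of pi) for one limb."""
    v = np.asarray(point_b, dtype=float) - np.asarray(point_a, dtype=float)
    length = np.sqrt(v.dot(v))
    if length == 0:
        return None  # degenerate limb; the loader above skips these
    cos_angle = v.dot(ref_axis) / (length * np.linalg.norm(ref_axis))
    angle = np.arccos(np.clip(cos_angle, -1.0, 1.0)) / np.pi
    if v[1] < 0:  # resolve the arccos sign ambiguity with the y component
        angle = -angle
    return length / length_norm, angle

print(limb_descriptor([0, 0], [10, 10]))  # roughly (0.0425, 0.25)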
Exemplo n.º 22
    def __getitem__(self, idx, source='h36m', **kwargs):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()
        joints_vis = db_rec['joints_vis'].copy()

        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        # ! Notice: this trans represents full image to cropped image,
        # not full image->heatmap
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
                if (np.min(joints[i, :2]) < 0 or
                        joints[i, 0] >= self.image_size[0] or
                        joints[i, 1] >= self.image_size[1]):
                    joints_vis[i, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        # 3x3 data augmentation affine trans (scale rotate)
        # !!! Notice: this transformation contains both heatmap->image scale affine
        # and data augmentation affine
        aug_trans = np.eye(3, 3)
        aug_trans[0:2] = trans  # full img -> cropped img
        hm_scale = self.heatmap_size / self.image_size
        scale_trans = np.eye(3, 3)  # cropped img -> heatmap
        scale_trans[0, 0] = hm_scale[1]
        scale_trans[1, 1] = hm_scale[0]
        aug_trans = scale_trans @ aug_trans

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source'],
            'heatmap_size': self.heatmap_size,
            'aug_trans': aug_trans,
        }
        if source == 'totalcapture':
            meta['joints_gt'] = db_rec['joints_gt']
            meta['camera'] = db_rec['camera']
        elif source in ['h36m']:
            meta['camera'] = db_rec['camera']
            meta['joints_gt'] = cam_utils.camera_to_world_frame(db_rec['joints_3d'], db_rec['camera']['R'], db_rec['camera']['T'])
        elif source == 'panoptic':
            meta['camera'] = db_rec['camera']
            meta['joints_gt'] = db_rec['joints_gt']
        elif source in ['unrealcv']:
            meta['camera'] = db_rec['camera']
            meta['joints_gt'] = db_rec['joints_gt']
        else:
            raise NotImplementedError('No such dataset definition in JointDataset')
        return input, target, target_weight, meta
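Exemplo n.º 22's `aug_trans` lifts the 2x3 crop affine to a 3x3 matrix and prepends the crop-to-heatmap rescaling, so a single matrix maps full-image points straight into heatmap coordinates. A sketch of that composition with a toy matrix; the (width, height) ordering of the size tuples is an assumption:

import numpy as np

def compose_aug_trans(trans_2x3, image_size, heatmap_size):
    """Lift a 2x3 crop affine to 3x3 and prepend the heatmap rescaling."""
    aug_trans = np.eye(3)
    aug_trans[0:2] = trans_2x3                  # full image -> cropped image
    scale_trans = np.eye(3)                     # cropped image -> heatmap
    scale_trans[0, 0] = heatmap_size[0] / image_size[0]
    scale_trans[1, 1] = heatmap_size[1] / image_size[1]
    return scale_trans @ aug_trans

def apply_3x3(trans, point):
    x, y, _ = trans @ np.array([point[0], point[1], 1.0])
    return np.array([x, y])

toy_trans = np.array([[0.5, 0.0, 10.0], [0.0, 0.5, 5.0]])
full = compose_aug_trans(toy_trans, image_size=(256, 256), heatmap_size=(64, 64))
print(apply_3x3(full, (100, 200)))  # the point in heatmap coordinates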
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            prev_image_file1 = db_rec['image']
            prev_image_file2 = db_rec['image']
            next_image_file1 = db_rec['image']
            next_image_file2 = db_rec['image']

        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = self.read_image(image_file)

            ##### supporting frames
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                T = self.timestep_delta_range
                prev_nm = prev_image_file1.split('/')[-1]
                ref_idx = int(prev_nm.replace('.jpg', ''))

                ### setting deltas
                prev_delta1 = -1
                prev_delta2 = -2
                next_delta1 = 1
                next_delta2 = 2

                #### image indices
                prev_idx1 = ref_idx + prev_delta1
                prev_idx2 = ref_idx + prev_delta2
                next_idx1 = ref_idx + next_delta1
                next_idx2 = ref_idx + next_delta2

                if 'nframes' in db_rec:
                    nframes = db_rec['nframes']
                    if not self.is_posetrack18:
                        prev_idx1 = np.clip(prev_idx1, 1, nframes)
                        prev_idx2 = np.clip(prev_idx2, 1, nframes)
                        next_idx1 = np.clip(next_idx1, 1, nframes)
                        next_idx2 = np.clip(next_idx2, 1, nframes)
                    else:
                        prev_idx1 = np.clip(prev_idx1, 0, nframes - 1)
                        prev_idx2 = np.clip(prev_idx2, 0, nframes - 1)
                        next_idx1 = np.clip(next_idx1, 0, nframes - 1)
                        next_idx2 = np.clip(next_idx2, 0, nframes - 1)

                if self.is_posetrack18:
                    z = 6
                else:
                    z = 8

                ### delta -1
                new_prev_image_file1 = prev_image_file1.replace(
                    prev_nm,
                    str(prev_idx1).zfill(z) + '.jpg')
                #### delta -2
                new_prev_image_file2 = prev_image_file1.replace(
                    prev_nm,
                    str(prev_idx2).zfill(z) + '.jpg')
                ### delta 1
                new_next_image_file1 = next_image_file1.replace(
                    prev_nm,
                    str(next_idx1).zfill(z) + '.jpg')
                #### delta 2
                new_next_image_file2 = next_image_file1.replace(
                    prev_nm,
                    str(next_idx2).zfill(z) + '.jpg')

                ###### checking for files existence
                if os.path.exists(new_prev_image_file1):
                    prev_image_file1 = new_prev_image_file1
                if os.path.exists(new_prev_image_file2):
                    prev_image_file2 = new_prev_image_file2
                if os.path.exists(new_next_image_file1):
                    next_image_file1 = new_next_image_file1
                if os.path.exists(new_next_image_file2):
                    next_image_file2 = new_next_image_file2

                ##########

            data_numpy_prev1 = self.read_image(prev_image_file1)
            data_numpy_prev2 = self.read_image(prev_image_file2)
            data_numpy_next1 = self.read_image(next_image_file1)
            data_numpy_next2 = self.read_image(next_image_file2)
            ###########

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                data_numpy_prev1 = cv2.cvtColor(data_numpy_prev1,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_prev2 = cv2.cvtColor(data_numpy_prev2,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_next1 = cv2.cvtColor(data_numpy_next1,
                                                cv2.COLOR_BGR2RGB)
                data_numpy_next2 = cv2.cvtColor(data_numpy_next2,
                                                cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            if data_numpy_prev1 is None:
                logger.error(
                    '=> PREV SUP: fail to read {}'.format(prev_image_file1))
                raise ValueError(
                    'PREV SUP: Fail to read {}'.format(prev_image_file1))
            if data_numpy_prev2 is None:
                logger.error(
                    '=> PREV SUP: fail to read {}'.format(prev_image_file2))
                raise ValueError(
                    'PREV SUP: Fail to read {}'.format(prev_image_file2))
            if data_numpy_next1 is None:
                logger.error(
                    '=> NEXT SUP: fail to read {}'.format(next_image_file1))
                raise ValueError(
                    'NEXT SUP: Fail to read {}'.format(next_image_file1))
            if data_numpy_next2 is None:
                logger.error(
                    '=> NEXT SUP: fail to read {}'.format(next_image_file2))
                raise ValueError(
                    'NEXT SUP: Fail to read {}'.format(next_image_file2))
        ##########

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                #####
                if (self.is_train and self.use_warping_train) or (
                        not self.is_train and self.use_warping_test):
                    data_numpy_prev1 = data_numpy_prev1[:, ::-1, :]
                    data_numpy_prev2 = data_numpy_prev2[:, ::-1, :]
                    data_numpy_next1 = data_numpy_next1[:, ::-1, :]
                    data_numpy_next2 = data_numpy_next2[:, ::-1, :]
                ##########

                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):
            input_prev1 = cv2.warpAffine(
                data_numpy_prev1,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_prev2 = cv2.warpAffine(
                data_numpy_prev2,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_next1 = cv2.warpAffine(
                data_numpy_next1,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            input_next2 = cv2.warpAffine(
                data_numpy_next2,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
        #########

        if self.transform:
            input = self.transform(input)
            if (self.is_train
                    and self.use_warping_train) or (not self.is_train
                                                    and self.use_warping_test):
                input_prev1 = self.transform(input_prev1)
                input_prev2 = self.transform(input_prev2)
                input_next1 = self.transform(input_next1)
                input_next2 = self.transform(input_next2)
            ############
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        if (self.is_train
                and self.use_warping_train) or (not self.is_train
                                                and self.use_warping_test):

            meta = {
                'image': image_file,
                'sup_image': prev_image_file1,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, input_prev1, input_prev2, input_next1, input_next2, target, target_weight, meta

        else:
            meta = {
                'image': image_file,
                'filename': filename,
                'imgnum': imgnum,
                'joints': joints,
                'joints_vis': joints_vis,
                'center': c,
                'scale': s,
                'rotation': r,
                'score': score
            }

            return input, target, target_weight, meta
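The PoseTrack-style loader above builds its supporting-frame paths by editing the zero-padded frame index inside the file name (6 digits for PoseTrack18, 8 otherwise) and clipping to the clip length. The same path arithmetic as a small helper; the argument names are mine:

import os.path as osp
import numpy as np

def neighbor_frame_path(image_file, delta, nframes=None, zfill=6):
    """Path of the frame `delta` steps away from `image_file`."""
    directory, name = osp.split(image_file)
    idx = int(name.replace('.jpg', '')) + delta
    if nframes is not None:
        idx = int(np.clip(idx, 0, nframes - 1))  # 0-based clip, PoseTrack18 style
    return osp.join(directory, str(idx).zfill(zfill) + '.jpg')

print(neighbor_frame_path('videos/val/000017.jpg', -2, nframes=100))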
Exemplo n.º 24
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        # load data
        # xiaofeng modified this to speed up data fetching
        data_numpy = db_rec['image']
        filename = db_rec['filename']

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(idx))
            raise ValueError('Fail to read {}'.format(idx))

        if 'fovea' in db_rec.keys():
            fovea = np.array(db_rec['fovea'])
        else:
            fovea = np.array([-1, -1])

        # xiaofeng add for test
        # gray_trans = iaa.Grayscale(alpha=0.5)
        # im = data_numpy[:, :, ::-1]  # Change channels to RGB
        # im = gray_trans.augment_image(im)
        # data_numpy = im[:, :, ::-1]  # Change channels to RGB

        # alpha = 0.5
        # img_temp = data_numpy.copy()
        # # img_gray = (img_temp[:, :, 0] + img_temp[:, :, 1] + img_temp[:, :, 2]) / 3
        # img_gray = img_temp[:, :, 0] * 0.11 + img_temp[:, :, 1] * 0.59 + img_temp[:, :, 2] * 0.3
        # img_gray2 = img_gray * alpha
        # img_gray2 = img_gray2.reshape(img_gray2.shape[0], img_gray2.shape[1], -1)
        # img_gray3 = np.tile(img_gray2, [1, 1, 3])
        # data_numpy = data_numpy.astype(np.float)
        # data_numpy = data_numpy * alpha + img_gray3
        #
        # cmax = data_numpy.max()
        # Thr0 = 250
        # if (cmax > Thr0):
        #     cmax = Thr0
        #     d2 = data_numpy[data_numpy <= Thr0]
        #     cmax2 = d2.max()
        #     data = (data_numpy.clip(0, cmax2)).astype(np.uint16)
        # else:
        #     data = (data_numpy.clip(0, cmax)).astype(np.uint16)
        #     cmax2 = cmax
        #
        # scale = float(255.0) / cmax2
        # if scale == 0:
        #     scale = 1
        # bytedata = (data - 0) * scale
        # data_numpy = (bytedata.clip(0, 255)).astype(np.uint8)
        # xiaofeng -- end of the trick

        # data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2GRAY)
        # data_numpy = data_numpy.reshape(data_numpy.shape[0], data_numpy.shape[1], -1)
        # if data_numpy.shape[2] == 1:
        #     # repeat 3 times to make fake RGB images
        #     data_numpy = np.tile(data_numpy, [1, 1, 3])

        # prepare for refuge2 final submission - 'Refuge2-Ext'
        # image size is 4288x2848
        if self.trial_enable:
            dh, dw = data_numpy.shape[:2]
            # crop left 300, right 500
            pw_l = 300
            pw_r = 500
            data_numpy = data_numpy[:, pw_l:(dw - pw_r), :]
            fovea[0] -= pw_l

        dh, dw = data_numpy.shape[:2]
        # TODO -- need to handle differing image sizes properly
        if dh != self.image_size[1] or dw != self.image_size[0]:
            data_numpy = cv2.resize(data_numpy,
                                    dsize=(self.image_size[0],
                                           self.image_size[1]),
                                    interpolation=cv2.INTER_LINEAR)
            h_ratio = self.image_size[1] * 1.0 / dh
            w_ratio = self.image_size[0] * 1.0 / dw
            fovea[0] *= w_ratio
            fovea[1] *= h_ratio

        if self.is_train:
            if self.scale_factor > 0 and np.random.rand() > 0.5:
                sign = 1 if np.random.rand() > 0.5 else -1
                scale_factor = 1.0 + np.random.rand() * self.scale_factor * sign
                dh, dw = data_numpy.shape[:2]
                nh, nw = int(dh * scale_factor), int(dw * scale_factor)
                data_numpy = cv2.resize(data_numpy,
                                        dsize=(nw, nh),
                                        interpolation=cv2.INTER_LINEAR)
                fovea[0] *= (nw * 1.0 / dw)
                fovea[1] *= (nh * 1.0 / dh)
                if sign > 0:  # crop
                    ph = (nh - self.image_size[1]) // 2
                    pw = (nw - self.image_size[0]) // 2
                    data_numpy = data_numpy[ph:ph + self.image_size[1],
                                            pw:pw + self.image_size[0], :]
                    fovea[0] -= pw
                    fovea[1] -= ph
                else:  # pad
                    ph = (self.image_size[1] - nh) // 2
                    pw = (self.image_size[0] - nw) // 2
                    data_numpy = np.pad(data_numpy,
                                        ((ph, self.image_size[1] - nh - ph),
                                         (pw, self.image_size[0] - nw - pw),
                                         (0, 0)),
                                        mode='constant')
                    fovea[0] += pw
                    fovea[1] += ph

        image_size = self.image_size
        # crop image from center
        crop_size = self.crop_size
        pw = (image_size[0] - crop_size[0]) // 2
        ph = (image_size[1] - crop_size[1]) // 2
        data_numpy = data_numpy[ph:ph + crop_size[1], pw:pw + crop_size[0], :]
        image_size = crop_size
        fovea[0] -= pw
        fovea[1] -= ph

        # get image transform for augmentation
        c = image_size * 0.5
        r = 0
        s = 0

        if self.is_train:
            rf = self.rotation_factor
            sf = self.shift_factor
            sign = 1 if np.random.rand() > 0.5 else -1  # rand(), not randn(), for an unbiased coin flip
            r = np.clip(sign*np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0
            sign = 1 if np.random.rand() > 0.5 else -1
            s = sign * np.random.rand() * sf

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                fovea = fliplr_coord(fovea, data_numpy.shape[1])
                c[0] = data_numpy.shape[1] - c[0] - 1

        # xiaofeng test: don't always apply the affine transform
        affine_applied = True
        if self.is_train and np.random.randn() > 0.9:  # note: randn (normal), so this skips roughly 18% of samples
            r = 0
            s = 0
            affine_applied = False
            # print("ignore affine")

        trans = get_affine_transform(c, r, image_size, shift=s)
        input = cv2.warpAffine(data_numpy,
                               trans, (int(image_size[0]), int(image_size[1])),
                               flags=cv2.INTER_LINEAR)

        fovea = affine_transform(fovea, trans)

        if self.is_train:
            patch_size = self.patch_size.astype(np.int32)
            pw = np.random.randint(0, int(image_size[0] - patch_size[0] + 1))
            ph = np.random.randint(0, int(image_size[1] - patch_size[1] + 1))
            orig_fovea = copy.deepcopy(fovea)
            fovea[0] -= pw
            fovea[1] -= ph
            while (fovea[0] < 0 or fovea[1] < 0 or fovea[0] >= patch_size[0]
                   or fovea[1] >= patch_size[1]):
                pw = np.random.randint(0,
                                       int(image_size[0] - patch_size[0] + 1))
                ph = np.random.randint(0,
                                       int(image_size[1] - patch_size[1] + 1))
                fovea[0] = orig_fovea[0] - pw
                fovea[1] = orig_fovea[1] - ph
            input = input[ph:ph + patch_size[1], pw:pw + patch_size[0], :]
            # print("fovea, orig_fovea, pw, ph, input.shape: ", fovea, orig_fovea, pw, ph, input.shape)
            # print("fovea, pw, ph, input.shape: ", fovea, pw, ph, input.shape)

        try:
            if self.transform:
                input = self.transform(input)
        except Exception:
            print("crash info: ", fovea, input.shape, affine_applied)
            raise

        # print("image: %s=d" %(idx))
        # print("fovea and size: ", fovea, input.shape)
        meta = {'fovea': fovea, 'image': filename}

        if self.is_train:
            heatmap_ds, heatmap_roi, roi_center, pixel_in_roi, offset_in_roi, fovea, fovea_in_roi, target_weight = \
                self.generate_target(input, fovea)

            # xiaofeng change
            # NOTE: this CLAHE branch assumes `input` is still an HxWxC uint8
            # array; after self.transform it is typically already a tensor, so
            # the cv2.split/.unsqueeze calls below would need conversions.
            if self.clahe_enaled:
                data_numpy = copy.deepcopy(input)
                b, g, r = cv2.split(data_numpy)
                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
                b = clahe.apply(b)
                g = clahe.apply(g)
                r = clahe.apply(r)
                data_numpy = cv2.merge([b, g, r])

                input_roi = crop_and_resize(
                    data_numpy.unsqueeze(0),
                    torch.from_numpy(roi_center).unsqueeze(0),
                    output_size=2 * self.region_radius,
                    scale=1.0)[0]
            else:
                # crop ROI
                input_roi = crop_and_resize(
                    input.unsqueeze(0),
                    torch.from_numpy(roi_center).unsqueeze(0),
                    output_size=2 * self.region_radius,
                    scale=1.0)[0]

            heatmap_ds = torch.from_numpy(heatmap_ds).float()
            heatmap_roi = torch.from_numpy(heatmap_roi).float()

            roi_center = torch.from_numpy(roi_center).float()
            pixel_in_roi = torch.from_numpy(pixel_in_roi).float()
            offset_in_roi = torch.from_numpy(offset_in_roi).float()
            fovea = torch.from_numpy(fovea).float()
            fovea_in_roi = torch.from_numpy(fovea_in_roi).float()

            meta.update({
                'roi_center': roi_center,
                'pixel_in_roi': pixel_in_roi,
                'fovea_in_roi': fovea_in_roi
            })

            return input, input_roi, heatmap_ds, heatmap_roi, offset_in_roi, target_weight, meta
        else:
            return input, meta
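The training branch above re-rolls the random patch origin until the fovea lands inside the patch. The same rejection loop as a standalone helper; names and sizes are mine, and the loop terminates as long as the point lies inside the image:

import numpy as np

def crop_patch_containing(image, point, patch_w, patch_h):
    """Randomly crop a (patch_h, patch_w) patch guaranteed to contain `point`."""
    h, w = image.shape[:2]
    while True:
        pw = np.random.randint(0, w - patch_w + 1)
        ph = np.random.randint(0, h - patch_h + 1)
        x, y = point[0] - pw, point[1] - ph
        if 0 <= x < patch_w and 0 <= y < patch_h:
            break
    patch = image[ph:ph + patch_h, pw:pw + patch_w]
    return patch, np.array([x, y])  # patch plus the point in patch coordinates

img = np.zeros((512, 512, 3), dtype=np.uint8)
patch, fovea_in_patch = crop_patch_containing(img, (300, 250), 128, 128)
print(patch.shape, fovea_in_patch)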
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_dir = 'images.zip@' if self.data_format == 'zip' else ''
        image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                              db_rec['image'])
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
        joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
        assert len(joints) == self.num_joints
        assert len(joints_vis) == self.num_joints

        # crop and scale according to ground truth
        center = np.array(db_rec['center']).copy()
        scale = np.array(db_rec['scale']).copy()
        rotation = 0

        if self.is_train and db_rec['source'] == 'mpii':
            sf = self.mpii_scale_factor
            rf = self.mpii_rotation_factor
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            if self.mpii_flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.mpii_flip_pairs)
                center[0] = data_numpy.shape[1] - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        visible_joints = joints_vis[:, 0] > 0
        if np.any(visible_joints):
            joints[visible_joints, :2] = affine_transform(
                joints[visible_joints, :2], trans)
            # zero_indices = np.any(
            #         np.concatenate((joints[:, :2]<0,
            #         joints[:, [0]] >= self.image_size[0],
            #         joints[:, [1]] >= self.image_size[1]),
            #         axis=1),
            #         axis=1)
            # joints_vis[zero_indices, :] = 0

        target, target_weight = self.generate_target(joints, joints_vis,
                                                     db_rec['source'])

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'scale': scale,
            'center': center,
            'rotation': rotation,
            'joints_2d': db_rec['joints_2d'],
            'joints_2d_transformed': joints,
            'joints_vis': joints_vis,
            'source': db_rec['source']
        }
        return input, target, target_weight, meta
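Every training branch mirrors the pixels with `data_numpy[:, ::-1, :]` and then repairs the annotations with `fliplr_joints`. A minimal sketch of what that repair has to do, assuming the usual semantics (mirror the x coordinate, then swap left/right partner joints):

import numpy as np

def fliplr_joints_sketch(joints, joints_vis, width, matched_pairs):
    """Mirror joint x-coordinates and swap left/right joint pairs."""
    joints = joints.copy()
    joints_vis = joints_vis.copy()
    joints[:, 0] = width - joints[:, 0] - 1  # mirror around the vertical axis
    for a, b in matched_pairs:
        joints[[a, b]] = joints[[b, a]]
        joints_vis[[a, b]] = joints_vis[[b, a]]
    return joints, joints_vis

joints = np.array([[10.0, 20.0], [30.0, 40.0]])
vis = np.ones_like(joints)
print(fliplr_joints_sketch(joints, vis, width=100, matched_pairs=[(0, 1)]))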
    def __getitem__(self, idx):
        # fetch this sample's record from the db by idx
        db_rec = copy.deepcopy(self.db[idx])
        # image file name
        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        # if the data format is zip, read through the zip reader
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # otherwise read the image file directly to get its pixel values
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # convert to RGB
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        # raise if the read did not yield a numpy array
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        # human body keypoint coordinates
        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        # center and scale computed for this training sample
        c = db_rec['center']
        s = db_rec['scale']

        # if the sample has no score attribute, add it with value 1
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        # training-time augmentation
        if self.is_train:
            # if more keypoints are visible than half the body's joints, and the
            # random draw falls below self.prob_half_body (0.3)
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                # re-estimate center and scale
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            # scale factor scale_factor=0.35 and rotation factor rotation_factor=45
            sf = self.scale_factor
            rf = self.rotation_factor

            # s lies in [1 - 0.35 = 0.65, 1 + 0.35 = 1.35]
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # r lies in [-2 * 45 = -90, 2 * 45 = 90]
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            # random horizontal flip
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        # affine transform: once the sample's keypoints are rotated, every
        # pixel is rotated to its corresponding position as well
        # build the transformation matrix
        trans = get_affine_transform(c, s, r, self.image_size)
        # apply the affine transform to crop the instance patch from the image
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        # normalization, reshaping, and other transforms
        if self.transform:
            input = self.transform(input)

        # apply the same affine transform to the body keypoints
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # ground truth: heatmaps target [17, 64, 48] and target_weight [17, 1]
        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
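The translated questions above ask how `affine_transform` on a keypoint relates to `cv2.warpAffine` on the image: both apply the same 2x3 matrix, warpAffine by resampling every pixel and affine_transform by mapping a single coordinate. A sketch of the point-side operation, assuming the standard 2x3 convention:

import numpy as np

def affine_transform_sketch(pt, trans):
    """Apply a 2x3 affine matrix to one (x, y) point."""
    homogeneous = np.array([pt[0], pt[1], 1.0])
    return (trans @ homogeneous)[:2]

toy_trans = np.array([[1.0, 0.0, 5.0], [0.0, 1.0, -3.0]])  # pure translation
print(affine_transform_sketch((10.0, 20.0), toy_trans))    # [15. 17.]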
Exemplo n.º 27
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        # cv2.imwrite('ori_img.jpg', data_numpy[:, :, ::-1])
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        if 'interference' in db_rec.keys():
            interference_joints = db_rec['interference']
            interference_joints_vis = db_rec['interference_vis']
        else:
            interference_joints = [joints]
            interference_joints_vis = [joints_vis]

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1

        size = db_rec['obj_size']
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
                for i in range(len(interference_joints)):
                    interference_joints[i], interference_joints_vis[
                        i] = fliplr_joints(interference_joints[i],
                                           interference_joints_vis[i],
                                           data_numpy.shape[1],
                                           self.flip_pairs)

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        # cv2.imwrite('img.jpg',input[:,:,::-1])
        if self.transform:
            input = self.transform(input)

        # relation_joints = []
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)
        # all_points = np.asarray(np.where(target == 1))[::-1].transpose()
        # for p in all_points:
        #     relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 1]
        # interference joints heatmaps
        inter_target = np.zeros_like(target)
        inter_target_weight = np.zeros_like(target_weight)
        for i in range(len(interference_joints)):
            inter_joints = interference_joints[i]
            inter_joints_vis = interference_joints_vis[i]
            for j in range(self.num_joints):
                if inter_joints_vis[j, 0] > 0.0:
                    inter_joints[j, 0:2] = affine_transform(
                        inter_joints[j, 0:2], trans)

            _inter_target, _inter_target_weight = self.generate_target(
                inter_joints, inter_joints_vis)

            inter_target = np.maximum(inter_target, _inter_target)
            inter_target_weight = np.maximum(inter_target_weight,
                                             _inter_target_weight)
        # if inter_target.max()>0:
        #     all_points = np.asarray(np.where(inter_target == 1))[::-1].transpose()
        #     for p in all_points:
        #         relation_joints += [c[0], c[1], size[0], size[1], p[0], p[1], p[2], 0]

        # all_ins_target = np.maximum(inter_target, target)
        all_ins_target = np.maximum(inter_target * 0.5, target)
        # points = self.generate_candidate_points_from_heatmaps(inter_target)
        all_ins_target_weight = np.maximum(inter_target_weight, target_weight)
        # cv2.imwrite('heatmap.jpg',np.max(target,axis=0)*255)
        # cv2.imwrite('inter_heatmap.jpg', np.max(inter_target, axis=0) * 255)
        # relation labels
        # relation_joints = np.asarray(relation_joints).reshape((-1,8))
        kpts_onehots = self.heatmap2onehot(target)
        # if kpts_onehots.shape[0]!=15:
        #     print(target.shape)
        # target_amaps, target_aweights = self.generate_association_map_from_gt_heatmaps(target, all_ins_target)

        # amaps = self.generate_association_map_from_labels(relation_joints)
        # max_points = self.num_joints * 5
        # num_points = len(relation_joints) if len(relation_joints) <= max_points else max_points
        # target_relation_points = np.zeros((max_points, 8))
        # target_amaps = np.zeros((max_points, max_points))
        # target_relation_points[:num_points] = relation_joints[:num_points]
        # target_amaps[:num_points, :num_points] = amaps[:num_points, :num_points]

        # heatmap labels
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        all_ins_target = torch.from_numpy(all_ins_target)
        all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
        # target_amaps = torch.from_numpy(target_amaps)
        # target_aweights = torch.from_numpy(target_aweights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score,
            # 'relation_joints': target_relation_points,
            # 'num_points': num_points,
            # 'association_maps': target_amaps,
            # 'association_weights': target_aweights,
            'interference_maps': inter_target,
            'kpt_cat_maps': kpts_onehots,
        }
        # return input, target, target_weight, meta
        return input, target, target_weight, all_ins_target, all_ins_target_weight, meta
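Several of these loaders occasionally zoom the crop onto the upper or lower body via `half_body_transform`. A hedged sketch of the usual recipe (choose one half's visible joints, fit a center and a padded scale around them); the joint split, aspect handling, and the 200-pixel scale unit are assumptions here:

import numpy as np

def half_body_transform_sketch(joints, joints_vis, upper_ids,
                               aspect_ratio=0.75, padding=1.5):
    """Return (center, scale) around a random body half, or (None, None)."""
    visible = [i for i in range(len(joints)) if joints_vis[i, 0] > 0]
    if np.random.rand() < 0.5:
        half = [i for i in visible if i in upper_ids]
    else:
        half = [i for i in visible if i not in upper_ids]
    if len(half) < 2:
        return None, None
    pts = joints[np.array(half), :2]
    center = pts.mean(axis=0)
    w, h = pts.max(axis=0) - pts.min(axis=0)
    if w > aspect_ratio * h:   # keep the crop's aspect ratio fixed
        h = w / aspect_ratio
    else:
        w = h * aspect_ratio
    return center, np.array([w, h]) * padding / 200.0  # 200px scale units

joints = np.random.rand(17, 2) * 200
vis = np.ones((17, 2))
print(half_body_transform_sketch(joints, vis, upper_ids=set(range(9))))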
Exemplo n.º 28
    def __getitem__(self, idx):
        # [q] db_rec is one entry of db; what was it again, an image plus its metadata?
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['image']  # db is the dataset
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''  # [q] total count? batch?

        if self.data_format == 'zip':  # decompress
            from utils import zipreader  # [note] imported only when actually needed
            data_numpy = zipreader.imread(
                image_file,
                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)  # [lookup]
        else:
            data_numpy = cv2.imread(
                image_file,
                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)  # [q] just an arbitrary flag choice?

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy,
                                      cv2.COLOR_BGR2RGB)  # [lookup] why this direction; shouldn't it be rgb2bgr?

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            # [note] a failed read is not a syntax error, but it silently breaks
            # everything downstream, so raise explicitly
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']  # [q] 3d?
        joints_vis = db_rec['joints_3d_vis']  # [q] the joints_vis used earlier comes from here, right?

        c = db_rec['center']
        s = db_rec['scale']  # from the dataset annotations
        # [q] whose score? or is it just a non-empty placeholder for now
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:  # half-body augmentation only on the training set
            # [q] sums column 0 of joints_vis; is that column just 0/1, i.e.
            # all joints labeled? the second term draws against the half-body
            # probability; why isn't this done in preprocessing?
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body  # once a body half is found, replace the annotated c and s

            sf = self.scale_factor
            rf = self.rotation_factor  # scale and rotation factors
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf,
                            1 + sf)  # [lookup] takes the max?
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0  # [q]

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]  # flip the pixel values horizontally
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)  # GT coordinates
                c[0] = data_numpy.shape[1] - c[0] - 1  # rightmost - original == flipped, since the width exceeds the rightmost index by 1

        trans = get_affine_transform(
            c, s, r, self.image_size)  # scale/rotation defined in the transform; how are the blank borders after rotation handled?
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)  # [lookup] applies the scale/rotation; input's size changes too, right?

        if self.transform:
            input = self.transform(input)  # [q] yet another transform? passed in and defined where?
            # cut_trans = self._cutpoint(8, 1, 1, point)
            # input = cut_trans(input)

        for i in range(self.num_joints):
            # [q] isn't column 0 just 0/1? a weight? run only for visible joints,
            # or does vis mean labeled (non-missing) joints?
            if joints_vis[i, 0] > 0.0:
                # [q] the GT coordinates are transformed too; how does this
                # differ from the warpAffine used on the image above?
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints,
                                                     joints_vis)  # what do the weights represent?
        # everything above operated on numpy arrays
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)  # [q]

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }  # [q] what is this used for; logging?

        return input, target, target_weight, meta  # [q] is input a Tensor here?
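The translated notes above ask what `generate_target` produces and what the weights mean. The usual convention: one Gaussian heatmap per joint, plus a weight column that zeroes the loss for unlabeled or out-of-map joints. A minimal sketch, assuming joint coordinates already scaled to heatmap units:

import numpy as np

def generate_target_sketch(joints, joints_vis, heatmap_size=(48, 64), sigma=2):
    """Render one Gaussian heatmap per joint; weight 0 masks a joint's loss."""
    w, h = heatmap_size
    target = np.zeros((len(joints), h, w), dtype=np.float32)
    target_weight = joints_vis[:, :1].astype(np.float32).copy()
    ys, xs = np.mgrid[0:h, 0:w]
    for i, (x, y) in enumerate(joints[:, :2]):
        if target_weight[i, 0] == 0 or not (0 <= x < w and 0 <= y < h):
            target_weight[i, 0] = 0  # out-of-map joints contribute no loss
            continue
        target[i] = np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))
    return target, target_weight

joints = np.array([[24.0, 32.0], [100.0, 10.0]])  # the second falls off-map
vis = np.array([[1, 1, 0], [1, 1, 0]])
t, tw = generate_target_sketch(joints, vis)
print(t.shape, tw.ravel())  # (2, 64, 48) [1. 0.]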
Exemplo n.º 29
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
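All of these `__getitem__` variants feed a standard `torch.utils.data.DataLoader`, and the fixed-key `meta` dicts exist precisely so the default collate can batch them key-wise. A toy end-to-end usage sketch; `ToyPoseDataset` is a stand-in, not any class from these examples:

import torch
from torch.utils.data import DataLoader, Dataset

class ToyPoseDataset(Dataset):
    """Stand-in returning the (input, target, target_weight, meta) contract."""
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        input = torch.zeros(3, 256, 192)
        target = torch.zeros(17, 64, 48)
        target_weight = torch.ones(17, 1)
        meta = {'center': torch.tensor([96.0, 128.0]), 'score': 1.0}
        return input, target, target_weight, meta

loader = DataLoader(ToyPoseDataset(), batch_size=4, shuffle=True)
for input, target, target_weight, meta in loader:
    # default_collate stacks the tensors and batches each meta entry key-wise
    print(input.shape, target.shape, meta['center'].shape)
    break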
Exemplo n.º 30
    def __getitem__(self, index):
        i_name = self.data_image[index]
        i_meta = self.data_annot[index]

        # filename check
        meta_name = i_meta["filename"]
        assert meta_name == i_name

        # image load
        img_path = self.cfg.image / i_name
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # visibility and joints pose load
        meta_visible = sorted(i_meta["is_visible"].items(), key=lambda x: int(x[0]))
        meta_joints = sorted(i_meta["joint_pos"].items(), key=lambda x: int(x[0]))
        meta_xywh = i_meta["bbox"]
        meta_xyxy = [
            int(meta_xywh[0] - meta_xywh[2]*.5*1.2), 
            int(meta_xywh[1] - meta_xywh[3]*.5*1.1), 
            int(meta_xywh[0] + meta_xywh[2]*.5*1.3), 
            int(meta_xywh[1] + meta_xywh[3]*.5*1.2), 
        ]

        # meta_visible = np.array([v[1] for v in meta_visible if v[0] not in ["6", "7"]]).reshape(14, 1)
        # meta_joints = np.array([v[1] for v in meta_joints if v[0] not in [6, 7]]).reshape(14, 2)
        meta_visible = np.array([v[1] for v in meta_visible]).reshape(16, 1)
        meta_joints = np.array([v[1] for v in meta_joints]).reshape(16, 2)

        # debugging
        if self.cfg.debug:
            debug_visualize(self.cfg.debug_path, index, img, meta_visible, meta_joints, meta_xyxy, True, postfix="Original")

        if self.train:
            # lr flipping
            if self.cfg.flip:
                if np.random.random() <= 0.5:
                    img = img[:, ::-1, :]
                    meta_xywh[0] = img.shape[1] - 1 - meta_xywh[0]

                    meta_joints[:, 0] = img.shape[1] - 1 - meta_joints[:, 0]
                    for (q, w) in self.cfg.flip_pairs:
                        meta_joints_q, meta_joints_w = (
                            meta_joints[q, :].copy(),
                            meta_joints[w, :].copy(),
                        )
                        meta_joints[w, :], meta_joints[q, :] = meta_joints_q, meta_joints_w

                    # if self.cfg.debug:
                        # debug_visualize(self.cfg.debug_path, index, img, meta_visible, meta_joints, meta_xyxy, True, postfix="Flipped")

            # rotating and cropping
            if self.cfg.affine_transform:
                centre = np.array([
                    img.shape[1]/2.,
                    img.shape[0]/2.
                ])
                
                scale = np.array(img.shape[:2][::-1])
                rotation = 0
                
                if self.cfg.rotate:
                    if random.random() <= 0.6:
                        rotation = np.clip(np.random.randn()*self.cfg.rotate_factor, -self.cfg.rotate_factor, self.cfg.rotate_factor)
                
                trans = get_affine_transform(centre, scale, rotation, (img.shape[1], img.shape[0]))
                # cropped_img = cv2.warpAffine(img, trans, (img.shape[1], img.shape[0]), flags=cv2.INTER_LINEAR)
            
                for j in range(self.cfg.joints_num):
                    meta_joints[j, :2] = affine_transform(
                        meta_joints[j, :2], trans)

                # if self.cfg.debug:
                    # debug_visualize(self.cfg.debug_path, index, cropped_img, meta_visible, meta_joints, meta_xyxy, True, postfix="rotated")
            
            # normalize coordinates
            target_meta_joints, neck2torso_scaler, torso_centre = normalize_coords(meta_joints)
        
            visibility = np.array(
                [idx for idx, v in enumerate(meta_visible.reshape(-1, )) if v==1]
            )
            
            input_meta_joints = target_meta_joints.copy()

            
            if random.random() > 0.2:
                # random_mask_num = int(random.random() * (self.cfg.random_mask_num))
                random_mask_num = min(int(random.random() * (self.cfg.random_mask_num+1)), len(visibility))
                    
                if random_mask_num != 0:
                    random_mask = np.random.choice(visibility, random_mask_num, replace=False)
                    input_meta_joints[random_mask, :] = 0

            assert input_meta_joints.shape[0] == self.cfg.joints_num
            
            input_meta_joints = input_meta_joints.flatten()
            target_meta_joints = target_meta_joints.flatten()
            returns = {
                'inputs': torch.from_numpy(input_meta_joints.copy()).float(),
                'targets': torch.from_numpy(target_meta_joints.copy()).float(),
                'scaler': neck2torso_scaler,
                'centre': torch.from_numpy(torso_centre.copy()).float(),
                'bbox': torch.from_numpy(np.array(meta_xyxy).copy()).float(),
                'img_path': [str(img_path)],
            }
            
            return returns
        else:
            # normalize coordinates
            target_meta_joints, neck2torso_scaler, torso_centre = normalize_coords(meta_joints)
        
            visibility = np.array(
                [idx for idx, v in enumerate(meta_visible.reshape(-1, ))]
            )  # note: unlike the training branch, every joint index is a mask candidate here
            
            input_meta_joints = target_meta_joints.copy()
            # always mask between 1 and 3 joints at evaluation time
            random_mask_num = min(int(random.random() * 3) + 1, len(visibility))
            if random_mask_num != 0:
                random_mask = np.random.choice(visibility, random_mask_num, replace=False)
                
                input_meta_joints[random_mask, :] = 0
                
            assert input_meta_joints.shape[0] == self.cfg.joints_num
            
            input_meta_joints = input_meta_joints.flatten()
            target_meta_joints = target_meta_joints.flatten()

            returns = {
                'inputs': torch.from_numpy(input_meta_joints.copy()).float(),
                'targets': torch.from_numpy(target_meta_joints.copy()).float(),
                'scaler': neck2torso_scaler,
                'centre': torch.from_numpy(torso_centre.copy()).float(),
                'bbox': torch.from_numpy(np.array(meta_xyxy).copy()).float(),
                'img_path': [str(img_path)],
            }
            
            return returns
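Exemplo n.º 30 trains a joint-completion model: it normalizes the 2D pose and zeroes a random subset of visible joints so the network learns to reconstruct them. The masking step as a standalone helper; a sketch, with names of my own choosing:

import numpy as np

def mask_random_joints(joints, visible_ids, max_masked, p_mask=0.8):
    """Zero out up to `max_masked` randomly chosen visible joints."""
    masked = joints.copy()
    if np.random.random() < p_mask and len(visible_ids) > 0:
        k = min(int(np.random.random() * (max_masked + 1)), len(visible_ids))
        if k > 0:
            idx = np.random.choice(visible_ids, k, replace=False)
            masked[idx, :] = 0  # the model must infer these joints
    return masked

joints = np.random.rand(16, 2)
print(mask_random_joints(joints, visible_ids=np.arange(16), max_masked=3))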