Example #1
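    # Inferred context: a 3D human-pose dataset's __getitem__. It crops the
    # image around the annotated person, builds one Gaussian heatmap per joint
    # on the output grid, and regresses the scale-normalised joint depth;
    # self.opt, self._load_image, etc. belong to the enclosing dataset class.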
    def __getitem__(self, index):
        # reshuffle the sample-index permutation once at the start of each epoch
        if index == 0 and self.split == 'train':
            self.idxs = np.random.choice(self.num_samples,
                                         self.num_samples,
                                         replace=False)
        img = self._load_image(index)
        gt_3d, pts, c, s = self._get_part_info(index)

        r = 0
        s = np.array([s, s])
        s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

        # size arguments are (width, height), matching the other examples
        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_w, self.opt.output_h])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        reg_target = np.zeros((self.num_joints, 1), dtype=np.float32)
        reg_ind = np.zeros((self.num_joints), dtype=np.int64)
        reg_mask = np.zeros((self.num_joints), dtype=np.uint8)
        pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)
        for i in range(self.num_joints):
            pt = affine_transform(pts[i, :2], trans_output).astype(np.int32)
            if (0 <= pt[0] < self.opt.output_w
                    and 0 <= pt[1] < self.opt.output_h):
                pts_crop[i] = pt
                out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)
                reg_target[i] = pts[i, 2] / s[0]  # assert not fit_short
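                # flattened position in an (output_h, output_w, num_joints)
                # volume: row-major over the heatmap with joints innermost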
                reg_ind[i] = pt[1] * self.opt.output_w * self.num_joints + \
                             pt[0] * self.num_joints + i # note transposed
                reg_mask[i] = 1

        meta = {
            'index': self.idxs[index],
            'center': c,
            'scale': s,
            'gt_3d': gt_3d,
            'pts_crop': pts_crop
        }

        ret = {
            'input': inp,
            'target': out,
            'meta': meta,
            'reg_target': reg_target,
            'reg_ind': reg_ind,
            'reg_mask': reg_mask
        }

        return ret
Example #2
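    # Inferred context: a 2D pose dataset's __getitem__ with train-time scale,
    # rotation, and horizontal-flip augmentation; the target is one Gaussian
    # heatmap per joint on the output grid.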
    def __getitem__(self, index):
        img = self._load_image(index)
        _, pts, c, s = self._get_part_info(index)
        r = 0

        if self.split == 'train':
            sf = self.opt.scale
            rf = self.opt.rotate
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if np.random.random() <= 0.6 else 0
        s = min(s, max(img.shape[0], img.shape[1])) * 1.0
        s = np.array([s, s])
        s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

        flipped = (
            self.split == 'train' and np.random.random() < self.opt.flip)
        if flipped:
            img = img[:, ::-1, :]
            c[0] = img.shape[1] - 1 - c[0]
            pts[:, 0] = img.shape[1] - 1 - pts[:, 0]
            for e in self.shuffle_ref:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_w, self.opt.output_h])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)
        for i in range(self.num_joints):
            if pts[i, 0] > 0 or pts[i, 1] > 0:
                pts_crop[i] = affine_transform(pts[i], trans_output)
                out[i] = draw_gaussian(out[i], pts_crop[i], self.opt.hm_gauss)

        meta = {'index': index, 'center': c, 'scale': s,
                'pts_crop': pts_crop}
        return {'input': inp, 'target': out, 'meta': meta}
Example #3
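    # Inferred context: same recipe as Example #1, plus train-time scale and
    # flip augmentation; flipping also mirrors the 3D ground truth and swaps
    # left/right joint pairs via self.shuffle_ref / self.shuffle_ref_3d.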
    def __getitem__(self, index):
        if index == 0 and self.split == 'train':
            self.idxs = np.random.choice(self.num_samples,
                                         self.num_samples,
                                         replace=False)
        img = self._load_image(index)
        gt_3d, pts, c, s = self._get_part_info(index)

        r = 0

        if self.split == 'train':
            sf = self.opt.scale
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # rf = self.opt.rotate
            # r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
            #    if np.random.random() <= 0.6 else 0

        flipped = (self.split == 'train'
                   and np.random.random() < self.opt.flip)
        if flipped:
            img = img[:, ::-1, :]
            c[0] = img.shape[1] - 1 - c[0]
            gt_3d[:, 0] *= -1
            pts[:, 0] = img.shape[1] - 1 - pts[:, 0]
            for e in self.shuffle_ref_3d:
                gt_3d[e[0]], gt_3d[e[1]] = \
                    gt_3d[e[1]].copy(), gt_3d[e[0]].copy()
            for e in self.shuffle_ref:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

        s = min(s, max(img.shape[0], img.shape[1])) * 1.0
        s = np.array([s, s])
        s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_w, self.opt.output_h])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        reg_target = np.zeros((self.num_joints, 1), dtype=np.float32)
        reg_ind = np.zeros((self.num_joints), dtype=np.int64)
        reg_mask = np.zeros((self.num_joints), dtype=np.uint8)
        pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)
        for i in range(self.num_joints):
            pt = affine_transform(pts[i, :2], trans_output).astype(np.int32)
            if (0 <= pt[0] < self.opt.output_w
                    and 0 <= pt[1] < self.opt.output_h):
                pts_crop[i] = pt
                out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)
                # assert not self.opt.fit_short_side
                reg_target[i] = pts[i, 2] / s[0]
                reg_ind[i] = pt[1] * self.opt.output_w * self.num_joints + \
                             pt[0] * self.num_joints + i  # note transposed
                reg_mask[i] = 1

        meta = {
            'index': self.idxs[index],
            'center': c,
            'scale': s,
            'gt_3d': gt_3d,
            'pts_crop': pts_crop
        }

        return {
            'input': inp,
            'target': out,
            'meta': meta,
            'reg_target': reg_target,
            'reg_ind': reg_ind,
            'reg_mask': reg_mask
        }
Example #4
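    # Inferred context: a COCO detection dataset's __getitem__ producing
    # center-point targets: per-class center heatmaps plus width/height and
    # sub-pixel offset regression indexed by the flattened center location.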
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                     dtype=np.float32)  # center
        if self.opt.keep_res:
            # (x | pad) + 1 rounds up to the next multiple of pad + 1 (e.g. 32 or 128)
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        # dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        if self.opt.cat_spec_wh:
            cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                                   dtype=np.float32)
            cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                     dtype=np.uint8)

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
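                # flattened center pixel in the output heatmap; reg keeps the
                # sub-pixel offset lost when rounding to ct_int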
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                if self.opt.cat_spec_wh:
                    cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                    cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                # if self.opt.dense_wh:
                #     draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        # if self.opt.dense_wh:
        #     hm_a = hm.max(axis=0, keepdims=True)
        #     dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        #     ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        #     del ret['wh']
        if self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                             np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #5
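    # Inferred context: a COCO detection dataset for a corner-based detector:
    # top-left / bottom-right corner heatmaps with per-corner offsets and
    # flattened corner indices.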
    def __getitem__(self, index):
        img_id = self.images[index]
        image = cv2.imread(
            os.path.join(self.img_dir,
                         self.coco.loadImgs(ids=[img_id])[0]['file_name']))
        annotations = self.coco.loadAnns(ids=self.coco.getAnnIds(
            imgIds=[img_id]))

        labels = np.array(
            [self.cat_ids[anno['category_id']] for anno in annotations])
        bboxes = np.array([anno['bbox'] for anno in annotations])
        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([0])
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        sorted_inds = np.argsort(labels, axis=0)
        bboxes = bboxes[sorted_inds]
        labels = labels[sorted_inds]

        # random crop (for training) or center crop (for validation)
        if self.split == 'train':
            image, bboxes = random_crop(image,
                                        bboxes,
                                        random_scales=self.rand_scales,
                                        new_size=self.img_size,
                                        padding=self.padding)
        else:
            image, border, offset = crop_image(
                image,
                center=[image.shape[0] // 2, image.shape[1] // 2],
                new_size=[max(image.shape[0:2]),
                          max(image.shape[0:2])])
            bboxes[:, 0::2] += border[2]
            bboxes[:, 1::2] += border[0]

        # resize image and bbox
        height, width = image.shape[:2]
        image = cv2.resize(image, (self.img_size['w'], self.img_size['h']))
        bboxes[:, 0::2] *= self.img_size['w'] / width
        bboxes[:, 1::2] *= self.img_size['h'] / height

        # discard non-valid bboxes
        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1)
        bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1)
        keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0,
                                   (bboxes[:, 3] - bboxes[:, 1]) > 0)
        bboxes = bboxes[keep_inds]
        labels = labels[keep_inds]

        # randomly flip image and bboxes
        if self.split == 'train' and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            bboxes[:, [0, 2]] = image.shape[1] - bboxes[:, [2, 0]] - 1

        # # ----------------------------- debug -----------------------------------------
        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # plt.show()
        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # for lab, bbox in zip(labels, bboxes):
        #   plt.gca().add_patch(Rectangle(bbox[:2], bbox[2] - bbox[0], bbox[3] - bbox[1],
        #                                 linewidth=1, edgecolor='r', facecolor='none'))
        #   plt.text(bbox[0], bbox[1], self.class_name[lab + 1],
        #            bbox=dict(facecolor='b', alpha=0.5), fontsize=7, color='w')
        # plt.show()
        # # -----------------------------------------------------------------------------

        image = image.astype(np.float32) / 255.

        # randomly change color and lighting
        if self.split == 'train':
            color_jittering_(self.data_rng, image)
            lighting_(self.data_rng, image, 0.1, self.eig_val, self.eig_vec)

        image -= self.mean
        image /= self.std
        image = image.transpose((2, 0, 1))  # [H, W, C] to [C, H, W]

        hmap_tl = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)
        hmap_br = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)

        regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32)
        regs_br = np.zeros((self.max_objs, 2), dtype=np.float32)

        inds_tl = np.zeros((self.max_objs, ), dtype=np.int64)
        inds_br = np.zeros((self.max_objs, ), dtype=np.int64)

        num_objs = np.array(min(bboxes.shape[0], self.max_objs))
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        ind_masks[:num_objs] = 1

        # map each corner to feature-map coordinates: the integer part indexes
        # the corner heatmaps, the fractional part is the offset target
        for i, ((xtl, ytl, xbr, ybr), label) in enumerate(
                zip(bboxes[:num_objs], labels[:num_objs])):
            fxtl = (xtl * self.fmap_size['w'] / self.img_size['w'])
            fytl = (ytl * self.fmap_size['h'] / self.img_size['h'])
            fxbr = (xbr * self.fmap_size['w'] / self.img_size['w'])
            fybr = (ybr * self.fmap_size['h'] / self.img_size['h'])

            ixtl = int(fxtl)
            iytl = int(fytl)
            ixbr = int(fxbr)
            iybr = int(fybr)

            if self.gaussian:
                width = xbr - xtl
                height = ybr - ytl

                width = math.ceil(width * self.fmap_size['w'] /
                                  self.img_size['w'])
                height = math.ceil(height * self.fmap_size['h'] /
                                   self.img_size['h'])

                radius = max(
                    0, int(gaussian_radius((height, width),
                                           self.gaussian_iou)))

                draw_gaussian(hmap_tl[label], [ixtl, iytl], radius)
                draw_gaussian(hmap_br[label], [ixbr, iybr], radius)
            else:
                hmap_tl[label, iytl, ixtl] = 1
                hmap_br[label, iybr, ixbr] = 1

            regs_tl[i, :] = [fxtl - ixtl, fytl - iytl]
            regs_br[i, :] = [fxbr - ixbr, fybr - iybr]
            inds_tl[i] = iytl * self.fmap_size['w'] + ixtl
            inds_br[i] = iybr * self.fmap_size['w'] + ixbr

        return {
            'image': image,
            'hmap_tl': hmap_tl,
            'hmap_br': hmap_br,
            'regs_tl': regs_tl,
            'regs_br': regs_br,
            'inds_tl': inds_tl,
            'inds_br': inds_br,
            'ind_masks': ind_masks
        }
Example #6
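    # Inferred context: a single-person COCO keypoint dataset's __getitem__;
    # the image is cropped around the annotated box, one heatmap per visible
    # joint is drawn, and flip augmentation is applied to both the input and
    # the target heatmaps.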
    def __getitem__(self, index):
        ann = self.coco.loadAnns(ids=[self.idxs[index]])[0]
        clean_bbox = self.clean_bbox[index]
        img_info = self.coco.loadImgs(ids=[ann['image_id']])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = cv2.imread(img_path)
        ids_all = self.coco.getAnnIds(imgIds=[ann['image_id']])
        ann_all = self.coco.loadAnns(ids=ids_all)
        pts_all = []
        for k in range(len(ann_all)):
            pts_k = np.array(ann_all[k]['keypoints'])
            pts_k = pts_k.reshape(self.num_joints, 3).astype(np.float32)
            pts_all.append(pts_k.copy())

        pts = np.array(ann['keypoints']).reshape(self.num_joints,
                                                 3).astype(np.float32)

        c, s = self._box2cs(clean_bbox)
        r = 0

        if self.split == 'train':
            sf = self.opt.scale
            rf = self.opt.rotate
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if np.random.random() <= 0.6 else 0

        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_w, self.opt.output_h])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        for i in range(self.num_joints):
            if pts[i, 2] > 0:
                pt = affine_transform(pts[i], trans_output)
                out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)
        '''
        out_all = np.zeros(
            (self.num_joints, self.opt.output_h, self.opt.output_w),
            dtype=np.float32)
        for k in range(len(pts_all)):
            pts = pts_all[k]
            for i in range(self.num_joints):
                if pts[i, 2] > 0:
                    pt = affine_transform(pts[i], trans_output)
                    out_all[i] = np.maximum(
                        out_all[i],
                        draw_gaussian(out_all[i], pt, self.opt.hm_gauss))
        '''

        if self.split == 'train':
            if np.random.random() < self.opt.flip:
                inp = flip(inp)
                out = shuffle_lr(flip(out), self.shuffle_ref)
                # out_all = shuffle_lr(flip(out_all), self.shuffle_ref)

        meta = {
            'index': index,
            'id': self.idxs[index],
            'center': c,
            'scale': s,
            'rotate': r,
            'image_id': ann['image_id'],
            'vis': pts[:, 2],
            'score': 1
        }

        return {'input': inp, 'target': out, 'meta': meta}
Example #7
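    # Inferred context: rotated-box targets built with imgaug. Box corners
    # (and optional keypoints) are augmented as Keypoint objects, then each
    # box is refit on the output grid with cv2.minAreaRect.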
    def __get_rotated_coco(self, img, anns, num_objs):
        kpts = []
        kpts_tmp = []
        for k in range(num_objs):
            ann = anns[k]
            ann_rotated = get_annotation_with_angle(ann)
            rot = rotate_bbox(*ann_rotated)
            kpts.extend([Keypoint(*x) for x in rot])

            if self.num_keypoints > 0:
                if 'keypoints' not in ann:
                    ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))

                kpt = [
                    Keypoint(*x)
                    for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
                ]
                kpts_tmp.extend(kpt)

        idx_boxes = len(kpts)
        if self.num_keypoints > 0:
            kpts.extend(kpts_tmp)

        kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
        else:
            img_aug, kpts_aug = np.copy(img), kpts.copy()

        img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 3), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros(
            (self.max_detections, 7 if self.use_rotated_boxes else 6),
            dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        if self.num_keypoints > 0:
            kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                          dtype=np.float32)
            gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                             dtype=np.float32)
            kp_reg_mask = np.zeros(
                (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)

        kpts_aug = self.resize_out(keypoints=kpts_aug)

        box_kpts_aug, kpts_aug = kpts_aug[:idx_boxes], kpts_aug[idx_boxes:]
        assert num_objs == len(box_kpts_aug) // 4

        for k in range(num_objs):
            ann = anns[k]
            points = []
            for p in box_kpts_aug[k * 4:k * 4 + 4]:
                box_kp = [np.clip(p.x, 0, output_w - 1),
                          np.clip(p.y, 0, output_h - 1)]
                points.append(box_kp)

            points = np.array(points).astype(np.float32)
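            # refit a rotated rectangle to the four augmented corner points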
            cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)

            if cv_wh[0] == 0 or cv_wh[1] == 0:
                continue

            cx, cy, w, h, angle = get_annotation_with_angle({
                'rbbox':
                np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
            })
            ct = np.array((cx, cy))

            cls_id = int(self.cat_mapping[ann['category_id']])

            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = w, h, angle
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])

                if self.num_keypoints > 0:
                    valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                    for i, p in enumerate(
                            kpts_aug[k * self.num_keypoints:k *
                                     self.num_keypoints + self.num_keypoints]):
                        kp[k][i * 2] = p.x - ct_int[0]
                        kp[k][i * 2 + 1] = p.y - ct_int[1]

                        # imgaug image shapes are (height, width)
                        is_valid = valid[i] == 2 and not p.is_out_of_image(
                            (output_h, output_w))
                        kp_reg_mask[k, i * 2] = int(is_valid)
                        kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                        gt_kp[k][i] = p.x, p.y

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del box_kpts_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 7), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        if self.num_keypoints > 0:
            ret['kps'] = kp
            ret['gt_kps'] = gt_kp
            ret['kp_reg_mask'] = kp_reg_mask
            del kpts_aug

        return ret
Example #8
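    # Inferred context: axis-aligned box targets built with imgaug, with
    # optional per-object keypoint offsets relative to the box center.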
    def __get_default_coco(self, img, anns, num_objs):
        boxes = []
        if self.num_keypoints > 0:
            kpts = []

        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            boxes.append(BoundingBox(*bbox))

            if self.num_keypoints > 0:
                if 'keypoints' not in ann:
                    ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))

                kpt = [
                    Keypoint(*x)
                    for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
                ]
                kpts.extend(kpt)

        bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

        if self.num_keypoints > 0:
            kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            if self.num_keypoints > 0:
                img_aug, bbs_aug, kpts_aug = self.augmentation(
                    image=img, bounding_boxes=bbs, keypoints=kpts)
            else:
                img_aug, bbs_aug = self.augmentation(image=img,
                                                     bounding_boxes=bbs)
        else:
            if self.num_keypoints > 0:
                kpts_aug = kpts.copy()

            img_aug, bbs_aug = np.copy(img), bbs.copy()

        if self.num_keypoints > 0:
            img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                     bounding_boxes=bbs_aug,
                                                     keypoints=kpts_aug)
        else:
            img_aug, bbs_aug = self.resize(image=img_aug,
                                           bounding_boxes=bbs_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 2), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        if self.num_keypoints > 0:
            kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                          dtype=np.float32)
            gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                             dtype=np.float32)
            kp_reg_mask = np.zeros(
                (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)

            bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                                keypoints=kpts_aug)
        else:
            bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

        for k in range(num_objs):
            ann = anns[k]
            bbox_aug = bbs_aug[k].clip_out_of_image((output_w, output_h))
            bbox = np.array(
                [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])

            cls_id = int(self.cat_mapping[ann['category_id']])

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

                if self.num_keypoints > 0:
                    valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                    for i, p in enumerate(
                            kpts_aug[k * self.num_keypoints:k *
                                     self.num_keypoints + self.num_keypoints]):
                        kp[k][i * 2] = p.x - ct_int[0]
                        kp[k][i * 2 + 1] = p.y - ct_int[1]

                        is_valid = valid[i] == 2 and not p.is_out_of_image(
                            (output_h, output_w))
                        kp_reg_mask[k, i * 2] = int(is_valid)
                        kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                        gt_kp[k][i] = p.x, p.y

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del bbs
        del bbs_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 6), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        if self.num_keypoints > 0:
            ret['kps'] = kp
            ret['gt_kps'] = gt_kp
            ret['kp_reg_mask'] = kp_reg_mask
            del kpts_aug

        return ret
Example #9
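    # Inferred context: a paired RGB + thermal (lwir) pedestrian dataset;
    # person boxes are parsed from a text annotation file and encoded as
    # corner heatmap / offset targets, as in Example #5.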
    def __getitem__(self, index):
        img_id = self.img_paths[index]
        img_set, img_vid, img_name = img_id.split("_", 2)
        img_name = img_name.replace("txt", "jpg")
        img_path = os.path.join(self.img_dir, img_set, img_vid)
        img_rgb = cv2.imread(os.path.join(img_path, "visible", img_name),
                             cv2.IMREAD_COLOR)
        img_ir = cv2.imread(os.path.join(img_path, "lwir", img_name),
                            cv2.IMREAD_GRAYSCALE)

        with open(os.path.join(self.annot_path,
                               self.img_paths[index])) as annot_file:
            annot_data = [line.rstrip('\n') for line in annot_file][1:]

        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        if len(annot_data) != 0:
            bboxes = bboxes.repeat(len(annot_data), axis=0)
            for i in range(len(annot_data)):
                line_data = annot_data[i].split()
                label = line_data[0]
                if self.split == "train":
                    if label not in ["person", "person?", "people"]:
                        continue
                elif label != "person":
                    continue
                bboxes[i, :] = list(map(int, line_data[1:5]))

        bboxes[:, 2:] += bboxes[:, :2]

        # resize image and bbox
        height, width = img_rgb.shape[:2]
        img_rgb = cv2.resize(img_rgb, (self.img_size['w'], self.img_size['h']))
        img_ir = cv2.resize(img_ir, (self.img_size['w'], self.img_size['h']))
        img_ir = np.expand_dims(img_ir, axis=2)
        bboxes[:, 0::2] *= self.img_size['w'] / width
        bboxes[:, 1::2] *= self.img_size['h'] / height

        # discard non-valid bboxes
        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1)
        bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1)
        keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0,
                                   (bboxes[:, 3] - bboxes[:, 1]) > 0)
        bboxes = bboxes[keep_inds]

        # randomly flip image and bboxes
        if self.split == 'train' and np.random.uniform() > 0.5:
            img_rgb[:] = img_rgb[:, ::-1, :]
            img_ir[:] = img_ir[:, ::-1, :]
            bboxes[:, [0, 2]] = img_rgb.shape[1] - bboxes[:, [2, 0]] - 1

        img_rgb = img_rgb.astype(np.float32) / 255.
        img_ir = img_ir.astype(np.float32) / 255.

        img_rgb -= self.mean[0, 0, :3]
        img_rgb /= self.std[0, 0, :3]
        img_ir -= self.mean[0, 0, 3]
        img_ir /= self.std[0, 0, 3]
        img_rgb = img_rgb.transpose((2, 0, 1))  # [H, W, C] to [C, H, W]
        img_ir = img_ir.transpose((2, 0, 1))

        hmap_tl = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)
        hmap_br = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)

        regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32)
        regs_br = np.zeros((self.max_objs, 2), dtype=np.float32)

        inds_tl = np.zeros((self.max_objs, ), dtype=np.int64)
        inds_br = np.zeros((self.max_objs, ), dtype=np.int64)

        num_objs = np.array(min(bboxes.shape[0], self.max_objs))
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        ind_masks[:num_objs] = 1

        for i, (xtl, ytl, xbr, ybr) in enumerate(bboxes[:num_objs]):
            fxtl = (xtl * self.fmap_size['w'] / self.img_size['w'])
            fytl = (ytl * self.fmap_size['h'] / self.img_size['h'])
            fxbr = (xbr * self.fmap_size['w'] / self.img_size['w'])
            fybr = (ybr * self.fmap_size['h'] / self.img_size['h'])

            ixtl = int(fxtl)
            iytl = int(fytl)
            ixbr = int(fxbr)
            iybr = int(fybr)

            if self.gaussian:
                width = xbr - xtl
                height = ybr - ytl

                width = math.ceil(width * self.fmap_size['w'] /
                                  self.img_size['w'])
                height = math.ceil(height * self.fmap_size['h'] /
                                   self.img_size['h'])

                radius = max(
                    0, int(gaussian_radius((height, width),
                                           self.gaussian_iou)))

                draw_gaussian(hmap_tl[0], [ixtl, iytl], radius)
                draw_gaussian(hmap_br[0], [ixbr, iybr], radius)
            else:
                hmap_tl[0, iytl, ixtl] = 1
                hmap_br[0, iybr, ixbr] = 1

            regs_tl[i, :] = [fxtl - ixtl, fytl - iytl]
            regs_br[i, :] = [fxbr - ixbr, fybr - iybr]
            inds_tl[i] = iytl * self.fmap_size['w'] + ixtl
            inds_br[i] = iybr * self.fmap_size['w'] + ixbr

        return {
            'img_rgb': img_rgb,
            'img_ir': img_ir,
            'hmap_tl': hmap_tl,
            'hmap_br': hmap_br,
            'regs_tl': regs_tl,
            'regs_br': regs_br,
            'inds_tl': inds_tl,
            'inds_br': inds_br,
            'ind_masks': ind_masks
        }
Example #10
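    # Inferred context: rotated-box targets as in Example #7, without
    # keypoints; angles are converted to radians both before augmentation and
    # in the wh target.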
    def __get_rotated_coco(self, img, anns, num_objs):
        kpts = []
        for k in range(num_objs):
            ann = get_annotation_with_angle(anns[k])
            ann[4] = np.radians(ann[4])
            rot = rotate_bbox(*ann)
            kpts.extend([Keypoint(*x) for x in rot])

        kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
        else:
            img_aug, kpts_aug = np.copy(img), kpts.copy()

        img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 3), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros(
            (self.max_detections, 7 if self.use_rotated_boxes else 6),
            dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        kpts_aug = self.resize_out(keypoints=kpts_aug)
        assert num_objs == len(kpts_aug) // 4

        for k in range(num_objs):
            ann = anns[k]
            points = []
            for p in kpts_aug[k * 4:k * 4 + 4]:
                kp = [np.clip(p.x, 0, output_w - 1),
                      np.clip(p.y, 0, output_h - 1)]
                points.append(kp)

            points = np.array(points).astype(np.float32)
            cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)

            if cv_wh[0] == 0 or cv_wh[1] == 0:
                continue

            cx, cy, w, h, angle = get_annotation_with_angle({
                'rbbox':
                np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
            })
            ct = np.array((cx, cy))

            cls_id = int(self.cat_mapping[ann['category_id']])

            if h > 0 and w > 0:
                angle = np.radians(angle)
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = w, h, angle
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del kpts
        del kpts_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 7), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        return ret
Example #11
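    # Inferred context: axis-aligned box targets as in Example #8, without
    # keypoints.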
    def __get_default_coco(self, img, anns, num_objs):
        boxes = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            boxes.append(BoundingBox(*bbox))

        bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

        if self.augmentation is not None:
            img_aug, bbs_aug = self.augmentation(image=img, bounding_boxes=bbs)
        else:
            img_aug, bbs_aug = np.copy(img), bbs.copy()

        img_aug, bbs_aug = self.resize(image=img_aug, bounding_boxes=bbs_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        # one (x1, y1, x2, y2, score, cls) row per object
        gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

        for k in range(num_objs):
            ann = anns[k]
            bbox_aug = bbs_aug[k].clip_out_of_image((output_w, output_h))
            bbox = np.array(
                [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])

            cls_id = int(self.cat_mapping[ann['category_id']])

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        del bbs
        del bbs_aug
        del img_aug

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 6), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        return ret