def __getitem__(self, index):
    """Build one sample: normalized input crop, per-joint heatmaps, depth targets.

    No scale, rotation or flip augmentation is applied in this variant.

    Args:
        index: Sample position; forwarded to ``self._load_image`` and
            ``self._get_part_info`` and used to look up ``self.idxs``.

    Returns:
        dict with keys ``'input'`` (affine-cropped image, normalized with
        ``self.mean`` / ``self.std`` and transposed with ``(2, 0, 1)``),
        ``'target'`` (array of shape ``(num_joints, output_h, output_w)``),
        ``'reg_target'``, ``'reg_ind'``, ``'reg_mask'`` and ``'meta'``.
    """
    # A new permutation is drawn whenever one of the first ten training
    # indices is requested.
    if index < 10 and self.split == 'train':
        self.idxs = np.random.choice(
            self.num_samples, self.num_samples, replace=False)

    opt = self.opt
    img = self._load_image(index)
    gt_3d, pts, c, s = self._get_part_info(index)
    r = 0
    s = np.array([s, s])
    s = adjust_aspect_ratio(s, self.aspect_ratio, opt.fit_short_side)

    # Sizes are passed width-first. cv2.warpAffine takes dsize as
    # (width, height); the previous (height, width) order produced a
    # transposed crop whenever input_h != input_w.
    # NOTE(review): assumes get_affine_transform also expects
    # [width, height], matching the x/y bounds check below -- confirm.
    trans_input = get_affine_transform(c, s, r, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(img, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    trans_output = get_affine_transform(c, s, r, [opt.output_w, opt.output_h])
    out = np.zeros((self.num_joints, opt.output_h, opt.output_w),
                   dtype=np.float32)
    reg_target = np.zeros((self.num_joints, 1), dtype=np.float32)
    reg_ind = np.zeros((self.num_joints), dtype=np.int64)
    reg_mask = np.zeros((self.num_joints), dtype=np.uint8)
    pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)

    for i in range(self.num_joints):
        pt = affine_transform(pts[i, :2], trans_output).astype(np.int32)
        # Only joints that land inside the output map receive targets;
        # the others keep all-zero rows and reg_mask == 0.
        if 0 <= pt[0] < opt.output_w and 0 <= pt[1] < opt.output_h:
            pts_crop[i] = pt
            out[i] = draw_gaussian(out[i], pt, opt.hm_gauss)
            reg_target[i] = pts[i, 2] / s[0]  # assert not fit_short
            # Flat index for a (row, column, joint) layout -- note transposed.
            reg_ind[i] = pt[1] * opt.output_w * self.num_joints + \
                pt[0] * self.num_joints + i
            reg_mask[i] = 1

    meta = {
        'index': self.idxs[index],
        'center': c,
        'scale': s,
        'gt_3d': gt_3d,
        'pts_crop': pts_crop
    }
    ret = {
        'input': inp,
        'target': out,
        'meta': meta,
        'reg_target': reg_target,
        'reg_ind': reg_ind,
        'reg_mask': reg_mask
    }
    return ret
def __getitem__(self, index):
    """Build one 2D-pose sample with optional scale/rotation/flip augmentation.

    Augmentation is applied only when ``self.split == 'train'``.

    Args:
        index: Sample position; forwarded to ``self._load_image`` and
            ``self._get_part_info``.

    Returns:
        dict with keys ``'input'`` (affine-cropped image, normalized with
        ``self.mean`` / ``self.std`` and transposed with ``(2, 0, 1)``),
        ``'target'`` (array of shape ``(num_joints, output_h, output_w)``)
        and ``'meta'`` (index, center, scale and cropped joint positions).
    """
    opt = self.opt
    img = self._load_image(index)
    _, pts, c, s = self._get_part_info(index)
    r = 0
    if self.split == 'train':
        sf = opt.scale
        rf = opt.rotate
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied with probability 0.6; the coin is tossed
        # before the angle is drawn.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if np.random.random() <= 0.6 else 0
    # The scale is capped at the longer image side.
    s = min(s, max(img.shape[0], img.shape[1])) * 1.0
    s = np.array([s, s])
    s = adjust_aspect_ratio(s, self.aspect_ratio, opt.fit_short_side)

    flipped = (self.split == 'train' and np.random.random() < opt.flip)
    if flipped:
        img = img[:, ::-1, :]
        c[0] = img.shape[1] - 1 - c[0]
        pts[:, 0] = img.shape[1] - 1 - pts[:, 0]
        # Swap the joint pairs listed in shuffle_ref so rows keep their
        # meaning after mirroring.
        for e in self.shuffle_ref:
            pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

    # Sizes are passed width-first. cv2.warpAffine takes dsize as
    # (width, height); the previous (height, width) order produced a
    # transposed crop whenever input_h != input_w.
    # NOTE(review): assumes get_affine_transform also expects
    # [width, height] -- confirm against its definition.
    trans_input = get_affine_transform(c, s, r, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(img, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    trans_output = get_affine_transform(c, s, r, [opt.output_w, opt.output_h])
    out = np.zeros((self.num_joints, opt.output_h, opt.output_w),
                   dtype=np.float32)
    pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)
    for i in range(self.num_joints):
        # Joints whose x and y are both non-positive get no heatmap.
        if pts[i, 0] > 0 or pts[i, 1] > 0:
            pts_crop[i] = affine_transform(pts[i], trans_output)
            out[i] = draw_gaussian(out[i], pts_crop[i], opt.hm_gauss)

    meta = {'index': index, 'center': c, 'scale': s, 'pts_crop': pts_crop}
    return {'input': inp, 'target': out, 'meta': meta}
def __getitem__(self, index):
    """Assemble one 3D-pose sample with optional scale and flip augmentation.

    Args:
        index: Sample position; forwarded to ``self._load_image`` and
            ``self._get_part_info`` and used to look up ``self.idxs``.

    Returns:
        dict with keys ``'input'``, ``'target'``, ``'meta'``,
        ``'reg_target'``, ``'reg_ind'`` and ``'reg_mask'``.
    """
    training = self.split == 'train'
    # A new permutation is drawn each time training sample 0 is requested.
    if index == 0 and training:
        self.idxs = np.random.choice(
            self.num_samples, self.num_samples, replace=False)

    opt = self.opt
    n_joints = self.num_joints
    image = self._load_image(index)
    joints_3d, joints, center, scale = self._get_part_info(index)
    rotation = 0  # rotation augmentation is disabled in this variant

    if training:
        jitter = opt.scale
        factor = np.clip(np.random.randn() * jitter + 1, 1 - jitter, 1 + jitter)
        scale = scale * factor

    # The flip coin is only tossed for training samples.
    if training and np.random.random() < opt.flip:
        image = image[:, ::-1, :]
        center[0] = image.shape[1] - 1 - center[0]
        joints_3d[:, 0] *= -1
        joints[:, 0] = image.shape[1] - 1 - joints[:, 0]
        for pair in self.shuffle_ref_3d:
            first, second = pair[0], pair[1]
            saved = joints_3d[first].copy()
            joints_3d[first] = joints_3d[second].copy()
            joints_3d[second] = saved
        for pair in self.shuffle_ref:
            first, second = pair[0], pair[1]
            saved = joints[first].copy()
            joints[first] = joints[second].copy()
            joints[second] = saved

    longest_side = max(image.shape[0], image.shape[1])
    scale = min(scale, longest_side) * 1.0
    scale = adjust_aspect_ratio(
        np.array([scale, scale]), self.aspect_ratio, opt.fit_short_side)

    in_size = (opt.input_w, opt.input_h)
    to_input = get_affine_transform(center, scale, rotation, list(in_size))
    crop = cv2.warpAffine(image, to_input, in_size, flags=cv2.INTER_LINEAR)
    crop = (crop.astype(np.float32) / 256. - self.mean) / self.std
    crop = crop.transpose(2, 0, 1)

    to_output = get_affine_transform(
        center, scale, rotation, [opt.output_w, opt.output_h])
    heatmaps = np.zeros((n_joints, opt.output_h, opt.output_w),
                        dtype=np.float32)
    depth_target = np.zeros((n_joints, 1), dtype=np.float32)
    depth_index = np.zeros((n_joints), dtype=np.int64)
    depth_mask = np.zeros((n_joints), dtype=np.uint8)
    joints_crop = np.zeros((n_joints, 2), dtype=np.int32)

    for j in range(n_joints):
        pt = affine_transform(joints[j, :2], to_output).astype(np.int32)
        inside = (pt[0] >= 0 and pt[1] >= 0
                  and pt[0] < opt.output_w and pt[1] < opt.output_h)
        if not inside:
            continue
        joints_crop[j] = pt
        heatmaps[j] = draw_gaussian(heatmaps[j], pt, opt.hm_gauss)
        # assert not self.opt.fit_short_side
        depth_target[j] = joints[j, 2] / scale[0]
        # note transposed
        depth_index[j] = pt[1] * opt.output_w * n_joints + \
            pt[0] * n_joints + j
        depth_mask[j] = 1

    meta = {
        'index': self.idxs[index],
        'center': center,
        'scale': scale,
        'gt_3d': joints_3d,
        'pts_crop': joints_crop
    }
    return {
        'input': crop,
        'target': heatmaps,
        'meta': meta,
        'reg_target': depth_target,
        'reg_ind': depth_index,
        'reg_mask': depth_mask
    }
def __getitem__(self, index):
    """Load one COCO image and build centre-point detection targets.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'`` and
        either ``'wh'`` or (when ``opt.cat_spec_wh`` is set)
        ``'cat_spec_wh'`` / ``'cat_spec_mask'``; plus ``'reg'`` when
        ``opt.reg_offset`` is set and ``'meta'`` when ``opt.debug > 0`` or
        the split is not ``'train'``.
    """
    opt = self.opt
    training = self.split == 'train'

    img_id = self.images[index]
    info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, info['file_name'])
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                 dtype=np.float32)  # center
    if opt.keep_res:
        input_h = (height | opt.pad) + 1  # size divisible by 32
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if training:
        if opt.not_rand_crop:
            # Gaussian shift of the centre, then Gaussian scale jitter.
            sf = opt.scale
            cf = opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        else:
            # Random scale from a fixed grid, then a random centre kept
            # away from the image border.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if training and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    use_cat_spec = opt.cat_spec_wh
    if use_cat_spec:
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

    gt_det = []
    for k, ann in enumerate(anns[:num_objs]):
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        # Boxes that collapse after clipping contribute no targets.
        if not (h > 0 and w > 0):
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        if use_cat_spec:
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind}
    if use_cat_spec:
        # The class-specific size targets replace the plain 'wh' entry.
        ret['cat_spec_wh'] = cat_spec_wh
        ret['cat_spec_mask'] = cat_spec_mask
    else:
        ret['wh'] = wh
    if opt.reg_offset:
        ret['reg'] = reg
    if opt.debug > 0 or self.split != 'train':
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
    return ret
def __getitem__(self, index):
    """Load one COCO image and build corner-heatmap detection targets.

    Training samples are randomly cropped, flipped and colour-jittered;
    other splits are padded to a square via ``crop_image``.

    Args:
        index: Position into ``self.images``.

    Returns:
        dict with keys ``'image'``, ``'hmap_tl'``, ``'hmap_br'``,
        ``'regs_tl'``, ``'regs_br'``, ``'inds_tl'``, ``'inds_br'`` and
        ``'ind_masks'``.
    """
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    image = cv2.imread(os.path.join(self.img_dir, img_info['file_name']))
    annotations = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))

    labels = np.array(
        [self.cat_ids[anno['category_id']] for anno in annotations])
    # Force a float dtype: with integer-only annotations the array would be
    # integral and the in-place float scaling below would raise.
    bboxes = np.array([anno['bbox'] for anno in annotations],
                      dtype=np.float64)
    if len(bboxes) == 0:
        # Placeholder box; it has zero area and is dropped by the validity
        # filter further down.
        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        labels = np.array([0])
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    sorted_inds = np.argsort(labels, axis=0)
    bboxes = bboxes[sorted_inds]
    labels = labels[sorted_inds]

    # random crop (for training) or center crop (for validation)
    if self.split == 'train':
        image, bboxes = random_crop(image,
                                    bboxes,
                                    random_scales=self.rand_scales,
                                    new_size=self.img_size,
                                    padding=self.padding)
    else:
        image, border, offset = crop_image(
            image,
            center=[image.shape[0] // 2, image.shape[1] // 2],
            new_size=[max(image.shape[0:2]), max(image.shape[0:2])])
        bboxes[:, 0::2] += border[2]
        bboxes[:, 1::2] += border[0]

    # resize image and bbox
    height, width = image.shape[:2]
    image = cv2.resize(image, (self.img_size['w'], self.img_size['h']))
    bboxes[:, 0::2] *= self.img_size['w'] / width
    bboxes[:, 1::2] *= self.img_size['h'] / height

    # discard non-valid bboxes
    bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1)
    bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1)
    keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0,
                               (bboxes[:, 3] - bboxes[:, 1]) > 0)
    bboxes = bboxes[keep_inds]
    labels = labels[keep_inds]

    # randomly flip image and bboxes
    if self.split == 'train' and np.random.uniform() > 0.5:
        # Rebind to a fresh contiguous copy instead of assigning the array
        # from a reversed view of itself.
        image = np.ascontiguousarray(image[:, ::-1, :])
        bboxes[:, [0, 2]] = image.shape[1] - bboxes[:, [2, 0]] - 1

    image = image.astype(np.float32) / 255.

    # randomly change color and lighting
    if self.split == 'train':
        color_jittering_(self.data_rng, image)
        lighting_(self.data_rng, image, 0.1, self.eig_val, self.eig_vec)

    image -= self.mean
    image /= self.std
    image = image.transpose((2, 0, 1))  # [H, W, C] to [C, H, W]

    fmap_h, fmap_w = self.fmap_size['h'], self.fmap_size['w']
    x_ratio = fmap_w / self.img_size['w']
    y_ratio = fmap_h / self.img_size['h']

    hmap_tl = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    hmap_br = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)

    regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32)
    regs_br = np.zeros((self.max_objs, 2), dtype=np.float32)
    inds_tl = np.zeros((self.max_objs, ), dtype=np.int64)
    inds_br = np.zeros((self.max_objs, ), dtype=np.int64)

    num_objs = min(bboxes.shape[0], self.max_objs)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    ind_masks[:num_objs] = 1

    # Only the first num_objs boxes are encoded: the per-object arrays have
    # max_objs rows, so iterating every box overflowed them on images with
    # more than max_objs objects.
    for i, ((xtl, ytl, xbr, ybr), label) in enumerate(
            zip(bboxes[:num_objs], labels[:num_objs])):
        fxtl = (xtl * fmap_w / self.img_size['w'])
        fytl = (ytl * fmap_h / self.img_size['h'])
        fxbr = (xbr * fmap_w / self.img_size['w'])
        fybr = (ybr * fmap_h / self.img_size['h'])

        ixtl = int(fxtl)
        iytl = int(fytl)
        ixbr = int(fxbr)
        iybr = int(fybr)

        if self.gaussian:
            box_w = math.ceil((xbr - xtl) * x_ratio)
            box_h = math.ceil((ybr - ytl) * y_ratio)
            radius = max(
                0, int(gaussian_radius((box_h, box_w), self.gaussian_iou)))
            draw_gaussian(hmap_tl[label], [ixtl, iytl], radius)
            draw_gaussian(hmap_br[label], [ixbr, iybr], radius)
        else:
            hmap_tl[label, iytl, ixtl] = 1
            hmap_br[label, iybr, ixbr] = 1

        # Sub-cell offsets lost by truncating to integer cells.
        regs_tl[i, :] = [fxtl - ixtl, fytl - iytl]
        regs_br[i, :] = [fxbr - ixbr, fybr - iybr]
        inds_tl[i] = iytl * fmap_w + ixtl
        inds_br[i] = iybr * fmap_w + ixbr

    return {
        'image': image,
        'hmap_tl': hmap_tl,
        'hmap_br': hmap_br,
        'regs_tl': regs_tl,
        'regs_br': regs_br,
        'inds_tl': inds_tl,
        'inds_br': inds_br,
        'ind_masks': ind_masks
    }
def __getitem__(self, index):
    """Build one single-person keypoint sample from a COCO annotation.

    Scale/rotation jitter and a horizontal flip are applied only when
    ``self.split == 'train'``.

    Args:
        index: Position into ``self.idxs`` (annotation ids) and
            ``self.clean_bbox``.

    Returns:
        dict with keys ``'input'`` (normalized crop transposed with
        ``(2, 0, 1)``), ``'target'`` (one heatmap per joint) and ``'meta'``.
    """
    opt = self.opt
    ann = self.coco.loadAnns(ids=[self.idxs[index]])[0]
    clean_bbox = self.clean_bbox[index]
    img_info = self.coco.loadImgs(ids=[ann['image_id']])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)

    # The keypoints of every other annotation on the image used to be
    # loaded and reshaped here, but the result was never read; that
    # per-sample work has been removed.
    # NOTE(review): assumes the coco lookups have no side effects -- confirm.
    pts = np.array(ann['keypoints']).reshape(self.num_joints,
                                             3).astype(np.float32)

    c, s = self._box2cs(clean_bbox)
    r = 0
    if self.split == 'train':
        sf = opt.scale
        rf = opt.rotate
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied with probability 0.6; the coin is tossed
        # before the angle is drawn.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if np.random.random() <= 0.6 else 0

    trans_input = get_affine_transform(c, s, r, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(img, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    trans_output = get_affine_transform(c, s, r, [opt.output_w, opt.output_h])
    out = np.zeros((self.num_joints, opt.output_h, opt.output_w),
                   dtype=np.float32)
    for i in range(self.num_joints):
        # The third keypoint column gates drawing; it is also exported
        # below as meta['vis'].
        if pts[i, 2] > 0:
            pt = affine_transform(pts[i], trans_output)
            out[i] = draw_gaussian(out[i], pt, opt.hm_gauss)

    if self.split == 'train':
        if np.random.random() < opt.flip:
            # Flip is applied after cropping; shuffle_lr swaps the heatmap
            # channels of the pairs in shuffle_ref.
            inp = flip(inp)
            out = shuffle_lr(flip(out), self.shuffle_ref)

    meta = {
        'index': index,
        'id': self.idxs[index],
        'center': c,
        'scale': s,
        'rotate': r,
        'image_id': ann['image_id'],
        'vis': pts[:, 2],
        'score': 1
    }
    return {'input': inp, 'target': out, 'meta': meta}
def __get_rotated_coco(self, img, anns, num_objs):
    """Build rotated-box (and optional keypoint) targets for one image.

    Each rotated box is turned into four corner keypoints so that the
    augmentation pipeline can transform it; the box is then re-fitted with
    ``cv2.minAreaRect`` on the output grid.

    Args:
        img: Image array passed to the augmenters.
        anns: Sequence of annotation dicts; the first ``num_objs`` are used.
            An annotation lacking ``'keypoints'`` gets a zero-filled entry
            added in place when ``self.num_keypoints > 0``.
        num_objs: Number of annotations to encode.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'`` (width, height, angle), ``'reg'``, ``'gt_dets'``,
        ``'gt_areas'`` and, when ``self.num_keypoints > 0``, ``'kps'``,
        ``'gt_kps'`` and ``'kp_reg_mask'``.
    """
    with_kpts = self.num_keypoints > 0

    kpts = []
    kpts_tmp = []
    for k in range(num_objs):
        ann = anns[k]
        ann_rotated = get_annotation_with_angle(ann)
        rot = rotate_bbox(*ann_rotated)
        kpts.extend([Keypoint(*x) for x in rot])

        if with_kpts:
            if 'keypoints' not in ann:
                ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))
            kpt = [
                Keypoint(*x)
                for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
            ]
            kpts_tmp.extend(kpt)

    # Box corners come first, object keypoints after them, so the two
    # groups can be split again at idx_boxes after augmentation.
    idx_boxes = len(kpts)
    if with_kpts:
        kpts.extend(kpts_tmp)

    kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
    else:
        img_aug, kpts_aug = np.copy(img), kpts.copy()

    img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 3), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros(
        (self.max_detections, 7 if self.use_rotated_boxes else 6),
        dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    if with_kpts:
        kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                      dtype=np.float32)
        gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                         dtype=np.float32)
        kp_reg_mask = np.zeros(
            (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)

    kpts_aug = self.resize_out(keypoints=kpts_aug)
    box_kpts_aug, kpts_aug = kpts_aug[:idx_boxes], kpts_aug[idx_boxes:]
    assert num_objs == len(box_kpts_aug) // 4

    for k in range(num_objs):
        ann = anns[k]
        points = []
        for p in box_kpts_aug[k * 4:k * 4 + 4]:
            box_kp = list((np.clip(p.x, 0, output_w - 1),
                           np.clip(p.y, 0, output_h - 1)))
            points.append(box_kp)

        points = np.array(points).astype(np.float32)
        cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)
        # Skip boxes that collapsed to a line or point after clipping.
        if cv_wh[0] == 0 or cv_wh[1] == 0:
            continue

        cx, cy, w, h, angle = get_annotation_with_angle({
            'rbbox':
            np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
        })
        ct = np.array((cx, cy))
        cls_id = int(self.cat_mapping[ann['category_id']])

        # Targets (including keypoints and area) are only written for
        # boxes with positive size, so ct_int is always defined where the
        # keypoint offsets use it.
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius((np.ceil(h), np.ceil(w)))
        radius = max(0, int(radius))
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = w, h, angle
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])

        if with_kpts:
            valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
            first = k * self.num_keypoints
            for i, p in enumerate(
                    kpts_aug[first:first + self.num_keypoints]):
                kp[k][i * 2] = p.x - ct_int[0]
                kp[k][i * 2 + 1] = p.y - ct_int[1]

                # The shape tuple is (height, width); it was previously
                # (output_w, output_w), which is wrong for non-square
                # output grids.
                is_valid = valid[i] == 2 and not p.is_out_of_image(
                    (output_h, output_w))
                kp_reg_mask[k, i * 2] = int(is_valid)
                kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                gt_kp[k][i] = p.x, p.y

        if "area" not in ann:
            gt_areas[k] = w * h
        else:
            gt_areas[k] = ann["area"]

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 7), dtype=np.float32)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }

    if with_kpts:
        ret['kps'] = kp
        ret['gt_kps'] = gt_kp
        ret['kp_reg_mask'] = kp_reg_mask

    return ret
def __get_default_coco(self, img, anns, num_objs):
    """Build axis-aligned box (and optional keypoint) targets for one image.

    Args:
        img: Image array passed to the augmenters.
        anns: Sequence of annotation dicts; the first ``num_objs`` are used.
            An annotation lacking ``'keypoints'`` gets a zero-filled entry
            added in place when ``self.num_keypoints > 0``.
        num_objs: Number of annotations to encode.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'``, ``'reg'``, ``'gt_dets'``, ``'gt_areas'`` and, when
        ``self.num_keypoints > 0``, ``'kps'``, ``'gt_kps'`` and
        ``'kp_reg_mask'``.
    """
    with_kpts = self.num_keypoints > 0

    boxes = []
    kpts = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        boxes.append(BoundingBox(*bbox))

        if with_kpts:
            if 'keypoints' not in ann:
                ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))
            kpt = [
                Keypoint(*x)
                for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
            ]
            kpts.extend(kpt)

    bbs = BoundingBoxesOnImage(boxes, shape=img.shape)
    if with_kpts:
        kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        if with_kpts:
            img_aug, bbs_aug, kpts_aug = self.augmentation(
                image=img, bounding_boxes=bbs, keypoints=kpts)
        else:
            img_aug, bbs_aug = self.augmentation(image=img,
                                                 bounding_boxes=bbs)
    else:
        if with_kpts:
            kpts_aug = kpts.copy()
        img_aug, bbs_aug = np.copy(img), bbs.copy()

    if with_kpts:
        img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                 bounding_boxes=bbs_aug,
                                                 keypoints=kpts_aug)
    else:
        img_aug, bbs_aug = self.resize(image=img_aug,
                                       bounding_boxes=bbs_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 2), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    if with_kpts:
        kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                      dtype=np.float32)
        gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                         dtype=np.float32)
        kp_reg_mask = np.zeros(
            (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)
        bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                            keypoints=kpts_aug)
    else:
        bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

    for k in range(num_objs):
        ann = anns[k]
        # imgaug reads shape tuples as (height, width). The previous
        # (output_w, output_h) clipped x against the height and y against
        # the width on non-square output grids.
        bbox_aug = bbs_aug[k].clip_out_of_image((output_h, output_w))
        bbox = np.array(
            [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])
        cls_id = int(self.cat_mapping[ann['category_id']])

        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Targets (including keypoints and area) are only written for
        # boxes with positive size, so ct_int is always defined where the
        # keypoint offsets use it.
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius((np.ceil(h), np.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        gt_det[k] = ([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

        if with_kpts:
            valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
            first = k * self.num_keypoints
            for i, p in enumerate(
                    kpts_aug[first:first + self.num_keypoints]):
                kp[k][i * 2] = p.x - ct_int[0]
                kp[k][i * 2 + 1] = p.y - ct_int[1]

                # Shape tuple is (height, width); was (output_w, output_w).
                is_valid = valid[i] == 2 and not p.is_out_of_image(
                    (output_h, output_w))
                kp_reg_mask[k, i * 2] = int(is_valid)
                kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                gt_kp[k][i] = p.x, p.y

        if "area" not in ann:
            gt_areas[k] = w * h
        else:
            gt_areas[k] = ann["area"]

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 6), dtype=np.float32)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }

    if with_kpts:
        ret['kps'] = kp
        ret['gt_kps'] = gt_kp
        ret['kp_reg_mask'] = kp_reg_mask

    return ret
def __getitem__(self, index):
    """Load one visible/LWIR image pair and build corner-heatmap targets.

    The annotation file named by ``self.img_paths[index]`` is parsed line
    by line (its first line is skipped); the training split keeps the
    labels ``person``, ``person?`` and ``people``, other splits keep only
    ``person``.

    Args:
        index: Position into ``self.img_paths``.

    Returns:
        dict with keys ``'img_rgb'``, ``'img_ir'``, ``'hmap_tl'``,
        ``'hmap_br'``, ``'regs_tl'``, ``'regs_br'``, ``'inds_tl'``,
        ``'inds_br'`` and ``'ind_masks'``.
    """
    img_id = self.img_paths[index]
    img_set, img_vid, img_name = img_id.split("_", 2)
    img_name = img_name.replace("txt", "jpg")
    img_path = os.path.join(self.img_dir, img_set, img_vid)

    img_rgb = cv2.imread(os.path.join(img_path, "visible", img_name),
                         cv2.IMREAD_COLOR)
    img_ir = cv2.imread(os.path.join(img_path, "lwir", img_name),
                        cv2.IMREAD_GRAYSCALE)

    with open(os.path.join(self.annot_path,
                           self.img_paths[index])) as annot_file:
        annot_data = [line.rstrip('\n') for line in annot_file][1:]

    # One all-zero row per annotation line. Rows for skipped labels stay
    # zero and are removed by the validity filter below.
    bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
    if len(annot_data) != 0:
        bboxes = bboxes.repeat(len(annot_data), axis=0)
        for i, line in enumerate(annot_data):
            line_data = line.split()
            label = line_data[0]
            if self.split == "train":
                if label not in ["person", "person?", "people"]:
                    continue
            elif label != "person":
                continue
            bboxes[i, :] = list(map(int, line_data[1:5]))
    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    # resize image and bbox
    height, width = img_rgb.shape[:2]
    img_rgb = cv2.resize(img_rgb, (self.img_size['w'], self.img_size['h']))
    img_ir = cv2.resize(img_ir, (self.img_size['w'], self.img_size['h']))
    img_ir = np.expand_dims(img_ir, axis=2)
    bboxes[:, 0::2] *= self.img_size['w'] / width
    bboxes[:, 1::2] *= self.img_size['h'] / height

    # discard non-valid bboxes
    bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1)
    bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1)
    keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0,
                               (bboxes[:, 3] - bboxes[:, 1]) > 0)
    bboxes = bboxes[keep_inds]

    # randomly flip image and bboxes
    if self.split == 'train' and np.random.uniform() > 0.5:
        # Rebind to fresh contiguous copies instead of assigning each
        # array from a reversed view of itself.
        img_rgb = np.ascontiguousarray(img_rgb[:, ::-1, :])
        img_ir = np.ascontiguousarray(img_ir[:, ::-1, :])
        bboxes[:, [0, 2]] = img_rgb.shape[1] - bboxes[:, [2, 0]] - 1

    img_rgb = img_rgb.astype(np.float32) / 255.
    img_ir = img_ir.astype(np.float32) / 255.

    # self.mean / self.std are indexed with four channels: the first three
    # normalize the colour image, the fourth the LWIR image.
    img_rgb -= self.mean[0, 0, :3]
    img_rgb /= self.std[0, 0, :3]
    img_ir -= self.mean[0, 0, 3]
    img_ir /= self.std[0, 0, 3]
    img_rgb = img_rgb.transpose((2, 0, 1))  # [H, W, C] to [C, H, W]
    img_ir = img_ir.transpose((2, 0, 1))

    fmap_h, fmap_w = self.fmap_size['h'], self.fmap_size['w']
    x_ratio = fmap_w / self.img_size['w']
    y_ratio = fmap_h / self.img_size['h']

    hmap_tl = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)
    hmap_br = np.zeros((self.num_classes, fmap_h, fmap_w), dtype=np.float32)

    regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32)
    regs_br = np.zeros((self.max_objs, 2), dtype=np.float32)
    inds_tl = np.zeros((self.max_objs, ), dtype=np.int64)
    inds_br = np.zeros((self.max_objs, ), dtype=np.int64)

    num_objs = min(bboxes.shape[0], self.max_objs)
    ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
    ind_masks[:num_objs] = 1

    # Only the first num_objs boxes are encoded: the per-object arrays have
    # max_objs rows, so iterating every box overflowed them on frames with
    # more than max_objs boxes.
    for i, (xtl, ytl, xbr, ybr) in enumerate(bboxes[:num_objs]):
        fxtl = (xtl * fmap_w / self.img_size['w'])
        fytl = (ytl * fmap_h / self.img_size['h'])
        fxbr = (xbr * fmap_w / self.img_size['w'])
        fybr = (ybr * fmap_h / self.img_size['h'])

        ixtl = int(fxtl)
        iytl = int(fytl)
        ixbr = int(fxbr)
        iybr = int(fybr)

        # All boxes are written to heatmap channel 0.
        if self.gaussian:
            box_w = math.ceil((xbr - xtl) * x_ratio)
            box_h = math.ceil((ybr - ytl) * y_ratio)
            radius = max(
                0, int(gaussian_radius((box_h, box_w), self.gaussian_iou)))
            draw_gaussian(hmap_tl[0], [ixtl, iytl], radius)
            draw_gaussian(hmap_br[0], [ixbr, iybr], radius)
        else:
            hmap_tl[0, iytl, ixtl] = 1
            hmap_br[0, iybr, ixbr] = 1

        # Sub-cell offsets lost by truncating to integer cells.
        regs_tl[i, :] = [fxtl - ixtl, fytl - iytl]
        regs_br[i, :] = [fxbr - ixbr, fybr - iybr]
        inds_tl[i] = iytl * fmap_w + ixtl
        inds_br[i] = iybr * fmap_w + ixbr

    return {
        'img_rgb': img_rgb,
        'img_ir': img_ir,
        'hmap_tl': hmap_tl,
        'hmap_br': hmap_br,
        'regs_tl': regs_tl,
        'regs_br': regs_br,
        'inds_tl': inds_tl,
        'inds_br': inds_br,
        'ind_masks': ind_masks
    }
def __get_rotated_coco(self, img, anns, num_objs):
    """Build rotated-box detection targets for one image.

    Each rotated box is turned into four corner keypoints so that the
    augmentation pipeline can transform it; the box is then re-fitted with
    ``cv2.minAreaRect`` on the output grid. Angles are converted with
    ``np.radians`` before rotating the input box and again before the
    re-fitted angle is stored.

    Args:
        img: Image array passed to the augmenters.
        anns: Sequence of annotations; the first ``num_objs`` are used.
        num_objs: Number of annotations to encode.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'`` (width, height, angle), ``'reg'``, ``'gt_dets'`` and
        ``'gt_areas'``.
    """
    kpts = []
    for k in range(num_objs):
        ann = get_annotation_with_angle(anns[k])
        ann[4] = np.radians(ann[4])
        rot = rotate_bbox(*ann)
        kpts.extend([Keypoint(*x) for x in rot])

    kpts = KeypointsOnImage(kpts, shape=img.shape)

    if self.augmentation is not None:
        img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
    else:
        img_aug, kpts_aug = np.copy(img), kpts.copy()

    img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    # The unused dense_wh buffer (2 x output_h x output_w floats per
    # sample) is no longer allocated.
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 3), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    gt_det = np.zeros(
        (self.max_detections, 7 if self.use_rotated_boxes else 6),
        dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    kpts_aug = self.resize_out(keypoints=kpts_aug)
    assert num_objs == len(kpts_aug) // 4

    for k in range(num_objs):
        ann = anns[k]
        points = []
        for p in kpts_aug[k * 4:k * 4 + 4]:
            corner = list((np.clip(p.x, 0, output_w - 1),
                           np.clip(p.y, 0, output_h - 1)))
            points.append(corner)

        points = np.array(points).astype(np.float32)
        cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)
        # Skip boxes that collapsed to a line or point after clipping.
        if cv_wh[0] == 0 or cv_wh[1] == 0:
            continue

        cx, cy, w, h, angle = get_annotation_with_angle({
            'rbbox':
            np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
        })
        ct = np.array((cx, cy))
        cls_id = int(self.cat_mapping[ann['category_id']])

        # Targets are only written for boxes with positive size.
        if not (h > 0 and w > 0):
            continue

        angle = np.radians(angle)
        radius = gaussian_radius((np.ceil(h), np.ceil(w)))
        radius = max(0, int(radius))
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = w, h, angle
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])

        if "area" not in ann:
            gt_areas[k] = w * h
        else:
            gt_areas[k] = ann["area"]

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 7), dtype=np.float32)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    return ret
def __get_default_coco(self, img, anns, num_objs):
    """Build axis-aligned box detection targets for one image.

    Args:
        img: Image array passed to the augmenters.
        anns: Sequence of annotation dicts; the first ``num_objs`` are used.
        num_objs: Number of annotations to encode.

    Returns:
        dict with keys ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
        ``'wh'``, ``'reg'``, ``'gt_dets'`` (rows of
        ``x1, y1, x2, y2, 1, cls_id``) and ``'gt_areas'``.
    """
    boxes = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        boxes.append(BoundingBox(*bbox))

    bbs = BoundingBoxesOnImage(boxes, shape=img.shape)

    if self.augmentation is not None:
        img_aug, bbs_aug = self.augmentation(image=img, bounding_boxes=bbs)
    else:
        img_aug, bbs_aug = np.copy(img), bbs.copy()

    img_aug, bbs_aug = self.resize(image=img_aug, bounding_boxes=bbs_aug)

    img = (img_aug.astype(np.float32) / 255.)
    inp = (img - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = self.input_size[1] // self.down_ratio
    output_w = self.input_size[0] // self.down_ratio
    num_classes = self.num_classes

    # The unused dense_wh buffer is no longer allocated.
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_detections, 2), dtype=np.float32)
    reg = np.zeros((self.max_detections, 2), dtype=np.float32)
    ind = np.zeros((self.max_detections), dtype=np.int64)
    reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
    # Each row receives six values (x1, y1, x2, y2, score, class). The
    # width was previously num_classes, which made the row assignment
    # below raise for any other class count.
    gt_det = np.zeros((self.max_detections, 6), dtype=np.float32)
    gt_areas = np.zeros((self.max_detections), dtype=np.float32)

    bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

    for k in range(num_objs):
        ann = anns[k]
        # imgaug reads shape tuples as (height, width). The previous
        # (output_w, output_h) clipped x against the height and y against
        # the width on non-square output grids.
        bbox_aug = bbs_aug[k].clip_out_of_image((output_h, output_w))
        bbox = np.array(
            [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])
        cls_id = int(self.cat_mapping[ann['category_id']])

        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

        # Targets are only written for boxes with positive size.
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius((np.ceil(h), np.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        gt_det[k] = ([
            ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
            cls_id
        ])

        if "area" not in ann:
            gt_areas[k] = w * h
        else:
            gt_areas[k] = ann["area"]

    gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        np.zeros((1, 6), dtype=np.float32)

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'gt_dets': gt_det,
        'gt_areas': gt_areas,
    }
    return ret