def __getitem__(self, index):
    """Return one sample built from an image and its ``_A`` companion image.

    The image named by the COCO record and a second image whose file name
    has ``_A`` inserted before the extension are warped with the same affine
    transform, normalised with ``self.mean``/``self.std`` and concatenated
    along the channel axis.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind`` and one of ``wh``,
        ``dense_wh``/``dense_wh_mask`` or ``cat_spec_wh``/``cat_spec_mask``;
        ``reg`` when ``opt.reg_offset`` is set; ``meta`` when
        ``opt.debug > 0`` or the split is not ``'train'``.

    Raises:
        OSError: If either of the two images cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    # Companion image name: '<stem>_A<ext>'. splitext also copes with names
    # that have no extension (the suffix is then simply appended).
    stem, ext = os.path.splitext(file_name)
    pair_path = os.path.join(self.img_dir, stem + '_A' + ext)

    img1 = cv2.imread(img_path)
    img2 = cv2.imread(pair_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img1 is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    if img2 is None:
        raise OSError('Failed to read image: {}'.format(pair_path))

    height, width = img1.shape[0], img1.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            # Both images are mirrored so they stay aligned.
            img1 = img1[:, ::-1, :]
            img2 = img2[:, ::-1, :]
            c[0] = width - c[0] - 1

    # One transform for both images keeps them pixel-aligned.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp1 = cv2.warpAffine(img1, trans_input, (input_w, input_h),
                          flags=cv2.INTER_LINEAR)
    inp2 = cv2.warpAffine(img2, trans_input, (input_w, input_h),
                          flags=cv2.INTER_LINEAR)
    inp1 = inp1.astype(np.float32) / 255.
    inp2 = inp2.astype(np.float32) / 255.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp1, self._eig_val, self._eig_vec)
        color_aug(self._data_rng, inp2, self._eig_val, self._eig_vec)
    inp1 = (inp1 - self.mean) / self.std
    inp1 = inp1.transpose(2, 0, 1)
    inp2 = (inp2 - self.mean) / self.std
    inp2 = inp2.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                           dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                             dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel part of the centre lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    # Stack the two processed images into a single six-channel input.
    inp = np.vstack((inp1, inp2))

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
           'wh': wh}
    if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        del ret['wh']
    elif self.opt.cat_spec_wh:
        ret.update({'cat_spec_wh': cat_spec_wh,
                    'cat_spec_mask': cat_spec_mask})
        del ret['wh']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, index):
    """Return one detection sample, including original-resolution box info.

    Besides the usual heat-map / size / offset targets, the returned dict
    carries the box size and centre both in output-map coordinates
    (``wh``, ``cxcy``) and as read from the annotation before any flip or
    affine transform (``ori_wh``, ``ori_cxcy``), plus ``cls_idx``.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict of numpy arrays; ``reg`` is added when ``opt.reg_offset`` is
        set and ``meta`` when ``opt.debug > 0`` or the split is not
        ``'train'``.
    """
    opt = self.opt
    is_train = self.split == 'train'

    img_id = self.images[index]
    img_record = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_record['file_name'])
    # Annotation ids for this image, then the full annotation records.
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if opt.keep_res:
        input_h = (height | opt.pad) + 1
        input_w = (width | opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if is_train:
        if opt.not_rand_crop:
            # Without random cropping, jitter the centre and the scale.
            sf, cf = opt.scale, opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        else:
            # Random scale picked from 0.6 .. 1.3 in steps of 0.1.
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)

        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Affine warp of the image into the network input size.
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if is_train and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    # Ground-truth boxes are mapped to the down-sampled output resolution.
    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    max_objs = self.max_objs
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    ori_wh = np.zeros((max_objs, 2), dtype=np.float32)
    cxcy = np.zeros((max_objs, 2), dtype=np.float32)
    ori_cxcy = np.zeros((max_objs, 2), dtype=np.float32)
    cls_idx = np.zeros(max_objs, dtype=np.int64)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros(max_objs, dtype=np.int64)
    reg_mask = np.zeros(max_objs, dtype=np.uint8)
    cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k, ann in enumerate(anns[:num_objs]):
        bbox = self._coco_box_to_bbox(ann['bbox'])
        # Size and centre as annotated, before flip / affine transform.
        ori_h, ori_w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        ori_cx = (bbox[0] + bbox[2]) / 2
        ori_cy = (bbox[1] + bbox[3]) / 2
        cls_id = int(self.cat_ids[ann['category_id']])

        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        cx, cy = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2

        if not (h > 0 and w > 0):
            # Box collapsed after clipping: contributes no targets.
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        if opt.mse_loss:
            radius = opt.hm_gauss
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)

        wh[k] = 1. * w, 1. * h
        ori_wh[k] = 1. * ori_w, 1. * ori_h
        cxcy[k] = 1. * cx, 1. * cy
        ori_cxcy[k] = 1. * ori_cx, 1. * ori_cy
        cls_idx[k] = cls_id
        # Flattened index of the integer centre: int_cy * output_w + int_cx.
        ind[k] = ct_int[1] * output_w + ct_int[0]
        # Fractional part of the centre dropped by the integer cast.
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
        if opt.dense_wh:
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        # [tlx, tly, brx, bry, 1, cls_id]
        gt_det.append([
            ct[0] - w / 2, ct[1] - h / 2,
            ct[0] + w / 2, ct[1] + h / 2,
            1, cls_id,
        ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'ori_wh': ori_wh,
        'cxcy': cxcy,
        'ori_cxcy': ori_cxcy,
        'cls_idx': cls_idx,
    }
    if opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        ret['dense_wh'] = dense_wh
        ret['dense_wh_mask'] = np.concatenate([hm_a, hm_a], axis=0)
        del ret['wh']
    elif opt.cat_spec_wh:
        ret['cat_spec_wh'] = cat_spec_wh
        ret['cat_spec_mask'] = cat_spec_mask
        del ret['wh']
    if opt.reg_offset:
        ret['reg'] = reg
    if opt.debug > 0 or not is_train:
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 6), dtype=np.float32)
        ret['meta'] = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
    return ret
def __getitem__(self, index):
    """Return one multi-person keypoint sample.

    Loads the image and its annotations, applies the training-time
    scale / shift / rotation / flip augmentation, and builds centre
    heat-maps, box sizes, keypoint offsets relative to the box centre and
    per-joint heat-maps at ``opt.output_res`` resolution.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh`` and
        either ``hps``/``hps_mask`` or, when ``opt.dense_hp`` is set,
        ``dense_hps``/``dense_hps_mask``; optionally ``reg``, ``hm_hp``,
        ``hp_offset``/``hp_ind``/``hp_mask`` and ``meta`` depending on the
        options.

    Raises:
        OSError: If the image cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(height, width) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.opt.aug_rot:
            rf = self.opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    # Warp (and thereby resize) the image to the square network input.
    trans_input = get_affine_transform(
        c, s, rot, [self.opt.input_res, self.opt.input_res])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    # Keypoints use the rotated transform; boxes use the unrotated one.
    trans_output_rot = get_affine_transform(c, s, rot,
                                            [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    # Box-centre heat-map.
    hm = np.zeros((self.num_classes, output_res, output_res),
                  dtype=np.float32)
    # Per-joint keypoint heat-map.
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                         dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                              dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                        dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(ann['category_id']) - 1
        # One row per joint: x, y, visibility flag.
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            pts[:, 0] = width - pts[:, 0] - 1
            # Swap the joint pairs listed in self.flip_idx after mirroring.
            for e in self.flip_idx:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox = np.clip(bbox, 0, output_res - 1)
        # Box size at output resolution.
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if (h > 0 and w > 0) or (rot != 0):
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            # A fixed radius (opt.hm_gauss) is used with the MSE loss.
            radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                0, int(radius))
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_res + ct_int[0]
            # Sub-pixel part of the centre lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                # No joint flagged: mark the centre at 0.9999 but exclude
                # the object from the regression mask.
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = self.opt.hm_gauss \
                if self.opt.mse_loss else max(0, int(hp_radius))
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        # Keypoint position relative to the integer centre.
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = \
                            pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
                                           pts[j, :2] - ct_int, radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1
            ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

    if rot != 0:
        # Rotated samples: fill hm with 0.9999 and zero the masks.
        hm = hm * 0 + 0.9999
        reg_mask *= 0
        kps_mask *= 0

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'hps': kps,
        'hps_mask': kps_mask
    }
    if self.opt.dense_hp:
        dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                      output_res)
        dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                output_res)
        dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                        axis=1)
        dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                output_res)
        ret.update({
            'dense_hps': dense_kps,
            'dense_hps_mask': dense_kps_mask
        })
        del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
        ret.update({'reg': reg})
    if self.opt.hm_hp:
        ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
        ret.update({
            'hp_offset': hp_offset,
            'hp_ind': hp_ind,
            'hp_mask': hp_mask
        })
    if self.opt.debug > 0 or not self.split == 'train':
        # Row layout: 4 box coords + score + 2 per joint + class id, so the
        # empty placeholder must follow num_joints (40 columns for 17).
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, num_joints * 2 + 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, f_idx):
    """Return one multi-class tracking sample.

    The global index is mapped to a sub-dataset through ``self.cds``; the
    image and labels come from ``self.get_data``. Detection targets are
    always produced; ReID targets (``ids``, ``cls_id_map``,
    ``cls_tr_ids``) only when ``opt.id_weight > 0``.

    Args:
        f_idx: Global sample index across all sub-datasets.

    Returns:
        dict with ``input``, ``hm``, ``reg``, ``wh``, ``ind``,
        ``reg_mask`` and, when ``opt.id_weight > 0``, the ReID targets.
    """
    with_reid = self.opt.id_weight > 0

    # Pick the last sub-dataset whose start offset is <= f_idx.
    ds_names = list(self.label_files.keys())
    for ds_pos, first_idx in enumerate(self.cds):
        if f_idx >= first_idx:
            ds = ds_names[ds_pos]
            start_index = first_idx

    local_idx = f_idx - start_index
    img_path = self.img_files[ds][local_idx]
    label_path = self.label_files[ds][local_idx]

    # Image tensor and label rows for this sample.
    imgs, labels, img_path, (input_h, input_w) = self.get_data(
        img_path, label_path)

    # For MCMOT training: shift each valid track id by the start offset of
    # its class within this sub-dataset.
    if with_reid:
        for row in labels:
            if row[1] > -1:
                row[1] += self.tid_start_idx_of_cls_ids[ds][int(row[0])]

    down = self.opt.down_ratio
    output_h = imgs.shape[1] // down  # floor division
    output_w = imgs.shape[2] // down

    # Number of labelled objects in this image.
    num_objs = labels.shape[0]
    max_objs = self.max_objs

    # --- detection targets
    # One heat-map channel per class.
    hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs,), dtype=np.int64)
    # Marks the slots that actually hold an object.
    reg_mask = np.zeros((max_objs,), dtype=np.uint8)

    if with_reid:
        # --- ReID targets
        # Track id per object slot, initialised to 0.
        ids = np.zeros((max_objs,), dtype=np.int64)
        # Track-id map, one plane per class.
        cls_tr_ids = np.zeros((self.num_classes, output_h, output_w),
                              dtype=np.int64)
        # Class id at each (y, x), initialised to -1.
        cls_id_map = np.full((1, output_h, output_w), -1, dtype=np.int64)

    if self.opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    for k in range(num_objs):
        label = labels[k]
        # center_x, center_y, bbox_w, bbox_h; scaled in place below.
        bbox = label[2:]
        cls_id = int(label[0])

        bbox[[0, 2]] = bbox[[0, 2]] * output_w
        bbox[[1, 3]] = bbox[[1, 3]] * output_h
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        w, h = bbox[2], bbox[3]

        if not (h > 0 and w > 0):
            continue

        # Heat-map radius, never negative.
        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss

        # Box centre and its integer truncation.
        ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        cx, cy = ct_int[0], ct_int[1]

        draw_gaussian(hm[cls_id], ct_int, radius)

        wh[k] = float(w), float(h)
        # Flattened feature-map index: y * w + x.
        ind[k] = cy * output_w + cx
        # Offset regression target.
        reg[k] = ct - ct_int
        reg_mask[k] = 1

        if with_reid:
            cls_id_map[0][cy, cx] = cls_id
            # The stored track id is the label's id minus one.
            cls_tr_ids[cls_id][cy][cx] = label[1] - 1
            ids[k] = label[1] - 1

    ret = {'input': imgs,
           'hm': hm,
           'reg': reg,
           'wh': wh,
           'ind': ind,
           'reg_mask': reg_mask}
    if with_reid:
        ret['ids'] = ids
        ret['cls_id_map'] = cls_id_map
        ret['cls_tr_ids'] = cls_tr_ids
    return ret
def __getitem__(self, index):
    """Return one extreme-point detection sample.

    Builds four extreme-point heat-maps (top, left, bottom, right) and a
    centre heat-map from each annotation's ``extreme_points``, together
    with per-object sub-pixel offsets and flattened indices.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``input``, ``hm_t``, ``hm_l``, ``hm_b``, ``hm_r``,
        ``hm_c``; the ``reg_*``/``ind_*`` arrays and ``reg_mask`` are added
        when ``opt.reg_offset`` is set.
    """
    opt = self.opt
    is_train = self.split == 'train'

    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img = cv2.imread(os.path.join(self.img_dir, img_info['file_name']))

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.])
    s = max(height, width) * 1.0

    flipped = False
    if is_train:
        if opt.not_rand_crop:
            sf, cf = opt.scale, opt.shift
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            c[0] += width * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += height * np.clip(np.random.randn() * cf,
                                     -2 * cf, 2 * cf)
        else:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        if np.random.random() < opt.flip:
            flipped = True
            img = img[:, ::-1, :]

    input_size = (opt.input_res, opt.input_res)
    trans_input = get_affine_transform(c, s, 0, list(input_size))
    inp = cv2.warpAffine(img, trans_input, input_size,
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if is_train and not opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    output_res = opt.output_res
    num_classes = opt.num_classes
    max_objs = self.max_objs
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
    # A single shared channel when extreme points are class-agnostic.
    num_hm = 1 if opt.agnostic_ex else num_classes

    def _heatmap(channels):
        return np.zeros((channels, output_res, output_res),
                        dtype=np.float32)

    hm_t, hm_l, hm_b, hm_r = (_heatmap(num_hm) for _ in range(4))
    hm_c = _heatmap(num_classes)
    reg_t, reg_l, reg_b, reg_r = (
        np.zeros((max_objs, 2), dtype=np.float32) for _ in range(4))
    ind_t, ind_l, ind_b, ind_r = (
        np.zeros(max_objs, dtype=np.int64) for _ in range(4))
    reg_mask = np.zeros(max_objs, dtype=np.uint8)

    # Ordered to match the rows of ``pts``: top, left, bottom, right.
    extreme_hms = (hm_t, hm_l, hm_b, hm_r)
    extreme_targets = ((reg_t, ind_t), (reg_l, ind_l),
                       (reg_b, ind_b), (reg_r, ind_r))

    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), max_objs)

    if opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    for k, ann in enumerate(anns[:num_objs]):
        pts = np.array(ann['extreme_points'],
                       dtype=np.float32).reshape(4, 2)
        cls_id = int(self.cat_ids[ann['category_id']])
        hm_id = 0 if opt.agnostic_ex else cls_id
        if flipped:
            pts[:, 0] = width - pts[:, 0] - 1
            # Mirroring exchanges the left and right extreme points.
            pts[1], pts[3] = pts[3].copy(), pts[1].copy()
        for j in range(4):
            pts[j] = affine_transform(pts[j], trans_output)
        pts = np.clip(pts, 0, opt.output_res - 1)

        h = pts[2, 1] - pts[0, 1]
        w = pts[3, 0] - pts[1, 0]
        if not (h > 0 and w > 0):
            continue

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        pt_int = pts.astype(np.int32)
        for hm_ex, pt in zip(extreme_hms, pt_int):
            draw_gaussian(hm_ex[hm_id], pt, radius)
        for j, (reg_ex, ind_ex) in enumerate(extreme_targets):
            reg_ex[k] = pts[j] - pt_int[j]
            ind_ex[k] = pt_int[j, 1] * output_res + pt_int[j, 0]

        # Centre: midpoint of left/right in x and of top/bottom in y.
        ct = [
            int((pts[3, 0] + pts[1, 0]) / 2),
            int((pts[0, 1] + pts[2, 1]) / 2),
        ]
        draw_gaussian(hm_c[cls_id], ct, radius)
        reg_mask[k] = 1

    ret = {
        'input': inp,
        'hm_t': hm_t,
        'hm_l': hm_l,
        'hm_b': hm_b,
        'hm_r': hm_r,
        'hm_c': hm_c,
    }
    if opt.reg_offset:
        ret['reg_mask'] = reg_mask
        ret['reg_t'] = reg_t
        ret['reg_l'] = reg_l
        ret['reg_b'] = reg_b
        ret['reg_r'] = reg_r
        ret['ind_t'] = ind_t
        ret['ind_l'] = ind_l
        ret['ind_b'] = ind_b
        ret['ind_r'] = ind_r
    return ret
def __getitem__(self, files_index):
    """Return one joint detection / ReID sample.

    The global index is mapped to a sub-dataset through ``self.cds``;
    valid track ids are shifted by that sub-dataset's
    ``self.tid_start_index`` entry. Every object is drawn on heat-map
    channel 0.

    Args:
        files_index: Global sample index across all sub-datasets.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
        ``reg`` and ``ids``.
    """
    # Pick the last sub-dataset whose start offset is <= files_index.
    ds_names = list(self.label_files.keys())
    for ds_pos, first_idx in enumerate(self.cds):
        if files_index >= first_idx:
            ds = ds_names[ds_pos]
            start_index = first_idx

    local_idx = files_index - start_index
    img_path = self.img_files[ds][local_idx]
    label_path = self.label_files[ds][local_idx]

    imgs, labels, img_path, (input_h, input_w) = self.get_data(
        img_path, label_path)

    # Shift every valid track id by this sub-dataset's start offset.
    for row in labels:
        if row[1] > -1:
            row[1] += self.tid_start_index[ds]

    down = self.opt.down_ratio
    output_h = imgs.shape[1] // down  # floor division
    output_w = imgs.shape[2] // down
    num_classes = self.num_classes
    num_objs = labels.shape[0]  # labelled objects in this image
    max_objs = self.max_objs

    # One heat-map channel per class.
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs,), dtype=np.int64)
    # Marks the slots that actually hold an object.
    reg_mask = np.zeros((max_objs,), dtype=np.uint8)
    # Track id per object slot, initialised to 0.
    ids = np.zeros((max_objs,), dtype=np.int64)

    if self.opt.mse_loss:
        draw_gaussian = draw_msra_gaussian
    else:
        draw_gaussian = draw_umich_gaussian

    for k in range(num_objs):
        label = labels[k]
        # center_x, center_y, bbox_w, bbox_h; scaled in place below.
        bbox = label[2:]
        # Parsed as in the original code, although the heat-map below is
        # always drawn on channel 0.
        cls_id = int(label[0])

        bbox[[0, 2]] = bbox[[0, 2]] * output_w
        bbox[[1, 3]] = bbox[[1, 3]] * output_h
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        w, h = bbox[2], bbox[3]

        if not (h > 0 and w > 0):
            continue

        # Heat-map radius, never negative.
        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss

        # Box centre and its integer truncation.
        ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
        ct_int = ct.astype(np.int32)

        draw_gaussian(hm[0], ct_int, radius)

        wh[k] = 1. * w, 1. * h
        # Flattened feature-map index: y * w + x.
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        # The stored track id is the label's id minus one.
        ids[k] = label[1] - 1

    ret = {
        'input': imgs,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'ids': ids,
    }
    return ret
def __getitem__(self, index):
    """Return one detection + instance-segmentation sample.

    Annotations are restricted to categories in ``self._valid_ids`` with
    ``iscrowd != 1``. In addition to the heat-map / size / offset targets,
    each object gets a mask in ``seg`` warped to output resolution; pixels
    of that mask that lie outside a padded window around the box are set
    to 255.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
        ``reg``, ``seg`` and, when ``opt.debug > 0`` or the split is not
        ``'train'``, ``meta``.

    Raises:
        OSError: If the image cannot be read.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    # Keep only valid categories and drop crowd annotations.
    anns = [
        ann for ann in anns
        if ann['category_id'] in self._valid_ids and ann['iscrowd'] != 1
    ]
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if self.opt.keep_res:
        input_h = (height | self.opt.pad) + 1
        input_w = (width | self.opt.pad) + 1
        s = np.array([input_w, input_h], dtype=np.float32)
    else:
        s = max(height, width) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
        if not self.opt.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, width)
            h_border = self._get_border(128, height)
            c[0] = np.random.randint(low=w_border, high=width - w_border)
            c[1] = np.random.randint(low=h_border, high=height - h_border)
        else:
            sf = self.opt.scale
            cf = self.opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.opt.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
    # Masks are warped with exactly the same matrix as the boxes.
    trans_seg_output = trans_output

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    seg = np.zeros((self.max_objs, output_h, output_w), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        draw_umich_gaussian

    # Relative enlargement of the box window used for the mask.
    pad_rate = 0.3

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        segment = self.coco.annToMask(ann)
        if flipped:
            bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            segment = segment[:, ::-1]
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        segment = cv2.warpAffine(segment, trans_seg_output,
                                 (output_w, output_h),
                                 flags=cv2.INTER_LINEAR)
        segment = segment.astype(np.float32)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            # Flattened (row-major) index of the integer centre.
            ind[k] = ct_int[1] * output_w + ct_int[0]
            # Sub-pixel part of the centre lost by the integer cast.
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            segment_mask = np.ones_like(segment)
            half_w = (1 + pad_rate) * w / 2
            half_h = (1 + pad_rate) * h / 2
            # ``np.int`` was removed from NumPy; builtin ``int`` is the
            # type it used to alias.
            # NOTE(review): the window bounds are doubled before indexing
            # an output-resolution mask -- confirm the factor 2 is intended.
            x = (np.clip([ct[0] - half_w, ct[0] + half_w],
                         0, output_w - 1) * 2).astype(int)
            y = (np.clip([ct[1] - half_h, ct[1] + half_h],
                         0, output_h - 1) * 2).astype(int)
            segment_mask[y[0]:y[1], x[0]:x[1]] = 0
            # Everything outside the window is set to 255.
            segment[segment_mask == 1] = 255
            seg[k] = segment

            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

    ret = {
        'input': inp,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
        'seg': seg
    }
    if self.opt.debug > 0 or not self.split == 'train':
        gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
            np.zeros((1, 6), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        ret['meta'] = meta
    return ret
def __getitem__(self, files_index):
    """Build the detection training targets for one sample.

    ``files_index`` indexes the concatenation of every dataset. It is
    resolved to a dataset by scanning ``self.cds``: the last entry that is
    ``<= files_index`` selects the dataset (same position in
    ``self.label_files``) and is used as that dataset's start offset.

    Args:
        files_index: Global sample index across all datasets.

    Returns:
        dict with the keys ``'input'`` (image returned by ``get_data``),
        ``'hm'`` (per-class centre heatmap), ``'reg_mask'`` (1 for every
        filled object slot), ``'ind'`` (flattened centre index
        ``y * output_w + x``), ``'wh'`` (box width/height in output
        pixels) and ``'reg'`` (sub-pixel centre offset).

    Raises:
        IndexError: If ``files_index`` is below every offset in
            ``self.cds`` (for example a negative index).
    """
    # Locate the dataset owning this index. The scan deliberately keeps the
    # *last* matching offset, exactly like the original loop.
    dataset_names = list(self.label_files.keys())
    ds = None
    start_index = 0
    for ds_pos, ds_offset in enumerate(self.cds):
        if files_index >= ds_offset:
            ds = dataset_names[ds_pos]
            start_index = ds_offset
    if ds is None:
        raise IndexError(
            'sample index {} is out of range'.format(files_index))

    local_index = files_index - start_index
    img_path = self.img_files[ds][local_index]
    label_path = self.label_files[ds][local_index]

    # get_data also returns the resolved path, the input size and the raw
    # image; none of them are needed to build the targets.
    imgs, labels, _, _, _ = self.get_data(img_path, label_path)

    # Column 1 is shifted by the per-dataset start id so ids from different
    # datasets do not collide; entries <= -1 are left untouched.
    # (presumably column 1 is the track id -- confirm against get_data)
    for row in labels:
        if row[1] > -1:
            row[1] += self.tid_start_index[ds]

    output_h = imgs.shape[1] // self.opt.down_ratio
    output_w = imgs.shape[2] // self.opt.down_ratio
    num_classes = self.num_classes
    # The target arrays only have max_objs rows; cap the object count so a
    # crowded frame cannot index past them.
    num_objs = min(labels.shape[0], self.max_objs)

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs, ), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs, ), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss \
        else draw_umich_gaussian

    for k in range(num_objs):
        label = labels[k]
        cls_id = int(label[0])
        # Columns 2: are used as (centre x, centre y, width, height) and are
        # scaled by the output size -- presumably normalised to [0, 1];
        # confirm against get_data. NOTE: bbox is a view, so this rescales
        # the row inside ``labels`` in place.
        bbox = label[2:]
        bbox[[0, 2]] = bbox[[0, 2]] * output_w
        bbox[[1, 3]] = bbox[[1, 3]] * output_h
        # Keep the centre inside the output map.
        bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        h = bbox[3]
        w = bbox[2]
        if not (h > 0 and w > 0):
            # Degenerate box: leave this slot zeroed (reg_mask stays 0).
            continue

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        if self.opt.mse_loss:
            radius = self.opt.hm_gauss

        ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1

    ret = {
        'input': imgs,
        'hm': hm,
        'reg_mask': reg_mask,
        'ind': ind,
        'wh': wh,
        'reg': reg,
    }
    return ret
def __getitem__(self, index):
    """Load one image and build its 3D-detection training targets.

    The image is read from ``self.img_dir``, optionally scale/shift
    augmented (training split only, with probability ``opt.aug_ddd``),
    warped to ``opt.input_w x opt.input_h`` and normalised with
    ``self.mean`` / ``self.std``. Each annotation is projected into the
    ``opt.output_w x opt.output_h`` target grid.

    Colour augmentation and horizontal flipping are not applied by this
    loader.

    Args:
        index: Position in ``self.images``.

    Returns:
        dict with ``'input'``, ``'hm'``, ``'dep'``, ``'dim'``, ``'ind'``,
        ``'rotbin'``, ``'rotres'``, ``'reg_mask'`` and ``'rot_mask'``;
        plus ``'wh'`` when ``opt.reg_bbox`` is set, ``'reg'`` when
        ``opt.reg_offset`` is set, and ``'meta'`` when ``opt.debug > 0``
        or the split name does not contain ``'train'``.

    Raises:
        IOError: If the image file cannot be read.
    """
    opt = self.opt
    img_id = self.images[index]
    img_info = self.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of an AttributeError further down.
        raise IOError('Failed to read image: {}'.format(img_path))

    # Per-image calibration wins over the dataset-wide default.
    if 'calib' in img_info:
        calib = np.array(img_info['calib'], dtype=np.float32)
    else:
        calib = self.calib

    height, width = img.shape[0], img.shape[1]
    c = np.array([width / 2., height / 2.])
    if opt.keep_res:
        s = np.array([opt.input_w, opt.input_h], dtype=np.int32)
    else:
        s = np.array([width, height], dtype=np.int32)

    # Random scale/shift augmentation (training only). The order of the
    # random draws is significant for reproducibility: scale, then x, then y.
    aug = False
    if self.split == 'train' and np.random.random() < opt.aug_ddd:
        aug = True
        sf = opt.scale
        cf = opt.shift
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        c[0] += width * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += height * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)

    trans_input = get_affine_transform(
        c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    num_classes = opt.num_classes
    trans_output = get_affine_transform(
        c, s, 0, [opt.output_w, opt.output_h])

    hm = np.zeros(
        (num_classes, opt.output_h, opt.output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    dep = np.zeros((self.max_objs, 1), dtype=np.float32)
    rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
    rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
    dim = np.zeros((self.max_objs, 3), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    rot_mask = np.zeros((self.max_objs), dtype=np.uint8)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    draw_gaussian = draw_msra_gaussian if opt.mse_loss \
        else draw_umich_gaussian
    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = self._coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id <= -99:
            # Categories mapped to <= -99 are dropped entirely.
            continue

        # Project the box corners into output-map coordinates and clamp.
        bbox[:2] = affine_transform(bbox[:2], trans_output)
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, opt.output_w - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, opt.output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if not (h > 0 and w > 0):
            continue

        radius = gaussian_radius((h, w))
        radius = max(0, int(radius))
        ct = np.array(
            [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
            dtype=np.float32)
        ct_int = ct.astype(np.int32)

        if cls_id < 0:
            # Negative ids mark ignore regions: -1 covers every class,
            # any other value -n covers the single class n - 2. Only the
            # heatmap is touched; no regression targets are written.
            if cls_id == -1:
                ignore_id = list(range(num_classes))
            else:
                ignore_id = [-cls_id - 2]
            if opt.rect_mask:
                hm[ignore_id,
                   int(bbox[1]): int(bbox[3]) + 1,
                   int(bbox[0]): int(bbox[2]) + 1] = 0.9999
            else:
                for cc in ignore_id:
                    draw_gaussian(hm[cc], ct, radius)
                hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
            continue

        draw_gaussian(hm[cls_id], ct, radius)
        wh[k] = 1. * w, 1. * h

        alpha = self._convert_alpha(ann['alpha'])
        gt_row = [ct[0], ct[1], 1] \
            + self._alpha_to_8(alpha) \
            + [ann['depth']] \
            + (np.array(ann['dim']) / 1).tolist() \
            + [cls_id]
        if opt.reg_bbox:
            # Insert the 2D box size just before the trailing class id.
            gt_row = gt_row[:-1] + [w, h] + [gt_row[-1]]
        gt_det.append(gt_row)

        # Orientation targets: two bins that may both be active. Bin 0's
        # residual is measured from -pi/2, bin 1's from +pi/2.
        if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
            rotbin[k, 0] = 1
            rotres[k, 0] = alpha - (-0.5 * np.pi)
        if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
            rotbin[k, 1] = 1
            rotres[k, 1] = alpha - (0.5 * np.pi)

        dep[k] = ann['depth']
        dim[k] = ann['dim']
        ind[k] = ct_int[1] * opt.output_w + ct_int[0]
        reg[k] = ct - ct_int
        # reg_mask is zeroed for augmented samples; rot_mask is always set.
        reg_mask[k] = 1 if not aug else 0
        rot_mask[k] = 1

    ret = {'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind,
           'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask,
           'rot_mask': rot_mask}
    if opt.reg_bbox:
        ret.update({'wh': wh})
    if opt.reg_offset:
        ret.update({'reg': reg})
    if opt.debug > 0 or 'train' not in self.split:
        if len(gt_det) > 0:
            gt_det = np.array(gt_det, dtype=np.float32)
        else:
            gt_det = np.zeros((1, 18), dtype=np.float32)
        meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
                'image_path': img_path, 'img_id': img_id}
        ret['meta'] = meta

    return ret