def _get_pre_dets(self, anns, trans_input, trans_output):
    """Build the previous-frame heatmap, centers and track ids.

    Args:
        anns: iterable of annotation dicts; each is read for
            'category_id', 'bbox' and optionally 'iscrowd' / 'track_id'.
        trans_input: affine matrix applied to the box corners.
        trans_output: accepted for interface compatibility; not used here.

    Returns:
        (pre_hm, pre_cts, track_ids) where pre_hm is a (1, H, W) float32
        array (or None when ``opt.pre_hm`` is falsy), pre_cts is a list of
        centers divided by ``opt.down_ratio`` and track_ids the matching
        'track_id' values (-1 when the annotation has none).
    """
    opt = self.opt
    hm_h, hm_w = opt.height, opt.width
    down_ratio = opt.down_ratio
    want_hm = opt.pre_hm
    pre_hm = None
    if want_hm:
        pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32)

    pre_cts = []
    track_ids = []
    for ann in anns:
        cls_id = int(self.cat_ids[ann['category_id']])
        if cls_id > opt.num_classes or cls_id <= -99 or \
                ('iscrowd' in ann and ann['iscrowd'] > 0):
            continue

        box = self._coco_box_to_bbox(ann['bbox'])
        box[:2] = affine_transform(box[:2], trans_input)
        box[2:] = affine_transform(box[2:], trans_input)
        box[[0, 2]] = np.clip(box[[0, 2]], 0, hm_w - 1)
        box[[1, 3]] = np.clip(box[[1, 3]], 0, hm_h - 1)
        box_h = box[3] - box[1]
        box_w = box[2] - box[0]
        # Boxes that collapse after clipping contribute nothing.
        if not (box_h > 0 and box_w > 0):
            continue

        radius = gaussian_radius((math.ceil(box_h), math.ceil(box_w)))
        radius = max(0, int(radius))

        center = np.array(
            [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2],
            dtype=np.float32)
        # Jittered copy of the center; the clean one is kept separately.
        noisy = center.copy()
        noisy[0] = noisy[0] + np.random.randn() * opt.hm_disturb * box_w
        noisy[1] = noisy[1] + np.random.randn() * opt.hm_disturb * box_h
        # conf == 0 marks a detection drawn with zero peak value.
        conf = 1 if np.random.random() > opt.lost_disturb else 0
        noisy_int = noisy.astype(np.int32)

        chosen = noisy if conf == 0 else center
        pre_cts.append(chosen / down_ratio)
        track_ids.append(ann['track_id'] if 'track_id' in ann else -1)

        if want_hm:
            draw_umich_gaussian(pre_hm[0], noisy_int, radius, k=conf)

        # The random draw happens even when no heatmap is requested, so the
        # RNG stream does not depend on ``opt.pre_hm``.
        if np.random.random() < opt.fp_disturb and want_hm:
            extra = center.copy()
            # Hard-coded heatmap disturb ratio; other values were not tried.
            extra[0] = extra[0] + np.random.randn() * 0.05 * box_w
            extra[1] = extra[1] + np.random.randn() * 0.05 * box_h
            extra_int = extra.astype(np.int32)
            draw_umich_gaussian(pre_hm[0], extra_int, radius, k=conf)

    return pre_hm, pre_cts, track_ids
def __getitem__(self, idx):
    """Load sample ``idx``: cropped image, joint heatmaps and metadata.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target, target_weight, meta). ``target`` and
        ``target_weight`` are torch tensors built from
        ``self.generate_target``; ``meta`` holds the augmentation
        parameters, the original 'joints_2d' and the transformed joints.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # Zip-backed datasets address files as '<archive>@<member path>'.
    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # cv2.imread signals failure by returning None; fail with a clear
    # message instead of an opaque error inside cv2.warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the training samples.
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input_image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input_image = self.transform(input_image)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that land outside the crop are marked invisible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return input_image, target, target_weight, meta
def get_image_info(self, index):
    """Return the cropped image, 2D keypoints, box and metadata for a sample.

    Args:
        index: index into ``self.gt_db``.

    Returns:
        (dst_image, kp2d, self.const_box, metas) where ``kp2d`` stacks the
        transformed (x, y) with the visibility flag and is re-ordered by
        ``self.mpii_2_lsp14``.
    """
    info = self.gt_db[index]
    imgpath = info['image']
    # Reverse the channel axis of the image returned by cv2.imread.
    image = cv2.imread(imgpath)[:, :, ::-1]
    # Work on a copy: the joints are transformed in place below, and writing
    # into the stored array would re-transform the record on every access.
    joints = np.array(info['joints_3d'])
    joints_vis = info['joints_3d_vis'][:, 0]
    c = info['center']
    s = info['scale']
    r = 0

    if self.train_flag:
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the training samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(c, s, r, (self.crop_size, self.crop_size))
    dst_image = cv2.warpAffine(image,
                               trans, (self.crop_size, self.crop_size),
                               flags=cv2.INTER_LINEAR)

    for i in range(self.num_joints):
        if joints_vis[i] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    kp2d = np.concatenate([joints[:, 0:2], joints_vis[:, None]],
                          1)[self.mpii_2_lsp14]
    result_dir = '{}/{}'.format(self.save_dir, os.path.basename(imgpath))
    metas = ('mpii', imgpath, result_dir, self.empty_kp3d, self.empty_kp3d,
             self.empty_param, self.empty_gr)
    return dst_image, kp2d, self.const_box, metas
def data_augmentation(sample, is_train):
    """Load, augment and normalize one sample.

    Args:
        sample: dict with 'image', 'joints_3d', 'joints_3d_vis', 'center',
            'scale' and optionally 'filename' / 'score'. It is not modified.
        is_train: when true, apply random scale / rotation / flip.

    Returns:
        ``(input, target, target_weight)`` when ``is_train`` is true,
        otherwise ``(input, target, target_weight, c, s, score, image_file)``.
        ``input`` is a float32 CHW array normalized with ``cfg.MEAN`` and
        ``cfg.STD``.

    Raises:
        ValueError: if the image cannot be read.
    """
    import copy

    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    # Copy everything that is edited in place below (flip, affine transform)
    # so that the caller's sample is left untouched and can be reused.
    joints = copy.deepcopy(sample['joints_3d'])
    joints_vis = copy.deepcopy(sample['joints_3d_vis'])
    c = copy.deepcopy(sample['center'])
    s = sample['scale']
    score = sample['score'] if 'score' in sample else 1
    r = 0

    # Fixed seeds when the 'ce_mode' environment variable is set.
    if 'ce_mode' in os.environ:
        random.seed(0)
        np.random.seed(0)

    data_numpy = cv2.imread(image_file,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # cv2.imread signals failure by returning None.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the training samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    input = cv2.warpAffine(data_numpy,
                           trans,
                           (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
                           flags=cv2.INTER_LINEAR)

    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, input.copy(), joints, target)

    # Normalization: HWC uint8 -> CHW float32 in [0, 1], then mean/std.
    input = input.astype('float32').transpose((2, 0, 1)) / 255
    input -= np.array(cfg.MEAN).reshape((3, 1, 1))
    input /= np.array(cfg.STD).reshape((3, 1, 1))

    if is_train:
        return input, target, target_weight
    else:
        return input, target, target_weight, c, s, score, image_file
def __getitem__(self, idx: int):
    """Return the cropped frame and the estimated pose for detection ``idx``.

    Args:
        idx: index into ``self.data``.

    Returns:
        A list ``[image, input_pose_coord, input_pose_valid,
        input_pose_score, crop_info, frame_idx]``. ``image`` is a float
        CHW tensor; ``crop_info`` is the crop rectangle as
        (x1, y1, x2, y2) in frame coordinates.

    Raises:
        ValueError: if the frame cannot be read from ``self.cap``.
    """
    image_size = self.cfg.MODEL.IMAGE_SIZE
    num_joints = self.cfg.MODEL.NUM_JOINTS

    data = self.data[idx]
    frame_idx = data["image_id"]
    x, y, w, h = data['bbox']

    # Seek to the requested frame (property id 1) and decode it.
    # NOTE(review): assumes self.cap behaves like cv2.VideoCapture - confirm.
    self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    _, img = self.cap.read()
    if img is None:
        raise ValueError('Fail to read frame {}'.format(frame_idx))

    # Grow the box along one side so it matches the network aspect ratio.
    aspect_ratio = image_size[1] / image_size[0]
    centre = np.array([x + w * .5, y + h * .5])
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w, h]) * 1.25
    rotation = 0

    out_size = (image_size[1], image_size[0])
    trans = get_affine_transform(centre, scale, rotation, out_size)
    cropped_img = cv2.warpAffine(img, trans, out_size,
                                 flags=cv2.INTER_LINEAR)
    cropped_img = normalize_input(cropped_img, self.cfg)

    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    estimated_joints = np.zeros((num_joints, 3), dtype=np.float64)
    offsets = np.zeros((num_joints, 2), dtype=np.float64)
    offsets[:, 0] = self.frame_area[0]
    offsets[:, 1] = self.frame_area[1]

    estimated_joints[:, :2] = np.array(data['joints']).reshape(num_joints, 2)
    estimated_joints[:, :2] += offsets
    estimated_joints[:, 2] = np.array(data['score'])

    for j in range(num_joints):
        if estimated_joints[j, 2] > 0:
            estimated_joints[j, :2] = affine_transform(
                estimated_joints[j, :2], trans)
            # Zero the score of joints that fall outside the crop.
            estimated_joints[j, 2] *= (
                (estimated_joints[j, 0] >= 0) &
                (estimated_joints[j, 0] < image_size[1]) &
                (estimated_joints[j, 1] >= 0) &
                (estimated_joints[j, 1] < image_size[0]))

    input_pose_coord = estimated_joints[:, :2]
    input_pose_valid = np.array(
        [1 if i not in self.cfg.ignore_kps else 0
         for i in range(num_joints)])
    input_pose_score = estimated_joints[:, 2]

    crop_info = np.asarray([centre[0] - scale[0] * 0.5,
                            centre[1] - scale[1] * 0.5,
                            centre[0] + scale[0] * 0.5,
                            centre[1] + scale[1] * 0.5])

    return [
        torch.from_numpy(cropped_img).float().permute(2, 0, 1),
        input_pose_coord,
        input_pose_valid,
        input_pose_score,
        crop_info,
        frame_idx,
    ]
def __getitem__(self, idx):
    """Load sample ``idx``: image crop, heatmap targets and a PAF map.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target, target_weight, target_map, meta); the three
        targets are torch tensors built from ``self.generate_target`` and
        ``self.generate_paf``.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check for a failed read BEFORE the colour conversion: cvtColor raises
    # on None, which would hide the intended error message.
    if data_numpy is None:
        print('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)
    target_map = torch.from_numpy(self.generate_paf(joints, joints_vis))

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, target_map, meta
def _get_bbox_output(self, bbox, trans_output, height, width):
    """Map a COCO box into output space.

    The four corners are transformed individually and the axis-aligned
    hull of the result is taken.

    Args:
        bbox: box passed to ``self._coco_box_to_bbox``.
        trans_output: affine matrix applied to each corner.
        height, width: accepted for interface compatibility; not used here.

    Returns:
        (bbox, bbox_amodal): the hull clipped to
        ``opt.output_w`` / ``opt.output_h``, and the unclipped hull.
    """
    box = self._coco_box_to_bbox(bbox).copy()
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    corners = np.array(
        [[left, top], [left, bottom], [right, bottom], [right, top]],
        dtype=np.float32)
    for idx in range(len(corners)):
        corners[idx] = affine_transform(corners[idx], trans_output)

    xs = corners[:, 0]
    ys = corners[:, 1]
    box[:2] = xs.min(), ys.min()
    box[2:] = xs.max(), ys.max()

    # Keep the unclipped ("amodal") box before restricting to the output map.
    box_amodal = copy.deepcopy(box)
    box[[0, 2]] = np.clip(box[[0, 2]], 0, self.opt.output_w - 1)
    box[[1, 3]] = np.clip(box[[1, 3]], 0, self.opt.output_h - 1)
    return box, box_amodal
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """Accumulate per-bin heatmap scores over all views for every joint.

    Args:
        heatmap: array of size (n * k * h * w)
            -n: number of views,  -k: number of joints
            -h: heatmap height,   -w: heatmap width
        grid: list of k ndarrays of size (nbins * 3)
            -k: number of joints; 1 when the grid is shared in PSM
            -nbins: number of bins in the grid
        bbox2D: per-view dicts with 'center' and 'scale' describing the
            bounding box on which the heatmap is computed
        cam: per-view camera objects passed to ``cameras.project_pose``
        imgSize: image size used for the affine transform and for scaling
            projected points down to heatmap coordinates

    Returns:
        unary_of_all_joints: a list of ndarray of size nbins, same order as
            heatmaps
    """
    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]
    nbins = grid[0].shape[0]

    # The interpolator is queried with (x, y) points against the transposed
    # (w, h) heatmap, so the first axis must have w samples and the second h.
    # (Taking both from the un-transposed shape only works when h == w.)
    axis_x = np.arange(w)
    axis_y = np.arange(h)

    # The image -> crop transform depends only on the view, not on the joint.
    view_trans = [
        get_affine_transform(bbox2D[c]['center'], bbox2D[c]['scale'], 0,
                             imgSize) for c in range(n)
    ]

    unary_of_all_joints = []
    for j in range(k):
        unary = np.zeros(nbins)
        grid_id = 0 if len(grid) == 1 else j
        for c in range(n):
            xy = cameras.project_pose(grid[grid_id], cam[c])
            # Crop coordinates -> heatmap coordinates.
            xy = affine_transform(xy, view_trans[c]) * np.array(
                [w, h]) / imgSize

            hmap = heatmap[c, j, :, :]
            rgi = RegularGridInterpolator(points=[axis_x, axis_y],
                                          values=hmap.transpose(),
                                          bounds_error=False,
                                          fill_value=0)
            # Points outside the heatmap contribute 0 (fill_value).
            score = rgi(xy)
            unary = unary + np.reshape(score, newshape=unary.shape)
        unary_of_all_joints.append(unary)

    return unary_of_all_joints
def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
    """Write the keypoint targets of object slot ``k`` into ``ret``.

    Args:
        ret: dict of target arrays; the 'hps', 'hps_mask', 'hp_offset',
            'hp_ind', 'hp_offset_mask', 'hm_hp_mask', 'joint' and 'hm_hp'
            entries are written.
        k: object slot index.
        ann: annotation dict; 'keypoints' is optional (zeros when absent).
        gt_det: dict whose 'hps' list receives the flattened keypoints.
        trans_output: affine matrix applied to every keypoint.
        ct_int: integer object center the keypoint offsets are relative to.
        bbox: object box, used to mask the heatmap of unlabeled joints.
        h, w: box height and width, used to pick the Gaussian radius.
    """
    num_joints = self.num_joints
    if 'keypoints' in ann:
        pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
    else:
        pts = np.zeros((self.num_joints, 3), np.float32)

    if self.opt.simple_radius > 0:
        hp_radius = int(
            simple_radius(h, w, min_overlap=self.opt.simple_radius))
    else:
        hp_radius = max(
            0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))

    for j in range(num_joints):
        pts[j, :2] = affine_transform(pts[j, :2], trans_output)

        if not (pts[j, 2] > 0):
            # Unlabeled joint: zero it and pass its heatmap channel,
            # restricted to the box, to self._ignore_region.
            pts[j, :2] *= 0
            self._ignore_region(
                ret['hm_hp'][j,
                             int(bbox[1]):int(bbox[3]) + 1,
                             int(bbox[0]):int(bbox[2]) + 1])
            continue

        in_bounds = (0 <= pts[j, 0] < self.opt.output_w and
                     0 <= pts[j, 1] < self.opt.output_h)
        if not in_bounds:
            pts[j, :2] *= 0
            continue

        slot = k * num_joints + j
        pair = slice(j * 2, j * 2 + 2)
        pt_int = pts[j, :2].astype(np.int32)

        ret['hps'][k, pair] = pts[j, :2] - ct_int
        ret['hps_mask'][k, pair] = 1
        ret['hp_offset'][slot] = pts[j, :2] - pt_int
        ret['hp_ind'][slot] = pt_int[1] * self.opt.output_w + pt_int[0]
        ret['hp_offset_mask'][slot] = 1
        ret['hm_hp_mask'][slot] = 1
        ret['joint'][slot] = j
        draw_umich_gaussian(ret['hm_hp'][j], pt_int, hp_radius)

        if pts[j, 2] == 1:
            # Flag value 1: overwrite the peak with the ignore value and
            # switch the offset / heatmap masks back off.
            ret['hm_hp'][j, pt_int[1], pt_int[0]] = self.ignore_val
            ret['hp_offset_mask'][slot] = 0
            ret['hm_hp_mask'][slot] = 0

    gt_det['hps'].append(pts[:, :2].reshape(num_joints * 2))
def get_pose_estimation_prediction(pose_model, image, centers, scales, transform):
    """Run the pose model on one crop per (center, scale) pair.

    Args:
        pose_model: callable taking the batched crops and center maps.
        image: full frame the crops are taken from.
        centers, scales: per-person crop centers and scales.
        transform: callable converting a crop into a tensor.

    Returns:
        The coordinates returned by ``get_final_preds``.
    """
    rotation = 0

    crops = []
    maps = []
    for center, scale in zip(centers, scales):
        trans = get_affine_transform(center, scale, rotation,
                                     cfg.MODEL.IMAGE_SIZE)

        # Gaussian map at the person's center in crop coordinates.
        center_in_crop = affine_transform(center, trans)
        center_map = gaussian(np.zeros(cfg.MODEL.IMAGE_SIZE),
                              center_in_crop, cfg.MODEL.SIGMA)
        maps.append(torch.from_numpy(center_map))

        # Crop a smaller image around the person.
        out_size = (int(cfg.MODEL.IMAGE_SIZE[0]),
                    int(cfg.MODEL.IMAGE_SIZE[1]))
        crop = cv2.warpAffine(image, trans, out_size,
                              flags=cv2.INTER_LINEAR)
        crops.append(transform(crop))

    # Stack the per-person tensors into batches.
    center_maps = torch.stack(maps)
    model_inputs = torch.stack(crops)

    # Compute the output heatmap and decode it to coordinates.
    output = pose_model(model_inputs.to(CTX), center_maps.to(CTX))
    heatmaps = output.cpu().detach().numpy()
    coords, _ = get_final_preds(cfg, heatmaps, np.asarray(centers),
                                np.asarray(scales))
    return coords
def __getitem__(self, idx):
    """Load one multi-person sample with 2D/3D targets.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target_heatmap, target_weight, target_3d, meta,
        input_heatmap). If the image cannot be read, a tuple of six
        ``None`` values is returned instead.

    Raises:
        TypeError: if ``self.root_id`` is neither an int nor a list.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    if data_numpy is None:
        # Unreadable images are reported with an all-None sample rather
        # than an exception; the caller is expected to handle it.
        return None, None, None, None, None, None

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_2d']
    joints_vis = db_rec['joints_2d_vis']
    joints_3d = db_rec['joints_3d']
    joints_3d_vis = db_rec['joints_3d_vis']

    nposes = len(joints)
    assert nposes <= self.maximum_person, 'too many persons'

    height, width, _ = data_numpy.shape
    c = np.array([width / 2.0, height / 2.0])
    s = get_scale((width, height), self.image_size)
    r = 0

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for n in range(nposes):
        for i in range(len(joints[0])):
            if joints_vis[n][i, 0] > 0.0:
                joints[n][i, 0:2] = affine_transform(
                    joints[n][i, 0:2], trans)
                # Joints that land outside the image are marked invisible.
                if (np.min(joints[n][i, :2]) < 0 or
                        joints[n][i, 0] >= self.image_size[0] or
                        joints[n][i, 1] >= self.image_size[1]):
                    joints_vis[n][i, :] = 0

    # ``is not None`` instead of ``!= None``: comparing an ndarray with
    # ``!=`` is element-wise and has no single truth value.
    if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] is not None:
        # For convenience, we use predicted poses and corresponding values
        # at the original heatmaps to generate 2d heatmaps for Campus and
        # Shelf dataset. You can also use other 2d backbone trained on COCO
        # to generate 2d heatmaps directly.
        pred_pose2d = db_rec['pred_pose2d']
        for n in range(len(pred_pose2d)):
            for i in range(len(pred_pose2d[n])):
                pred_pose2d[n][i, 0:2] = affine_transform(
                    pred_pose2d[n][i, 0:2], trans)

        input_heatmap = self.generate_input_heatmap(pred_pose2d)
        input_heatmap = torch.from_numpy(input_heatmap)
    else:
        input_heatmap = torch.zeros(self.cfg.NETWORK.NUM_JOINTS,
                                    self.heatmap_size[1],
                                    self.heatmap_size[0])

    target_heatmap, target_weight = self.generate_target_heatmap(
        joints, joints_vis)
    target_heatmap = torch.from_numpy(target_heatmap)
    target_weight = torch.from_numpy(target_weight)

    # Pad joints and joints_vis to a fixed (maximum_person, ...) shape.
    joints_u = np.zeros((self.maximum_person, self.num_joints, 2))
    joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2))
    for i in range(nposes):
        joints_u[i] = joints[i]
        joints_vis_u[i] = joints_vis[i]

    joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3))
    joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3))
    for i in range(nposes):
        joints_3d_u[i] = joints_3d[i][:, 0:3]
        joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

    target_3d = self.generate_3d_target(joints_3d)
    target_3d = torch.from_numpy(target_3d)

    # The root is a single joint, or the mean of several joints.
    if isinstance(self.root_id, int):
        roots_3d = joints_3d_u[:, self.root_id]
    elif isinstance(self.root_id, list):
        roots_3d = np.mean([joints_3d_u[:, j] for j in self.root_id],
                           axis=0)
    else:
        raise TypeError('root_id must be an int or a list, got {}'.format(
            type(self.root_id).__name__))

    meta = {
        'image': image_file,
        'num_person': nposes,
        'joints_3d': joints_3d_u,
        'joints_3d_vis': joints_3d_vis_u,
        'roots_3d': roots_3d,
        'joints': joints_u,
        'joints_vis': joints_vis_u,
        'center': c,
        'scale': s,
        'rotation': r,
        'camera': db_rec['camera']
    }

    return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
def __getitem__(self, idx):
    """Load sample ``idx`` with heatmap targets built at heatmap resolution.

    Two affine transforms are derived from the same (center, scale,
    rotation): one to ``self.image_size`` for the crop and the returned
    joints, one to ``self.heatmap_size`` for the joints fed to
    ``self.generate_target``.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target, target_weight, meta).

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec["image"]
    filename = db_rec["filename"] if "filename" in db_rec else ""
    imgnum = db_rec["imgnum"] if "imgnum" in db_rec else ""

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == "zip":
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # Check for a failed read BEFORE the colour conversion: cvtColor raises
    # on None, which would make this error path unreachable.
    if data_numpy is None:
        logger.error("=> fail to read {}".format(image_file))
        raise ValueError("Fail to read {}".format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec["joints_3d"]
    joints_vis = db_rec["joints_3d_vis"]

    c = db_rec["center"]
    s = db_rec["scale"]
    score = db_rec["score"] if "score" in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are visible.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the training samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Separate copy: transformed with the heatmap-resolution matrix below.
    joints_heatmap = joints.copy()
    trans = get_affine_transform(c, s, r, self.image_size)
    trans_heatmap = get_affine_transform(c, s, r, self.heatmap_size)

    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            joints_heatmap[i, 0:2] = affine_transform(
                joints_heatmap[i, 0:2], trans_heatmap)

    target, target_weight = self.generate_target(joints_heatmap,
                                                 joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        "image": image_file,
        "filename": filename,
        "imgnum": imgnum,
        "joints": joints,
        "joints_vis": joints_vis,
        "center": c,
        "scale": s,
        "rotation": r,
        "score": score,
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``; optionally add heatmap-grid joints to ``meta``.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target, target_weight, meta). When ``cf.args.task`` is
        'ssm', ``meta['joints_h']`` holds the joints divided by the
        image/heatmap stride, offset by 0.5 and cast to int32.

    Raises:
        ValueError: if the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record['filename'] if 'filename' in record else ''
    imgnum = record['imgnum'] if 'imgnum' in record else ''

    imread_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, imread_flags)
    else:
        data_numpy = cv2.imread(image_file, imread_flags)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    c = record['center']
    s = record['scale']
    score = record['score'] if 'score' in record else 1
    r = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotate roughly 60% of the training samples.
        if random.random() <= 0.6:
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            r = 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            flipped_width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               flipped_width,
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(data_numpy, trans, out_size,
                           flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for joint_idx in range(self.num_joints):
        if joints_vis[joint_idx, 0] > 0.0:
            joints[joint_idx, 0:2] = affine_transform(
                joints[joint_idx, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    from boxx import cf
    if cf.args.task == 'ssm':
        feat_stride = self.image_size / self.heatmap_size
        joints_h = copy.deepcopy(joints)
        # TODO: reduce the quantization loss
        joints_h[:, 0] = joints_h[:, 0] / feat_stride[0] + 0.5
        joints_h[:, 1] = joints_h[:, 1] / feat_stride[1] + 0.5
        meta['joints_h'] = joints_h.astype(np.int32)

    return input, target, target_weight, meta
def _get_single_view_item(self, joints_3d, joints_3d_vis, cam):
    """Build one view's sample by projecting 3D poses with ``cam``.

    No image is read: the input is a constant 288x360 image of ones that
    is warped to ``self.image_size``.

    Args:
        joints_3d: sequence of per-person 3D joint arrays (deep-copied).
        joints_3d_vis: matching visibility arrays (deep-copied).
        cam: camera passed to ``project_pose``.

    Returns:
        (input, target_heatmap, target_weight, target_3d, meta,
        input_heatmap); ``target_heatmap`` and ``target_weight`` are zeros.
    """
    joints_3d = copy.deepcopy(joints_3d)
    joints_3d_vis = copy.deepcopy(joints_3d_vis)
    nposes = len(joints_3d)

    # Fixed virtual image size used for the projection.
    width = 360
    height = 288
    c = np.array([width / 2.0, height / 2.0], dtype=np.float32)
    s = get_scale((width, height), self.image_size)
    r = 0

    joints = []
    joints_vis = []
    for pose_idx in range(nposes):
        pose2d = project_pose(joints_3d[pose_idx], cam)
        xs, ys = pose2d[:, 0], pose2d[:, 1]
        in_x = np.bitwise_and(xs >= 0, xs <= width - 1)
        in_y = np.bitwise_and(ys >= 0, ys <= height - 1)
        in_view = np.bitwise_and(in_x, in_y)

        # Visible only if flagged visible AND projected inside the image.
        vis = joints_3d_vis[pose_idx][:, 0] > 0
        vis[np.logical_not(in_view)] = 0

        joints.append(pose2d)
        joints_vis.append(np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1))

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = np.ones((height, width, 3), dtype=np.float32)
    input = cv2.warpAffine(input, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for pose_idx in range(nposes):
        pose = joints[pose_idx]
        pose_vis = joints_vis[pose_idx]
        for j in range(len(joints[0])):
            if pose_vis[j, 0] > 0.0:
                pose[j, 0:2] = affine_transform(pose[j, 0:2], trans)
                outside = (np.min(pose[j, :2]) < 0 or
                           pose[j, 0] >= self.image_size[0] or
                           pose[j, 1] >= self.image_size[1])
                if outside:
                    pose_vis[j, :] = 0

    input_heatmap, _ = self.generate_input_heatmap(joints, joints_vis)
    input_heatmap = torch.from_numpy(input_heatmap)
    target_heatmap = torch.zeros_like(input_heatmap)
    target_weight = torch.zeros(len(target_heatmap), 1)

    # Pad every per-person array to a fixed (maximum_person, ...) shape.
    num_joints = len(joints[0])
    joints_u = np.zeros((self.maximum_person, num_joints, 2))
    joints_vis_u = np.zeros((self.maximum_person, num_joints, 2))
    joints_3d_u = np.zeros((self.maximum_person, num_joints, 3))
    joints_3d_vis_u = np.zeros((self.maximum_person, num_joints, 3))
    for pose_idx in range(nposes):
        joints_u[pose_idx] = joints[pose_idx]
        joints_vis_u[pose_idx] = joints_vis[pose_idx]
        joints_3d_u[pose_idx] = joints_3d[pose_idx][:, 0:3]
        joints_3d_vis_u[pose_idx] = joints_3d_vis[pose_idx][:, 0:3]

    target_3d = torch.from_numpy(self.generate_3d_target(joints_3d))

    # Root position: midpoint of joints 11 and 12.
    roots_3d = (joints_3d_u[:, 11] + joints_3d_u[:, 12]) / 2.0

    meta = {
        'image': '',
        'num_person': nposes,
        'joints_3d': joints_3d_u,
        'roots_3d': roots_3d,
        'joints_3d_vis': joints_3d_vis_u,
        'joints': joints_u,
        'joints_vis': joints_vis_u,
        'center': c,
        'scale': s,
        'rotation': r,
        'camera': cam
    }

    return input, target_heatmap, target_weight, target_3d, meta, input_heatmap
def __getitem__(self, idx):
    """Load sample ``idx``; skip the crop when 'TEST_MODE' is configured.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, target, target_weight, meta). When 'TEST_MODE' is in
        ``self.cfg`` the un-warped image is used as input; the joints are
        still mapped through the affine transform.

    Raises:
        ValueError: if the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record['filename'] if 'filename' in record else ''
    imgnum = record['imgnum'] if 'imgnum' in record else ''

    imread_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, imread_flags)
    else:
        data_numpy = cv2.imread(image_file, imread_flags)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    c = record['center']
    s = record['scale']
    score = record['score'] if 'score' in record else 1
    r = 0

    # ---- data augmentation (training only) ----
    if self.is_train:
        # Scale and rotation augmentation; rotation on roughly 60% of draws.
        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if random.random() <= 0.6:
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            r = 0

        # Horizontal flip.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

        # Brighten/darken image by shifting all pixels; not sure if this
        # actually helps, so it is left disabled:
        # if self.brighten and random.random() <= 0.5:
        #     shift = 2 * np.random.randn()
        #     data_numpy = np.clip(data_numpy + shift, 0, 255).astype(np.uint8)

    trans = get_affine_transform(c, s, r, self.image_size)

    # NOTE: the warp scales images and crops them to be 256*256. During
    # eval ('TEST_MODE' configured) the raw image is used instead.
    if 'TEST_MODE' not in self.cfg:
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        input = cv2.warpAffine(data_numpy, trans, out_size,
                               flags=cv2.INTER_LINEAR)
    else:
        input = data_numpy

    if self.transform:
        input = self.transform(input)

    for joint_idx in range(self.num_joints):
        if joints_vis[joint_idx, 0] > 0.0:
            joints[joint_idx, 0:2] = affine_transform(
                joints[joint_idx, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` with merged primary + interference heatmaps.

    Args:
        idx: index into ``self.db``.

    Returns:
        (input, all_ins_target, all_ins_target_weight, ae_targets, meta).
        The "all instance" target is the element-wise maximum of the
        primary person's target and the targets of every interference
        instance; ``meta['interference_maps']`` holds the interference-only
        maximum.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Check for a failed read BEFORE the colour conversion: cvtColor raises
    # on None, which would make this error path unreachable.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        # Fall back to the primary person, but as independent copies: the
        # joints are flipped / affine-transformed in place below, and an
        # aliased array would be transformed twice.
        interference_joints = [copy.deepcopy(joints)]
        interference_joints_vis = [copy.deepcopy(joints_vis)]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are visible.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is applied to roughly 60% of the training samples.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1
            for i in range(len(interference_joints)):
                interference_joints[i], interference_joints_vis[i] = \
                    fliplr_joints(interference_joints[i],
                                  interference_joints_vis[i],
                                  data_numpy.shape[1], self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    # Interference joints heatmaps: element-wise max over all instances.
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)
    for i in range(len(interference_joints)):
        inter_joints = interference_joints[i]
        inter_joints_vis = interference_joints_vis[i]
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(
                    inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight,
                                         _inter_target_weight)

    all_ins_target = np.maximum(inter_target, target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

    # AE labels built from the primary joints followed by the others.
    all_joints = [joints] + interference_joints
    ae_targets = self.generate_joints_ae_targets(all_joints)

    # Convert the numpy targets to torch tensors.
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)
    ae_targets = torch.from_numpy(ae_targets)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        'interference_maps': inter_target,
    }

    return input, all_ins_target, all_ins_target_weight, ae_targets, meta
def __getitem__(self, idx):
    """Load sample ``idx``, optionally together with one supporting frame.

    When warping is enabled for the current mode
    (``self.use_warping_train`` while training, ``self.use_warping_test``
    otherwise) a neighbouring frame of the same sequence is loaded and
    goes through exactly the same flip, affine warp and ``self.transform``
    as the reference frame.

    Args:
        idx: Index into ``self.db``.

    Returns:
        ``(input, input_sup, target, target_weight, meta)`` when warping is
        enabled, otherwise ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the reference or the supporting image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # Evaluated once instead of being repeated before every step.
    use_warping = ((self.is_train and self.use_warping_train)
                   or (not self.is_train and self.use_warping_test))

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    # Validate the read before any colour conversion touches the array.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # ---- supporting frame ----
    if use_warping:
        T = self.timestep_delta_range
        prev_nm = image_file.split('/')[-1]
        # The file name (without '.jpg') is parsed as the frame index.
        ref_idx = int(prev_nm.replace('.jpg', ''))

        if self.timestep_delta_rand:
            # Uniform integer offset in [-T, T].
            delta = -T + np.random.randint(T * 2 + 1)
        else:
            delta = self.timestep_delta

        sup_idx = ref_idx + delta

        if 'nframes' in db_rec:
            nframes = db_rec['nframes']
            # Valid index range is [0, nframes - 1] for posetrack18 and
            # [1, nframes] otherwise.
            if not self.is_posetrack18:
                sup_idx = np.clip(sup_idx, 1, nframes)
            else:
                sup_idx = np.clip(sup_idx, 0, nframes - 1)

        # File names are zero padded to 6 (posetrack18) or 8 digits.
        if not self.is_posetrack18:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(8) + '.jpg')
        else:
            new_sup_image_file = image_file.replace(
                prev_nm, str(sup_idx).zfill(6) + '.jpg')

        # Fall back to the reference frame if the neighbour is missing.
        if os.path.exists(new_sup_image_file):
            sup_image_file = new_sup_image_file
        else:
            sup_image_file = image_file

        data_numpy_sup = self.read_image(sup_image_file)
        if data_numpy_sup is None:
            logger.error('=> SUP: fail to read {}'.format(sup_image_file))
            raise ValueError('SUP: Fail to read {}'.format(sup_image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        if use_warping:
            data_numpy_sup = cv2.cvtColor(data_numpy_sup, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            if use_warping:
                data_numpy_sup = data_numpy_sup[:, ::-1, :]

            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

    # The supporting frame is warped with the very same transform.
    if use_warping:
        input_sup = cv2.warpAffine(
            data_numpy_sup, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)
        if use_warping:
            input_sup = self.transform(input_sup)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    if use_warping:
        meta['sup_image'] = sup_image_file
        return input, input_sup, target, target_weight, meta

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx`` from a mixed-source dataset.

    The image path is assembled from ``self.root``, the record's
    ``'source'`` and ``'image'`` entries. Records whose source is ``'h36m'``
    can use the undistorted zip archive (``self.no_distortion``) and pseudo
    labels (``self.pseudo_label``), and are never augmented; all other
    sources are augmented in training with the per-source parameters in
    ``self.aug_param_dict``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])
    source = db_rec['source']

    if source == 'h36m' and self.no_distortion:
        image_dir_zip = 'images_nodistortion.zip@'
    else:
        image_dir_zip = 'images.zip@'
    image_dir = image_dir_zip if self.data_format == 'zip' else ''
    # Special case: coco images are never addressed through the zip prefix.
    if source == 'coco':
        image_dir = ''
    image_file = osp.join(self.root, source, image_dir, 'images',
                          db_rec['image'])

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Fail with the file name instead of an opaque error further down.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    # Only the first two visibility columns are kept.
    if source == 'h36m' and self.pseudo_label:
        joints = db_rec['joints_2d_pseudo'].copy()
        joints_vis = db_rec['joints_vis_pseudo'].copy()[:, :2]
    else:
        joints = db_rec['joints_2d'].copy()
        joints_vis = db_rec['joints_vis'].copy()[:, :2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # Crop and scale according to the ground-truth box.
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train and source != 'h36m':
        aug_params = self.aug_param_dict[source]
        sf = aug_params['scale_factor']
        rf = aug_params['rotation_factor']
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if aug_params['flip'] and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            center[0] = data_numpy.shape[1] - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        if self.color_jitter:
            input = input[:, :, ::-1]  # BGR -> RGB
            input = self.color_jitter(input)
            # The jitter result is split/merged like a PIL image to swap
            # the channel order back.
            r, g, b = input.split()
            input = Image.merge("RGB", (b, g, r))  # RGB -> BGR
        input = self.transform(input)

    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)
    # NOTE: joints that land outside the crop are intentionally NOT marked
    # invisible here (that check was disabled in the original code).

    target, target_weight = self.generate_target(joints, joints_vis, source)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': source,
        'subject': db_rec['subject'] if source == 'h36m' else -1
    }
    return input, target, target_weight, meta
def __getitem__(self, idx, source='h36m', **kwargs):
    """Load sample ``idx`` and attach source-specific 3D metadata.

    Args:
        idx: Index into ``self.db``.
        source: Which dataset the record belongs to. ``'totalcapture'`` and
            ``'h36m'`` get their own camera / 3D fields; any other value
            receives the fields of ``self.totalcapture_template_meta`` so
            that every sample exposes the same ``meta`` keys.
        **kwargs: For ``source == 'totalcapture'`` must contain
            ``'tc_imubone_map'``, a mapping from the bone names used in the
            record's ``'bone_vec'`` to the keys used in
            ``meta['bone_vectors']``.

    Returns:
        Tuple ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Fail with the file name instead of an opaque error inside warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that fall outside the crop are marked invisible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source'],
        'heatmap_size': self.heatmap_size
    }

    if source == 'totalcapture':
        imubone_mapping = kwargs['tc_imubone_map']
        meta['joints_gt'] = db_rec['joints_gt']
        meta['bone_vec'] = db_rec['bone_vec']
        meta['camera'] = db_rec['camera']
        bone_vec_tc = meta['bone_vec']
        # Re-key the bone vectors through the supplied mapping.
        meta['bone_vectors'] = {
            imubone_mapping[bone_name]: bone_vec_tc[bone_name]
            for bone_name in imubone_mapping
        }
    elif source == 'h36m':
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = cam_utils.camera_to_world_frame(
            db_rec['joints_3d'], db_rec['camera']['R'],
            db_rec['camera']['T'])
    else:
        # Since tc is mixed with mpii, both must expose the same keys in
        # meta; otherwise collating a batch in the dataloader fails.
        template = self.totalcapture_template_meta
        meta['joints_gt'] = template['joints_gt']
        meta['bone_vec'] = template['bone_vec']
        meta['camera'] = template['camera']
        meta['bone_vectors'] = template['bone_vectors']

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Return the augmented image, heatmap targets and a one-hot heatmap.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, target, target_weight, meta, onehot_heatmap)`` where
        ``onehot_heatmap`` is produced by ``self.render_onehot_heatmap``
        from the transformed joints and ``input.shape[1]``.

    Raises:
        ValueError: If the image cannot be read.
    """
    record = copy.deepcopy(self.db[idx])

    image_file = record['image']
    filename = record['filename'] if 'filename' in record else ''
    imgnum = record['imgnum'] if 'imgnum' in record else ''

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    joints = record['joints_3d']
    joints_vis = record['joints_3d_vis']
    center = record['center']
    scale = record['scale']
    score = record['score'] if 'score' in record else 1
    rotation = 0

    if self.is_train:
        # Random scale / rotation jitter followed by an optional mirror.
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if random.random() <= 0.6:
            rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        else:
            rotation = 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, width, self.flip_pairs)
            center[0] = width - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    # Only joints flagged visible are moved into the warped frame.
    for joint_id in range(self.num_joints):
        if not joints_vis[joint_id, 0] > 0.0:
            continue
        joints[joint_id, 0:2] = affine_transform(
            joints[joint_id, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': center,
        'scale': scale,
        'rotation': rotation,
        'score': score
    }

    onehot_heatmap = self.render_onehot_heatmap(
        meta['joints'], input.shape[1])

    return input, target, target_weight, meta, onehot_heatmap
def __getitem__(self, idx):
    """Load sample ``idx`` and build joint and body (limb) targets.

    Besides the usual joint heatmaps, one descriptor per entry of
    ``self.skeletons`` is computed from the transformed joints:
    ``[length / 332.55, signed angle / pi, 1]``, where the angle is taken
    between the limb vector and ``self.axis_y``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        Tuple ``(input, joint_target, joint_target_weight, body_target,
        body_target_weight, body, body_vis, meta)``.

    Raises:
        ValueError: If the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )

    # Validate the read before the colour conversion touches the array.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    # Separate copy stored in the record; only its third column is read
    # below, to decide whether a joint may be used for a limb.
    joints_copy = db_rec['joints_3d_copy']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis
            )

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Random scale factor.
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Random rotation factor.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            # Mirror the joints using this dataset's symmetric pairs.
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            # Recompute the centre after the mirror flip.
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, so float64 keeps the dtype unchanged.
    body = np.zeros((self.num_body, 3), dtype=np.float64)
    body_vis = np.zeros((self.num_body, 3), dtype=np.float64)
    for idbody, skeleton in enumerate(self.skeletons):
        point_a = joints[skeleton[0]]
        point_b = joints[skeleton[1]]
        # Skip limbs with an end point whose third value is 0 in the copy.
        if (joints_copy[skeleton[0]][2] == 0
                or joints_copy[skeleton[1]][2] == 0):
            continue
        # Limb vector without its last component.
        axis_x = (point_b - point_a)[:-1]
        lx = np.sqrt(axis_x.dot(axis_x))
        if lx == 0:
            continue
        ly = 1  # self.axis_y is treated as having unit length
        cos_angle = axis_x.dot(self.axis_y) / (lx * ly)
        # Angle normalised by pi; the sign follows the limb's y component.
        angle = np.arccos(cos_angle) / np.pi
        if axis_x[1] < 0:
            angle = -angle
        # 332.55 is a fixed length normaliser (origin not documented here).
        body[idbody] = [lx / 332.55, angle, 1]
        body_vis[idbody] = [1, 1, 0]

    joint_target, joint_target_weight = self.generate_target(
        joints, joints_vis)
    body_target, body_target_weight = self.generate_body_target(
        joints, joints_copy, body_vis)

    joint_target = torch.from_numpy(joint_target)
    joint_target_weight = torch.from_numpy(joint_target_weight)
    body_target = torch.from_numpy(body_target)
    body_target_weight = torch.from_numpy(body_target_weight)
    body = torch.from_numpy(body)
    body_vis = torch.from_numpy(body_vis)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'body': body,
        'body_vis': body_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    return (input, joint_target, joint_target_weight, body_target,
            body_target_weight, body, body_vis, meta)
def __getitem__(self, idx, source='h36m', **kwargs):
    """Load sample ``idx`` and expose the image-to-heatmap transform.

    In addition to the usual outputs, ``meta['aug_trans']`` holds a 3x3
    matrix that chains the crop/augmentation affine (full image -> cropped
    image) with the scaling from cropped image to heatmap.

    Args:
        idx: Index into ``self.db``.
        source: One of ``'totalcapture'``, ``'h36m'``, ``'panoptic'`` or
            ``'unrealcv'``; selects how camera / 3D ground truth are
            copied into ``meta``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the image cannot be read.
        AssertionError: If ``source`` is not a supported dataset name.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # Fail with the file name instead of an opaque error inside warpAffine.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()
    joints_vis = db_rec['joints_vis'].copy()

    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0

    if self.is_train:
        sf = self.scale_factor
        rf = self.rotation_factor
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

    # Notice: this transform maps the full image to the cropped image,
    # not the full image to the heatmap.
    trans = get_affine_transform(center, scale, rotation, self.image_size)
    input = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        input = self.transform(input)

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
            # Joints that fall outside the crop are marked invisible.
            if (np.min(joints[i, :2]) < 0 or
                    joints[i, 0] >= self.image_size[0] or
                    joints[i, 1] >= self.image_size[1]):
                joints_vis[i, :] = 0

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    # 3x3 transform combining the data-augmentation affine with the
    # cropped-image -> heatmap scaling.
    aug_trans = np.eye(3, 3)
    aug_trans[0:2] = trans  # full img -> cropped img
    hm_scale = self.heatmap_size / self.image_size
    scale_trans = np.eye(3, 3)  # cropped img -> heatmap
    # NOTE(review): the x scale takes hm_scale[1] and the y scale takes
    # hm_scale[0]; this only matters when the two ratios differ -- confirm
    # the index order against the layout of heatmap_size / image_size.
    scale_trans[0, 0] = hm_scale[1]
    scale_trans[1, 1] = hm_scale[0]
    aug_trans = scale_trans @ aug_trans

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source'],
        'heatmap_size': self.heatmap_size,
        'aug_trans': aug_trans,
    }

    if source == 'totalcapture':
        meta['joints_gt'] = db_rec['joints_gt']
        meta['camera'] = db_rec['camera']
    elif source in ['h36m']:
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = cam_utils.camera_to_world_frame(
            db_rec['joints_3d'], db_rec['camera']['R'],
            db_rec['camera']['T'])
    elif source == 'panoptic':
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = db_rec['joints_gt']
    elif source in ['unrealcv']:
        meta['camera'] = db_rec['camera']
        meta['joints_gt'] = db_rec['joints_gt']
    else:
        assert 0 == 1, 'No such dataset definition in JointDataset'

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Load sample ``idx``, optionally with four supporting frames.

    When warping is enabled for the current mode
    (``self.use_warping_train`` while training, ``self.use_warping_test``
    otherwise) the frames at offsets -1, -2, +1 and +2 from the reference
    frame are loaded. A neighbour whose file does not exist is replaced by
    the reference frame itself. All frames share the same flip, affine
    warp and ``self.transform``.

    Args:
        idx: Index into ``self.db``.

    Returns:
        ``(input, input_prev1, input_prev2, input_next1, input_next2,
        target, target_weight, meta)`` when warping is enabled, otherwise
        ``(input, target, target_weight, meta)``.

    Raises:
        ValueError: If the reference or any supporting image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # Evaluated once instead of being repeated before every step.
    use_warping = ((self.is_train and self.use_warping_train)
                   or (not self.is_train and self.use_warping_test))

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = self.read_image(image_file)

    # Validate the read before any colour conversion touches the array.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # ---- supporting frames, in output order: prev1, prev2, next1, next2
    sup_files = []
    sup_images = []
    if use_warping:
        ref_name = image_file.split('/')[-1]
        # The file name (without '.jpg') is parsed as the frame index.
        ref_idx = int(ref_name.replace('.jpg', ''))
        # File names are zero padded to 6 (posetrack18) or 8 digits.
        digits = 6 if self.is_posetrack18 else 8

        for label, delta in (('PREV', -1), ('PREV', -2),
                             ('NEXT', 1), ('NEXT', 2)):
            sup_idx = ref_idx + delta
            if 'nframes' in db_rec:
                nframes = db_rec['nframes']
                # Valid index range is [0, nframes - 1] for posetrack18
                # and [1, nframes] otherwise.
                if not self.is_posetrack18:
                    sup_idx = np.clip(sup_idx, 1, nframes)
                else:
                    sup_idx = np.clip(sup_idx, 0, nframes - 1)

            candidate = image_file.replace(
                ref_name, str(sup_idx).zfill(digits) + '.jpg')
            # Fall back to the reference frame if the neighbour is missing.
            sup_file = candidate if os.path.exists(candidate) else image_file

            sup_image = self.read_image(sup_file)
            if sup_image is None:
                logger.error(
                    '=> {} SUP: fail to read {}'.format(label, sup_file))
                raise ValueError(
                    '{} SUP: Fail to read {}'.format(label, sup_file))

            sup_files.append(sup_file)
            sup_images.append(sup_image)

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        sup_images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                      for img in sup_images]

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            sup_images = [img[:, ::-1, :] for img in sup_images]

            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    out_size = (int(self.image_size[0]), int(self.image_size[1]))
    input = cv2.warpAffine(
        data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)
    # Every supporting frame is warped with the very same transform.
    sup_inputs = [
        cv2.warpAffine(img, trans, out_size, flags=cv2.INTER_LINEAR)
        for img in sup_images
    ]

    if self.transform:
        input = self.transform(input)
        sup_inputs = [self.transform(img) for img in sup_inputs]

    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }

    if use_warping:
        # Only the file of the first previous frame is reported.
        meta['sup_image'] = sup_files[0]
        input_prev1, input_prev2, input_next1, input_next2 = sup_inputs
        return (input, input_prev1, input_prev2, input_next1, input_next2,
                target, target_weight, meta)

    return input, target, target_weight, meta
def __getitem__(self, idx):
    """Build one fovea-localisation sample from a pre-loaded image.

    The image array is taken directly from the database record (no disk
    read). It is resized to ``self.image_size``, optionally rescaled,
    centre-cropped to ``self.crop_size``, flipped / affinely transformed
    and, in training, randomly cropped to ``self.patch_size`` so that the
    fovea stays inside the patch. The fovea coordinate is updated at every
    step.

    Args:
        idx: Index into ``self.db``.

    Returns:
        In training: ``(input, input_roi, heatmap_ds, heatmap_roi,
        offset_in_roi, target_weight, meta)``. Otherwise ``(input, meta)``.

    Raises:
        ValueError: If the record holds no image.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # The image is stored in the record itself to speed up data fetching.
    data_numpy = db_rec['image']
    filename = db_rec['filename']

    if data_numpy is None:
        logger.error('=> fail to read {}'.format(idx))
        raise ValueError('Fail to read {}'.format(idx))

    if 'fovea' in db_rec.keys():
        fovea = np.array(db_rec['fovea'])
    else:
        fovea = np.array([-1, -1])
    # The coordinate is scaled in place by float ratios below; an integer
    # array would silently truncate every intermediate result.
    if not np.issubdtype(fovea.dtype, np.floating):
        fovea = fovea.astype(np.float64)

    # Preparation for the Refuge2 final submission ('Refuge2-Ext'), whose
    # images are 4288x2848: crop 300 px on the left and 500 on the right.
    if self.trial_enable:
        dh, dw = data_numpy.shape[:2]
        pw_l = 300
        pw_r = 500
        data_numpy = data_numpy[:, pw_l:(dw - pw_r), :]
        fovea[0] -= pw_l

    dh, dw = data_numpy.shape[:2]
    # TODO -- need to do sth for different image size
    if dh != self.image_size[1] or dw != self.image_size[0]:
        data_numpy = cv2.resize(
            data_numpy,
            dsize=(self.image_size[0], self.image_size[1]),
            interpolation=cv2.INTER_LINEAR)
        h_ratio = self.image_size[1] * 1.0 / dh
        w_ratio = self.image_size[0] * 1.0 / dw
        fovea[0] *= w_ratio
        fovea[1] *= h_ratio

    if self.is_train:
        # Random zoom in (then centre crop) or zoom out (then zero pad),
        # always ending at self.image_size again.
        if self.scale_factor > 0 and np.random.rand() > 0.5:
            sign = 1 if np.random.rand() > 0.5 else -1
            scale_factor = 1.0 + np.random.rand() * self.scale_factor * sign
            dh, dw = data_numpy.shape[:2]
            nh, nw = int(dh * scale_factor), int(dw * scale_factor)
            data_numpy = cv2.resize(data_numpy, dsize=(nw, nh),
                                    interpolation=cv2.INTER_LINEAR)
            fovea[0] *= (nw * 1.0 / dw)
            fovea[1] *= (nh * 1.0 / dh)
            if sign > 0:
                # crop
                ph = (nh - self.image_size[1]) // 2
                pw = (nw - self.image_size[0]) // 2
                data_numpy = data_numpy[ph:ph + self.image_size[1],
                                        pw:pw + self.image_size[0], :]
                fovea[0] -= pw
                fovea[1] -= ph
            else:
                # pad
                ph = (self.image_size[1] - nh) // 2
                pw = (self.image_size[0] - nw) // 2
                data_numpy = np.pad(
                    data_numpy,
                    ((ph, self.image_size[1] - nh - ph),
                     (pw, self.image_size[0] - nw - pw),
                     (0, 0)),
                    mode='constant')
                fovea[0] += pw
                fovea[1] += ph

    image_size = self.image_size

    # Crop the image from its centre.
    crop_size = self.crop_size
    pw = (image_size[0] - crop_size[0]) // 2
    ph = (image_size[1] - crop_size[1]) // 2
    data_numpy = data_numpy[ph:ph + crop_size[1], pw:pw + crop_size[0], :]
    image_size = crop_size
    fovea[0] -= pw
    fovea[1] -= ph

    # Parameters of the augmentation transform.
    c = image_size * 0.5
    r = 0
    s = 0

    if self.is_train:
        rf = self.rotation_factor
        sf = self.shift_factor
        # NOTE(review): the sign is drawn with randn() > 0.5 (a normal
        # sample), so it is not a fair coin -- rand() may have been
        # intended. Left unchanged to keep the augmentation distribution.
        sign = 1 if np.random.randn() > 0.5 else -1
        r = np.clip(sign * np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0
        sign = 1 if np.random.randn() > 0.5 else -1
        s = sign * np.random.rand() * sf

        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            fovea = fliplr_coord(fovea, data_numpy.shape[1])
            c[0] = data_numpy.shape[1] - c[0] - 1

    # The affine augmentation is skipped for part of the training samples.
    affine_applied = True
    if self.is_train and np.random.randn() > 0.9:
        r = 0
        s = 0
        affine_applied = False

    trans = get_affine_transform(c, r, image_size, shift=s)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR)
    fovea = affine_transform(fovea, trans)

    if self.is_train:
        patch_size = self.patch_size.astype(np.int32)
        pw = np.random.randint(0, int(image_size[0] - patch_size[0] + 1))
        ph = np.random.randint(0, int(image_size[1] - patch_size[1] + 1))
        orig_fovea = copy.deepcopy(fovea)
        fovea[0] -= pw
        fovea[1] -= ph
        # Re-draw the patch origin until the fovea lies inside the patch.
        # NOTE(review): this does not terminate if no patch can contain
        # the fovea (e.g. a fovea outside the image) -- confirm the
        # training data rules that out.
        while (fovea[0] < 0 or fovea[1] < 0
               or fovea[0] >= patch_size[0] or fovea[1] >= patch_size[1]):
            pw = np.random.randint(
                0, int(image_size[0] - patch_size[0] + 1))
            ph = np.random.randint(
                0, int(image_size[1] - patch_size[1] + 1))
            fovea[0] = orig_fovea[0] - pw
            fovea[1] = orig_fovea[1] - ph
        input = input[ph:ph + patch_size[1], pw:pw + patch_size[0], :]

    # Best effort: a failing transform is reported and the untransformed
    # image is used. Only ordinary errors are caught, so Ctrl-C and
    # interpreter shutdown still propagate.
    try:
        if self.transform:
            input = self.transform(input)
    except Exception:
        print("crash info: ", fovea, input.shape, affine_applied)

    meta = {'fovea': fovea, 'image': filename}

    if self.is_train:
        (heatmap_ds, heatmap_roi, roi_center, pixel_in_roi, offset_in_roi,
         fovea, fovea_in_roi, target_weight) = \
            self.generate_target(input, fovea)

        if self.clahe_enaled:
            # NOTE(review): `input` has already been through
            # self.transform at this point, and the result of cv2.merge is
            # used with `.unsqueeze` below -- this branch looks unfinished;
            # confirm before enabling it.
            data_numpy = copy.deepcopy(input)
            b_ch, g_ch, r_ch = cv2.split(data_numpy)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            b_ch = clahe.apply(b_ch)
            g_ch = clahe.apply(g_ch)
            r_ch = clahe.apply(r_ch)
            data_numpy = cv2.merge([b_ch, g_ch, r_ch])
            input_roi = crop_and_resize(
                data_numpy.unsqueeze(0),
                torch.from_numpy(roi_center).unsqueeze(0),
                output_size=2 * self.region_radius,
                scale=1.0)[0]
        else:
            # Crop the region of interest around roi_center.
            input_roi = crop_and_resize(
                input.unsqueeze(0),
                torch.from_numpy(roi_center).unsqueeze(0),
                output_size=2 * self.region_radius,
                scale=1.0)[0]

        heatmap_ds = torch.from_numpy(heatmap_ds).float()
        heatmap_roi = torch.from_numpy(heatmap_roi).float()
        roi_center = torch.from_numpy(roi_center).float()
        pixel_in_roi = torch.from_numpy(pixel_in_roi).float()
        offset_in_roi = torch.from_numpy(offset_in_roi).float()
        fovea = torch.from_numpy(fovea).float()
        fovea_in_roi = torch.from_numpy(fovea_in_roi).float()

        meta.update({
            'roi_center': roi_center,
            'pixel_in_roi': pixel_in_roi,
            'fovea_in_roi': fovea_in_roi
        })

        return (input, input_roi, heatmap_ds, heatmap_roi, offset_in_roi,
                target_weight, meta)
    else:
        return input, meta
def __getitem__(self, idx):
    """Build the training/eval tuple for sample ``idx``.

    The record is deep-copied from ``self.db`` so nothing below modifies the
    stored annotations. Scale/rotation/flip augmentation is only applied in
    training mode and only to records whose ``source`` is ``'mpii'``.

    Returns:
        ``(input, target, target_weight, meta)`` where ``input`` is the
        warped image (after ``self.transform`` when one is set), ``target``
        and ``target_weight`` are tensors built from
        ``self.generate_target`` and ``meta`` is a dict describing the
        applied transform and the joints.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    # Zip-backed datasets address images as '<root>/<source>/images.zip@/images/<name>'.
    image_dir = 'images.zip@' if self.data_format == 'zip' else ''
    image_file = osp.join(self.root, db_rec['source'], image_dir, 'images',
                          db_rec['image'])
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # cv2.imread signals failure by returning None; fail here with a clear
    # message instead of an unrelated error at the flip/warp below.
    if data_numpy is None:
        raise ValueError('Fail to read {}'.format(image_file))

    joints = db_rec['joints_2d'].copy()  # [union_joints, 2]
    joints_vis = db_rec['joints_vis'].copy()[:, :2]  # [union_joints, 2]
    assert len(joints) == self.num_joints
    assert len(joints_vis) == self.num_joints

    # Crop and scale according to the ground-truth box.
    center = np.array(db_rec['center']).copy()
    scale = np.array(db_rec['scale']).copy()
    rotation = 0
    if self.is_train and db_rec['source'] == 'mpii':
        sf = self.mpii_scale_factor
        rf = self.mpii_rotation_factor
        # Scale is multiplied by a Gaussian factor clipped to [1 - sf, 1 + sf].
        scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn with probability 0.6, clipped to [-2 * rf, 2 * rf].
        rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.mpii_flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                               self.mpii_flip_pairs)
            center[0] = width - center[0] - 1

    trans = get_affine_transform(center, scale, rotation, self.image_size)
    image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        image = self.transform(image)

    # Move the visible joints into the warped image frame in one call.
    # NOTE(review): this passes an (N, 2) array, so affine_transform is
    # presumably vectorised in this project - confirm.
    visible_joints = joints_vis[:, 0] > 0
    if np.any(visible_joints):
        joints[visible_joints, :2] = affine_transform(
            joints[visible_joints, :2], trans)
    # Joints that end up outside the warped image keep their visibility
    # flags; no out-of-bounds masking is performed here.

    target, target_weight = self.generate_target(joints, joints_vis,
                                                 db_rec['source'])
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'scale': scale,
        'center': center,
        'rotation': rotation,
        'joints_2d': db_rec['joints_2d'],
        'joints_2d_transformed': joints,
        'joints_vis': joints_vis,
        'source': db_rec['source']
    }
    return image, target, target_weight, meta
def __getitem__(self, idx):
    """Build the training/eval tuple for sample ``idx``.

    The record is deep-copied from ``self.db`` so augmentation never
    modifies the stored annotations.

    Returns:
        ``(input, target, target_weight, meta)`` where ``input`` is the
        warped image (after ``self.transform`` when one is set), ``target``
        and ``target_weight`` are tensors built from
        ``self.generate_target`` and ``meta`` is a dict with the image
        identifiers, the transformed joints and the crop parameters.

    Raises:
        ValueError: if the image cannot be read.
    """
    # Fetch the sample record for this index.
    db_rec = copy.deepcopy(self.db[idx])

    # Image path and optional identifiers.
    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    # Zip-backed datasets are read through the project's zip reader,
    # everything else straight from disk.
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read yields None. This must be checked before any colour
    # conversion: cvtColor on None raises its own error and would hide the
    # log line and ValueError below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # OpenCV loads BGR; convert when the model expects RGB.
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    # Keypoint coordinates and their visibility flags.
    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Crop centre and scale stored with the sample.
    c = db_rec['center']
    s = db_rec['scale']
    # Samples without a stored score default to 1.
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are visible and
        # a uniform draw falls below self.prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            # Replace the annotated centre/scale only if a half body was found.
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale is multiplied by a Gaussian factor clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn with probability 0.6 and clipped to
        # [-2 * rf, 2 * rf]; otherwise the sample is not rotated.
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # Horizontal flip: mirror the image, the joints and the crop centre.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            c[0] = data_numpy.shape[1] - c[0] - 1

    # Affine matrix mapping the (centre, scale, rotation) crop onto the
    # network input size, then warp the image with it.
    trans = get_affine_transform(c, s, r, self.image_size)
    image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    # Optional extra preprocessing supplied by the caller.
    if self.transform:
        image = self.transform(image)

    # Apply the same affine map to every visible keypoint.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Ground-truth heatmaps and per-joint weights.
    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return image, target, target_weight, meta
def __getitem__(self, idx):
    """Build the tuple for sample ``idx``, including interference heatmaps.

    Besides the heatmaps of the target instance, heatmaps are generated for
    the "interference" instances listed in the record and merged into an
    all-instance target in which interference peaks are scaled by 0.5.
    When the record lists no interference instances, a copy of the target
    instance's own joints is used instead.

    Returns:
        ``(input, target, target_weight, all_ins_target,
        all_ins_target_weight, meta)``. ``input`` is the warped image (after
        ``self.transform`` when one is set); the four targets/weights are
        tensors; ``meta`` is a dict that also carries the raw interference
        heatmaps and the output of ``self.heatmap2onehot``.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read yields None; check before cvtColor, which would
    # otherwise raise its own error and hide the message below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    if 'interference' in db_rec.keys():
        interference_joints = db_rec['interference']
        interference_joints_vis = db_rec['interference_vis']
    else:
        # Use copies, not aliases: the joints are flipped and affine
        # transformed in place below, and aliasing would apply those
        # transforms twice to the same array.
        interference_joints = [copy.deepcopy(joints)]
        interference_joints_vis = [copy.deepcopy(joints_vis)]

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are visible and
        # a uniform draw falls below self.prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale is multiplied by a Gaussian factor clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn with probability 0.6, clipped to [-2 * rf, 2 * rf].
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # Horizontal flip of the image, the target joints, the crop centre
        # and every interference instance.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                               self.flip_pairs)
            c[0] = width - c[0] - 1
            for i in range(len(interference_joints)):
                (interference_joints[i],
                 interference_joints_vis[i]) = fliplr_joints(
                     interference_joints[i], interference_joints_vis[i],
                     width, self.flip_pairs)

    trans = get_affine_transform(c, s, r, self.image_size)
    image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        image = self.transform(image)

    # Move the visible target joints into the warped image frame.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    # Interference heatmaps: element-wise maximum over all interference
    # instances, each warped with the same affine map.
    inter_target = np.zeros_like(target)
    inter_target_weight = np.zeros_like(target_weight)
    for inter_joints, inter_joints_vis in zip(interference_joints,
                                              interference_joints_vis):
        for j in range(self.num_joints):
            if inter_joints_vis[j, 0] > 0.0:
                inter_joints[j, 0:2] = affine_transform(
                    inter_joints[j, 0:2], trans)
        _inter_target, _inter_target_weight = self.generate_target(
            inter_joints, inter_joints_vis)
        inter_target = np.maximum(inter_target, _inter_target)
        inter_target_weight = np.maximum(inter_target_weight,
                                         _inter_target_weight)

    # Interference responses are halved before merging so the target
    # instance dominates wherever the two overlap.
    all_ins_target = np.maximum(inter_target * 0.5, target)
    all_ins_target_weight = np.maximum(inter_target_weight, target_weight)

    # Computed from the NumPy heatmaps, before the tensor conversion below.
    kpts_onehots = self.heatmap2onehot(target)

    # Heatmap labels as tensors.
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)
    all_ins_target = torch.from_numpy(all_ins_target)
    all_ins_target_weight = torch.from_numpy(all_ins_target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score,
        'interference_maps': inter_target,
        'kpt_cat_maps': kpts_onehots,
    }
    return (image, target, target_weight, all_ins_target,
            all_ins_target_weight, meta)
def __getitem__(self, idx):
    """Build the training/eval tuple for sample ``idx``.

    ``self.db[idx]`` is one annotated sample record; it is deep-copied so
    the augmentation below never changes the stored dataset.

    Returns:
        ``(input, target, target_weight, meta)``. ``input`` is the warped
        image after ``self.transform`` (when one is set), ``target`` and
        ``target_weight`` are tensors produced from
        ``self.generate_target``, and ``meta`` is a dict with the image
        identifiers, transformed joints and crop parameters.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    if self.data_format == 'zip':
        # Imported lazily: only zip-backed datasets need the reader.
        from utils import zipreader
        data_numpy = zipreader.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    else:
        # The two flags are OR-ed together: load as 3-channel colour and
        # ignore the EXIF orientation.
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    # A failed read returns None instead of raising. Fail loudly here, and
    # do it before cvtColor, which would otherwise raise its own error
    # first and make this check unreachable.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    # OpenCV loads images as BGR; convert when RGB input is requested.
    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    # Crop centre and scale as stored with the annotation.
    c = db_rec['center']
    s = db_rec['scale']
    # Records without a stored score default to 1.
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation is training-only. It is considered when the
        # sum of the first visibility column exceeds
        # self.num_joints_half_body and a uniform draw falls below
        # self.prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            # Replace the annotated centre/scale only if a half body was found.
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale is multiplied by a Gaussian factor clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn with probability 0.6, clipped to [-2 * rf, 2 * rf].
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        if self.flip and random.random() <= 0.5:
            # Mirror the pixels horizontally, then the ground-truth joints.
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(joints, joints_vis,
                                               data_numpy.shape[1],
                                               self.flip_pairs)
            # Mirrored x is (width - 1) - x, since the last column index
            # is width - 1.
            c[0] = data_numpy.shape[1] - c[0] - 1

    # One affine matrix encodes the crop, scale and rotation.
    trans = get_affine_transform(c, s, r, self.image_size)
    # The output is always self.image_size regardless of the source size.
    image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    # Optional extra preprocessing supplied when the dataset was built.
    if self.transform:
        image = self.transform(image)

    # warpAffine resamples pixels; affine_transform maps individual
    # coordinates with the same matrix. Only joints whose first visibility
    # entry is positive are moved.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    # Everything above works on NumPy arrays; convert the labels to tensors.
    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return image, target, target_weight, meta
def __getitem__(self, idx):
    """Build the training/eval tuple for sample ``idx``.

    The record is deep-copied from ``self.db`` so the augmentation below
    never modifies the stored annotations.

    Returns:
        ``(input, target, target_weight, meta)``. ``input`` is the warped
        image after ``self.transform`` (when one is set), ``target`` and
        ``target_weight`` are tensors produced from
        ``self.generate_target``, and ``meta`` is a dict with the image
        identifiers, transformed joints and crop parameters.

    Raises:
        ValueError: if the image cannot be read.
    """
    db_rec = copy.deepcopy(self.db[idx])

    image_file = db_rec['image']
    filename = db_rec['filename'] if 'filename' in db_rec else ''
    imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    if self.data_format == 'zip':
        from utils import zipreader
        data_numpy = zipreader.imread(image_file, read_flags)
    else:
        data_numpy = cv2.imread(image_file, read_flags)

    # A failed read yields None. Check before cvtColor: converting None
    # raises its own error and would hide the message below.
    if data_numpy is None:
        logger.error('=> fail to read {}'.format(image_file))
        raise ValueError('Fail to read {}'.format(image_file))

    if self.color_rgb:
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    joints = db_rec['joints_3d']
    joints_vis = db_rec['joints_3d_vis']

    c = db_rec['center']
    s = db_rec['scale']
    score = db_rec['score'] if 'score' in db_rec else 1
    r = 0

    if self.is_train:
        # Half-body augmentation: only when enough joints are visible and
        # a uniform draw falls below self.prob_half_body.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                and np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.half_body_transform(
                joints, joints_vis)

            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body

        sf = self.scale_factor
        rf = self.rotation_factor
        # Scale is multiplied by a Gaussian factor clipped to [1 - sf, 1 + sf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        # Rotation is drawn with probability 0.6, clipped to [-2 * rf, 2 * rf].
        r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
            if random.random() <= 0.6 else 0

        # Horizontal flip of the image, the joints and the crop centre.
        if self.flip and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            width = data_numpy.shape[1]
            joints, joints_vis = fliplr_joints(joints, joints_vis, width,
                                               self.flip_pairs)
            c[0] = width - c[0] - 1

    trans = get_affine_transform(c, s, r, self.image_size)
    image = cv2.warpAffine(
        data_numpy,
        trans, (int(self.image_size[0]), int(self.image_size[1])),
        flags=cv2.INTER_LINEAR)

    if self.transform:
        image = self.transform(image)

    # Map every visible joint with the same affine matrix as the image.
    for i in range(self.num_joints):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    target, target_weight = self.generate_target(joints, joints_vis)

    target = torch.from_numpy(target)
    target_weight = torch.from_numpy(target_weight)

    meta = {
        'image': image_file,
        'filename': filename,
        'imgnum': imgnum,
        'joints': joints,
        'joints_vis': joints_vis,
        'center': c,
        'scale': s,
        'rotation': r,
        'score': score
    }
    return image, target, target_weight, meta
def __getitem__(self, index):
    """Return normalised joint coordinates with some joints masked out.

    The annotated joints are optionally flipped and rotated (training only),
    normalised with ``normalize_coords`` and flattened. ``inputs`` is a copy
    of ``targets`` in which randomly chosen joints are set to zero:

    * training: with probability 0.8, up to ``self.cfg.random_mask_num``
      joints are masked, chosen among the visible ones;
    * evaluation: one to three joints are always masked, chosen among all
      joints.

    Returns:
        dict with keys ``inputs``, ``targets``, ``scaler``, ``centre``,
        ``bbox`` and ``img_path``. ``bbox`` is the padded box built from the
        stored annotation and is never flipped or rotated.

    Raises:
        ValueError: if the image cannot be read.
    """
    i_name = self.data_image[index]
    i_meta = self.data_annot[index]

    # The image list and the annotation list must be aligned.
    meta_name = i_meta["filename"]
    assert meta_name == i_name

    # The pixels are only needed for the image size and debug output.
    img_path = self.cfg.image / i_name
    img = cv2.imread(str(img_path))
    # cv2.imread returns None on failure; cvtColor would then raise an
    # unrelated error.
    if img is None:
        raise ValueError('Fail to read {}'.format(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Visibility and joint positions, ordered by numeric joint id.
    meta_visible = sorted(i_meta["is_visible"].items(), key=lambda x: int(x[0]))
    meta_joints = sorted(i_meta["joint_pos"].items(), key=lambda x: int(x[0]))

    # The annotation itself is only read, never written: i_meta is the
    # dataset's stored record, so an in-place edit would persist across
    # epochs. The arithmetic treats bbox as (centre x, centre y, width,
    # height) and pads each side by a different factor.
    meta_xywh = i_meta["bbox"]
    meta_xyxy = [
        int(meta_xywh[0] - meta_xywh[2] * .5 * 1.2),
        int(meta_xywh[1] - meta_xywh[3] * .5 * 1.1),
        int(meta_xywh[0] + meta_xywh[2] * .5 * 1.3),
        int(meta_xywh[1] + meta_xywh[3] * .5 * 1.2),
    ]

    meta_visible = np.array([v[1] for v in meta_visible]).reshape(16, 1)
    meta_joints = np.array([v[1] for v in meta_joints]).reshape(16, 2)

    if self.cfg.debug:
        debug_visualize(self.cfg.debug_path, index, img, meta_visible,
                        meta_joints, meta_xyxy, True, postfix="Original")

    if self.train:
        # Left-right flip of the joints.
        if self.cfg.flip and np.random.random() <= 0.5:
            img = img[:, ::-1, :]
            meta_joints[:, 0] = img.shape[1] - 1 - meta_joints[:, 0]
            for q, w in self.cfg.flip_pairs:
                # Swap the paired joints, and their visibility flags with
                # them, so each flag stays with the joint it describes.
                # Fancy indexing on the right copies, so the swap is safe.
                meta_joints[[q, w]] = meta_joints[[w, q]]
                meta_visible[[q, w]] = meta_visible[[w, q]]

        # Random rotation about the image centre; image size is unchanged.
        if self.cfg.affine_transform:
            centre = np.array([img.shape[1] / 2., img.shape[0] / 2.])
            scale = np.array(img.shape[:2][::-1])
            rotation = 0
            if self.cfg.rotate and random.random() <= 0.6:
                rotation = np.clip(
                    np.random.randn() * self.cfg.rotate_factor,
                    -self.cfg.rotate_factor, self.cfg.rotate_factor)
            trans = get_affine_transform(centre, scale, rotation,
                                         (img.shape[1], img.shape[0]))
            for j in range(self.cfg.joints_num):
                meta_joints[j, :2] = affine_transform(meta_joints[j, :2],
                                                      trans)

    # Normalise coordinates; the input starts as an exact copy of the target.
    target_meta_joints, neck2toros_scaler, toros_centre = \
        normalize_coords(meta_joints)
    input_meta_joints = target_meta_joints.copy()

    if self.train:
        # Only visible joints are candidates for masking.
        visibility = np.flatnonzero(meta_visible.reshape(-1) == 1)
        if random.random() > 0.2:
            random_mask_num = min(
                int(random.random() * (self.cfg.random_mask_num + 1)),
                len(visibility))
            if random_mask_num != 0:
                random_mask = np.random.choice(visibility, random_mask_num,
                                               replace=False)
                input_meta_joints[random_mask, :] = 0
    else:
        # Every joint is a candidate for masking at evaluation time.
        visibility = np.arange(meta_visible.size)
        # NOTE(review): this config-driven count is immediately replaced by
        # the hard-coded 1..3 draw below. Both draws are kept so the random
        # stream of a seeded evaluation is unchanged - confirm which one is
        # intended.
        random_mask_num = min(
            int(random.random() * (self.cfg.random_mask_num)) + 1,
            len(visibility))
        random_mask_num = int(random.random() * 3) + 1
        if random_mask_num != 0:
            random_mask = np.random.choice(visibility, random_mask_num,
                                           replace=False)
            input_meta_joints[random_mask, :] = 0

    assert input_meta_joints.shape[0] == self.cfg.joints_num

    input_meta_joints = input_meta_joints.flatten()
    target_meta_joints = target_meta_joints.flatten()

    returns = {
        'inputs': torch.from_numpy(input_meta_joints.copy()).float(),
        'targets': torch.from_numpy(target_meta_joints.copy()).float(),
        'scaler': neck2toros_scaler,
        'centre': torch.from_numpy(toros_centre.copy()).float(),
        'bbox': torch.from_numpy(np.array(meta_xyxy).copy()).float(),
        'img_path': [str(img_path)],
    }
    return returns