def train_dataloader(self): if self.hparams.no_data_aug: tfms = None else: tfms = augmentation(0.9) return self.__dataloader(tfms, self.train_fnames)
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, skeleton_path, original_shape, action_label, frame_num, start_frame_idx = data[ 'img_path'], data['skeleton_path'], data['original_shape'], data[ 'action_label'], data['frame_num'], data['start_frame_idx'] # video load video, skeleton_frame_idxs = load_video(img_path, frame_num, start_frame_idx) resized_shape = video.shape[1:3] # augmentation video, img2aug_trans, aug2img_trans, do_flip = augmentation( video, self.data_split) video = video.transpose(0, 3, 1, 2).astype( np.float32) / 255. # frame_num, channel_dim, height, width # skeleton information load pose_coords, pose_scores = self.load_skeleton(skeleton_path, skeleton_frame_idxs, original_shape, resized_shape) # process skeleton information pose_coords, pose_scores = process_skeleton(pose_coords, pose_scores, img2aug_trans, do_flip, self.flip_pairs, self.joint_num, resized_shape) """ # for debug # keypoint visualization for i in range(cfg.frame_per_seg): img = video[i,::-1,:,:].transpose(1,2,0) * 255 person_num = len(pose_coords[i]) for p in range(person_num): #for j in range(self.joint_num): #coord = (int(pose_coords[i][p][j][0]), int(pose_coords[i][p][j][1])) #cv2.circle(img, coord, radius=3, color=(255,0,0), thickness=-1, lineType=cv2.LINE_AA) #cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '_' + str(j) + '.jpg', img) coord = pose_coords[i][p].copy() coord[:,0] = coord[:,0] / cfg.input_hm_shape[1] * cfg.input_img_shape[1] coord[:,1] = coord[:,1] / cfg.input_hm_shape[0] * cfg.input_img_shape[0] img = vis_keypoints(img, pose_coords[i][p] * 4, self.skeleton) cv2.imwrite(str(idx) + '_' + str(action_label) + '_' + str(i) + '.jpg', img) """ inputs = { 'video': video, 'pose_coords': pose_coords, 'pose_scores': pose_scores } targets = {'action_label': action_label} meta_info = {'img_id': data['img_id']} return inputs, targets, meta_info
def __getitem__(self, idx): data = self.datalist[idx] img_path, bbox, joint, hand_type, hand_type_valid = data[ 'img_path'], data['bbox'], data['joint'], data['hand_type'], data[ 'hand_type_valid'] joint_cam = joint['cam_coord'].copy() joint_img = joint['img_coord'].copy() joint_valid = joint['valid'].copy() hand_type = self.handtype_str2array(hand_type) joint_coord = np.concatenate((joint_img, joint_cam[:, 2, None]), 1) # image load img = load_img(img_path) # augmentation img, joint_coord, joint_valid, hand_type, inv_trans = augmentation( img, bbox, joint_coord, joint_valid, hand_type, self.mode, self.joint_type) rel_root_depth = np.array([ joint_coord[self.root_joint_idx['left'], 2] - joint_coord[self.root_joint_idx['right'], 2] ], dtype=np.float32).reshape(1) root_valid = np.array( [ joint_valid[self.root_joint_idx['right']] * joint_valid[self.root_joint_idx['left']] ], dtype=np.float32).reshape( 1) if hand_type[0] * hand_type[1] == 1 else np.zeros( (1), dtype=np.float32) # transform to output heatmap space joint_coord, joint_valid, rel_root_depth, root_valid = transform_input_to_output_space( joint_coord, joint_valid, rel_root_depth, root_valid, self.root_joint_idx, self.joint_type) img = self.transform(img.astype(np.float32)) / 255. inputs = {'img': img} targets = { 'joint_coord': joint_coord, 'rel_root_depth': rel_root_depth, 'hand_type': hand_type } meta_info = { 'joint_valid': joint_valid, 'root_valid': root_valid, 'hand_type_valid': hand_type_valid, 'inv_trans': inv_trans, 'capture': int(data['capture']), 'cam': int(data['cam']), 'frame': int(data['frame']) } return inputs, targets, meta_info
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, bbox, smpl_param = data['img_path'], data['bbox'], data['smpl_param'] # img img = load_img(img_path) img, img2bb_trans, bb2img_trans, _, _ = augmentation(img, bbox, self.data_split) img = self.transform(img.astype(np.float32))/255. # smpl coordinates smpl_mesh_cam, smpl_joint_cam = self.get_smpl_coord(smpl_param) inputs = {'img': img} targets = {'fit_mesh_coord_cam': smpl_mesh_cam} meta_info = {'bb2img_trans': bb2img_trans} return inputs, targets, meta_info
def get_batch_from_txt_files_(self): self.index += 1 #for i, f in enumerate(fs): img = load_image(osp.join('/home/ubuntu/3d-testing/InterHand2.6M/main/', self.fs[self.index]).strip()) box = self.bs[self.index].split(",") bbox = [] for b in box: bbox.append(float(b.replace("\n", ""))) img, inv_trans = augmentation(img, np.array(bbox), None, None, None, 'test', None) img = self.transform(img.astype(np.float32))/255. # image = cv2.imread(osp.join('/home/ubuntu/3d-testing/InterHand2.6M/main/', self.fs[self.index]).strip()) # image = cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[0])+int(bbox[2]), int(bbox[0]) + int(bbox[3])), (255,0,0), 3) # print(cv2.imwrite("bbox_testing/" + str(self.index)+".jpg", image)) return img, osp.join('/home/ubuntu/3d-testing/InterHand2.6M/main/', self.fs[self.index]).strip(), inv_trans
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape, bbox, joint_cam, cam_param, mano_param = data[ 'img_path'], data['img_shape'], data['bbox'], data[ 'joint_cam'], data['cam_param'], data['mano_param'] # img img = load_img(img_path) img, img2bb_trans, bb2img_trans, rot, _ = augmentation( img, bbox, self.data_split, exclude_flip=True ) # FreiHAND dataset only contains right hands. do not perform flip aug. img = self.transform(img.astype(np.float32)) / 255. if self.data_split == 'train': # mano coordinates mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = self.get_mano_coord( mano_param, cam_param) mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam)) focal, princpt = cam_param['focal'], cam_param['princpt'] mano_coord_img = cam2pixel(mano_coord_cam, focal, princpt) # affine transform x,y coordinates. root-relative depth mano_coord_img_xy1 = np.concatenate( (mano_coord_img[:, :2], np.ones_like(mano_coord_img[:, :1])), 1) mano_coord_img[:, :2] = np.dot(img2bb_trans, mano_coord_img_xy1.transpose( 1, 0)).transpose(1, 0)[:, :2] root_joint_depth = mano_coord_cam[self.vertex_num + self.root_joint_idx][2] mano_coord_img[:, 2] = mano_coord_img[:, 2] - root_joint_depth mano_coord_img[:, 0] = mano_coord_img[:, 0] / cfg.input_img_shape[ 1] * cfg.output_hm_shape[2] mano_coord_img[:, 1] = mano_coord_img[:, 1] / cfg.input_img_shape[ 0] * cfg.output_hm_shape[1] mano_coord_img[:, 2] = (mano_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0] # check truncation mano_trunc = ((mano_coord_img[:,0] >= 0) * (mano_coord_img[:,0] < cfg.output_hm_shape[2]) * \ (mano_coord_img[:,1] >= 0) * (mano_coord_img[:,1] < cfg.output_hm_shape[1]) * \ (mano_coord_img[:,2] >= 0) * (mano_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32) # split mesh and joint coordinates mano_mesh_img = mano_coord_img[:self.vertex_num] mano_joint_img = mano_coord_img[self.vertex_num:] mano_mesh_trunc = mano_trunc[:self.vertex_num] mano_joint_trunc = mano_trunc[self.vertex_num:] # 3D data rotation augmentation rot_aug_mat = np.array( [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]], dtype=np.float32) # parameter mano_pose = mano_pose.reshape(-1, 3) root_pose = mano_pose[self.root_joint_idx, :] root_pose, _ = cv2.Rodrigues(root_pose) root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose)) mano_pose[self.root_joint_idx] = root_pose.reshape(3) mano_pose = mano_pose.reshape(-1) # mano coordinate mano_joint_cam = mano_joint_cam - mano_joint_cam[ self.root_joint_idx, None] # root-relative mano_joint_cam = np.dot(rot_aug_mat, mano_joint_cam.transpose( 1, 0)).transpose(1, 0) orig_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy orig_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy orig_joint_valid = np.zeros((self.joint_num, 1), dtype=np.float32) # dummy orig_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32) # dummy inputs = {'img': img} targets = { 'orig_joint_img': orig_joint_img, 'fit_joint_img': mano_joint_img, 'fit_mesh_img': mano_mesh_img, 'orig_joint_cam': orig_joint_cam, 'fit_joint_cam': mano_joint_cam, 'pose_param': mano_pose, 'shape_param': mano_shape } meta_info = { 'orig_joint_valid': orig_joint_valid, 'orig_joint_trunc': orig_joint_trunc, 'fit_joint_trunc': mano_joint_trunc, 'fit_mesh_trunc': mano_mesh_trunc, 'is_valid_fit': float(True), 'is_3D': float(True) } else: inputs = {'img': img} targets = {} meta_info = {'bb2img_trans': bb2img_trans} return inputs, targets, meta_info
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape, bbox, smpl_param, cam_param = data[ 'img_path'], data['img_shape'], data['bbox'], data[ 'smpl_param'], data['cam_param'] # img img = load_img(img_path) img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation( img, bbox, self.data_split) img = self.transform(img.astype(np.float32)) / 255. # muco gt muco_joint_img = data['joint_img'] muco_joint_cam = data['joint_cam'] muco_joint_cam = muco_joint_cam - muco_joint_cam[ self.muco_root_joint_idx, None, :] # root-relative muco_joint_valid = data['joint_valid'] if do_flip: muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0] muco_joint_cam[:, 0] = -muco_joint_cam[:, 0] for pair in self.muco_flip_pairs: muco_joint_img[pair[0], :], muco_joint_img[ pair[1], :] = muco_joint_img[ pair[1], :].copy(), muco_joint_img[pair[0], :].copy() muco_joint_cam[pair[0], :], muco_joint_cam[ pair[1], :] = muco_joint_cam[ pair[1], :].copy(), muco_joint_cam[pair[0], :].copy() muco_joint_valid[pair[0], :], muco_joint_valid[ pair[1], :] = muco_joint_valid[pair[1], :].copy( ), muco_joint_valid[pair[0], :].copy() muco_joint_img_xy1 = np.concatenate( (muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1) muco_joint_img[:, :2] = np.dot(img2bb_trans, muco_joint_img_xy1.transpose( 1, 0)).transpose(1, 0) muco_joint_img[:, 0] = muco_joint_img[:, 0] / cfg.input_img_shape[ 1] * cfg.output_hm_shape[2] muco_joint_img[:, 1] = muco_joint_img[:, 1] / cfg.input_img_shape[ 0] * cfg.output_hm_shape[1] muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[ self.muco_root_joint_idx][2] # root-relative muco_joint_img[:, 2] = ( muco_joint_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[ 0] # change cfg.bbox_3d_size from meter to milimeter # check truncation muco_joint_trunc = muco_joint_valid * ((muco_joint_img[:,0] >= 0) * (muco_joint_img[:,0] < cfg.output_hm_shape[2]) * \ (muco_joint_img[:,1] >= 0) * (muco_joint_img[:,1] < cfg.output_hm_shape[1]) * \ (muco_joint_img[:,2] >= 0) * (muco_joint_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32) # transform muco joints to target db joints muco_joint_img = transform_joint_to_other_db(muco_joint_img, self.muco_joints_name, self.joints_name) muco_joint_cam = transform_joint_to_other_db(muco_joint_cam, self.muco_joints_name, self.joints_name) muco_joint_valid = transform_joint_to_other_db(muco_joint_valid, self.muco_joints_name, self.joints_name) muco_joint_trunc = transform_joint_to_other_db(muco_joint_trunc, self.muco_joints_name, self.joints_name) if smpl_param is not None: # smpl coordinates smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord( smpl_param, cam_param, do_flip, img_shape) smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam)) focal, princpt = cam_param['focal'], cam_param['princpt'] smpl_coord_img = cam2pixel(smpl_coord_cam, focal, princpt) # affine transform x,y coordinates. root-relative depth smpl_coord_img_xy1 = np.concatenate( (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])), 1) smpl_coord_img[:, :2] = np.dot(img2bb_trans, smpl_coord_img_xy1.transpose( 1, 0)).transpose(1, 0)[:, :2] smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[ self.vertex_num + self.root_joint_idx][2] smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[ 1] * cfg.output_hm_shape[2] smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[ 0] * cfg.output_hm_shape[1] smpl_coord_img[:, 2] = ( smpl_coord_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[ 0] # change cfg.bbox_3d_size from meter to milimeter # check truncation smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \ (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \ (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32) # split mesh and joint coordinates smpl_mesh_img = smpl_coord_img[:self.vertex_num] smpl_joint_img = smpl_coord_img[self.vertex_num:] smpl_mesh_trunc = smpl_trunc[:self.vertex_num] smpl_joint_trunc = smpl_trunc[self.vertex_num:] # if fitted mesh is too far from muco gt, discard it is_valid_fit = True error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam, do_flip) if error > self.fitting_thr: is_valid_fit = False else: smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32) # dummy smpl_pose = np.zeros((72), dtype=np.float32) # dummy smpl_shape = np.zeros((10), dtype=np.float32) # dummy smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32) # dummy smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32) # dummy is_valid_fit = False # 3D data rotation augmentation rot_aug_mat = np.array( [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]], dtype=np.float32) # muco coordinate muco_joint_cam = np.dot(rot_aug_mat, muco_joint_cam.transpose( 1, 0)).transpose(1, 0) / 1000 # milimeter to meter # parameter smpl_pose = smpl_pose.reshape(-1, 3) root_pose = smpl_pose[self.root_joint_idx, :] root_pose, _ = cv2.Rodrigues(root_pose) root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose)) smpl_pose[self.root_joint_idx] = root_pose.reshape(3) smpl_pose = smpl_pose.reshape(-1) # smpl coordinate smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None] # root-relative smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose( 1, 0)).transpose(1, 0) / 1000 # milimeter to meter inputs = {'img': img} targets = { 'orig_joint_img': muco_joint_img, 'fit_joint_img': smpl_joint_img, 'fit_mesh_img': smpl_mesh_img, 'orig_joint_cam': muco_joint_cam, 'fit_joint_cam': smpl_joint_cam, 'pose_param': smpl_pose, 'shape_param': smpl_shape } meta_info = { 'orig_joint_valid': muco_joint_valid, 'orig_joint_trunc': muco_joint_trunc, 'fit_joint_trunc': smpl_joint_trunc, 'fit_mesh_trunc': smpl_mesh_trunc, 'is_valid_fit': float(is_valid_fit), 'is_3D': float(True) } return inputs, targets, meta_info
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape, bbox = data['img_path'], data['img_shape'], data[ 'bbox'] # image load and affine transform img = load_img(img_path) img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation( img, bbox, self.data_split) img = self.transform(img.astype(np.float32)) / 255. if self.data_split == 'train': # coco gt coco_joint_img = data['joint_img'] coco_joint_valid = data['joint_valid'] if do_flip: coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0] for pair in self.coco_flip_pairs: coco_joint_img[pair[0], :], coco_joint_img[ pair[1], :] = coco_joint_img[pair[1], :].copy( ), coco_joint_img[pair[0], :].copy() coco_joint_valid[pair[0], :], coco_joint_valid[ pair[1], :] = coco_joint_valid[pair[1], :].copy( ), coco_joint_valid[pair[0], :].copy() coco_joint_img_xy1 = np.concatenate( (coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])), 1) coco_joint_img[:, :2] = np.dot(img2bb_trans, coco_joint_img_xy1.transpose( 1, 0)).transpose(1, 0) coco_joint_img[:, 0] = coco_joint_img[:, 0] / cfg.input_img_shape[ 1] * cfg.output_hm_shape[2] coco_joint_img[:, 1] = coco_joint_img[:, 1] / cfg.input_img_shape[ 0] * cfg.output_hm_shape[1] # backup for calculating fitting error _coco_joint_img = coco_joint_img.copy() _coco_joint_valid = coco_joint_valid.copy() # check truncation coco_joint_trunc = coco_joint_valid * ((coco_joint_img[:,0] >= 0) * (coco_joint_img[:,0] < cfg.output_hm_shape[2]) * \ (coco_joint_img[:,1] >= 0) * (coco_joint_img[:,1] < cfg.output_hm_shape[1])).reshape(-1,1).astype(np.float32) # transform coco joints to target db joints coco_joint_img = transform_joint_to_other_db( coco_joint_img, self.coco_joints_name, self.joints_name) coco_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy coco_joint_valid = transform_joint_to_other_db( coco_joint_valid, self.coco_joints_name, self.joints_name) coco_joint_trunc = transform_joint_to_other_db( coco_joint_trunc, self.coco_joints_name, self.joints_name) smplify_result = data['smplify_result'] if smplify_result is not None: # use fitted mesh smpl_param, cam_param = smplify_result[ 'smpl_param'], smplify_result['cam_param'] smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord( smpl_param, cam_param, do_flip, img_shape) smpl_coord_cam = np.concatenate( (smpl_mesh_cam, smpl_joint_cam)) smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'], cam_param['princpt']) # x,y affine transform, root-relative depth smpl_coord_img_xy1 = np.concatenate( (smpl_coord_img[:, :2], np.ones_like( smpl_coord_img[:, 0:1])), 1) smpl_coord_img[:, :2] = np.dot( img2bb_trans, smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2] smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[ self.vertex_num + self.root_joint_idx][2] smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[ 1] * cfg.output_hm_shape[2] smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[ 0] * cfg.output_hm_shape[1] smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0] # check truncation smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \ (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \ (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32) # split mesh and joint coordinates smpl_mesh_img = smpl_coord_img[:self.vertex_num] smpl_joint_img = smpl_coord_img[self.vertex_num:] smpl_mesh_trunc = smpl_trunc[:self.vertex_num] smpl_joint_trunc = smpl_trunc[self.vertex_num:] # if fitted mesh is too far from h36m gt, discard it is_valid_fit = True error = self.get_fitting_error(_coco_joint_img, smpl_mesh_cam, cam_param, img2bb_trans, _coco_joint_valid) if error > self.fitting_thr: is_valid_fit = False else: smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32) # dummy smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32) # dummy smpl_pose = np.zeros((72), dtype=np.float32) # dummy smpl_shape = np.zeros((10), dtype=np.float32) # dummy smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32) smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32) is_valid_fit = False # 3D data rotation augmentation rot_aug_mat = np.array( [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]], dtype=np.float32) # parameter smpl_pose = smpl_pose.reshape(-1, 3) root_pose = smpl_pose[self.root_joint_idx, :] root_pose, _ = cv2.Rodrigues(root_pose) root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose)) smpl_pose[self.root_joint_idx] = root_pose.reshape(3) smpl_pose = smpl_pose.reshape(-1) # smpl coordinate smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[ self.root_joint_idx, None] # root-relative smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose( 1, 0)).transpose(1, 0) inputs = {'img': img} targets = { 'orig_joint_img': coco_joint_img, 'fit_joint_img': smpl_joint_img, 'fit_mesh_img': smpl_mesh_img, 'orig_joint_cam': coco_joint_cam, 'fit_joint_cam': smpl_joint_cam, 'pose_param': smpl_pose, 'shape_param': smpl_shape } meta_info = { 'orig_joint_valid': coco_joint_valid, 'orig_joint_trunc': coco_joint_trunc, 'fit_joint_trunc': smpl_joint_trunc, 'fit_mesh_trunc': smpl_mesh_trunc, 'is_valid_fit': float(is_valid_fit), 'is_3D': float(False) } return inputs, targets, meta_info else: inputs = {'img': img} targets = {} meta_info = {'bb2img_trans': bb2img_trans} return inputs, targets, meta_info
def run_webcam(): args = parse_args() cfg.set_args(args.gpu_ids) cudnn.benchmark = True if cfg.dataset == 'InterHand2.6M': assert args.test_set, 'Test set is required. Select one of test/val' assert args.annot_subset, "Please set proper annotation subset. Select one of all, human_annot, machine_annot" else: args.test_set = 'test' args.annot_subset = 'all' tester = Tester(args.test_epoch) tester._make_batch_generator(args.test_set, args.annot_subset) tester._make_model() detector = YOLO_AKASH("best.pt", save_img=True) test_loader = CustomLoader(transforms.ToTensor(), 'my_hand', 1) cap = cv2.VideoCapture(0) with torch.no_grad(): while (True): ret, frame = cap.read() inputs = {} preds = { 'joint_coord': [], 'rel_root_depth': [], 'hand_type': [] } img = test_loader.process_frame(frame) inputs['img'] = torch.reshape(img, (1, 3, 256, 256)) boxes, confs, labels = detector.detect_image(img) img, inv_trans = augmentation(img, np.array(bbox), None, None, None, 'test', None) # forward out = tester.model(inputs) joint_coord_out = out['joint_coord'].cpu().numpy() rel_root_depth_out = out['rel_root_depth'].cpu().numpy() hand_type_out = out['hand_type'].cpu().numpy() preds['joint_coord'].append(joint_coord_out) preds['rel_root_depth'].append(rel_root_depth_out) preds['hand_type'].append(hand_type_out) # evaluate preds = {k: np.concatenate(v) for k, v in preds.items()} ##Changes need to be done from here onwards !!!!!!!!!!!!!!!!!!!!!!!! keypoint_frame = test_loader.visualize(preds, filename, img_path, inv_trans) cv2.imshow('webcam', keypoint_frame) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()