def read_data(folder):
    """Build the Penn Action dataset dictionary.

    Every ``*.mat`` file under ``<folder>/labels`` is read with ``load_mat``
    and paired with the ``*.jpg`` frames found in
    ``<folder>/frames/<label file stem>``.  For each video the 2D keypoints
    are re-ordered, a square box is derived per frame and features are
    computed with ``extract_features``.

    Args:
        folder: Dataset root containing the ``labels`` and ``frames``
            sub-directories.

    Returns:
        dict with keys ``img_name``, ``joints2D``, ``bbox``, ``vid_name`` and
        ``features``; each value is the concatenation (along axis 0) of the
        per-video arrays.

    Raises:
        ValueError: from ``np.concatenate`` if no label file was found.
    """
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    # The joint permutation does not depend on the video: look it up once.
    perm_idxs = get_perm_idxs('pennaction', 'common')

    file_names = sorted(glob.glob(folder + '/labels/' + '*.mat'))

    for fname in tqdm(file_names):
        vid_dict = load_mat(fname)
        n_frames = vid_dict['nframes']

        # Frames live in a directory named after the label file's stem.
        vid_stem = fname.strip().split('/')[-1].split('.')[0]
        imgs = sorted(glob.glob(folder + '/frames/' + vid_stem + '/*.jpg'))

        # Pack x / y / visibility into one (nframes, 13, 3) array.
        kp_2d = np.zeros((n_frames, 13, 3))
        kp_2d[:, :, 0] = vid_dict['x']
        kp_2d[:, :, 1] = vid_dict['y']
        kp_2d[:, :, 2] = vid_dict['visibility']
        kp_2d = kp_2d[:, perm_idxs, :]

        # fix inconsistency: widen to 14 joints; the first 12 permuted joints
        # keep their slot, the 13th moves to index 13 and index 12 stays zero.
        n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
        n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
        n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
        kp_2d = n_kp_2d

        bbox = np.zeros((n_frames, 4))
        for fr_id, fr in enumerate(kp_2d):
            u, d, l, r = calc_kpt_bound(fr)
            # The centre is deliberately rounded through float32, as before.
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            # Square box on the longer side.  max() replaces the former
            # ``w / h > 1`` test, which divided by zero for a flat box.
            side = max(r - l, d - u)
            bbox[fr_id, :] = np.array([center[0], center[1], side, side])

        dataset['vid_name'].append(np.array([f'{fname}'] * n_frames))
        dataset['img_name'].append(np.array(imgs))
        dataset['joints2D'].append(kp_2d)
        dataset['bbox'].append(bbox)

        features = extract_features(model, np.array(imgs), bbox,
                                    dataset='pennaction', debug=False)
        dataset['features'].append(features)

    # Concatenate the per-video lists directly.  Wrapping them in np.array()
    # first fails on recent NumPy when videos differ in length (ragged input).
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    return dataset
def read_data_train(dataset_path, debug=False):
    """Build the training dictionary from ``S<user>/Seq<seq>/video_<vid>`` folders.

    For every subject (1-8), sequence (1-2) and video index (0-2, 4-8) the
    frames matching ``video_<vid>/*.jpg`` are paired with the ``annot2`` /
    ``annot3`` entries of ``annot.mat``.  Frames with at least one 2D joint
    outside the 2048x2048 image are dropped, and each dropped frame starts a
    new ``_seg<k>`` suffix in the video id so that features are extracted per
    contiguous run of kept frames.

    Args:
        dataset_path: Root directory holding the ``S<user>`` folders.
        debug: Accepted for interface compatibility; not used by this
            function (``extract_features`` is always called with
            ``debug=False``).

    Returns:
        dict with keys ``vid_name``, ``frame_id``, ``joints3D``, ``joints2D``,
        ``bbox``, ``img_name`` (one entry per kept frame) and ``features``
        (per-segment feature arrays concatenated along axis 0).

    Raises:
        ValueError: from ``np.concatenate`` if no segment produced features.
    """
    h, w = 2048, 2048  # image size used for the in-frame test below
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    # training data
    user_list = range(1, 9)
    seq_list = range(1, 3)
    vid_list = list(range(3)) + list(range(4, 9))

    for user_i in user_list:
        for seq_i in seq_list:
            seq_path = os.path.join(dataset_path, 'S' + str(user_i),
                                    'Seq' + str(seq_i))
            # mat file with annotations; parse it once instead of once per key
            annot_file = os.path.join(seq_path, 'annot.mat')
            annots = sio.loadmat(annot_file)
            annot2 = annots['annot2']
            annot3 = annots['annot3']

            for vid_i in vid_list:
                # image folder
                imgs_path = os.path.join(seq_path, 'video_' + str(vid_i))
                # per frame
                pattern = os.path.join(imgs_path, '*.jpg')
                img_list = sorted(glob.glob(pattern))

                vid_used_frames = []
                vid_used_joints = []
                vid_used_bbox = []
                vid_segments = []
                vid_uniq_id = "subj" + str(user_i) + '_seq' + str(
                    seq_i) + "_vid" + str(vid_i) + "_seg0"

                for i, img_i in tqdm_enumerate(img_list):
                    # for each image we store the relevant annotations
                    img_name = img_i.split('/')[-1]

                    # 2D joints with a constant confidence column of ones
                    joints_2d_raw = np.reshape(annot2[vid_i][0][i], (1, 28, 2))
                    joints_2d_raw = np.append(joints_2d_raw,
                                              np.ones((1, 28, 1)), axis=2)
                    joints_2d = convert_kps(joints_2d_raw, "mpii3d",
                                            "spin").reshape((-1, 3))

                    # presumably millimetres -> metres; confirm with the
                    # dataset documentation
                    joints_3d_raw = np.reshape(annot3[vid_i][0][i],
                                               (1, 28, 3)) / 1000
                    joints_3d = convert_kps(joints_3d_raw, "mpii3d",
                                            "spin").reshape((-1, 3))

                    # box from the joints that are not all-zero rows
                    bbox = get_bbox_from_kp2d(
                        joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

                    # make the 3D joints relative to joint 39, which is used
                    # as the root (presumably the pelvis in the 'spin' layout
                    # - TODO confirm)
                    joints_3d = joints_3d - joints_3d[39]

                    # check that all joints are visible
                    x_in = np.logical_and(joints_2d[:, 0] < w,
                                          joints_2d[:, 0] >= 0)
                    y_in = np.logical_and(joints_2d[:, 1] < h,
                                          joints_2d[:, 1] >= 0)
                    ok_pts = np.logical_and(x_in, y_in)
                    if np.sum(ok_pts) < joints_2d.shape[0]:
                        # Start a new segment numbered after the last stored
                        # frame.  Fall back to the current id when nothing
                        # has been stored yet; indexing the empty list used
                        # to raise IndexError in that case.
                        last_id = (dataset['vid_name'][-1]
                                   if dataset['vid_name'] else vid_uniq_id)
                        vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                            str(int(last_id.split("_")[-1][3:]) + 1)
                        continue

                    dataset['vid_name'].append(vid_uniq_id)
                    dataset['frame_id'].append(img_name.split(".")[0])
                    dataset['img_name'].append(img_i)
                    dataset['joints2D'].append(joints_2d)
                    dataset['joints3D'].append(joints_3d)
                    dataset['bbox'].append(bbox)

                    vid_segments.append(vid_uniq_id)
                    vid_used_frames.append(img_i)
                    vid_used_joints.append(joints_2d)
                    vid_used_bbox.append(bbox)

                # Segment boundaries: ids[k] is the first kept-frame index of
                # segment k.  The last entry overshoots by one, which slicing
                # clamps.
                vid_segments = np.array(vid_segments)
                n_segments = len(set(vid_segments))
                ids = np.zeros((n_segments + 1))
                ids[-1] = len(vid_used_frames) + 1
                change_points = np.where(
                    vid_segments[:-1] != vid_segments[1:])[0]
                if change_points.size != 0:
                    ids[1:-1] = change_points + 1

                frames_arr = np.array(vid_used_frames)
                joints_arr = np.array(vid_used_joints)
                for seg_i in tqdm(range(n_segments)):
                    start, stop = int(ids[seg_i]), int(ids[seg_i + 1])
                    features = extract_features(
                        model,
                        frames_arr[start:stop],
                        vid_used_bbox[start:stop],
                        kp_2d=joints_arr[start:stop],
                        dataset='spin',
                        debug=False)
                    dataset['features'].append(features)

    for k in dataset.keys():
        if k == 'features':
            # Segments differ in length, so concatenate the list directly;
            # np.array() on a ragged list fails on recent NumPy.
            dataset[k] = np.concatenate(dataset[k])
        else:
            dataset[k] = np.array(dataset[k])

    return dataset
def read_test_data(dataset_path):
    """Build the test dictionary from ``mpi_inf_3dhp_test_set/TS1`` .. ``TS6``.

    For each subject the ``annot2``, ``univ_annot3`` and ``valid_frame``
    datasets are read from ``annot_data.mat`` (opened with h5py).  Every
    frame image is loaded to obtain its size; frames with at least one 2D
    joint outside the image are dropped and start a new ``_seg<k>`` suffix in
    the video id, so that features are extracted per contiguous run of kept
    frames.

    Args:
        dataset_path: Directory that contains ``mpi_inf_3dhp_test_set``.

    Returns:
        dict with keys ``vid_name``, ``frame_id``, ``joints3D``, ``joints2D``,
        ``bbox``, ``img_name``, ``valid_i`` (one entry per kept frame) and
        ``features`` (per-segment feature arrays concatenated along axis 0).

    Raises:
        ValueError: from ``np.concatenate`` if no segment produced features.
    """
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        "valid_i": []
    }

    model = spin.get_pretrained_hmr()

    user_list = range(1, 7)

    for user_i in user_list:
        print('Subject', user_i)
        seq_path = os.path.join(dataset_path, 'mpi_inf_3dhp_test_set',
                                'TS' + str(user_i))
        # mat file with annotations; np.array() copies the data out, so the
        # file can be closed as soon as the three arrays are read
        annot_file = os.path.join(seq_path, 'annot_data.mat')
        with h5py.File(annot_file, 'r') as mat_as_h5:
            annot2 = np.array(mat_as_h5['annot2'])
            annot3 = np.array(mat_as_h5['univ_annot3'])
            valid = np.array(mat_as_h5['valid_frame'])

        vid_used_frames = []
        vid_used_joints = []
        vid_used_bbox = []
        vid_segments = []
        vid_uniq_id = "subj" + str(user_i) + "_seg0"

        for frame_i, valid_i in tqdm(enumerate(valid)):
            # image file names are 1-based and zero-padded to six digits
            img_i = os.path.join('mpi_inf_3dhp_test_set', 'TS' + str(user_i),
                                 'imageSequence',
                                 'img_' + str(frame_i + 1).zfill(6) + '.jpg')

            # 2D joints with a constant confidence column of ones
            joints_2d_raw = np.expand_dims(annot2[frame_i, 0, :, :], axis=0)
            joints_2d_raw = np.append(joints_2d_raw, np.ones((1, 17, 1)),
                                      axis=2)
            joints_2d = convert_kps(joints_2d_raw, src="mpii3d_test",
                                    dst="spin").reshape((-1, 3))

            # presumably millimetres -> metres; confirm with the dataset
            # documentation
            joints_3d_raw = np.reshape(annot3[frame_i, 0, :, :],
                                       (1, 17, 3)) / 1000
            joints_3d = convert_kps(joints_3d_raw, "mpii3d_test",
                                    "spin").reshape((-1, 3))
            # subtract joint 39 (the pelvis, per the original comment) so it
            # becomes the root
            joints_3d = joints_3d - joints_3d[39]

            # box from the joints that are not all-zero rows
            bbox = get_bbox_from_kp2d(
                joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

            # check that all joints are visible
            img_file = os.path.join(dataset_path, img_i)
            I = cv2.imread(img_file)
            h, w, _ = I.shape
            x_in = np.logical_and(joints_2d[:, 0] < w, joints_2d[:, 0] >= 0)
            y_in = np.logical_and(joints_2d[:, 1] < h, joints_2d[:, 1] >= 0)
            ok_pts = np.logical_and(x_in, y_in)

            if np.sum(ok_pts) < joints_2d.shape[0]:
                # Start a new segment numbered after the last stored frame.
                # Fall back to the current id when nothing has been stored
                # yet; indexing the empty list used to raise IndexError.
                last_id = (dataset['vid_name'][-1]
                           if dataset['vid_name'] else vid_uniq_id)
                vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                    str(int(last_id.split("_")[-1][3:]) + 1)
                continue

            dataset['vid_name'].append(vid_uniq_id)
            dataset['frame_id'].append(img_file.split("/")[-1].split(".")[0])
            dataset['img_name'].append(img_file)
            dataset['joints2D'].append(joints_2d)
            dataset['joints3D'].append(joints_3d)
            dataset['bbox'].append(bbox)
            dataset['valid_i'].append(valid_i)

            vid_segments.append(vid_uniq_id)
            vid_used_frames.append(img_file)
            vid_used_joints.append(joints_2d)
            vid_used_bbox.append(bbox)

        # Segment boundaries: ids[k] is the first kept-frame index of segment
        # k.  The last entry overshoots by one, which slicing clamps.
        vid_segments = np.array(vid_segments)
        n_segments = len(set(vid_segments))
        ids = np.zeros((n_segments + 1))
        ids[-1] = len(vid_used_frames) + 1
        change_points = np.where(vid_segments[:-1] != vid_segments[1:])[0]
        if change_points.size != 0:
            ids[1:-1] = change_points + 1

        frames_arr = np.array(vid_used_frames)
        joints_arr = np.array(vid_used_joints)
        for seg_i in tqdm(range(n_segments)):
            start, stop = int(ids[seg_i]), int(ids[seg_i + 1])
            features = extract_features(
                model,
                frames_arr[start:stop],
                vid_used_bbox[start:stop],
                kp_2d=joints_arr[start:stop],
                dataset='spin',
                debug=False)
            dataset['features'].append(features)

    for k in dataset.keys():
        if k == 'features':
            # Segments differ in length, so concatenate the list directly;
            # np.array() on a ragged list fails on recent NumPy.
            dataset[k] = np.concatenate(dataset[k])
        else:
            dataset[k] = np.array(dataset[k])

    return dataset
def read_single_record(fname):
    """Read one TFRecord file and extract keypoints and features per example.

    Each serialized ``tf.train.Example`` holds ``meta/N`` JPEG-encoded frames
    plus ``image/xys``, ``image/visibilities``, ``image/face_pts`` and
    ``image/toe_pts`` annotations.  Per frame these are stacked into a
    ``(25, 3)`` keypoint array (14 body + 5 face + 6 toe points).  If the
    example has an ``image/phis`` feature the x/y rows are mapped from
    ``[-1, 1]`` to pixel coordinates of a 224-pixel image.

    Args:
        fname: Path of the TFRecord file.

    Returns:
        dict with keys ``vid_name``, ``frame_id``, ``joints2D`` and
        ``features``, each concatenated over all examples along axis 0.

    Raises:
        ValueError: from ``np.concatenate`` if the file has no examples.
    """
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints2D': [],  # should contain openpose keypoints only
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    # Build the JPEG decoding op once and feed each frame into it.  Creating
    # a fresh decode op per frame kept adding nodes to the graph.
    encoded_jpeg = tf.placeholder(tf.string, shape=[])
    decoded_jpeg = tf.image.decode_jpeg(encoded_jpeg, channels=3)

    # The context manager closes the session, which was previously leaked.
    with tf.Session() as sess:
        for vid_idx, serialized_ex in tqdm(
                enumerate(tf.python_io.tf_record_iterator(fname))):
            example = tf.train.Example()
            example.ParseFromString(serialized_ex)
            feature_map = example.features.feature

            N = int(feature_map['meta/N'].int64_list.value[0])

            # This is a list of length N
            images_data = feature_map['image/encoded'].bytes_list.value

            xys = feature_map['image/xys'].float_list.value
            xys = np.array(xys).reshape(-1, 2, 14)

            face_pts = feature_map['image/face_pts'].float_list.value
            face_pts = np.array(face_pts).reshape(-1, 3, 5)

            toe_pts = feature_map['image/toe_pts'].float_list.value
            if len(toe_pts) == 0:
                # The shape must be a single tuple: np.zeros(n, 3, 6) treats
                # 3 as the dtype and raises TypeError.
                toe_pts = np.zeros((xys.shape[0], 3, 6))
            toe_pts = np.array(toe_pts).reshape(-1, 3, 6)

            visibles = feature_map['image/visibilities'].int64_list.value
            visibles = np.array(visibles).reshape(-1, 1, 14)

            # Same answer for every frame of the example, so test it once.
            preprocessed = 'image/phis' in feature_map.keys()

            video = []
            kp_2d = []
            for i in range(N):
                image = np.expand_dims(
                    sess.run(decoded_jpeg,
                             feed_dict={encoded_jpeg: images_data[i]}),
                    axis=0)
                video.append(image)

                # rows: x, y, visibility; columns: body, face, toe points
                kp = np.vstack((xys[i], visibles[i]))
                kp = np.hstack((kp, face_pts[i], toe_pts[i]))

                if preprocessed:
                    # Preprocessed, so kps are in [-1, 1]
                    img_shape = 224  # image.shape[0]
                    vis = kp[2, :]
                    kp = ((kp[:2, :] + 1) * 0.5) * img_shape
                    kp = np.vstack((kp, vis))

                kp_2d.append(np.expand_dims(kp.T, axis=0))

            video = np.concatenate(video, axis=0)
            kp_2d = np.concatenate(kp_2d, axis=0)

            vid_name = f'{fname}-{vid_idx}'

            dataset['vid_name'].append(np.array([vid_name] * N))
            dataset['frame_id'].append(np.arange(N))
            dataset['joints2D'].append(kp_2d)

            features = extract_features(model, video, bbox=None, kp_2d=kp_2d,
                                        dataset='insta', debug=False)
            dataset['features'].append(features)

            print(features.shape)
            assert features.shape[0] == N

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    for k, v in dataset.items():
        print(k, len(v))

    return dataset
def read_data(folder, set):
    """Build the PoseTrack dataset dictionary for one split.

    Every JSON file matching
    ``<folder>/posetrack_data/annotations/<set>/*.json`` is parsed.  Only
    labeled images are kept.  Each track id then yields one clip, provided it
    has at least ``min_frame_number`` (8) usable annotations: keypoints not
    all zero and a box with non-zero width and height.  Boxes are converted
    from top-left ``x, y, w, h`` to a centre plus a square of side
    ``0.8 * h``.

    Note: ``set`` shadows the builtin; the name is kept because it is part of
    the public signature.

    Args:
        folder: Dataset root; annotation ``file_name`` entries are joined to
            this directory.
        set: Split sub-directory name under ``annotations``.

    Returns:
        dict with keys ``img_name``, ``joints2D``, ``bbox``, ``vid_name`` and
        ``features``, each concatenated over all clips along axis 0.

    Raises:
        ValueError: from ``np.concatenate`` if no clip was kept.
    """
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = sorted(glob.glob(
        osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json')))

    nn_corrupted = 0
    tot_frames = 0
    min_frame_number = 8

    for fid, fname in tqdm_enumerate(file_names):
        # Skip the excluded sequence.  The old check compared against
        # '<folder>/annotations/train/...', which lacks 'posetrack_data/' and
        # so could never equal a globbed path; match on the file name instead.
        if osp.basename(fname) == '021133_mpii_train.json':
            continue

        with open(fname, 'r') as entry:
            anns = json.load(entry)

        anns['images'] = [
            item for item in anns['images'] if item['is_labeled']
        ]
        num_frames = len(anns['images'])

        frame2imgname = {
            el['frame_id']: el['file_name'] for el in anns['images']
        }

        # track ids are treated as 0..max, so the count is max + 1
        num_people = max((x['track_id'] for x in anns['annotations']),
                         default=-1) + 1

        # Re-order each annotation's keypoints to the reference name order,
        # keeping only the names this file actually lists.
        posetrack_joints = get_posetrack_original_kp_names()
        file_joint_names = anns['categories'][0]['keypoints']
        idxs = [
            file_joint_names.index(name) for name in posetrack_joints
            if name in file_joint_names
        ]
        for x in anns['annotations']:
            kps = np.array(x['keypoints']).reshape((17, 3))
            kps = kps[idxs, :]
            x['keypoints'] = list(kps.flatten())

        tot_frames += num_people * num_frames

        for p_id in range(num_people):
            annot_pid = [(item['keypoints'], item['bbox'], item['image_id'])
                         for item in anns['annotations']
                         if item['track_id'] == p_id
                         and not (np.count_nonzero(item['keypoints']) == 0)]

            if len(annot_pid) < min_frame_number:
                nn_corrupted += len(annot_pid)
                continue

            bbox = np.zeros((len(annot_pid), 4))
            kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0]) // 3, 3))
            # image ids are held as floats; 0 marks a skipped annotation
            img_paths = np.zeros((len(annot_pid)))

            for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid):
                if (bbox_p[2] == 0 or bbox_p[3] == 0):
                    # degenerate box: leave row i zeroed so it is removed below
                    nn_corrupted += 1
                    continue

                img_paths[i] = image_id
                # set every confidence to 1 ...
                key2djnts[2::3] = len(key2djnts[2::3]) * [1]

                kp_2d[i, :] = np.array(key2djnts).reshape(
                    int(len(key2djnts) / 3), 3)

                # ... then reset it to 0 for joints located at the origin
                for kp_loc in kp_2d[i, :]:
                    if kp_loc[0] == 0 and kp_loc[1] == 0:
                        kp_loc[2] = 0

                # top-left x, y, w, h -> centre x, y and a square of 0.8 * h
                x_tl = bbox_p[0]
                y_tl = bbox_p[1]
                w = bbox_p[2]
                h = bbox_p[3]
                bbox_p[0] = x_tl + w / 2
                bbox_p[1] = y_tl + h / 2

                w = h = h * 0.8
                bbox_p[2] = w
                bbox_p[3] = h
                bbox[i, :] = bbox_p

            img_paths = [
                osp.join(folder, frame2imgname[item]) if item != 0 else 0
                for item in list(img_paths)
            ]

            # drop the rows left zeroed by skipped annotations
            bbx_idxs = [
                bbx_id for bbx_id, bbx in enumerate(bbox)
                if np.count_nonzero(bbx) == 0
            ]

            kp_2d = np.delete(kp_2d, bbx_idxs, 0)
            img_paths = np.delete(np.array(img_paths), bbx_idxs, 0)
            bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0)

            if bbox.size == 0 or bbox.shape[0] < min_frame_number:
                nn_corrupted += 1
                continue

            # Convert to common 2d keypoint format
            kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin')

            dataset['vid_name'].append(
                np.array([f'{fname}_{p_id}'] * img_paths.shape[0]))
            dataset['img_name'].append(np.array(img_paths))
            dataset['joints2D'].append(kp_2d)
            dataset['bbox'].append(np.array(bbox))

            # compute_features
            features = extract_features(
                model,
                np.array(img_paths),
                bbox,
                kp_2d=kp_2d,
                dataset='spin',
                debug=False,
            )

            assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0]

            dataset['features'].append(features)

    print(nn_corrupted, tot_frames)

    # Concatenate the per-clip lists directly.  Wrapping them in np.array()
    # first fails on recent NumPy because clips differ in length.
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    for k, v in dataset.items():
        print(k, v.shape)

    return dataset
def read_data(folder, set, debug=False):
    """Build the 3DPW dataset dictionary for one split.

    For every sequence pickle in ``<folder>/sequenceFiles/<set>`` and every
    person in it, the global orientation is rotated by the camera rotation
    and SMPL is run to obtain 3D joints.  A smoothed bounding box is then
    computed from the 2D poses, and features are extracted for the frame
    range ``time_pt1:time_pt2`` returned by ``get_smooth_bbox_params``.
    Finally only frames with more than ``MIN_KP`` 2D joints above
    ``VIS_THRESH`` are kept.

    Note: ``set`` shadows the builtin; the name is kept because it is part of
    the public signature.

    Args:
        folder: Dataset root with ``sequenceFiles`` and ``imageFiles``.
        set: Split name; ``'val'`` is mapped to ``'test'``.
        debug: Forwarded to ``extract_features``.

    Returns:
        dict with keys ``vid_name``, ``frame_id``, ``joints3D``, ``joints2D``,
        ``shape``, ``pose``, ``bbox``, ``img_name``, ``features`` and
        ``valid``, concatenated over all sequences/persons and filtered to
        the kept frames.

    Raises:
        ValueError: from ``np.concatenate`` if no sequence was found.
    """
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    model = spin.get_pretrained_hmr()

    if set == 'val':
        set = 'test'

    sequences = [
        x.split('.')[0]
        for x in os.listdir(osp.join(folder, 'sequenceFiles', set))
    ]

    J_regressor = None

    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    if set == 'test':
        # for the test split, joints are regressed from the mesh vertices
        J_regressor = torch.from_numpy(
            np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

    # Convert to common 2d keypoint format: the permutation is the same for
    # every person, so build it once.  Copy before extending so the list
    # returned by the helper is never mutated in place.
    perm_idxs = list(get_perm_idxs('3dpw', 'common')) + [0, 0]  # no neck, top head

    for seq in tqdm(sequences):
        data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')

        # NOTE(security): pickle executes arbitrary code on load; only use
        # sequence files from a trusted source.  The context manager closes
        # the handle, which was previously leaked.
        with open(data_file, 'rb') as pkl_file:
            data = pkl.load(pkl_file, encoding='latin1')

        img_dir = osp.join(folder, 'imageFiles', seq)

        num_people = len(data['poses'])
        num_frames = len(data['img_frame_ids'])
        assert (data['poses2d'][0].shape[0] == num_frames)

        for p_id in range(num_people):
            pose = torch.from_numpy(data['poses'][p_id]).float()
            shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(
                pose.size(0), 1)
            trans = torch.from_numpy(data['trans'][p_id]).float()
            j2d = data['poses2d'][p_id].transpose(0, 2, 1)
            cam_pose = data['cam_poses']
            campose_valid = data['campose_valid'][p_id]

            # ======== Align the mesh params ======== #
            # compose the camera rotation with the global orientation
            rot = pose[:, :3]
            rot_mat = batch_rodrigues(rot)

            Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
            Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
            rot = rotation_matrix_to_angle_axis(Rs)
            pose[:, :3] = rot
            # ======== Align the mesh params ======== #

            output = smpl(betas=shape, body_pose=pose[:, 3:],
                          global_orient=pose[:, :3], transl=trans)
            j3d = output.joints

            if J_regressor is not None:
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(
                    vertices.shape[0], -1, -1).to(vertices.device)
                j3d = torch.matmul(J_regressor_batch, vertices)
                j3d = j3d[:, H36M_TO_J14, :]

            img_paths = [
                img_dir + '/image_{:05d}.jpg'.format(i_frame)
                for i_frame in range(num_frames)
            ]

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # process keypoints
            j2d[:, :, 2] = j2d[:, :, 2] > 0.3  # set the visibility flags
            j2d = j2d[:, perm_idxs]
            # the two padded joints (neck, top head) carry no annotation
            j2d[:, 12:, 2] = 0.0

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
            dataset['vid_name'].append(
                np.array([f'{seq}_{p_id}'] * num_frames)[time_pt1:time_pt2])
            dataset['frame_id'].append(
                np.arange(0, num_frames)[time_pt1:time_pt2])
            dataset['img_name'].append(img_paths_array)
            dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
            dataset['joints2D'].append(j2d[time_pt1:time_pt2])
            dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
            dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
            # bbox is stored unsliced, exactly as before; presumably
            # get_smooth_bbox_params already restricts it to
            # time_pt1:time_pt2 - TODO confirm
            dataset['bbox'].append(bbox)
            dataset['valid'].append(campose_valid[time_pt1:time_pt2])

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2],
                                        debug=debug, dataset='3dpw',
                                        scale=1.2)
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out keypoints: keep frames with enough visible 2D joints
    indices_to_use = np.where(
        (dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset