    # Truncate long architecture names and print one '|'-separated results row
    if len(full_arch_name) > 15:
        full_arch_name = full_arch_name[:8] + '...'
    print('| ' + full_arch_name + ' ' +
          ' '.join(['| {:.3f}'.format(value) for value in values]) + ' |')


if __name__ == '__main__':
    args = parser.parse_args()
    parse_args(args)

    if args.regressor == 'pymaf_net':
        model = pymaf_net(path_config.SMPL_MEAN_PARAMS, pretrained=True)
    if args.regressor == 'hmr':
        model = hmr(path_config.SMPL_MEAN_PARAMS)

    if args.checkpoint is not None:
        checkpoint = torch.load(args.checkpoint)
        model.load_state_dict(checkpoint['model'], strict=True)
    model.eval()

    dataset = COCODataset(None, args.dataset, 'val2014', is_train=False)

    # Run evaluation
    args.result_file = None
    run_evaluation(model, args.dataset, dataset, args.result_file,
                   batch_size=args.batch_size,
def init_fn(self):
    if self.options.rank == 0:
        self.summary_writer.add_text('command_args', print_args())

    if self.options.regressor == 'hmr':
        # HMR/SPIN model
        self.model = hmr(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = SMPL(path_config.SMPL_MODEL_DIR,
                         batch_size=cfg.TRAIN.BATCH_SIZE,
                         create_transl=False).to(self.device)
    elif self.options.regressor == 'pymaf_net':
        # PyMAF model
        self.model = pymaf_net(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = self.model.regressor[0].smpl

    if self.options.distributed:
        # For multiprocessing distributed, the DistributedDataParallel constructor
        # should always set the single device scope; otherwise,
        # DistributedDataParallel will use all available devices.
        if self.options.gpu is not None:
            torch.cuda.set_device(self.options.gpu)
            self.model.cuda(self.options.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            self.options.batch_size = int(self.options.batch_size /
                                          self.options.ngpus_per_node)
            self.options.workers = int(
                (self.options.workers + self.options.ngpus_per_node - 1) /
                self.options.ngpus_per_node)
            self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                self.model)
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model,
                device_ids=[self.options.gpu],
                output_device=self.options.gpu,
                find_unused_parameters=True)
        else:
            self.model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, find_unused_parameters=True)
        self.models_dict = {'model': self.model.module}
    else:
        self.model = self.model.to(self.device)
        self.models_dict = {'model': self.model}

    cudnn.benchmark = True

    # Per-vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss
    # No reduction because confidence weighting needs to be applied
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression
    self.criterion_regr = nn.MSELoss().to(self.device)

    self.focal_length = constants.FOCAL_LENGTH

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(
            checkpoint_file=self.options.pretrained_checkpoint)

    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=cfg.SOLVER.BASE_LR,
                                      weight_decay=0)
    self.optimizers_dict = {'optimizer': self.optimizer}

    if self.options.single_dataset:
        self.train_ds = BaseDataset(self.options,
                                    self.options.single_dataname,
                                    is_train=True)
    else:
        self.train_ds = MixedDataset(self.options, is_train=True)

    self.valid_ds = BaseDataset(self.options,
                                self.options.eval_dataset,
                                is_train=False)

    if self.options.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            self.train_ds)
        val_sampler = None
    else:
        train_sampler = None
        val_sampler = None

    self.train_data_loader = DataLoader(self.train_ds,
                                        batch_size=self.options.batch_size,
                                        num_workers=self.options.workers,
                                        pin_memory=cfg.TRAIN.PIN_MEMORY,
                                        shuffle=(train_sampler is None),
                                        sampler=train_sampler)

    self.valid_loader = DataLoader(dataset=self.valid_ds,
                                   batch_size=cfg.TEST.BATCH_SIZE,
                                   shuffle=False,
                                   num_workers=cfg.TRAIN.NUM_WORKERS,
                                   pin_memory=cfg.TRAIN.PIN_MEMORY,
                                   sampler=val_sampler)

    # Load dictionary of fits
    self.fits_dict = FitsDict(self.options, self.train_ds)

    self.evaluation_accumulators = dict.fromkeys([
        'pred_j3d', 'target_j3d', 'target_theta', 'pred_verts', 'target_verts'
    ])

    # Create renderer
    try:
        self.renderer = OpenDRenderer()
    except:
        print('No renderer for visualization.')
        self.renderer = None

    if cfg.MODEL.PyMAF.AUX_SUPV_ON:
        self.iuv_maker = IUV_Renderer(
            output_size=cfg.MODEL.PyMAF.DP_HEATMAP_SIZE)

    self.decay_steps_ind = 1
    self.decay_epochs_ind = 1
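# A minimal sketch (not part of the trainer) of the per-process split performed in the
# distributed branch of init_fn above: the global batch size is divided evenly across
# the GPUs of a node, and the worker count is divided with ceiling rounding. The
# numbers below are hypothetical; the real values come from self.options.
#
#   global_batch_size, workers, ngpus_per_node = 64, 10, 4
#   per_gpu_batch_size = int(global_batch_size / ngpus_per_node)            # -> 16
#   per_gpu_workers = int((workers + ngpus_per_node - 1) / ngpus_per_node)  # -> 3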
def main(args):
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    if args.image_folder is None:
        video_file = args.vid_file

        # ========= [Optional] download the youtube video ========= #
        if video_file.startswith('https://www.youtube.com'):
            print(f'Downloading YouTube video "{video_file}"')
            video_file = download_youtube_clip(video_file, '/tmp')
            if video_file is None:
                exit('Youtube url is not valid!')
            print(f'YouTube Video has been downloaded to {video_file}...')

        if not os.path.isfile(video_file):
            exit(f'Input video "{video_file}" does not exist!')

        output_path = os.path.join(
            args.output_folder,
            os.path.basename(video_file).replace('.mp4', ''))
        image_folder, num_frames, img_shape = video_to_images(
            video_file, return_info=True)
    else:
        image_folder = args.image_folder
        num_frames = len(os.listdir(image_folder))
        img_shape = cv2.imread(
            osp.join(image_folder, os.listdir(image_folder)[0])).shape
        output_path = os.path.join(args.output_folder,
                                   osp.split(image_folder)[-1])

    os.makedirs(output_path, exist_ok=True)

    print(f'Input video number of frames {num_frames}')
    if not args.image_based:
        orig_height, orig_width = img_shape[:2]

    total_time = time.time()

    # ========= Run tracking ========= #
    bbox_scale = 1.0
    if args.use_gt:
        # Build tracking results from ground-truth annotations:
        # convert normalized corner coordinates to pixel-space square boxes
        with open(args.anno_file) as f:
            tracking_anno = json.load(f)
        tracking_results = {}
        for tracklet in tracking_anno:
            track_id = tracklet['idx']
            frames = tracklet['frames']
            f_id = []
            bbox = []
            for f in frames:
                f_id.append(f['frame_id'])
                x_tl, y_tl = f['rect']['tl']['x'] * orig_width, f['rect'][
                    'tl']['y'] * orig_height
                x_br, y_br = f['rect']['br']['x'] * orig_width, f['rect'][
                    'br']['y'] * orig_height
                x_c, y_c = (x_br + x_tl) / 2., (y_br + y_tl) / 2.
                w, h = x_br - x_tl, y_br - y_tl
                wh_max = max(w, h)
                x_tl, y_tl = x_c - wh_max / 2., y_c - wh_max / 2.
                bbox.append(np.array([x_c, y_c, wh_max, wh_max]))
            f_id = np.array(f_id)
            bbox = np.array(bbox)
            tracking_results[track_id] = {'frames': f_id, 'bbox': bbox}
    else:
        # run multi object tracker
        mot = MPT(
            device=device,
            batch_size=args.tracker_batch_size,
            display=args.display,
            detector_type=args.detector,
            output_format='dict',
            yolo_img_size=args.yolo_img_size,
        )
        tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Define model ========= #
    if args.regressor == 'pymaf_net':
        model = pymaf_net(path_config.SMPL_MEAN_PARAMS,
                          pretrained=True).to(device)
    elif args.regressor == 'hmr':
        model = hmr(path_config.SMPL_MEAN_PARAMS).to(device)

    # ========= Load pretrained weights ========= #
    if args.checkpoint is not None:
        checkpoint = torch.load(args.checkpoint)
        model.load_state_dict(checkpoint['model'], strict=True)
    model.eval()
    print(f'Loaded pretrained weights from "{args.checkpoint}"')

    # ========= Run pred on each person ========= #
    if args.recon_result_file:
        pred_results = joblib.load(args.recon_result_file)
        print('Loaded results from ' + args.recon_result_file)
    else:
        if args.pre_load_imgs:
            image_file_names = [
                osp.join(image_folder, x) for x in os.listdir(image_folder)
                if x.endswith('.png') or x.endswith('.jpg')
            ]
            image_file_names = sorted(image_file_names)
            image_file_names = np.array(image_file_names)
            pre_load_imgs = []
            for file_name in image_file_names:
                pre_load_imgs.append(
                    cv2.cvtColor(cv2.imread(file_name), cv2.COLOR_BGR2RGB))
            pre_load_imgs = np.array(pre_load_imgs)
            print('image_file_names', pre_load_imgs.shape)
        else:
            image_file_names = None

        print('Running reconstruction on each tracklet...')
        pred_time = time.time()
        pred_results = {}
        for person_id in tqdm(list(tracking_results.keys())):
            bboxes = joints2d = None

            if args.tracking_method == 'bbox':
                bboxes = tracking_results[person_id]['bbox']
            elif args.tracking_method == 'pose':
                joints2d = tracking_results[person_id]['joints2d']

            frames = tracking_results[person_id]['frames']

            if args.pre_load_imgs:
                print('image_file_names frames', pre_load_imgs[frames].shape)
                dataset = Inference(image_folder=image_folder,
                                    frames=frames,
                                    bboxes=bboxes,
                                    joints2d=joints2d,
                                    scale=bbox_scale,
                                    pre_load_imgs=pre_load_imgs[frames])
            else:
                dataset = Inference(
                    image_folder=image_folder,
                    frames=frames,
                    bboxes=bboxes,
                    joints2d=joints2d,
                    scale=bbox_scale,
                )

            if args.image_based:
                img_shape = cv2.imread(
                    osp.join(image_folder,
                             os.listdir(image_folder)[frames[0]])).shape
                orig_height, orig_width = img_shape[:2]

            bboxes = dataset.bboxes
            frames = dataset.frames
            has_keypoints = True if joints2d is not None else False

            dataloader = DataLoader(dataset,
                                    batch_size=args.model_batch_size,
                                    num_workers=16)

            with torch.no_grad():
                pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d, norm_joints2d = [], [], [], [], [], []

                for batch in dataloader:
                    if has_keypoints:
                        batch, nj2d = batch
                        norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                    # batch = batch.unsqueeze(0)
                    batch = batch.to(device)

                    # batch_size, seqlen = batch.shape[:2]
                    batch_size = batch.shape[0]
                    seqlen = 1
                    preds_dict, _ = model(batch)

                    # theta packs camera (3), pose (72), and shape parameters
                    output = preds_dict['smpl_out'][-1]
                    pred_cam.append(output['theta'][:, :3].reshape(
                        batch_size * seqlen, -1))
                    pred_verts.append(output['verts'].reshape(
                        batch_size * seqlen, -1, 3))
                    pred_pose.append(output['theta'][:, 3:75].reshape(
                        batch_size * seqlen, -1))
                    pred_betas.append(output['theta'][:, 75:].reshape(
                        batch_size * seqlen, -1))
                    pred_joints3d.append(output['kp_3d'].reshape(
                        batch_size * seqlen, -1, 3))

                pred_cam = torch.cat(pred_cam, dim=0)
                pred_verts = torch.cat(pred_verts, dim=0)
                pred_pose = torch.cat(pred_pose, dim=0)
                pred_betas = torch.cat(pred_betas, dim=0)
                pred_joints3d = torch.cat(pred_joints3d, dim=0)

                del batch

            # ========= Save results to a pickle file ========= #
            pred_cam = pred_cam.cpu().numpy()
            pred_verts = pred_verts.cpu().numpy()
            pred_pose = pred_pose.cpu().numpy()
            pred_betas = pred_betas.cpu().numpy()
            pred_joints3d = pred_joints3d.cpu().numpy()

            orig_cam = convert_crop_cam_to_orig_img(cam=pred_cam,
                                                    bbox=bboxes,
                                                    img_width=orig_width,
                                                    img_height=orig_height)

            output_dict = {
                'pred_cam': pred_cam,
                'orig_cam': orig_cam,
                'verts': pred_verts,
                'pose': pred_pose,
                'betas': pred_betas,
                'joints3d': pred_joints3d,
                'joints2d': joints2d,
                'bboxes': bboxes,
                'frame_ids': frames,
            }

            pred_results[person_id] = output_dict

        del model

        end = time.time()
        fps = num_frames / (end - pred_time)
        print(f'FPS: {fps:.2f}')

    total_time = time.time() - total_time
    print(
        f'Total time spent: {total_time:.2f} seconds (including model loading time).'
    )
    print(
        f'Total FPS (including model loading time): {num_frames / total_time:.2f}.'
    )

    print(
        f'Saving output results to "{os.path.join(output_path, "_output.pkl")}".'
    )
    joblib.dump(pred_results, os.path.join(output_path, "_output.pkl"))

    if not args.no_render:
        # ========= Render results as a single video ========= #
        if args.use_opendr:
            renderer = OpenDRenderer(resolution=(orig_height, orig_width))
        else:
            renderer = PyRenderer(resolution=(orig_width, orig_height))

        output_img_folder = os.path.join(
            output_path, osp.split(image_folder)[-1] + '_output')
        os.makedirs(output_img_folder, exist_ok=True)

        print(f'Rendering output video, writing frames to {output_img_folder}')

        # prepare results for rendering
        frame_results = prepare_rendering_results(pred_results, num_frames)

        image_file_names = sorted([
            os.path.join(image_folder, x) for x in os.listdir(image_folder)
            if x.endswith('.png') or x.endswith('.jpg')
        ])

        if args.regressor == 'hmr':
            color_type = 'pink'
        elif cfg.MODEL.PyMAF.N_ITER == 0 and cfg.MODEL.PyMAF.AUX_SUPV_ON == False:
            color_type = 'neutral'
        else:
            color_type = 'purple'

        for frame_idx in tqdm(range(len(image_file_names))):
            img_fname = image_file_names[frame_idx]
            img = cv2.imread(img_fname)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if args.render_ratio != 1:
                img = resize(img, (int(img.shape[0] * args.render_ratio),
                                   int(img.shape[1] * args.render_ratio)),
                             anti_aliasing=True)
                img = (img * 255).astype(np.uint8)

            raw_img = img.copy()

            # if args.sideview:
            #     side_img = np.zeros_like(img)

            if args.empty_bg:
                empty_img = np.zeros_like(img)

            for person_id, person_data in frame_results[frame_idx].items():
                frame_verts = person_data['verts']
                frame_cam = person_data['cam']

                mesh_filename = None
                if args.save_obj:
                    mesh_folder = os.path.join(output_path, 'meshes',
                                               f'{person_id:04d}')
                    os.makedirs(mesh_folder, exist_ok=True)
                    mesh_filename = os.path.join(mesh_folder,
                                                 f'{frame_idx:06d}.obj')

                if args.empty_bg:
                    img, empty_img = renderer(
                        frame_verts[None, :, :] if args.use_opendr else frame_verts,
                        img=[img, empty_img],
                        cam=frame_cam,
                        color_type=color_type,
                        mesh_filename=mesh_filename)
                else:
                    img = renderer(
                        frame_verts[None, :, :] if args.use_opendr else frame_verts,
                        img=img,
                        cam=frame_cam,
                        color_type=color_type,
                        mesh_filename=mesh_filename)

                # if args.sideview:
                #     side_img = renderer(
                #         frame_verts,
                #         img=side_img,
                #         cam=frame_cam,
                #         color_type=color_type,
                #         angle=270,
                #         axis=[0, 1, 0],
                #     )
            if args.with_raw:
                img = np.concatenate([raw_img, img], axis=1)

            if args.empty_bg:
                img = np.concatenate([img, empty_img], axis=1)

            # if args.sideview:
            #     img = np.concatenate([img, side_img], axis=1)

            # cv2.imwrite(os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img)
            if args.image_based:
                imsave(
                    os.path.join(output_img_folder,
                                 osp.split(img_fname)[-1][:-4] + '.png'), img)
            else:
                imsave(
                    os.path.join(output_img_folder, f'{frame_idx:06d}.png'),
                    img)

            if args.display:
                cv2.imshow('Video', img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if args.display:
            cv2.destroyAllWindows()

        # ========= Save rendered video ========= #
        vid_name = osp.split(
            image_folder
        )[-1] if args.image_folder is not None else os.path.basename(
            video_file)

        save_name = f'{vid_name.replace(".mp4", "")}_result.mp4'
        save_name = os.path.join(output_path, save_name)
        if not args.image_based:
            print(f'Saving result video to {save_name}')
            images_to_video(img_folder=output_img_folder,
                            output_vid_file=save_name)

        # shutil.rmtree(output_img_folder)
        # shutil.rmtree(image_folder)

    print('================= END =================')
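# A hedged usage sketch: assuming the argparse flags of this script mirror the attribute
# names read in main() (args.vid_file, args.output_folder, args.regressor, args.checkpoint,
# args.image_folder, args.no_render, ...), a typical video run might look like the command
# below. The script name and the paths are hypothetical placeholders.
#
#   python demo.py --vid_file examples/input.mp4 --output_folder output/ \
#       --regressor pymaf_net --checkpoint path/to/pymaf_checkpoint.pt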