def load_pose_model(args):
    """Build the SPPE pose network on GPU in eval mode.

    Picks the fast-inference variant when ``args.fast_inference`` is set.
    """
    dataset = Mscoco()
    net_cls = InferenNet_fast if args.fast_inference else InferenNet
    model = net_cls(4 * 1 + 1, dataset)
    model.cuda()
    model.eval()
    return model
def __init__(self):
    """Load the SPPE pose model (CUDA, eval mode), configured by the global ``args``."""
    dataset = Mscoco()
    if args.fast_inference:  # True in the default configuration
        model = InferenNet_fast(4 * 1 + 1, dataset)
    else:
        model = InferenNet(4 * 1 + 1, dataset)
    model.cuda()
    model.eval()
    self.pose_model = model
def load_model(opt):
    """Load the YOLOv3-SPP detector and the fast SPPE pose network.

    Returns:
        (det_model, pose_model) — both on CUDA and switched to eval mode.
    """
    # Pose branch: fast SPPE over the COCO keypoint definition.
    pose_net = InferenNet_fast(4 * 1 + 1, Mscoco())
    pose_net.cuda()
    pose_net.eval()

    # Detector branch: YOLOv3-SPP with its input resolution taken from opt.
    detector = Darknet("yolo/cfg/yolov3-spp.cfg")
    detector.load_weights('models/yolo/yolov3-spp.weights')
    detector.net_info['height'] = opt.inp_dim
    detector.cuda()
    detector.eval()

    return detector, pose_net
def downModel():
    """Load the SPPE pose model onto the best available device in eval mode.

    Returns:
        The pose network (fast variant when ``args.fast_inference`` is set),
        moved to CUDA when available (CPU otherwise) and set to eval mode.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load pose model
    # Fix: the message previously said "Loading YOLO model..." although this
    # function loads the SPPE pose model (as its own comment states).
    print('Loading pose model...')
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.to(device)
    pose_model.eval()
    return pose_model
def __init__(self, videofile, mode='normal'):
    """Wire up the video -> detection -> pose-writer pipeline for one video file."""
    self.videofile = videofile
    self.mode = mode
    # Threaded frame reader for the input video.
    self.data_loader = VideoLoader(self.videofile, batchSize=args.detbatch).start()
    # Unpack codec / frame-rate / frame-size straight onto the instance.
    self.fourcc, self.fps, self.frameSize = self.data_loader.videoinfo()
    # Human detection stage feeding the pose estimator.
    self.det_loader = DetectionLoader(self.data_loader, batchSize=args.detbatch).start()
    self.det_processor = DetectionProcessor(self.det_loader).start()
    self.pose_dataset = Mscoco()
    # Output video path: AlphaPose_<basename>.mp4 under args.outputpath.
    out_name = 'AlphaPose_' + ntpath.basename(self.videofile).split('.')[0] + '.mp4'
    save_path = os.path.join(args.outputpath, out_name)
    self.writer = DataWriter(args.save_video, save_path,
                             cv2.VideoWriter_fourcc(*'DIVX'),
                             self.fps, self.frameSize).start()
    self.results = []
# NOTE(review): fragment of a webcam-demo script — it begins mid-statement
# (the tail of a DataWriter(...).start() call) and ends inside a try block,
# so only formatting and comments are added here.
frameSize).start()

# Load YOLO model
print('Loading YOLO model..')
sys.stdout.flush()
det_model = Darknet("yolo/cfg/yolov3.cfg")
det_model.load_weights('models/yolo/yolov3.weights')
det_model.net_info['height'] = args.inp_dim
det_inp_dim = int(det_model.net_info['height'])
# Detector input resolution must be a multiple of 32 (and larger than 32).
assert det_inp_dim % 32 == 0
assert det_inp_dim > 32
det_model.cuda()
det_model.eval()

# Load pose model
pose_dataset = Mscoco()
if args.fast_inference:
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cuda()
pose_model.eval()

# Per-stage timing buckets; presumably load / detect / detect-NMS / pose /
# post-process — TODO confirm against where they are appended.
runtime_profile = {'ld': [], 'dt': [], 'dn': [], 'pt': [], 'pn': []}
print('Starting webcam demo, press Ctrl + C to terminate...')
sys.stdout.flush()
im_names_desc = tqdm(loop())
for i in im_names_desc:
    try:
        start_time = getTime()
def main(args):
    """Evaluate a CPN-style keypoint model on COCO and print COCOeval metrics.

    Loads a checkpoint named ``<args.test>.pth.tar`` from ``args.checkpoint``,
    runs inference over the test split (optionally flip-averaged), decodes
    heatmaps to keypoint coordinates, dumps ``result.json`` to ``args.result``,
    and scores it with the COCO keypoint evaluator.
    """
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_skeleton, pretrained = False)
    model = torch.nn.DataParallel(model).cuda()
    test_loader = torch.utils.data.DataLoader(
        Mscoco(cfg, is_train=False),
        batch_size=args.batch, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test+'.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))
    # switch to evaluation mode
    model.eval()
    print('testing...')
    full_result = []
    # NOTE(review): the loop variable `i` is re-bound by the inner enumerate
    # loops below; harmless here because the outer `i` is never read afterwards.
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            # flip the input image (horizontal mirror of every sample in the batch)
            if args.flip == True:
                flip_inputs = inputs.clone()
                for i, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[i] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())
            # compute output (refine branch provides the final score maps)
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()
            if args.flip == True:
                # Average original and un-flipped mirrored predictions;
                # cfg.symmetry swaps left/right joint channels back.
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()
                for i, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1,2,0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2,0,1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[i] += fscore
                    score_map[i] /= 2
            ids = meta['img_id'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []
                single_map = score_map[b]
                # r0 keeps an unnormalized copy used later as a confidence lookup.
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)  # per-joint confidence, 17 COCO keypoints
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    # Pad the heatmap so blurring/argmax near edges behaves.
                    dr = np.zeros((cfg.output_shape[0] + 2*border, cfg.output_shape[1]+2*border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    # Highest peak = coarse joint location ...
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    # ... then zero it and find the second peak to refine by a
                    # quarter-pixel offset toward it (standard CPN decoding).
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px ** 2 + py ** 2) ** 0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))
                    # Map heatmap coordinates back to original-image coordinates
                    # via the per-sample crop box in `details` (x0, y0, x1, y1 —
                    # presumably; confirm against the dataset's meta layout).
                    resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0])
                    v_score[p] = float(r0[p, int(round(y)+1e-10), int(round(x)+1e-10)])
                    # COCO keypoint triplets: x, y, visibility (always 1 here).
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['category_id'] = 1  # person
                    single_result_dict['keypoints'] = single_result
                    # Final score = detector confidence x mean keypoint confidence.
                    single_result_dict['score'] = float(det_scores[b])*v_score.mean()
                    full_result.append(single_result_dict)
    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file,'w') as wf:
        json.dump(full_result, wf)
    # evaluate on COCO
    eval_gt = COCO(cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
def handle_video(videofile):
    """Run AlphaPose on a video and return one person's keypoints per frame.

    Among the detected people in each frame, the leftmost and rightmost (by
    mean keypoint x) are excluded and the remaining ("middle") person's
    keypoints are kept — written for badminton footage with three visible
    people (two players plus a middle subject).

    Returns:
        np.ndarray of float32 keypoints, one (17, 2) entry per usable frame.
    """
    args.video = videofile
    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    # start a thread to read frames from the file video stream
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # No detections: still emit the frame so the writer stays in sync.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — run the SPPE over detections in mini-batches.
            datalen = inps.size(0)
            leftover = 0
            if (datalen) % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2,
                        orig_img, im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    while (writer.running()):
        pass
    writer.stop()
    final_result = writer.results()

    # Originally: take the person from the 0-th box.
    kpts = []
    for i in range(len(final_result)):
        try:
            preds = final_result[i]['result']
            # preds[i]['keypoints'] (17,2)
            # preds[i]['kp_score'] (17,1)
            # preds[i]['proposal_score'] (1)
            # Originally: pick the person with the largest y coordinate — for
            # badminton videos. The active variant below instead discards the
            # leftmost and rightmost people and keeps the middle one.
            max_index = 0
            min_index = 0
            # max_y = np.mean(preds[0]['keypoints'].data.numpy()[:, 1])
            min_x = np.mean(preds[0]['keypoints'].data.numpy()[:, 0])
            max_x = np.mean(preds[0]['keypoints'].data.numpy()[:, 0])
            # Leftmost person (smallest mean x).
            for k in range(len(preds)):
                # tmp_y = np.mean(preds[k]['keypoints'].data.numpy()[:, 1])
                tmp_x = np.mean(preds[k]['keypoints'].data.numpy()[:, 0])
                # if tmp_y > max_y:
                if tmp_x < min_x:
                    min_index = k
                    # max_y = tmp_y
                    min_x = tmp_x
            # Rightmost person (largest mean x).
            for k in range(len(preds)):
                # tmp_y = np.mean(preds[k]['keypoints'].data.numpy()[:, 1])
                tmp_x = np.mean(preds[k]['keypoints'].data.numpy()[:, 0])
                # if tmp_y > max_y:
                if tmp_x > max_x:
                    max_index = k
                    max_x = tmp_x
            # Last person that is neither leftmost nor rightmost.
            mid_index = 0
            for k in range(len(preds)):
                if k == max_index or k == min_index:
                    continue
                mid_index = k
            kpt = preds[mid_index]['keypoints']
            # kpt = final_result[i]['result'][0]['keypoints']
            kpts.append(kpt.data.numpy())
        except:
            # NOTE(review): bare except deliberately skips frames whose result
            # entry is missing/malformed — best-effort extraction.
            # print(sys.exc_info())
            print('error...')

    filename = os.path.basename(args.video).split('.')[0]
    name = filename + '.npz'
    kpts = np.array(kpts).astype(np.float32)
    # print('kpts npz save in ', name)
    # np.savez_compressed(name, kpts=kpts)
    return kpts
def call_alphapose(input_dir, output_dir, format='open', batchSize=1):
    """Run AlphaPose over every image in ``input_dir`` and write JSON results.

    Results are written to ``output_dir`` via ``write_json`` (in the given
    ``format``) and then post-processed by ``correct_json_save``.
    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # NOTE(review): with nested directories this keeps only the LAST walked
    # directory's file list; presumably input_dir is flat — confirm.
    for root, dirs, files in os.walk(input_dir):
        im_names = files
    print(files)
    data_loader = ImageLoader(im_names, batchSize=batchSize, format='yolo',
                              dir_path=input_dir).start()
    det_loader = DetectionLoader(data_loader, batchSize=batchSize).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model (always the full InferenNet here, not the fast variant)
    pose_dataset = Mscoco()
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer (no video output)
    writer = DataWriter(False).start()
    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — mini-batched SPPE over the detections.
            datalen = inps.size(0)
            leftover = 0
            if (datalen) % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

    # Drain the writer queue before collecting results.
    while (writer.running()):
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, output_dir, _format=format)
    correct_json_save(output_dir)
    print('Over')
def handle_video(videofile, no_nan=True):
    """Run AlphaPose on a video and return per-frame keypoints plus video info.

    Takes the first detected person's keypoints in each frame. With
    ``no_nan=False`` the output has one entry per video frame, pre-filled with
    NaN for frames without a detection (so the gaps can be interpolated later);
    with ``no_nan=True`` only frames with a detection are included.

    Returns:
        (kpts, fps, cam_w, cam_h) — float32 keypoint array, frame rate, and
        frame width/height of the input video.
    """
    args.video = videofile
    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    cam_w = frameSize[0]
    cam_h = frameSize[1]
    print('the video is {} f/s'.format(fps))

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    # start a thread to read frames from the file video stream
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    frames_w_pose = []  # indices (0-based) of frames that produced a pose
    frame_cnt = 0
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            frame_cnt += 1
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            frames_w_pose.append(frame_cnt - 1)
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — mini-batched SPPE over the detections.
            datalen = inps.size(0)
            leftover = 0
            if (datalen) % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    while (writer.running()):
        pass
    writer.stop()
    final_result = writer.results()

    kpts = []
    if not no_nan:
        for i in range(frame_cnt):
            # initialize to NaN so we can interpolate later
            kpts.append(np.full((17, 2), np.nan, dtype=np.float32))
    for i in range(len(final_result)):
        try:
            # First (highest-ranked) person only.
            kpt = final_result[i]['result'][0]['keypoints']
            if not no_nan:
                kpts[frames_w_pose[i]] = kpt.data.numpy()
            else:
                kpts.append(kpt.data.numpy())
        except:
            # NOTE(review): bare except skips malformed result entries —
            # deliberate best-effort behavior.
            print('error...')
    kpts = np.array(kpts).astype(np.float32)

    #filename = os.path.basename(args.video).split('.')[0]
    #name = filename + '.npz'
    #print('kpts npz save in ', name)
    #np.savez_compressed(name, kpts=kpts, fps=fps, cam_w=cam_w, cam_h=cam_h)
    return kpts, fps, cam_w, cam_h
def handle_video(video_file):
    """Run AlphaPose on a video (CPU-only variant) and write JSON results.

    Output goes to ``outputs/alpha_pose_<video_name>``. Note every ``.cuda()``
    call is commented out below — this variant deliberately runs on CPU.

    Returns:
        (final_result, video_name) — the writer's collected results and the
        extension-less basename of the input video.
    """
    # =========== common ===============
    args.video = video_file
    base_name = os.path.basename(args.video)
    video_name = base_name[:base_name.rfind('.')]
    # =========== end common ===============
    # =========== image =============== (disabled image-directory input path)
    # img_path = f'outputs/alpha_pose_{video_name}/split_image/'
    # args.inputpath = img_path
    # args.outputpath = f'outputs/alpha_pose_{video_name}'
    # if os.path.exists(args.outputpath):
    #     shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    # else:
    #     os.mkdir(args.outputpath)
    # # if not len(video_file):
    # #     raise IOError('Error: must contain --video')
    # if len(img_path) and img_path != '/':
    #     for root, dirs, files in os.walk(img_path):
    #         im_names = sorted([f for f in files if 'png' in f or 'jpg' in f])
    # else:
    #     raise IOError('Error: must contain either --indir/--list')
    # # Load input images
    # data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start()
    # print(f'Totally {data_loader.datalen} images')
    # =========== end image ===============
    # =========== video ===============
    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        os.mkdir(args.outputpath)
    videofile = args.video
    mode = args.mode
    if not len(videofile):
        raise IOError('Error: must contain --video')
    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()
    print('the video is {} f/s'.format(fps))
    # =========== end video ===============

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    # start a thread to read frames from the file video stream
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    # GPU transfer intentionally disabled (CPU inference); the bare expression
    # below is a no-op left from commenting out `.cuda()`.
    pose_model  # .cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer (video output disabled; JSON-only writer used instead)
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(video_file).split('.')[0] + '.avi')
    # writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — mini-batched, on CPU (no `.cuda()` on inputs).
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)]  # .cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
    return final_result, video_name
raise IOError('Error: must contain either --indir/--list') # Load input images meanwhile start processes, threads data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo', reso=int(args.inp_dim)).start() # Load detection loader print('Loading YOLO model..') sys.stdout.flush() # for multithread displaying det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco() # is_train, res, joints, rot_factor if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Init data writer, for video. writer = DataWriter(args.save_video).start() # save_video default: False data_len = data_loader.length() im_names_desc = tqdm(range(data_len))
def main(args):
    """AlphaPose image-directory demo: detect people, estimate poses, dump JSON.

    Input images come from ``args.inputlist`` (a file of names) or every file
    under ``args.inputpath``; results are written to ``args.outputpath``.
    """
    inputpath = args.inputpath
    inputlist = args.inputlist
    mode = args.mode
    if not os.path.exists(args.outputpath):
        os.mkdir(args.outputpath)

    # Resolve the image list: explicit list file wins over a directory walk.
    if len(inputlist):
        im_names = open(inputlist, 'r').readlines()
    elif len(inputpath) and inputpath != '/':
        for root, dirs, files in os.walk(inputpath):
            im_names = files
    else:
        raise IOError('Error: must contain either --indir/--list')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {
        'dt': [],
        'pt': [],
        'pn': []
    }

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — mini-batched SPPE over the detections.
            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'.format(
                    dt=np.mean(runtime_profile['dt']),
                    pt=np.mean(runtime_profile['pt']),
                    pn=np.mean(runtime_profile['pn']))
            )

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print('===========================> Rendering remaining images in the queue...')
        print('===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).')
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
raise IOError('Error: must contain either --indir/--list') # Load input images # data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='yolo').start() data_loader = ImageLoader(im_names, batchSize=args.detbatch, format='mtcnn').start() # Load detection loader print('Loading det model..') sys.stdout.flush() det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start() det_processor = DetectionProcessor(det_loader).start() # Load pose model pose_dataset = Mscoco(format=data_loader.format) if args.fast_inference: pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset) else: pose_model = InferenNet(4 * 1 + 1, pose_dataset) pose_model.cuda() pose_model.eval() runtime_profile = {'dt': [], 'pt': [], 'pn': []} # Init data writer writer = DataWriter(args.save_video, format='mtcnn').start() data_len = data_loader.length() im_names_desc = tqdm(range(data_len))
def alphapose_process_video(videofile, pose_result_handler, inference_steps=1):
    """Run AlphaPose on a video, streaming each frame's result to a handler.

    ``pose_result_handler`` is handed to the DataWriter as a per-result
    callback; ``inference_steps`` is forwarded to the DetectionLoader
    (presumably to skip frames between detections — confirm in its source).
    Video rendering is enabled iff ``args.video_savefile`` is set.

    Returns:
        The writer's collected results list.
    """
    if not len(videofile):
        raise IOError("Error: must contain --video")

    # Load input video
    print(f"Opening video {videofile}")
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()

    # Load detection loader
    print("Loading YOLO model..")
    sys.stdout.flush()
    det_loader = DetectionLoader(
        data_loader,
        batchSize=args.detbatch,
        path=ALPHAPOSE_DIR,
        inference_steps=inference_steps,
    ).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset, path=ALPHAPOSE_DIR)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset, path=ALPHAPOSE_DIR)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {"dt": [], "pt": [], "pn": []}

    # Data writer
    args.save_video = args.video_savefile is not None
    writer = DataWriter(
        save_video=args.save_video,  # Note: DataWriter uses args.save_video internally as well
        savepath=args.video_savefile,
        fourcc=cv2.VideoWriter_fourcc(*"XVID"),
        fps=fps,
        frameSize=frameSize,
        result_handler=pose_result_handler,
    ).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split("/")[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile["dt"].append(det_time)

            # Pose Estimation — mini-batched SPPE over the detections.
            datalen = inps.size(0)
            leftover = 0
            if (datalen) % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile["pt"].append(pose_time)
            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split("/")[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile["pn"].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                "det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}"
                .format(
                    dt=np.mean(runtime_profile["dt"]),
                    pt=np.mean(runtime_profile["pt"]),
                    pn=np.mean(runtime_profile["pn"]),
                ))

    print("===========================> Finish Model Running.")
    if (args.save_img or args.video_savefile) and not args.vis_fast:
        print(
            "===========================> Rendering remaining images in the queue..."
        )
        print(
            "===========================> If this step takes too long, you can enable "
            "the --vis_fast flag to use fast rendering (real-time).")
    while writer.running():
        pass
    writer.stop()
    return writer.results()
def main(args):
    """Train a CPN-style keypoint model on COCO, logging and checkpointing per epoch.

    Optionally resumes from ``args.resume``; model/optimizer state is saved to
    ``args.checkpoint`` after every epoch via ``save_model``.
    """
    # check point directory
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_skeleton,
                                        pretrained=True)
    model = torch.nn.DataParallel(model).cuda()

    # criterion
    criterion1 = torch.nn.MSELoss().cuda()  # global loss
    # NOTE(review): `reduce=False` is deprecated in modern PyTorch in favor of
    # `reduction='none'` — left unchanged to preserve behavior on this codebase.
    criterion2 = torch.nn.MSELoss(reduce=False).cuda()  # refine loss

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            # NOTE(review): falls through with `logger` unbound if the resume
            # path is missing — the loop below would then raise NameError.
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # Size estimate assumes 4 bytes (float32) per parameter.
    print(' Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(Mscoco(cfg),
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train one step
        train_loss = train(train_loader, model, [criterion1, criterion2],
                           optimizer)
        print('train_loss:', train_loss)

        # logging
        logger.append([epoch + 1, lr, train_loss])

        # saving
        save_model(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    logger.close()
def main(file_name):
    """Run the AlphaPose video pipeline on ``file_name`` and write JSON results.

    Detects people per frame with YOLO, estimates poses with the SPPE network
    in mini-batches of ``args.posebatch``, renders/saves via the DataWriter,
    and dumps the collected results to ``args.outputpath``.

    Fix: removed a leftover debugger breakpoint (``import ipdb;
    ipdb.set_trace()``) that halted execution on every frame with detections.
    """
    # videofile = args.video
    videofile = file_name
    mode = args.mode
    if not os.path.exists(args.outputpath):
        os.mkdir(args.outputpath)

    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    (fourcc, fps, frameSize) = data_loader.videoinfo()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(videofile).split('.')[0] + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1, pt2) = det_processor.read()
            if orig_img is None:
                break
            if boxes is None or boxes.nelement() == 0:
                # No detections: still emit the frame so the writer stays in sync.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue
            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation — mini-batched SPPE over the detections.
            datalen = inps.size(0)
            leftover = 0
            if (datalen) % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)
            hm = hm.cpu().data
            # (debug breakpoint `import ipdb; ipdb.set_trace()` removed here)
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])
            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    while (writer.running()):
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)