def main(args):
    weight_file = args.weight
    process_speed = args.process_speed
    resize_fac = args.resize_factor

    print('start processing...')

    # Dataset input
    data_dir = args.dir
    json_path = args.coco

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()
    print('Model Ready!')

    # Process dataset
    t0 = time.time()
    acc_count = 0
    data = get_data(data_dir, json_path)
    data_count = len(data)
    for i, (input_path, keypoints) in enumerate(data):
        input_image = cv2.imread(input_path)
        t1 = time.time()

        # generate image with body parts
        resized_image = cv2.resize(input_image, (0, 0),
                                   fx=resize_fac, fy=resize_fac,
                                   interpolation=cv2.INTER_CUBIC)
        to_plot, canvas, joint_list, person_to_joint_assoc = process(
            model, resized_image, process_speed)

        kp_count = 0
        for c, kps in keypoints:
            kp_count += c
        if len(person_to_joint_assoc) == len(keypoints):
            # predicted human count matches ground truth
            acc_count += 1
        if args.verb:
            cv2.imshow('preview', to_plot)
            cv2.waitKey(1)
        t2 = time.time()
        processBar(
            i, data_count,
            '[{}/{}]find {} keypoints in {} humans, groundtruth is {} kps in '
            '{} humans. acc:{} process time:{:.3f}, total time:{:.3f}'.format(
                i, data_count, len(joint_list), len(person_to_joint_assoc),
                kp_count, len(keypoints), acc_count / (i + 1),
                (t2 - t1), (t2 - t0)),
            length=20, end="\n")
    cv2.destroyAllWindows()
    processBar(data_count, data_count,
               '{}/{}, acc:{} total time:{:.3f}'.format(
                   data_count, data_count, acc_count / data_count,
                   (time.time() - t0)),
               length=20)
def load_openpose_model(weights=os.path.join(
        dir_name, '../PoseEstimation/network/weight/pose_model.pth'),
        model_type='vgg19'):
    model = get_model(model_type)
    model.load_state_dict(torch.load(weights))
    model = torch.nn.DataParallel(model).to(device)
    model.float()
    model.eval()
    return model
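# --- Usage sketch for load_openpose_model (added illustration, not original
# source). Assumptions: `dir_name` and `device` are module-level globals as
# sketched below, and the checkpoint exists at the default path.
import os
import torch

dir_name = os.path.dirname(os.path.abspath(__file__))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = load_openpose_model()
with torch.no_grad():
    # dummy batch standing in for a preprocessed image (N, C, H, W)
    dummy = torch.zeros(1, 3, 368, 368, device=device)
    # the rtpose VGG19 model returns (outputs, saved_for_loss)
    output, _ = model(dummy)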
def __init__(self, max_size=600):
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    weight_name = os.path.join(cur_dir, 'pose_model_scratch.pth')
    assert os.path.exists(weight_name), \
        'open pose model not found at {}'.format(weight_name)
    self.model = get_model('vgg19')
    state_dict = torch.load(weight_name)
    # remove the 'module.' prefix left by DataParallel checkpoints
    state_dict = {k[7:]: v for k, v in state_dict.items()}
    self.model.load_state_dict(state_dict)
    self.model = torch.nn.DataParallel(self.model).cuda()
    self.model.float()
    self.model.eval()
    self.max_size = max_size
def create_pose_estimation_model(pretrained, dataset, arch, load_vgg19=None,
                                 parallel=True, device_ids=None):
    # noinspection PyGlobalUndefined
    global msglogger

    model = None
    dataset = dataset.lower()
    if dataset == 'coco':
        if arch == 'shufflenetv2':
            model = rtpose_shufflenetV2.Network(width_multiplier=1.0)
            if pretrained:
                msglogger.info(
                    'No pretrained ShuffleNetV2 model available. Init randomly.')
        elif arch == 'vgg19':
            model = rtpose_vgg.get_model(trunk='vgg19')
            if pretrained:
                model_dir = Path('./pretrained')
                model_dir.mkdir(exist_ok=True)
                # load the ImageNet-pretrained VGG19 trunk (the original code
                # referenced an undefined `model_path`; the cache dir created
                # above is the likely intent)
                rtpose_vgg.use_vgg(model, str(model_dir), 'vgg19')
            if load_vgg19:
                # a full checkpoint overrides the trunk-only initialization
                model.load_state_dict(torch.load(load_vgg19))
        elif arch == 'hourglass':
            model = rtpose_hourglass.hg(num_stacks=8, num_blocks=1,
                                        paf_classes=38, ht_classes=19)
            if pretrained:
                msglogger.info(
                    'No pretrained Hourglass model available. Init randomly.')
    else:
        raise ValueError('Could not recognize dataset {}'.format(dataset))

    msglogger.info("=> creating a %s%s model with the %s dataset" %
                   ('pretrained ' if pretrained else '', arch, dataset))
    if torch.cuda.is_available() and device_ids != -1:
        device = 'cuda'
        if parallel:
            print('Data parallel: device_ids =', device_ids)
            # keep the wrapped model (the original assigned it to an unused
            # `net` variable and returned the unwrapped model)
            model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        device = 'cpu'
    return model.to(device)
def pose_model():
    # Pose estimation (OpenPose)
    openpose_dir = Path('../src/pytorch_Realtime_Multi-Person_Pose_Estimation/')
    sys.path.append(str(openpose_dir))

    # openpose
    from network.rtpose_vgg import get_model

    weight_name = openpose_dir.joinpath('network/weight/pose_model.pth')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(str(weight_name)))
    model = torch.nn.DataParallel(model).cuda()
    # inference mode: these were commented out in the original but are
    # needed for deterministic evaluation
    model.float()
    model.eval()
    return model
# validation data
valid_data = get_loader(args.json_path, args.data_dir, args.mask_dir, 368, 8,
                        preprocess='vgg', training=False,
                        batch_size=args.batch_size,
                        params_transform=params_transform,
                        shuffle=False, num_workers=4)
print('val dataset len: {}'.format(len(valid_data.dataset)))

# model
model = get_model(trunk='vgg19')
model = torch.nn.DataParallel(model).cuda()

# load pretrained VGG19 trunk weights
use_vgg(model, args.model_path, 'vgg19')

# Fix the VGG weights first; they will be released (unfrozen) later
# (a sketch of the release step follows below)
for i in range(20):
    for param in model.module.model0[i].parameters():
        param.requires_grad = False

trainable_vars = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(trainable_vars, lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
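# --- Sketch of the "release" step referenced above (added illustration, not
# original source). Assumption: after some warm-up epochs the VGG trunk is
# unfrozen and the optimizer is re-created over all parameters.
def release_vgg_weights(model, args):
    # unfreeze the first 20 trunk layers frozen above
    for i in range(20):
        for param in model.module.model0[i].parameters():
            param.requires_grad = True
    # rebuild the optimizer so it tracks the newly trainable parameters
    return torch.optim.SGD(model.parameters(), lr=args.lr,
                           momentum=args.momentum,
                           weight_decay=args.weight_decay)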
def _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs,
           restore):
    # create tensorboard
    writer = SummaryWriter(path_to_logs_dir)

    # dataloader
    train_dataset = Dataset(class_name=class_name,
                            path_to_data=path_to_data_dir)
    train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True,
                                  num_workers=0, drop_last=True)
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name, split='val')
    val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False,
                                num_workers=0, drop_last=True)

    # load model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')

    # restore model
    if restore:
        model.load_state_dict(torch.load(restore))
    model.train()

    # freeze low-level layers
    for i in range(20):
        for param in model.model0[i].parameters():
            param.requires_grad = False
    trainable_vars = [param for param in model.parameters()
                      if param.requires_grad]
    optimizer = torch.optim.Adam(trainable_vars, lr=0.0001)

    epoch = 0
    step = 1
    best_mse = 1.0
    while epoch != epochs:
        for batch_index, (images, heatmaps_target, pafs_target, _,
                          _) in enumerate(train_dataloader):
            images = images.cuda()
            _, saved_for_loss = model(images)
            loss, heatmaps_losses, pafs_losses = _loss(
                saved_for_loss, heatmaps_target.cuda(), pafs_target.cuda())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 10 == 0:
                print('Epoch: {}, Step: {}, Loss: {}'.format(
                    epoch, step, loss.data.item()))
                writer.add_scalar('train_total_loss/loss', loss, step)
                for stage, (heatmaps_loss, pafs_loss) in enumerate(
                        zip(heatmaps_losses, pafs_losses)):
                    writer.add_scalar(
                        'train_heatmaps_loss/stage_{}'.format(str(stage)),
                        heatmaps_loss, step)
                    writer.add_scalar(
                        'train_pafs_loss/stage_{}'.format(str(stage)),
                        pafs_loss, step)
            if step % 1000 == 0:
                pafs_loss, heatmaps_loss = _validate(model, val_dataloader)
                total_loss = pafs_loss + heatmaps_loss
                print('Validation Paf MSE: {} Heatmap MSE: {} Total MSE: {}'
                      .format(pafs_loss, heatmaps_loss, total_loss))
                writer.add_scalar('val/heatmaps_loss', heatmaps_loss, step)
                writer.add_scalar('val/pafs_loss', pafs_loss, step)
                writer.add_scalar('val/total_loss', total_loss, step)
                if total_loss < best_mse:
                    print('Save checkpoint')
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            path_to_logs_dir,
                            '{}-checkpoint-best.pth'.format(class_name)))
                    best_mse = total_loss
                    print('Best MSE: {}'.format(total_loss))
                model.train()
            step += 1
        epoch += 1

    print('Save checkpoint')
    torch.save(
        model.state_dict(),
        os.path.join(path_to_logs_dir,
                     '{}-checkpoint-last.pth'.format(class_name)))
def main(args):
    input_data = args.input
    weight_file = args.weight
    frame_rate_ratio = args.frame_ratio
    process_speed = args.process_speed
    resize_fac = args.resize_factor
    output_dir = args.output
    output_format = '.h5'
    save_demo = args.verb

    print('start processing...')

    # Video input & output
    io_paths = organize_1to1_io_paths(input_data, VIDEO_EXT, output_dir,
                                      output_format)

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()

    # Video reader
    for input_path, output_path in zip(io_paths["input"], io_paths["output"]):
        print('[*]Process video {} into {}'.format(input_path, output_path))
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # input video info
        cap = cv2.VideoCapture(input_path)
        input_fps = cap.get(cv2.CAP_PROP_FPS)
        height = int(resize_fac * cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(resize_fac * cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        ending_frame = args.out_length
        if ending_frame is None:
            ending_frame = video_length

        out_h5 = h5py.File(output_path, mode="w")
        out_h5["height"] = height
        out_h5["width"] = width

        if save_demo:
            # Video writer
            demo_path = os.path.splitext(output_path)[0] + ".mp4"
            output_fps = input_fps / frame_rate_ratio
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out_demo = cv2.VideoWriter(demo_path, fourcc, output_fps,
                                       (width, height))

        i = 0  # default is 0
        t0 = time.time()
        while (cap.isOpened()) and i < ending_frame:
            ret_val, input_image = cap.read()
            if not ret_val:
                break
            if i % frame_rate_ratio == 0:
                t1 = time.time()

                # generate image with body parts
                resized_image = cv2.resize(input_image, (0, 0),
                                           fx=resize_fac, fy=resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)
                frame_h5 = out_h5.create_group("frame%d" % i)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                if save_demo:
                    out_demo.write(canvas)
                t2 = time.time()
                processBar(i, ending_frame,
                           '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                               i, ending_frame, (t2 - t1), (t2 - t0)),
                           length=20)
            i += 1
        if save_demo:
            out_demo.release()
        out_h5.close()
        processBar(ending_frame, ending_frame,
                   '{}/{}, total time:{:.3f}'.format(i, ending_frame,
                                                     (time.time() - t0)),
                   length=45)
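# --- Read-back sketch for the HDF5 layout written above (added illustration,
# not original source). "poses.h5" is a hypothetical output path.
import h5py

with h5py.File("poses.h5", mode="r") as f:
    height = int(f["height"][()])
    width = int(f["width"][()])
    # frames are stored as "frame<i>" groups, one per processed frame
    for name in (k for k in f.keys() if k.startswith("frame")):
        joint_list = f[name]["joint_list"][:]
        person_to_joint_assoc = f[name]["person_to_joint_assoc"][:]
        print(name, joint_list.shape, person_to_joint_assoc.shape)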
'''
(... COCO keypoint names, tail of the list:
    13-'left_ankle' 14-'right_eye' 15-'left_eye' 16-'right_ear' 17-'left_ear'
)
'''
# mapping from COCO keypoint order to OpenPose joint indices
orderCOCO = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10]
# limb endpoint index pairs: limb k connects joint mid_1[k] to joint mid_2[k]
mid_1 = [1, 8, 9, 1, 11, 12, 1, 2, 3, 2, 1, 5, 6, 5, 1, 0, 0, 14, 15]
mid_2 = [8, 9, 10, 11, 12, 13, 2, 3, 4, 16, 5, 6, 7, 17, 0, 14, 15, 16, 17]

# This txt file comes from the caffe_rtpose repository:
# https://github.com/CMU-Perceptual-Computing-Lab/caffe_rtpose/blob/master/image_info_val2014_1k.txt
image_dir = '/data/coco/val2014/'
save_dir = '/data/coco/val2014_features/'

model = get_model('vgg19')
model = torch.nn.DataParallel(model).cuda()

# Load our model
weight_name = './network/weight/pose_model_scratch.pth'
model.load_state_dict(torch.load(weight_name))
model = model.module
model.eval()
model.float()
model.cuda()

feature_extractor = FeatureExtractor(model.model0)
def main(args):
    output_dir = args.output_dir
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    path_to_data_dir = args.path_to_data_dir
    if not os.path.exists(path_to_data_dir):
        raise FileNotFoundError(path_to_data_dir)
    path_to_checkpoint = args.checkpoint
    if not os.path.exists(path_to_checkpoint):
        raise FileNotFoundError(path_to_checkpoint)
    class_name = args.class_name
    fps = args.fps
    img_prefix = args.img_prefix

    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameters of object size for the PnP solver
    print("=> Load {} object size".format(class_name))
    path_to_object_settings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_settings):
        raise FileNotFoundError(path_to_object_settings)
    object_list = json.load(open(path_to_object_settings))['exported_objects']
    object_size = None
    for obj in object_list:
        if obj['class'].find(class_name) != -1:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameters of the camera for the PnP solver
    path_to_camera_settings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_settings):
        raise FileNotFoundError(path_to_camera_settings)
    intrinsic_settings = json.load(open(
        path_to_camera_settings))['camera_settings'][0]['intrinsic_settings']
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[1, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[2, 2] = 1
    try:
        dist_coeffs = np.array(
            json.load(open(path_to_camera_settings))['camera_settings'][0]
            ["distortion_coefficients"])
    except KeyError:
        dist_coeffs = np.zeros((4, 1))

    path_to_sequences = sorted(
        glob.glob(os.path.join(path_to_data_dir, '*.{}'.format(img_prefix))))
    for img_path in path_to_sequences:
        original_img = crop(cv2.imread(img_path))
        ratio = max(original_img.shape[:2]) / Config.crop_size
        img = cv2.resize(original_img, (Config.crop_size, Config.crop_size))
        img = preprocess(img).float()
        img = torch.unsqueeze(img, 0)
        out, _ = model(img.cuda())
        line, vertex = out[0].squeeze(), out[1].squeeze()
        objects, peaks = find_objects(vertex, line)
        original_img = cv2.putText(original_img,
                                   "Class name: {}".format(class_name),
                                   (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1,
                                   (255, 255, 255), 2)
        if len(objects) > 0:
            for object in objects:
                cuboid2d_points = object[1] + [
                    (object[0][0] * 8, object[0][1] * 8)
                ]
                cuboid3d_points = np.array(cuboid3d_points)
                location = None
                quaternion = None
                obj_2d_points = []
                obj_3d_points = []
                for i in range(8):
                    check_point_2d = cuboid2d_points[i]
                    # Ignore invalid points
                    if check_point_2d is None:
                        continue
                    elif (check_point_2d[0] < 0 or check_point_2d[1] < 0
                          or check_point_2d[0] >= Config.crop_size / Config.stride
                          or check_point_2d[1] >= Config.crop_size / Config.stride):
                        continue
                    else:
                        check_point_2d = (
                            check_point_2d[0] * Config.stride * ratio,
                            check_point_2d[1] * Config.stride * ratio)
                        obj_2d_points.append(check_point_2d)
                        obj_3d_points.append(cuboid3d_points[i])
                centroid = tuple([
                    int(point * Config.stride * ratio) for point in object[0]
                ])
                # color chosen arbitrarily; the original call omitted it and
                # passed -1 (the filled-circle thickness) as the color
                original_img = cv2.circle(original_img, centroid, 5,
                                          (0, 255, 0), -1)
                obj_2d_points = np.array(obj_2d_points, dtype=float)
                obj_3d_points = np.array(obj_3d_points, dtype=float)
                valid_point_count = len(obj_2d_points)
                if valid_point_count >= 5:
                    ret, rvec, tvec = cv2.solvePnP(
                        obj_3d_points, obj_2d_points, matrix_camera,
                        dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
                    if ret:
                        location = list(x[0] for x in tvec)
                        quaternion = convert_rvec_to_quaternion(rvec)
                        projected_points, _ = cv2.projectPoints(
                            cuboid3d_points, rvec, tvec, matrix_camera,
                            dist_coeffs)
                        projected_points = np.squeeze(projected_points)
                        x, y, z = location
                        original_img = cv2.putText(
                            original_img,
                            "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                            .format(x / 10, y / 10, z / 10), (50, 150),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
                        print("Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                              .format(x / 10, y / 10, z / 10))
                        # If location.Z is negative the object is behind the
                        # camera, so flip both location and rotation
                        if z < 0:
                            # Get the opposite location
                            location = [-x, -y, -z]
                            # Change the rotation by 180 degrees
                            rotate_angle = np.pi
                            rotate_quaternion = Quaternion.from_axis_rotation(
                                location, rotate_angle)
                            quaternion = rotate_quaternion.cross(quaternion)
                        vertexes = [tuple(p) for p in projected_points]
                        plot(original_img, vertexes)
        if args.save:
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, os.path.basename(img_path))
            print('=> Save {}'.format(output_path))
            cv2.imwrite(output_path, original_img)
        if args.plot:
            original_img = cv2.resize(original_img, (600, 600))
            cv2.imshow('prediction', original_img)
            cv2.waitKey(int(1000 / fps))
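# --- convert_rvec_to_quaternion is called above but not defined in this
# section. A minimal sketch, assuming the pyrr Quaternion class already used
# above: treat the Rodrigues vector from cv2.solvePnP as axis * angle and
# build the quaternion from that axis-angle pair.
import numpy as np
from pyrr import Quaternion

def convert_rvec_to_quaternion(rvec):
    # the rotation angle is the norm of the Rodrigues vector
    theta = float(np.sqrt(rvec[0] ** 2 + rvec[1] ** 2 + rvec[2] ** 2))
    if theta < 1e-8:
        # no rotation: return the identity quaternion
        return Quaternion()
    # unit rotation axis
    raxis = [float(rvec[0]) / theta,
             float(rvec[1]) / theta,
             float(rvec[2]) / theta]
    return Quaternion.from_axis_rotation(raxis, theta)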
def main(args):
    input_data = args.input
    weight_file = args.weight
    process_speed = args.process_speed
    resize_fac = args.resize_factor
    output_dir = args.output
    output_format = '.h5'
    save_demo = args.verb

    print('start processing...')

    # Input & output
    if args.input_type == 'serial':
        io_paths = organize_Nto1_io_paths(input_data, IMAGE_EXT, output_dir,
                                          output_format)
    else:
        io_paths = organize_1to1_io_paths(input_data, IMAGE_EXT, output_dir,
                                          output_format)
    data_count = len(io_paths["input"])

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()
    print('Model Ready!')

    t0 = time.time()
    for i, (input_path, output_path) in enumerate(
            zip(io_paths["input"], io_paths["output"])):
        if io_paths["type"] == "1to1":
            print('[*]Process {} into {}'.format(input_path, output_path))
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            input_image = cv2.imread(input_path)
            out_h5 = h5py.File(output_path, mode="w")
            out_h5["height"] = input_image.shape[0]
            out_h5["width"] = input_image.shape[1]
            t1 = time.time()

            # generate image with body parts
            resized_image = cv2.resize(input_image, (0, 0),
                                       fx=resize_fac, fy=resize_fac,
                                       interpolation=cv2.INTER_CUBIC)
            to_plot, canvas, joint_list, person_to_joint_assoc = process(
                model, resized_image, process_speed)
            frame_h5 = out_h5.create_group("frame0")
            frame_h5.create_dataset("joint_list", data=joint_list)
            frame_h5.create_dataset("person_to_joint_assoc",
                                    data=person_to_joint_assoc)
            if save_demo:
                demo_path = os.path.splitext(output_path)[0] + ".jpg"
                cv2.imwrite(demo_path, canvas)
            out_h5.close()
            t2 = time.time()
            processBar(i, data_count,
                       '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                           i, data_count, (t2 - t1), (t2 - t0)),
                       length=20)
        elif len(input_path[0]) > 0:
            print('[*]Process {} into {}'.format(
                os.path.dirname(input_path[0]), output_path))
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            input_image = cv2.imread(input_path[0])
            height = input_image.shape[0]
            width = input_image.shape[1]
            out_h5 = h5py.File(output_path, mode="w")
            out_h5["height"] = height
            out_h5["width"] = width
            if save_demo:
                # Video writer
                demo_path = os.path.splitext(output_path)[0] + ".mp4"
                output_fps = 15
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out_demo = cv2.VideoWriter(demo_path, fourcc, output_fps,
                                           (width, height))
            # note: the original reset `i = 0` and `t0` here, which clobbered
            # the outer item index and the overall timer; both resets dropped
            for j, path in enumerate(input_path):
                input_image = cv2.imread(path)
                t1 = time.time()

                # generate image with body parts
                resized_image = cv2.resize(input_image, (0, 0),
                                           fx=resize_fac, fy=resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)
                frame_h5 = out_h5.create_group("frame%d" % j)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                if save_demo:
                    out_demo.write(canvas)
                t2 = time.time()
                processBar(j, len(input_path),
                           '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                               j, len(input_path), (t2 - t1), (t2 - t0)),
                           length=20)
            if save_demo:
                out_demo.release()
            out_h5.close()
            processBar(len(input_path), len(input_path),
                       '{}/{}, total time:{:.3f}'.format(
                           len(input_path), len(input_path),
                           (time.time() - t0)),
                       length=45)
    cv2.destroyAllWindows()
    processBar(data_count, data_count,
               '{}/{}, total time:{:.3f}'.format(data_count, data_count,
                                                 (time.time() - t0)),
               length=45)
vis_dir = './result_vis'
# ------------------------------------------------------------------------------
tic = datetime.datetime.now()

if vis_dir is not None:
    if not os.path.exists(vis_dir):
        os.mkdir(vis_dir)
        print('mkdir:', vis_dir)
    print('save vis images to:', vis_dir)

with torch.autograd.no_grad():
    if model_name == 'vgg19':
        # --- VGG19
        model = get_model(trunk='vgg19',
                          numkeypoints=CF.NUM_KEYPOINTS,
                          numlims=CF.NUM_LIMBS)
        preprocess = 'vgg'
    elif model_name == 'shufflenet':
        # --- ShuffleNet
        model = rtpose_shufflenetV2.Network(width_multiplier=1.0,
                                            numkeypoints=CF.NUM_KEYPOINTS,
                                            numlims=CF.NUM_LIMBS,
                                            multistage=multistage)
        preprocess = 'rtpose'
    else:
        print('Please check the model name.')
        exit(0)
    print('Network backbone: {}'.format(model_name))

    # this path is relative to the root of the project
    state_dict = torch.load(weight_name)
# validation data
valid_data = None
if params.val_nbatch > 0:
    valid_data = get_loader(json_path, data_dir, mask_dir, inp_size,
                            feat_stride, preprocess='vgg', training=False,
                            batch_size=params.batch_size, shuffle=True)
    print('val dataset len: {}'.format(len(valid_data.dataset)))

# model
model = get_model(trunk=trunk)

# load pretrained
if params.ckpt is None:
    use_vgg(model, model_path, trunk)

# Fix the VGG weights first; they will be released (unfrozen) later
for i in range(20):
    for param in model.model0[i].parameters():
        param.requires_grad = False

trainable_vars = [param for param in model.parameters() if param.requires_grad]
params.optimizer = torch.optim.SGD(trainable_vars, lr=params.init_lr,
                                   momentum=momentum,
                                   weight_decay=weight_decay)
def extract_pose_main(video_path, source_path):
    '''
    :param video_path: video whose frames are 256*256
    :param source_path: source image; must be 256*176 and .jpg
    '''
    os.environ["CUDA_VISIBLE_DEVICES"] = '7'
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_name))
    model.cuda()
    model.float()
    model.eval()

    video = video_path
    print("video path is ", video_path)
    source_img = cv2.imread(source_path)
    video_capture = cv2.VideoCapture(video)
    pairLst = "./demo_data/demo-resize-pairs-test.csv"
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("fps is ", fps)

    a = 0
    while video_capture.isOpened():
        if a == 0:
            result_file = open("./demo_data/demo-resize-annotation-test.csv",
                               'w')
            print("name:keypoints_y:keypoints_x", file=result_file)
            result_file1 = open("./demo_data/demo-resize-pairs-test.csv", 'w')
            writer = csv.writer(result_file1)
            writer.writerow(["from", "to"])
            extract_pose(video_path.split("/")[-1][:-4], source_img,
                         source_path.split("/")[-1][:-4], result_file, model)
        ret, oriImg = video_capture.read()
        if not ret:  # end of video (the original did not guard this read)
            break
        print(oriImg.shape)
        # crop the region of interest and pad to a square
        oriImg = oriImg[40:808, 8:536, :]
        oriImg = cv2.copyMakeBorder(oriImg, 0, 0, 120, 120,
                                    cv2.BORDER_CONSTANT,
                                    value=[255, 255, 255])
        oriImg = cv2.resize(oriImg, (256, 256),
                            interpolation=cv2.INTER_LINEAR)
        shape_dst = np.min(oriImg.shape[0:2])
        extract_pose(video_path.split("/")[-1][:-4], oriImg, a, result_file,
                     model)
        # pairLst
        writer.writerow([source_path.split("/")[-1], str(a) + ".jpg"])
        print("finished {} pics".format(a))
        a = a + 1
        if a > 100:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()
    torch.cuda.empty_cache()
def _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix):
    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameters of object size for the PnP solver
    print("=> Load {} object size".format(class_name))
    path_to_object_settings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_settings):
        raise FileNotFoundError(path_to_object_settings)
    object_list = json.load(open(path_to_object_settings))['exported_objects']
    object_size = None
    for obj in object_list:
        if obj['class'].find(class_name) != -1:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameters of the camera for the PnP solver
    path_to_camera_settings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_settings):
        raise FileNotFoundError(path_to_camera_settings)
    intrinsic_settings = json.load(open(
        path_to_camera_settings))['camera_settings'][0]['intrinsic_settings']
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[1, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[2, 2] = 1
    try:
        dist_coeffs = np.array(
            json.load(open(path_to_camera_settings))['camera_settings'][0]
            ["distortion_coefficients"])
    except KeyError:
        dist_coeffs = np.zeros((4, 1))

    # dataloader
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name, split='val',
                          img_prefix=img_prefix)
    val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                                num_workers=0, drop_last=False)

    correct = 0
    wrong = 0
    # distance threshold (cm)
    threshold = 3.0
    for batch_index, (images, _, _, location_targets,
                      ratio) in tqdm(enumerate(val_dataloader)):
        images = images.cuda()
        output, _ = model(images)
        line, vertex = output[0], output[1]
        line, vertex = line.squeeze(), vertex.squeeze()
        objects, peaks = find_objects(vertex, line)
        location_predictions = []
        if len(objects) > 0:
            for object in objects:
                cuboid2d_points = object[1] + [
                    (object[0][0] * 8, object[0][1] * 8)
                ]
                cuboid3d_points = np.array(cuboid3d_points)
                location = None
                quaternion = None
                obj_2d_points = []
                obj_3d_points = []
                for i in range(8):
                    check_point_2d = cuboid2d_points[i]
                    # Ignore invalid points
                    if check_point_2d is None:
                        continue
                    elif (check_point_2d[0] < 0 or check_point_2d[1] < 0
                          or check_point_2d[0] >= Config.crop_size / Config.stride
                          or check_point_2d[1] >= Config.crop_size / Config.stride):
                        continue
                    else:
                        check_point_2d = (
                            check_point_2d[0] * Config.stride * ratio,
                            check_point_2d[1] * Config.stride * ratio)
                        obj_2d_points.append(check_point_2d)
                        obj_3d_points.append(cuboid3d_points[i])
                projected_points = object[1]
                vertexes = projected_points.copy()
                centroid = tuple([
                    int(point * Config.stride * ratio) for point in object[0]
                ])
                obj_2d_points = np.array(obj_2d_points, dtype=np.float32)
                obj_3d_points = np.array(obj_3d_points, dtype=np.float32)
                valid_point_count = len(obj_2d_points)
                if valid_point_count >= 4:
                    ret, rvec, tvec = cv2.solvePnP(
                        obj_3d_points, obj_2d_points, matrix_camera,
                        dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
                    if ret:
                        location = list(x[0] for x in tvec)
                        quaternion = convert_rvec_to_quaternion(rvec)
                        projected_points, _ = cv2.projectPoints(
                            cuboid3d_points, rvec, tvec, matrix_camera,
                            dist_coeffs)
                        projected_points = np.squeeze(projected_points)
                        # If location.Z is negative the object is behind the
                        # camera, so flip both location and rotation
                        x, y, z = location
                        if z < 0:
                            # Get the opposite location
                            location = [-x, -y, -z]
                            # Change the rotation by 180 degrees
                            rotate_angle = np.pi
                            rotate_quaternion = Quaternion.from_axis_rotation(
                                location, rotate_angle)
                            quaternion = rotate_quaternion.cross(quaternion)
                        vertexes = [tuple(p) for p in projected_points]
                        location_predictions.append(location)
        location_predictions = np.array(location_predictions)
        if len(location_targets) == 0:
            wrong += len(location_predictions)
        else:
            location_targets = location_targets.cpu().data.numpy()[0]
            for location_target in location_targets:
                # predictions are in mm; divide by 10 to compare in cm
                distances = [
                    np.sqrt(np.sum(np.square(
                        location_target - location_prediction / 10.0)))
                    for location_prediction in location_predictions
                ]
                if len(distances) == 0:
                    wrong += 1
                elif min(distances) > threshold:
                    wrong += 1
                else:
                    correct += 1
    print('Object: {} Accuracy: {}%'.format(
        class_name, correct / (wrong + correct) * 100.0))
def main(args):
    input_data = args.input_dir
    input_type = args.input_type  # choose from ["1to1", "Nto1"]
    output_dir = args.output_dir
    weight_file = args.weight
    input_ext = args.input_ext  # "image", "video", an extension, or a list
    output_ext = args.out_ext
    frame_rate_ratio = args.frame_ratio  # analyze every [n] frames
    process_speed = args.process_speed  # int, 1 (fastest, lowest quality) to 4 (slowest, highest quality)
    resize_fac = args.resize_factor  # minification factor
    output_length = args.out_length  # int, frame count for output; None for input length
    show_visualize_process = args.verb  # show canvas through OpenCV
    rebuild_exist_file = args.rebuild

    ## Load Model
    model = get_model('vgg19')
    model.load_state_dict(t.load(weight_file))
    model = t.nn.DataParallel(model)
    model.cuda()
    model.float()
    model.eval()
    print("Model Ready!")

    ## Init I/O Paths
    _input_ext_ = IMAGE_EXT if input_ext == "image" \
        else VIDEO_EXT if input_ext == "video" \
        else input_ext if isinstance(input_ext, list) \
        else [input_ext]
    if input_type == "1to1":
        io_paths = organize_1to1_io_paths(input_data, _input_ext_, output_dir,
                                          output_ext)
    else:
        io_paths = organize_Nto1_io_paths(input_data, _input_ext_, output_dir,
                                          output_ext)
    total_item = len(io_paths["input"])
    print("Items count: ", total_item)

    ignore_item = 0
    for i, (input_dir, output_path) in enumerate(
            zip(io_paths["input"], io_paths["output"])):
        if os.path.isfile(output_path):
            if rebuild_exist_file:
                title = '[{}/{}]Rebuild {} from {}'
            else:
                print('[{}/{}]{} already exists, skip'.format(
                    i, total_item, output_path))
                ignore_item += 1
                continue
        else:
            title = '[{}/{}]Build {} from {}'
        if isinstance(input_dir, str):
            # process video
            source_position = input_dir
            loader = load_video_frames(input_dir, output_length,
                                       frame_rate_ratio)
            length, h, w = get_video_size(input_dir, output_length)
        elif isinstance(input_dir, list):
            # process images
            source_position = os.path.dirname(input_dir[0])
            loader = load_images_list(input_dir, output_length,
                                      frame_rate_ratio)
            length, h, w = get_images_size(input_dir, output_length)
        else:
            raise TypeError("Expected string or list(string), but got %s" %
                            type(input_dir))
        print(title.format(i, total_item, output_path, source_position))

        # Video writer
        try:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            output_fps = 15
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            height = int(resize_fac * h)
            width = int(resize_fac * w)
            print("source:{}x{} target:{}x{}".format(h, w, height, width))
            out = cv2.VideoWriter(output_path, fourcc, output_fps,
                                  (width, height))
            out_h5 = h5py.File(output_path + ".h5", mode="w")
            out_h5["height"] = height
            out_h5["width"] = width
            t0 = time.time()
            # the frame loop uses its own index so it does not clobber the
            # outer item index `i` (the original reused `i` here)
            for frame_i, frame in enumerate(loader):
                t1 = time.time()

                # generate image with body parts
                resized_image = cv2.resize(frame, (0, 0),
                                           fx=resize_fac, fy=resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)

                # save outputs
                out.write(canvas)
                frame_h5 = out_h5.create_group("frame%d" % frame_i)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                t2 = time.time()

                # print messages
                print('{}[{}/{}] process time:{:.3f}s total time:{:.3f}s'.format(
                    time.strftime('%H:%M:%S'), frame_i, length,
                    (t2 - t1), (t2 - t0)))
                if show_visualize_process:
                    cv2.imshow(os.path.basename(output_path), to_plot)
                    cv2.waitKey(1)
        finally:
            out.release()
            out_h5.close()
    cv2.destroyAllWindows()
    print("Processed {} items, ignored {} existing items. Saved into {}".format(
        total_item - ignore_item, ignore_item, output_dir))
    print("All work is finished!")