def create_pose_estimation_model(pretrained, dataset, arch, load_vgg19=None,
                                 parallel=True, device_ids=None):
    """Build a pose-estimation network and place it on the selected device.

    Args:
        pretrained: If truthy, initialise from pretrained weights where the
            architecture has them (only ``vgg19`` does; the other
            architectures log a notice and stay randomly initialised).
        dataset: Dataset name, compared case-insensitively. Only ``'coco'``
            is recognised.
        arch: One of ``'shufflenetv2'``, ``'vgg19'`` or ``'hourglass'``.
        load_vgg19: Optional path of a state dict to load into the ``vgg19``
            model after it is built.
        parallel: If truthy and CUDA is used, wrap the model in
            ``torch.nn.DataParallel``.
        device_ids: Passed to ``torch.nn.DataParallel``; the value ``-1``
            forces the CPU even when CUDA is available.

    Returns:
        The model moved to ``'cuda'`` or ``'cpu'``. When ``parallel`` is
        truthy and CUDA is used the returned module is the ``DataParallel``
        wrapper.

    Raises:
        ValueError: If ``dataset`` or ``arch`` is not recognised.
    """
    # noinspection PyGlobalUndefined
    global msglogger

    dataset = dataset.lower()
    if dataset != 'coco':
        raise ValueError('Could not recognize dataset {}'.format(dataset))

    if arch == 'shufflenetv2':
        model = rtpose_shufflenetV2.Network(width_multiplier=1.0)
        if pretrained:
            msglogger.info(
                'No pretrained ShuffleNetV2 model available. Init randomly.')
    elif arch == 'vgg19':
        model = rtpose_vgg.get_model(trunk='vgg19')
        if pretrained:
            model_dir = Path('./pretrained')
            model_dir.mkdir(exist_ok=True)
            # Hand over the directory that was just created; the name used
            # previously here was never defined in this function.
            rtpose_vgg.use_vgg(model, str(model_dir), 'vgg19')
        if load_vgg19:
            # Load onto the CPU first so a checkpoint saved from a GPU can be
            # restored on a CPU-only host; the model is moved to its final
            # device at the end of this function.
            model.load_state_dict(torch.load(load_vgg19, map_location='cpu'))
    elif arch == 'hourglass':
        model = rtpose_hourglass.hg(num_stacks=8, num_blocks=1,
                                    paf_classes=38, ht_classes=19)
        if pretrained:
            msglogger.info(
                'No pretrained Hourglass model available. Init randomly.')
    else:
        # Fail early with a clear message instead of carrying ``None`` down
        # to the ``.to(device)`` call.
        raise ValueError('Could not recognize architecture {}'.format(arch))

    msglogger.info("=> creating a %s%s model with the %s dataset",
                   'pretrained ' if pretrained else '', arch, dataset)

    if torch.cuda.is_available() and device_ids != -1:
        device = 'cuda'
        if parallel:
            print('Data parallel: device_ids =', device_ids)
            # Keep the wrapper: binding it to a throw-away name made the
            # ``parallel`` flag a no-op.
            model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        device = 'cpu'

    return model.to(device)
368, 8, preprocess='vgg', training=False, batch_size=args.batch_size, params_transform=params_transform, shuffle=False, num_workers=4) print('val dataset len: {}'.format(len(valid_data.dataset))) # model model = get_model(trunk='vgg19') #model = encoding.nn.DataParallelModel(model, device_ids=args.gpu_ids) model = torch.nn.DataParallel(model).cuda() # load pretrained use_vgg(model, args.model_path, 'vgg19') # Fix the VGG weights first, and then the weights will be released for i in range(20): for param in model.module.model0[i].parameters(): param.requires_grad = False trainable_vars = [param for param in model.parameters() if param.requires_grad] optimizer = torch.optim.SGD(trainable_vars, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov) writer = SummaryWriter(log_dir=args.logdir)
def _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs,
           restore):
    """Train the VGG19-trunk model for one object class.

    Scalars are written to a ``SummaryWriter`` rooted at ``path_to_logs_dir``.
    Every 1000 steps the model is validated and, when the summed validation
    loss improves, ``<class_name>-checkpoint-best.pth`` is saved. After the
    last epoch ``<class_name>-checkpoint-last.pth`` is saved.

    Args:
        class_name: Object class passed to ``Dataset`` and used in the
            checkpoint file names.
        path_to_data_dir: Data directory passed to ``Dataset``.
        path_to_logs_dir: Directory for TensorBoard logs and checkpoints.
        batch_size: Batch size for both data loaders.
        epochs: Number of passes over the training loader.
        restore: Optional path of a state dict to resume from; falsy to
            start from the weights loaded by ``use_vgg``.
    """
    writer = SummaryWriter(path_to_logs_dir)
    # Close the writer even if training is interrupted so buffered events
    # are flushed to disk.
    try:
        train_dataset = Dataset(class_name=class_name,
                                path_to_data=path_to_data_dir)
        train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True,
                                      num_workers=0, drop_last=True)
        val_dataset = Dataset(path_to_data=path_to_data_dir,
                              class_name=class_name, split='val')
        val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False,
                                    num_workers=0, drop_last=True)

        model = get_model(trunk='vgg19')
        model = model.cuda()
        use_vgg(model, './model', 'vgg19')
        if restore:
            model.load_state_dict(torch.load(restore))
        model.train()

        # Freeze the first 20 entries of ``model0`` so that only the
        # remaining parameters are handed to the optimizer.
        for layer_index in range(20):
            for param in model.model0[layer_index].parameters():
                param.requires_grad = False
        trainable_vars = [
            param for param in model.parameters() if param.requires_grad
        ]
        optimizer = torch.optim.Adam(trainable_vars, lr=0.0001)

        best_checkpoint = os.path.join(
            path_to_logs_dir, '{}-checkpoint-best.pth'.format(class_name))
        last_checkpoint = os.path.join(
            path_to_logs_dir, '{}-checkpoint-last.pth'.format(class_name))

        step = 1
        # Start from +inf so the first validation always yields a "best"
        # checkpoint, whatever the scale of the loss.
        best_mse = float('inf')
        for epoch in range(epochs):
            for images, heatmaps_target, pafs_target, _, _ in train_dataloader:
                images = images.cuda()
                _, saved_for_loss = model(images)
                loss, heatmaps_losses, pafs_losses = _loss(
                    saved_for_loss, heatmaps_target.cuda(), pafs_target.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if step % 10 == 0:
                    loss_value = loss.item()
                    print('Epoch: {}, Step: {}, Loss: {}'.format(
                        epoch, step, loss_value))
                    writer.add_scalar('train_total_loss/loss', loss_value,
                                      step)
                    for stage, (heatmaps_loss, pafs_loss) in enumerate(
                            zip(heatmaps_losses, pafs_losses)):
                        writer.add_scalar(
                            'train_heatmaps_loss/stage_{}'.format(str(stage)),
                            heatmaps_loss, step)
                        writer.add_scalar(
                            'train_pafs_loss/stage_{}'.format(str(stage)),
                            pafs_loss, step)

                if step % 1000 == 0:
                    pafs_loss, heatmaps_loss = _validate(model, val_dataloader)
                    total_loss = pafs_loss + heatmaps_loss
                    print('Validation Paf MSE: {} Heatmap MSE: {} Total MSE: {}'.
                          format(pafs_loss, heatmaps_loss, total_loss))
                    writer.add_scalar('val/heatmaps_loss', heatmaps_loss, step)
                    writer.add_scalar('val/pafs_loss', pafs_loss, step)
                    writer.add_scalar('val/total_loss', total_loss, step)

                    if total_loss < best_mse:
                        print('Save checkpoint')
                        torch.save(model.state_dict(), best_checkpoint)
                        best_mse = total_loss
                        print('Best MSE: {}'.format(total_loss))

                    # Switch back to training mode after validation.
                    model.train()

                step += 1

        print('Save checkpoint')
        torch.save(model.state_dict(), last_checkpoint)
    finally:
        writer.close()
def main(args):
    """Run pose inference over an image sequence and optionally save/show it.

    Reads ``_object_settings.json`` and ``_camera_settings.json`` from the
    data directory, runs the model on every ``*.<img_prefix>`` image found
    there, solves PnP for each detected object and draws the result.

    Args:
        args: Namespace providing ``output_dir``, ``path_to_data_dir``,
            ``checkpoint``, ``class_name``, ``fps``, ``img_prefix``,
            ``save`` and ``plot``.

    Raises:
        FileNotFoundError: If the data directory, the checkpoint or one of
            the two settings files does not exist.
        ValueError: If no object in the object settings matches
            ``class_name``.
    """
    output_dir = args.output_dir
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    path_to_data_dir = args.path_to_data_dir
    if not os.path.exists(path_to_data_dir):
        raise FileNotFoundError(path_to_data_dir)

    path_to_checkpoint = args.checkpoint
    if not os.path.exists(path_to_checkpoint):
        # Report the path that is actually missing.
        raise FileNotFoundError(path_to_checkpoint)

    class_name = args.class_name
    fps = args.fps
    img_prefix = args.img_prefix

    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_seetings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_seetings):
        raise FileNotFoundError(path_to_object_seetings)
    with open(path_to_object_seetings) as settings_file:
        object_list = json.load(settings_file)['exported_objects']

    # The last entry whose class contains ``class_name`` wins.
    object_size = None
    for obj in object_list:
        if class_name in obj['class']:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_seetings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_seetings):
        raise FileNotFoundError(path_to_camera_seetings)
    with open(path_to_camera_seetings) as settings_file:
        camera_settings = json.load(settings_file)['camera_settings'][0]
    intrinsic_settings = camera_settings['intrinsic_settings']

    # NOTE(review): both principal-point entries use max(cx, cy); presumably
    # this matches the square crop applied to every frame -- confirm.
    principal_point = max(intrinsic_settings['cx'], intrinsic_settings['cy'])
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = principal_point
    matrix_camera[1, 2] = principal_point
    matrix_camera[2, 2] = 1

    try:
        dist_coeffs = np.array(camera_settings["distortion_coefficients"])
    except KeyError:
        # No distortion stored for this camera: fall back to zeros.
        dist_coeffs = np.zeros((4, 1))

    # Points at or beyond this index fall outside the network output grid.
    grid_size = Config.crop_size / Config.stride

    path_to_sequences = sorted(
        glob.glob(os.path.join(path_to_data_dir, '*.{}'.format(img_prefix))))
    for img_path in path_to_sequences:
        frame = cv2.imread(img_path)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            print('=> Skip unreadable image {}'.format(img_path))
            continue

        original_img = crop(frame)
        ratio = max(original_img.shape[:2]) / Config.crop_size
        img = cv2.resize(original_img, (Config.crop_size, Config.crop_size))
        img = preprocess(img).float()
        img = torch.unsqueeze(img, 0)
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            out, _ = model(img.cuda())
        line, vertex = out[0].squeeze(), out[1].squeeze()
        objects, _ = find_objects(vertex, line)

        original_img = cv2.putText(original_img,
                                   "Class name: {}".format(class_name),
                                   (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1,
                                   (255, 255, 255), 2)

        for detection in objects:
            # detection[0] is used as the centre, detection[1] as the list of
            # corner candidates; only the first 8 entries are consumed below.
            cuboid2d_points = detection[1] + [
                (detection[0][0] * 8, detection[0][1] * 8)
            ]
            location = None
            quaternion = None
            obj_2d_points = []
            obj_3d_points = []
            for i in range(8):
                point_2d = cuboid2d_points[i]
                # Ignore missing points and points outside the output grid.
                if point_2d is None:
                    continue
                if not (0 <= point_2d[0] < grid_size
                        and 0 <= point_2d[1] < grid_size):
                    continue
                # Scale from grid coordinates back to the cropped frame.
                obj_2d_points.append((point_2d[0] * Config.stride * ratio,
                                      point_2d[1] * Config.stride * ratio))
                obj_3d_points.append(cuboid3d_points[i])

            centroid = tuple(
                int(point * Config.stride * ratio) for point in detection[0])
            # NOTE(review): -1 lands in cv2.circle's colour slot here;
            # possibly meant as thickness (filled) -- confirm.
            original_img = cv2.circle(original_img, centroid, 5, -1)

            obj_2d_points = np.array(obj_2d_points, dtype=float)
            obj_3d_points = np.array(obj_3d_points, dtype=float)
            if len(obj_2d_points) < 5:
                continue

            ret, rvec, tvec = cv2.solvePnP(obj_3d_points,
                                           obj_2d_points,
                                           matrix_camera,
                                           dist_coeffs,
                                           flags=cv2.SOLVEPNP_ITERATIVE)
            if not ret:
                continue

            location = [component[0] for component in tvec]
            quaternion = convert_rvec_to_quaternion(rvec)
            projected_points, _ = cv2.projectPoints(cuboid3d_points, rvec,
                                                    tvec, matrix_camera,
                                                    dist_coeffs)
            projected_points = np.squeeze(projected_points)

            x, y, z = location
            original_img = cv2.putText(
                original_img,
                "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                .format(x / 10, y / 10, z / 10), (50, 150),
                cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
            print("Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                  .format(x / 10, y / 10, z / 10))

            # If the location.Z is negative or object is behind the camera
            # then flip both location and rotation. The flipped pose is not
            # rendered; the text above shows the unflipped values.
            if z < 0:
                # Get the opposite location
                location = [-x, -y, -z]
                # Change the rotation by 180 degree
                rotate_angle = np.pi
                rotate_quaternion = Quaternion.from_axis_rotation(
                    location, rotate_angle)
                quaternion = rotate_quaternion.cross(quaternion)

            vertexes = [tuple(p) for p in projected_points]
            plot(original_img, vertexes)

        if args.save:
            os.makedirs(output_dir, exist_ok=True)
            # basename() copes with either path separator.
            output_path = os.path.join(output_dir, os.path.basename(img_path))
            print('=> Save {}'.format(output_path))
            cv2.imwrite(output_path, original_img)

        if args.plot:
            original_img = cv2.resize(original_img, (600, 600))
            cv2.imshow('prediction', original_img)
            cv2.waitKey(int(1000 / fps))
valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, preprocess='vgg', training=False, batch_size=batch_size, shuffle=True) print('val dataset len: {}'.format(len(valid_data.dataset))) # model model = get_model(trunk='vgg19') model = torch.nn.DataParallel(model).cuda() # load pretrained use_vgg(model, model_path, 'vgg19') # Fix the VGG weights first, and then the weights will be released for i in range(20): for param in model.module.model0[i].parameters(): param.requires_grad = False trainable_vars = [param for param in model.parameters() if param.requires_grad] optimizer = torch.optim.SGD(trainable_vars, lr=init_lr, momentum=momentum, weight_decay=weight_decay, nesterov=nesterov) for epoch in range(5): #adjust_learning_rate(optimizer, epoch)
if params.val_nbatch > 0: valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, preprocess='vgg', training=False, batch_size=params.batch_size, shuffle=True) print('val dataset len: {}'.format(len(valid_data.dataset))) # model model = get_model(trunk=trunk) # load pretrained if params.ckpt is None: use_vgg(model, model_path, trunk) # Fix the VGG weights first, and then the weights will be released for i in range(20): for param in model.model0[i].parameters(): param.requires_grad = False trainable_vars = [param for param in model.parameters() if param.requires_grad] params.optimizer = torch.optim.SGD(trainable_vars, lr=params.init_lr, momentum=momentum, weight_decay=weight_decay, nesterov=nesterov)
def _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix):
    """Evaluate location accuracy of a checkpoint on the ``val`` split.

    For every validation sample the model output is turned into object
    detections, each detection is solved with PnP, and every target location
    is counted as correct when the closest predicted location is within the
    threshold. The accuracy is printed at the end.

    Args:
        class_name: Object class; selects the cuboid size and the dataset.
        path_to_data_dir: Directory holding the data and the
            ``_object_settings.json`` / ``_camera_settings.json`` files.
        path_to_checkpoint: State dict to load into the model.
        img_prefix: Passed through to ``Dataset``.

    Raises:
        FileNotFoundError: If one of the two settings files is missing.
        ValueError: If no object in the object settings matches
            ``class_name``.
    """
    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_seetings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_seetings):
        raise FileNotFoundError(path_to_object_seetings)
    with open(path_to_object_seetings) as settings_file:
        object_list = json.load(settings_file)['exported_objects']

    # The last entry whose class contains ``class_name`` wins.
    object_size = None
    for obj in object_list:
        if class_name in obj['class']:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_seetings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_seetings):
        raise FileNotFoundError(path_to_camera_seetings)
    with open(path_to_camera_seetings) as settings_file:
        camera_settings = json.load(settings_file)['camera_settings'][0]
    intrinsic_settings = camera_settings['intrinsic_settings']

    # NOTE(review): both principal-point entries use max(cx, cy); presumably
    # this matches a square crop applied by the dataset -- confirm.
    principal_point = max(intrinsic_settings['cx'], intrinsic_settings['cy'])
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = principal_point
    matrix_camera[1, 2] = principal_point
    matrix_camera[2, 2] = 1

    try:
        dist_coeffs = np.array(camera_settings["distortion_coefficients"])
    except KeyError:
        # No distortion stored for this camera: fall back to zeros.
        dist_coeffs = np.zeros((4, 1))

    # dataloader
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name,
                          split='val',
                          img_prefix=img_prefix)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=0,
                                drop_last=False)

    correct = 0
    wrong = 0
    # set threshold (cm)
    threshold = 3.0
    # Points at or beyond this index fall outside the network output grid.
    grid_size = Config.crop_size / Config.stride

    # Wrapping the loader directly lets tqdm show the total batch count.
    for images, _, _, location_targets, ratio in tqdm(val_dataloader):
        images = images.cuda()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output, _ = model(images)
        line, vertex = output[0].squeeze(), output[1].squeeze()
        objects, _ = find_objects(vertex, line)

        location_predictions = []
        for detection in objects:
            # detection[0] is used as the centre, detection[1] as the list of
            # corner candidates; only the first 8 entries are consumed below.
            cuboid2d_points = detection[1] + [
                (detection[0][0] * 8, detection[0][1] * 8)
            ]
            obj_2d_points = []
            obj_3d_points = []
            for i in range(8):
                point_2d = cuboid2d_points[i]
                # Ignore missing points and points outside the output grid.
                if point_2d is None:
                    continue
                if not (0 <= point_2d[0] < grid_size
                        and 0 <= point_2d[1] < grid_size):
                    continue
                # NOTE(review): ``ratio`` comes straight from the DataLoader,
                # so it is presumably a batched tensor -- confirm the
                # resulting point array has the layout solvePnP expects.
                obj_2d_points.append((point_2d[0] * Config.stride * ratio,
                                      point_2d[1] * Config.stride * ratio))
                obj_3d_points.append(cuboid3d_points[i])

            obj_2d_points = np.array(obj_2d_points, dtype=np.float32)
            obj_3d_points = np.array(obj_3d_points, dtype=np.float32)
            if len(obj_2d_points) < 4:
                continue

            ret, rvec, tvec = cv2.solvePnP(obj_3d_points,
                                           obj_2d_points,
                                           matrix_camera,
                                           dist_coeffs,
                                           flags=cv2.SOLVEPNP_ITERATIVE)
            if not ret:
                continue

            location = [component[0] for component in tvec]
            quaternion = convert_rvec_to_quaternion(rvec)

            # If the location.Z is negative or object is behind the camera
            # then flip both location and rotation
            x, y, z = location
            if z < 0:
                # Get the opposite location
                location = [-x, -y, -z]
                # Change the rotation by 180 degree
                rotate_angle = np.pi
                rotate_quaternion = Quaternion.from_axis_rotation(
                    location, rotate_angle)
                quaternion = rotate_quaternion.cross(quaternion)

            # Only solved poses are recorded; a detection without a pose
            # would otherwise put ``None`` into the distance computation.
            location_predictions.append(location)

        location_predictions = np.array(location_predictions)

        if len(location_targets) == 0:
            wrong += len(location_predictions)
        else:
            location_targets = location_targets.cpu().data.numpy()[0]
            for location_target in location_targets:
                # NOTE(review): predictions are divided by 10 before being
                # compared with the targets; presumably a unit conversion to
                # match the threshold -- confirm.
                distances = [
                    np.linalg.norm(location_target - location_prediction / 10.0)
                    for location_prediction in location_predictions
                ]
                if not distances or min(distances) > threshold:
                    wrong += 1
                else:
                    correct += 1

    total = correct + wrong
    # Guard against an empty validation set / no scored targets.
    accuracy = correct / total * 100.0 if total else 0.0
    print('Object: {} Accuracy: {}%'.format(class_name, accuracy))