def init_fn(self):
    self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

    # feature extraction model
    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=self.options.lr,
                                      weight_decay=0)
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=16,
                     create_transl=False).to(self.device)

    # per vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # keypoints loss including 2D and 3D
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # SMPL parameters loss if we have
    self.criterion_regr = nn.MSELoss().to(self.device)

    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}
    self.focal_length = constants.FOCAL_LENGTH

    # initialize MVSMPLify
    self.mvsmplify = MVSMPLify(step_size=1e-2, batch_size=16, num_iters=100, focal_length=self.focal_length)

    print(self.options.pretrained_checkpoint)
    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

    # load dictionary of fits
    self.fits_dict = FitsDict(self.options, self.train_ds)

    # create renderer
    self.renderer = Renderer(focal_length=self.focal_length, img_res=224, faces=self.smpl.faces)
def init_fn(self):
    self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=self.options.lr,
                                      weight_decay=0)
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=self.options.batch_size,
                     create_transl=False).to(self.device)

    # Per-vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss
    # No reduction because confidence weighting needs to be applied
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression
    self.criterion_regr = nn.MSELoss().to(self.device)

    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}
    self.focal_length = constants.FOCAL_LENGTH
    self.conf_thresh = self.options.conf_thresh

    # Initialize SMPLify fitting module
    self.smplify = SMPLify(step_size=1e-2,
                           batch_size=self.options.batch_size,
                           num_iters=self.options.num_smplify_iters,
                           focal_length=self.focal_length,
                           prior_mul=0.1,
                           conf_thresh=self.conf_thresh)

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

    # Load dictionary of fits
    self.fits_dict = FitsDict(self.options, self.train_ds)

    # Create renderer
    self.renderer = Renderer(focal_length=self.focal_length,
                             img_res=self.options.img_res,
                             faces=self.smpl.faces)
def __init__(self, light_variability=(20, 8), gridlines_on=None, gridlines_width=None, gridlines_spacing=None):
    if gridlines_on or gridlines_width or gridlines_spacing:
        assert not (gridlines_on is None
                    or gridlines_width is None
                    or gridlines_spacing is None), \
            "All gridlines variables must be set if any are"
    self.rend = Renderer()
    self.shapes = []
    self.grid_shapes = []
    self.center = np.array((0, 140, 300))
    self.light_variability = light_variability

    self.background_prims = []
    background_lower_bound = -1e3
    background_upper_bound = 1e3
    wall_bound = 1e3
    self.background_prims.append(
        Tri([(-wall_bound, 0, wall_bound), (wall_bound, 0, wall_bound), (-wall_bound, 0, -wall_bound)]))
    self.background_prims.append(
        Tri([(-wall_bound, 0, -wall_bound), (wall_bound, 0, wall_bound), (wall_bound, 0, -wall_bound)]))
    self.background_prims.append(
        Tri([(-wall_bound, -50, wall_bound), (0, wall_bound, wall_bound), (wall_bound, -50, wall_bound)]))

    if gridlines_on:
        for i in range(int((background_upper_bound - background_lower_bound) / (gridlines_width + gridlines_spacing))):
            offset = i * (gridlines_width + gridlines_spacing)
            self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_lower_bound),
                                         (background_lower_bound + offset, 0.01, background_upper_bound),
                                         (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
            self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_upper_bound),
                                         (background_lower_bound + gridlines_width + offset, 0.01, background_upper_bound),
                                         (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
            self.grid_shapes.append(Tri([(background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                         (background_upper_bound, 0.01, background_lower_bound + offset),
                                         (background_lower_bound, 0.01, background_lower_bound + offset)]))
            self.grid_shapes.append(Tri([(background_upper_bound, 0.01, background_lower_bound + offset),
                                         (background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                         (background_upper_bound, 0.01, background_lower_bound + gridlines_width + offset)]))

    self.default_light = np.array((400, 300, -800))
    self.default_intensity = 1000000
    self.camera = Cam((0, 140, 300), (128, 128))
def render_plot(img, poses, bboxes):
    renderer = Renderer(vertices_path="./pose_references/vertices_trans.npy",
                        triangles_path="./pose_references/triangles.npy")
    (w, h) = img.size
    image_intrinsics = np.array([[w + h, 0, w // 2], [0, w + h, h // 2], [0, 0, 1]])

    trans_vertices = renderer.transform_vertices(img, poses)
    img = renderer.render(img, trans_vertices, alpha=1)

    # for bbox in bboxes:
    #     bbox = bbox.astype(np.uint8)
    #     print(bbox)
    #     img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)

    return img
def __init__(self, gc, gender):
    data_dir = osp.join(global_var.ROOT, 'pca')
    style_model = np.load(osp.join(data_dir, 'style_model_{}.npz'.format(gc)))
    self.renderer = Renderer(512)
    self.img = None
    self.body = trimesh.load(osp.join(global_var.ROOT, 'smpl', 'hres_{}.obj'.format(gender)), process=False)
    with open(osp.join(global_var.ROOT, 'garment_class_info.pkl'), 'rb') as f:
        garment_meta = pickle.load(f)
    # self.vert_indices = garment_meta[gc]['vert_indices']
    self.f = garment_meta[gc]['f']

    self.gamma = np.zeros([1, 4], dtype=np.float32)

    self.pca = PCA(n_components=4)
    self.pca.components_ = style_model['pca_w']
    self.pca.mean_ = style_model['mean']

    win_name = '{}_{}'.format(gc, gender)
    cv2.namedWindow(win_name)
    cv2.createTrackbar('0', win_name, 100, 200, self.value_change)
    cv2.createTrackbar('1', win_name, 100, 200, self.value_change)
    cv2.createTrackbar('2', win_name, 100, 200, self.value_change)
    cv2.createTrackbar('3', win_name, 100, 200, self.value_change)
    cv2.createTrackbar('trans', win_name, 100, 200, self.value_change)

    self.trans = 0
    self.win_name = win_name
    self.render()
def main():
    model = utils.rnn_model.Model(32, 40, 40, 0.005, 0.0001)
    utils.tensorflow.model_persistor(model, checkpoint_dir='./notebooks/.checkpoints/')
    model = model.copy_in_stateful_model()

    Renderer.init_window(1000, 500)

    while True:
        angle = np.random.uniform(0, 2 * np.pi)
        direction = [np.sin(angle), np.cos(angle)]

        simulation = utils.pong.PONGSimulation(W=40, H=40, direction=angle)
        model.init(direction)

        while True:
            key = get_pressed_key()

            movement_left = 1 if key == 'w' else 0
            movement_left = -1 if key == 's' else movement_left
            movement_right = 1 if key == 'up' else 0
            movement_right = -1 if key == 'down' else movement_right
            controls = [movement_left, movement_right]

            frame, _ = simulation.tick(controls)
            pred_frame = model.tick(controls)

            rgb_frame = cmap(frame)
            rgb_pred_frame = cmap(pred_frame)

            print('LEFT: [%s] -- RIGHT: [%s]' % tuple(' UP ' if i > 0 else 'DOWN' if i < 0 else ' ** ' for i in controls))

            split_screen = np.concatenate((rgb_frame, rgb_pred_frame), axis=1)
            Renderer.show_frame(split_screen)

            if key == 'q':
                return
            if key == 'r':
                break
def init_fn(self):
    # create training dataset
    self.train_ds = create_dataset(self.options.dataset, self.options)

    # create Mesh object
    self.mesh = Mesh()
    self.faces = self.mesh.faces.to(self.device)

    # create GraphCNN
    self.graph_cnn = GraphCNN(self.mesh.adjmat,
                              self.mesh.ref_vertices.t(),
                              num_channels=self.options.num_channels,
                              num_layers=self.options.num_layers).to(self.device)

    # SMPL Parameter regressor
    self.smpl_param_regressor = SMPLParamRegressor().to(self.device)

    # Setup a joint optimizer for the 2 models
    self.optimizer = torch.optim.Adam(params=list(self.graph_cnn.parameters()) + list(self.smpl_param_regressor.parameters()),
                                      lr=self.options.lr,
                                      betas=(self.options.adam_beta1, 0.999),
                                      weight_decay=self.options.wd)

    # SMPL model
    self.smpl = SMPL().to(self.device)

    # Create loss functions
    self.criterion_shape = nn.L1Loss().to(self.device)
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    self.criterion_regr = nn.MSELoss().to(self.device)

    # Pack models and optimizers in a dict - necessary for checkpointing
    self.models_dict = {'graph_cnn': self.graph_cnn,
                        'smpl_param_regressor': self.smpl_param_regressor}
    self.optimizers_dict = {'optimizer': self.optimizer}

    # Renderer for visualization
    self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

    # LSP indices from full list of keypoints
    self.to_lsp = list(range(14))

    # Optionally start training from a pretrained checkpoint
    # Note that this is different from resuming training
    # For the latter use --resume
    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)
def bbox_from_json(bbox_file):
    """Get center and scale of bounding box from bounding box annotations.
    The expected format is [top_left(x), top_left(y), width, height].
    """
    with open(bbox_file, 'r') as f:
        bbox = np.array(json.load(f)['bbox']).astype(np.float32)
    ul_corner = bbox[:2]
    center = ul_corner + 0.5 * bbox[2:]
    width = max(bbox[2], bbox[3])
    scale = width / 200.0
    # make sure the bounding box is rectangular
    return center, scale


# Load pretrained model
model = hmr(config.SMPL_MEAN_PARAMS).to(device)
checkpoint = torch.load(args.checkpoint)
model.load_state_dict(checkpoint['model'], strict=False)

# Load SMPL model
smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1, create_transl=False).to(device)
model.eval()

# Setup renderer for visualization
renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl.faces)

# Preprocess input image and generate predictions
img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=constants.IMG_RES)
with torch.no_grad():
    pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
    pred_output = smpl(betas=pred_betas,
                       body_pose=pred_rotmat[:, 1:],
                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                       pose2rot=False)
    pred_vertices = pred_output.vertices

# Calculate camera parameters for rendering
camera_translation = torch.stack([pred_camera[:, 1],
                                  pred_camera[:, 2],
                                  2 * constants.FOCAL_LENGTH / (constants.IMG_RES * pred_camera[:, 0] + 1e-9)],
                                 dim=-1)
camera_translation = camera_translation[0].cpu().numpy()
pred_vertices = pred_vertices[0].cpu().numpy()
img = img.permute(1, 2, 0).cpu().numpy()
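# --- Illustrative sketch (not part of the original script) ---
# The torch.stack above converts the network's weak-perspective camera [s, tx, ty]
# into a full-perspective translation whose depth is tz = 2 * f / (img_res * s).
# The helper below is a hypothetical standalone restatement of that formula;
# the focal_length and img_res defaults simply mirror the constants used above.
import numpy as np

def weak_perspective_to_translation(s, tx, ty, focal_length=5000.0, img_res=224):
    """Return the camera translation [tx, ty, tz] implied by a weak-perspective camera."""
    tz = 2.0 * focal_length / (img_res * s + 1e-9)
    return np.array([tx, ty, tz])

# Example: a predicted scale of 1.0 at 224 px and focal length 5000 places the body ~44.6 units away.
print(weak_perspective_to_translation(1.0, 0.0, 0.0))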
def init_fn(self):
    self.options.img_res = cfg.DANET.INIMG_SIZE
    self.options.heatmap_size = cfg.DANET.HEATMAP_SIZE

    self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

    self.model = DaNet(options=self.options, smpl_mean_params=path_config.SMPL_MEAN_PARAMS).to(self.device)
    self.smpl = self.model.iuv2smpl.smpl

    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=cfg.SOLVER.BASE_LR,
                                      weight_decay=0)

    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}
    self.focal_length = constants.FOCAL_LENGTH

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

    # Load dictionary of fits of SPIN
    self.fits_dict = FitsDict(self.options, self.train_ds)

    # Create renderer
    try:
        self.renderer = Renderer(focal_length=self.focal_length,
                                 img_res=self.options.img_res,
                                 faces=self.smpl.faces)
    except:
        Warning('No renderer for visualization.')
        self.renderer = None

    self.decay_steps_ind = 1
def init_fn(self):
    self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr)
    self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.95)
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=self.options.batch_size,
                     create_transl=False).to(self.device)

    # consistency loss
    self.criterion_consistency_contrastive = NTXent(tau=self.options.tau, kernel=self.options.kernel).to(self.device)
    self.criterion_consistency_mse = nn.MSELoss().to(self.device)
    # Per-vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss
    # No reduction because confidence weighting needs to be applied
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression
    self.criterion_regr = nn.MSELoss().to(self.device)

    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}
    self.focal_length = constants.FOCAL_LENGTH

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

    # Create renderer
    self.renderer = Renderer(focal_length=self.focal_length,
                             img_res=self.options.img_res,
                             faces=self.smpl.faces)

    # Create input image flag
    self.input_img = self.options.input_img

    # initialize queue
    self.feat_queue = FeatQueue(max_queue_size=self.options.max_queue_size)
class render_utils(object):
    def __init__(self, config, device='cuda'):
        self.batch_size = config.batch_size
        self.image_size = config.image_size
        self.config = config
        self.device = device
        # FLAME geometry model; needed by render_tex_and_normal below
        self.flame = FLAME(self.config).to(self.device)
        self.flametex = FLAMETex(self.config).to(self.device)
        self._setup_renderer()
        self.flame_faces = self.render.faces

    def _setup_renderer(self):
        mesh_file = './data/head_template_mesh.obj'
        self.render = Renderer(self.image_size, obj_filename=mesh_file).to(self.device)

    def render_tex_and_normal(self, shapecode, expcode, posecode, texcode, lightcode, cam):
        verts, _, _ = self.flame(shape_params=shapecode, expression_params=expcode, pose_params=posecode)
        trans_verts = util.batch_orth_proj(verts, cam)
        trans_verts[:, :, 1:] = -trans_verts[:, :, 1:]
        albedos = self.flametex(texcode)
        rendering_results = self.render(verts, trans_verts, albedos, lights=lightcode)
        textured_images, normals = rendering_results['images'], rendering_results['normals']
        normal_images = self.render.render_normal(trans_verts, normals)
        return textured_images, normal_images

    def get_flame_faces(self):
        return self.flame_faces
# Load model
mesh = Mesh(device=device)
# Our pretrained networks have 5 residual blocks with 256 channels.
# You might want to change this if you use a different architecture.
model = CMR(mesh, 5, 256, pretrained_checkpoint=args.checkpoint, device=device)
model.to(device)
model.eval()

# Setup renderer for visualization
renderer = Renderer()

# Preprocess input image and generate predictions
img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=cfg.INPUT_RES)
norm_img = norm_img.to(device)
with torch.no_grad():
    pred_vertices, pred_vertices_smpl, pred_camera, _, _ = model(norm_img)

# Calculate camera parameters for rendering
camera_translation = torch.stack([pred_camera[:, 1],
                                  pred_camera[:, 2],
                                  2 * cfg.FOCAL_LENGTH / (cfg.INPUT_RES * pred_camera[:, 0] + 1e-9)],
# import matplotlib
# matplotlib.use('MACOSX')
import matplotlib.pyplot as plt

from smplx import SMPL

from utils.renderer import Renderer
from utils.cam_utils import perspective_project_torch
from data.ssp3d_dataset import SSP3DDataset
import config

# SMPL models in torch
smpl_male = SMPL(config.SMPL_MODEL_DIR, batch_size=1, gender='male')
smpl_female = SMPL(config.SMPL_MODEL_DIR, batch_size=1, gender='female')

# Pyrender renderer
renderer = Renderer(faces=smpl_male.faces, img_res=512)

# SSP-3D dataset class
ssp3d_dataset = SSP3DDataset(config.SSP_3D_PATH)

indices_to_plot = [11, 60, 199]

# Visualising 3 examples from SSP-3D
for i in indices_to_plot:
    data = ssp3d_dataset.__getitem__(i)
    fname = data['fname']
    image = data['image']
    cropped_image = data['cropped_image']
    silhouette = data['silhouette']
    joints2D = data['joints2D']
class PhotometricFitting(object): def __init__(self, device='cuda'): # self.batch_size = cfg.batch_size # self.image_size = cfg.image_size # self.cropped_size = cfg.cropped_size self.config = cfg self.device = device self.flame = FLAME(self.config).to(self.device) self.flametex = FLAMETex(self.config).to(self.device) self._setup_renderer() def _setup_renderer(self): self.render = Renderer(cfg.image_size, obj_filename=cfg.mesh_file).to(self.device) def optimize(self, images, landmarks, image_masks, video_writer): bz = images.shape[0] shape = nn.Parameter(torch.zeros(bz, cfg.shape_params).float().to(self.device)) tex = nn.Parameter(torch.zeros(bz, cfg.tex_params).float().to(self.device)) exp = nn.Parameter(torch.zeros(bz, cfg.expression_params).float().to(self.device)) pose = nn.Parameter(torch.zeros(bz, cfg.pose_params).float().to(self.device)) cam = torch.zeros(bz, cfg.camera_params) cam[:, 0] = 5. cam = nn.Parameter(cam.float().to(self.device)) lights = nn.Parameter(torch.zeros(bz, 9, 3).float().to(self.device)) e_opt = torch.optim.Adam( [shape, exp, pose, cam, tex, lights], lr=cfg.e_lr, weight_decay=cfg.e_wd ) gt_landmark = landmarks # non-rigid fitting of all the parameters with 68 face landmarks, photometric loss and regularization terms. all_train_iter = 0 all_train_iters = [] photometric_loss = [] for k in range(cfg.max_iter): losses = {} vertices, landmarks2d, landmarks3d = self.flame(shape_params=shape, expression_params=exp, pose_params=pose) trans_vertices = util.batch_orth_proj(vertices, cam) trans_vertices[..., 1:] = - trans_vertices[..., 1:] landmarks2d = util.batch_orth_proj(landmarks2d, cam) landmarks2d[..., 1:] = - landmarks2d[..., 1:] landmarks3d = util.batch_orth_proj(landmarks3d, cam) landmarks3d[..., 1:] = - landmarks3d[..., 1:] losses['landmark'] = util.l2_distance(landmarks2d[:, :, :2], gt_landmark[:, :, :2]) # render albedos = self.flametex(tex) / 255. ops = self.render(vertices, trans_vertices, albedos, lights) predicted_images = ops['images'] # losses['photometric_texture'] = (image_masks * (ops['images'] - images).abs()).mean() * config.w_pho losses['photometric_texture'] = F.smooth_l1_loss(image_masks * ops['images'], image_masks * images) * cfg.w_pho all_loss = 0. 
for key in losses.keys(): all_loss = all_loss + losses[key] losses['all_loss'] = all_loss e_opt.zero_grad() all_loss.backward() e_opt.step() loss_info = '----iter: {}, time: {}\n'.format(k, datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) for key in losses.keys(): loss_info = loss_info + '{}: {}, '.format(key, float(losses[key])) if k % 10 == 0: all_train_iter += 10 all_train_iters.append(all_train_iter) photometric_loss.append(losses['photometric_texture']) print(loss_info) grids = {} visind = range(bz) # [0] grids['images'] = torchvision.utils.make_grid(images[visind]).detach().cpu() grids['landmarks_gt'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks[visind])) grids['landmarks2d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks2d[visind])) grids['landmarks3d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks3d[visind])) grids['albedoimage'] = torchvision.utils.make_grid( (ops['albedo_images'])[visind].detach().cpu()) grids['render'] = torchvision.utils.make_grid(predicted_images[visind].detach().float().cpu()) shape_images = self.render.render_shape(vertices, trans_vertices, images) grids['shape'] = torchvision.utils.make_grid( F.interpolate(shape_images[visind], [224, 224])).detach().float().cpu() # grids['tex'] = torchvision.utils.make_grid(F.interpolate(albedos[visind], [224, 224])).detach().cpu() grid = torch.cat(list(grids.values()), 1) grid_image = (grid.numpy().transpose(1, 2, 0).copy() * 255)[:, :, [2, 1, 0]] grid_image = np.minimum(np.maximum(grid_image, 0), 255).astype(np.uint8) video_writer.write(grid_image) single_params = { 'shape': shape.detach().cpu().numpy(), 'exp': exp.detach().cpu().numpy(), 'pose': pose.detach().cpu().numpy(), 'cam': cam.detach().cpu().numpy(), 'verts': trans_vertices.detach().cpu().numpy(), 'albedos': albedos.detach().cpu().numpy(), 'tex': tex.detach().cpu().numpy(), 'lit': lights.detach().cpu().numpy() } util.draw_train_process("training", all_train_iters, photometric_loss, 'photometric loss') # np.save("./test_results/model.npy", single_params) return single_params def run(self, img, net, rect_detect, landmark_detect, rect_thresh, save_name, video_writer, savefolder): # The implementation is potentially able to optimize with images(batch_size>1), # here we show the example with a single image fitting images = [] landmarks = [] image_masks = [] bbox = rect_detect.extract(img, rect_thresh) if len(bbox) > 0: crop_image, new_bbox = util.crop_img(img, bbox[0], cfg.cropped_size) # input landmark resize_img, landmark = landmark_detect.extract([crop_image, [new_bbox]]) landmark = landmark[0] landmark[:, 0] = landmark[:, 0] / float(resize_img.shape[1]) * 2 - 1 landmark[:, 1] = landmark[:, 1] / float(resize_img.shape[0]) * 2 - 1 landmarks.append(torch.from_numpy(landmark)[None, :, :].double().to(self.device)) landmarks = torch.cat(landmarks, dim=0) # input image image = cv2.resize(crop_image, (cfg.cropped_size, cfg.cropped_size)).astype(np.float32) / 255. 
image = image[:, :, [2, 1, 0]].transpose(2, 0, 1) images.append(torch.from_numpy(image[None, :, :, :]).double().to(self.device)) images = torch.cat(images, dim=0) images = F.interpolate(images, [cfg.image_size, cfg.image_size]) # face segment mask image_mask = util.face_seg(crop_image, net, cfg.cropped_size) image_masks.append(torch.from_numpy(image_mask).double().to(cfg.device)) image_masks = torch.cat(image_masks, dim=0) image_masks = F.interpolate(image_masks, [cfg.image_size, cfg.image_size]) # check folder exist or not util.check_mkdir(savefolder) save_file = os.path.join(savefolder, save_name) # optimize single_params = self.optimize(images, landmarks, image_masks, video_writer) self.render.save_obj(filename=save_file, vertices=torch.from_numpy(single_params['verts'][0]).to(self.device), textures=torch.from_numpy(single_params['albedos'][0]).to(self.device) ) np.save(save_file, single_params)
class UrbanEnv(CarlaGym):
    """Gym-style CARLA environment for urban driving among pedestrians."""

    def __init__(self):
        super(UrbanEnv, self).__init__()

        # Initialize environment parameters
        self.town = carla_config.town
        self.state_y = carla_config.grid_height
        self.state_x = carla_config.grid_width
        self.channel = carla_config.features

        # Sensors
        self.rgb_sensor = None
        self.semantic_sensor = None

        # Planners
        self.planner = None

        # States and actions
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(carla_config.grid_height, carla_config.grid_width, carla_config.features),
                                            dtype=np.uint8)
        self.action_space = spaces.Discrete(carla_config.N_DISCRETE_ACTIONS)

        self.ego_vehicle = None
        self.current_speed = 0.0

        # Rendering related
        self.renderer = None
        self.is_render_enabled = carla_config.render
        if self.is_render_enabled:
            self.renderer = Renderer()
            self.init_renderer()

    def step(self, action=None, sp=25):
        self._take_action(action, sp)
        self.tick()
        state = self._get_observation()
        reward, done = self._get_reward()
        if self.render and self.is_render_enabled:
            if self.rgb_image is not None:
                img = self.get_rendered_image()
                self.renderer.render_image(img)
        return state, reward, done

    def _get_observation(self):
        tensor = np.zeros([self.state_y, self.state_x, self.channel])

        # Fill ego vehicle information
        ev_trans = self.ego_vehicle.get_transform()
        for i in range(-1, 2):
            for j in range(0, 2):
                x_discrete, status = self.get_index(i, carla_config.x_min, carla_config.x_max, carla_config.x_size)
                y_discrete, status = self.get_index(j, carla_config.y_min, carla_config.y_max, carla_config.y_size)
                x_discrete = np.argmax(x_discrete)
                y_discrete = np.argmax(y_discrete)
                tensor[x_discrete, y_discrete, :] = [0.01]

        # Fill pedestrian information
        peds = self.world.get_actors().filter('walker.pedestrian.*')
        for p in peds:
            p_trans = p.get_transform()
            # print(p_trans.rotation.yaw - ev_trans.rotation.yaw)
            p_xyz = np.array([p_trans.location.x, p_trans.location.y, p_trans.location.z])
            ev_xyz = np.array([ev_trans.location.x, ev_trans.location.y, ev_trans.location.z])
            ped_loc = p_xyz - ev_xyz

            pitch = math.radians(ev_trans.rotation.pitch)
            roll = math.radians(ev_trans.rotation.roll)
            yaw = math.radians(ev_trans.rotation.yaw)
            R = transforms3d.euler.euler2mat(roll, pitch, yaw).T
            ped_loc_relative = np.dot(R, ped_loc)

            x_discrete, status_x = self.get_index(ped_loc_relative[0], carla_config.x_min, carla_config.x_max, carla_config.x_size)
            y_discrete, status_y = self.get_index(ped_loc_relative[1], carla_config.y_min, carla_config.y_max, carla_config.y_size)

            if status_x and status_y:
                x_discrete = np.argmax(x_discrete)
                y_discrete = np.argmax(y_discrete)

                # Get pedestrian id
                p_id = int(p.attributes['role_name'])

                # Get pedestrian relative orientation
                p_heading = p_trans.rotation.yaw - ev_trans.rotation.yaw

                # Get pedestrian lane type
                ped_lane = None
                waypoint = self.world.get_map().get_waypoint(p_trans.location,
                                                             project_to_road=True,
                                                             lane_type=(carla.LaneType.Driving | carla.LaneType.Sidewalk))
                if waypoint.lane_type == carla.LaneType.Driving:
                    ped_lane = 1
                elif waypoint.lane_type == carla.LaneType.Sidewalk or waypoint.lane_type == carla.LaneType.Shoulder:
                    ped_lane = 2

                if within_crosswalk(p_trans.location.x, p_trans.location.y):
                    ped_lane = 3

                # ped id, ped relative orientation and region occupied
                tensor[x_discrete, y_discrete, :] = [self.normalize_data(p_id, 0.0, carla_config.num_of_ped),
                                                     self.normalize_data(p_heading, 0.0, 360.0),
                                                     self.normalize_data(ped_lane, 0.0, 3)]
                # print(tensor[x_discrete, y_discrete, :])
        return tensor

    def _get_reward(self):
        done = False
        total_reward = d_reward = nc_reward = c_reward = 0

        # Reward for speed
        # ev_speed = get_speed(self.ego_vehicle)
        # if ev_speed > 0.0 and ev_speed <= 50:
        #     d_reward = (10.0 - abs(10.0 - ev_speed)) / 10.0
        # elif ev_speed > 50:
        #     d_reward = -5
        # elif ev_speed <= 0.0:
        #     d_reward = -2

        # Reward (penalty) for collision
        pedestrian_list = self.world.get_actors().filter('walker.pedestrian.*')
        collision, near_collision, pedestrian = self.is_collision(pedestrian_list)
        if collision is True:
            # print('collision')
            c_reward = -10
            done = True
        elif near_collision is True:
            # print('near collision')
            nc_reward = -5

        # Check if goal reached
        if self.planner.done():
            done = True

        total_reward = d_reward + nc_reward + c_reward

        return total_reward, done

    def _take_action(self, action, sp):
        mps_kmph_conversion = 3.6
        target_speed = 0.0

        # accelerate
        if action == 0:
            current_speed = get_speed(self.ego_vehicle) / mps_kmph_conversion
            desired_speed = current_speed + 0.2
            desired_speed *= 3.6
            self.current_speed = desired_speed
            self.planner.local_planner.set_speed(desired_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

        # decelerate
        elif action == 1:
            current_speed = get_speed(self.ego_vehicle) / mps_kmph_conversion
            desired_speed = current_speed - 0.2
            desired_speed *= 3.6
            self.current_speed = desired_speed
            self.planner.local_planner.set_speed(desired_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

        # emergency stop
        elif action == 2:
            self.emergency_stop()

        # speed tracking
        elif action == 3:
            self.planner.local_planner.set_speed(self.current_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

    def emergency_stop(self):
        control = carla.VehicleControl()
        control.steer = 0.0
        control.throttle = 0.0
        control.brake = 1.0
        control.hand_brake = False
        self.ego_vehicle.apply_control(control)

    def reset(self, client_only=False):
        if self.server:
            self.close()

        self.setup_client_and_server(display=carla_config.display, rendering=carla_config.render)
        self.initialize_ego_vehicle()
        self.apply_settings(fps=1.0, no_rendering=not carla_config.render)
        self.world.get_map().generate_waypoints(1.0)

        # Run some initial steps
        for i in range(100):
            self.step('2')

        state = self._get_observation()
        return state

    def initialize_ego_vehicle(self):
        # Spawn ego vehicle
        sp = carla.Transform(carla.Location(x=carla_config.sp_x, y=carla_config.sp_y, z=carla_config.sp_z),
                             carla.Rotation(yaw=carla_config.sp_yaw))
        bp = random.choice(self.world.get_blueprint_library().filter(carla_config.ev_bp))
        bp.set_attribute('role_name', carla_config.ev_name)
        self.ego_vehicle = self.spawn_ego_vehicle(bp, sp)

        # Add sensors to ego vehicle
        # RGB sensor
        if carla_config.rgb_sensor:
            rgb_bp = self.world.get_blueprint_library().find('sensor.camera.rgb')
            rgb_bp.set_attribute('image_size_x', carla_config.rgb_size_x)
            rgb_bp.set_attribute('image_size_y', carla_config.rgb_size_y)
            rgb_bp.set_attribute('fov', carla_config.rgb_fov)
            transform = carla.Transform(carla.Location(x=carla_config.rgb_loc_x, z=carla_config.rgb_loc_z))
            self.rgb_sensor = self.world.spawn_actor(rgb_bp, transform, attach_to=self.ego_vehicle)
            self.rgb_sensor.listen(self.rgb_sensor_callback)
            self.rgb_image = None

        # Semantic sensor
        if carla_config.sem_sensor:
            sem_bp = self.world.get_blueprint_library().find('sensor.camera.semantic_segmentation')
            sem_bp.set_attribute('image_size_x', carla_config.rgb_size_x)
            sem_bp.set_attribute('image_size_y', carla_config.rgb_size_y)
            sem_bp.set_attribute('fov', carla_config.rgb_fov)
            transform = carla.Transform(carla.Location(x=carla_config.rgb_loc_x, z=carla_config.rgb_loc_z))
            self.semantic_sensor = self.world.spawn_actor(sem_bp, transform, attach_to=self.ego_vehicle)
            self.semantic_sensor.listen(self.semantic_sensor_callback)
            self.semantic_image = None

        # Initialize the planner
        self.planner = Planner()
        self.planner.initialize(self.ego_vehicle)
        self.planner.set_destination((carla_config.ev_goal_x, carla_config.ev_goal_y, carla_config.ev_goal_z))

    def spawn_ego_vehicle(self, bp=None, sp=None):
        if not bp:
            bp = random.choice(self.world.get_blueprint_library().filter('vehicle.*'))
        if not sp:
            sp = random.choice(self.world.get_map().get_spawn_points())
        return self.world.spawn_actor(bp, sp)

    def rgb_sensor_callback(self, image):
        # image.convert(cc.Raw)
        array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
        array = np.reshape(array, (image.height, image.width, 4))
        array = array[:, :, :3]
        self.rgb_image = array

    def semantic_sensor_callback(self, image):
        image.convert(cc.CityScapesPalette)
        array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
        array = np.reshape(array, (image.height, image.width, 4))
        array = array[:, :, :3]
        array = array[:, :, ::-1]
        self.semantic_image = array

    def close(self):
        # if carla_config.rgb_sensor:
        if self.rgb_sensor is not None:
            self.rgb_sensor.destroy()
        # if carla_config.sem_sensor:
        if self.semantic_sensor is not None:
            self.semantic_sensor.destroy()
        # if self.ego_vehicle is not None:
        #     self.ego_vehicle.destroy()
        if self.renderer is not None:
            self.renderer.close()
        self.kill_processes()

    def init_renderer(self):
        self.no_of_cam = 0
        if carla_config.rgb_sensor:
            self.no_of_cam += 1
        if carla_config.sem_sensor:
            self.no_of_cam += 1
        self.renderer.create_screen(carla_config.screen_x, carla_config.screen_y * self.no_of_cam)

    def render(self):
        if self.renderer is None or not self.is_render_enabled:
            return
        img = self.get_rendered_image()
        self.renderer.render_image(img)

    def get_rendered_image(self):
        temp = []
        if carla_config.rgb_sensor:
            temp.append(self.rgb_image)
        if carla_config.sem_sensor:
            temp.append(self.semantic_image)
        return np.vstack([img for img in temp])

    def get_index(self, val, start, stop, num):
        grids = np.linspace(start, stop, num)
        features = np.zeros(num)
        # Check extremes
        if val <= grids[0] or val > grids[-1]:
            return features, False
        for i in range(len(grids) - 1):
            if val >= grids[i] and val < grids[i + 1]:
                features[i] = 1
        return features, True

    def normalize_data(self, data, min_val, max_val):
        return (data - min_val) / (max_val - min_val)

    def is_collision(self, entity_list):
        ego_vehicle_location = self.ego_vehicle.get_transform().location
        ego_vehicle_waypoint = self.world.get_map().get_waypoint(ego_vehicle_location)

        for target in entity_list:
            # if the object is not in our lane it's not an obstacle
            target_waypoint = self.world.get_map().get_waypoint(target.get_location(),
                                                                project_to_road=True,
                                                                lane_type=(carla.LaneType.Driving | carla.LaneType.Sidewalk))
            # if target_waypoint.road_id == ego_vehicle_waypoint.road_id and \
            #         target_waypoint.lane_id == ego_vehicle_waypoint.lane_id:
            #         target_waypoint.lane_type == ego_vehicle_waypoint.lane_type:
            if target_waypoint.lane_type == carla.LaneType.Driving and target_waypoint.lane_id == ego_vehicle_waypoint.lane_id:
                if is_within_distance_ahead(target.get_transform(), self.ego_vehicle.get_transform(), 4.0):
                    # if self.distance(self.ego_vehicle.get_transform(), target.get_transform()) < 10.0:
                    return (True, True, target)
            # if target_waypoint.road_id == ego_vehicle_waypoint.road_id and \
            #         target_waypoint.lane_type == ego_vehicle_waypoint.lane_type:
            elif is_within_distance_ahead(target.get_transform(), self.ego_vehicle.get_transform(), 8.0):
                return (False, True, target)

        return (False, False, None)

    def distance(self, source_transform, destination_transform):
        dx = source_transform.location.x - destination_transform.location.x
        dy = source_transform.location.y - destination_transform.location.y
        return math.sqrt(dx * dx + dy * dy)

    def get_ego_speed(self):
        return get_speed(self.ego_vehicle)
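# --- Illustrative sketch (standalone copy of the grid helpers above, for clarity) ---
# get_index returns a one-hot vector marking the bin that contains `val` plus a
# validity flag; normalize_data min-max scales a value into [0, 1]. The values in
# the small demo at the bottom are only an example, not taken from carla_config.
import numpy as np

def get_index(val, start, stop, num):
    grids = np.linspace(start, stop, num)
    features = np.zeros(num)
    # values outside the grid are reported as invalid
    if val <= grids[0] or val > grids[-1]:
        return features, False
    for i in range(len(grids) - 1):
        if grids[i] <= val < grids[i + 1]:
            features[i] = 1
    return features, True

def normalize_data(data, min_val, max_val):
    return (data - min_val) / (max_val - min_val)

one_hot, valid = get_index(3.5, 0.0, 30.0, 31)  # a pedestrian 3.5 m ahead falls in bin 3
print(valid, int(np.argmax(one_hot)), normalize_data(90.0, 0.0, 360.0))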
if __name__ == '__main__':
    args = parser.parse_args()
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load trained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.trained_model)
    model.load_state_dict(checkpoint['model'], strict=False)
    smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1, create_transl=False).to(device)
    model.eval()

    # Generate rendered image
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl.faces)

    # Process the image and predict the parameters
    img, norm_img = process_image(args.test_image, args.bbox, input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas,
                           body_pose=pred_rotmat[:, 1:],
                           global_orient=pred_rotmat[:, 0].unsqueeze(1),
                           pose2rot=False)
        pred_vertices = pred_output.vertices
        camera_translation = torch.stack([pred_camera[:, 1],
                                          pred_camera[:, 2],
                                          2 * constants.FOCAL_LENGTH / (constants.IMG_RES * pred_camera[:, 0] + 1e-9)
class Trainer(BaseTrainer): def init_fn(self): self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True) self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device) # Switch the optimizer if self.options.optimizer == 'adam': print('Using adam') self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr, weight_decay=0) elif self.options.optimizer == 'sgd': print('Using sgd') self.optimizer = torch.optim.SGD(params=self.model.parameters(), lr=self.options.lr, weight_decay=0) elif self.options.optimizer == 'momentum': print('Using momentum') self.optimizer = torch.optim.SGD(params=self.model.parameters(), lr=self.options.lr, momentum=self.options.momentum, weight_decay=0) else: print(self.options.optimizer + 'Not found') raise Exception("Optimizer Wrong!") self.smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=self.options.batch_size, create_transl=False).to(self.device) # Per-vertex loss on the shape self.criterion_shape = nn.L1Loss().to(self.device) # Keypoint (2D and 3D) loss # No reduction because confidence weighting needs to be applied self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device) # Loss for SMPL parameter regression self.criterion_regr = nn.MSELoss().to(self.device) self.models_dict = {'model': self.model} self.optimizers_dict = {'optimizer': self.optimizer} self.focal_length = constants.FOCAL_LENGTH # Initialize SMPLify fitting module self.smplify = SMPLify(step_size=1e-2, batch_size=self.options.batch_size, num_iters=self.options.num_smplify_iters, focal_length=self.focal_length) if self.options.pretrained_checkpoint is not None: self.load_pretrained( checkpoint_file=self.options.pretrained_checkpoint) # Load dictionary of fits self.fits_dict = FitsDict(self.options, self.train_ds) # Create renderer self.renderer = Renderer(focal_length=self.focal_length, img_res=self.options.img_res, faces=self.smpl.faces) def finalize(self): self.fits_dict.save() def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, openpose_weight, gt_weight): """ Compute 2D reprojection loss on the keypoints. The loss is weighted by the confidence. The available keypoints are different for each dataset. """ conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone() conf[:, :25] *= openpose_weight conf[:, 25:] *= gt_weight loss = (conf * self.criterion_keypoints( pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean() return loss def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d, has_pose_3d): """Compute 3D keypoint loss for the examples that 3D keypoint annotations are available. The loss is weighted by the confidence. 
""" pred_keypoints_3d = pred_keypoints_3d[:, 25:, :] conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone() gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone() gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1] conf = conf[has_pose_3d == 1] pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1] if len(gt_keypoints_3d) > 0: gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2 gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :] pred_pelvis = (pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2 pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :] return (conf * self.criterion_keypoints(pred_keypoints_3d, gt_keypoints_3d)).mean() else: return torch.FloatTensor(1).fill_(0.).to(self.device) def shape_loss(self, pred_vertices, gt_vertices, has_smpl): """Compute per-vertex loss on the shape for the examples that SMPL annotations are available.""" pred_vertices_with_shape = pred_vertices[has_smpl == 1] gt_vertices_with_shape = gt_vertices[has_smpl == 1] if len(gt_vertices_with_shape) > 0: return self.criterion_shape(pred_vertices_with_shape, gt_vertices_with_shape) else: return torch.FloatTensor(1).fill_(0.).to(self.device) def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas, has_smpl): pred_rotmat_valid = pred_rotmat[has_smpl == 1] gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view( -1, 24, 3, 3)[has_smpl == 1] pred_betas_valid = pred_betas[has_smpl == 1] gt_betas_valid = gt_betas[has_smpl == 1] if len(pred_rotmat_valid) > 0: loss_regr_pose = self.criterion_regr(pred_rotmat_valid, gt_rotmat_valid) loss_regr_betas = self.criterion_regr(pred_betas_valid, gt_betas_valid) else: loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device) loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device) return loss_regr_pose, loss_regr_betas def train_step(self, input_batch): self.model.train() # Get data from the batch images = input_batch['img'] # input image gt_keypoints_2d = input_batch['keypoints'] # 2D keypoints gt_pose = input_batch['pose'] # SMPL pose parameters gt_betas = input_batch['betas'] # SMPL beta parameters gt_joints = input_batch['pose_3d'] # 3D pose has_smpl = input_batch['has_smpl'].byte( ) # flag that indicates whether SMPL parameters are valid has_pose_3d = input_batch['has_pose_3d'].byte( ) # flag that indicates whether 3D pose is valid is_flipped = input_batch[ 'is_flipped'] # flag that indicates whether image was flipped during data augmentation rot_angle = input_batch[ 'rot_angle'] # rotation angle used for data augmentation dataset_name = input_batch[ 'dataset_name'] # name of the dataset the image comes from indices = input_batch[ 'sample_index'] # index of example inside its dataset batch_size = images.shape[0] # Get GT vertices and model joints # Note that gt_model_joints is different from gt_joints as it comes from SMPL gt_out = self.smpl(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]) gt_model_joints = gt_out.joints gt_vertices = gt_out.vertices # Get current best fits from the dictionary opt_pose, opt_betas = self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(), is_flipped.cpu())] opt_pose = opt_pose.to(self.device) opt_betas = opt_betas.to(self.device) opt_output = self.smpl(betas=opt_betas, body_pose=opt_pose[:, 3:], global_orient=opt_pose[:, :3]) opt_vertices = opt_output.vertices if opt_vertices.shape != (self.options.batch_size, 6890, 3): opt_vertices = torch.zeros_like(opt_vertices, device=self.device) opt_joints = opt_output.joints # De-normalize 2D keypoints 
from [-1,1] to pixel space gt_keypoints_2d_orig = gt_keypoints_2d.clone() gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * ( gt_keypoints_2d_orig[:, :, :-1] + 1) # Estimate camera translation given the model joints and 2D keypoints # by minimizing a weighted least squares loss gt_cam_t = estimate_translation(gt_model_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) opt_cam_t = estimate_translation(opt_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) opt_joint_loss = self.smplify.get_fitting_loss( opt_pose, opt_betas, opt_cam_t, 0.5 * self.options.img_res * torch.ones(batch_size, 2, device=self.device), gt_keypoints_2d_orig).mean(dim=-1) # Feed images in the network to predict camera and SMPL parameters pred_rotmat, pred_betas, pred_camera = self.model(images) pred_output = self.smpl(betas=pred_betas, body_pose=pred_rotmat[:, 1:], global_orient=pred_rotmat[:, 0].unsqueeze(1), pose2rot=False) pred_vertices = pred_output.vertices if pred_vertices.shape != (self.options.batch_size, 6890, 3): pred_vertices = torch.zeros_like(pred_vertices, device=self.device) pred_joints = pred_output.joints # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size # This camera translation can be used in a full perspective projection pred_cam_t = torch.stack([ pred_camera[:, 1], pred_camera[:, 2], 2 * self.focal_length / (self.options.img_res * pred_camera[:, 0] + 1e-9) ], dim=-1) camera_center = torch.zeros(batch_size, 2, device=self.device) pred_keypoints_2d = perspective_projection( pred_joints, rotation=torch.eye(3, device=self.device).unsqueeze(0).expand( batch_size, -1, -1), translation=pred_cam_t, focal_length=self.focal_length, camera_center=camera_center) # Normalize keypoints to [-1,1] pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.) if self.options.run_smplify: # Convert predicted rotation matrices to axis-angle pred_rotmat_hom = torch.cat([ pred_rotmat.detach().view(-1, 3, 3).detach(), torch.tensor( [0, 0, 1], dtype=torch.float32, device=self.device).view( 1, 3, 1).expand(batch_size * 24, -1, -1) ], dim=-1) pred_pose = rotation_matrix_to_angle_axis( pred_rotmat_hom).contiguous().view(batch_size, -1) # tgm.rotation_matrix_to_angle_axis returns NaN for 0 rotation, so manually hack it pred_pose[torch.isnan(pred_pose)] = 0.0 # Run SMPLify optimization starting from the network prediction new_opt_vertices, new_opt_joints,\ new_opt_pose, new_opt_betas,\ new_opt_cam_t, new_opt_joint_loss = self.smplify( pred_pose.detach(), pred_betas.detach(), pred_cam_t.detach(), 0.5 * self.options.img_res * torch.ones(batch_size, 2, device=self.device), gt_keypoints_2d_orig) new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1) # Will update the dictionary for the examples where the new loss is less than the current one update = (new_opt_joint_loss < opt_joint_loss) opt_joint_loss[update] = new_opt_joint_loss[update] opt_vertices[update, :] = new_opt_vertices[update, :] opt_joints[update, :] = new_opt_joints[update, :] opt_pose[update, :] = new_opt_pose[update, :] opt_betas[update, :] = new_opt_betas[update, :] opt_cam_t[update, :] = new_opt_cam_t[update, :] self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(), is_flipped.cpu(), update.cpu())] = (opt_pose.cpu(), opt_betas.cpu()) else: update = torch.zeros(batch_size, device=self.device).byte() # Replace extreme betas with zero betas opt_betas[(opt_betas.abs() > 3).any(dim=-1)] = 0. 
# Replace the optimized parameters with the ground truth parameters, if available opt_vertices[has_smpl, :, :] = gt_vertices[has_smpl, :, :] opt_cam_t[has_smpl, :] = gt_cam_t[has_smpl, :] opt_joints[has_smpl, :, :] = gt_model_joints[has_smpl, :, :] opt_pose[has_smpl, :] = gt_pose[has_smpl, :] opt_betas[has_smpl, :] = gt_betas[has_smpl, :] # Assert whether a fit is valid by comparing the joint loss with the threshold valid_fit = (opt_joint_loss < self.options.smplify_threshold).to( self.device) # Add the examples with GT parameters to the list of valid fits # print(valid_fit.dtype) valid_fit = valid_fit.to(torch.uint8) valid_fit = valid_fit | has_smpl opt_keypoints_2d = perspective_projection( opt_joints, rotation=torch.eye(3, device=self.device).unsqueeze(0).expand( batch_size, -1, -1), translation=opt_cam_t, focal_length=self.focal_length, camera_center=camera_center) opt_keypoints_2d = opt_keypoints_2d / (self.options.img_res / 2.) # Compute loss on SMPL parameters loss_regr_pose, loss_regr_betas = self.smpl_losses( pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit) # Compute 2D reprojection loss for the keypoints loss_keypoints = self.keypoint_loss(pred_keypoints_2d, gt_keypoints_2d, self.options.openpose_train_weight, self.options.gt_train_weight) # Compute 3D keypoint loss loss_keypoints_3d = self.keypoint_3d_loss(pred_joints, gt_joints, has_pose_3d) # Per-vertex loss for the shape loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit) # Compute total loss # The last component is a loss that forces the network to predict positive depth values loss = self.options.shape_loss_weight * loss_shape +\ self.options.keypoint_loss_weight * loss_keypoints +\ self.options.keypoint_loss_weight * loss_keypoints_3d +\ loss_regr_pose + self.options.beta_loss_weight * loss_regr_betas +\ ((torch.exp(-pred_camera[:,0]*10)) ** 2 ).mean() loss *= 60 # Do backprop self.optimizer.zero_grad() loss.backward() self.optimizer.step() # Pack output arguments for tensorboard logging output = { 'pred_vertices': pred_vertices.detach(), 'opt_vertices': opt_vertices, 'pred_cam_t': pred_cam_t.detach(), 'opt_cam_t': opt_cam_t } losses = { 'loss': loss.detach().item(), 'loss_keypoints': loss_keypoints.detach().item(), 'loss_keypoints_3d': loss_keypoints_3d.detach().item(), 'loss_regr_pose': loss_regr_pose.detach().item(), 'loss_regr_betas': loss_regr_betas.detach().item(), 'loss_shape': loss_shape.detach().item() } return output, losses def train_summaries(self, input_batch, output, losses): # Update dictionary every time when summaries are provoked self.finalize() images = input_batch['img'] images = images * torch.tensor( [0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1) images = images + torch.tensor( [0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1) pred_vertices = output['pred_vertices'] assert pred_vertices.shape == (self.options.batch_size, 6890, 3) opt_vertices = output['opt_vertices'] pred_cam_t = output['pred_cam_t'] opt_cam_t = output['opt_cam_t'] images_pred = self.renderer.visualize_tb(pred_vertices, pred_cam_t, images) images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t, images) self.summary_writer.add_image('pred_shape', images_pred, self.step_count) self.summary_writer.add_image('opt_shape', images_opt, self.step_count) for loss_name, val in losses.items(): self.summary_writer.add_scalar(loss_name, val, self.step_count)
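# --- Illustrative sketch (standalone restatement of keypoint_loss in the Trainer above) ---
# The 2D keypoint loss weights the per-joint MSE by the annotation confidence stored in
# the last column of gt_keypoints_2d; OpenPose joints (first 25) and ground-truth joints
# get separate global weights. The shapes and default weights below are assumptions
# chosen only for the demo.
import torch
import torch.nn as nn

criterion_keypoints = nn.MSELoss(reduction='none')

def keypoint_loss(pred_keypoints_2d, gt_keypoints_2d, openpose_weight=0.0, gt_weight=1.0):
    conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
    conf[:, :25] *= openpose_weight
    conf[:, 25:] *= gt_weight
    return (conf * criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()

pred = torch.rand(2, 49, 2)                                    # 25 OpenPose + 24 GT joints
gt = torch.cat([torch.rand(2, 49, 2), torch.ones(2, 49, 1)], dim=-1)  # last column = confidence
print(keypoint_loss(pred, gt).item())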
def init_fn(self):
    # create training dataset
    self.train_ds = create_dataset(self.options.dataset, self.options, use_IUV=True)
    self.dp_res = int(self.options.img_res // (2**self.options.warp_level))

    self.CNet = DPNet(warp_lv=self.options.warp_level,
                      norm_type=self.options.norm_type).to(self.device)

    self.LNet = get_LNet(self.options).to(self.device)
    self.smpl = SMPL().to(self.device)
    self.female_smpl = SMPL(cfg.FEMALE_SMPL_FILE).to(self.device)
    self.male_smpl = SMPL(cfg.MALE_SMPL_FILE).to(self.device)

    uv_res = self.options.uv_res
    self.uv_type = self.options.uv_type
    self.sampler = Index_UV_Generator(UV_height=uv_res, UV_width=-1, uv_type=self.uv_type).to(self.device)

    weight_file = 'data/weight_p24_h{:04d}_w{:04d}_{}.npy'.format(uv_res, uv_res, self.uv_type)
    if not os.path.exists(weight_file):
        cal_uv_weight(self.sampler, weight_file)
    uv_weight = torch.from_numpy(np.load(weight_file)).to(self.device).float()
    uv_weight = uv_weight * self.sampler.mask.to(uv_weight.device).float()
    uv_weight = uv_weight / uv_weight.mean()
    self.uv_weight = uv_weight[None, :, :, None]
    self.tv_factor = (uv_res - 1) * (uv_res - 1)

    # Setup an optimizer
    if self.options.stage == 'dp':
        self.optimizer = torch.optim.Adam(params=list(self.CNet.parameters()),
                                          lr=self.options.lr,
                                          betas=(self.options.adam_beta1, 0.999),
                                          weight_decay=self.options.wd)
        self.models_dict = {'CNet': self.CNet}
        self.optimizers_dict = {'optimizer': self.optimizer}
    else:
        self.optimizer = torch.optim.Adam(params=list(self.LNet.parameters()) + list(self.CNet.parameters()),
                                          lr=self.options.lr,
                                          betas=(self.options.adam_beta1, 0.999),
                                          weight_decay=self.options.wd)
        self.models_dict = {'CNet': self.CNet, 'LNet': self.LNet}
        self.optimizers_dict = {'optimizer': self.optimizer}

    # Create loss functions
    self.criterion_shape = nn.L1Loss().to(self.device)
    self.criterion_uv = nn.L1Loss().to(self.device)
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    self.criterion_keypoints_3d = nn.L1Loss(reduction='none').to(self.device)
    self.criterion_regr = nn.MSELoss().to(self.device)

    # LSP indices from full list of keypoints
    self.to_lsp = list(range(14))
    self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

    # Optionally start training from a pretrained checkpoint
    # Note that this is different from resuming training
    # For the latter use --resume
    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)
class PhotometricFitting(object): def __init__(self, device='cuda'): self.batch_size = cfg.batch_size self.image_size = cfg.image_size self.cropped_size = cfg.cropped_size self.config = cfg self.device = device self.flame = FLAME(self.config).to(self.device) self.flametex = FLAMETex(self.config).to(self.device) self._setup_renderer() def _setup_renderer(self): self.render = Renderer(self.image_size, obj_filename=cfg.mesh_file).to(self.device) def optimize(self, images, landmarks, image_masks, savefolder=None): bz = images.shape[0] shape = nn.Parameter( torch.zeros(bz, cfg.shape_params).float().to(self.device)) tex = nn.Parameter( torch.zeros(bz, cfg.tex_params).float().to(self.device)) exp = nn.Parameter( torch.zeros(bz, cfg.expression_params).float().to(self.device)) pose = nn.Parameter( torch.zeros(bz, cfg.pose_params).float().to(self.device)) cam = torch.zeros(bz, cfg.camera_params) cam[:, 0] = 5. cam = nn.Parameter(cam.float().to(self.device)) lights = nn.Parameter(torch.zeros(bz, 9, 3).float().to(self.device)) e_opt = torch.optim.Adam([shape, exp, pose, cam, tex, lights], lr=cfg.e_lr, weight_decay=cfg.e_wd) e_opt_rigid = torch.optim.Adam([pose, cam], lr=cfg.e_lr, weight_decay=cfg.e_wd) gt_landmark = landmarks # rigid fitting of pose and camera with 51 static face landmarks, # this is due to the non-differentiable attribute of contour landmarks trajectory for k in range(200): losses = {} vertices, landmarks2d, landmarks3d = self.flame( shape_params=shape, expression_params=exp, pose_params=pose) trans_vertices = util.batch_orth_proj(vertices, cam) trans_vertices[..., 1:] = -trans_vertices[..., 1:] landmarks2d = util.batch_orth_proj(landmarks2d, cam) landmarks2d[..., 1:] = -landmarks2d[..., 1:] landmarks3d = util.batch_orth_proj(landmarks3d, cam) landmarks3d[..., 1:] = -landmarks3d[..., 1:] losses['landmark'] = util.l2_distance( landmarks2d[:, 17:, :2], gt_landmark[:, 17:, :2]) * cfg.w_lmks all_loss = 0. for key in losses.keys(): all_loss = all_loss + losses[key] losses['all_loss'] = all_loss e_opt_rigid.zero_grad() all_loss.backward() e_opt_rigid.step() loss_info = '----iter: {}, time: {}\n'.format( k, datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) for key in losses.keys(): loss_info = loss_info + '{}: {}, '.format( key, float(losses[key])) if k % 10 == 0: print(loss_info) if k % 10 == 0: grids = {} visind = range(bz) # [0] grids['images'] = torchvision.utils.make_grid( images[visind]).detach().cpu() grids['landmarks_gt'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks[visind])) grids['landmarks2d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks2d[visind])) grids['landmarks3d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks3d[visind])) grid = torch.cat(list(grids.values()), 1) grid_image = (grid.numpy().transpose(1, 2, 0).copy() * 255)[:, :, [2, 1, 0]] grid_image = np.minimum(np.maximum(grid_image, 0), 255).astype(np.uint8) cv2.imwrite('{}/{}.jpg'.format(savefolder, k), grid_image) # non-rigid fitting of all the parameters with 68 face landmarks, photometric loss and regularization terms. 
for k in range(200, 1000): losses = {} vertices, landmarks2d, landmarks3d = self.flame( shape_params=shape, expression_params=exp, pose_params=pose) trans_vertices = util.batch_orth_proj(vertices, cam) trans_vertices[..., 1:] = -trans_vertices[..., 1:] landmarks2d = util.batch_orth_proj(landmarks2d, cam) landmarks2d[..., 1:] = -landmarks2d[..., 1:] landmarks3d = util.batch_orth_proj(landmarks3d, cam) landmarks3d[..., 1:] = -landmarks3d[..., 1:] losses['landmark'] = util.l2_distance( landmarks2d[:, :, :2], gt_landmark[:, :, :2]) * cfg.w_lmks losses['shape_reg'] = (torch.sum(shape**2) / 2) * cfg.w_shape_reg # *1e-4 losses['expression_reg'] = (torch.sum(exp**2) / 2) * cfg.w_expr_reg # *1e-4 losses['pose_reg'] = (torch.sum(pose**2) / 2) * cfg.w_pose_reg ## render albedos = self.flametex(tex) / 255. ops = self.render(vertices, trans_vertices, albedos, lights) predicted_images = ops['images'] losses['photometric_texture'] = ( image_masks * (ops['images'] - images).abs()).mean() * cfg.w_pho all_loss = 0. for key in losses.keys(): all_loss = all_loss + losses[key] losses['all_loss'] = all_loss e_opt.zero_grad() all_loss.backward() e_opt.step() loss_info = '----iter: {}, time: {}\n'.format( k, datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')) for key in losses.keys(): loss_info = loss_info + '{}: {}, '.format( key, float(losses[key])) if k % 10 == 0: print(loss_info) # visualize if k % 10 == 0: grids = {} visind = range(bz) # [0] grids['images'] = torchvision.utils.make_grid( images[visind]).detach().cpu() grids['landmarks_gt'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks[visind])) grids['landmarks2d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks2d[visind])) grids['landmarks3d'] = torchvision.utils.make_grid( util.tensor_vis_landmarks(images[visind], landmarks3d[visind])) grids['albedoimage'] = torchvision.utils.make_grid( (ops['albedo_images'])[visind].detach().cpu()) grids['render'] = torchvision.utils.make_grid( predicted_images[visind].detach().float().cpu()) shape_images = self.render.render_shape( vertices, trans_vertices, images) grids['shape'] = torchvision.utils.make_grid( F.interpolate(shape_images[visind], [224, 224])).detach().float().cpu() # grids['tex'] = torchvision.utils.make_grid(F.interpolate(albedos[visind], [224, 224])).detach().cpu() grid = torch.cat(list(grids.values()), 1) grid_image = (grid.numpy().transpose(1, 2, 0).copy() * 255)[:, :, [2, 1, 0]] grid_image = np.minimum(np.maximum(grid_image, 0), 255).astype(np.uint8) cv2.imwrite('{}/{}.jpg'.format(savefolder, k), grid_image) single_params = { 'shape': shape.detach().cpu().numpy(), 'exp': exp.detach().cpu().numpy(), 'pose': pose.detach().cpu().numpy(), 'cam': cam.detach().cpu().numpy(), 'verts': trans_vertices.detach().cpu().numpy(), 'albedos': albedos.detach().cpu().numpy(), 'tex': tex.detach().cpu().numpy(), 'lit': lights.detach().cpu().numpy() } return single_params def run(self, imagepath, landmarkpath): # The implementation is potentially able to optimize with images(batch_size>1), # here we show the example with a single image fitting images = [] landmarks = [] image_masks = [] image_name = os.path.basename(imagepath)[:-4] savefile = os.path.sep.join([cfg.save_folder, image_name + '.npy']) # photometric optimization is sensitive to the hair or glass occlusions, # therefore we use a face segmentation network to mask the skin region out. 
image_mask_folder = './FFHQ_seg/' image_mask_path = os.path.sep.join( [image_mask_folder, image_name + '.npy']) image = cv2.resize(cv2.imread(imagepath), (cfg.cropped_size, cfg.cropped_size)).astype( np.float32) / 255. image = image[:, :, [2, 1, 0]].transpose(2, 0, 1) images.append(torch.from_numpy(image[None, :, :, :]).to(self.device)) image_mask = np.load(image_mask_path, allow_pickle=True) image_mask = image_mask[..., None].astype('float32') image_mask = image_mask.transpose(2, 0, 1) image_mask_bn = np.zeros_like(image_mask) image_mask_bn[np.where(image_mask != 0)] = 1. image_masks.append( torch.from_numpy(image_mask_bn[None, :, :, :]).to(self.device)) landmark = np.load(landmarkpath).astype(np.float32) landmark[:, 0] = landmark[:, 0] / float(image.shape[2]) * 2 - 1 landmark[:, 1] = landmark[:, 1] / float(image.shape[1]) * 2 - 1 landmarks.append( torch.from_numpy(landmark)[None, :, :].float().to(self.device)) images = torch.cat(images, dim=0) images = F.interpolate(images, [cfg.image_size, cfg.image_size]) image_masks = torch.cat(image_masks, dim=0) image_masks = F.interpolate(image_masks, [cfg.image_size, cfg.image_size]) landmarks = torch.cat(landmarks, dim=0) savefolder = os.path.sep.join([cfg.save_folder, image_name]) util.check_mkdir(savefolder) # optimize single_params = self.optimize(images, landmarks, image_masks, savefolder) self.render.save_obj(filename=savefile[:-4] + '.obj', vertices=torch.from_numpy( single_params['verts'][0]).to(self.device), textures=torch.from_numpy( single_params['albedos'][0]).to(self.device)) np.save(savefile, single_params)
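# A minimal, hypothetical driver for the PhotometricFitting class above, assuming it lives in
# the same module as cfg: it expects an RGB image, a 68x2 landmark .npy file, and a matching
# FFHQ_seg mask as loaded in run(). The argument handling below is illustrative only.
if __name__ == '__main__':
    import sys

    imagepath = sys.argv[1]      # e.g. path to <name>.png
    landmarkpath = sys.argv[2]   # e.g. path to <name>.npy with 68 landmarks
    os.makedirs(cfg.save_folder, exist_ok=True)
    fitting = PhotometricFitting(device='cuda')
    fitting.run(imagepath, landmarkpath)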
import os import os.path as osp import cv2 import numpy as np import pickle from utils.renderer import Renderer from smpl_torch import SMPLNP from global_var import ROOT if __name__ == '__main__': garment_class = 't-shirt' gender = 'female' img_size = 512 renderer = Renderer(img_size) smpl = SMPLNP(gender=gender, cuda=False) pose_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pose') shape_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'shape') ss_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'style_shape') pose_vis_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pose_vis') ss_vis_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'style_shape_vis') pivots_path = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pivots.txt') avail_path = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'avail.txt') os.makedirs(pose_vis_dir, exist_ok=True) os.makedirs(ss_vis_dir, exist_ok=True) with open(os.path.join(ROOT, 'garment_class_info.pkl'), 'rb') as f: class_info = pickle.load(f, encoding='latin-1') body_f = smpl.base.faces garment_f = class_info[garment_class]['f']
from utils.shapes import Sphere, Cuboid, Tetrahedron from utils.renderer import Renderer, Cam, Lit, Tri import cv2 rend = Renderer() sp = Sphere((0, 0, 0), 25) shapes = sp.render() cuboid = Cuboid((0, 0, -100)) cuboid.scale(25, axis=0) cuboid.scale(50, axis=1) cuboid.scale(75, axis=2) cuboid.rotate(90, 90) shapes.extend(cuboid.render()) tetrahedron = Tetrahedron((100, 0, 0)) tetrahedron.scale(25, axis=0) tetrahedron.scale(50, axis=1) tetrahedron.scale(20, axis=2) tetrahedron.rotate(45, 180) shapes.extend(tetrahedron.render()) background_prims = [] background_prims.append(Tri([(-1000.00,-40.00,1000.00), (1000.00,-40.00, 1000.00), (-1000.00,-40.00,-1000.00)])) background_prims.append(Tri([(-1000.00,-40.00,-1000.00), (1000.00,-40.00, 1000.00), (1000.00,-40.00,-1000.00)])) light = Lit((125,300,35),79000) camera = [Cam((200,222,83),( -.5,-.7,-.5), (640,480))] shadow, noshadow = rend.render(camera, light, shapes, background_prims) cv2.imwrite("shadow.png", shadow) cv2.imwrite("noshadow.png", noshadow)
plt.savefig(args.img[:-4]+'_CMRpreds'+'.png', dpi=400) if __name__ == '__main__': args = parser.parse_args() # Load model mesh = Mesh(device=DEVICE) # Our pretrained networks have 5 residual blocks with 256 channels. # You might want to change this if you use a different architecture. model = CMR(mesh, 5, 256, pretrained_checkpoint=args.checkpoint) if DEVICE == torch.device("cuda"): model.cuda() model.eval() # Setup renderer for visualization renderer = Renderer() # Preprocess input image and generate predictions img, norm_img = process_image(args.img, input_res=cfg.INPUT_RES) if DEVICE == torch.device("cuda"): norm_img = norm_img.cuda() with torch.no_grad(): pred_vertices, pred_vertices_smpl, pred_camera, _, _ = model(norm_img)#.cuda()) # Calculate camera parameters for rendering camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*cfg.FOCAL_LENGTH/(cfg.INPUT_RES * pred_camera[:,0] +1e-9)],dim=-1) camera_translation = camera_translation[0].cpu().numpy() pred_vertices = pred_vertices[0].cpu().numpy() pred_vertices_smpl = pred_vertices_smpl[0].cpu().numpy() img = img.permute(1,2,0).cpu().numpy()
class Scene: def __init__(self, light_variability=(20,8), gridlines_on=None, gridlines_width=None, gridlines_spacing=None): if gridlines_on or gridlines_width or gridlines_spacing: assert not (gridlines_on is None\ or gridlines_width is None\ or gridlines_spacing is None),\ "All gridlines variables must be set if any are" self.rend = Renderer() self.shapes = [] self.grid_shapes = [] self.center = np.array((0, 140, 300)) self.light_variability = light_variability self.background_prims = [] background_lower_bound = -1e3 background_upper_bound = 1e3 wall_bound = 1e3 self.background_prims.append( Tri([(-wall_bound, 0, wall_bound), (wall_bound, 0, wall_bound), (-wall_bound, 0, -wall_bound)])) self.background_prims.append( Tri([(-wall_bound, 0, -wall_bound), (wall_bound, 0, wall_bound), (wall_bound, 0, -wall_bound)])) self.background_prims.append( Tri([(-wall_bound, -50, wall_bound), (0, wall_bound, wall_bound), (wall_bound, -50, wall_bound)])) if gridlines_on: for i in range(int((background_upper_bound - background_lower_bound) / (gridlines_width + gridlines_spacing))): offset = i * (gridlines_width + gridlines_spacing) self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_lower_bound), (background_lower_bound + offset, 0.01, background_upper_bound), (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)])) self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_upper_bound), (background_lower_bound + gridlines_width + offset, 0.01, background_upper_bound), (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)])) self.grid_shapes.append(Tri([(background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset), (background_upper_bound, 0.01, background_lower_bound + offset), (background_lower_bound, 0.01, background_lower_bound + offset)])) self.grid_shapes.append(Tri([(background_upper_bound, 0.01, background_lower_bound + offset), (background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset), (background_upper_bound, 0.01, background_lower_bound + gridlines_width + offset)])) self.default_light = np.array((400, 300, -800)) self.default_intensity = 1000000 self.camera = Cam((0, 140, 300), (128, 128)) def calc_center(self): return mean([shape.center for shape in self.shapes]) def add_object(self, i=-1): if i<0: i = randint(0, 7) shape = [Sphere(self.center, 0.5), Tetrahedron(self.center), Cuboid(self.center), Cylinder(self.center, 50), Pyramid(self.center), Cone(self.center, 50), Torus(self.center, 0.5, 50, 0.15), HollowCuboid(self.center, 0.15)][i] shape.scale(35) self.shapes.append(shape) def mutate_object(self, shape): shape.scale(randint(25, 40)) self.__rotate_object(shape) self.__translate_object(shape) def mutate_all_objects(self): for shape in self.shapes: self.__scale_object(shape) self.__rotate_object(shape) self.__translate_object(shape) def crossover(self, scene): offspring = Scene() offspring.shapes = self.shapes + scene.shapes shuffle(offspring.shapes) offspring.shapes = offspring.shapes[:len(offspring.shapes)//2] return offspring def mutate(self): if randint(0,1) == 0: self.add_object() else: shape = self.shapes[randint(0, len(self.shapes) - 1)] mutation = [self.__scale_object, self.__translate_object, self.__rotate_object][randint(0,2)] mutation(shape) def new_light(self, theta = 60, phi=8): d = norm(self.default_light - self.center) x_trans = d * math.sin(math.radians(theta)) z_trans = d * math.cos(math.radians(theta)) y_trans = d * 
math.sin(math.radians(phi))
        translation = np.array((x_trans, y_trans, -z_trans))
        return Lit(self.center + translation, self.default_intensity)

    def __scale_object(self, shape):
        for i in range(3):
            shape.scale(uniform(0.8, 1.2), axis=i)

    def __translate_object(self, shape):
        shape.translate((randint(-50, 50), 0, randint(-50, 50)))

    def ground_mesh(self):
        for shape in self.shapes:
            lowest_y = shape.lowest_y()
            shape.translate((0, -lowest_y, 0))

    def __rotate_object(self, shape):
        shape.rotate(randint(0, 359), randint(0, 359), randint(0, 359))

    def refocus_camera(self):
        self.camera.location = self.calc_center()

    def render(self):
        surface_prims = []
        light = self.new_light(*self.light_variability)
        for shape in self.shapes:
            surface_prims += shape.render()
        views = [self.camera.view_from(-30, 0, 200)]
        res_x, res_y = self.camera.resolution
        return self.rend.render(views, light, surface_prims, self.background_prims,
                                res_x, res_y, self.grid_shapes, grid_color=(0.7, 0.7, 0.7))
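# A short, hypothetical usage sketch for the Scene class above; it assumes Renderer.render
# returns a (shadow, noshadow) image pair, as in the standalone shapes example earlier.
import cv2

scene = Scene(gridlines_on=True, gridlines_width=5, gridlines_spacing=20)
for _ in range(3):
    scene.add_object()          # add a random primitive at the scene center
scene.mutate_all_objects()      # randomize scale, rotation and translation
scene.ground_mesh()             # rest each shape on the ground plane
scene.refocus_camera()          # aim the camera at the new scene centroid
shadow, noshadow = scene.render()
cv2.imwrite('scene_shadow.png', shadow)
cv2.imwrite('scene_noshadow.png', noshadow)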
# Load model if args.config is None: tmp = args.checkpoint.split('/')[:-2] tmp.append('config.json') args.config = '/' + join(*tmp) with open(args.config, 'r') as f: options = json.load(f) options = namedtuple('options', options.keys())(**options) model = DMR(options, args.checkpoint) model.eval() # Setup renderer for visualization _, faces = read_obj('data/reference_mesh.obj') renderer = Renderer(faces=np.array(faces) - 1) # Preprocess input image and generate predictions img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=cfg.INPUT_RES) with torch.no_grad(): out_dict = model(norm_img.to(model.device)) pred_vertices = out_dict['pred_vertices'] pred_camera = out_dict['camera'] # Calculate camera parameters for rendering camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*cfg.FOCAL_LENGTH/(cfg.INPUT_RES * pred_camera[:,0] +1e-9)],dim=-1) camera_translation = camera_translation[0].cpu().numpy() pred_vertices = pred_vertices[0].cpu().numpy() img = img.permute(1,2,0).cpu().numpy() # Render non-parametric shape
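# Both demo scripts above convert the network's weak-perspective camera (s, tx, ty) into a
# translation for perspective rendering with t_z = 2 * f / (res * s). A self-contained sketch
# of that conversion (the helper name is an assumption; the formula mirrors the code above):
import torch

def weak_perspective_to_translation(pred_camera, focal_length, input_res):
    """pred_camera: (B, 3) tensor of (s, tx, ty). Returns (B, 3) translations (tx, ty, tz),
    where tz = 2 * focal_length / (input_res * s); the epsilon guards against s close to 0."""
    return torch.stack([pred_camera[:, 1],
                        pred_camera[:, 2],
                        2 * focal_length / (input_res * pred_camera[:, 0] + 1e-9)],
                       dim=-1)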
class Trainer_li(BaseTrainer): def init_fn(self): #self.dataset = 'h36m' #self.train_ds = BaseDataset(self.options, self.dataset) # training dataset self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True) self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to( self.device) # feature extraction model self.optimizer = torch.optim.Adam( params=self.model.parameters(), #lr=5e-5, lr=self.options.lr, weight_decay=0) self.smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=16, create_transl=False).to(self.device) # per vertex loss on the shape self.criterion_shape = nn.L1Loss().to(self.device) # keypoints loss including 2D and 3D self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device) # SMPL parameters loss if we have self.criterion_regr = nn.MSELoss().to(self.device) self.models_dict = {'model': self.model} self.optimizers_dict = {'optimizer': self.optimizer} self.focal_length = constants.FOCAL_LENGTH # initialize SMPLify self.smplify = SMPLify(step_size=1e-2, batch_size=16, num_iters=100, focal_length=self.focal_length) print(self.options.pretrained_checkpoint) if self.options.pretrained_checkpoint is not None: self.load_pretrained( checkpoint_file=self.options.pretrained_checkpoint) #load dictionary of fits self.fits_dict = FitsDict(self.options, self.train_ds) # create renderer self.renderer = Renderer(focal_length=self.focal_length, img_res=224, faces=self.smpl.faces) def finalize(self): self.fits_dict.save() def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, openpose_weight, gt_weight): """Compute 2D reprojection loss on the keypoints. The loss is weighted by the confidence. """ conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone() conf[:, :25] *= openpose_weight conf[:, 25:] *= gt_weight loss = (conf * self.criterion_keypoints( pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean() return loss def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d, has_pose_3d): pred_keypoints_3d = pred_keypoints_3d[:, 25:, :] conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone() gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone() gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1] conf = conf[has_pose_3d == 1] pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1] if len(gt_keypoints_3d) > 0: gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2 gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :] pred_pelvis = (pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2 pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :] return (conf * self.criterion_keypoints(pred_keypoints_3d, gt_keypoints_3d)).mean() else: return torch.FloatTensor(1).fill_(0.).to(self.device) def shape_loss(self, pred_vertices, gt_vertices, has_smpl): """Compute per-vertex loss on the shape for the examples that SMPL annotations are available.""" pred_vertices_with_shape = pred_vertices[has_smpl == 1] gt_vertices_with_shape = gt_vertices[has_smpl == 1] if len(gt_vertices_with_shape) > 0: return self.criterion_shape(pred_vertices_with_shape, gt_vertices_with_shape) else: return torch.FloatTensor(1).fill_(0.).to(self.device) def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas, has_smpl): pred_rotmat_valid = pred_rotmat[has_smpl == 1] gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view( -1, 24, 3, 3)[has_smpl == 1] #print(pred_rotmat_valid.size(),gt_rotmat_valid.size()) #input() pred_betas_valid = pred_betas[has_smpl == 1] gt_betas_valid = gt_betas[has_smpl == 1] if len(pred_rotmat_valid) > 0: loss_regr_pose = 
self.criterion_regr(pred_rotmat_valid, gt_rotmat_valid) loss_regr_betas = self.criterion_regr(pred_betas_valid, gt_betas_valid) else: loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device) loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device) return loss_regr_pose, loss_regr_betas def train_step(self, input_batch): self.model.train() # get data from batch has_smpl = input_batch['has_smpl'].bool() has_pose_3d = input_batch['has_pose_3d'].bool() gt_pose1 = input_batch['pose'] # SMPL pose parameters gt_betas1 = input_batch['betas'] # SMPL beta parameters dataset_name = input_batch['dataset_name'] indices = input_batch[ 'sample_index'] # index of example inside its dataset is_flipped = input_batch[ 'is_flipped'] # flag that indicates whether image was flipped during data augmentation rot_angle = input_batch[ 'rot_angle'] # rotation angle used for data augmentation #print(rot_angle) # Get GT vertices and model joints # Note that gt_model_joints is different from gt_joints as it comes from SMPL gt_betas = torch.cat((gt_betas1, gt_betas1, gt_betas1, gt_betas1), 0) gt_pose = torch.cat((gt_pose1, gt_pose1, gt_pose1, gt_pose1), 0) gt_out = self.smpl(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]) gt_model_joints = gt_out.joints gt_vertices = gt_out.vertices # Get current best fits from the dictionary opt_pose1, opt_betas1 = self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(), is_flipped.cpu())] opt_pose = torch.cat( (opt_pose1.to(self.device), opt_pose1.to(self.device), opt_pose1.to(self.device), opt_pose1.to(self.device)), 0) #print(opt_pose.device) #opt_betas = opt_betas.to(self.device) opt_betas = torch.cat( (opt_betas1.to(self.device), opt_betas1.to(self.device), opt_betas1.to(self.device), opt_betas1.to(self.device)), 0) opt_output = self.smpl(betas=opt_betas, body_pose=opt_pose[:, 3:], global_orient=opt_pose[:, :3]) opt_vertices = opt_output.vertices opt_joints = opt_output.joints # images images = torch.cat((input_batch['img_0'], input_batch['img_1'], input_batch['img_2'], input_batch['img_3']), 0) batch_size = input_batch['img_0'].shape[0] #input() # Output of CNN pred_rotmat, pred_betas, pred_camera = self.model(images) pred_output = self.smpl(betas=pred_betas, body_pose=pred_rotmat[:, 1:], global_orient=pred_rotmat[:, 0].unsqueeze(1), pose2rot=False) pred_vertices = pred_output.vertices pred_joints = pred_output.joints pred_cam_t = torch.stack([ pred_camera[:, 1], pred_camera[:, 2], 2 * self.focal_length / (self.options.img_res * pred_camera[:, 0] + 1e-9) ], dim=-1) camera_center = torch.zeros(batch_size * 4, 2, device=self.device) pred_keypoints_2d = perspective_projection( pred_joints, rotation=torch.eye(3, device=self.device).unsqueeze(0).expand( batch_size * 4, -1, -1), translation=pred_cam_t, focal_length=self.focal_length, camera_center=camera_center) pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.) 
# 2d joint points gt_keypoints_2d = torch.cat( (input_batch['keypoints_0'], input_batch['keypoints_1'], input_batch['keypoints_2'], input_batch['keypoints_3']), 0) gt_keypoints_2d_orig = gt_keypoints_2d.clone() gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * ( gt_keypoints_2d_orig[:, :, :-1] + 1) gt_cam_t = estimate_translation(gt_model_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) opt_cam_t = estimate_translation(opt_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) #input() opt_joint_loss = self.smplify.get_fitting_loss( opt_pose, opt_betas, opt_cam_t, 0.5 * self.options.img_res * torch.ones(batch_size * 4, 2, device=self.device), gt_keypoints_2d_orig).mean(dim=-1) if self.options.run_smplify: pred_rotmat_hom = torch.cat([ pred_rotmat.detach().view(-1, 3, 3).detach(), torch.tensor( [0, 0, 1], dtype=torch.float32, device=self.device).view( 1, 3, 1).expand(batch_size * 4 * 24, -1, -1) ], dim=-1) pred_pose = rotation_matrix_to_angle_axis( pred_rotmat_hom).contiguous().view(batch_size * 4, -1) pred_pose[torch.isnan(pred_pose)] = 0.0 #pred_pose_detach = pred_pose.detach() #pred_betas_detach = pred_betas.detach() #pred_cam_t_detach = pred_cam_t.detach() new_opt_vertices, new_opt_joints,\ new_opt_pose, new_opt_betas,\ new_opt_cam_t, new_opt_joint_loss = self.smplify( pred_pose.detach(), pred_betas.detach(), pred_cam_t.detach(), 0.5 * self.options.img_res * torch.ones(batch_size*4, 2, device=self.device), gt_keypoints_2d_orig) new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1) # Will update the dictionary for the examples where the new loss is less than the current one update = (new_opt_joint_loss < opt_joint_loss) update1 = torch.cat((update, update, update, update), 0) opt_joint_loss[update] = new_opt_joint_loss[update] #print(opt_joints.size(),new_opt_joints.size()) #input() opt_joints[update1, :] = new_opt_joints[update1, :] #print(opt_pose.size(),new_opt_pose.size()) opt_betas[update1, :] = new_opt_betas[update1, :] opt_pose[update1, :] = new_opt_pose[update1, :] #print(i, opt_pose_mv[i]) opt_vertices[update1, :] = new_opt_vertices[update1, :] opt_cam_t[update1, :] = new_opt_cam_t[update1, :] # now we comput the loss on the four images # Replace the optimized parameters with the ground truth parameters, if available #for i in range(4): #print('Here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1') has_smpl1 = torch.cat((has_smpl, has_smpl, has_smpl, has_smpl), 0) opt_vertices[has_smpl1, :, :] = gt_vertices[has_smpl1, :, :] opt_pose[has_smpl1, :] = gt_pose[has_smpl1, :] opt_cam_t[has_smpl1, :] = gt_cam_t[has_smpl1, :] opt_joints[has_smpl1, :, :] = gt_model_joints[has_smpl1, :, :] opt_betas[has_smpl1, :] = gt_betas[has_smpl1, :] #print(opt_cam_t[0:batch_size],opt_cam_t[batch_size:2*batch_size],opt_cam_t[2*batch_size:3*batch_size],opt_cam_t[3*batch_size:4*batch_size]) # Assert whether a fit is valid by comparing the joint loss with the threshold valid_fit1 = (opt_joint_loss < self.options.smplify_threshold).to( self.device) # Add the examples with GT parameters to the list of valid fits valid_fit = torch.cat( (valid_fit1, valid_fit1, valid_fit1, valid_fit1), 0) | has_smpl1 #gt_keypoints_2d = torch.cat((input_batch['keypoints_0'],input_batch['keypoints_1'],input_batch['keypoints_2'],input_batch['keypoints_3']),0) loss_keypoints = self.keypoint_loss(pred_keypoints_2d, gt_keypoints_2d, 0, 1) #gt_joints = 
torch.cat((input_batch['pose_3d_0'], input_batch['pose_3d_1'], input_batch['pose_3d_2'], input_batch['pose_3d_3']), 0)
        # Stack the ground-truth 3D joints of the four views and compute the
        # confidence-weighted 3D keypoint loss (it is logged below even though
        # its weight in the total loss is zero).
        gt_joints = torch.cat((input_batch['pose_3d_0'], input_batch['pose_3d_1'],
                               input_batch['pose_3d_2'], input_batch['pose_3d_3']), 0)
        loss_keypoints_3d = self.keypoint_3d_loss(
            pred_joints, gt_joints,
            torch.cat((has_pose_3d, has_pose_3d, has_pose_3d, has_pose_3d), 0))
        loss_regr_pose, loss_regr_betas = self.smpl_losses(
            pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit)
        loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit)
        # Weighted sum of the individual terms
        loss_all = 0 * loss_shape + \
                   5. * loss_keypoints + \
                   0. * loss_keypoints_3d + \
                   loss_regr_pose + 0.001 * loss_regr_betas + \
                   ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean()
        loss_all *= 60
        # Do backprop
        self.optimizer.zero_grad()
        loss_all.backward()
        self.optimizer.step()
        output = {
            'pred_vertices': pred_vertices,
            'opt_vertices': opt_vertices,
            'pred_cam_t': pred_cam_t,
            'opt_cam_t': opt_cam_t
        }
        losses = {
            'loss': loss_all.detach().item(),
            'loss_keypoints': loss_keypoints.detach().item(),
            'loss_keypoints_3d': loss_keypoints_3d.detach().item(),
            'loss_regr_pose': loss_regr_pose.detach().item(),
            'loss_regr_betas': loss_regr_betas.detach().item(),
            'loss_shape': loss_shape.detach().item()
        }
        return output, losses

    def train_summaries(self, input_batch, output, losses):
        pred_vertices = output['pred_vertices']
        opt_vertices = output['opt_vertices']
        pred_cam_t = output['pred_cam_t']
        opt_cam_t = output['opt_cam_t']
        images_pred = self.renderer.visualize_tb(pred_vertices, pred_cam_t, input_batch)
        images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t, input_batch)
        self.summary_writer.add_image('pred_shape', images_pred, self.step_count)
        self.summary_writer.add_image('opt_shape', images_opt, self.step_count)
        for loss_name, val in losses.items():
            self.summary_writer.add_scalar(loss_name, val, self.step_count)
2)) # n_skirt, n_body body_ind = np.argsort(dist, 1)[:, :K] body_dist = np.sort(dist, 1)[:, :K] # Inverse distance weighting w = 1 / (body_dist**p) w = w / np.sum(w, 1, keepdims=True) n_skirt = len(skirt_v) n_body = len(body_v) skirt_weight = np.zeros([n_skirt, n_body], dtype=np.float32) skirt_weight[np.tile(np.arange(n_skirt)[:, None], (1, K)), body_ind] = w np.savez_compressed('C:/data/v3/skirt_weight.npz', w=skirt_weight) exit() # test renderer = Renderer(512) smpl = SMPLNP(gender='female', skirt=True) smpl_torch = TorchSMPL4Garment('female') import torch disp = smpl_torch.forward_unpose_deformation( torch.from_numpy(np.zeros([1, 72])).float(), torch.from_numpy(np.zeros([1, 300])).float(), torch.from_numpy(skirt_v)[None].float()) disp = disp.detach().cpu().numpy()[0] for t in np.linspace(0, 1, 20): theta = np.zeros([72]) theta[5] = t theta[8] = -t body_v, gar_v = smpl(np.zeros([300]), theta, disp, 'skirt')
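# The skirt skinning weights computed above are plain inverse-distance weighting over the
# K nearest body vertices. A self-contained sketch of that step (function name, K and p are
# illustrative; the original operates on precomputed skirt/body vertex arrays):
import numpy as np

def idw_weights(src_v, dst_v, K=4, p=2):
    """Return an (n_src, n_dst) weight matrix in which every source vertex is a
    normalized inverse-distance blend of its K nearest destination vertices."""
    # Pairwise Euclidean distances, shape (n_src, n_dst)
    dist = np.linalg.norm(src_v[:, None, :] - dst_v[None, :, :], axis=-1)
    nn_ind = np.argsort(dist, axis=1)[:, :K]
    nn_dist = np.sort(dist, axis=1)[:, :K]
    w = 1.0 / (nn_dist ** p + 1e-8)           # inverse-distance weights
    w = w / w.sum(axis=1, keepdims=True)      # normalize per source vertex
    weights = np.zeros((len(src_v), len(dst_v)), dtype=np.float32)
    weights[np.arange(len(src_v))[:, None], nn_ind] = w
    return weights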
class Trainer(BaseTrainer): def init_fn(self): self.options.img_res = cfg.DANET.INIMG_SIZE self.options.heatmap_size = cfg.DANET.HEATMAP_SIZE self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True) self.model = DaNet(options=self.options, smpl_mean_params=path_config.SMPL_MEAN_PARAMS).to( self.device) self.smpl = self.model.iuv2smpl.smpl self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=cfg.SOLVER.BASE_LR, weight_decay=0) self.models_dict = {'model': self.model} self.optimizers_dict = {'optimizer': self.optimizer} self.focal_length = constants.FOCAL_LENGTH if self.options.pretrained_checkpoint is not None: self.load_pretrained( checkpoint_file=self.options.pretrained_checkpoint) # Load dictionary of fits of SPIN self.fits_dict = FitsDict(self.options, self.train_ds) # Create renderer try: self.renderer = Renderer(focal_length=self.focal_length, img_res=self.options.img_res, faces=self.smpl.faces) except: Warning('No renderer for visualization.') self.renderer = None self.decay_steps_ind = 1 def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, openpose_weight, gt_weight): """ Compute 2D reprojection loss on the keypoints. The loss is weighted by the confidence. The available keypoints are different for each dataset. """ conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone() conf[:, :25] *= openpose_weight conf[:, 25:] *= gt_weight loss = (conf * self.criterion_keypoints( pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean() return loss def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d, has_pose_3d): """Compute 3D keypoint loss for the examples that 3D keypoint annotations are available. The loss is weighted by the confidence. """ pred_keypoints_3d = pred_keypoints_3d[:, 25:, :] conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone() gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone() gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1] conf = conf[has_pose_3d == 1] pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1] if len(gt_keypoints_3d) > 0: gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2 gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :] pred_pelvis = (pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2 pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :] return (conf * self.criterion_keypoints(pred_keypoints_3d, gt_keypoints_3d)).mean() else: return torch.FloatTensor(1).fill_(0.).to(self.device) def shape_loss(self, pred_vertices, gt_vertices, has_smpl): """Compute per-vertex loss on the shape for the examples that SMPL annotations are available.""" pred_vertices_with_shape = pred_vertices[has_smpl == 1] gt_vertices_with_shape = gt_vertices[has_smpl == 1] if len(gt_vertices_with_shape) > 0: return self.criterion_shape(pred_vertices_with_shape, gt_vertices_with_shape) else: return torch.FloatTensor(1).fill_(0.).to(self.device) def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas, has_smpl): pred_rotmat_valid = pred_rotmat[has_smpl == 1] gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view( -1, 24, 3, 3)[has_smpl == 1] pred_betas_valid = pred_betas[has_smpl == 1] gt_betas_valid = gt_betas[has_smpl == 1] if len(pred_rotmat_valid) > 0: loss_regr_pose = self.criterion_regr(pred_rotmat_valid, gt_rotmat_valid) loss_regr_betas = self.criterion_regr(pred_betas_valid, gt_betas_valid) else: loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device) loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device) return loss_regr_pose, 
loss_regr_betas def train_step(self, input_batch): # Learning rate decay if self.decay_steps_ind < len(cfg.SOLVER.STEPS) and input_batch[ 'step_count'] == cfg.SOLVER.STEPS[self.decay_steps_ind]: lr = self.optimizer.param_groups[0]['lr'] lr_new = lr * cfg.SOLVER.GAMMA print('Decay the learning on step {} from {} to {}'.format( input_batch['step_count'], lr, lr_new)) for param_group in self.optimizer.param_groups: param_group['lr'] = lr_new lr = self.optimizer.param_groups[0]['lr'] assert lr == lr_new self.decay_steps_ind += 1 self.model.train() # Get data from the batch images = input_batch['img'] # input image gt_keypoints_2d = input_batch['keypoints'] # 2D keypoints gt_pose = input_batch['pose'] # SMPL pose parameters gt_betas = input_batch['betas'] # SMPL beta parameters gt_joints = input_batch['pose_3d'] # 3D pose has_smpl = input_batch['has_smpl'].byte( ) # flag that indicates whether SMPL parameters are valid has_pose_3d = input_batch['has_pose_3d'].byte( ) # flag that indicates whether 3D pose is valid is_flipped = input_batch[ 'is_flipped'] # flag that indicates whether image was flipped during data augmentation rot_angle = input_batch[ 'rot_angle'] # rotation angle used for data augmentation dataset_name = input_batch[ 'dataset_name'] # name of the dataset the image comes from indices = input_batch[ 'sample_index'] # index of example inside its dataset batch_size = images.shape[0] # Get GT vertices and model joints # Note that gt_model_joints is different from gt_joints as it comes from SMPL gt_out = self.smpl(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]) gt_model_joints = gt_out.joints gt_vertices = gt_out.vertices # Get current pseudo labels (final fits of SPIN) from the dictionary opt_pose, opt_betas = self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(), is_flipped.cpu())] opt_pose = opt_pose.to(self.device) opt_betas = opt_betas.to(self.device) # Replace extreme betas with zero betas opt_betas[(opt_betas.abs() > 3).any(dim=-1)] = 0. 
# Replace the optimized parameters with the ground truth parameters, if available opt_pose[has_smpl, :] = gt_pose[has_smpl, :] opt_betas[has_smpl, :] = gt_betas[has_smpl, :] opt_output = self.smpl(betas=opt_betas, body_pose=opt_pose[:, 3:], global_orient=opt_pose[:, :3]) opt_vertices = opt_output.vertices opt_joints = opt_output.joints # De-normalize 2D keypoints from [-1,1] to pixel space gt_keypoints_2d_orig = gt_keypoints_2d.clone() gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * ( gt_keypoints_2d_orig[:, :, :-1] + 1) # Estimate camera translation given the model joints and 2D keypoints # by minimizing a weighted least squares loss gt_cam_t = estimate_translation(gt_model_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) opt_cam_t = estimate_translation(opt_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res) if self.options.train_data in ['h36m_coco_itw']: valid_fit = self.fits_dict.get_vaild_state(dataset_name, indices.cpu()).to( self.device) valid_fit = valid_fit | has_smpl else: valid_fit = has_smpl # Feed images in the network to predict camera and SMPL parameters input_batch['opt_pose'] = opt_pose input_batch['opt_betas'] = opt_betas input_batch['valid_fit'] = valid_fit input_batch['dp_dict'] = { k: v.to(self.device) if isinstance(v, torch.Tensor) else v for k, v in input_batch['dp_dict'].items() } has_iuv = torch.tensor([dn not in ['dp_coco'] for dn in dataset_name], dtype=torch.uint8).to(self.device) has_iuv = has_iuv & valid_fit input_batch['has_iuv'] = has_iuv has_dp = input_batch['has_dp'] target_smpl_kps = torch.zeros( (batch_size, 24, 3)).to(opt_output.smpl_joints.device) target_smpl_kps[:, :, :2] = perspective_projection( opt_output.smpl_joints.detach().clone(), rotation=torch.eye(3, device=self.device).unsqueeze(0).expand( batch_size, -1, -1), translation=opt_cam_t, focal_length=self.focal_length, camera_center=torch.zeros(batch_size, 2, device=self.device) + (0.5 * self.options.img_res)) target_smpl_kps[:, :, :2] = target_smpl_kps[:, :, :2] / ( 0.5 * self.options.img_res) - 1 target_smpl_kps[has_iuv == 1, :, 2] = 1 target_smpl_kps[has_dp == 1] = input_batch['smpl_2dkps'][has_dp == 1] input_batch['target_smpl_kps'] = target_smpl_kps # [B, 24, 3] input_batch['target_verts'] = opt_vertices.detach().clone( ) # [B, 6890, 3] # camera translation for neural renderer gt_cam_t_nr = opt_cam_t.detach().clone() gt_camera = torch.zeros(gt_cam_t_nr.shape).to(gt_cam_t_nr.device) gt_camera[:, 1:] = gt_cam_t_nr[:, :2] gt_camera[:, 0] = (2. 
* self.focal_length / self.options.img_res) / gt_cam_t_nr[:, 2]
        input_batch['target_cam'] = gt_camera

        # Do forward
        danet_return_dict = self.model(input_batch)

        loss_total = 0
        losses_dict = {}
        for loss_key in danet_return_dict['losses']:
            loss_total += danet_return_dict['losses'][loss_key]
            losses_dict['loss_{}'.format(loss_key)] = danet_return_dict['losses'][loss_key].detach().item()

        # Do backprop
        self.optimizer.zero_grad()
        loss_total.backward()
        self.optimizer.step()

        if input_batch['pretrain_mode']:
            pred_vertices = None
            pred_cam_t = None
        else:
            pred_vertices = danet_return_dict['prediction']['vertices'].detach()
            pred_cam_t = danet_return_dict['prediction']['cam_t'].detach()

        # Pack output arguments for tensorboard logging
        output = {
            'pred_vertices': pred_vertices,
            'opt_vertices': opt_vertices,
            'pred_cam_t': pred_cam_t,
            'opt_cam_t': opt_cam_t,
            'visualization': danet_return_dict['visualization']
        }
        losses_dict.update({'loss_total': loss_total.detach().item()})

        return output, losses_dict

    def train_summaries(self, input_batch, output, losses):
        for loss_name, val in losses.items():
            self.summary_writer.add_scalar(loss_name, val, self.step_count)

    def visualize(self, input_batch, output, losses):
        images = input_batch['img']
        images = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
        images = images + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)
        pred_vertices = output['pred_vertices']
        opt_vertices = output['opt_vertices']
        pred_cam_t = output['pred_cam_t']
        opt_cam_t = output['opt_cam_t']
        if self.renderer is not None:
            images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t, images)
            self.summary_writer.add_image('opt_shape', images_opt, self.step_count)
            if pred_vertices is not None:
                images_pred = self.renderer.visualize_tb(pred_vertices, pred_cam_t, images)
                self.summary_writer.add_image('pred_shape', images_pred, self.step_count)
        for key_name in ['pred_uv', 'gt_uv', 'part_uvi_pred', 'part_uvi_gt',
                         'skps_hm_pred', 'skps_hm_pred_soft', 'skps_hm_gt', 'skps_hm_gt_soft']:
            if key_name in output['visualization']:
                vis_uv_raw = output['visualization'][key_name]
                if key_name in ['pred_uv', 'gt_uv']:
                    iuv = F.interpolate(vis_uv_raw, scale_factor=4., mode='nearest')
                    img_iuv = images.clone()
                    img_iuv[iuv > 0] = iuv[iuv > 0]
                    vis_uv = make_grid(img_iuv, padding=1, pad_value=1)
                else:
                    vis_uv = make_grid(vis_uv_raw, padding=1, pad_value=1)
                self.summary_writer.add_image(key_name, vis_uv, self.step_count)
        if 'target_smpl_kps' in input_batch:
            smpl_kps = input_batch['target_smpl_kps'].detach()
            smpl_kps[:, :, :2] *= images.size(-1) / 2.
            smpl_kps[:, :, :2] += images.size(-1) / 2.
            img_smpl_hm = images.detach().clone()
            img_with_smpljoints = vis_utils.vis_batch_image_with_joints(
                img_smpl_hm.data, smpl_kps.cpu().numpy(),
                np.ones((smpl_kps.shape[0], smpl_kps.shape[1], 1)))
            img_with_smpljoints = np.transpose(img_with_smpljoints, (2, 0, 1))
            self.summary_writer.add_image('stn_centers_gt', img_with_smpljoints, self.step_count)
        if 'stn_kps_pred' in output['visualization']:
            smpl_kps = output['visualization']['stn_kps_pred']
            smpl_kps[:, :, :2] *= images.size(-1) / 2.
            smpl_kps[:, :, :2] += images.size(-1) / 2.
            img_smpl_hm = images.detach().clone()
            if 'skps_hm_gt' in output['visualization']:
                smpl_hm = output['visualization']['skps_hm_gt'].expand(-1, 3, -1, -1)
                # Upsample the heatmap to the image resolution before overlaying it
                smpl_hm = F.interpolate(smpl_hm, scale_factor=images.size(-1) / smpl_hm.size(-1))
                img_smpl_hm[smpl_hm > 0.1] = smpl_hm[smpl_hm > 0.1]
            img_with_smpljoints = vis_utils.vis_batch_image_with_joints(
                img_smpl_hm.data, smpl_kps.cpu().numpy(),
                np.ones((smpl_kps.shape[0], smpl_kps.shape[1], 1)))
            img_with_smpljoints = np.transpose(img_with_smpljoints, (2, 0, 1))
            self.summary_writer.add_image('stn_centers_pred', img_with_smpljoints, self.step_count)
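# A note on the keypoint coordinate convention shared by these trainers: 2D keypoints are
# stored in [-1, 1] and mapped to pixels with x_px = 0.5 * img_res * (x + 1), as in the
# de-normalization and visualization code above. A minimal sketch (helper names assumed):
def keypoints_to_pixels(kps_norm, img_res):
    """Map (..., 2) keypoints from [-1, 1] to pixel coordinates."""
    return 0.5 * img_res * (kps_norm + 1)

def keypoints_to_normalized(kps_px, img_res):
    """Map (..., 2) pixel keypoints back to [-1, 1]."""
    return 2.0 * kps_px / img_res - 1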