def render(mesh, model_id, shapenet_dataset, device, camera=None):
    # Rendering settings.
    # camera_distance = 1
    # camera_elevation = 0.5 + 100 * random.random()
    # camera_azimuth = 30 + 90 * random.random()
    # R, T = look_at_view_transform(camera_distance, camera_elevation, camera_azimuth)
    # camera = FoVPerspectiveCameras(R=R, T=T, device=device)
    # raster_settings = RasterizationSettings(image_size=512)
    # lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0], device=device)[None], device=device)
    # rendering_settings = cameras, raster_settings, lights
    # image = shapenet_dataset.render(
    #     model_ids=[model_id],
    #     device=device,
    #     cameras=camera,
    #     raster_settings=raster_settings,
    #     lights=lights,
    # )[..., :3]
    if not camera:
        camera_elevation = 0 + 180 * torch.rand(1)       # torch.linspace(0, 180, batch_size)
        camera_azimuth = -180 + 2 * 180 * torch.rand(1)  # torch.linspace(-180, 180, batch_size)
        # R, T = look_at_view_transform(camera_distance, camera_elevation, camera_azimuth)
        R, T = look_at_view_transform(1.9, camera_elevation, camera_azimuth)
        camera = FoVPerspectiveCameras(R=R, T=T, device=device)
        camera.eval()  # necessary?
    raster_settings = RasterizationSettings(image_size=224)  # TODO ?????
    lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
                         device=device)
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=camera, raster_settings=raster_settings),
        shader=HardPhongShader(device=device, cameras=camera))
    renderer.eval()
    # rendering_settings = cameras, raster_settings, lights
    # image = shapenet_dataset.render(
    #     model_ids=[model_id],
    #     device=device,
    #     cameras=camera,
    #     raster_settings=raster_settings,
    #     lights=lights,
    # )[..., :3]
    image = renderer(mesh)[..., :3]
    # plt.imshow(image.squeeze().detach().cpu().numpy())
    # plt.show()
    image = image.permute(0, 3, 1, 2)
    return image, camera  # TODO: batch of images
def create_renderer(self):
    self.num_angles = self.config.num_angles
    azim = torch.linspace(-1 * self.config.angle_range, self.config.angle_range,
                          self.num_angles)
    R, T = look_at_view_transform(dist=1.0, elev=0, azim=azim)
    T[:, 1] = -85
    T[:, 2] = 200
    cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
    raster_settings = RasterizationSettings(
        image_size=self.config.img_size,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    lights = PointLights(device=self.device, location=[[0.0, 85, 100.0]])
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=HardPhongShader(device=self.device, cameras=cameras, lights=lights))
    return renderer
def render_mesh(verts, faces):
    device = verts[0].get_device()
    N = len(verts)
    num_verts_per_mesh = []
    for i in range(N):
        num_verts_per_mesh.append(verts[i].shape[0])
    verts_rgb = torch.ones((N, np.max(num_verts_per_mesh), 3),
                           requires_grad=False,
                           device=device)
    for i in range(N):
        verts_rgb[i, num_verts_per_mesh[i]:, :] = -1
    textures = Textures(verts_rgb=verts_rgb)
    meshes = Meshes(verts=verts, faces=faces, textures=textures)
    elev = torch.rand(N) * 30 - 15
    azim = torch.rand(N) * 360 - 180
    R, T = look_at_view_transform(dist=2, elev=elev, azim=azim)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    sigma = 1e-4
    raster_settings = RasterizationSettings(
        image_size=128,
        blur_radius=np.log(1. / 1e-4 - 1.) * sigma,
        faces_per_pixel=40,
        perspective_correct=False)
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftSilhouetteShader())
    return renderer(meshes)
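# Quick numeric check (a sketch, not part of the original code) of the
# soft-rasterizer blur radius used in render_mesh above: with sigma = 1e-4
# the blur band is roughly 9.2e-4 in NDC units.
import numpy as np

sigma = 1e-4
print(np.log(1. / 1e-4 - 1.) * sigma)  # ~9.21e-04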
def _get_renderer(self, device):
    R, T = look_at_view_transform(10, 0, 0)  # camera's position
    cameras = FoVPerspectiveCameras(
        device=device,
        R=R,
        T=T,
        znear=0.01,
        zfar=50,
        fov=2 * np.arctan(self.img_size // 2 / self.focal) * 180. / np.pi)
    lights = PointLights(device=device,
                         location=[[0.0, 0.0, 1e5]],
                         ambient_color=[[1, 1, 1]],
                         specular_color=[[0., 0., 0.]],
                         diffuse_color=[[0., 0., 0.]])
    raster_settings = RasterizationSettings(
        image_size=self.img_size,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    blend_params = blending.BlendParams(background_color=[0, 0, 0])
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftPhongShader(device=device,
                               cameras=cameras,
                               lights=lights,
                               blend_params=blend_params))
    return renderer
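# Quick numeric check (a sketch, not from the original code) of the
# focal-length-to-FoV conversion used in _get_renderer above. The 224 px image
# size and 300 px focal length are illustrative values only.
import numpy as np

img_size, focal = 224, 300.0
fov_deg = 2 * np.arctan((img_size / 2) / focal) * 180.0 / np.pi
print(f"full vertical FoV: {fov_deg:.1f} degrees")  # ~40.9 degrees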
def generate_rotating_nerf(neural_radiance_field, n_frames=50):
    logRs = torch.zeros(n_frames, 3, device=device)
    logRs[:, 1] = torch.linspace(-3.14, 3.14, n_frames, device=device)
    Rs = so3_exponential_map(logRs)
    Ts = torch.zeros(n_frames, 3, device=device)
    Ts[:, 2] = 2.7
    frames = []
    print('Rendering rotating NeRF ...')
    for R, T in zip(Rs, Ts):
        camera = FoVPerspectiveCameras(
            R=R[None],
            T=T[None],
            znear=target_cameras.znear[0],
            zfar=target_cameras.zfar[0],
            aspect_ratio=target_cameras.aspect_ratio[0],
            fov=target_cameras.fov[0],
            device=device,
        )
        # Note that we again render with `NDCGridSampler`
        # and the batched_forward function of neural_radiance_field.
        frames.append(
            renderer_grid(
                cameras=camera,
                volumetric_function=neural_radiance_field.batched_forward,
            )[0][..., :3])
    return torch.cat(frames)
def setup(self, device):
    R, T = look_at_view_transform(self.viewpoint_distance,
                                  self.viewpoint_elevation,
                                  self.viewpoint_azimuth,
                                  device=device)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    raster_settings = RasterizationSettings(
        image_size=self.opt.fast_image_size,
        blur_radius=self.opt.raster_blur_radius,
        faces_per_pixel=self.opt.raster_faces_per_pixel,
    )
    rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
    lights = PointLights(device=device, location=[self.opt.lights_location])
    lights = DirectionalLights(device=device, direction=[self.opt.lights_direction])
    shader = SoftPhongShader(
        device=device,
        cameras=cameras,
        lights=lights,
        blend_params=BlendParams(
            self.opt.blend_params_sigma,
            self.opt.blend_params_gamma,
            self.opt.blend_params_background_color,
        ),
    )
    self.renderer = MeshRenderer(
        rasterizer=rasterizer,
        shader=shader,
    )
def render_rotating_volume(volume_model, device, n_frames=50, video_size=400,
                           n_pts_per_ray=192):
    renderer = get_renderer(video_size, n_pts_per_ray)
    # Render frames.
    with torch.inference_mode():
        print("Generating rotating volume ...")
        elev = 30
        azimuths = torch.linspace(0., 360., n_frames, device=device)
        frames = []
        for azim in tqdm(azimuths):
            R, T = look_at_view_transform(dist=args.camera_radius, elev=elev, azim=azim)
            batch_cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
            rgbo = volume_model(batch_cameras, renderer)
            rgb = rgbo[Ellipsis, :3]
            opacity = rgbo[Ellipsis, 3:4]
            frame = opacity * rgb + 1 - opacity
            frame = frame.clamp(0.0, 1.0)
            frames.append(frame)
        frames = torch.cat(frames).clamp(0., 1.)
        frames = frames.movedim(-1, 1)  # THWC to TCHW.
    return frames.cpu().numpy()
def define_render(num):
    shapenet_cam_params_file = '../data/metadata/rendering_metadata.json'
    with open(shapenet_cam_params_file) as f:
        shapenet_cam_params = json.load(f)
    param_num = num
    R, T = look_at_view_transform(
        dist=shapenet_cam_params["distance"][param_num] * 5,
        elev=shapenet_cam_params["elevation"][param_num],
        azim=shapenet_cam_params["azimuth"][param_num])
    cameras = FoVPerspectiveCameras(
        device=device,
        R=R,
        T=T,
        fov=shapenet_cam_params["field_of_view"][param_num])
    raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftPhongShader(device=device, cameras=cameras, lights=lights))
    return renderer
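# Hedged sketch of the structure define_render above expects from
# rendering_metadata.json: parallel per-view lists indexed by `num`. The
# values below are illustrative only, not taken from the real metadata file.
example_shapenet_cam_params = {
    "distance": [1.2, 1.4],
    "elevation": [20.0, 25.0],
    "azimuth": [45.0, 135.0],
    "field_of_view": [40.0, 40.0],
}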
def change_cameras(self, mode, camera_dist=2.2):
    azim_train = torch.linspace(-1 * self.config.angle_range_train,
                                self.config.angle_range_train,
                                self.num_angles_train)
    azim_test = torch.linspace(-1 * self.config.angle_range_test,
                               self.config.angle_range_test,
                               self.num_angles_test)
    R, T = look_at_view_transform(camera_dist, 6, azim_train)
    train_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
    self.train_cameras = train_cameras
    R, T = look_at_view_transform(camera_dist, 6, azim_test)
    test_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
    self.test_cameras = test_cameras
    if mode == 'train':
        self.renderer.rasterizer.cameras = self.train_cameras
        self.renderer.shader.cameras = self.train_cameras
    elif mode == 'test':
        self.renderer.rasterizer.cameras = self.test_cameras
        self.renderer.shader.cameras = self.test_cameras
def _render(
    mesh: Meshes,
    name: str,
    dist: float = 3.0,
    elev: float = 10.0,
    azim: float = 0,
    image_size: int = 256,
    pan=None,
    RT=None,
    use_ambient=False,
):
    device = mesh.device
    if RT is not None:
        R, T = RT
    else:
        R, T = look_at_view_transform(dist, elev, azim)
        if pan is not None:
            R, T = rotate_on_spot(R, T, pan)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    raster_settings = RasterizationSettings(image_size=image_size,
                                            blur_radius=0.0,
                                            faces_per_pixel=1)

    # Init shader settings
    if use_ambient:
        lights = AmbientLights(device=device)
    else:
        lights = PointLights(device=device)
        lights.location = torch.tensor([0.0, 0.0, 2.0], device=device)[None]

    blend_params = BlendParams(
        sigma=1e-1,
        gamma=1e-4,
        background_color=torch.tensor([1.0, 1.0, 1.0], device=device),
    )
    # Init renderer
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=HardPhongShader(device=device,
                               lights=lights,
                               cameras=cameras,
                               blend_params=blend_params),
    )
    output = renderer(mesh)
    image = (output[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
    if DEBUG:
        Image.fromarray(image).save(DATA_DIR / f"glb_{name}_.png")
    return image
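# Hedged usage sketch for _render above (not from the original file): render a
# single white triangle with ambient lighting. The mesh, imports, and names
# here are illustrative only.
import torch
from pytorch3d.structures import Meshes
from pytorch3d.renderer import TexturesVertex

example_verts = torch.tensor([[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]])
example_faces = torch.tensor([[[0, 1, 2]]])
example_mesh = Meshes(
    verts=example_verts,
    faces=example_faces,
    textures=TexturesVertex(verts_features=torch.ones_like(example_verts)),
)
example_image = _render(example_mesh, "triangle", dist=3.0, elev=10.0, azim=0,
                        use_ambient=True)  # (256, 256, 3) uint8 array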
def render_validation_view(volume_model, render_size, device):
    with torch.inference_mode():
        test_renderer = get_renderer(render_size, n_pts_per_ray=192)
        R, T = look_at_view_transform(dist=4.0, elev=45, azim=30)
        camera = FoVPerspectiveCameras(device=device, R=R, T=T)
        rgbo = volume_model(camera, test_renderer)
        rgb = rgbo[Ellipsis, :3]
        opacity = rgbo[Ellipsis, 3:4]
        rendering = opacity * rgb + (1 - opacity)
        rendering = rendering.clamp(0.0, 1.0)
    return rendering.squeeze(0)
def create_renderer(self):
    self.num_angles_train = self.config.num_angles_train
    self.num_angles_test = self.config.num_angles_test
    azim_train = torch.linspace(-1 * self.config.angle_range_train,
                                self.config.angle_range_train,
                                self.num_angles_train)
    azim_test = torch.linspace(-1 * self.config.angle_range_test,
                               self.config.angle_range_test,
                               self.num_angles_test)
    # Cameras for SMPL meshes:
    camera_dist = 2.2
    R, T = look_at_view_transform(camera_dist, 6, azim_train)
    train_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
    self.train_cameras = train_cameras
    R, T = look_at_view_transform(camera_dist, 6, azim_test)
    test_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
    self.test_cameras = test_cameras
    raster_settings = RasterizationSettings(
        image_size=self.config.img_size,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    lights = PointLights(device=self.device, location=[[0.0, 85, 100.0]])
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=train_cameras,
            raster_settings=raster_settings
        ),
        shader=HardPhongShader(
            device=self.device,
            cameras=train_cameras,
            lights=lights
        )
    )
    return renderer
def __init__(self,
             dist=2,
             elev=0,
             azimuth=180,
             fov=40,
             image_size=256,
             R=None,
             T=None,
             cameras=None,
             return_format="torch",
             device='cuda'):
    super().__init__()
    # If you provide R and T, you don't need dist, elev, azimuth, fov
    self.device = device
    self.return_format = return_format
    # Data structures and functions for rendering
    if cameras is None:
        if R is None and T is None:
            R, T = look_at_view_transform(dist, elev, azimuth)
        cameras = FoVPerspectiveCameras(R=R, T=T, znear=1, zfar=10000, fov=fov,
                                        degrees=True, device=device)
        # cameras = PerspectiveCameras(R=R, T=T, focal_length=1.6319*10, device=device)
    self.raster_settings = RasterizationSettings(
        image_size=image_size,
        blur_radius=0.0,  # no blur
        bin_size=0,
    )
    # Place lights at the same point as the camera
    location = T
    if location is None:
        location = ((0, 0, 0), )
    lights = PointLights(ambient_color=((0.3, 0.3, 0.3), ),
                         diffuse_color=((0.7, 0.7, 0.7), ),
                         device=device,
                         location=location)
    self.mesh_rasterizer = MeshRasterizer(cameras=cameras,
                                          raster_settings=self.raster_settings)
    self._renderer = MeshRenderer(rasterizer=self.mesh_rasterizer,
                                  shader=SoftPhongShader(device=device,
                                                         cameras=cameras,
                                                         lights=lights))
    self.cameras = self.mesh_rasterizer.cameras
class Camera:

    def __init__(self, camera_type='fov', device='cuda'):
        self.camera_type = camera_type
        self.device = device

    def lookAt(self, dist=0.0, elev=0.0, azim=0.0):
        R, T = look_at_view_transform(dist, elev, azim)
        if self.camera_type == 'fov':
            self.camera = FoVPerspectiveCameras(device=self.device, R=R, T=T)

    def getLocation(self):
        location = self.camera.get_camera_center()
        return location
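# Hedged usage sketch for the Camera wrapper above (not from the original
# code): 'fov' is the only camera_type the class currently handles, and the
# CPU device here is purely for illustration.
example_cam = Camera(camera_type='fov', device='cpu')
example_cam.lookAt(dist=2.7, elev=10.0, azim=30.0)
print(example_cam.getLocation())  # (1, 3) tensor with the camera center in world coordinates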
def save_p3d_mesh(verts, faces, filling_factors):
    features = [(int(i * 255), 0, 0) for i in filling_factors]
    features = torch.unsqueeze(torch.Tensor(features), 0)
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        torch.cuda.set_device(device)
    else:
        device = torch.device("cpu")
    texture = TexturesVertex(features)
    # Faces must be integer indices, and .to(device) keeps the CPU fallback
    # above working (the original called .cuda() unconditionally).
    mesh = Meshes(torch.unsqueeze(torch.Tensor(verts), 0),
                  torch.unsqueeze(torch.tensor(faces, dtype=torch.int64), 0),
                  texture).to(device)

    # Initialize a camera.
    # Rotate the object by increasing the elevation and azimuth angles
    R, T = look_at_view_transform(dist=2.0, elev=-50, azim=-90)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

    # Define the settings for rasterization and shading. Here we set the output image to be of size
    # 1024x1024. As we are rendering images for visualization purposes only we will set faces_per_pixel=1
    # and blur_radius=0.0. We also set bin_size and max_faces_per_bin to None which ensures that
    # the faster coarse-to-fine rasterization method is used. Refer to rasterize_meshes.py for
    # explanations of these parameters. Refer to docs/notes/renderer.md for an explanation of
    # the difference between naive and coarse-to-fine rasterization.
    raster_settings = RasterizationSettings(
        image_size=1024,
        blur_radius=0.0,
        faces_per_pixel=1,
    )

    # Place a point light in front of the object. As mentioned above, the front of the object is
    # facing the -z direction.
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])

    # Create a Phong renderer by composing a rasterizer and a shader. The textured Phong shader will
    # interpolate the texture uv coordinates for each vertex, sample from a texture image and
    # apply the Phong lighting model.
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftPhongShader(device=device, cameras=cameras, lights=lights))
    img = renderer(mesh)
    plt.figure(figsize=(10, 10))
    plt.imshow(img[0].cpu().numpy())
    plt.show()
def setup(self, device):
    if self.renderer is not None:
        return
    R, T = look_at_view_transform(self.opt.viewpoint_distance,
                                  self.opt.viewpoint_elevation,
                                  self.opt.viewpoint_azimuth,
                                  device=device)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    raster_settings = PointsRasterizationSettings(
        image_size=self.opt.raster_image_size,
        radius=self.opt.raster_radius,
        points_per_pixel=self.opt.raster_points_per_pixel,
    )
    rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
    lights = PointLights(device=device, location=[self.opt.lights_location])
    self.renderer = PulsarPointsRenderer(rasterizer=rasterizer, n_channels=3).to(device)
def _setup_render(self):
    # Unpack options ...
    opts = self.opts

    # Initialize a camera.
    # TODO(ycho): Alternatively, specify the intrinsic matrix `K` instead.
    cameras = FoVPerspectiveCameras(znear=opts.znear,
                                    zfar=opts.zfar,
                                    aspect_ratio=opts.aspect,
                                    fov=opts.fov,
                                    degrees=True,
                                    device=self.device)

    # Define the settings for rasterization and shading.
    # As we are rendering images for visualization purposes only we will set
    # faces_per_pixel=1 and blur_radius=0.0. Refer to raster_points.py for
    # explanations of these parameters.
    # points_per_pixel (Optional): We will keep track of this many points per
    # pixel, returning the nearest points_per_pixel points along the z-axis.
    # Create a points renderer by compositing points using an alpha compositor
    # (nearer points are weighted more heavily). See [1] for an explanation.
    if self.opts.use_mesh:
        raster_settings = RasterizationSettings(
            image_size=opts.image_size,
            blur_radius=0.0,  # hmm...
            faces_per_pixel=1)
        rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
        lights = PointLights(device=self.device, location=[[0.0, 0.0, -3.0]])
        renderer = MeshRenderer(rasterizer=rasterizer,
                                shader=SoftPhongShader(device=self.device,
                                                       cameras=cameras,
                                                       lights=lights))
    else:
        raster_settings = PointsRasterizationSettings(
            image_size=opts.image_size, radius=0.1, points_per_pixel=8)
        rasterizer = PointsRasterizer(cameras=cameras, raster_settings=raster_settings)
        renderer = PointsRenderer(rasterizer=rasterizer, compositor=AlphaCompositor())
    return renderer
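# Hedged sketch of the options object _setup_render above expects; the
# attribute names mirror what the method reads and the values are
# illustrative only.
from types import SimpleNamespace

example_opts = SimpleNamespace(
    znear=0.01, zfar=50.0, aspect=1.0, fov=60.0,
    image_size=256, use_mesh=True,
)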
def generate_rotating_nerf(neural_radiance_field,
                           target_cameras,
                           renderer_grid,
                           device,
                           n_frames=50,
                           num_forward=1,
                           save_visualization=False):
    logRs = torch.zeros(n_frames, 3, device=device)
    logRs[:, 1] = torch.linspace(-3.14, 3.14, n_frames, device=device)
    Rs = so3_exponential_map(logRs)
    Ts = torch.zeros(n_frames, 3, device=device)
    Ts[:, 2] = 2.7
    frames = []
    uncertainties = []
    print('Rendering rotating NeRF ...')
    for i, (R, T) in enumerate(zip(tqdm(Rs), Ts)):
        camera = FoVPerspectiveCameras(
            R=R[None],
            T=T[None],
            znear=target_cameras.znear[0],
            zfar=target_cameras.zfar[0],
            aspect_ratio=target_cameras.aspect_ratio[0],
            fov=target_cameras.fov[0],
            device=device,
        )
        # Note that we again render with `NDCGridSampler`
        # and the batched_forward function of neural_radiance_field.
        # The visualization path is built per view/sample here (it previously
        # referenced loop variables before they were defined).
        frame_samples = torch.stack([
            renderer_grid(cameras=camera,
                          volumetric_function=partial(
                              batched_forward,
                              net=neural_radiance_field,
                              path=(f"nerf_vis/view{i}/sample{j}"
                                    if save_visualization else None)))[0][..., :3]
            for j in range(num_forward)
        ])
        frames.append(frame_samples.mean(0))
        uncertainties.append(
            frame_samples.var(0).sum(-1).sqrt() if num_forward > 1 else
            torch.zeros_like(frame_samples[0, ..., 0]))
    return torch.cat(frames), torch.cat(uncertainties)
def create_renderer(render_opt):
    """ Create renderer """
    Renderer = get_class_from_string(render_opt.renderer_type)
    Raster = get_class_from_string(render_opt.raster_type)
    i = render_opt.raster_type.rfind('.')
    raster_setting_type = render_opt.raster_type[:i] + \
        '.PointsRasterizationSettings'
    if render_opt.compositor_type is not None:
        Compositor = get_class_from_string(render_opt.compositor_type)
        compositor = Compositor()
    else:
        compositor = None
    RasterSetting = get_class_from_string(raster_setting_type)
    raster_settings = RasterSetting(**render_opt.raster_params)
    renderer = Renderer(
        rasterizer=Raster(cameras=FoVPerspectiveCameras(),
                          raster_settings=raster_settings),
        compositor=compositor,
    )
    return renderer
def create_splatting_renderer():
    Renderer = get_class_from_string(
        'DSS.core.renderer.SurfaceSplattingRenderer')
    Raster = get_class_from_string('DSS.core.rasterizer.SurfaceSplatting')
    # i = render_opt.raster_type.rfind('.')
    # raster_setting_type = render_opt.raster_type[:i] + \
    #     '.PointsRasterizationSettings'

    # The compositor class is hard-coded (this function takes no render_opt),
    # so it is instantiated unconditionally.
    Compositor = get_class_from_string(
        'pytorch3d.renderer.NormWeightedCompositor')
    compositor = Compositor()

    raster_params = {
        'backface_culling': False,
        'Vrk_invariant': True,
        'Vrk_isotropic': False,
        'bin_size': None,
        'clip_pts_grad': 0.05,
        'cutoff_threshold': 1.0,
        'depth_merging_threshold': 0.05,
        'image_size': 512,
        'max_points_per_bin': None,
        'points_per_pixel': 5,
        'radii_backward_scaler': 5,
    }

    # RasterSetting = get_class_from_string(raster_setting_type)
    RasterSetting = get_class_from_string(
        'DSS.core.rasterizer.PointsRasterizationSettings')
    raster_settings = RasterSetting(**raster_params)
    renderer = Renderer(
        rasterizer=Raster(cameras=FoVPerspectiveCameras(),
                          raster_settings=raster_settings),
        compositor=compositor,
    )
    return renderer
def test_render_shapenet_core(self):
    """
    Test rendering objects from ShapeNetCore.
    """
    # Setup device and seed for random selections.
    device = torch.device("cuda:0")
    torch.manual_seed(39)

    # Load category piano from ShapeNetCore.
    piano_dataset = ShapeNetCore(SHAPENET_PATH, synsets=["piano"])

    # Rendering settings.
    R, T = look_at_view_transform(1.0, 1.0, 90)
    cameras = FoVPerspectiveCameras(R=R, T=T, device=device)
    raster_settings = RasterizationSettings(image_size=512)
    lights = PointLights(
        location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
        # TODO: debug the source of the discrepancy in two images when rendering on GPU.
        diffuse_color=((0, 0, 0), ),
        specular_color=((0, 0, 0), ),
        device=device,
    )

    # Render first three models in the piano category.
    pianos = piano_dataset.render(
        idxs=list(range(3)),
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Check that there are three images in the batch.
    self.assertEqual(pianos.shape[0], 3)
    # Compare the rendered models to the reference images.
    for idx in range(3):
        piano_rgb = pianos[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (piano_rgb.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR /
                    ("DEBUG_shapenet_core_render_piano_by_idxs_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_shapenet_core_render_piano_%s.png" % idx, DATA_DIR)
        self.assertClose(piano_rgb, image_ref, atol=0.05)

    # Render the same piano models but by model_ids this time.
    pianos_2 = piano_dataset.render(
        model_ids=[
            "13394ca47c89f91525a3aaf903a41c90",
            "14755c2ee8e693aba508f621166382b0",
            "156c4207af6d2c8f1fdc97905708b8ea",
        ],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        piano_rgb_2 = pianos_2[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (piano_rgb_2.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR /
                    ("DEBUG_shapenet_core_render_piano_by_ids_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_shapenet_core_render_piano_%s.png" % idx, DATA_DIR)
        self.assertClose(piano_rgb_2, image_ref, atol=0.05)

    #######################
    # Render by categories
    #######################

    # Load ShapeNetCore.
    shapenet_dataset = ShapeNetCore(SHAPENET_PATH)

    # Render a mixture of categories and specify the number of models to be
    # randomly sampled from each category.
    mixed_objs = shapenet_dataset.render(
        categories=["faucet", "chair"],
        sample_nums=[2, 1],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        mixed_rgb = mixed_objs[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (mixed_rgb.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR /
                    ("DEBUG_shapenet_core_render_mixed_by_categories_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_shapenet_core_render_mixed_by_categories_%s.png" % idx,
            DATA_DIR)
        self.assertClose(mixed_rgb, image_ref, atol=0.05)

    # Render a mixture of categories without specifying sample_nums.
    mixed_objs_2 = shapenet_dataset.render(
        categories=["faucet", "chair"],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(2):
        mixed_rgb_2 = mixed_objs_2[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (mixed_rgb_2.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR /
                    ("DEBUG_shapenet_core_render_without_sample_nums_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_shapenet_core_render_without_sample_nums_%s.png" % idx,
            DATA_DIR)
        self.assertClose(mixed_rgb_2, image_ref, atol=0.05)
def generate_cow_renders(num_views: int = 40,
                         data_dir: str = DATA_DIR,
                         azimuth_range: float = 180):
    """
    This function generates `num_views` renders of a cow mesh.
    The renders are generated from viewpoints sampled at uniformly distributed
    azimuth intervals. The elevation is kept constant so that the camera's
    vertical position coincides with the equator.

    For a more detailed explanation of this code, please refer to the
    docs/tutorials/fit_textured_mesh.ipynb notebook.

    Args:
        num_views: The number of generated renders.
        data_dir: The folder that contains the cow mesh files. If the cow mesh
            files do not exist in the folder, this function will automatically
            download them.

    Returns:
        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
            images are rendered.
        images: A tensor of shape `(num_views, height, width, 3)` containing
            the rendered images.
        silhouettes: A tensor of shape `(num_views, height, width)` containing
            the rendered silhouettes.
    """

    # Set the paths and download the cow mesh if not done before.
    cow_mesh_files = [
        os.path.join(data_dir, fl)
        for fl in ("cow.obj", "cow.mtl", "cow_texture.png")
    ]
    if any(not os.path.isfile(f) for f in cow_mesh_files):
        os.makedirs(data_dir, exist_ok=True)
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj")
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl")
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
        )

    # Setup
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        torch.cuda.set_device(device)
    else:
        device = torch.device("cpu")

    # Load obj file
    obj_filename = os.path.join(data_dir, "cow.obj")
    mesh = load_objs_as_meshes([obj_filename], device=device)

    # We scale normalize and center the target mesh to fit in a sphere of radius 1
    # centered at (0,0,0). (scale, center) will be used to bring the predicted mesh
    # to its original center and scale. Note that normalizing the target mesh
    # speeds up the optimization but is not necessary!
    verts = mesh.verts_packed()
    N = verts.shape[0]
    center = verts.mean(0)
    scale = max((verts - center).abs().max(0)[0])
    mesh.offset_verts_(-(center.expand(N, 3)))
    mesh.scale_verts_((1.0 / float(scale)))

    # Get a batch of viewing angles.
    elev = torch.linspace(0, 0, num_views)  # keep constant
    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0

    # Place a point light in front of the object. As mentioned above, the front of
    # the cow is facing the -z direction.
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])

    # Initialize an OpenGL perspective camera that represents a batch of different
    # viewing angles. All the cameras helper methods support mixed type inputs and
    # broadcasting. So we can view the camera from a distance of dist=2.7, and
    # then specify elevation and azimuth angles for each viewpoint as tensors.
    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

    # Define the settings for rasterization and shading. Here we set the output
    # image to be of size 128x128. As we are rendering images for visualization
    # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
    # rasterize_meshes.py for explanations of these parameters. We also leave
    # bin_size and max_faces_per_bin to their default values of None, which sets
    # their values using heuristics and ensures that the faster coarse-to-fine
    # rasterization method is used. Refer to docs/notes/renderer.md for an
    # explanation of the difference between naive and coarse-to-fine rasterization.
    raster_settings = RasterizationSettings(image_size=128,
                                            blur_radius=0.0,
                                            faces_per_pixel=1)

    # Create a phong renderer by composing a rasterizer and a shader. The textured
    # phong shader will interpolate the texture uv coordinates for each vertex,
    # sample from a texture image and apply the Phong lighting model.
    blend_params = BlendParams(sigma=1e-4,
                               gamma=1e-4,
                               background_color=(0.0, 0.0, 0.0))
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=SoftPhongShader(device=device,
                               cameras=cameras,
                               lights=lights,
                               blend_params=blend_params),
    )

    # Create a batch of meshes by repeating the cow mesh and associated textures.
    # Meshes has a useful `extend` method which allows us to do this very easily.
    # This also extends the textures.
    meshes = mesh.extend(num_views)

    # Render the cow mesh from each viewing angle.
    target_images = renderer(meshes, cameras=cameras, lights=lights)

    # Rasterization settings for silhouette rendering.
    sigma = 1e-4
    raster_settings_silhouette = RasterizationSettings(
        image_size=128,
        blur_radius=np.log(1.0 / 1e-4 - 1.0) * sigma,
        faces_per_pixel=50)

    # Silhouette renderer.
    renderer_silhouette = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings_silhouette),
        shader=SoftSilhouetteShader(),
    )

    # Render silhouette images. The 3rd channel of the rendering output is
    # the alpha/silhouette channel.
    silhouette_images = renderer_silhouette(meshes, cameras=cameras, lights=lights)

    # Binary silhouettes.
    silhouette_binary = (silhouette_images[..., 3] > 1e-4).float()

    return cameras, target_images[..., :3], silhouette_binary
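# Hedged usage sketch for generate_cow_renders above (not from the original
# code): request a small number of views and check the returned shapes.
cameras, images, silhouettes = generate_cow_renders(num_views=8, azimuth_range=180)
print(len(cameras), images.shape, silhouettes.shape)
# 8, torch.Size([8, 128, 128, 3]), torch.Size([8, 128, 128])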
        128,
        128,
    ),
    mode='bilinear').permute(0, 2, 3, 1).numpy()
target_silhouettes = segment_image(target_images)

# black background
target_images = target_images * target_silhouettes[..., None]

target_images = torch.from_numpy(target_images).float().to(device)
target_silhouettes = torch.from_numpy(target_silhouettes).float().to(device)

extrinsics = h5file['Extrinsics'][model_idx, :num_images]
target_cameras = FoVPerspectiveCameras(device=device,
                                       R=extrinsics[:, :3, :3],
                                       T=extrinsics[:, :3, -1])
h5file.close()

# target_cameras, target_images, target_silhouettes = generate_cow_renders(num_views=40, azimuth_range=180)
print(f'Generated {len(target_images)} images/silhouettes/cameras.')

import pdb
pdb.set_trace()

# render_size describes the size of both sides of the
# rendered images in pixels. Since an advantage of
# Neural Radiance Fields are high quality renders
# with a significant amount of details, we render
# the implicit function at double the size of
class ToyNeuralGraphicsDataset(data.Dataset):

    def __init__(self,
                 dir: str,
                 rasterization_settings: dict,
                 znear: float = 1.0,
                 zfar: float = 1000.0,
                 scale_min: float = 0.5,
                 scale_max: float = 2.0,
                 device: str = 'cuda'):
        super(ToyNeuralGraphicsDataset, self).__init__()
        device = torch.device(device)
        self.device = device
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.scale_range = scale_max - scale_min
        objs = [
            os.path.join(dir, f) for f in os.listdir(dir) if f.endswith('.obj')
        ]
        self.meshes = load_objs_as_meshes(objs, device=device)
        R, T = look_at_view_transform(0, 0, 0)
        self.cameras = FoVPerspectiveCameras(R=R,
                                             T=T,
                                             znear=znear,
                                             zfar=zfar,
                                             device=device)
        self.renderer = MeshRenderer(
            rasterizer=MeshRasterizer(
                cameras=self.cameras,
                raster_settings=RasterizationSettings(**rasterization_settings),
            ),
            shader=HardFlatShader(
                device=device,
                cameras=self.cameras,
            ))

    def get_random_transform(self):
        scale = (torch.rand(1).squeeze() * self.scale_range + self.scale_min).item()
        rot = random_rotation()
        x, y, d = torch.rand(3)
        x = x * 2.0 - 1.0
        y = y * 2.0 - 1.0
        trans = torch.Tensor([x, y, d])
        trans = self.cameras.unproject_points(
            trans.unsqueeze(0).to(self.device),
            world_coordinates=False,
            scaled_depth_input=True)[0].cpu()
        return scale, rot, trans

    def __getitem__(self, index):
        index %= len(self.meshes)
        scale, rot, trans = self.get_random_transform()
        transform = Transform3d() \
            .scale(scale) \
            .compose(Rotate(rot)) \
            .translate(*trans) \
            .get_matrix() \
            .squeeze()
        mesh = self.meshes[index].scale_verts(scale)
        pixels = self.renderer(mesh,
                               R=rot.unsqueeze(0).to(self.device),
                               T=trans.unsqueeze(0).to(self.device))
        pixels = pixels[0, ..., :3].transpose(0, -1)
        return (pixels, [transform.to(self.device)])

    def __len__(self):
        return len(self.meshes) * 1024
def test_render_r2n2(self):
    """
    Test rendering objects from R2N2 selected both by indices and model_ids.
    """
    # Set up device and seed for random selections.
    device = torch.device("cuda:0")
    torch.manual_seed(39)

    # Load dataset in the train split.
    r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH)

    # Render first three models in the dataset.
    R, T = look_at_view_transform(1.0, 1.0, 90)
    cameras = FoVPerspectiveCameras(R=R, T=T, device=device)
    raster_settings = RasterizationSettings(image_size=512)
    lights = PointLights(
        location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
        # TODO: debug the source of the discrepancy in two images when rendering on GPU.
        diffuse_color=((0, 0, 0),),
        specular_color=((0, 0, 0),),
        device=device,
    )
    r2n2_by_idxs = r2n2_dataset.render(
        idxs=list(range(3)),
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Check that there are three images in the batch.
    self.assertEqual(r2n2_by_idxs.shape[0], 3)

    # Compare the rendered models to the reference images.
    for idx in range(3):
        r2n2_by_idxs_rgb = r2n2_by_idxs[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray((r2n2_by_idxs_rgb.numpy() * 255).astype(np.uint8)).save(
                DATA_DIR / ("DEBUG_r2n2_render_by_idxs_%s.png" % idx)
            )
        image_ref = load_rgb_image(
            "test_r2n2_render_by_idxs_and_ids_%s.png" % idx, DATA_DIR
        )
        self.assertClose(r2n2_by_idxs_rgb, image_ref, atol=0.05)

    # Render the same models but by model_ids this time.
    r2n2_by_model_ids = r2n2_dataset.render(
        model_ids=[
            "1a4a8592046253ab5ff61a3a2a0e2484",
            "1a04dcce7027357ab540cc4083acfa57",
            "1a9d0480b74d782698f5bccb3529a48d",
        ],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        r2n2_by_model_ids_rgb = r2n2_by_model_ids[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (r2n2_by_model_ids_rgb.numpy() * 255).astype(np.uint8)
            ).save(DATA_DIR / ("DEBUG_r2n2_render_by_model_ids_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_r2n2_render_by_idxs_and_ids_%s.png" % idx, DATA_DIR
        )
        self.assertClose(r2n2_by_model_ids_rgb, image_ref, atol=0.05)

    ###############################
    # Test rendering by categories
    ###############################

    # Render a mixture of categories.
    categories = ["chair", "lamp"]
    mixed_objs = r2n2_dataset.render(
        categories=categories,
        sample_nums=[1, 2],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        mixed_rgb = mixed_objs[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray((mixed_rgb.numpy() * 255).astype(np.uint8)).save(
                DATA_DIR / ("DEBUG_r2n2_render_by_categories_%s.png" % idx)
            )
        image_ref = load_rgb_image(
            "test_r2n2_render_by_categories_%s.png" % idx, DATA_DIR
        )
        self.assertClose(mixed_rgb, image_ref, atol=0.05)
def render(self,
           model_ids: Optional[List[str]] = None,
           categories: Optional[List[str]] = None,
           sample_nums: Optional[List[int]] = None,
           idxs: Optional[List[int]] = None,
           shader_type=HardPhongShader,
           device="cpu",
           **kwargs) -> torch.Tensor:
    """
    If a list of model_ids are supplied, render all the objects by the given
    model_ids. If no model_ids are supplied, but categories and sample_nums
    are specified, randomly select a number of objects (number specified in
    sample_nums) in the given categories and render these objects. If instead
    a list of idxs is specified, check if the idxs are all valid and render
    models by the given idxs. Otherwise, randomly select a number (first
    number in sample_nums, default is set to be 1) of models from the loaded
    dataset and render these models.

    Args:
        model_ids: List[str] of model_ids of models intended to be rendered.
        categories: List[str] of categories intended to be rendered.
            categories and sample_nums must be specified at the same time.
            categories can be given in the form of synset offsets or labels,
            or a combination of both.
        sample_nums: List[int] of number of models to be randomly sampled from
            each category. Could also contain one single integer, in which
            case it will be broadcasted for every category.
        idxs: List[int] of indices of models to be rendered in the dataset.
        shader_type: Select shading. Valid options include HardPhongShader
            (default), SoftPhongShader, HardGouraudShader, SoftGouraudShader,
            HardFlatShader, SoftSilhouetteShader.
        device: torch.device on which the tensors should be located.
        **kwargs: Accepts any of the kwargs that the renderer supports.

    Returns:
        Batch of rendered images of shape (N, H, W, 3).
    """
    idxs = self._handle_render_inputs(model_ids, categories, sample_nums, idxs)
    # Use the getitem method which loads mesh + texture.
    models = [self[idx] for idx in idxs]
    meshes = collate_batched_meshes(models)["mesh"]
    if meshes.textures is None:
        meshes.textures = TexturesVertex(verts_features=torch.ones_like(
            meshes.verts_padded(), device=device))

    meshes = meshes.to(device)
    cameras = kwargs.get("cameras", FoVPerspectiveCameras()).to(device)
    if len(cameras) != 1 and len(cameras) % len(meshes) != 0:
        raise ValueError("Mismatch between batch dims of cameras and meshes.")
    if len(cameras) > 1:
        # When rendering R2N2 models, if more than one views are provided,
        # broadcast the meshes so that each mesh can be rendered for each of
        # the views.
        meshes = meshes.extend(len(cameras) // len(meshes))

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=kwargs.get("raster_settings",
                                       RasterizationSettings()),
        ),
        shader=shader_type(
            device=device,
            cameras=cameras,
            lights=kwargs.get("lights", PointLights()).to(device),
        ),
    )
    return renderer(meshes)
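# Hedged usage sketch for render() above, wrapped in a helper so nothing runs
# at import time. `dataset` stands for a loaded ShapeNetCore/R2N2-style
# instance; look_at_view_transform is assumed to be imported, and the
# viewpoint and category are illustrative only.
def example_render_two_chairs(dataset, device="cuda:0"):
    R, T = look_at_view_transform(dist=1.0, elev=1.0, azim=90)
    return dataset.render(
        categories=["chair"],
        sample_nums=[2],
        device=device,
        cameras=FoVPerspectiveCameras(R=R, T=T, device=device),
        raster_settings=RasterizationSettings(image_size=256),
        lights=PointLights(device=device),
    )  # (2, 256, 256, 3) batch of RGB renders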
def main(inference, n_iter, save_state_dict, load_state_dict,
         kl_annealing_iters, zero_kl_iters, max_kl_factor, init_scale,
         save_visualization):
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        torch.cuda.set_device(device)
    else:
        print('Please note that NeRF is a resource-demanding method.' +
              ' Running this notebook on CPU will be extremely slow.' +
              ' We recommend running the example on a GPU' +
              ' with at least 10 GB of memory.')
        device = torch.device("cpu")

    target_cameras, target_images, target_silhouettes = generate_cow_renders(
        num_views=30, azimuth_low=-180, azimuth_high=90)
    print(f'Generated {len(target_images)} images/silhouettes/cameras.')

    # render_size describes the size of both sides of the
    # rendered images in pixels. Since an advantage of
    # Neural Radiance Fields are high quality renders
    # with a significant amount of details, we render
    # the implicit function at double the size of
    # target images.
    render_size = target_images.shape[1] * 2

    # Our rendered scene is centered around (0,0,0)
    # and is enclosed inside a bounding box
    # whose side is roughly equal to 3.0 (world units).
    volume_extent_world = 3.0

    # 1) Instantiate the raysamplers.
    # Here, NDCGridRaysampler generates a rectangular image
    # grid of rays whose coordinates follow the PyTorch3d
    # coordinate conventions.
    raysampler_grid = NDCGridRaysampler(
        image_height=render_size,
        image_width=render_size,
        n_pts_per_ray=128,
        min_depth=0.1,
        max_depth=volume_extent_world,
    )

    # MonteCarloRaysampler generates a random subset
    # of `n_rays_per_image` rays emitted from the image plane.
    raysampler_mc = MonteCarloRaysampler(
        min_x=-1.0,
        max_x=1.0,
        min_y=-1.0,
        max_y=1.0,
        n_rays_per_image=750,
        n_pts_per_ray=128,
        min_depth=0.1,
        max_depth=volume_extent_world,
    )

    # 2) Instantiate the raymarcher.
    # Here, we use the standard EmissionAbsorptionRaymarcher
    # which marches along each ray in order to render
    # the ray into a single 3D color vector
    # and an opacity scalar.
    raymarcher = EmissionAbsorptionRaymarcher()

    # Finally, instantiate the implicit renderers
    # for both raysamplers.
    renderer_grid = ImplicitRenderer(
        raysampler=raysampler_grid,
        raymarcher=raymarcher,
    )
    renderer_mc = ImplicitRenderer(
        raysampler=raysampler_mc,
        raymarcher=raymarcher,
    )

    # First move all relevant variables to the correct device.
    renderer_grid = renderer_grid.to(device)
    renderer_mc = renderer_mc.to(device)
    target_cameras = target_cameras.to(device)
    target_images = target_images.to(device)
    target_silhouettes = target_silhouettes.to(device)

    # Set the seed for reproducibility.
    torch.manual_seed(1)

    # Instantiate the radiance field model.
    neural_radiance_field_net = NeuralRadianceField().to(device)
    if load_state_dict is not None:
        sd = torch.load(load_state_dict)
        sd["harmonic_embedding.frequencies"] = neural_radiance_field_net.harmonic_embedding.frequencies
        neural_radiance_field_net.load_state_dict(sd)

    # TYXE comment: set up the BNN depending on the desired inference.
    standard_normal = dist.Normal(
        torch.tensor(0.).to(device), torch.tensor(1.).to(device))
    prior_kwargs = {}
    test_samples = 1
    if inference == "ml":
        prior_kwargs.update(expose_all=False, hide_all=True)
        guide = None
    elif inference == "map":
        guide = partial(pyro.infer.autoguide.AutoDelta,
                        init_loc_fn=tyxe.guides.PretrainedInitializer.from_net(
                            neural_radiance_field_net))
    elif inference == "mean-field":
        guide = partial(tyxe.guides.AutoNormal,
                        init_scale=init_scale,
                        init_loc_fn=tyxe.guides.PretrainedInitializer.from_net(
                            neural_radiance_field_net))
        test_samples = 8
    else:
        raise RuntimeError(f"Unreachable inference: {inference}")

    prior = tyxe.priors.IIDPrior(standard_normal, **prior_kwargs)
    neural_radiance_field = tyxe.PytorchBNN(neural_radiance_field_net, prior, guide)

    # TYXE comment: we need a batch of dummy data for the BNN to trace the parameters.
    dummy_data = namedtuple("RayBundle", "origins directions lengths")(
        torch.randn(1, 1, 3).to(device),
        torch.randn(1, 1, 3).to(device),
        torch.randn(1, 1, 8).to(device))

    # Instantiate the Adam optimizer. We set its master learning rate to 1e-3.
    lr = 1e-3
    optimizer = torch.optim.Adam(
        neural_radiance_field.pytorch_parameters(dummy_data), lr=lr)

    # We sample 6 random cameras in a minibatch. Each camera
    # emits raysampler_mc.n_rays_per_image rays.
    batch_size = 6

    # Init the loss history buffers.
    loss_history_color, loss_history_sil = [], []

    if kl_annealing_iters > 0 or zero_kl_iters > 0:
        kl_factor = 0.
        kl_annealing_rate = max_kl_factor / max(kl_annealing_iters, 1)
    else:
        kl_factor = max_kl_factor
        kl_annealing_rate = 0.

    # The main optimization loop.
    for iteration in range(n_iter):
        # In case we reached the last 75% of iterations,
        # decrease the learning rate of the optimizer 10-fold.
        if iteration == round(n_iter * 0.75):
            print('Decreasing LR 10-fold ...')
            optimizer = torch.optim.Adam(
                neural_radiance_field.pytorch_parameters(dummy_data),
                lr=lr * 0.1)

        # Zero the optimizer gradient.
        optimizer.zero_grad()

        # Sample random batch indices.
        batch_idx = torch.randperm(len(target_cameras))[:batch_size]

        # Sample the minibatch of cameras.
        batch_cameras = FoVPerspectiveCameras(
            R=target_cameras.R[batch_idx],
            T=target_cameras.T[batch_idx],
            znear=target_cameras.znear[batch_idx],
            zfar=target_cameras.zfar[batch_idx],
            aspect_ratio=target_cameras.aspect_ratio[batch_idx],
            fov=target_cameras.fov[batch_idx],
            device=device,
        )

        rendered_images_silhouettes, sampled_rays = renderer_mc(
            cameras=batch_cameras,
            volumetric_function=partial(batched_forward,
                                        net=neural_radiance_field))
        rendered_images, rendered_silhouettes = (
            rendered_images_silhouettes.split([3, 1], dim=-1))

        # Compute the silhouette error as the mean huber
        # loss between the predicted masks and the
        # sampled target silhouettes.
        silhouettes_at_rays = sample_images_at_mc_locs(
            target_silhouettes[batch_idx, ..., None], sampled_rays.xys)
        sil_err = huber(
            rendered_silhouettes,
            silhouettes_at_rays,
        ).abs().mean()

        # Compute the color error as the mean huber
        # loss between the rendered colors and the
        # sampled target images.
        colors_at_rays = sample_images_at_mc_locs(target_images[batch_idx],
                                                  sampled_rays.xys)
        color_err = huber(
            rendered_images,
            colors_at_rays,
        ).abs().mean()

        # The optimization loss is a simple sum of the color and silhouette errors.
        # TYXE comment: we also add a kl loss for the variational posterior scaled
        # by the size of the data, i.e. the total number of data points times the
        # number of values that the data-dependent part of the objective averages
        # over. Effectively I'm treating this as if this was something like a
        # Bernoulli likelihood in a VAE where the expected log likelihood is
        # averaged over both data points and pixels.
        beta = kl_factor / (target_images.numel() + target_silhouettes.numel())
        kl_err = neural_radiance_field.cached_kl_loss
        loss = color_err + sil_err + beta * kl_err

        # Log the loss history.
        loss_history_color.append(float(color_err))
        loss_history_sil.append(float(sil_err))

        # Every 10 iterations, print the current values of the losses.
        if iteration % 10 == 0:
            print(f'Iteration {iteration:05d}:' +
                  f' loss color = {float(color_err):1.2e}' +
                  f' loss silhouette = {float(sil_err):1.2e}' +
                  f' loss kl = {float(kl_err):1.2e}' +
                  f' kl_factor = {kl_factor:1.3e}')

        # Take the optimization step.
        loss.backward()
        optimizer.step()

        # TYXE comment: anneal the kl rate.
        if iteration >= zero_kl_iters:
            kl_factor = min(max_kl_factor, kl_factor + kl_annealing_rate)

        # Visualize the full renders every 1000 iterations.
        if iteration % 1000 == 0:
            show_idx = torch.randperm(len(target_cameras))[:1]
            fig = show_full_render(
                neural_radiance_field,
                FoVPerspectiveCameras(
                    R=target_cameras.R[show_idx],
                    T=target_cameras.T[show_idx],
                    znear=target_cameras.znear[show_idx],
                    zfar=target_cameras.zfar[show_idx],
                    aspect_ratio=target_cameras.aspect_ratio[show_idx],
                    fov=target_cameras.fov[show_idx],
                    device=device,
                ),
                target_images[show_idx][0],
                target_silhouettes[show_idx][0],
                loss_history_color,
                loss_history_sil,
                renderer_grid,
                num_forward=test_samples)
            plt.savefig(f"nerf/full_render{iteration}.png")
            plt.close(fig)

    with torch.no_grad():
        rotating_nerf_frames, uncertainty_frames = generate_rotating_nerf(
            neural_radiance_field,
            target_cameras,
            renderer_grid,
            device,
            n_frames=3 * 5,
            num_forward=test_samples,
            save_visualization=save_visualization)

    for i, (img, uncertainty) in enumerate(
            zip(rotating_nerf_frames.clamp(0., 1.).cpu().numpy(),
                uncertainty_frames.cpu().numpy())):
        f, ax = plt.subplots(figsize=(1.625, 1.625))
        f.subplots_adjust(0, 0, 1, 1)
        ax.imshow(img)
        ax.set_axis_off()
        f.savefig(f"nerf/final_image{i}.jpg", bbox_inches="tight", pad_inches=0)
        plt.close(f)

        f, ax = plt.subplots(figsize=(1.625, 1.625))
        f.subplots_adjust(0, 0, 1, 1)
        ax.imshow(uncertainty, cmap="hot", vmax=0.75**0.5)
        ax.set_axis_off()
        f.savefig(f"nerf/final_uncertainty{i}.jpg",
                  bbox_inches="tight",
                  pad_inches=0)
        plt.close(f)

    if save_state_dict is not None:
        if inference != "ml":
            raise ValueError(
                "Saving the state dict is only available for ml inference for now."
            )
        state_dict = dict(
            neural_radiance_field.named_pytorch_parameters(dummy_data))
        torch.save(state_dict, save_state_dict)

    test_cameras, test_images, test_silhouettes = generate_cow_renders(
        num_views=10, azimuth_low=90, azimuth_high=180)

    del renderer_mc
    del target_cameras
    del target_images
    del target_silhouettes
    torch.cuda.empty_cache()

    test_cameras = test_cameras.to(device)
    test_images = test_images.to(device)
    test_silhouettes = test_silhouettes.to(device)

    # TODO: remove duplication from training code for test error.
    with torch.no_grad():
        sil_err = 0.
        color_err = 0.
        for i in range(len(test_cameras)):
            batch_idx = [i]

            # Sample the minibatch of cameras.
            batch_cameras = FoVPerspectiveCameras(
                R=test_cameras.R[batch_idx],
                T=test_cameras.T[batch_idx],
                znear=test_cameras.znear[batch_idx],
                zfar=test_cameras.zfar[batch_idx],
                aspect_ratio=test_cameras.aspect_ratio[batch_idx],
                fov=test_cameras.fov[batch_idx],
                device=device,
            )

            img_list, sils_list, sampled_rays_list = [], [], []
            for _ in range(test_samples):
                rendered_images_silhouettes, sampled_rays = renderer_grid(
                    cameras=batch_cameras,
                    volumetric_function=partial(batched_forward,
                                                net=neural_radiance_field))
                imgs, sils = (rendered_images_silhouettes.split([3, 1], dim=-1))
                img_list.append(imgs)
                sils_list.append(sils)
                sampled_rays_list.append(sampled_rays.xys)

            assert sampled_rays_list[0].eq(torch.stack(sampled_rays_list)).all()
            rendered_images = torch.stack(img_list).mean(0)
            rendered_silhouettes = torch.stack(sils_list).mean(0)

            # Compute the silhouette error as the mean huber
            # loss between the predicted masks and the
            # sampled target silhouettes.
            # TYXE comment: sampled_rays are always the same for renderer_grid.
            silhouettes_at_rays = sample_images_at_mc_locs(
                test_silhouettes[batch_idx, ..., None], sampled_rays.xys)
            sil_err += huber(
                rendered_silhouettes,
                silhouettes_at_rays,
            ).abs().mean().item() / len(test_cameras)

            # Compute the color error as the mean huber
            # loss between the rendered colors and the
            # sampled target images.
            colors_at_rays = sample_images_at_mc_locs(test_images[batch_idx],
                                                      sampled_rays.xys)
            color_err += huber(
                rendered_images,
                colors_at_rays,
            ).abs().mean().item() / len(test_cameras)

    print(f"Test error: sil={sil_err:1.3e}; col={color_err:1.3e}")
# Place a point light in front of the object. As mentioned above, the front of the cow is facing the
# -z direction.
lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])

n = 3
zfar = 100.0
for i in range(n):
    t = i / n
    # Initialize a camera.
    # With world coordinates +Y up, +X left and +Z in, the front of the cow is facing the -Z direction.
    # So we move the camera by 180 in the azimuth direction so it is facing the front of the cow.
    R, T = look_at_view_transform(0, 0, 0)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T, zfar=zfar)

    smin = 0.1
    smax = 2.0
    srange = smax - smin
    scale = (torch.rand(1).squeeze() * srange + smin).item()

    # Generate a random NDC coordinate https://pytorch3d.org/docs/cameras
    x, y, d = torch.rand(3)
    x = x * 2.0 - 1.0
    y = y * 2.0 - 1.0
    trans = torch.Tensor([x, y, d]).to(device)
    trans = cameras.unproject_points(trans.unsqueeze(0),
                                     world_coordinates=False,
                                     scaled_depth_input=True)[0]
    rot = random_rotations(1)[0].to(device)
    PointLights,
    DirectionalLights,
    Materials,
    RasterizationSettings,
    MeshRenderer,
    MeshRasterizer,
    SoftPhongShader,
    TexturesUV,
    TexturesVertex,
)

device = torch.device("cpu")
print("WARNING: CPU only, this will be slow!")

obj_filename = "./meshes/cow.obj"

# Load obj.
mesh = load_objs_as_meshes([obj_filename], device=device)

# With world coordinates +Y up, +X left and +Z in, the front of the cow is facing the -Z direction.
# So we move the camera by 180 in the azimuth direction so it is facing the front of the cow.
R, T = look_at_view_transform(2.7, 0, 180)
cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

# Define the settings for rasterization and shading. Here we set the output image to be of size
# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1
# and blur_radius=0.0. We also set bin_size and max_faces_per_bin to None which ensures that
# the faster coarse-to-fine rasterization method is used. Refer to rasterize_meshes.py for
# explanations of these parameters. Refer to docs/notes/renderer.md for an explanation of
# the difference between naive and coarse-to-fine rasterization.
raster_settings = RasterizationSettings(
    image_size=512,
    blur_radius=0.0,
    faces_per_pixel=1,
)

# Place a point light in front of the object. As mentioned above, the front of the cow is facing the
# -z direction.