    def get_comparisons(self, model_input, prediction, ground_truth=None):
        predictions, depth_maps = prediction

        batch_size = predictions.shape[0]

        # Parse model input.
        intrinsics = model_input["intrinsics"].cuda()
        uv = model_input["uv"].cuda().float()

        x_cam = uv[:, :, 0].view(batch_size, -1)
        y_cam = uv[:, :, 1].view(batch_size, -1)
        z_cam = depth_maps.view(batch_size, -1)

        normals = geometry.compute_normal_map(x_img=x_cam,
                                              y_img=y_cam,
                                              z=z_cam,
                                              intrinsics=intrinsics)
        normals = F.pad(normals, pad=(1, 1, 1, 1), mode="constant", value=1.)

        predictions = util.lin2img(predictions)

        if ground_truth is not None:
            trgt_imgs = ground_truth["rgb"]
            trgt_imgs = util.lin2img(trgt_imgs)

            return torch.cat(
                (normals.cpu(), predictions.cpu(), trgt_imgs.cpu()),
                dim=3).numpy()
        else:
            return torch.cat((normals.cpu(), predictions.cpu()), dim=3).numpy()
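
    # --- Usage note (assumption, not part of this codebase) -----------------
    # get_comparisons() returns a numpy array of shape (batch, 3, H, width)
    # with the normal map, the prediction, and (optionally) the ground truth
    # tiled side by side along the width. A hypothetical way to write each
    # comparison strip to disk:
    #
    #   import matplotlib.pyplot as plt
    #   comparisons = model.get_comparisons(model_input, prediction, ground_truth)
    #   for i, img in enumerate(comparisons):
    #       plt.imsave(f"comparison_{i:03d}.png", img.transpose(1, 2, 0).clip(0., 1.))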

    def forward(self, input, z=None):
        self.logs = list()  # log saves tensors that'll receive summaries when the model's write_updates function is called

        # Parse model input.
        instance_idcs = input["instance_idx"].long().cuda()
        pose = input["pose"].cuda()
        intrinsics = input["intrinsics"].cuda()
        uv = input["uv"].cuda().float()

        if self.fit_single_srn:
            phi = self.phi
        else:
            if self.has_params:  # If each instance has a latent parameter vector, we'll use that one.
                if z is None:
                    self.z = input["param"].cuda()
                else:
                    self.z = z
            else:  # Else, we'll use the embedding.
                self.z = self.latent_codes(instance_idcs)

            phi = self.hyper_phi(self.z) # Forward pass through hypernetwork yields a (callable) SRN.

        # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
        points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                       intrinsics=intrinsics,
                                                       uv=uv,
                                                       phi=phi)
        self.logs.extend(log)

        # Sample phi a last time at the final ray-marched world coordinates.
        v = phi(points_xyz)

        # Translate features at ray-marched world coordinates to RGB colors.
        novel_views = self.pixel_generator(v)

        # Calculate normal map
        with torch.no_grad():
            batch_size = uv.shape[0]
            x_cam = uv[:, :, 0].view(batch_size, -1)
            y_cam = uv[:, :, 1].view(batch_size, -1)
            z_cam = depth_maps.view(batch_size, -1)

            normals = geometry.compute_normal_map(x_img=x_cam, y_img=y_cam, z=z_cam, intrinsics=intrinsics)
            self.logs.append(("image", "normals",
                              torchvision.utils.make_grid(normals, scale_each=True, normalize=True), 100))

        if not self.fit_single_srn:
            self.logs.append(("embedding", "", self.latent_codes.weight, 500))
            self.logs.append(("scalar", "embed_min", self.z.min(), 1))
            self.logs.append(("scalar", "embed_max", self.z.max(), 1))

        return novel_views, depth_maps
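
# --- Illustrative sketch (assumption, not part of this codebase) ------------
# The call `phi = self.hyper_phi(self.z)` above relies on a hypernetwork that
# maps a per-instance latent code to a callable scene representation network.
# A minimal, self-contained version of that pattern (hypothetical names and
# sizes; the real hyper_phi is defined elsewhere in the project) could look
# like this:
import torch
import torch.nn as nn


class TinyHyperNet(nn.Module):
    """Maps a latent code z to the weights of a one-layer coordinate MLP."""

    def __init__(self, latent_dim=256, in_dim=3, out_dim=256):
        super().__init__()
        self.in_dim, self.out_dim = in_dim, out_dim
        # Predict a flat weight matrix and a bias vector from the latent code.
        self.weight_head = nn.Linear(latent_dim, in_dim * out_dim)
        self.bias_head = nn.Linear(latent_dim, out_dim)

    def forward(self, z):
        w = self.weight_head(z).view(-1, self.in_dim, self.out_dim)  # (B, in, out)
        b = self.bias_head(z).unsqueeze(1)                           # (B, 1, out)

        def phi(points_xyz):  # points_xyz: (B, num_points, in_dim)
            # One feature vector per ray-marched world coordinate.
            return torch.relu(torch.bmm(points_xyz, w) + b)

        return phi


# Usage sketch:
# hyper = TinyHyperNet()
# phi = hyper(torch.randn(8, 256))      # one callable SRN per instance
# v = phi(torch.randn(8, 1024, 3))      # (8, 1024, 256) features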

# Example 3

    def forward(self, input, z=None):
        self.logs = list()  # log saves tensors that'll receive summaries when the model's write_updates function is called

        uv = np.mgrid[0:128, 0:128].astype(np.int32)
        uv = torch.from_numpy(np.flip(uv, axis=0).copy()).long()
        uv = uv.reshape(2, -1).transpose(1, 0)
        uvs = torch.cat([uv.unsqueeze(0) for _ in range(input[0].size(0))])

        # Parse model input.
        pose = input[0].cuda()
        rgb_mat = input[1].cuda()
        intrinsics = None
        uv = uvs.cuda().float()
        v_scene = self.phi_scene(rgb_mat)
        v_scene = expand(v_scene, uv.size(1))

        # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
        points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                       intrinsics=intrinsics,
                                                       uv=uv,
                                                       phi=self.phi,
                                                       v_scene=v_scene)
        self.logs.extend(log)

        # Sample phi a last time at the final ray-marched world coordinates.
        v = self.phi(torch.cat([points_xyz, v_scene], dim=2))

        # Translate features at ray-marched world coordinates to RGB colors.
        novel_views = self.pixel_generator(v)

        # Calculate normal map
        with torch.no_grad():
            batch_size = uv.shape[0]
            x_cam = uv[:, :, 0].view(batch_size, -1)
            y_cam = uv[:, :, 1].view(batch_size, -1)
            z_cam = depth_maps.view(batch_size, -1)

            normals = geometry.compute_normal_map(x_img=x_cam,
                                                  y_img=y_cam,
                                                  z=z_cam,
                                                  intrinsics=intrinsics)
            self.logs.append(
                ("image", "normals",
                 torchvision.utils.make_grid(normals,
                                             scale_each=True,
                                             normalize=True), 100))

        return novel_views, depth_maps
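
# --- Usage note (assumption, not part of this codebase) ---------------------
# The uv grid built at the top of this forward() enumerates every pixel of a
# 128x128 image as integer (x, y) coordinates, one row per pixel. A small
# standalone check of that construction:
import numpy as np
import torch

uv = np.mgrid[0:128, 0:128].astype(np.int32)               # (2, 128, 128): row, col indices
uv = torch.from_numpy(np.flip(uv, axis=0).copy()).long()   # flip to (col, row) = (x, y)
uv = uv.reshape(2, -1).transpose(1, 0)                     # (16384, 2): one (x, y) per pixel
assert uv.shape == (128 * 128, 2)
print(uv[0], uv[1], uv[128])  # tensor([0, 0]) tensor([1, 0]) tensor([0, 1])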

# Example 4

    def forward(self, input, z=None):
        self.logs = list()  # log saves tensors that'll receive summaries when the model's write_updates function is called

        # Parse model input.
        instance_idcs = input["instance_idx"].long().cuda()
        pose = input["pose"].cuda()
        intrinsics = input["intrinsics"].cuda()
        uv = input["uv"].cuda().float()

        if self.fit_single_srn:
            phi = self.phi
        else:
            if self.has_params:  # If each instance has a latent parameter vector, we'll use that one.
                if z is None:
                    self.z = input["param"].cuda()
                else:
                    self.z = z
            else:  # Else, we'll use the embedding.
                self.z = self.latent_codes(instance_idcs)

            phi = self.hyper_phi(
                self.z
            )  # Forward pass through hypernetwork yields a (callable) SRN.

        # Raymarch SRN phi along rays defined by camera pose, intrinsics and uv coordinates.
        points_xyz, depth_maps, log = self.ray_marcher(cam2world=pose,
                                                       intrinsics=intrinsics,
                                                       uv=uv,
                                                       phi=phi)
        self.logs.extend(log)

        # Sample phi a last time at the final ray-marched world coordinates.
        v = phi(points_xyz)

        # Translate features at ray-marched world coordinates to RGB colors.
        # novel_views = self.pixel_generator(v)  # [8, 1024, 256]

        # Use attention
        out_channels = 128
        batch_size, _, in_channels = v.shape
        img_sidelength = 32

        v_ = v.view(-1, img_sidelength, img_sidelength,
                    in_channels).permute(0, 3, 1, 2).contiguous()

        g = self.conv_g(v_)
        phi = self.conv_phi(v_)
        theta = self.conv_theta(v_)

        g_x = g.permute(0, 2, 3, 1).view(batch_size, -1,
                                         out_channels).contiguous()
        theta_x = theta.permute(0, 2, 3, 1).view(batch_size, -1,
                                                 out_channels).contiguous()
        phi_x = phi.view(batch_size, out_channels, -1)

        f = torch.matmul(theta_x, phi_x)
        f_softmax = torch.softmax(f, -1)
        y = torch.matmul(f_softmax, g_x)

        y = y.view(batch_size, out_channels, img_sidelength, img_sidelength)
        w_y = self.w_y(y).view(batch_size, -1, in_channels)

        v = torch.add(v, w_y)

        novel_views = self.pixel_generator(v)

        # Use simple conv network
        # _, _, in_channels = v.shape
        # size = 32

        # v_ = v.reshape(-1, size, size, in_channels).permute(0, 3, 1, 2)
        # self.conv1 = nn.Conv2d(256, 128, 3, padding=1).cuda()
        # self.conv2 = nn.Conv2d(128, 64, 3, padding=1).cuda()
        # self.conv3 = nn.Conv2d(64, 3, 3, padding=1).cuda()

        # v_ = self.conv1(v_)
        # v_ = self.conv2(v_)
        # v_ = self.conv3(v_)

        # novel_views = v_.permute(0, 2, 3, 1).reshape(-1, size**2, 3)

        # Calculate normal map
        with torch.no_grad():
            batch_size = uv.shape[0]
            x_cam = uv[:, :, 0].view(batch_size, -1)
            y_cam = uv[:, :, 1].view(batch_size, -1)
            z_cam = depth_maps.view(batch_size, -1)

            normals = geometry.compute_normal_map(x_img=x_cam,
                                                  y_img=y_cam,
                                                  z=z_cam,
                                                  intrinsics=intrinsics)
            self.logs.append(
                ("image", "normals",
                 torchvision.utils.make_grid(normals,
                                             scale_each=True,
                                             normalize=True), 100))

        if not self.fit_single_srn:
            self.logs.append(("embedding", "", self.latent_codes.weight, 500))
            self.logs.append(("scalar", "embed_min", self.z.min(), 1))
            self.logs.append(("scalar", "embed_max", self.z.max(), 1))

        return novel_views, depth_maps
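
# --- Illustrative sketch (assumption, not part of this codebase) ------------
# The attention step above assumes conv_g / conv_phi / conv_theta / w_y are
# 1x1 convolutions, as in a standard non-local block: per-pixel features are
# projected down, pairwise similarities are softmax-normalized, and the
# aggregated result is projected back and added residually. A self-contained
# version with hypothetical layer names and sizes:
import torch
import torch.nn as nn


class NonLocalBlock(nn.Module):
    def __init__(self, in_channels=256, inter_channels=128):
        super().__init__()
        self.inter_channels = inter_channels
        self.conv_g = nn.Conv2d(in_channels, inter_channels, kernel_size=1)
        self.conv_phi = nn.Conv2d(in_channels, inter_channels, kernel_size=1)
        self.conv_theta = nn.Conv2d(in_channels, inter_channels, kernel_size=1)
        self.w_y = nn.Conv2d(inter_channels, in_channels, kernel_size=1)

    def forward(self, v, sidelength=32):
        b, n, c = v.shape                                      # (B, H*W, C)
        x = v.view(b, sidelength, sidelength, c).permute(0, 3, 1, 2).contiguous()
        g = self.conv_g(x).flatten(2).transpose(1, 2)          # (B, H*W, C')
        theta = self.conv_theta(x).flatten(2).transpose(1, 2)  # (B, H*W, C')
        phi = self.conv_phi(x).flatten(2)                      # (B, C', H*W)
        attn = torch.softmax(theta @ phi, dim=-1)              # (B, H*W, H*W)
        y = (attn @ g).transpose(1, 2).reshape(b, self.inter_channels, sidelength, sidelength)
        return v + self.w_y(y).flatten(2).transpose(1, 2)      # residual, back to (B, H*W, C)


# Usage sketch:
# block = NonLocalBlock()
# v = block(torch.randn(8, 1024, 256))   # same shape in and out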