def backward(ctx, grad_dists):
    """
    Compute gradients of the edge-point distances w.r.t. points and segments.

    Args:
        ctx: context object exposing ``saved_tensors``, unpacked here as
            ``(points, segms, idxs)``.
        grad_dists: upstream gradient for the distances; it is made
            contiguous before being handed to the extension op.

    Returns:
        A 5-tuple ``(grad_points, None, grad_segms, None, None)``.
        NOTE(review): the ``None`` slots presumably line up with forward
        inputs that receive no gradient (first-index tensors and a max
        count) -- confirm against the matching ``forward`` signature.
    """
    upstream = grad_dists.contiguous()
    saved_points, saved_segms, saved_idxs = ctx.saved_tensors
    point_grads, segm_grads = _C.edge_point_dist_backward(
        saved_points, saved_segms, saved_idxs, upstream
    )
    return (point_grads, None, segm_grads, None, None)
Пример #2
0
    def test_edge_point_distance(self):
        """
        Check the CUDA and CPU implementations of EdgePointDistanceForward
        and EdgePointDistanceBackward against a naive per-pair computation.
        """
        device = get_random_cuda_device()
        N, V, F, P = 4, 32, 16, 24
        meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

        # Detached copy so the packed points are a leaf tensor.
        points_packed = pcls.points_packed().detach().clone()  # (P, 3)
        points_first_idx = pcls.cloud_to_packed_first_idx()

        # Gather per-edge vertex coordinates, again detached into a leaf.
        verts_packed = meshes.verts_packed()
        edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
        edges_packed = edges_packed.clone().detach()
        edges_first_idx = meshes.mesh_to_edges_packed_first_idx()
        max_e = meshes.num_edges_per_mesh().max().item()

        # Both leaves take part in the naive autograd pass further down.
        points_packed.requires_grad = True
        edges_packed.requires_grad = True
        total_edges = edges_packed.shape[0]
        grad_dists = torch.rand((total_edges,), dtype=torch.float32, device=device)

        def run_kernels(pts, pts_first, edges, edges_first, upstream):
            # Forward then backward through the extension for one device.
            dists, idx = _C.edge_point_dist_forward(
                pts, pts_first, edges, edges_first, max_e
            )
            grad_pts, grad_edges = _C.edge_point_dist_backward(
                pts, edges, idx, upstream
            )
            return dists, idx, grad_pts, grad_edges

        # Cuda implementation: forward + backward
        dists_cuda, idx_cuda, grad_points_cuda, grad_edges_cuda = run_kernels(
            points_packed, points_first_idx, edges_packed, edges_first_idx, grad_dists
        )

        # Cpu implementation: forward + backward
        dists_cpu, idx_cpu, grad_points_cpu, grad_edges_cpu = run_kernels(
            points_packed.cpu(),
            points_first_idx.cpu(),
            edges_packed.cpu(),
            edges_first_idx.cpu(),
            grad_dists.cpu(),
        )

        # Naive implementation: forward
        edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
        naive_chunks = []
        for i in range(N):
            points = pcls.points_list()[i]
            edges = edges_list[i]
            num_e = edges.shape[0]
            num_p = points.shape[0]
            pair_dists = torch.zeros(
                (num_e, num_p), dtype=torch.float32, device=device
            )
            for e in range(num_e):
                for p in range(num_p):
                    pair_dists[e, p] = self._point_to_edge_distance(
                        points[p], edges[e]
                    )

            # torch.min() is not guaranteed to pick the first index of the
            # smallest value, whereas our warp_reduce does. Indices (and the
            # edge gradients that depend on them) therefore cannot be
            # compared directly. Instead, the indices chosen by the cuda
            # kernel are used to read the naive distances.
            start = edges_first_idx[i]
            if i < N - 1:
                end = edges_first_idx[i + 1]
            else:
                end = edges_packed.shape[0]

            # Shift packed point indices to be local to cloud i.
            min_idx = idx_cuda.cpu()[start:end] - points_first_idx[i].cpu()
            row_idx = torch.arange(num_e, device=device)
            naive_chunks.append(pair_dists[row_idx, min_idx])

        dists_naive = torch.cat(naive_chunks)

        # Compare forward results
        self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
        self.assertClose(dists_naive.cpu(), dists_cpu)

        # Naive implementation: backward
        dists_naive.backward(grad_dists)
        grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
        grad_edges_naive = edges_packed.grad.cpu()

        # Compare gradients: cuda first, then cpu
        for kernel_grad_points, kernel_grad_edges in (
            (grad_points_cuda.cpu(), grad_edges_cuda.cpu()),
            (grad_points_cpu, grad_edges_cpu),
        ):
            self.assertClose(grad_points_naive.cpu(), kernel_grad_points, atol=1e-7)
            self.assertClose(grad_edges_naive, kernel_grad_edges, atol=5e-7)