@staticmethod
def backward(ctx, grad_dists):
    # Gradients flow only to the points and the edge segments; the index
    # tensor and the remaining forward arguments are non-differentiable.
    grad_dists = grad_dists.contiguous()
    points, segms, idxs = ctx.saved_tensors
    grad_points, grad_segms = _C.point_edge_dist_backward(
        points, segms, idxs, grad_dists
    )
    return grad_points, None, grad_segms, None, None
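# For context, a minimal sketch of the forward pass this backward pairs with,
# inferred from the saved tensors and the five return slots above (one
# gradient per forward argument). The call signature mirrors the
# _C.point_edge_dist_forward usage in the test below; treat this as an
# illustration, not the exact upstream implementation.
@staticmethod
def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_points):
    # dists: (P,) distance from each point to its closest edge segment,
    # idxs: (P,) packed index of that closest segment.
    dists, idxs = _C.point_edge_dist_forward(
        points, points_first_idx, segms, segms_first_idx, max_points
    )
    # Save exactly what backward unpacks from ctx.saved_tensors.
    ctx.save_for_backward(points, segms, idxs)
    return dists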
def test_point_edge_distance(self):
    """
    Test CUDA implementation for PointEdgeDistanceForward
    & PointEdgeDistanceBackward.
    """
    device = get_random_cuda_device()
    N, V, F, P = 4, 32, 16, 24
    meshes, pcls = self.init_meshes_clouds(N, V, F, P, device=device)

    # Make packed points a leaf node.
    points_packed = pcls.points_packed().detach().clone()  # (P, 3)
    points_first_idx = pcls.cloud_to_packed_first_idx()
    max_p = pcls.num_points_per_cloud().max().item()

    # Make packed edges a leaf node.
    verts_packed = meshes.verts_packed()
    edges_packed = verts_packed[meshes.edges_packed()]  # (E, 2, 3)
    edges_packed = edges_packed.clone().detach()
    edges_first_idx = meshes.mesh_to_edges_packed_first_idx()

    # Leaf nodes.
    points_packed.requires_grad = True
    edges_packed.requires_grad = True
    grad_dists = torch.rand(
        (points_packed.shape[0],), dtype=torch.float32, device=device
    )

    # CUDA implementation: forward
    dists_cuda, idx_cuda = _C.point_edge_dist_forward(
        points_packed, points_first_idx, edges_packed, edges_first_idx, max_p
    )
    # CUDA implementation: backward
    grad_points_cuda, grad_edges_cuda = _C.point_edge_dist_backward(
        points_packed, edges_packed, idx_cuda, grad_dists
    )

    # CPU implementation: forward
    dists_cpu, idx_cpu = _C.point_edge_dist_forward(
        points_packed.cpu(),
        points_first_idx.cpu(),
        edges_packed.cpu(),
        edges_first_idx.cpu(),
        max_p,
    )
    # CPU implementation: backward
    # Note that using idx_cpu doesn't pass - there seems to be a problem
    # with tied results.
    grad_points_cpu, grad_edges_cpu = _C.point_edge_dist_backward(
        points_packed.cpu(), edges_packed.cpu(), idx_cuda.cpu(), grad_dists.cpu()
    )

    # Naive implementation: forward
    edges_list = packed_to_list(edges_packed, meshes.num_edges_per_mesh().tolist())
    dists_naive = []
    for i in range(N):
        points = pcls.points_list()[i]
        edges = edges_list[i]
        dists_temp = torch.zeros(
            (points.shape[0], edges.shape[0]), dtype=torch.float32, device=device
        )
        for p in range(points.shape[0]):
            for e in range(edges.shape[0]):
                dist = self._point_to_edge_distance(points[p], edges[e])
                dists_temp[p, e] = dist

        # torch.min() doesn't necessarily return the first index of the
        # smallest value; our warp_reduce does. So it's not straightforward
        # to directly compare indices, nor the gradients of grad_edges, which
        # also depend on the indices of the minimum value.
        # To be able to compare, we will compare dists_temp.min(1) and
        # then feed the CUDA indices to the naive output.
        start = points_first_idx[i]
        end = points_first_idx[i + 1] if i < N - 1 else points_packed.shape[0]
        min_idx = idx_cuda[start:end] - edges_first_idx[i]
        iidx = torch.arange(points.shape[0], device=device)
        min_dist = dists_temp[iidx, min_idx]

        dists_naive.append(min_dist)
    dists_naive = torch.cat(dists_naive)

    # Compare
    self.assertClose(dists_naive.cpu(), dists_cuda.cpu())
    self.assertClose(dists_naive.cpu(), dists_cpu)

    # Naive implementation: backward
    dists_naive.backward(grad_dists)
    grad_points_naive = torch.cat([cloud.grad for cloud in pcls.points_list()])
    grad_edges_naive = edges_packed.grad.cpu()

    # Compare
    self.assertClose(grad_points_naive.cpu(), grad_points_cuda.cpu(), atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cuda.cpu(), atol=5e-7)
    self.assertClose(grad_points_naive.cpu(), grad_points_cpu, atol=1e-7)
    self.assertClose(grad_edges_naive, grad_edges_cpu, atol=5e-7)
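# The naive loop above calls self._point_to_edge_distance, a helper defined
# elsewhere in this test class. For reference, a minimal sketch of the math
# it is expected to implement: the squared distance from a point to a
# segment, with the orthogonal projection clamped to the segment's endpoints.
# The name `_point_to_edge_distance_sketch` is hypothetical; the exact helper
# and its distance convention (squared vs. Euclidean) should be checked
# against the actual definition.
@staticmethod
def _point_to_edge_distance_sketch(point, edge):
    """
    point: (3,) tensor; edge: (2, 3) tensor holding the segment endpoints.
    Returns the squared distance from `point` to the segment.
    """
    v0, v1 = edge[0], edge[1]
    seg = v1 - v0
    # Parameter of the projection onto the infinite line through v0 and v1,
    # clamped to [0, 1] so the closest point stays on the segment. The small
    # epsilon guards against degenerate (zero-length) edges.
    t = torch.dot(point - v0, seg) / torch.dot(seg, seg).clamp(min=1e-12)
    t = t.clamp(min=0.0, max=1.0)
    closest = v0 + t * seg
    return ((point - closest) ** 2).sum()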