Пример #1
0
    def test_compare_coarse_cpu_vs_cuda(self):
        """Check that coarse point binning agrees between CPU and CUDA.

        Builds N point clouds with randomly chosen sizes, calls
        ``_C._rasterize_points_coarse`` once with CPU inputs and once with the
        same inputs moved to a CUDA device, and asserts that every bin holds
        the same point indices. Within a bin the CUDA entries are sorted
        before comparing, because the CUDA version may write them in any
        order.
        """
        torch.manual_seed(231)
        N = 3
        max_P = 1000
        image_size = (64, 64)
        radius = 0.1
        bin_size = 16
        max_points_per_bin = 500

        # Create heterogeneous point clouds. The sizes are drawn from torch's
        # RNG (seeded above) rather than numpy's unseeded global RNG, so the
        # generated data -- and therefore any failure -- is reproducible.
        # The range [0, max_P) matches the previous np.random.choice(max_P).
        points = []
        for _ in range(N):
            p = torch.randint(max_P, (1,)).item()
            points.append(torch.randn(p, 3))

        pointclouds = Pointclouds(points=points)

        # One radius value per packed point.
        num_packed = pointclouds.points_packed().shape[0]
        radius_per_point = torch.full((num_packed,), fill_value=radius)

        def coarse_args(clouds, radii):
            # Positional arguments for _C._rasterize_points_coarse; only the
            # point cloud and the radii differ between the two calls.
            return (
                clouds.points_packed(),
                clouds.cloud_to_packed_first_idx(),
                clouds.num_points_per_cloud(),
                image_size,
                radii,
                bin_size,
                max_points_per_bin,
            )

        bp_cpu = _C._rasterize_points_coarse(
            *coarse_args(pointclouds, radius_per_point)
        )

        device = get_random_cuda_device()
        bp_cuda = _C._rasterize_points_coarse(
            *coarse_args(pointclouds.to(device), radius_per_point.to(device))
        )
        # Copy the CUDA result to the host once instead of once per bin.
        bp_cuda = bp_cuda.cpu()

        # Bin points might not be the same: CUDA version might write them in
        # any order. But if we sort the non-(-1) elements of the CUDA output
        # then they should be the same.
        for n in range(N):
            for by in range(bp_cpu.shape[1]):
                for bx in range(bp_cpu.shape[2]):
                    # K is the number of valid (non -1) entries in the CPU bin.
                    K = (bp_cpu[n, by, bx] != -1).sum().item()
                    idxs_cpu = bp_cpu[n, by, bx].tolist()
                    idxs_cuda = bp_cuda[n, by, bx].tolist()
                    idxs_cuda[:K] = sorted(idxs_cuda[:K])
                    self.assertEqual(idxs_cpu, idxs_cuda)
    def _test_coarse_rasterize(self, device):
        """Run coarse point binning on `device` and compare to a fixed answer.

        Six hand-placed points are passed to ``_C._rasterize_points_coarse``
        and the returned bin assignments must equal a hand-built expected
        tensor element for element.

        Args:
            device: torch device on which the points and the expected tensor
                are created.
        """
        #
        #  Note that +Y is up and +X is left in the diagram below.
        #
        #  (4)              |2
        #                   |
        #                   |
        #                   |
        #                   |1
        #                   |
        #             (1)   |
        #                   | (2)
        # ____________(0)__(5)___________________
        # 2        1        |          -1      -2
        #                   |
        #       (3)         |
        #                   |
        #                   |-1
        #                   |
        #
        # Each (i) marks the location of point i. The screen bounding box
        # spans [-1, 1] in both the x and y directions.
        #
        # Why these points were chosen:
        # (0) falls into two bins;
        # (1) and (2) each fall into one bin;
        # (3) is out-of-bounds, but its disk is in-bounds;
        # (4) is out-of-bounds, and its entire disk is also out-of-bounds;
        # (5) has a negative z-value, so it should be skipped.
        # fmt: off
        points = torch.tensor(
            [
                [ 0.5,  0.0,  0.0],  # noqa: E241, E201
                [ 0.5,  0.5,  0.1],  # noqa: E241, E201
                [-0.3,  0.4,  0.0],  # noqa: E241
                [ 1.1, -0.5,  0.2],  # noqa: E241, E201
                [ 2.0,  2.0,  0.3],  # noqa: E241, E201
                [ 0.0,  0.0, -0.1],  # noqa: E241, E201
            ],
            device=device
        )
        # fmt: on
        clouds = Pointclouds(points=[points])

        # NOTE(review): image_size is an int and radius a float here, while
        # test_compare_coarse_cpu_vs_cuda passes an (H, W) tuple and a
        # per-point radius tensor to the same binding -- confirm which form
        # _C._rasterize_points_coarse expects.
        image_size = 16
        radius = 0.2
        bin_size = 8
        max_points_per_bin = 5

        # Expected result: every slot starts at -1, then the slots that should
        # hold a point index are filled in below.
        expected = torch.full((1, 2, 2, 5), -1, dtype=torch.int32, device=device)
        # Note that the order is only deterministic here for CUDA if all points
        # fit in one chunk. This will be the case for this small example, but
        # to properly exercise coordinated writes among multiple chunks we need
        # to use a bigger test case.
        expected[0, 0, 0, :2] = torch.tensor([0, 1])
        expected[0, 0, 1, 0] = torch.tensor([2])
        expected[0, 1, 0, :2] = torch.tensor([0, 3])

        actual = _C._rasterize_points_coarse(
            clouds.points_packed(),
            clouds.cloud_to_packed_first_idx(),
            clouds.num_points_per_cloud(),
            image_size,
            radius,
            bin_size,
            max_points_per_bin,
        )

        # Element-wise comparison reduced to a single boolean.
        all_match = torch.eq(actual, expected).all()
        self.assertTrue(all_match.item() == 1)