Exemplo n.º 1
0
def test_problem_codes(cl_env, kernels, coord_dtype):
    from .test_collision_py import find_collisions
    ctx, cq = cl_env

    codes = np.array([0b00000000000000000000000000000000,
                      0b00000000000000000000000000000000,
                      0b00000110110000110100000100000010,
                      0b00001001001001001001001001001001,
                      0b00001001001001001001001001001001,
                      0b00010010010010010010010010010010,
                      0b00010010010010010010010010010010,
                      0b00010010011010010010011011011010,
                      0b00011001001011001001011001001011,
                      0b00011011011011011011011011011011,
                      0b00100100010000100010110100010110,
                      0b00100100100100100100100100100100,
                      0b00100100100101101101100101100100,
                      0b00101001101001101101101101101001,
                      0b00101101101101101101101101101101,
                      0b00110110110110110110110110110110, # This node had no parent
                      0b00110110110110110110110110110110,
                      0b00110110110110110110110110110110,
                      0b00111111111111111111111111111111,
                      0b00111111111111111111111111111111,
                      0b00111111111111111111111111111111], dtype='uint32')
    ids = np.arange(len(codes), dtype='uint32')
    n_nodes = 2 * len(codes) - 1

    codes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=codes
    )
    ids_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=ids
    )
    nodes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, n_nodes * Node.itemsize
    )

    fill_internal = kernels['fillInternal'](
        cq, (roundUp(len(codes), 32),), None,
        nodes_buf, ids_buf, len(codes),
    )

    generate_bvh = kernels['generateBVH'](
        cq, (roundUp(len(codes) - 1, 32),), None,
        codes_buf, nodes_buf, len(codes),
        wait_for=[fill_internal]
    )

    (nodes_map, _) = cl.enqueue_map_buffer(
        cq, nodes_buf, cl.map_flags.READ,
        0, n_nodes, Node,
        wait_for=[generate_bvh], is_blocking=True
    )
    nodes_map.dtype = Node
    assert set(nodes_map['parent'][1:]) == set(range(len(codes) - 1))
Exemplo n.º 2
0
def test_generate_bvh(cl_env, kernels):
    ctx, cq = cl_env

    # From Figure 3
    codes = np.array([0b00001, 0b00010, 0b00100, 0b00101,
                      0b10011, 0b11000, 0b11001, 0b11110], dtype='uint32')
    ids = np.arange(len(codes), dtype='uint32')
    n_nodes = len(codes) * 2 - 1

    codes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=codes
    )
    nodes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, n_nodes * Node.itemsize
    )
    ids_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=ids
    )

    fill_internal = kernels['fillInternal'](
        cq, (roundUp(len(codes), 32),), None,
        nodes_buf, ids_buf, len(codes),
    )
    generate_bvh = kernels['generateBVH'](
        cq, (roundUp(len(codes) - 1, 32),), None,
        codes_buf, nodes_buf, len(codes),
        wait_for=[fill_internal]
    )
    (nodes_map, _) = cl.enqueue_map_buffer(
        cq, nodes_buf, cl.map_flags.READ,
        0, n_nodes, Node,
        wait_for=[generate_bvh], is_blocking=True
    )
    nodes_map.dtype = Node

    leaf = len(codes) - 1
    expected = np.array([(-1, 7, [3, 4]),
                         (3, 1, [leaf+0, leaf+1]),
                         (3, 3, [leaf+2, leaf+3]),
                         (0, 3, [1, 2]),
                         (0, 7, [leaf+4, 5]),
                         (4, 7, [6, leaf+7]),
                         (5, 6, [leaf+5, leaf+6])], dtype=Node)
    np.testing.assert_equal(nodes_map[1:leaf], expected[1:])
    np.testing.assert_equal(nodes_map[['data', 'right_edge']][0],
                            expected[['data', 'right_edge']][0])

    expected_parents = np.array([1, 1, 2, 2, 4, 6, 6, 5])
    np.testing.assert_equal(nodes_map['parent'][leaf:], expected_parents)
    np.testing.assert_equal(nodes_map['right_edge'][leaf:], np.arange(len(codes)))
    np.testing.assert_equal(nodes_map['data'][leaf:, 0], np.arange(len(codes)))
Exemplo n.º 3
0
def test_fill_internal(cl_env, kernels):
    ctx, cq = cl_env

    n = 8
    ids = np.random.permutation(n).astype('uint32')
    nodes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, (2 * n - 1) * Node.itemsize
    )
    ids_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=ids
    )

    fill_internal = kernels['fillInternal'](
        cq, (roundUp(n, 32),), None,
        nodes_buf, ids_buf, n,
    )

    (nodes_map, _) = cl.enqueue_map_buffer(
        cq, nodes_buf, cl.map_flags.READ,
        (n - 1) * Node.itemsize, n, Node,
        wait_for=[fill_internal], is_blocking=True
    )
    nodes_map.dtype = Node

    np.testing.assert_equal(nodes_map['data'][:, 0], ids)
    np.testing.assert_equal(nodes_map['right_edge'], np.arange(n))
Exemplo n.º 4
0
def test_codes(cl_env, kernels, coord_dtype):
    ctx, cq = cl_env

    coords = np.array([[ 0.0, 1.0, 3.0],
                       [ 0.0, 1.0, 3.0],
                       [ 4.0, 1.0, 8.0],
                       [-4.0,-6.0, 3.0],
                       [-5.0, 0.0,-1.0],
                       [-5.0, 0.5,-0.5]], dtype=coord_dtype)
    coord_range = np.array([coords.min(axis=0),
                            coords.max(axis=0)], dtype=coords.dtype)
    expected = np.array([862940378, 862940378, 1073741823,
                         20332620, 302580864, 306295426], dtype='int32')

    coords_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY, len(coords) * 4 * coord_dtype.itemsize
    )
    (coords_map, _) = cl.enqueue_map_buffer(
        cq, coords_buf, cl.map_flags.WRITE_INVALIDATE_REGION,
        0, (len(coords), 4), coord_dtype,
        is_blocking=True
    )
    coords_map[..., :3] = coords
    del coords_map
    range_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY, 2 * 4 * coord_dtype.itemsize
    )
    (range_map, _) = cl.enqueue_map_buffer(
        cq, range_buf, cl.map_flags.WRITE_INVALIDATE_REGION,
        0, (len(coord_range), 4), coord_dtype,
        is_blocking=True
    )
    range_map[..., :3] = coord_range
    del range_map
    codes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, len(coords) * np.dtype('uint32').itemsize
    )
    calc_codes = kernels['calculateCodes'](
        cq, (roundUp(len(coords), 32),), None,
        codes_buf, coords_buf, range_buf, len(coords),
    )

    (codes_map, _) = cl.enqueue_map_buffer(
        cq, codes_buf, cl.map_flags.READ | cl.map_flags.WRITE,
        0, (len(coords),), np.dtype('uint32'),
        wait_for=[calc_codes], is_blocking=True
    )
    np.testing.assert_equal(codes_map, expected)
    del codes_map
Exemplo n.º 5
0
def test_traverse(cl_env, kernels, coord_dtype):
    ctx, cq = cl_env

    coords = np.array([[ 0.0, 1.0, 3.0],
                       [ 0.0, 1.0, 3.0],
                       [ 4.0, 1.0, 8.0],
                       [-4.0,-6.0, 3.0],
                       [-5.0, 0.0,-1.0],
                       [-5.0, 0.5,-0.5]], dtype=coord_dtype)
    radii = np.ones(len(coords), dtype=coord_dtype)
    n_nodes = len(coords) * 2 - 1

    coords_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY, len(coords) * 4 * coord_dtype.itemsize
    )
    (coords_map, _) = cl.enqueue_map_buffer(
        cq, coords_buf, cl.map_flags.WRITE_INVALIDATE_REGION,
        0, (len(coords), 4), coord_dtype,
        is_blocking=True
    )
    coords_map[..., :3] = coords
    del coords_map
    range_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=np.array([coords.min(axis=0), coords.max(axis=0)], dtype=coords.dtype)
    )
    codes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, len(coords) * np.dtype('uint32').itemsize
    )
    radii_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=radii
    )
    nodes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, n_nodes * Node.itemsize
    )
    bounds_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, n_nodes * 4 * 2 * coords.dtype.itemsize
    )
    flags_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, n_nodes * np.dtype('uint32').itemsize
    )
    n_collisions = 2
    collisions_buf = cl.Buffer(
        ctx, cl.mem_flags.WRITE_ONLY, n_collisions * 2 * np.dtype('uint32').itemsize
    )
    n_collisions_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, np.dtype('uint32').itemsize
    )

    calc_codes = kernels['calculateCodes'](
        cq, (roundUp(len(coords), 32),), None,
        codes_buf, coords_buf, range_buf, len(coords),
    )

    # Would use radix sort
    (codes_map, _) = cl.enqueue_map_buffer(
        cq, codes_buf, cl.map_flags.READ | cl.map_flags.WRITE,
        0, (len(coords),), np.dtype('uint32'),
        wait_for=[calc_codes], is_blocking=True
    )
    order = np.argsort(codes_map, kind='mergesort').astype('uint32')
    codes_map[...] = codes_map[order]
    del codes_map

    ids_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=order
    )

    fill_internal = kernels['fillInternal'](
        cq, (roundUp(len(coords), 32),), None,
        nodes_buf, ids_buf, len(coords),
    )
    generate_bvh = kernels['generateBVH'](
        cq, (roundUp(len(coords)-1, 32),), None,
        codes_buf, nodes_buf, len(coords),
        wait_for=[calc_codes, fill_internal]
    )
    clear_flags = cl.enqueue_fill_buffer(
        cq, flags_buf, np.zeros(1, dtype='uint32'),
        0, n_nodes * np.dtype('uint32').itemsize
    )
    calc_bounds = kernels['leafBounds'](
        cq, (roundUp(len(coords), 32),), None,
        bounds_buf, coords_buf, radii_buf, nodes_buf, len(coords),
        wait_for=[generate_bvh]
    )
    calc_bounds = kernels['internalBounds'](
        cq, (roundUp(len(coords), 32),), None,
        bounds_buf, flags_buf, nodes_buf, len(coords),
        wait_for=[clear_flags, calc_bounds]
    )
    clear_collisions = cl.enqueue_fill_buffer(
        cq, collisions_buf, np.array([-1], dtype='uint32'),
        0, n_collisions * 2 * np.dtype('uint32').itemsize
    )
    clear_n_collisions = cl.enqueue_fill_buffer(
        cq, n_collisions_buf, np.zeros(1, dtype='uint32'),
        0, np.dtype('uint32').itemsize
    )
    find_collisions = kernels['traverse'](
        cq, (roundUp(len(coords), 32),), None,
        collisions_buf, n_collisions_buf, n_collisions,
        nodes_buf, bounds_buf, len(coords),
        wait_for=[clear_collisions, clear_n_collisions, calc_bounds],
    )

    (n_collisions_map, _) = cl.enqueue_map_buffer(
        cq, n_collisions_buf, cl.map_flags.READ,
        0, 1, np.dtype('uint32'),
        wait_for=[find_collisions], is_blocking=True
    )

    assert n_collisions_map[0] == n_collisions

    (collisions_map, _) = cl.enqueue_map_buffer(
        cq, collisions_buf, cl.map_flags.READ,
        0, (n_collisions, 2), np.dtype('uint32'),
        wait_for=[find_collisions], is_blocking=True
    )
    expected = {(0, 1), (4, 5)}
    assert set(map(tuple, collisions_map)) == expected
Exemplo n.º 6
0
def test_compute_bounds(cl_env, kernels, coord_dtype):
    ctx, cq = cl_env

    coords = np.array([[ 0.0, 1.0, 3.0],
                       [ 4.0, 1.0, 8.0],
                       [-4.0,-6.0, 3.0],
                       [-5.0, 0.0,-1.0]], dtype=coord_dtype)
    radii = np.ones(len(coords), dtype=coord_dtype)
    leaf = len(coords) - 1
    nodes = np.array([(-1, 3, [leaf+0, 1]),
                      ( 0, 3, [leaf+3, 2]),
                      ( 1, 2, [leaf+1, leaf+2]),
                      ( 0, 0, [2, -1]),
                      ( 2, 1, [0, -1]),
                      ( 2, 2, [1, -1]),
                      ( 1, 3, [3, -1])], dtype=Node)

    coords_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY, len(coords) * 4 * coord_dtype.itemsize
    )
    (coords_map, _) = cl.enqueue_map_buffer(
        cq, coords_buf, cl.map_flags.WRITE_INVALIDATE_REGION,
        0, (len(coords), 4), coord_dtype,
        is_blocking=True
    )
    coords_map[..., :3] = coords
    del coords_map
    radii_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=radii
    )
    nodes_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=nodes
    )
    bounds_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, len(nodes) * 4 * 2 * coords.dtype.itemsize
    )
    flags_buf = cl.Buffer(
        ctx, cl.mem_flags.READ_WRITE, len(nodes) * np.dtype('uint32').itemsize
    )

    clear_flags = cl.enqueue_fill_buffer(
        cq, flags_buf, np.zeros(1, dtype='uint32'),
        0, len(nodes) * np.dtype('uint32').itemsize
    )
    calc_leaf_bounds = kernels['leafBounds'](
        cq, (roundUp(len(coords), 32),), None,
        bounds_buf, coords_buf, radii_buf, nodes_buf, len(coords),
    )
    calc_bounds = kernels['internalBounds'](
        cq, (roundUp(len(coords), 32),), None,
        bounds_buf, flags_buf, nodes_buf, len(coords),
        wait_for=[calc_leaf_bounds, clear_flags]
    )
    (bounds_map, _) = cl.enqueue_map_buffer(
        cq, bounds_buf, cl.map_flags.READ,
        0, (len(nodes), 2, 4), coord_dtype,
        wait_for=[calc_bounds], is_blocking=True
    )

    expected = np.array([[[-6.0,-7.0,-2.0], [ 5.0, 2.0, 9.0]],
                         [[-6.0,-1.0,-2.0], [ 5.0, 2.0, 9.0]],
                         [[-1.0, 0.0, 2.0], [ 5.0, 2.0, 9.0]],
                         [[-5.0,-7.0, 2.0], [-3.0,-5.0, 4.0]],
                         [[-1.0, 0.0, 2.0], [ 1.0, 2.0, 4.0]],
                         [[ 3.0, 0.0, 7.0], [ 5.0, 2.0, 9.0]],
                         [[-6.0,-1.0,-2.0], [-4.0, 1.0, 0.0]]], dtype=coord_dtype)
    np.testing.assert_equal(bounds_map[:, :, :3], expected)