Пример #1
0
def test_bitonic_sort(ctx_factory, size, dtype):
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    s = clrandom.rand(queue, (2, size, 3,), dtype, luxury=None, a=0, b=239482333)
    sgs = s.copy()
    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if (dev.platform.name == "Portable Computing Language"
            and cl.get_cl_header_version() < (1, 2)):
        sgs.finish()
    sorter = BitonicSort(ctx)
    sgs, evt = sorter(sgs, axis=1)
    assert np.array_equal(np.sort(s.get(), axis=1), sgs.get())
Пример #2
0
def test_bitonic_argsort(ctx_factory, size, dtype):
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
                     "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL")

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size, ), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms, evt = sorterm(m.copy(), idx=index, axis=0)

    assert np.array_equal(np.sort(m.get()), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m.get()[np.argsort(m.get())], m.get()[index.get()])
Пример #3
0
def test_bitonic_sort(ctx_factory, size, dtype):
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
                     "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL")

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    s = clrandom.rand(queue, (
        2,
        size,
        3,
    ),
                      dtype,
                      luxury=None,
                      a=0,
                      b=239482333)
    sorter = BitonicSort(ctx)
    sgs, evt = sorter(s.copy(), axis=1)
    assert np.array_equal(np.sort(s.get(), axis=1), sgs.get())
Пример #4
0
def test_bitonic_argsort(ctx_factory, size, dtype):
    import sys
    is_pypy = "__pypy__" in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                "on zero-sized arrays very well")

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    device = queue.device
    if device.platform.vendor == "The pocl project" \
            and device.type & cl.device_type.GPU:
        pytest.xfail("bitonic argsort fails on POCL + Nvidia,"
                "at least the K40, as of pocl 1.6, 2021-01-20")

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")
    if (dev.platform.name == "Intel(R) OpenCL" and size == 0):
        pytest.xfail("size-0 arange fails on Intel CL")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms = m.copy()
    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if (dev.platform.name == "Portable Computing Language"
            and cl.get_cl_header_version() < (1, 2)):
        ms.finish()
        index.finish()
    ms, evt = sorterm(ms, idx=index, axis=0)

    assert np.array_equal(np.sort(m.get()), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m.get()[np.argsort(m.get())], m.get()[index.get()])
Пример #5
0
def test_bitonic_argsort(ctx_factory, size, dtype):
    import sys
    is_pypy = '__pypy__' in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                     "on zero-sized arrays very well")

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
                     "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64 and get_pocl_version(dev.platform) <
        (1, 0)):
        pytest.xfail(
            "Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size, ), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms, evt = sorterm(m.copy(), idx=index, axis=0)

    assert np.array_equal(np.sort(m.get()), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m.get()[np.argsort(m.get())], m.get()[index.get()])
Пример #6
0
@timeit_repeat(reps)
def test_bitonic_speed(buff, sorter):
    sorter(buff)[1].wait()


@timeit_repeat(reps)
def test_numpy_speed(buff):
    np.sort(buff)


from collections import defaultdict

times = defaultdict(list)
dtype = np.int32
xs = range(5, 32)
bs = BitonicSort(ctx)
rs = RadixSort(ctx,
               "int *ary",
               key_expr="ary[i]",
               sort_arg_names=["ary"],
               scan_kernel=GenericScanKernel)
for size in xs:
    print("running size=2^{} = {}".format(size, 2**size))
    s = clrandom.rand(queue, (2**size, ), dtype, a=0, b=2**16)
    times['bitonic'].append(test_bitonic_speed(s, bs).microseconds / 1000000.)
    times['radix'].append(test_radix_speed(s, rs).microseconds / 1000000.)
    times['numpy'].append(
        test_numpy_speed(s.get().copy()).microseconds / 1000000.)

print("\t".join(["Size"] + times.keys()))
for idx, s in enumerate(xs):