Example #1
import numpy as np
import pytest

import pyopencl as cl
import pyopencl.array  # noqa: F401 -- makes cl.array available
from pyopencl.characterize import has_double_support

# lin_comb_diff_kernel, get_group_sizes and run_elwise_kernel are assumed to be
# helper functions defined elsewhere in this module.


def test_lin_comb_diff(ctx_factory, arg_type):
    ctx = ctx_factory()
    dev, = ctx.devices
    if not has_double_support(dev):
        if arg_type in (np.float64, np.complex128):
            pytest.skip('Device does not support double.')
    n = 100000
    a_np = (np.random.randn(n)).astype(arg_type)
    b_np = (np.random.randn(n)).astype(arg_type)
    c_np = (np.random.randn(n) * 10).astype(arg_type)
    queue = cl.CommandQueue(ctx)

    a_g = cl.array.to_device(queue, a_np)
    b_g = cl.array.to_device(queue, b_np)
    c_g = cl.array.to_device(queue, c_np)
    res_g = cl.array.empty_like(a_g)
    # Build an element-wise linear-combination kernel for this dtype and pick
    # launch sizes suited to the device.
    lin_comb_diff = lin_comb_diff_kernel(ctx, arg_type, arg_type,
                                         arg_type, np.float32, 2)
    gs, ls = get_group_sizes(n, dev, lin_comb_diff)

    # Launch the kernel computing res = c + 2*a + 3*b and wait for it to finish.
    evt = run_elwise_kernel(lin_comb_diff, queue, gs, ls, n, [],
                            res_g, c_g, a_g, b_g, 2, 3)
    evt.wait()

    # Check on GPU with PyOpenCL Array:
    assert np.linalg.norm((res_g - (c_g + 2 * a_g + 3 * b_g)).get()) <= 2e-4

    # Check on CPU with Numpy:
    res_np = res_g.get()
    assert np.linalg.norm(res_np - (c_np + 2 * a_np + 3 * b_np)) <= 2e-4
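The helpers lin_comb_diff_kernel, get_group_sizes and run_elwise_kernel are not shown above. For orientation only, here is a minimal standalone sketch of the same res = c + 2*a + 3*b check written against PyOpenCL's public ElementwiseKernel API (float32 only); it is not the helpers' actual implementation.

import numpy as np
import pyopencl as cl
import pyopencl.array  # noqa: F401
from pyopencl.elementwise import ElementwiseKernel

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a_g = cl.array.to_device(queue, np.random.randn(1000).astype(np.float32))
b_g = cl.array.to_device(queue, np.random.randn(1000).astype(np.float32))
c_g = cl.array.to_device(queue, np.random.randn(1000).astype(np.float32))
res_g = cl.array.empty_like(a_g)

# One generated kernel instance: res[i] = c[i] + p*a[i] + q*b[i]
lin_comb = ElementwiseKernel(
    ctx,
    "float *res, float *c, float *a, float *b, float p, float q",
    "res[i] = c[i] + p*a[i] + q*b[i]",
    "lin_comb")

lin_comb(res_g, c_g, a_g, b_g, 2, 3)
assert np.allclose(res_g.get(), (c_g + 2*a_g + 3*b_g).get(), atol=1e-4)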
Example #2
def f(t, y_in, y_out, wait_for=None):
    return run_elwise_kernel(knl, queue, gs, ls, len_x * 2, wait_for,
                             y_out, y_in, h_x, len_x)