def make_argument(v, name):
    # Wrap a variable in the matching kernel argument descriptor: scalar
    # variables map to ScalarArg, everything else to ArrayArg, keeping the
    # variable's dtype and the given kernel parameter name.
    if _is_scalar(v):
        return ScalarArg(numpy.dtype(v.type.dtype), name)
    else:
        return ArrayArg(numpy.dtype(v.type.dtype), name)
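
For reference, a minimal sketch of the two return values; both wrappers take a numpy dtype and the parameter name that will appear in the generated kernel source (the dtype and names below are arbitrary illustrative choices, not taken from the original code):

# What each branch of make_argument constructs (illustrative values):
s_arg = ScalarArg(numpy.dtype('float32'), 's')   # scalar kernel parameter
a_arg = ArrayArg(numpy.dtype('float32'), 'a')    # array (buffer) parameter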
Example #2
def elemwise_collapse(dtype1, dtype2, shape1, shape2, expected):
    # Check that dimension collapsing (and broadcasting) reduces the inputs
    # to `expected` dimensions and that the kernel result matches numpy.
    assert len(shape1) == len(shape2)

    # int8 does not cause problematic upcasts
    scalar = numpy.asarray(1, dtype='int8')

    a_cpu, a_gpu = gen_gpuarray(shape1, dtype1, ctx=context)
    b_cpu, b_gpu = gen_gpuarray(shape2, dtype2, ctx=context)

    o_shape = []
    for i in range(len(shape1)):
        o_shape.append(max(shape1[i], shape2[i]))

    o = gpuarray.empty(o_shape, dtype=(a_cpu + b_cpu).dtype, context=context)

    n, nd, dims, strs, offsets, contig = check_args((a_gpu, b_gpu),
                                                    collapse=True,
                                                    broadcast=True)

    assert nd == expected, (shape1, shape2, dims, nd, expected)

    k = ElemwiseKernel(context, [ArrayArg(numpy.dtype(dtype1), 'a'),
                                 ArrayArg(numpy.dtype(dtype2), 'b'),
                                 ArrayArg(o.dtype, 'o')], "o[i] = a[i] + b[i]")
    out_cpu = a_cpu + b_cpu
    k(a_gpu, b_gpu, o, collapse=True, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    k(a_gpu, b_gpu, o, collapse=False, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    # True if any dimension of either input is broadcastable (size 1).
    broadcast = any(i == 1 for i in shape1 + shape2)

    n, nd, dims, strs, offsets, contig = check_args((a_gpu, b_gpu, scalar),
                                                    collapse=True,
                                                    broadcast=True)
    assert nd == expected

    k = ElemwiseKernel(context, [ArrayArg(numpy.dtype(dtype1), 'a'),
                                 ArrayArg(numpy.dtype(dtype2), 'b'),
                                 ScalarArg(scalar.dtype, 's'),
                                 ArrayArg(o.dtype, 'o')],
                       "o[i] = a[i] + b[i] + s")
    out_cpu = a_cpu + b_cpu + scalar
    k(a_gpu, b_gpu, scalar, o, collapse=True, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    k(a_gpu, b_gpu, scalar, o, collapse=False, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    # If the inputs would otherwise collapse to a single dimension, slicing
    # the first input (below) keeps one extra dimension.
    if expected == 1:
        expected2 = 2
    else:
        expected2 = expected

    if len(shape1) != 4:
        return

    if shape1[0] != 1:
        c_cpu, c_gpu = gen_gpuarray(shape1, dtype=dtype1, sliced=2, ctx=context)
        n, nd, dims, strs, offsets, contig = check_args((c_gpu, b_gpu),
                                                         collapse=True,
                                                         broadcast=True)
        if broadcast:
            assert nd >= expected
        else:
            assert nd == expected2
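
An illustrative invocation; the shapes and expected collapse count below are chosen here and are not taken from the original parametrisation. Two fully C-contiguous operands of identical 4-d shape should collapse to a single dimension:

# Hypothetical parameters for the test above; identical contiguous shapes
# are expected to collapse to nd == 1.
elemwise_collapse('float32', 'float32', (4, 5, 3, 2), (4, 5, 3, 2), 1)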
Example #3
            const unsigned int n){
    unsigned int i;
    for (i=0; i<n; i++)
        out[i] = bspline3_norm_coord(x[i]);

    return;
}
""")

# --- Kernel test --- #

x = pygpu.gpuarray.array([0, 0.5, 1.0, 1.5, 2.0, 2.5])
src = kernel_tpl.render(float=DTYPE_TO_CTYPE[x.dtype], floor='floor')
out = x._empty_like_me()
# Argument descriptors (dtype and kernel parameter name).
args = [
    ArrayArg(out.dtype, 'out'),
    ArrayArg(x.dtype, 'x'),
    ScalarArg(np.dtype('uint32'), 'n')
]
# Argument type spec: buffers given as the GpuArray class, scalars by dtype name.
spec = [pygpu.gpuarray.GpuArray, pygpu.gpuarray.GpuArray, 'uint32']
# Scan the argument dtypes to decide which optional device capabilities
# the compiled kernel will need.
have_small = False
have_double = False
have_complex = False
for arg in args:
    # Sub-32-bit array element types need the "small" capability.
    if arg.dtype.itemsize < 4 and isinstance(arg, ArrayArg):
        have_small = True
    if arg.dtype in [np.float64, np.complex128]:
        have_double = True
    if arg.dtype in [np.complex64, np.complex128]:
        have_complex = True
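
A plausible continuation, not part of the original excerpt: the have_* flags gathered above match the capability keyword arguments of pygpu.gpuarray.GpuKernel, so compiling and launching the rendered source could look roughly like the sketch below. The kernel name 'bspline3_kern' is a placeholder (the part of the template that defines it is cut off above), and passing n= to the call so that pygpu picks the launch geometry is an assumption.

# Hedged sketch: 'bspline3_kern' stands in for the real kernel name from
# the (truncated) template, and spec supplies the argument types.
k = pygpu.gpuarray.GpuKernel(src, 'bspline3_kern', spec, context=x.context,
                             cluda=True, have_double=have_double,
                             have_small=have_small, have_complex=have_complex)
# One work item per element; n is the uint32 element count argument.
k(out, x, np.uint32(x.size), n=x.size)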