def make_argument(v, name):
    """Build the kernel argument descriptor for ``v``.

    Returns a ``ScalarArg`` when ``_is_scalar(v)`` is true and an
    ``ArrayArg`` otherwise.  Either way the descriptor is constructed from
    ``numpy.dtype(v.type.dtype)`` and ``name``.
    """
    # Only the descriptor class depends on the scalar check; the dtype and
    # the name are passed through identically for both kinds.
    arg_class = ScalarArg if _is_scalar(v) else ArrayArg
    return arg_class(numpy.dtype(v.type.dtype), name)
def elemwise_collapse(dtype1, dtype2, shape1, shape2, expected):
    """Check dimension collapsing for a broadcasting elementwise add.

    Two (cpu, gpu) array pairs are obtained from ``gen_gpuarray`` for the
    given dtypes and shapes.  The function then:

    * asserts that ``check_args`` (``collapse=True``, ``broadcast=True``)
      reports ``expected`` dimensions for the two GPU arrays, and again
      when an ``int8`` scalar is appended to the arguments;
    * runs ``o[i] = a[i] + b[i]`` and ``o[i] = a[i] + b[i] + s`` through
      ``ElemwiseKernel`` both with and without collapsing, comparing
      ``numpy.asarray(o)`` against the same sum computed on the cpu arrays;
    * for 4-d shapes whose first extent is not 1, calls ``check_args`` once
      more with a first operand generated with ``sliced=2``.

    Raises ``AssertionError`` as soon as one of these checks fails.
    """
    assert len(shape1) == len(shape2)

    # int8 does not cause problematic upcasts
    scalar = numpy.asarray(1, dtype='int8')

    a_cpu, a_gpu = gen_gpuarray(shape1, dtype1, ctx=context)
    b_cpu, b_gpu = gen_gpuarray(shape2, dtype2, ctx=context)

    # Per axis, the output extent is the larger of the two input extents
    # (both shapes were asserted above to have the same length).
    o_shape = [max(extent1, extent2)
               for extent1, extent2 in zip(shape1, shape2)]
    o = gpuarray.empty(o_shape, dtype=(a_cpu + b_cpu).dtype, context=context)

    def run_and_compare(kernel, operands, reference):
        # Run the kernel collapsed, then uncollapsed; after each run the
        # output buffer must match the cpu reference.
        for collapse in (True, False):
            kernel(*operands, collapse=collapse, broadcast=True)
            assert numpy.allclose(numpy.asarray(o), reference)

    # --- two array operands ---
    _, nd, dims, _, _, _ = check_args((a_gpu, b_gpu),
                                      collapse=True, broadcast=True)
    assert nd == expected, (shape1, shape2, dims, nd, expected)

    k = ElemwiseKernel(context,
                       [ArrayArg(numpy.dtype(dtype1), 'a'),
                        ArrayArg(numpy.dtype(dtype2), 'b'),
                        ArrayArg(o.dtype, 'o')],
                       "o[i] = a[i] + b[i]")
    out_cpu = a_cpu + b_cpu
    run_and_compare(k, (a_gpu, b_gpu, o), out_cpu)

    # True when at least one extent of either input shape is 1.
    broadcast = any(extent == 1 for extent in shape1 + shape2)

    # --- two array operands plus a scalar ---
    _, nd, _, _, _, _ = check_args((a_gpu, b_gpu, scalar),
                                   collapse=True, broadcast=True)
    assert nd == expected

    k = ElemwiseKernel(context,
                       [ArrayArg(numpy.dtype(dtype1), 'a'),
                        ArrayArg(numpy.dtype(dtype2), 'b'),
                        ScalarArg(scalar.dtype, 's'),
                        ArrayArg(o.dtype, 'o')],
                       "o[i] = a[i] + b[i] + s")
    out_cpu = a_cpu + b_cpu + scalar
    run_and_compare(k, (a_gpu, b_gpu, scalar, o), out_cpu)

    # With a sliced first operand, an expectation of 1 dimension becomes 2;
    # any other expectation is kept as is.
    expected2 = 2 if expected == 1 else expected

    # The sliced-operand check only applies to 4-d shapes.
    if len(shape1) != 4:
        return

    if shape1[0] != 1:
        _, c_gpu = gen_gpuarray(shape1, dtype=dtype1, sliced=2, ctx=context)
        _, nd, _, _, _, _ = check_args((c_gpu, b_gpu),
                                       collapse=True, broadcast=True)
        if broadcast:
            # With broadcasting only a lower bound is checked.
            assert nd >= expected
        else:
            assert nd == expected2
const unsigned int n){ unsigned int i; for (i=0; i<n; i++) out[i] = bspline3_norm_coord(x[i]); return; } """) # --- Kernel test --- # x = pygpu.gpuarray.array([0, 0.5, 1.0, 1.5, 2.0, 2.5]) src = kernel_tpl.render(float=DTYPE_TO_CTYPE[x.dtype], floor='floor') out = x._empty_like_me() args = [ ArrayArg(out.dtype, 'out'), ArrayArg(x.dtype, 'x'), ScalarArg(np.dtype('uint32'), 'n') ] spec = [pygpu.gpuarray.GpuArray, pygpu.gpuarray.GpuArray, 'uint32'] have_small = False have_double = False have_complex = False for arg in args: if arg.dtype.itemsize < 4 and type(arg) == ArrayArg: have_small = True if arg.dtype in [np.float64, np.complex128]: have_double = True if arg.dtype in [np.complex64, np.complex128]: have_complex = True