def get_weighted_inner_kernel(dtype_x, dtype_y, dtype_w, dtype_out):
    """Build the ``weighted_inner`` reduction kernel for the given dtypes.

    The per-element map expression is assembled as source text:

    * complex64/complex128 *dtype_x*: ``<c>_mul(<c>_conj(x[i]), Y)`` where
      ``<c>`` is ``complex_dtype_to_name(dtype_x)`` and ``Y`` is ``y[i]``,
      wrapped in ``<c>_fromreal(...)`` when *dtype_y* is float32/float64;
    * any other *dtype_x*: ``x[i]*y[i]``.

    The product is then divided by ``w[i]``, using ``/`` when *dtype_w* is
    float32/float64 and ``<c>_divide(...)`` otherwise.  Elements are
    combined with ``a+b`` starting from ``0``.

    :arg dtype_x: dtype of the ``x`` buffer.
    :arg dtype_y: dtype of the ``y`` buffer.
    :arg dtype_w: dtype of the ``w`` (weight) buffer.
    :arg dtype_out: dtype handed to :class:`ReductionKernel` as output type.
    :returns: a :class:`ReductionKernel` created on ``mgr.state.context``.
    """
    if dtype_x == np.complex64 or dtype_x == np.complex128:
        cplx = complex_dtype_to_name(dtype_x)
        y_is_real = dtype_y == np.float64 or dtype_y == np.float32
        # A real y has to be promoted before the complex multiply.
        y_term = "%s_fromreal(y[i])" % cplx if y_is_real else "y[i]"
        product = "%s_mul(%s_conj(x[i]), %s)" % (cplx, cplx, y_term)
    else:
        product = "x[i]*y[i]"

    if dtype_w == np.float64 or dtype_w == np.float32:
        weighted = product + "/w[i]"
    else:
        weighted = "%s_divide(%s, %s)" % (
            complex_dtype_to_name(dtype_x), product, "w[i]")

    ctypes = {
        "tp_x": dtype_to_ctype(dtype_x),
        "tp_y": dtype_to_ctype(dtype_y),
        "tp_w": dtype_to_ctype(dtype_w),
    }
    signature = (
        "__global const %(tp_x)s *x, "
        "__global const %(tp_y)s *y, "
        "__global const %(tp_w)s *w" % ctypes)

    return ReductionKernel(
        mgr.state.context, dtype_out,
        neutral="0",
        arguments=signature,
        reduce_expr="a+b",
        map_expr=weighted,
        name="weighted_inner")
def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None):
    """Return a :class:`ReductionKernel` that sums ``a[i] * b[i]``.

    :arg ctx: context the kernel is created for.  ``ctx.devices[0]`` is
        queried for double support when *dtype_out* has to be inferred.
    :arg dtype_out: result dtype.  If ``None`` it is computed with
        ``get_common_dtype`` from *dtype_a* and *dtype_b* (so *dtype_a*
        must be given in that case).
    :arg dtype_a: dtype of ``a``; defaults to *dtype_out*.
    :arg dtype_b: dtype of ``b``; defaults to *dtype_a*.
    :returns: a :class:`ReductionKernel` with ``neutral="0"`` and
        ``reduce_expr="a+b"``.
    """
    # The signature advertises dtype_a=None, but it used to be left
    # unresolved and the function died on ``dtype_a.type`` below.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        from pyopencl.characterize import has_double_support
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0),
            has_double_support(ctx.devices[0]))

    a_real_dtype = dtype_a.type(0).real.dtype
    b_real_dtype = dtype_b.type(0).real.dtype
    out_real_dtype = dtype_out.type(0).real.dtype

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    out_is_complex = dtype_out.kind == "c"

    from pyopencl.elementwise import complex_dtype_to_name

    a = "a[i]"
    b = "b[i]"

    if a_is_complex and b_is_complex:
        # Both operands complex: bring them to the output type, then use
        # the complex multiply helper.
        if dtype_a != dtype_out:
            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
        if dtype_b != dtype_out:
            b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)

        map_expr = "%s_mul(%s, %s)" % (
            complex_dtype_to_name(dtype_out), a, b)
    else:
        if out_is_complex:
            if a_is_complex and dtype_a != dtype_out:
                a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
            if b_is_complex and dtype_b != dtype_out:
                b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)

            # Real operands are cast to the real part type of the output.
            if not a_is_complex and a_real_dtype != out_real_dtype:
                a = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), a)
            if not b_is_complex and b_real_dtype != out_real_dtype:
                b = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), b)

        map_expr = "%s*%s" % (a, b)

    return ReductionKernel(
        ctx, dtype_out, neutral="0",
        reduce_expr="a+b", map_expr=map_expr,
        arguments="const %(tp_a)s *a, "
        "const %(tp_b)s *b" % {
            "tp_a": dtype_to_ctype(dtype_a),
            "tp_b": dtype_to_ctype(dtype_b),
        })
def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None):
    """Return a :class:`ReductionKernel` that sums ``a[i] * b[i]``.

    :arg ctx: context the kernel is created for.  ``ctx.devices[0]`` is
        queried for double support when *dtype_out* has to be inferred.
    :arg dtype_out: result dtype.  If ``None`` it is computed with
        ``get_common_dtype`` from *dtype_a* and *dtype_b* (so *dtype_a*
        must be given in that case).
    :arg dtype_a: dtype of ``a``; defaults to *dtype_out*.
    :arg dtype_b: dtype of ``b``; defaults to *dtype_a*.
    :returns: a :class:`ReductionKernel` with ``neutral="0"`` and
        ``reduce_expr="a+b"`` taking two ``__global const`` pointers.
    """
    # The signature advertises dtype_a=None, but it used to be left
    # unresolved and the function died on ``dtype_a.type`` below.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        from pyopencl.characterize import has_double_support
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0),
            has_double_support(ctx.devices[0]))

    a_real_dtype = dtype_a.type(0).real.dtype
    b_real_dtype = dtype_b.type(0).real.dtype
    out_real_dtype = dtype_out.type(0).real.dtype

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    out_is_complex = dtype_out.kind == "c"

    from pyopencl.elementwise import complex_dtype_to_name

    a = "a[i]"
    b = "b[i]"

    if a_is_complex and b_is_complex:
        # Both operands complex: bring them to the output type, then use
        # the complex multiply helper.
        if dtype_a != dtype_out:
            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
        if dtype_b != dtype_out:
            b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)

        map_expr = "%s_mul(%s, %s)" % (
            complex_dtype_to_name(dtype_out), a, b)
    else:
        if out_is_complex:
            if a_is_complex and dtype_a != dtype_out:
                a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
            if b_is_complex and dtype_b != dtype_out:
                b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)

            # Real operands are cast to the real part type of the output.
            if not a_is_complex and a_real_dtype != out_real_dtype:
                a = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), a)
            if not b_is_complex and b_real_dtype != out_real_dtype:
                b = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), b)

        map_expr = "%s*%s" % (a, b)

    return ReductionKernel(
        ctx, dtype_out, neutral="0",
        reduce_expr="a+b", map_expr=map_expr,
        arguments="__global const %(tp_a)s *a, "
        "__global const %(tp_b)s *b" % {
            "tp_a": dtype_to_ctype(dtype_a),
            "tp_b": dtype_to_ctype(dtype_b),
        })
def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None,
                   conjugate_first=False):
    """Return a :class:`ReductionKernel` computing a dot product of a and b.

    The per-element expression and the resolved dtypes come from
    :func:`_get_dot_expr`.  For a complex result the partial sums are
    combined with ``<c>_add(a, b)`` starting from ``<c>_new(0, 0)``, where
    ``<c>`` is ``complex_dtype_to_name(dtype_out)``; otherwise ``a+b``
    starting from ``0`` is used.

    :arg ctx: context the kernel is created for; ``ctx.devices[0]`` is
        queried for double support.
    :arg dtype_out: result dtype, or ``None`` to let :func:`_get_dot_expr`
        choose one (then *dtype_a* must be given).
    :arg dtype_a: dtype of ``a``; defaults to *dtype_out*.
    :arg dtype_b: dtype of ``b``; resolved by :func:`_get_dot_expr` when
        ``None``.
    :arg conjugate_first: forwarded to :func:`_get_dot_expr`.
    """
    from pyopencl.characterize import has_double_support

    # dtype_a=None used to reach both _get_dot_expr and dtype_to_ctype
    # unresolved, so the advertised default could never work.
    if dtype_a is None:
        dtype_a = dtype_out

    map_expr, dtype_out, dtype_b = _get_dot_expr(
        dtype_out, dtype_a, dtype_b, conjugate_first,
        has_double_support=has_double_support(ctx.devices[0]))

    reduce_expr = "a+b"
    neutral_expr = "0"
    if dtype_out.kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name
        dtname = complex_dtype_to_name(dtype_out)
        reduce_expr = "%s_add(a, b)" % dtname
        neutral_expr = "%s_new(0, 0)" % dtname

    return ReductionKernel(
        ctx, dtype_out, neutral=neutral_expr,
        reduce_expr=reduce_expr, map_expr=map_expr,
        arguments=(
            "const %(tp_a)s *a, "
            "const %(tp_b)s *b" % {
                "tp_a": dtype_to_ctype(dtype_a),
                "tp_b": dtype_to_ctype(dtype_b),
            }))
def _conj(result, arg):
    """Look up the unary kernel applying ``<c>_conj`` to *arg*.

    ``<c>`` is ``complex_dtype_to_name(arg.dtype)``.

    :arg result: array whose ``dtype`` is passed as the kernel's
        ``out_dtype``.
    :arg arg: input array; supplies the context and the input dtype.
    :returns: whatever ``elementwise.get_unary_func_kernel`` returns.
    """
    from pyopencl.elementwise import complex_dtype_to_name

    in_dtype = arg.dtype
    conj_func = "%s_conj" % complex_dtype_to_name(in_dtype)
    return elementwise.get_unary_func_kernel(
        arg.context, conj_func, in_dtype, out_dtype=result.dtype)
def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
                  has_double_support, index_expr="i"):
    """Build the per-element source expression for a dot product.

    :arg dtype_out: result dtype, or ``None`` to derive it from *dtype_a*
        and *dtype_b* with ``get_common_dtype`` (then *dtype_a* must be
        given).
    :arg dtype_a: dtype of ``a``; ``None`` means "same as *dtype_out*".
    :arg dtype_b: dtype of ``b``; ``None`` means "same as *dtype_a*".
    :arg conjugate_first: if true and ``a`` is complex, ``a`` is wrapped in
        ``<c>_conj(...)`` before multiplying.
    :arg has_double_support: forwarded to ``get_common_dtype``.
    :arg index_expr: text used to index ``a`` and ``b``.
    :returns: ``(map_expr, dtype_out, dtype_b)`` with the resolved dtypes.
    """
    # dtype_a=None used to be left unresolved and crashed on
    # ``dtype_a.type`` below; fall back to the output dtype instead.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0), has_double_support)

    a_real_dtype = dtype_a.type(0).real.dtype
    b_real_dtype = dtype_b.type(0).real.dtype
    out_real_dtype = dtype_out.type(0).real.dtype

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    out_is_complex = dtype_out.kind == "c"

    def out_name():
        # Imported on demand so purely real expressions need no complex
        # helpers at all.
        from pyopencl.elementwise import complex_dtype_to_name
        return complex_dtype_to_name(dtype_out)

    a = "a[%s]" % index_expr
    b = "b[%s]" % index_expr

    if a_is_complex and b_is_complex:
        if dtype_a != dtype_out:
            a = "%s_cast(%s)" % (out_name(), a)
        if dtype_b != dtype_out:
            b = "%s_cast(%s)" % (out_name(), b)

        if conjugate_first:
            a = "%s_conj(%s)" % (out_name(), a)

        map_expr = "%s_mul(%s, %s)" % (out_name(), a, b)
    else:
        if out_is_complex:
            if a_is_complex and dtype_a != dtype_out:
                a = "%s_cast(%s)" % (out_name(), a)
            if b_is_complex and dtype_b != dtype_out:
                b = "%s_cast(%s)" % (out_name(), b)

            # Real operands are cast to the real part type of the output.
            if not a_is_complex and a_real_dtype != out_real_dtype:
                a = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), a)
            if not b_is_complex and b_real_dtype != out_real_dtype:
                b = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), b)

        if conjugate_first and a_is_complex:
            a = "%s_conj(%s)" % (out_name(), a)

        map_expr = "%s*%s" % (a, b)

    return map_expr, dtype_out, dtype_b
def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
                  has_double_support, index_expr="i"):
    """Build the per-element source expression for a dot product.

    :arg dtype_out: result dtype, or ``None`` to derive it from *dtype_a*
        and *dtype_b* with ``get_common_dtype`` (then *dtype_a* must be
        given).
    :arg dtype_a: dtype of ``a``; ``None`` means "same as *dtype_out*".
    :arg dtype_b: dtype of ``b``; ``None`` means "same as *dtype_a*".
    :arg conjugate_first: if true and ``a`` is complex, ``a`` is wrapped in
        ``<c>_conj(...)`` before multiplying.
    :arg has_double_support: forwarded to ``get_common_dtype``.
    :arg index_expr: text used to index ``a`` and ``b``.
    :returns: ``(map_expr, dtype_out, dtype_b)`` with the resolved dtypes.
    """
    # dtype_a=None used to be left unresolved and crashed on
    # ``dtype_a.type`` below; fall back to the output dtype instead.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0),
            has_double_support)

    a_real_dtype = dtype_a.type(0).real.dtype
    b_real_dtype = dtype_b.type(0).real.dtype
    out_real_dtype = dtype_out.type(0).real.dtype

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    out_is_complex = dtype_out.kind == "c"

    def out_name():
        # Imported on demand so purely real expressions need no complex
        # helpers at all.
        from pyopencl.elementwise import complex_dtype_to_name
        return complex_dtype_to_name(dtype_out)

    a = "a[%s]" % index_expr
    b = "b[%s]" % index_expr

    if a_is_complex and b_is_complex:
        if dtype_a != dtype_out:
            a = "%s_cast(%s)" % (out_name(), a)
        if dtype_b != dtype_out:
            b = "%s_cast(%s)" % (out_name(), b)

        if conjugate_first:
            a = "%s_conj(%s)" % (out_name(), a)

        map_expr = "%s_mul(%s, %s)" % (out_name(), a, b)
    else:
        if out_is_complex:
            if a_is_complex and dtype_a != dtype_out:
                a = "%s_cast(%s)" % (out_name(), a)
            if b_is_complex and dtype_b != dtype_out:
                b = "%s_cast(%s)" % (out_name(), b)

            # Real operands are cast to the real part type of the output.
            if not a_is_complex and a_real_dtype != out_real_dtype:
                a = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), a)
            if not b_is_complex and b_real_dtype != out_real_dtype:
                b = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), b)

        if conjugate_first and a_is_complex:
            a = "%s_conj(%s)" % (out_name(), a)

        map_expr = "%s*%s" % (a, b)

    return map_expr, dtype_out, dtype_b
def knl_runner(result, arg):
    """Fetch the unary kernel for function ``name`` matching *arg*'s dtype.

    ``name`` is not a parameter; it is resolved from the surrounding scope.
    For complex *arg* the function looked up is ``<c>_<name>``, where
    ``<c>`` is ``complex_dtype_to_name(arg.dtype)``; otherwise it is
    ``name`` itself.

    :arg result: array supplying the context for the kernel lookup.
    :arg arg: input array supplying the dtype.
    :returns: whatever ``elementwise.get_unary_func_kernel`` returns.
    """
    in_dtype = arg.dtype
    if in_dtype.kind != "c":
        fname = name
    else:
        from pyopencl.elementwise import complex_dtype_to_name
        fname = "%s_%s" % (complex_dtype_to_name(in_dtype), name)

    return elementwise.get_unary_func_kernel(result.context, fname, in_dtype)
def knl_runner(result, arg):
    """Fetch the unary kernel for function ``name`` matching *arg*'s dtype.

    ``name`` is not a parameter; it is resolved from the surrounding scope.
    For complex *arg* the function looked up is ``<c>_<name>``, where
    ``<c>`` is ``complex_dtype_to_name(arg.dtype)``; otherwise it is
    ``name`` itself.

    :arg result: array supplying the context for the kernel lookup.
    :arg arg: input array supplying the dtype.
    :returns: whatever ``elementwise.get_unary_func_kernel`` returns.
    """
    in_dtype = arg.dtype
    if in_dtype.kind != "c":
        fname = name
    else:
        from pyopencl.elementwise import complex_dtype_to_name
        fname = "%s_%s" % (complex_dtype_to_name(in_dtype), name)

    return elementwise.get_unary_func_kernel(result.context, fname, in_dtype)
def _abs(result, arg):
    """Look up the unary absolute-value kernel for *arg*'s dtype.

    The function name depends on ``arg.dtype.kind``: ``<c>_abs`` for
    complex (``<c>`` being ``complex_dtype_to_name(arg.dtype)``), ``fabs``
    for floating point, ``abs`` for signed and unsigned integers.

    :arg result: array whose ``dtype`` is passed as ``out_dtype``.
    :arg arg: input array; supplies the context and the input dtype.
    :returns: whatever ``elementwise.get_unary_func_kernel`` returns.
    :raises TypeError: for any other dtype kind.
    """
    kind = arg.dtype.kind
    # Reject unsupported kinds up front.
    if kind not in ("c", "f", "u", "i"):
        raise TypeError("unsupported dtype in _abs()")

    if kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name
        fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
    else:
        fname = "fabs" if kind == "f" else "abs"

    return elementwise.get_unary_func_kernel(
        arg.context, fname, arg.dtype, out_dtype=result.dtype)
def _abs(result, arg):
    """Look up the unary absolute-value kernel for *arg*'s dtype.

    The function name depends on ``arg.dtype.kind``: ``<c>_abs`` for
    complex (``<c>`` being ``complex_dtype_to_name(arg.dtype)``), ``fabs``
    for floating point, ``abs`` for signed and unsigned integers.

    :arg result: array whose ``dtype`` is passed as ``out_dtype``.
    :arg arg: input array; supplies the context and the input dtype.
    :returns: whatever ``elementwise.get_unary_func_kernel`` returns.
    :raises TypeError: for any other dtype kind.
    """
    kind = arg.dtype.kind
    # Reject unsupported kinds up front.
    if kind not in ("c", "f", "u", "i"):
        raise TypeError("unsupported dtype in _abs()")

    if kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name
        fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
    else:
        fname = "fabs" if kind == "f" else "abs"

    return elementwise.get_unary_func_kernel(
        arg.context, fname, arg.dtype, out_dtype=result.dtype)
def get_sum_kernel(ctx, dtype_out, dtype_in):
    """Return a :class:`ReductionKernel` that sums a single buffer ``in``.

    For a complex output the partial sums are combined with
    ``<c>_add(a, b)`` starting from ``<c>_new(0, 0)``, where ``<c>`` is
    ``complex_dtype_to_name(dtype_out)``; otherwise ``a+b`` starting from
    ``0`` is used.

    :arg ctx: context passed to :class:`ReductionKernel`.
    :arg dtype_out: result dtype; ``None`` means "same as *dtype_in*".
    :arg dtype_in: dtype of the input buffer.
    """
    out_dtype = dtype_in if dtype_out is None else dtype_out

    if out_dtype.kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name
        cname = complex_dtype_to_name(out_dtype)
        combine = "%s_add(a, b)" % cname
        start = "%s_new(0, 0)" % cname
    else:
        combine = "a+b"
        start = "0"

    signature = "const %(tp)s *in" % {"tp": dtype_to_ctype(dtype_in)}
    return ReductionKernel(ctx, out_dtype, start, combine,
                           arguments=signature)
def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
                  has_double_support, index_expr="i"):
    """Build the per-element source expression for a dot product.

    With ``<c>`` being ``complex_dtype_to_name(dtype_out)`` the expression
    is ``a*b`` for two real operands, ``<c>_mul(a, b)`` for two complex
    ones, ``<c>_mulr(a, b)`` for complex ``a`` with real ``b`` and
    ``<c>_rmul(a, b)`` for real ``a`` with complex ``b``.  Complex operands
    whose dtype differs from *dtype_out* are wrapped in ``<c>_cast(...)``.

    :arg dtype_out: result dtype, or ``None`` to derive it from *dtype_a*
        and *dtype_b* with ``get_common_dtype`` (then *dtype_a* must be
        given).
    :arg dtype_a: dtype of ``a``; ``None`` means "same as *dtype_out*".
    :arg dtype_b: dtype of ``b``; ``None`` means "same as *dtype_a*".
    :arg conjugate_first: if true and ``a`` is complex, ``a`` is wrapped in
        ``<c>_conj(...)`` before multiplying.
    :arg has_double_support: forwarded to ``get_common_dtype``.
    :arg index_expr: text used to index ``a`` and ``b``.
    :returns: ``(map_expr, dtype_out, dtype_b)`` with the resolved dtypes.
    """
    # dtype_a=None used to be left unresolved and crashed on
    # ``dtype_a.kind`` below; fall back to the output dtype instead.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0), has_double_support)

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"

    a = f"a[{index_expr}]"
    b = f"b[{index_expr}]"

    # Purely real product: no complex helpers are needed.
    if not (a_is_complex or b_is_complex):
        return f"{a}*{b}", dtype_out, dtype_b

    from pyopencl.elementwise import complex_dtype_to_name
    out_name = complex_dtype_to_name(dtype_out)

    if a_is_complex and dtype_a != dtype_out:
        a = f"{out_name}_cast({a})"
    if b_is_complex and dtype_b != dtype_out:
        b = f"{out_name}_cast({b})"

    if a_is_complex and conjugate_first:
        a = f"{out_name}_conj({a})"

    if a_is_complex and b_is_complex:
        func = "mul"
    elif a_is_complex:
        func = "mulr"
    else:
        func = "rmul"

    return f"{out_name}_{func}({a}, {b})", dtype_out, dtype_b
def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None,
                   conjugate_first=False):
    """Return a :class:`ReductionKernel` computing a dot product of a and b.

    The per-element expression and the resolved dtypes come from
    :func:`_get_dot_expr`.  For a complex result the partial sums are
    combined with ``<c>_add(a, b)`` starting from ``<c>_new(0, 0)``, where
    ``<c>`` is ``complex_dtype_to_name(dtype_out)``; otherwise ``a+b``
    starting from ``0`` is used.

    :arg ctx: context the kernel is created for; ``ctx.devices[0]`` is
        queried for double support.
    :arg dtype_out: result dtype, or ``None`` to let :func:`_get_dot_expr`
        choose one (then *dtype_a* must be given).
    :arg dtype_a: dtype of ``a``; defaults to *dtype_out*.
    :arg dtype_b: dtype of ``b``; resolved by :func:`_get_dot_expr` when
        ``None``.
    :arg conjugate_first: forwarded to :func:`_get_dot_expr`.
    """
    from pyopencl.characterize import has_double_support

    # dtype_a=None used to reach both _get_dot_expr and dtype_to_ctype
    # unresolved, so the advertised default could never work.
    if dtype_a is None:
        dtype_a = dtype_out

    map_expr, dtype_out, dtype_b = _get_dot_expr(
        dtype_out, dtype_a, dtype_b, conjugate_first,
        has_double_support=has_double_support(ctx.devices[0]))

    reduce_expr = "a+b"
    neutral_expr = "0"
    if dtype_out.kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name
        dtname = complex_dtype_to_name(dtype_out)
        reduce_expr = "%s_add(a, b)" % dtname
        neutral_expr = "%s_new(0, 0)" % dtname

    return ReductionKernel(
        ctx, dtype_out, neutral=neutral_expr,
        reduce_expr=reduce_expr, map_expr=map_expr,
        arguments=(
            "const %(tp_a)s *a, "
            "const %(tp_b)s *b" % {
                "tp_a": dtype_to_ctype(dtype_a),
                "tp_b": dtype_to_ctype(dtype_b),
            }))
def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
                  has_double_support, index_expr="i"):
    """Build the per-element source expression for a dot product.

    With ``<c>`` being ``complex_dtype_to_name(dtype_out)`` the expression
    is ``a*b`` for two real operands, ``<c>_mul(a, b)`` for two complex
    ones, ``<c>_mulr(a, b)`` for complex ``a`` with real ``b`` and
    ``<c>_rmul(a, b)`` for real ``a`` with complex ``b``.  Complex operands
    whose dtype differs from *dtype_out* are wrapped in ``<c>_cast(...)``.

    :arg dtype_out: result dtype, or ``None`` to derive it from *dtype_a*
        and *dtype_b* with ``get_common_dtype`` (then *dtype_a* must be
        given).
    :arg dtype_a: dtype of ``a``; ``None`` means "same as *dtype_out*".
    :arg dtype_b: dtype of ``b``; ``None`` means "same as *dtype_a*".
    :arg conjugate_first: if true and ``a`` is complex, ``a`` is wrapped in
        ``<c>_conj(...)`` before multiplying.
    :arg has_double_support: forwarded to ``get_common_dtype``.
    :arg index_expr: text used to index ``a`` and ``b``.
    :returns: ``(map_expr, dtype_out, dtype_b)`` with the resolved dtypes.
    """
    # dtype_a=None used to be left unresolved and crashed on
    # ``dtype_a.kind`` below; fall back to the output dtype instead.
    if dtype_a is None:
        dtype_a = dtype_out
    if dtype_b is None:
        dtype_b = dtype_a

    if dtype_out is None:
        from pyopencl.compyte.array import get_common_dtype
        dtype_out = get_common_dtype(
            dtype_a.type(0), dtype_b.type(0),
            has_double_support)

    a_is_complex = dtype_a.kind == "c"
    b_is_complex = dtype_b.kind == "c"

    a = "a[%s]" % index_expr
    b = "b[%s]" % index_expr

    # Purely real product: no complex helpers are needed.
    if not (a_is_complex or b_is_complex):
        return "%s*%s" % (a, b), dtype_out, dtype_b

    from pyopencl.elementwise import complex_dtype_to_name
    out_name = complex_dtype_to_name(dtype_out)

    if a_is_complex and dtype_a != dtype_out:
        a = "%s_cast(%s)" % (out_name, a)
    if b_is_complex and dtype_b != dtype_out:
        b = "%s_cast(%s)" % (out_name, b)

    if a_is_complex and conjugate_first:
        a = "%s_conj(%s)" % (out_name, a)

    if a_is_complex and b_is_complex:
        func = "mul"
    elif a_is_complex:
        func = "mulr"
    else:
        func = "rmul"

    map_expr = "%s_%s(%s, %s)" % (out_name, func, a, b)
    return map_expr, dtype_out, dtype_b
maxloc_dtype_single = np.dtype([("max", np.float32), ("loc", np.int32)]) maxloc_dtype_single = get_or_register_dtype("maxlocs", dtype=maxloc_dtype_single) maxloc_dtype_double = get_or_register_dtype("maxlocd", dtype=maxloc_dtype_double) mls = ReductionKernel(mgr.state.context, maxloc_dtype_single, neutral = "maxloc_start()", reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(x[i], i)", arguments="float *x", preamble=maxloc_preamble_single) mld = ReductionKernel(mgr.state.context, maxloc_dtype_double, neutral = "maxloc_start()", reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(x[i], i)", arguments="double *x", preamble=maxloc_preamble_double) max_loc_map = {'single':mls,'double':mld} cfloat = complex_dtype_to_name(np.complex64) cdouble = complex_dtype_to_name(np.complex128) get_or_register_dtype('cfloat', np.complex64) get_or_register_dtype('cdouble', np.complex128) amls = ReductionKernel(mgr.state.context, maxloc_dtype_single, neutral = "maxloc_start()", reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(fabs(x[i]), i)", arguments="float *x", preamble=maxloc_preamble_single) amld = ReductionKernel(mgr.state.context, maxloc_dtype_double, neutral = "maxloc_start()", reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(fabs(x[i]), i)", arguments="double *x", preamble=maxloc_preamble_double) amlsc = ReductionKernel(mgr.state.context, maxloc_dtype_single, neutral = "maxloc_start()", reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(%s_abs(x[i]), i)" % cfloat,