def _gen_basic(self, ls, nd):
    """Render the ``reduk`` kernel source and build a kernel object for it.

    The source is produced by ``basic_kernel.render`` from this object's
    preamble, expressions and arguments, with ``ls`` passed as
    ``local_size`` and ``nd`` passed through unchanged.

    The argument spec starts with ``'uint32'`` and ``gpuarray.GpuArray``,
    followed by ``nd`` ``'uint32'`` entries.  After that every argument
    contributes ``arg.spec()``; arguments whose ``isarray()`` is true add
    one ``'uint32'`` and ``nd`` ``'int32'`` entries on top of that
    (presumably an offset and per-dimension strides -- TODO confirm
    against the kernel template).

    Returns a ``(kernel, source, spec)`` tuple.
    """
    kernel_name = "reduk"
    source = basic_kernel.render(
        preamble=self.preamble,
        reduce_expr=self.reduce_expr,
        name=kernel_name,
        out_arg=self.out_arg,
        nd=nd,
        arguments=self.arguments,
        local_size=ls,
        redux=self.redux,
        neutral=self.neutral,
        map_expr=self.expression,
    )

    # Fixed leading entries, then one 'uint32' per dimension.
    argspec = ['uint32', gpuarray.GpuArray]
    argspec.extend(['uint32' for _ in range(nd)])

    for argument in self.arguments:
        argspec.append(argument.spec())
        if not argument.isarray():
            continue
        # Array arguments carry extra per-array entries.
        argspec.append('uint32')
        argspec.extend(['int32' for _ in range(nd)])

    kernel = gpuarray.GpuKernel(
        source,
        kernel_name,
        argspec,
        context=self.context,
        cluda=True,
        **self.flags
    )
    return kernel, source, argspec
def _make_basic(self, nd):
    """Build the kernel named ``"elem_<nd>"`` for ``nd`` dimensions.

    The source comes from ``self.render_basic`` and the argument spec from
    ``self.argspec_basic``; both are handed to ``gpuarray.GpuKernel``
    together with this object's context and flags.

    Returns the ``gpuarray.GpuKernel`` instance.
    """
    kernel_name = "elem_" + str(nd)
    source = self.render_basic(nd, name=kernel_name)
    argspec = self.argspec_basic(nd)
    return gpuarray.GpuKernel(
        source,
        kernel_name,
        argspec,
        context=self.context,
        cluda=True,
        **self.flags
    )
def __init__(self, context, arguments, operation, preamble="",
             dimspec_limit=2, spec_limit=10):
    """Record the kernel description and build the contiguous kernel.

    :param context: context handed to ``gpuarray.GpuKernel``.
    :param arguments: either a string, which is parsed with
        ``parse_c_args``, or an iterable of argument objects, which is
        stored as a tuple.
    :param operation: operation source; kept verbatim in
        ``self.operation`` and in massaged form (``massage_op``) in
        ``self.expression``.
    :param preamble: text passed to the kernel template as ``preamble``.
    :param dimspec_limit: stored in ``self._dimspec_limit``.
    :param spec_limit: stored in ``self._spec_limit``.
    :raises RuntimeError: if none of the arguments reports ``isarray()``.
    """
    if isinstance(arguments, str):
        self.arguments = parse_c_args(arguments)
    else:
        self.arguments = tuple(arguments)

    self.operation = operation
    self.expression = massage_op(operation)
    self.context = context
    self._spec_limit = spec_limit
    self._dimspec_limit = dimspec_limit

    if not any(arg.isarray() for arg in self.arguments):
        raise RuntimeError("ElemwiseKernel can only be used with "
                           "functions that have at least one "
                           "vector argument.")

    # Derive the compile flags from the argument dtypes.
    have_small = False
    have_double = False
    have_complex = False
    for arg in self.arguments:
        # isinstance (rather than an exact type comparison) so that
        # subclasses of ArrayArg are taken into account as well.
        if arg.dtype.itemsize < 4 and isinstance(arg, ArrayArg):
            have_small = True
        if arg.dtype in (numpy.float64, numpy.complex128):
            have_double = True
        if arg.dtype in (numpy.complex64, numpy.complex128):
            have_complex = True

    # Forwarded as keyword arguments to every GpuKernel built by this object.
    self.flags = dict(have_small=have_small, have_double=have_double,
                      have_complex=have_complex)
    self.preamble = preamble

    # NOTE(review): the contiguous kernel is rendered from the raw
    # ``operation`` while the other kernels use the massaged
    # ``expression`` -- kept as in the original; confirm this is intended.
    self.contig_src = contiguous_kernel.render(preamble=self.preamble,
                                               name="elem_contig",
                                               arguments=self.arguments,
                                               expression=self.operation)
    self.contig_k = gpuarray.GpuKernel(self.contig_src, "elem_contig",
                                       self.argspec_contig(),
                                       context=self.context, cluda=True,
                                       **self.flags)

    # Start out unset; presumably filled in later when a specialized
    # kernel is selected -- TODO confirm against the callers.
    self._speckey = None
    self._dims = None
def _make_dimspec(self, n, nd, dims):
    """Build the ``"elemk"`` kernel from the ``dimspec_kernel`` template.

    ``n``, ``nd`` and ``dims`` are forwarded to the template together with
    this object's preamble, arguments and expression.  The argument spec
    is obtained from ``self.argspec_dimspec(nd)``.

    Returns the ``gpuarray.GpuKernel`` instance.
    """
    kernel_name = "elemk"
    source = dimspec_kernel.render(
        preamble=self.preamble,
        name=kernel_name,
        n=n,
        nd=nd,
        dims=dims,
        arguments=self.arguments,
        expression=self.expression,
    )
    argspec = self.argspec_dimspec(nd)
    return gpuarray.GpuKernel(
        source,
        kernel_name,
        argspec,
        context=self.context,
        cluda=True,
        **self.flags
    )
def _make_specialized(self, n, nd, dims, strs, offsets):
    """Build the ``"elemk"`` kernel from the ``specialized_kernel`` template.

    ``n``, ``nd``, ``dims``, ``strs`` and ``offsets`` are forwarded to the
    template together with this object's preamble, arguments and
    expression.  The argument spec is obtained from
    ``self.argspec_specialized()``.

    Returns the ``gpuarray.GpuKernel`` instance.
    """
    kernel_name = "elemk"
    source = specialized_kernel.render(
        preamble=self.preamble,
        name=kernel_name,
        n=n,
        nd=nd,
        # NOTE(review): passed under the keyword ``dim`` here, whereas
        # _make_dimspec uses ``dims`` -- presumably what this template
        # expects; confirm against the template.
        dim=dims,
        strs=strs,
        arguments=self.arguments,
        expression=self.expression,
        offsets=offsets,
    )
    argspec = self.argspec_specialized()
    return gpuarray.GpuKernel(
        source,
        kernel_name,
        argspec,
        context=self.context,
        cluda=True,
        **self.flags
    )