示例#1
0
文件: vectorize.py 项目: the-lay/cupy
    def __call__(self, *args):
        """Run the elementwise kernel generated from ``self.pyfunc``.

        Kernels are stored in ``self._kernel_cache`` under a key made by
        concatenating ``_get_input_type`` of every argument, so a kernel is
        only generated the first time a given key is seen.

        Args:
            *args: Inputs; forwarded unchanged to the kernel.

        Returns:
            The result of calling the elementwise kernel with ``args``.

        Raises:
            NotImplementedError: If a kernel has to be generated while
                ``self.otypes`` is not ``None``.
        """
        type_key = ''.join(_get_input_type(arg) for arg in args)
        cached = self._kernel_cache.get(type_key)
        if cached is not None:
            return cached(*args)

        arg_types = [_cuda_types.Scalar(code) for code in type_key]
        if self.otypes is not None:
            # TODO(asi1024): Implement
            raise NotImplementedError

        device_func = _interface._CudaFunction(
            self.pyfunc, 'numpy', device=True)
        # No return type is requested; it is read back from the emitted
        # result below.
        emitted = device_func._emit_code_from_types(arg_types, None)

        names = [f'in{idx}' for idx, _ in enumerate(arg_types)]
        in_decl = ', '.join(
            f'{arg_type.dtype} {name}'
            for arg_type, name in zip(arg_types, names))
        call_args = ', '.join(names)
        out_decl, out_target = self._parse_out_param(emitted.return_type)
        operation = f'{out_target} = {device_func.name}({call_args})'

        # note: -D not working on ROCm is not a concern here, because all
        # headers are unrolled for HIP and so thrust::tuple et al are
        # defined regardless of whether CUPY_JIT_MODE is defined
        built = _core.ElementwiseKernel(
            in_decl,
            out_decl,
            operation,
            preamble=emitted.code,
            options=('-DCUPY_JIT_MODE', ))
        self._kernel_cache[type_key] = built
        return built(*args)
示例#2
0
    def __call__(self, *args):
        """Apply the kernel generated from ``self.pyfunc`` to ``args``.

        The kernel is looked up in ``self._kernel_cache`` by the string
        formed from ``_get_input_type`` of each argument; on a miss it is
        generated, stored under that string, and then invoked.

        Args:
            *args: Inputs; passed through to the kernel as-is.

        Returns:
            The result of calling the elementwise kernel with ``args``.

        Raises:
            NotImplementedError: If a kernel has to be generated while
                ``self.otypes`` is not ``None``.
        """
        signature = ''.join(_get_input_type(value) for value in args)
        kernel = self._kernel_cache.get(signature)

        if kernel is None:
            scalar_types = [_types.Scalar(char) for char in signature]
            if self.otypes is not None:
                # TODO(asi1024): Implement
                raise NotImplementedError

            cuda_func = _interface._CudaFunction(
                self.pyfunc, 'numpy', device=True)
            generated = cuda_func._emit_code_from_types(scalar_types, None)

            # One "<dtype> in<N>" declaration per input, in argument order.
            declarations = []
            for position, scalar in enumerate(scalar_types):
                declarations.append(f'{scalar.dtype} in{position}')
            in_decl = ', '.join(declarations)

            # Single output named out0, typed by the emitted return type.
            out_dtype = str(generated.return_type.dtype)
            out_decl = out_dtype + ' out0'

            func_name = cuda_func.name
            call_args = ', '.join(
                f'in{position}' for position in range(len(scalar_types)))
            statement = f'out0 = {func_name}({call_args})'

            kernel = core.ElementwiseKernel(
                in_decl, out_decl, statement, preamble=generated.code)
            self._kernel_cache[signature] = kernel

        return kernel(*args)