示例#1
0
    def __init__(self, name, reduce_func, expr, in_param, out_param, axis):
        """Set up a reduction operation.

        Args:
            name (str): Name of the routine; also used as the suffix of
                ``block_stride_name``.
            reduce_func (_SimpleReductionKernel): Kernel that supplies
                ``preamble`` and ``identity``.
            expr: Four-item sequence. The first item is ignored; the
                remaining ones are stored as ``expr``,
                ``postmap_cast_code`` and ``reduce_ctype``.
            in_param (_TraceArray): Input array of the reduction.
            out_param (_TraceArray): Output array of the reduction.
            axis (tuple of int): Axis indices, each of which must lie in
                ``[0, in_param.ndim)``.
        """
        _fusion_thread_local.check_not_runtime()
        assert isinstance(name, str)
        assert isinstance(reduce_func, _reduction._SimpleReductionKernel)
        assert (isinstance(in_param, _TraceArray)
                and isinstance(out_param, _TraceArray))
        assert isinstance(axis, tuple)
        assert all(0 <= ax < in_param.ndim for ax in axis)

        self.name = name
        self.axis = axis
        self.block_stride_name = 'block_stride_' + name
        self.preamble = reduce_func.preamble
        # A kernel without an identity is represented by an empty string.
        self.identity = (
            '' if reduce_func.identity is None
            else str(reduce_func.identity))
        self.in_params = _VariableSet(in_param)
        self.out_params = _VariableSet(out_param)

        _unused, map_expr, cast_code, ctype = expr
        if ctype is None:
            # No explicit reduction type given: use the type name of the
            # single output parameter's dtype.
            (result_param,) = self.out_params
            ctype = get_typename(result_param.dtype)
        self.expr = map_expr
        self.postmap_cast_code = cast_code
        self.reduce_ctype = ctype

        self.premap_op = None
        self.postmap_op = None
示例#2
0
    def _emit_set_index(indexed_params, tid):
        """Returns CUDA code that sets a raw index on every indexer.

        Args:
            indexed_params (_VariableSet): Parameters whose indexers are
                to be set.
            tid: Value substituted for ``tid`` in each emitted statement.

        Returns:
            list: One formatted ``set`` statement per parameter, in the
            iteration order of ``indexed_params``.
        """
        _fusion_thread_local.check_not_runtime()
        assert isinstance(indexed_params, _VariableSet)

        statements = []
        for param in indexed_params:
            statements.append(
                param.format('${indexer}.set(${tid});', tid=tid))
        return statements
示例#3
0
 def emit_code(self):
     """Returns the statement that calls this routine.

     The call arguments are, in order: the input variable name, the
     output variable name, the input indexer name, the output indexer
     name, and ``block_stride_name``. Exactly one input and one output
     parameter are expected.
     """
     _fusion_thread_local.check_not_runtime()
     assert len(self.in_params) == 1
     assert len(self.out_params) == 1
     src = tuple(self.in_params)[0]
     dst = tuple(self.out_params)[0]
     arguments = (
         src.var_name,
         dst.var_name,
         src.indexer_name,
         dst.indexer_name,
     )
     call_args = ', '.join(arguments)
     return '{}({}, {});'.format(
         self.name, call_args, self.block_stride_name)
示例#4
0
    def __init__(self, ufunc_routines, in_params, out_params, ashape):
        """Bundle ufunc routines together with their parameters.

        ``in_params`` and ``out_params`` should already be broadcasted to
        ``ashape``, but they are not guaranteed to be exactly the same as
        ``param.ashape``.

        Args:
            ufunc_routines (list of _UfuncRoutine): Stored as ``ops``.
            in_params: Iterable of input parameters; collected into a
                ``_VariableSet``.
            out_params: Iterable of output parameters; collected into a
                ``_VariableSet``.
            ashape (tuple): Stored as ``ashape``.
        """
        _fusion_thread_local.check_not_runtime()
        assert isinstance(ufunc_routines, list)
        assert all(
            isinstance(routine, _UfuncRoutine) for routine in ufunc_routines)
        assert isinstance(ashape, tuple)

        self.ashape = ashape
        self.ops = ufunc_routines
        self.in_params = _VariableSet(*in_params)
        self.out_params = _VariableSet(*out_params)
示例#5
0
    def emit_code(self):
        """Returns the ``CUPY_FOR`` code block that runs every operation.

        The loop body consists of, in order: the indexer setup, the local
        variable declarations, the call code of each operation, and the
        code emitted after the operations.
        """
        _fusion_thread_local.check_not_runtime()

        loop_var = 'i'
        decl_lines, read_arrays = self._emit_declaration(
            self.params, self.in_params)
        call_lines = []
        for op in self.ops:
            call_lines.append(op.emit_call_code())
        store_lines, written_arrays = self._emit_after_operation(
            self.out_params)

        indexed = read_arrays + written_arrays
        # The first indexed array's indexer provides the loop bound.
        # NOTE(review): presumably every indexer here has the same size --
        # confirm against the callers.
        size_source = next(iter(indexed)).indexer_name
        set_index_lines = self._emit_set_index(indexed, loop_var)

        header = 'CUPY_FOR({}, {}.size())'.format(loop_var, size_source)
        body = set_index_lines + decl_lines + call_lines + store_lines
        return _codeblock.CodeBlock(header, body)
示例#6
0
    def _emit_after_operation(out_params):
        """Generates the code that stores the results of the operations.

        Args:
            out_params: Iterable of output parameters.

        Returns:
            tuple: A pair of

            1. CUDA code (list of statements) writing the results of the
               operations back to global memory.
            2. The ``_VariableSet`` of arrays which require an indexer.
        """
        _fusion_thread_local.check_not_runtime()

        codes = []
        needs_indexer = _VariableSet()
        for param in out_params:
            is_array = isinstance(param, _TraceArray)
            if is_array:
                # Arrays are written through their indexer.
                needs_indexer.add(param)
            template = (
                '${var}[${indexer}.get()] = ${lvar};' if is_array
                else '${var} = ${lvar};')
            codes.append(param.format(template))

        return codes, needs_indexer
示例#7
0
    def _emit_declaration(params, in_params):
        """Generates the declarations of the local variables.

        A parameter contained in ``in_params`` is declared and
        initialized from its source variable (through the indexer when it
        is a ``_TraceArray``); any other parameter is only declared.

        Args:
            params: Iterable of all parameters to declare.
            in_params: Container of the input parameters.

        Returns:
            tuple: A pair of

            1. CUDA code (list of statements) declaring local variables.
            2. The ``_VariableSet`` of arrays which require an indexer.
        """
        _fusion_thread_local.check_not_runtime()

        code = []
        needs_indexer = _VariableSet()
        for param in params:
            if param not in in_params:
                template = '${type} ${lvar};'
            elif isinstance(param, _TraceArray):
                needs_indexer.add(param)
                template = '${type} ${lvar} = ${var}[${indexer}.get()];'
            else:
                template = '${type} ${lvar} = ${var};'
            code.append(param.format(template))

        return code, needs_indexer