示例#1
0
def simple_function_with_paddable_arrays(a_dense, b_dense, exprs, iters):
    """
    Build the callable ``void foo(a_dense, b_dense)`` wrapping a single
    expression in a perfect three-level loop nest::

        iters[0]
          iters[1]
            iters[2]
              exprs[6]

    The iteration variables are resolved and substituted into the
    expressions before the callable is returned.
    """
    arguments = [array.base.function for array in (a_dense, b_dense)]

    # Assemble the loop nest from the outermost to the innermost level
    outer, middle, inner = iters[0], iters[1], iters[2]
    loop_nest = outer(middle(inner(exprs[6])))

    foo = Function('foo', loop_nest, 'void', arguments, ())

    # The resolver fills /mapper/; the substituter then consumes it
    mapper = {}
    resolved = ResolveIterationVariable().visit(foo, subs=mapper)
    return SubstituteExpression(subs=mapper).visit(resolved)
示例#2
0
def simple_function_fissionable(a, b, exprs, iters):
    """
    Build the callable ``void foo(a, b)`` in which two expressions share the
    innermost loop of a three-level nest::

        iters[0]
          iters[1]
            iters[2]
              exprs[0]
              exprs[2]

    The iteration variables are resolved and substituted into the
    expressions before the callable is returned.
    """
    arguments = [array.base.function for array in (a, b)]

    # Both expressions live in the body of the innermost loop
    outer, middle, inner = iters[0], iters[1], iters[2]
    innermost_body = [exprs[0], exprs[2]]
    loop_nest = outer(middle(inner(innermost_body)))

    foo = Function('foo', loop_nest, 'void', arguments, ())

    # The resolver fills /mapper/; the substituter then consumes it
    mapper = {}
    resolved = ResolveIterationVariable().visit(foo, subs=mapper)
    return SubstituteExpression(subs=mapper).visit(resolved)
示例#3
0
def complex_function(a, b, c, d, exprs, iters):
    """
    Build the callable ``void foo(a, b, c, d)`` whose outermost loop contains
    three sibling sub-nests::

        iters[0]
          iters[3]
            exprs[2]
          iters[1]
            iters[2]
              exprs[3]
              exprs[4]
          iters[4]
            exprs[5]

    The iteration variables are resolved and substituted into the
    expressions before the callable is returned.
    """
    arguments = [array.base.function for array in (a, b, c, d)]

    # Build the three sibling sub-nests in their order of appearance
    head = iters[3](exprs[2])
    core = iters[1](iters[2]([exprs[3], exprs[4]]))
    tail = iters[4](exprs[5])
    loop_nest = iters[0]([head, core, tail])

    foo = Function('foo', loop_nest, 'void', arguments, ())

    # The resolver fills /mapper/; the substituter then consumes it
    mapper = {}
    resolved = ResolveIterationVariable().visit(foo, subs=mapper)
    return SubstituteExpression(subs=mapper).visit(resolved)
示例#4
0
    def __init__(self, expressions, **kwargs):
        """
        Lower a sequence of SymPy equations into a callable with name
        ``self.name``, return type ``'int'`` and the collected parameters.

        :param expressions: One or more ``sympy.Eq`` objects (normalized to a
                            tuple through ``as_tuple``).
        :param kwargs: Optional settings read by this constructor:

            * ``name``: name of the generated callable (default ``"Kernel"``).
            * ``subs``: mapper applied to each expression via ``xreplace``
              (default ``{}``).
            * ``time_axis``: ``Forward`` (default) or ``Backward``.
            * ``dse``: passed to ``rewrite`` as ``mode``
              (default ``configuration['dse']``).
            * ``dle``: passed to ``set_dle_mode``
              (default ``configuration['dle']``).

        :raises InvalidOperator: If any of the given expressions is not a
                                 ``sympy.Eq``.
        """
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Default attributes required for compilation
        # (the class-level defaults are copied, so later in-place extensions
        # do not alter them)
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._lib = None
        self._cfunction = None

        # Set the direction of time according to the given TimeAxis
        # NOTE(review): /time/ is not defined in this method, so this assignment
        # mutates state shared outside of this instance -- confirm this is intended
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis 1 - required *also after* the Operator construction
        self.dtype = self._retrieve_dtype(expressions)
        self.output = self._retrieve_output_fields(expressions)

        # Analysis 2 - required *for* the Operator construction
        ordering = self._retrieve_loop_ordering(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine for symbolic optimization
        clusters = rewrite(clusters, mode=dse)

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters, ordering)

        # Introduce C-level profiling infrastructure
        # (/self.sections/ is created before calling /_profile_sections/;
        # presumably the latter populates it -- TODO confirm)
        self.sections = OrderedDict()
        nodes = self._profile_sections(nodes)

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = FindSymbols('kernel-data').visit(nodes)
        dimensions = FindSymbols('dimensions').visit(nodes)
        # Also track the parent of each dimension flagged as /is_Buffered/
        dimensions += [d.parent for d in dimensions if d.is_Buffered]
        # Only dimensions whose size is None are exposed as parameters
        parameters += filter_ordered([d for d in dimensions if d.size is None],
                                     key=operator.attrgetter('name'))

        # Resolve and substitute dimensions for loop index variables
        # (/subs/ is rebound to a fresh mapper here; the resolver is handed it
        # first and the substituter then consumes it)
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))
        # The DLE may contribute extra arguments and include files
        parameters += [i.argument for i in dle_state.arguments]
        self._includes.extend(list(dle_state.includes))

        # Introduce all required C declarations
        nodes, elemental_functions = self._insert_declarations(
            dle_state, parameters)
        self.elemental_functions = elemental_functions

        # Track the DLE output, as it might be useful at execution time
        self._dle_state = dle_state

        # Finish instantiation
        super(OperatorBasic, self).__init__(self.name, nodes, 'int',
                                            parameters, ())
示例#5
0
    def _padding(self, state, **kwargs):
        """
        Introduce temporary buffers padded to the nearest multiple of the vector
        length, to maximize data alignment. At the bottom of the kernel, the
        values in the padded temporaries will be copied back into the input arrays.

        :param state: Object exposing the ``nodes`` to be transformed.
        :param kwargs: Accepted but not used by this transformation.
        :returns: A dict with the single key ``'nodes'``, mapped to the
                  initialization trees, followed by the nodes with padded
                  buffers substituted in, followed by the copy-back trees.
        """

        mapper = OrderedDict()
        for node in state.nodes:
            # Assess feasibility of the transformation
            handle = FindSymbols('symbolics-writes').visit(node)
            if not handle:
                continue

            # Pick the shape of highest rank among the written symbols
            shape = max([i.shape for i in handle], key=len)
            if not shape:
                continue

            # Only symbols sharing the same innermost extent are padded
            candidates = [i for i in handle if i.shape[-1] == shape[-1]]
            if not candidates:
                continue

            # Retrieve the maximum number of items in a SIMD register when processing
            # the expressions in /node/
            exprs = FindNodes(Expression).visit(node)
            exprs = [e for e in exprs if e.output_function in candidates]
            assert len(exprs) > 0
            dtype = exprs[0].dtype
            assert all(e.dtype == dtype for e in exprs)
            try:
                simd_items = get_simd_items(dtype)
            except KeyError:
                # Fallback to 16 (maximum expectable padding, for AVX512 registers).
                # Floor division is used so that the number of items stays an
                # integer; true division would yield a float, which would then
                # propagate into the padded shapes computed below
                simd_items = simdinfo['avx512f'] // np.dtype(dtype).itemsize

            # Only the innermost extent is rounded; outer extents are unchanged
            shapes = {
                k: k.shape[:-1] + (roundm(k.shape[-1], simd_items), )
                for k in candidates
            }
            # Map each original array to its padded counterpart, named 'p<name>'
            mapper.update(
                OrderedDict([(k.indexed,
                              TensorFunction(name='p%s' % k.name,
                                             shape=shapes[k],
                                             dimensions=k.indices,
                                             onstack=k._mem_stack).indexed)
                             for k in candidates]))

        # Substitute original arrays with padded buffers
        processed = [
            SubstituteExpression(mapper).visit(n) for n in state.nodes
        ]

        # Build Iteration trees for initialization and copy-back of padded arrays
        # (restricted to the entries whose function is flagged /is_SymbolicData/)
        mapper = OrderedDict([(k, v) for k, v in mapper.items()
                              if k.function.is_SymbolicData])
        init = copy_arrays(mapper, reverse=True)
        copyback = copy_arrays(mapper)

        processed = init + as_tuple(processed) + copyback

        return {'nodes': processed}
示例#6
0
    def __init__(self, expressions, **kwargs):
        """
        Lower a sequence of SymPy equations into a callable with name
        ``self.name``, return type ``'int'`` and the collected parameters.

        :param expressions: One or more ``sympy.Eq`` objects (normalized to a
                            tuple through ``as_tuple``).
        :param kwargs: Optional settings read by this constructor:

            * ``name``: name of the generated callable (default ``"Kernel"``).
            * ``subs``: mapper applied to each expression via ``xreplace``
              (default ``{}``).
            * ``time_axis``: ``Forward`` (default) or ``Backward``.
            * ``dse``: passed to ``set_dse_mode``
              (default ``configuration['dse']``).
            * ``dle``: passed to ``set_dle_mode``
              (default ``configuration['dle']``).

        :raises InvalidOperator: If any of the given expressions is not a
                                 ``sympy.Eq``.
        """
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        # (the class-level defaults are copied, so later in-place extensions
        # do not alter them)
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # Set the direction of time according to the given TimeAxis
        # NOTE(review): /time/ is not defined in this method, so this assignment
        # mutates state shared outside of this instance -- confirm this is intended
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis
        self.dtype = self._retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = self._retrieve_symbols(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Parameters of the Operator (Dimensions necessary for data casts)
        # Only dimensions whose size is None are exposed as parameters
        parameters = self.input + [i for i in self.dimensions if i.size is None]

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine (DSE) for symbolic optimization
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters)

        # Introduce C-level profiling infrastructure
        # (/parameters/ is handed over as well; presumably it may be extended
        # in place with profiling-related arguments -- TODO confirm)
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Resolve and substitute dimensions for loop index variables
        # (/subs/ is rebound to a fresh mapper here; the resolver is handed it
        # first and the substituter then consumes it)
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        # Elemental functions produced by the DLE, indexed by name
        self.func_table = OrderedDict([(i.name, FunMeta(i, True))
                                       for i in dle_state.elemental_functions])
        parameters.extend([i.argument for i in self.dle_arguments])
        # DLE arguments that are Dimensions are tracked among the dimensions too
        self.dimensions.extend([i.argument for i in self.dle_arguments
                                if isinstance(i.argument, Dimension)])
        self._includes.extend(list(dle_state.includes))

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(dle_state.nodes, parameters)

        # Introduce all required C declarations
        nodes = self._insert_declarations(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())