文件: test_dle.py 项目: nw0/devito
def test_padding(simple_function_with_paddable_arrays):
    """Check that the expected C text appears in the ``padding``-mode output.

    The fixture is passed to ``transform`` with ``mode='padding'`` and the
    string form of the resulting ``nodes`` must contain the snippet below.
    """
    expected = """\
for (int i = 0; i < 3; i += 1)
  pa_dense[i] = a_dense[i];
void foo(float *restrict a_dense_vec, float *restrict b_dense_vec)
  float (*restrict a_dense) __attribute__((aligned(64))) = (float (*)) a_dense_vec;
  float (*restrict b_dense) __attribute__((aligned(64))) = (float (*)) b_dense_vec;
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
      for (int k = 0; k < 7; k += 1)
        pa_dense[i] = b_dense[i] + pa_dense[i] + 5.0F;
for (int i = 0; i < 3; i += 1)
  a_dense[i] = pa_dense[i];
}"""
    state = transform(simple_function_with_paddable_arrays, mode='padding')
    generated = str(state.nodes)
    assert expected in generated
文件: test_dle.py 项目: fymenq/devito
def test_create_elemental_functions_simple(simple_function):
    # Takes the last node of every iteration tree of the fixture, re-tags it
    # with ``tagger(0)`` and appends ELEMENTAL to its properties, then runs
    # ``transform`` in 'split' mode and compares the generated code (main
    # nodes followed by the elemental functions) with the expected text.
    # NOTE(review): this snippet appears truncated -- the ``mapper`` dict, the
    # ``output`` list and the expected string literal are never closed.
    roots = [i[-1] for i in retrieve_iteration_tree(simple_function)]
    retagged = [i._rebuild(properties=tagger(0)) for i in roots]
    mapper = {
        i: j._rebuild(properties=(j.properties + (ELEMENTAL, )))
        for i, j in zip(roots, retagged)
    function = Transformer(mapper).visit(simple_function)
    handle = transform(function, mode='split')
    block = List(body=[handle.nodes] + handle.elemental_functions)
    output = str(block.ccode)
    # Make output compiler independent: drop every line containing
    # '#pragma' or '/*'
    output = [
        i for i in output.split('\n')
        if all([j not in i for j in ('#pragma', '/*')])
    assert '\n'.join(output) == \
        ("""void foo(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__((aligned(64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__((aligned(64))) ="""
         """ (float (*)[j_size][k_size]) d_vec;
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
void f_0(const int k_start, const int k_finish,"""
         """ float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec,"""
         """ const int i, const int i_size, const int j, const int j_size, const int k_size)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__((aligned(64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__((aligned(64))) ="""
         """ (float (*)[j_size][k_size]) d_vec;
  for (int k = k_start; k < k_finish; k += 1)
    a[i] = a[i] + b[i] + 5.0F;
    a[i] = -a[i]*c[i][j] + b[i]*d[i][j][k];
    def _specialize_iet(self, iet, **kwargs):
        """
        Transform the IET into a backend-specific representation, such as code
        to be executed on a GPU or through a lower-level system (e.g., YASK).

        The ``dle`` keyword argument, when given, overrides
        ``configuration['dle']`` as the mode handed to ``set_dle_mode``.
        Every elemental function reported by the DLE is recorded in
        ``self._func_table`` under its name, wrapped as ``MetaCall(efunc, True)``.

        Returns the IET produced by ``transform``.
        """
        dle = kwargs.get("dle", configuration['dle'])

        # Apply the Devito Loop Engine (DLE) for loop optimization
        iet, state = transform(iet, *set_dle_mode(dle))

        # Register the elemental functions generated by the DLE
        self._func_table.update(
            OrderedDict([(i.name, MetaCall(i, True)) for i in state.efuncs]))

        return iet
文件: operator.py 项目: opesci/devito
    def _specialize_iet(self, iet, **kwargs):
        """
        Transform the IET into a backend-specific representation, such as code
        to be executed on a GPU or through a lower-level system (e.g., YASK).

        The ``dle`` keyword argument, when given, overrides
        ``configuration['dle']`` as the mode handed to ``set_dle_mode``.
        Every elemental function reported by the DLE is recorded in
        ``self._func_table`` under its name, wrapped as ``MetaCall(efunc, True)``.

        Returns the IET produced by ``transform``.
        """
        dle = kwargs.get("dle", configuration['dle'])

        # Apply the Devito Loop Engine (DLE) for loop optimization
        iet, state = transform(iet, *set_dle_mode(dle))

        # Register the elemental functions generated by the DLE
        self._func_table.update(OrderedDict([(i.name, MetaCall(i, True))
                                             for i in state.efuncs]))

        return iet
def test_loop_nofission(simple_function):
    """Check the ``fission``-mode output for ``simple_function``.

    The fission thresholds on ``Rewriter`` are temporarily overridden
    (``max_fission`` = 0, ``min_fission`` = 1) and the C code of the first
    resulting node must contain the loop nest below.
    """
    old = Rewriter.thresholds['min_fission'], Rewriter.thresholds['max_fission']
    Rewriter.thresholds['max_fission'], Rewriter.thresholds['min_fission'] = 0, 1
    try:
        handle = transform(simple_function, mode='fission')
        assert """\
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
      for (int k = 0; k < 7; k += 1)
        a[i] = a[i] + b[i] + 5.0F;
        a[i] = -a[i]*c[i][j] + b[i]*d[i][j][k];
  }""" in str(handle.nodes[0].ccode)
    finally:
        # Always restore the shared thresholds, even when the transformation
        # or the assertion fails, so other tests are not affected
        Rewriter.thresholds['min_fission'], Rewriter.thresholds['max_fission'] = old
    def _specialize_iet(self, iet, **kwargs):
        """Lower the Iteration/Expression tree to a backend-specific form
        (e.g., code for a GPU or for a lower-level tool).

        The ``dle`` keyword argument, if present, takes precedence over
        ``configuration['dle']``. The DLE arguments and flags are stored on
        the instance, the elemental functions are added to ``func_table`` and
        the Dimension-typed DLE arguments are appended to ``dimensions``.
        Returns the nodes of the DLE state."""
        # Run the Devito Loop Engine (DLE) with the requested mode
        mode = kwargs.get("dle", configuration['dle'])
        state = transform(iet, *set_dle_mode(mode))

        # Keep track of what the DLE produced
        self._dle_args = state.arguments
        self._dle_flags = state.flags

        efuncs = OrderedDict()
        for efunc in state.elemental_functions:
            efuncs[efunc.name] = MetaCall(efunc, True)
        self.func_table.update(efuncs)

        extra_dimensions = []
        for arg in self._dle_args:
            if isinstance(arg.argument, Dimension):
                extra_dimensions.append(arg.argument)
        self.dimensions.extend(extra_dimensions)

        return state.nodes
    def _make_copy(self, f, fixed, swap=False):
        """
        Construct a Callable performing a copy of:

            * an arbitrary convex region of ``f`` into a contiguous Array, OR
            * if ``swap=True``, a contiguous Array into an arbitrary convex
              region of ``f``.

        Dimensions of ``f`` listed in ``fixed`` get no buffer dimension and
        are addressed through their offset only.

        Returns a 2-tuple ``(callable, input)``, where ``input`` is the
        ``input`` attribute of the DLE state.
        """
        # One buffer Dimension (and one iteration index) per non-fixed
        # Dimension of ``f``
        # NOTE(review): the iteration index is taken to be ``d.root`` --
        # confirm against the upstream implementation
        buf_dims = []
        buf_indices = []
        for d in f.dimensions:
            if d not in fixed:
                buf_dims.append(Dimension(name='buf_%s' % d.root))
                buf_indices.append(d.root)
        buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

        # One offset Symbol per Dimension of ``f``; the offsets are also
        # parameters of the generated Callable
        f_offsets = []
        f_indices = []
        for d in f.dimensions:
            offset = Symbol(name='o%s' % d.root)
            f_offsets.append(offset)
            f_indices.append(offset + (d.root if d not in fixed else 0))

        if swap is False:
            eq = DummyEq(buf[buf_indices], f[f_indices])
            name = 'gather%dd' % f.ndim
        else:
            eq = DummyEq(f[f_indices], buf[buf_indices])
            name = 'scatter%dd' % f.ndim

        iet = Expression(eq)
        for i, d in reversed(list(zip(buf_indices, buf_dims))):
            # The -1 below is because an Iteration, by default, generates <=
            iet = Iteration(iet, i, d.symbolic_size - 1, properties=PARALLEL)
        iet = List(body=[ArrayCast(f), ArrayCast(buf), iet])

        # Optimize the memory copy with the DLE
        from devito.dle import transform
        state = transform(iet, 'simd', {'openmp': self._threaded})

        parameters = [buf] + list(buf.shape) + [f] + f_offsets + state.input
        return Callable(name, state.nodes, 'void', parameters,
                        ('static', )), state.input
def test_padding(simple_function_with_paddable_arrays):
    # Runs ``transform`` in 'padding' mode and checks the C code of the first
    # three resulting nodes against the expected text.
    # NOTE(review): the expected string literals below appear truncated
    # (their closing quotes are missing) -- restore them before use.
    handle = transform(simple_function_with_paddable_arrays, mode='padding')
    assert str(handle.nodes[0].ccode) == """\
for (int i = 0; i < 3; i += 1)
  pa_dense[i] = a_dense[i];
    assert """\
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
      for (int k = 0; k < 7; k += 1)
        pa_dense[i] = b_dense[i] + pa_dense[i] + 5.0F;
  }""" in str(handle.nodes[1].ccode)
    assert str(handle.nodes[2].ccode) == """\
def test_loop_fission(simple_function_fissionable):
    """Check the ``fission``-mode output for ``simple_function_fissionable``.

    The fission thresholds on ``Rewriter`` are temporarily overridden
    (``max_fission`` = 0, ``min_fission`` = 1) and the C code of the first
    resulting node must contain the two separate ``k`` loops below.
    """
    old = Rewriter.thresholds['min_fission'], Rewriter.thresholds['max_fission']
    Rewriter.thresholds['max_fission'], Rewriter.thresholds['min_fission'] = 0, 1
    try:
        handle = transform(simple_function_fissionable, mode='fission')
        assert """\
 for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
      for (int k = 0; k < 7; k += 1)
        a[i] = a[i] + b[i] + 5.0F;
      for (int k = 0; k < 7; k += 1)
        b[i] = a[i] + pow(b[i], 2) + 3;
  }""" in str(handle.nodes[0].ccode)
    finally:
        # Always restore the shared thresholds, even when the transformation
        # or the assertion fails, so other tests are not affected
        Rewriter.thresholds['min_fission'], Rewriter.thresholds['max_fission'] = old
def test_create_elemental_functions_simple(simple_function):
    # Sets the 'elemental' threshold of ``Rewriter`` to 0, runs ``transform``
    # in 'split' mode and compares the generated code (main nodes followed by
    # the elemental functions) with the expected text; the previous threshold
    # is put back at the end.
    # NOTE(review): this snippet appears truncated -- the ``output`` list and
    # the expected string literal are never closed.
    old = Rewriter.thresholds['elemental']
    Rewriter.thresholds['elemental'] = 0
    handle = transform(simple_function, mode='split')
    block = List(body=handle.nodes + handle.elemental_functions)
    output = str(block.ccode)
    # Make output compiler independent: drop every line containing
    # '#pragma' or '/*'
    output = [
        i for i in output.split('\n')
        if all([j not in i for j in ('#pragma', '/*')])
    assert '\n'.join(output) == \
        ("""void foo(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[5] __attribute__((aligned(64))) = (float (*)[5]) c_vec;
  float (*restrict d)[5][7] __attribute__((aligned(64))) = (float (*)[5][7]) d_vec;
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
      f_0_0((float*) a,(float*) b,(float*) c,(float*) d,i,j);
void f_0_0(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec, const int i, const int j)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[5] __attribute__((aligned(64))) = (float (*)[5]) c_vec;
  float (*restrict d)[5][7] __attribute__((aligned(64))) = (float (*)[5][7]) d_vec;
  for (int k = 0; k < 7; k += 1)
    a[i] = a[i] + b[i] + 5.0F;
    a[i] = -a[i]*c[i][j] + b[i]*d[i][j][k];
    Rewriter.thresholds['elemental'] = old
def test_loops_collapsed(fe, t0, t1, t2, t3, exprs, expected, iters):
    """Check which Iterations get an ``omp for collapse`` pragma.

    ``exprs`` are evaluated in the scope of the fixtures, wrapped in three
    nested iterations and passed through ``transform`` in ``openmp`` mode.
    ``expected`` holds one entry per resulting Iteration: ``True`` means the
    Iteration must carry exactly one pragma containing ``omp for collapse``;
    anything else means none of its pragmas may contain it.
    """
    scope = [fe, t0, t1, t2, t3]
    node_exprs = [Expression(DummyEq(EVAL(i, *scope))) for i in exprs]
    ast = iters[6](iters[7](iters[8](node_exprs)))

    ast = iet_analyze(ast)

    nodes = transform(ast, mode='openmp').nodes
    iterations = FindNodes(Iteration).visit(nodes)
    assert len(iterations) == len(expected)

    # Check for presence of pragma omp
    for i, j in zip(iterations, expected):
        pragmas = i.pragmas
        if j is True:
            assert len(pragmas) == 1
            pragma = pragmas[0]
            assert 'omp for collapse' in pragma.value
        else:
            # The negative check only applies to Iterations that are not
            # expected to be collapsed
            for k in pragmas:
                assert 'omp for collapse' not in k.value
def test_loops_ompized(fa, fb, fc, fd, t0, t1, t2, t3, exprs, expected, iters):
    """Check which Iterations get an ``omp for`` pragma.

    ``exprs`` are evaluated in the scope of the fixtures, wrapped in two
    nested iterations and passed through ``transform`` in ``openmp`` mode,
    which must yield a single node. ``expected`` holds one entry per
    Iteration found in it: ``True`` means the Iteration must carry exactly
    one pragma containing ``omp for``; anything else means none of its
    pragmas may contain it.
    """
    scope = [fa, fb, fc, fd, t0, t1, t2, t3]
    node_exprs = [Expression(EVAL(i, *scope)) for i in exprs]
    ast = iters[6](iters[7](node_exprs))

    nodes = transform(ast, mode='openmp').nodes
    assert len(nodes) == 1
    ast = nodes[0]
    iterations = FindNodes(Iteration).visit(ast)
    assert len(iterations) == len(expected)

    # Check for presence of pragma omp
    for i, j in zip(iterations, expected):
        pragmas = i.pragmas
        if j is True:
            assert len(pragmas) == 1
            pragma = pragmas[0]
            assert 'omp for' in pragma.value
        else:
            # The negative check only applies to Iterations that are not
            # expected to be parallelized
            for k in pragmas:
                assert 'omp for' not in k.value
文件: test_dle.py 项目: opesci/devito
    def test_iterations_ompized(self, fa, fb, fc, fd, t0, t1, t2, t3,
                                exprs, expected, iters):
        """Check which Iterations get an ``omp for`` pragma.

        ``exprs`` are evaluated in the scope of the fixtures, wrapped in two
        nested iterations, analyzed and passed through ``transform`` in
        ``openmp`` mode. ``expected`` holds one entry per resulting
        Iteration: ``True`` means the Iteration must carry exactly one pragma
        containing ``omp for``; anything else means none of its pragmas may
        contain it.
        """
        scope = [fa, fb, fc, fd, t0, t1, t2, t3]
        node_exprs = [Expression(DummyEq(EVAL(i, *scope))) for i in exprs]
        ast = iters[6](iters[7](node_exprs))

        ast = iet_analyze(ast)

        iet, _ = transform(ast, mode='openmp')
        iterations = FindNodes(Iteration).visit(iet)
        assert len(iterations) == len(expected)

        # Check for presence of pragma omp
        for i, j in zip(iterations, expected):
            pragmas = i.pragmas
            if j is True:
                assert len(pragmas) == 1
                pragma = pragmas[0]
                assert 'omp for' in pragma.value
            else:
                # The negative check only applies to Iterations that are not
                # expected to be parallelized
                for k in pragmas:
                    assert 'omp for' not in k.value
    def __init__(self, expressions, **kwargs):
        # Builds the operator from one or more SymPy equations.
        # Keyword arguments read here: ``name`` (default "Kernel"), ``subs``
        # (default {}), ``time_axis`` (default Forward), ``dse`` and ``dle``
        # (defaulting to the corresponding ``configuration`` entries).
        # Raises InvalidOperator if any expression is not a ``sympy.Eq``.
        # NOTE(review): the ``parameters += filter_ordered(...`` statement
        # below appears truncated (its closing argument/parenthesis is missing).
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Default attributes required for compilation
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._lib = None
        self._cfunction = None

        # Set the direction of time according to the given TimeAxis
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis 1 - required *also after* the Operator construction
        self.dtype = self._retrieve_dtype(expressions)
        self.output = self._retrieve_output_fields(expressions)

        # Analysis 2 - required *for* the Operator construction
        ordering = self._retrieve_loop_ordering(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine for symbolic optimization
        clusters = rewrite(clusters, mode=dse)

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters, ordering)

        # Introduce C-level profiling infrastructure
        self.sections = OrderedDict()
        nodes = self._profile_sections(nodes)

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = FindSymbols('kernel-data').visit(nodes)
        dimensions = FindSymbols('dimensions').visit(nodes)
        dimensions += [d.parent for d in dimensions if d.is_Buffered]
        parameters += filter_ordered([d for d in dimensions if d.size is None],

        # Resolve and substitute dimensions for loop index variables
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))
        parameters += [i.argument for i in dle_state.arguments]

        # Introduce all required C declarations
        nodes, elemental_functions = self._insert_declarations(
            dle_state, parameters)
        self.elemental_functions = elemental_functions

        # Track the DLE output, as it might be useful at execution time
        self._dle_state = dle_state

        # Finish instantiation
        super(OperatorBasic, self).__init__(self.name, nodes, 'int',
                                            parameters, ())
    def __init__(self, expressions, **kwargs):
        # Builds the operator from one or more SymPy equations.
        # Keyword arguments read here: ``name`` (default "Kernel"), ``subs``
        # (default {}), ``time_axis`` (default Forward), ``dse`` and ``dle``
        # (defaulting to the corresponding ``configuration`` entries).
        # Raises InvalidOperator if any expression is not a ``sympy.Eq``.
        # NOTE(review): several statements below appear truncated (the
        # ``retrieve_symbols(`` call, the ``func_table``/``dimensions``
        # updates after the DLE, and the ``ArgumentEngine(`` call).
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering and analysis
        expressions = [LoweredEq(e, subs=subs) for e in expressions]
        self.dtype = retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = retrieve_symbols(

        # Set the direction of time according to the given TimeAxis
        for time in [d for d in self.dimensions if d.is_Time]:
            if not time.is_Stepping:
                time.reverse = time_axis == Backward

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = self.input + self.dimensions

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters, self.dtype)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(nodes, parameters)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
            OrderedDict([(i.name, MetaCall(i, True))
                         for i in dle_state.elemental_functions]))
        parameters.extend([i.argument for i in self.dle_arguments])
            i.argument for i in self.dle_arguments
            if isinstance(i.argument, Dimension)

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Initialise ArgumentEngine
        self.argument_engine = ArgumentEngine(clusters.ispace, parameters,

        parameters = self.argument_engine.arguments

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
    def __init__(self, expressions, **kwargs):
        """Build the operator from one or more SymPy equations.

        Keyword arguments read here: ``name`` (default "Kernel"), ``subs``
        (default {}), ``time_axis`` (default Forward), ``dse`` and ``dle``
        (defaulting to the corresponding ``configuration`` entries).

        Raises InvalidOperator if any expression is not a ``sympy.Eq``.
        """
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # Set the direction of time according to the given TimeAxis
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis
        self.dtype = self._retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = self._retrieve_symbols(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = self.input + [i for i in self.dimensions if i.size is None]

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine (DSE) for symbolic optimization
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Resolve and substitute dimensions for loop index variables
        # (``subs`` is reused here as a fresh mapping filled by the visitor)
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table = OrderedDict([(i.name, FunMeta(i, True))
                                       for i in dle_state.elemental_functions])
        parameters.extend([i.argument for i in self.dle_arguments])
        self.dimensions.extend([i.argument for i in self.dle_arguments
                                if isinstance(i.argument, Dimension)])

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(dle_state.nodes, parameters)

        # Introduce all required C declarations
        nodes = self._insert_declarations(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
def test_create_efuncs_complex(complex_function):
    # Takes the last node of every iteration tree of the fixture, re-tags it
    # with ``tagger(<tree index>)`` and appends ELEMENTAL to its properties,
    # then runs ``transform`` in 'split' mode and compares the generated code
    # (main nodes followed by the efuncs) with the expected text.
    # NOTE(review): this snippet appears truncated -- the ``mapper`` dict, the
    # ``output`` list and the expected string literal are never closed.
    roots = [i[-1] for i in retrieve_iteration_tree(complex_function)]
    retagged = [j._rebuild(properties=tagger(i)) for i, j in enumerate(roots)]
    mapper = {
        i: j._rebuild(properties=(j.properties + (ELEMENTAL, )))
        for i, j in zip(roots, retagged)
    function = Transformer(mapper).visit(complex_function)
    handle = transform(function, mode='split')
    block = List(body=[handle.nodes] + handle.efuncs)
    output = str(block.ccode)
    # Make output compiler independent: drop every line containing
    # '#pragma' or '/*'
    output = [
        i for i in output.split('\n')
        if all([j not in i for j in ('#pragma', '/*')])
    assert '\n'.join(output) == \
        ("""void foo(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec)
  for (int i = 0; i <= 3; i += 1)
    f_0((float *)a,(float *)b,i_size,i,4,0);
    for (int j = 0; j <= 5; j += 1)
      f_1((float *)a,(float *)b,(float *)c,(float *)d,i_size,j_size,k_size,i,j,7,0);
    f_2((float *)a,(float *)b,i_size,i,4,0);
void f_0(float *restrict a_vec, float *restrict b_vec,"""
         """ const int i_size, const int i, const int sf_M, const int sf_m)
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  for (int s = sf_m; s <= sf_M; s += 1)
    b[i] = a[i] + pow(b[i], 2) + 3;
void f_1(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec,"""
         """ const int i_size, const int j_size, const int k_size,"""
         """ const int i, const int j, const int kf_M, const int kf_m)
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__ ((aligned (64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__ ((aligned (64))) ="""
         """ (float (*)[j_size][k_size]) d_vec;
  for (int k = kf_m; k <= kf_M; k += 1)
    a[i] = a[i]*b[i]*c[i][j]*d[i][j][k];
    a[i] = 4*(a[i] + c[i][j])*(b[i] + d[i][j][k]);
void f_2(float *restrict a_vec, float *restrict b_vec,"""
         """ const int i_size, const int i, const int qf_M, const int qf_m)
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  for (int q = qf_m; q <= qf_M; q += 1)
    a[i] = 8.0F*a[i] + 6.0F/b[i];
    def __init__(self, expressions, **kwargs):
        """Build the operator from one or more SymPy equations.

        Keyword arguments read here: ``name`` (default "Kernel"), ``subs``
        (default {}), ``dse`` and ``dle`` (defaulting to the corresponding
        ``configuration`` entries).

        Raises InvalidOperator if any expression is not a ``sympy.Eq``.
        """
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = [i.xreplace(subs) for i in expressions]
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize_iet(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_args = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(OrderedDict([(i.name, MetaCall(i, True))
                                            for i in dle_state.elemental_functions]))
        self.dimensions.extend([i.argument for i in self.dle_args
                                if isinstance(i.argument, Dimension)])

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        nodes = self._build_casts(nodes)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
def test_create_elemental_functions_complex(complex_function):
    roots = [i[-1] for i in retrieve_iteration_tree(complex_function)]
    retagged = [j._rebuild(properties=tagger(i)) for i, j in enumerate(roots)]
    mapper = {i: j._rebuild(properties=(j.properties + (ELEMENTAL,)))
              for i, j in zip(roots, retagged)}
    function = Transformer(mapper).visit(complex_function)
    handle = transform(function, mode='split')
    block = List(body=handle.nodes + handle.elemental_functions)
    output = str(block.ccode)
    # Make output compiler independent
    output = [i for i in output.split('\n')
              if all([j not in i for j in ('#pragma', '/*')])]
    assert '\n'.join(output) == \
        ("""void foo(float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[5] __attribute__((aligned(64))) = (float (*)[5]) c_vec;
  float (*restrict d)[5][7] __attribute__((aligned(64))) = (float (*)[5][7]) d_vec;
  for (int i = 0; i < 3; i += 1)
    for (int j = 0; j < 5; j += 1)
void f_0(const int s_start, const int s_finish,"""
         """ float *restrict a_vec, float *restrict b_vec, const int i)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  for (int s = s_start; s < s_finish; s += 1)
    b[i] = a[i] + pow(b[i], 2) + 3;
void f_1(const int k_start, const int k_finish,"""
         """ float *restrict a_vec, float *restrict b_vec,"""
         """ float *restrict c_vec, float *restrict d_vec, const int i, const int j)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[5] __attribute__((aligned(64))) = (float (*)[5]) c_vec;
  float (*restrict d)[5][7] __attribute__((aligned(64))) = (float (*)[5][7]) d_vec;
  for (int k = k_start; k < k_finish; k += 1)
    a[i] = a[i]*b[i]*c[i][j]*d[i][j][k];
    a[i] = 4*(a[i] + c[i][j])*(b[i] + d[i][j][k]);
void f_2(const int q_start, const int q_finish,"""
         """ float *restrict a_vec, float *restrict b_vec, const int i)
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  for (int q = q_start; q < q_finish; q += 1)
    a[i] = 8.0F*a[i] + 6.0F/b[i];