Пример #1
    def _factorize(self, cluster, **kwargs):
        """
        Collect terms in each expression of ``cluster`` based on the following
        heuristic:

            * Collect all literals;
            * Collect all temporaries produced by CSE;
            * If the estimated cost of the collected expression is at least
              ``self.thresholds['min-cost-factorize']``, then collection is
              re-applied for as long as the estimated cost keeps decreasing.

        :param cluster: The cluster to process; its ``exprs`` are factorized
                        one by one.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: The value of ``cluster.rebuild`` applied to the factorized
                  expressions.
        """
        processed = []
        for expr in cluster.exprs:
            handle = collect_nested(expr)
            cost_handle = estimate_cost(handle)

            if cost_handle >= self.thresholds['min-cost-factorize']:
                handle_prev = handle
                cost_prev = estimate_cost(expr)
                while cost_handle < cost_prev:
                    handle_prev, handle = handle, collect_nested(handle)
                    cost_prev, cost_handle = cost_handle, estimate_cost(handle)
                # The last attempt did not lower the cost: fall back to the
                # candidate (and cost) recorded just before it
                cost_handle, handle = cost_prev, handle_prev

            # Without this, ``processed`` would stay empty and the cluster
            # would be rebuilt with no expressions at all
            processed.append(handle)

        return cluster.rebuild(processed)
Пример #2
    def _extract_time_invariants(self, cluster, template, with_cse=True,
                                 costmodel=None, **kwargs):
        """
        Extract time-invariant subexpressions, and assign them to temporaries.

        :param cluster: The cluster to process. ``cluster.exprs`` are the
                        expressions searched, ``cluster.trace`` is used to
                        build the extraction rule.
        :param template: A callable taking an integer ID and returning the
                         name of a new temporary.
        :param with_cse: If true, common sub-expressions elimination is
                         applied to the extracted temporaries.
        :param costmodel: Optional predicate forwarded to
                          ``xreplace_constrained``. When not provided, an
                          expression qualifies if its estimated cost is
                          greater than zero.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: The value of ``cluster.reschedule`` applied to the
                  processed expressions.
        """
        # NOTE(review): the tail of the signature was truncated in the
        # available listing. Parameter names are taken from their use in the
        # body; the defaults (``with_cse=True``, ``costmodel=None``) are an
        # assumption to be confirmed against the callers.

        # Extract time invariants
        make = lambda i: ScalarFunction(name=template(i)).indexify()
        rule = iq_timeinvariant(cluster.trace)
        costmodel = costmodel or (lambda e: estimate_cost(e) > 0)
        processed, found = xreplace_constrained(cluster.exprs, make, rule,
                                                costmodel)

        if with_cse:
            leaves = [i for i in processed if i not in found]

            # Search for common sub-expressions amongst them (and only them).
            # The offset is fixed up-front so that the IDs handed out here do
            # not depend on ``found`` being rebound below.
            offset = len(found)
            make = lambda i: ScalarFunction(name=template(i + offset)).indexify()
            found = common_subexprs_elimination(found, make)

            # Some temporaries may be droppable at this point
            processed = compact_temporaries(found + leaves)

        return cluster.reschedule(processed)
Пример #3
    def wrapper(self, state, **kwargs):
        """
        Apply the wrapped pass ``func`` to every cluster in ``state``, but
        only if at least one of the triggers registered for ``func`` (looked
        up by name in ``self.triggers``) is part of ``self.mode``.

        The elapsed time is stored in ``self.timings``; if ``self.profile``
        is set, the estimated cost of the dense clusters is stored in
        ``self.ops`` under the same key.

        :param state: Object exposing ``clusters`` and ``update``.
        :param kwargs: Accepted but not forwarded to ``func``.
        """
        if not self.mode.intersection(set(self.triggers[func.__name__])):
            # None of the triggers of this pass is active: nothing to do
            return

        # Run the pass cluster by cluster, timing the whole sweep
        started = time()
        transformed = [func(self, cluster) for cluster in state.clusters]
        state.update(flatten(transformed))
        elapsed = time() - started

        # The number of timings recorded so far makes the key unique
        label = '%s%d' % (func.__name__, len(self.timings))
        self.timings[label] = elapsed

        if not self.profile:
            return

        # Profiling only accounts for the dense clusters
        dense_exprs = [c.exprs for c in state.clusters if c.is_dense]
        self.ops[label] = estimate_cost(flatten(dense_exprs))
Пример #4
def common_subexprs_elimination(exprs, make, mode='default'):
    """
    Perform common subexpressions elimination.

    Note: the output is not guaranteed to be topologically sorted.

    :param exprs: The target SymPy expression, or a collection of SymPy expressions.
    :param make: A function to construct symbols used for replacement.
                 The function takes as input an integer ID; ID is computed internally
                 and used as a unique identifier for the constructed symbols.
    :param mode: The elimination strategy; only ``'default'`` is supported.
    :returns: The list produced by ``compact_temporaries`` from the newly
              created temporaries followed by the rewritten input expressions.
    """

    # Note: not defaulting to SymPy's CSE() function for three reasons:
    # - it also captures array index access functions (eg, i+1 in A[i+1] and B[i+1]);
    # - it sometimes "captures too much", losing factorization opportunities;
    # - very slow
    # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but
    # also ensuring some sort of post-processing
    assert mode == 'default'  # Only supported mode ATM

    processed = list(exprs)
    mapped = []
    while True:
        # Detect redundancies: keep the entries counted more than once,
        # each paired with its estimated cost
        counted = count(mapped + processed, q_op).items()
        targets = OrderedDict([(k, estimate_cost(k)) for k, v in counted
                               if v > 1])
        if not targets:
            # Nothing is repeated any more: this is the only way out of the loop
            break

        # Create temporaries for the most expensive redundancies only;
        # cheaper ones are reconsidered in the next round
        hit = max(targets.values())
        picked = [k for k, v in targets.items() if v == hit]
        mapper = OrderedDict([(e, make(len(mapped) + i))
                              for i, e in enumerate(picked)])

        # Apply replacements
        processed = [e.xreplace(mapper) for e in processed]
        mapped = [e.xreplace(mapper) for e in mapped]
        mapped = [Eq(v, k) for k, v in reversed(list(mapper.items()))] + mapped

        # NOTE(review): the available listing had a body-less
        # ``for k in picked:`` loop here. ``targets`` is rebuilt from scratch
        # at the top of every round, so no per-round bookkeeping is needed.
    processed = mapped + processed

    # Simply renumber the temporaries in ascending order: the i-th temporary
    # in ``mapped`` swaps its symbol with the i-th one counted from the end
    mapper = {i.lhs: j.lhs for i, j in zip(mapped, reversed(mapped))}
    processed = [e.xreplace(mapper) for e in processed]

    # Some temporaries may be droppable at this point
    processed = compact_temporaries(processed)

    return processed
Пример #5
    def _extract_time_varying(self, cluster, template, **kwargs):
        """
        Extract time-varying subexpressions, and assign them to temporaries.
        Time varying subexpressions arise for example when approximating
        derivatives through finite differences.

        :param cluster: The cluster to process. ``cluster.exprs`` are the
                        expressions searched, ``cluster.trace`` is used to
                        build the extraction rule.
        :param template: A callable taking an integer ID and returning the
                         name of a new temporary.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: The value of ``cluster.reschedule`` applied to the
                  processed expressions.
        """
        make = lambda i: ScalarFunction(name=template(i)).indexify()
        rule = iq_timevarying(cluster.trace)
        # Only expressions with a non-zero estimated cost qualify
        costmodel = lambda i: estimate_cost(i) > 0
        # NOTE(review): the end of this call was truncated in the available
        # listing; ``costmodel`` is assumed to be its last argument since it
        # is otherwise unused -- confirm no further arguments were passed.
        processed, _ = xreplace_constrained(cluster.exprs, make, rule,
                                            costmodel)

        return cluster.reschedule(processed)
Пример #6
    def _extract_time_varying(self, cluster, **kwargs):
        """
        Extract time-varying subexpressions, and assign them to temporaries.
        Time varying subexpressions arise for example when approximating
        derivatives through finite differences.

        :param cluster: The cluster to process. ``cluster.exprs`` are the
                        expressions searched, ``cluster.trace`` is used to
                        build the extraction rule.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: The value of ``cluster.rebuild`` applied to the processed
                  expressions.
        """
        # Temporaries are named after the 'time-dependent' convention,
        # followed by a progressive integer ID
        template = self.conventions['time-dependent'] + "%d"
        make = lambda i: ScalarFunction(name=template % i).indexify()

        rule = iq_timevarying(cluster.trace)

        # Only expressions with a non-zero estimated cost qualify
        cm = lambda i: estimate_cost(i) > 0

        processed, _ = xreplace_constrained(cluster.exprs, make, rule, cm)

        return cluster.rebuild(processed)
Пример #7
    def wrapper(self, state, **kwargs):
        """
        Apply the wrapped DSE pass ``func`` to every cluster in ``state``,
        handing it a template for naming the temporaries it creates.

        The elapsed time is stored in ``self.timings``; if ``self.profile``
        is set, the estimated cost of the dense clusters is stored in
        ``self.ops`` under the same key.

        :param state: Object exposing ``clusters`` and ``update``.
        :param kwargs: Forwarded to ``func``. If ``start`` is present and not
                       None, it is embedded in the names of the temporaries.
        """
        # A template to construct temporaries
        tempname = self.conventions.get(func.__name__)
        if tempname:
            start = kwargs.get('start')
            tempname += '%d' if start is None else (('_%d_' % start) + '%d')
            template = lambda i: tempname % i
        else:
            # No naming convention registered for this pass. Without this
            # ``else`` the template built above was always discarded.
            template = None

        # Invoke the DSE pass
        tic = time()
        state.update(flatten([func(self, c, template, **kwargs)
                              for c in state.clusters]))
        toc = time()

        # Profiling
        key = '%s%d' % (func.__name__, len(self.timings))
        self.timings[key] = toc - tic
        if self.profile:
            candidates = [c.exprs for c in state.clusters if c.is_dense]
            self.ops[key] = estimate_cost(flatten(candidates))
Пример #8
    def _extract_time_invariants(self, cluster, **kwargs):
        """
        Extract time-invariant subexpressions, and assign them to temporaries.

        :param cluster: The cluster to process. ``cluster.exprs`` are the
                        expressions searched, ``cluster.trace`` is used to
                        build the extraction rule.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: The value of ``cluster.rebuild`` applied to the extracted
                  temporaries (after common sub-expressions elimination)
                  followed by the remaining expressions.
        """

        # Extract time invariants
        template = self.conventions['time-invariant'] + "%d"
        make = lambda i: ScalarFunction(name=template % i).indexify()

        rule = iq_timeinvariant(cluster.trace)

        # Only expressions with a non-zero estimated cost qualify
        cm = lambda e: estimate_cost(e) > 0

        processed, found = xreplace_constrained(cluster.exprs, make, rule, cm)
        leaves = [i for i in processed if i not in found]

        # Search for common sub-expressions amongst them (and only them).
        # These temporaries get a distinct prefix, made of the 'redundancy'
        # and the 'time-invariant' conventions.
        template = "%s%s%s" % (self.conventions['redundancy'],
                               self.conventions['time-invariant'], '%d')
        make = lambda i: ScalarFunction(name=template % i).indexify()

        found = common_subexprs_elimination(found, make)

        return cluster.rebuild(found + leaves)
Пример #9
    def _eliminate_inter_stencil_redundancies(self, cluster, **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two types of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]

           becomes

           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]

           becomes

           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]

        :param cluster: The cluster to process.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: ``cluster`` itself if it is sparse; otherwise a list with
                  the alias clusters followed by the rebuilt input cluster.
        """
        # NOTE(review): several lines of this method were missing from the
        # available listing (the ``else`` branch of the cost check, the
        # stencil tracking and the tail of the ``Indexed`` call). They were
        # reconstructed from the surrounding code; confirm against upstream.
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect_aliases.__doc__
        mapper, aliases = collect_aliases(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        shape = g.space_shape

        # Template for captured redundancies
        name = self.conventions['redundancy'] + "%d"
        template = lambda i: TensorFunction(
            name=name % i, shape=shape, dimensions=indices).indexed

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds[
                    'min-cost-time-hoist'] and g.time_invariant(v):
                candidates[v.rhs] = k
            elif cost >= self.thresholds[
                    'min-cost-space-hoist'] and naliases > 1:
                candidates[v.rhs] = k
            else:
                # Not worth a temporary: the expression is kept as is
                processed.append(Eq(k, v.rhs))

        # Create temporaries capturing redundant computation
        found = []
        rules = OrderedDict()
        stencils = []
        for c, (origin, alias) in enumerate(aliases.items()):
            temporary = Indexed(template(c), *indices)
            found.append(Eq(temporary, origin))
            # Track the stencil of each TensorFunction introduced
            stencils.append(alias.anti_stencil.anti(cluster.stencil))
            for aliased, distance in alias.with_distance:
                # Shift each space index by its entry in ``distance``
                # (presumably a dimension -> offset mapping; verify)
                coordinates = [
                    sum([i, j]) for i, j in distance.items() if i in indices
                ]
                rules[candidates[aliased]] = Indexed(template(c),
                                                     *tuple(coordinates))

        # Create the alias clusters
        alias_clusters = clusterize(found, stencils)
        alias_clusters = sorted(alias_clusters, key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
Пример #10
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two types of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]

           becomes

           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]

           becomes

           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]

        :param cluster: The cluster to process.
        :param template: A callable taking an integer ID and returning the
                         name of a new temporary.
        :param kwargs: Accepted for interface uniformity; unused here.
        :returns: ``cluster`` itself if it is sparse; otherwise a list with
                  the alias clusters followed by the rebuilt input cluster.
        """
        # NOTE(review): the tail of the signature and a few body lines (the
        # ``else`` of the cost check, the ``continue`` skipping non-candidate
        # aliases, the stencil bookkeeping) were missing from the available
        # listing and were reconstructed; confirm against upstream.
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Template for captured redundancies
        shape = tuple(i.symbolic_size for i in indices)
        make = lambda i: TensorFunction(
            name=template(i), shape=shape, dimensions=indices).indexed

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and\
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                # Not worth a temporary: the expression is kept as is
                processed.append(Eq(k, v.rhs))

        # Create temporaries capturing redundant computation
        expressions = []
        stencils = []
        rules = OrderedDict()
        for c, (origin, alias) in enumerate(aliases.items()):
            if all(i not in candidates for i in alias.aliased):
                # None of the aliased expressions passed the cost check
                continue
            # Build alias expression
            function = make(c)
            expressions.append(Eq(Indexed(function, *indices), origin))
            # Build substitution rules
            for aliased, distance in alias.with_distance:
                # Shift each space index by its entry in ``distance``
                # (presumably a dimension -> offset mapping; verify)
                coordinates = [
                    sum([i, j]) for i, j in distance.items() if i in indices
                ]
                temporary = Indexed(function, *tuple(coordinates))
                rules[candidates[aliased]] = temporary
                rules[aliased] = temporary
            # Build cluster stencil
            stencil = alias.anti_stencil.anti(cluster.stencil)
            if all(time_invariants[i] for i in alias.aliased):
                # Optimization: drop time dimension if time-invariant and the
                # alias involves a complex calculation
                stencil = stencil.section(g.time_indices)
            # One stencil per alias expression, in the same order
            stencils.append(stencil)

        # Create the alias clusters
        alias_clusters = clusterize(expressions, stencils, indices)
        alias_clusters = sorted(alias_clusters, key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]