def _(expr, terms):
    derivs, others = split(terms, lambda i: i.deriv is not None)
    if not derivs:
        return expr, Term(expr)

    # Map by type of derivative
    mapper = as_mapper(derivs, lambda i: key(i.deriv))
    if len(mapper) == len(derivs):
        return expr, Term(expr)

    processed = []
    for v in mapper.values():
        fact, nonfact = split(v, lambda i: _is_const_coeff(i.other, i.deriv))

        if fact:
            # Finally factorize derivative arguments
            func = fact[0].deriv._new_from_self
            exprs = []
            for i in fact:
                if i.func:
                    exprs.append(i.func(i.other, i.deriv.expr))
                else:
                    assert i.other == 1
                    exprs.append(i.deriv.expr)
            fact = [Term(func(expr=expr.func(*exprs)))]

        for i in fact + nonfact:
            if i.func:
                processed.append(i.func(i.other, i.deriv))
            else:
                processed.append(i.other)

    others = [i.other for i in others]
    expr = expr.func(*(processed + others))

    return expr, Term(expr)
def _extract(self, exprs, context, n):
    # Forbid CIRE involving Dimension-independent dependencies, e.g.:
    #   r0 = ...
    #   u[x, y] = ... r0*a[x, y] ...
    # NOTE: if one uses the DSL in a conventional way and sticks to the default
    # compilation pipelines where CSE always happens after CIRE, then `exclude`
    # will always be empty
    exclude = {i.source.indexed for i in context[None].scope.d_flow.independent()}

    mapper = Uxmapper()
    for e in exprs:
        for i in search_potential_deriv(e, n):
            if i.free_symbols & exclude:
                continue

            key = lambda a: a.is_Add
            terms, others = split(i.args, key)

            if self._opt_maxalias:
                # Treat `e` as an FD expression and pull out the derivative
                # coefficient from `i`
                # Note: typically derivative coefficients are numbers, but
                # sometimes they could be provided in symbolic form through an
                # arbitrary Function. In the latter case, we rely on the
                # heuristic that such Functions basically never span the whole
                # grid, but rather a single Grid dimension (e.g., `c[z, n]` for
                # a stencil of diameter `n` along `z`)
                if e.grid is not None and terms:
                    key = partial(maybe_coeff_key, e.grid)
                    others, more_terms = split(others, key)
                    terms += more_terms

            mapper.add(i, self._make_symbol, terms)

    return mapper
def extract(cls, n, context, min_cost, max_alias, cluster, sregistry):
    make = lambda: Scalar(name=sregistry.make_name(),
                          dtype=cluster.dtype).indexify()

    # The `depth` determines "how big" the extracted sum-of-products will be.
    # We observe that in typical FD codes:
    #   add(mul, mul, ...) -> stems from a first-order derivative
    #   add(mul(add(mul, mul, ...), ...), ...) -> stems from a second-order derivative
    # To search the muls in the former case, we need `depth=0`; to search the outer
    # muls in the latter case, we need `depth=2`
    depth = n

    exclude = {i.source.indexed for i in cluster.scope.d_flow.independent()}
    rule0 = lambda e: not e.free_symbols & exclude
    rule1 = lambda e: e.is_Mul and q_terminalop(e, depth)
    rule = lambda e: rule0(e) and rule1(e)

    extracted = OrderedDict()
    mapper = {}
    for e in cluster.exprs:
        for i in search(e, rule, 'all', 'bfs_first_hit'):
            if i in mapper:
                continue

            key = lambda a: a.is_Add
            terms, others = split(list(i.args), key)

            if max_alias:
                # Treat `e` as an FD expression and pull out the derivative
                # coefficient from `i`
                # Note: typically derivative coefficients are numbers, but
                # sometimes they could be provided in symbolic form through an
                # arbitrary Function. In the latter case, we rely on the
                # heuristic that such Functions basically never span the whole
                # grid, but rather a single Grid dimension (e.g., `c[z, n]` for
                # a stencil of diameter `n` along `z`)
                if e.grid is not None and terms:
                    key = partial(maybe_coeff_key, e.grid)
                    others, more_terms = split(others, key)
                    terms.extend(more_terms)

            if terms:
                k = i.func(*terms)
                try:
                    symbol, _ = extracted[k]
                except KeyError:
                    symbol, _ = extracted.setdefault(k, (make(), e))

                mapper[i] = i.func(symbol, *others)

    if mapper:
        extracted = [e.func(v, k) for k, (v, e) in extracted.items()]
        processed = [uxreplace(e, mapper) for e in cluster.exprs]
        return extracted + processed, extracted
    else:
        return cluster.exprs, []
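
# Illustration of the `depth` remark above, in plain SymPy with hypothetical
# FD-like expressions (`u0`, `u1` and the coefficients are made-up symbols):
# first-order stencils are flat sums of Muls, whereas second-order ones nest
# another Add inside each Mul, hence the deeper search.
import sympy

a, b, c, d, u0, u1 = sympy.symbols('a b c d u0 u1')

first_order = a*u0 + b*u1              # add(mul, mul)
second_order = c*(a*u0 + b*u1) + d*u1  # add(mul(add(mul, mul), ...), ...)

assert all(i.is_Mul for i in first_order.args)
assert any(any(j.is_Add for j in i.args) for i in second_order.args)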
def relax_incr_dimensions(iet, **kwargs):
    """
    This pass adjusts the bounds of blocked Iterations in order to include the
    "remainder regions". Without the relaxation performed by this pass, the only
    way to iterate over the entire iteration space is to have step increments
    that are perfect divisors of the iteration space (e.g., for an iteration
    space of size 67 and block size 8, only 64 iterations would be computed,
    as `67 - 67 mod 8 = 64`).

    A simple 1D example: nested Iterations are transformed from:

        <Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
            <Iteration x; (x0_blk0, x0_blk0 + x0_blk0_size - 1, 1)>

    to:

        <Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
            <Iteration x; (x0_blk0, MIN(x_M, x0_blk0 + x0_blk0_size - 1), 1)>
    """
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        iterations = [i for i in tree if i.dim.is_Block]
        if not iterations:
            continue

        root = iterations[0]
        if root in mapper:
            continue

        assert all(i.direction is Forward for i in iterations)
        outer, inner = split(iterations, lambda i: not i.dim.parent.is_Block)

        # Get root's `symbolic_max` out of each outer Dimension
        roots_max = {i.dim.root: i.symbolic_max for i in outer}

        # Process inner iterations and adjust their bounds
        for n, i in enumerate(inner):
            # The Iteration's maximum is the MIN of (a) the `symbolic_max` of the
            # current Iteration, e.g. `x0_blk0 + x0_blk0_size - 1`, and (b) the
            # `symbolic_max` of the current Iteration's root Dimension, e.g. `x_M`.
            # The generated maximum will be `MIN(x0_blk0 + x0_blk0_size - 1, x_M)`.
            # In some corner cases an offset may be added (e.g. after CIRE passes):
            # e.g. assume `i.symbolic_max = x0_blk0 + x0_blk0_size + 1` and
            # `i.dim.symbolic_max = x0_blk0 + x0_blk0_size - 1`; then the generated
            # maximum will be `MIN(x0_blk0 + x0_blk0_size + 1, x_M + 2)`
            root_max = roots_max[i.dim.root] + i.symbolic_max - i.dim.symbolic_max

            iter_max = evalrel(min, [i.symbolic_max, root_max])
            mapper[i] = i._rebuild(limits=(i.symbolic_min, iter_max, i.step))

    if mapper:
        iet = Transformer(mapper, nested=True).visit(iet)

        headers = [('%s(a,b)' % MIN.name, ('(((a) < (b)) ? (a) : (b))')),
                   ('%s(a,b)' % MAX.name, ('(((a) > (b)) ? (a) : (b))'))]
    else:
        headers = []

    return iet, {'headers': headers}
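
# A minimal sketch (plain Python, made-up sizes) of the relaxation above:
# clamping the inner loop upper bound with MIN lets a block size that does
# not evenly divide the iteration space still cover the remainder region.
x_m, x_M, x0_blk0_size = 0, 66, 8  # 67 iterations, as in the docstring

covered = []
for x0_blk0 in range(x_m, x_M + 1, x0_blk0_size):
    # The unrelaxed bound would be `x0_blk0 + x0_blk0_size - 1`, overrunning
    # x_M; the relaxed bound is MIN(x_M, x0_blk0 + x0_blk0_size - 1)
    for x in range(x0_blk0, min(x_M, x0_blk0 + x0_blk0_size - 1) + 1):
        covered.append(x)

assert covered == list(range(x_m, x_M + 1))  # all 67 iterations computed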
def topological_sort(exprs):
    """Topologically sort the temporaries in a list of equations."""
    mapper = {e.lhs: e for e in exprs}
    assert len(mapper) == len(exprs)  # Expect SSA

    # Build DAG and topologically-sort temporaries
    temporaries, tensors = split(exprs, lambda e: not e.lhs.is_Indexed)
    dag = DAG(nodes=temporaries)
    for e in temporaries:
        for r in retrieve_terminals(e.rhs):
            if r not in mapper:
                continue
            elif mapper[r] is e:
                # Avoid cyclic dependences, such as
                # Eq(f, f + 1)
                continue
            elif r.is_Indexed:
                # Only scalars enforce an ordering
                continue
            else:
                dag.add_edge(mapper[r], e, force_add=True)
    processed = dag.topological_sort()

    # Append tensor equations at the end in user-provided order
    processed.extend(tensors)

    return processed
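
# A standalone sketch of the same idea, assuming SSA scalar temporaries
# expressed as SymPy equations (`DAG` and `retrieve_terminals` are internals,
# so plain recursion over `free_symbols` is used instead): producers are
# always emitted before their consumers.
import sympy

x = sympy.Symbol('x')
t0, t1, t2 = sympy.symbols('t0 t1 t2')
exprs = [sympy.Eq(t2, t0 + t1), sympy.Eq(t0, 2*x), sympy.Eq(t1, t0 + 1)]

mapper = {e.lhs: e for e in exprs}
processed, seen = [], set()

def visit(e):
    if e.lhs in seen:
        return
    seen.add(e.lhs)
    for r in e.rhs.free_symbols:
        if r in mapper and mapper[r] is not e:
            visit(mapper[r])  # producers first
    processed.append(e)

for e in exprs:
    visit(e)

assert [e.lhs for e in processed] == [t0, t1, t2]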
def __new__(cls, *args, **kwargs):
    # A Mul, being a DifferentiableOp, may not trigger evaluation upon
    # construction (e.g., when an EvalDerivative is present among its
    # arguments), so here we apply a small set of basic simplifications
    # to avoid generating functional, but also ugly, code

    # (a*b)*c -> a*b*c (flattening)
    nested, others = split(args, lambda e: isinstance(e, Mul))
    args = flatten(e.args for e in nested) + list(others)

    # a*0 -> 0
    if any(i == 0 for i in args):
        return sympy.S.Zero

    # a*1 -> a
    args = [i for i in args if i != 1]

    # a*-1*-1 -> a
    nminus = len([i for i in args if i == sympy.S.NegativeOne])
    if nminus % 2 == 0:
        args = [i for i in args if i != sympy.S.NegativeOne]

    # Reorder for homogeneity with pure SymPy types
    _mulsort(args)

    return super().__new__(cls, *args, **kwargs)
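
# A quick illustration (plain SymPy, hypothetical arguments) of the rewrite
# rules applied above before deferring to SymPy's own Mul construction.
import sympy

a, b, c = sympy.symbols('a b c')

# (a*b)*c -> a*b*c: nested Muls are flattened
args = [sympy.Mul(a, b, evaluate=False), c]
nested = [i for i in args if isinstance(i, sympy.Mul)]
others = [i for i in args if not isinstance(i, sympy.Mul)]
flat = [x for e in nested for x in e.args] + others
assert flat == [a, b, c]

# a*1 -> a, then a*-1*-1 -> a (an even number of -1's cancels out)
args = [a, sympy.S.One, sympy.S.NegativeOne, sympy.S.NegativeOne]
args = [i for i in args if i != 1]
if len([i for i in args if i == sympy.S.NegativeOne]) % 2 == 0:
    args = [i for i in args if i != sympy.S.NegativeOne]
assert args == [a]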
def _eval_numbers(expr, args):
    """
    Helper function for in-place reduction of the expr arguments.
    """
    numbers, others = split(args, lambda i: i.is_Number)
    if len(numbers) > 1:
        args[:] = [expr.func(*numbers)] + others
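
# Usage sketch (plain SymPy stand-ins for the expression types handled
# above): the numeric arguments of an unevaluated Add get folded in place
# into a single Number, leaving the symbolic arguments untouched.
import sympy

x = sympy.Symbol('x')
expr = sympy.Add(2, 3, x, evaluate=False)

args = list(expr.args)
numbers = [i for i in args if i.is_Number]
others = [i for i in args if not i.is_Number]
if len(numbers) > 1:
    args[:] = [expr.func(*numbers)] + others  # [2, 3, x] -> [5, x]

assert sympy.Add(*args) == x + 5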
def _uxreplace(expr, rule):
    if expr in rule:
        v = rule[expr]
        if not isinstance(v, dict):
            return v, True
        args, eargs = split(expr.args, lambda i: i in v)
        args = [v[i] for i in args if v[i] is not None]
        changed = True
    else:
        args, eargs = [], expr.args
        changed = False

    if rule:
        for a in eargs:
            try:
                ax, flag = _uxreplace(a, rule)
                args.append(ax)
                changed |= flag
            except AttributeError:
                # E.g., un-sympified numbers
                args.append(a)

    if changed:
        return _uxreplace_handle(expr, args), True

    return expr, False
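
# A minimal sketch of the replacement semantics, assuming the rule values are
# plain expressions (the dict-valued rules, used above to drop/add arguments,
# are omitted here): a top-down traversal that rebuilds a node only if one of
# its arguments actually changed.
import sympy

a, b, r0 = sympy.symbols('a b r0')

def ux(expr, rule):
    if expr in rule:
        return rule[expr], True
    if not expr.args:
        return expr, False
    args, changed = [], False
    for arg in expr.args:
        ax, flag = ux(arg, rule)
        args.append(ax)
        changed |= flag
    return (expr.func(*args), True) if changed else (expr, False)

new, flag = ux(sympy.cos(a*b) + a*b, {a*b: r0})
assert flag and new == sympy.cos(r0) + r0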
def _(expr, terms):
    derivs, others = split(terms, lambda i: i.deriv is not None)
    if len(derivs) == 1:
        # Linear => propagate found Derivative upstream
        deriv = derivs[0].deriv
        other = expr.func(*[i.other for i in others])  # De-nest terms
        return expr, Term(other, deriv, expr.func)
    else:
        return expr, Term(expr)
def run(expr):
    if expr.is_Atom or expr.is_Indexed:
        return expr, rule(expr)
    elif expr.is_Pow:
        base, flag = run(expr.base)
        if flag and costmodel(base):
            return expr.func(replace(base), expr.exp, evaluate=False), False
        elif flag and costmodel(expr):
            return replace(expr), False
        else:
            return expr.func(base, expr.exp, evaluate=False), rule(expr)
    else:
        children = [run(a) for a in expr.args]
        matching = [a for a, flag in children if flag]
        other = [a for a, _ in children if a not in matching]

        if not matching:
            return expr.func(*other, evaluate=False), False

        if eager is False:
            matched = expr.func(*matching, evaluate=False)
            if len(matching) == len(children) and rule(expr):
                # Go look for larger expressions first
                return matched, True
            elif rule(matched) and costmodel(matched):
                # E.g.: a*b*c*d -> a*r0
                rebuilt = expr.func(*(other + [replace(matched)]), evaluate=False)
                return rebuilt, False
            else:
                # E.g.: a*b*c*d -> a*r0*r1*r2
                replaced = [replace(e) for e in matching if costmodel(e)]
                unreplaced = [e for e in matching if not costmodel(e)]
                rebuilt = expr.func(*(other + replaced + unreplaced),
                                    evaluate=False)
                return rebuilt, False
        else:
            replaceable, unreplaced = split(matching, lambda e: costmodel(e))
            if replaceable:
                # E.g.: a*b*c*d -> a*r0*r1*r2
                replaced = [replace(e) for e in replaceable]
                rebuilt = expr.func(*(other + replaced + unreplaced),
                                    evaluate=False)
                return rebuilt, False

            matched = expr.func(*matching, evaluate=False)
            if rule(matched) and costmodel(matched):
                if len(matching) == len(children):
                    # E.g.: a*b*c*d -> r0
                    return replace(matched), False
                else:
                    # E.g.: a*b*c*d -> a*r0
                    rebuilt = expr.func(*(other + [replace(matched)]),
                                        evaluate=False)
                    return rebuilt, False
            elif len(matching) == len(children) and rule(expr):
                # Go look for larger expressions
                return matched, True
            else:
                # E.g.: a*b*c*d; a,b,a*b replaceable but not satisfying the cost
                # model, hence giving up as c,d,c*d aren't replaceable
                return expr.func(*(matching + other), evaluate=False), False
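
# A toy rendition (with a made-up `rule` and cost model, not the real ones)
# of the extraction strategy above: the matching sub-operands are grouped
# and, if the group is "big enough", swapped for a temporary, yielding the
# `a*b*c*d -> a*r0` rewrite mentioned in the comments.
import sympy

a, b, c, d, r0 = sympy.symbols('a b c d r0')

rule = lambda e: e in (b, c, d) or (e.is_Mul and all(rule(i) for i in e.args))
costmodel = lambda e: sympy.count_ops(e) >= 2  # "big enough" to extract

expr = a*b*c*d
matching = [i for i in expr.args if rule(i)]   # [b, c, d]
other = [i for i in expr.args if not rule(i)]  # [a]

matched = sympy.Mul(*matching, evaluate=False)
if rule(matched) and costmodel(matched):
    expr = sympy.Mul(*(other + [r0]))  # a*b*c*d -> a*r0

assert expr == a*r0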
def prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    # Process data-carriers (first overrides, then fill up with whatever is needed)
    args = ReducerMap()
    args.update([p._arg_values(**kwargs) for p in self.input if p.name in kwargs])
    args.update([p._arg_values() for p in self.input if p.name not in args])
    args = args.reduce_all()

    # Process dimensions (derived go after as they might need/affect their parents)
    derived, main = split(self.dimensions, lambda i: i.is_Derived)
    for p in main:
        args.update(p._arg_values(args, self._dspace[p], **kwargs))
    for p in derived:
        args.update(p._arg_values(args, self._dspace[p], **kwargs))

    # Sanity check
    for p in self.input:
        p._arg_check(args, self._dspace[p])

    # Derive additional values for DLE arguments
    # TODO: This is not pretty, but it works for now. Ideally, the
    # DLE arguments would be massaged into the IET so as to comply
    # with the rest of the argument derivation procedure.
    for arg in self.dle_args:
        dim = arg.argument
        osize = args[arg.original_dim.symbolic_size.name]
        if dim.symbolic_size in self.parameters:
            if arg.value is None:
                args[dim.symbolic_size.name] = osize
            elif isinstance(arg.value, int):
                args[dim.symbolic_size.name] = arg.value
            else:
                args[dim.symbolic_size.name] = arg.value(osize)

    # Add in the profiler argument
    args[self.profiler.name] = self.profiler.new()

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    if kwargs.pop('autotune', False):
        args = self._autotune(args)

    # Check all user-provided keywords are known to the Operator
    for k, v in kwargs.items():
        if k not in self.known_arguments:
            raise ValueError("Unrecognized argument %s=%s passed to `apply`"
                             % (k, v))

    return args
def _(expr, mapper, nn_derivs=None):
    nn_derivs = nn_derivs or mapper.get(expr)

    args = [aggregate_coeffs(a, mapper, nn_derivs) for a in expr.args]
    expr = reuse_if_untouched(expr, args)

    # Separate arguments containing derivatives from those which do not
    hope_coeffs = []
    with_derivs = []
    for a in args:
        if isinstance(a, sympy.Derivative):
            with_derivs.append((a, [a], []))
        else:
            derivs, others = split(a.args,
                                   lambda i: isinstance(i, sympy.Derivative))
            if a.is_Add and derivs:
                with_derivs.append((a, derivs, others))
            else:
                hope_coeffs.append(a)

    # E.g., non-linear term, expansion won't help (in fact, it would only
    # cause an increase in operation count), so we skip
    if len(with_derivs) > 1:
        return expr

    try:
        with_deriv, derivs, others = with_derivs.pop(0)
    except IndexError:
        # No derivatives found, give up
        return expr

    # Aggregating the potential coefficient won't help if, in the current scope,
    # at least one derivative type does not appear more than once. In fact,
    # aggregation might even have a detrimental effect due to increasing the
    # operation count by expanding Muls, so we rather give up if that's the case
    if not any(nn_derivs[i._metadata] > 1 for i in derivs):
        return expr

    # Is the potential coefficient really a coefficient?
    csymbols = set().union(*[i.free_symbols for i in hope_coeffs])
    cdims = [i._defines for i in csymbols if i.is_Dimension]
    ddims = [set(i.dims) for i in derivs]
    if any(i & j for i, j in product(cdims, ddims)):
        return expr

    # Redundancies unlikely to pop up along the time dimension
    if any(d.is_Time for d in flatten(ddims)):
        return expr

    if len(derivs) == 1 and with_deriv is derivs[0]:
        expr = with_deriv._new_from_self(expr=expr.func(*hope_coeffs,
                                                        with_deriv.expr))
    else:
        others = [expr.func(*hope_coeffs, a) for a in others]
        derivs = [a._new_from_self(expr=expr.func(*hope_coeffs, a.expr))
                  for a in derivs]
        expr = with_deriv.func(*(derivs + others))

    return expr
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    # Process data-carriers (first overrides, then fill up with whatever is needed)
    args = ReducerMap()
    args.update([p._arg_values(**kwargs) for p in self.input if p.name in kwargs])
    args.update([p._arg_values() for p in self.input if p.name not in args])
    args = args.reduce_all()

    # All TensorFunctions should be defined on the same Grid
    functions = [kwargs.get(p, p) for p in self.input if p.is_TensorFunction]
    mapper = ReducerMap([('grid', i.grid) for i in functions if i.grid])
    try:
        grid = mapper.unique('grid')
    except (KeyError, ValueError):
        if mapper and configuration['mpi']:
            raise RuntimeError("Multiple `Grid`s found before `apply`")
        grid = None

    # Process dimensions (derived go after as they might need/affect their parents)
    derived, main = split(self.dimensions, lambda i: i.is_Derived)
    for p in main:
        args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))
    for p in derived:
        args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))

    # Sanity check
    for p in self.input:
        p._arg_check(args, self._dspace[p])

    # Add in the profiler argument
    args[self._profiler.name] = self._profiler.timer.reset()

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    args = self._autotune(args,
                          kwargs.pop('autotune', configuration['autotuning']))

    # Check all user-provided keywords are known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    return args
def diff_parameters(iet, root):
    """
    Derive the parameters of a sub-IET, `iet`, within a Callable, `root`, and
    split them into two groups:

        * the "read-only" parameters, and
        * the "dynamic" parameters, whose value changes at some point in `root`.
    """
    # TODO: this is currently very rudimentary
    required = derive_parameters(iet)

    known = set(root.parameters) | set(i for i in required if i.is_Array)
    parameters, dynamic_parameters = split(required, lambda i: i in known)

    return required, parameters, dynamic_parameters
def __new__(cls, *args, **kwargs):
    # Here, often we get `evaluate=False` to prevent SymPy evaluation (e.g.,
    # when `cls==EvalDerivative`), but in all cases we at least apply a small
    # set of basic simplifications

    # (a+b)+c -> a+b+c (flattening)
    nested, others = split(args, lambda e: isinstance(e, Add))
    args = flatten(e.args for e in nested) + list(others)

    # a+0 -> a
    args = [i for i in args if i != 0]

    # Reorder for homogeneity with pure SymPy types
    _addsort(args)

    return super().__new__(cls, *args, **kwargs)
def __make_tfunc(self, name, iet, root, threads):
    # Create the SharedData
    required = derive_parameters(iet)
    known = (root.parameters +
             tuple(i for i in required if i.is_Array and i._mem_shared))
    parameters, dynamic_parameters = split(required, lambda i: i in known)

    sdata = SharedData(name=self.sregistry.make_name(prefix='sdata'),
                       nthreads_std=threads.size, fields=dynamic_parameters)
    parameters.append(sdata)

    # Prepend the unwound SharedData fields, available upon thread activation
    preactions = [DummyExpr(i, FieldFromPointer(i.name, sdata.symbolic_base))
                  for i in dynamic_parameters]
    preactions.append(DummyExpr(sdata.symbolic_id,
                                FieldFromPointer(sdata._field_id,
                                                 sdata.symbolic_base)))

    # Append the flag reset
    postactions = [List(body=[
        BlankLine,
        DummyExpr(FieldFromPointer(sdata._field_flag, sdata.symbolic_base), 1)
    ])]

    iet = List(body=preactions + [iet] + postactions)

    # The thread has work to do when it receives the signal that all locks have
    # been set to 0 by the main thread
    iet = Conditional(CondEq(FieldFromPointer(sdata._field_flag,
                                              sdata.symbolic_base), 2), iet)

    # The thread keeps spinning until the alive flag is set to 0 by the main thread
    iet = While(CondNe(FieldFromPointer(sdata._field_flag,
                                        sdata.symbolic_base), 0), iet)

    return Callable(name, iet, 'void', parameters, 'static'), sdata
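
# A rough Python-threading analogue (an illustrative sketch, not the C that
# the pass above generates) of the flag handshake: the worker spins while the
# flag is nonzero, runs its body when the flag is 2, and resets the flag to 1
# to signal completion; 0 shuts it down.
import threading

class _SData:
    flag = 1  # 1 = idle/done, 2 = work available, 0 = shut down

def _tfunc(sdata):
    while sdata.flag != 0:       # keep spinning until the main thread kills us
        if sdata.flag == 2:      # signal received: there is work to do
            # ... perform the asynchronous work here ...
            sdata.flag = 1       # flag reset: tell the main thread we are done

sdata = _SData()
worker = threading.Thread(target=_tfunc, args=(sdata,))
worker.start()
sdata.flag = 2                   # main thread activates the worker
while sdata.flag != 1:           # main thread waits for completion
    pass
sdata.flag = 0                   # main thread shuts the worker down
worker.join()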
def _extract(self, exprs, context, n):
    extracted = super()._extract(exprs, context, n).extracted

    rule = lambda e: any(a in extracted for a in e.args)

    mapper = Uxmapper()
    for e in exprs:
        for i in search(e, rule, 'all', 'dfs'):
            if not i.is_commutative:
                continue

            key = lambda a: a in extracted
            terms, others = split(i.args, key)

            mapper.add(i, self._make_symbol, terms)

    return mapper
def diff_parameters(iet, root, indirectly_provided=None):
    """
    Derive the parameters of a sub-IET, `iet`, within a Callable, `root`, and
    split them into two groups:

        * the "read-only" parameters, and
        * the "dynamic" parameters, whose value changes at some point in `root`.

    The `indirectly_provided` are the parameters that are provided indirectly to
    `iet`, for example via a composite type (e.g., a C struct).
    """
    required = derive_parameters(iet)
    required = [i for i in required if i not in as_tuple(indirectly_provided)]

    known = set(root.parameters) | set(i for i in required if i.is_Array)
    parameters, dynamic_parameters = split(required, lambda i: i in known)

    return required, parameters, dynamic_parameters
def extract(cls, n, context, min_cost, cluster, sregistry):
    make = lambda: Scalar(name=sregistry.make_name(),
                          dtype=cluster.dtype).indexify()

    # The `depth` determines "how big" the extracted sum-of-products will be.
    # We observe that in typical FD codes:
    #   add(mul, mul, ...) -> stems from a first-order derivative
    #   add(mul(add(mul, mul, ...), ...), ...) -> stems from a second-order derivative
    # To search the muls in the former case, we need `depth=0`; to search the outer
    # muls in the latter case, we need `depth=2`
    depth = n

    exclude = {i.source.indexed for i in cluster.scope.d_flow.independent()}
    rule0 = lambda e: not e.free_symbols & exclude
    rule1 = lambda e: e.is_Mul and q_terminalop(e, depth)
    rule = lambda e: rule0(e) and rule1(e)

    extracted = OrderedDict()
    mapper = {}
    for e in cluster.exprs:
        for i in search(e, rule, 'all', 'bfs_first_hit'):
            if i in mapper:
                continue

            # Pull out the Adds, leaving numbers and Functions (which could be
            # a derivative coefficient) aside
            terms, others = split(i.args, lambda a: a.is_Add)

            if terms:
                k = i.func(*terms)
                try:
                    symbol, _ = extracted[k]
                except KeyError:
                    symbol, _ = extracted.setdefault(k, (make(), e))

                mapper[i] = i.func(symbol, *others)

    if mapper:
        extracted = [e.func(v, k) for k, (v, e) in extracted.items()]
        processed = [uxreplace(e, mapper) for e in cluster.exprs]
        return extracted + processed, extracted
    else:
        return cluster.exprs, []
def _generate(self, exprs, exclude):
    # E.g., extract `sin(x)` and `sqrt(x)` from `a*sin(x)*sqrt(x)`
    rule = lambda e: e.is_Function or (e.is_Pow and e.exp.is_Number and
                                       0 < e.exp < 1)
    cbk_search = lambda e: search(e, rule, 'all', 'bfs_first_hit')
    basextr = self._do_generate(exprs, exclude, cbk_search)
    if not basextr:
        return
    yield basextr

    # E.g., extract `sin(x)*cos(x)` from `a*sin(x)*cos(x)`
    def cbk_search(expr):
        found, others = split(expr.args, lambda a: a in basextr)
        ret = [expr] if found else []
        for a in others:
            ret.extend(cbk_search(a))
        return ret

    cbk_compose = lambda e: split(e.args, lambda a: a in basextr)[0]
    yield self._do_generate(exprs, exclude, cbk_search, cbk_compose)
def _(expr):
    args = [factorize_derivatives(a) for a in expr.args]

    derivs, others = split(args, lambda a: isinstance(a, sympy.Derivative))
    if not derivs:
        return expr

    # Map by type of derivative
    # Note: `D0(a) + D1(b) == D(a + b)` <=> `D0` and `D1`'s metadata match,
    # i.e. they are the same type of derivative
    mapper = as_mapper(derivs, lambda i: i._metadata)
    if len(mapper) == len(derivs):
        return expr

    args = list(others)
    for v in mapper.values():
        c = v[0]
        if len(v) == 1:
            args.append(c)
        else:
            args.append(c._new_from_self(expr=expr.func(*[i.expr for i in v])))

    expr = expr.func(*args)

    return expr
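
# The linearity identity exploited above ("same type of derivative"), checked
# in plain SymPy: derivatives w.r.t. the same variable can be grouped under a
# single Derivative node.
import sympy

x = sympy.Symbol('x')
f, g = sympy.Function('f')(x), sympy.Function('g')(x)

grouped = sympy.Derivative(f + g, x)
split_up = sympy.Derivative(f, x) + sympy.Derivative(g, x)
assert (grouped - split_up).doit() == 0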
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args = ReducerMap(args.reduce_all())
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = args.reduce_all()

    # All DiscreteFunctions should be defined on the same Grid
    grids = {getattr(p, 'grid', None) for p in self.input} - {None}
    if len(grids) > 1 and configuration['mpi']:
        raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
    except KeyError:
        grid = None

    # Process Dimensions (derived go after as they might need/affect their parents)
    derived, main = split(self.dimensions, lambda i: i.is_Derived)
    for d in main:
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))
    for d in derived:
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

    # Process Objects (which may need some `args`)
    for o in self.objects:
        args.update(o._arg_values(args, **kwargs))

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
            args.update(p._arg_as_ctype(args, alias=p))

    # Add in the profiler argument
    args[self._profiler.name] = self._profiler.timer.reset()

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    args = self._autotune(args,
                          kwargs.pop('autotune', configuration['autotuning']))

    # Check all user-provided keywords are known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    return args
def collect(extracted, ispace, min_storage):
    """
    Find groups of aliasing expressions.

    We shall introduce the following (loose) terminology:

        * A ``terminal`` is the leaf of a mathematical operation. Terminals
          can be numbers (n), literals (l), or Indexeds (I).
        * ``R`` is the relaxation operator := ``R(n) = n``, ``R(l) = l``,
          ``R(I) = J``, where ``J`` has the same base as ``I`` but with all
          offsets stripped away. For example, ``R(a[i+2,j-1]) = a[i,j]``.
        * A ``relaxed expression`` is an expression in which all of the
          terminals are relaxed.

    Now we define the concept of aliasing. We say that an expression A
    aliases an expression B if:

        * ``R(A) == R(B)``
        * all pairwise Indexeds in A and B access memory locations at a
          fixed constant distance along each Dimension.

    For example, consider the following expressions:

        * a[i+1] + b[i+1]
        * a[i+1] + b[j+1]
        * a[i] + c[i]
        * a[i+2] - b[i+2]
        * a[i+2] + b[i]
        * a[i-1] + b[i-1]

    Out of the expressions above, the following alias to `a[i] + b[i]`:

        * a[i+1] + b[i+1] : same operands and operations, distance along i: 1
        * a[i-1] + b[i-1] : same operands and operations, distance along i: -1

    Whereas the following do not:

        * a[i+1] + b[j+1] : because at least one index differs
        * a[i] + c[i] : because at least one of the operands differs
        * a[i+2] - b[i+2] : because at least one operation differs
        * a[i+2] + b[i] : because the distances along ``i`` differ (+2 and +0)
    """
    # Find the potential aliases
    found = []
    for expr in extracted:
        assert not expr.is_Equality

        indexeds = retrieve_indexed(expr)

        bases = []
        offsets = []
        for i in indexeds:
            ii = IterationInstance(i)
            if ii.is_irregular:
                break

            base = []
            offset = []
            for e, ai in zip(ii, ii.aindices):
                if q_constant(e):
                    base.append(e)
                else:
                    base.append(ai)
                    offset.append((ai, e - ai))
            bases.append(tuple(base))
            offsets.append(LabeledVector(offset))

        if not indexeds or len(bases) == len(indexeds):
            found.append(Candidate(expr, ispace, indexeds, bases, offsets))

    # Create groups of aliasing expressions
    mapper = OrderedDict()
    unseen = list(found)
    while unseen:
        c = unseen.pop(0)
        group = [c]
        for u in list(unseen):
            # Is the arithmetic structure of `c` and `u` equivalent ?
            if not compare_ops(c.expr, u.expr):
                continue

            # Is `c` translated w.r.t. `u` ?
            if not c.translated(u):
                continue

            group.append(u)
            unseen.remove(u)
        group = Group(group)

        if min_storage:
            k = group.dimensions_translated
        else:
            k = group.dimensions
        mapper.setdefault(k, []).append(group)

    aliases = AliasMapper()
    queue = list(mapper.values())
    while queue:
        groups = queue.pop(0)

        while groups:
            # For each Dimension, determine the Minimum Intervals (MI) spanning
            # all of the Groups' diameters
            # Example: x's largest_diameter=2 => [x[-2,0], x[-1,1], x[0,2]]
            # Note: Groups that cannot evaluate their diameter are dropped
            mapper = defaultdict(int)
            for g in list(groups):
                try:
                    mapper.update({d: max(mapper[d], v)
                                   for d, v in g.diameter.items()})
                except ValueError:
                    groups.remove(g)
            intervalss = {d: make_rotations_table(d, v) for d, v in mapper.items()}

            # For each Group, find a rotation that is compatible with a given MI
            mapper = {}
            for d, intervals in intervalss.items():
                # Not all groups may access all dimensions
                # Example: `d=t` and groups=[Group(...[t, x]...),
                #                            Group(...[time, x]...)]
                impacted = [g for g in groups if d in g.dimensions]

                for interval in list(intervals):
                    found = {g: g.find_rotation_distance(d, interval)
                             for g in impacted}
                    if all(distance is not None for distance in found.values()):
                        # `interval` is OK !
                        mapper[interval] = found
                        break

            if len(mapper) == len(intervalss):
                break

            # Try again with fewer groups
            # Heuristic: first try retaining the larger ones
            smallest = len(min(groups, key=len))
            fallback = groups
            groups, remainder = split(groups, lambda g: len(g) > smallest)
            if groups:
                queue.append(remainder)
            elif len(remainder) > 1:
                # No luck with the heuristic, e.g. there are two groups
                # and both have same `len`
                queue.append(fallback[1:])
                groups = [fallback.pop(0)]
            else:
                break

        for g in groups:
            c = g.pivot
            distances = defaultdict(int, [(i.dim, v.get(g))
                                          for i, v in mapper.items()])

            # Create the basis alias
            offsets = [LabeledVector([(l, v[l] + distances[l]) for l in v.labels])
                       for v in c.offsets]
            subs = {i: i.function[[l + v.fromlabel(l, 0) for l in b]]
                    for i, b, v in zip(c.indexeds, c.bases, offsets)}
            alias = uxreplace(c.expr, subs)

            # All aliased expressions
            aliaseds = [extracted[i.expr] for i in g]

            # Distance of each aliased expression from the basis alias
            distances = []
            for i in g:
                distance = [o.distance(v) for o, v in zip(i.offsets, offsets)]
                distance = [(d, set(v))
                            for d, v in LabeledVector.transpose(*distance)]
                distances.append(LabeledVector([(d, v.pop())
                                                for d, v in distance]))

            aliases.add(alias, list(mapper), aliaseds, distances)

    return aliases
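
# A hand-worked instance of the aliasing test from the docstring above, in
# plain SymPy: two expressions alias iff their structure matches and all
# Indexed pairs are translated by the same constant distance along each
# Dimension.
import sympy

i = sympy.Symbol('i')
a, b = sympy.IndexedBase('a'), sympy.IndexedBase('b')

def offsets(expr):
    # offset of each access w.r.t. `i`, keyed by the accessed array
    return {str(e.base): e.indices[0] - i for e in expr.atoms(sympy.Indexed)}

e0 = a[i] + b[i]
e1 = a[i + 1] + b[i + 1]   # translated by +1 along i -> aliases e0
e2 = a[i + 2] + b[i]       # distances differ (+2 vs +0) -> no alias

dist = lambda u, v: {k: du - offsets(v)[k] for k, du in offsets(u).items()}
assert set(dist(e1, e0).values()) == {1}     # uniform distance: alias
assert len(set(dist(e2, e0).values())) == 2  # mixed distances: not an alias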
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args = ReducerMap(args.reduce_all())
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = args.reduce_all()

    # All DiscreteFunctions should be defined on the same Grid
    grids = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
    grids.update({getattr(p, 'grid', None) for p in defaults})
    grids.discard(None)
    if len(grids) > 1 and configuration['mpi']:
        raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
        args.update(grid._arg_values(**kwargs))
    except KeyError:
        grid = None

    # Process Dimensions
    # A topological sorting is used so that derived Dimensions are processed after
    # their parents (note that a leaf Dimension can have an arbitrarily long list
    # of ancestors)
    dag = DAG(self.dimensions,
              [(i, i.parent) for i in self.dimensions if i.is_Derived])
    for d in reversed(dag.topological_sort()):
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

    # Process Objects (which may need some `args`)
    for o in self.objects:
        args.update(o._arg_values(args, grid=grid, **kwargs))

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])
    for d in self.dimensions:
        if d.is_Derived:
            d._arg_check(args, self._dspace[d])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
            args.update(p._arg_as_ctype(args, alias=p))

    # Execute autotuning and adjust arguments accordingly
    args = self._autotune(args,
                          kwargs.pop('autotune', configuration['autotuning']))

    # Check all user-provided keywords are known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    # Attach `grid` to the arguments map
    args = ArgumentsMap(grid, **args)

    return args
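
# A stripped-down sketch (hypothetical Dimension stand-ins, not Devito's DAG
# class) of the topological processing order used above: every Dimension is
# handled only after all of its ancestors.
from collections import namedtuple

Dim = namedtuple('Dim', 'name parent')

x = Dim('x', None)
xi = Dim('xi', x)     # derived from x
xi0 = Dim('xi0', xi)  # derived from xi

dims = [xi0, x, xi]
order = []

def visit(d):
    if d in order:
        return
    if d.parent is not None:
        visit(d.parent)  # parents first
    order.append(d)

for d in dims:
    visit(d)

assert [d.name for d in order] == ['x', 'xi', 'xi0']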
def relax_incr_dimensions(iet, **kwargs):
    """
    Recast Iterations over IncrDimensions as ElementalFunctions; insert
    ElementalCalls to iterate over the "main" and "remainder" regions induced
    by the IncrDimensions.
    """
    sregistry = kwargs['sregistry']

    efuncs = []
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        iterations = [i for i in tree if i.dim.is_Incr]
        if not iterations:
            continue

        root = iterations[0]
        if root in mapper:
            continue

        outer, inner = split(iterations, lambda i: not i.dim.parent.is_Incr)

        # Compute the iteration ranges
        ranges = []
        for i in outer:
            maxb = i.symbolic_max - (i.symbolic_size % i.dim.step)
            ranges.append(((i.symbolic_min, maxb, i.dim.step),
                           (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

        # Remove any offsets
        # E.g., `x = x_m + 2 to x_M - 2` --> `x = x_m to x_M`
        outer = [i._rebuild(limits=(i.dim.root.symbolic_min,
                                    i.dim.root.symbolic_max, i.step))
                 for i in outer]

        # Create the ElementalFunction
        name = sregistry.make_name(prefix="bf")
        body = compose_nodes(outer)
        dynamic_parameters = flatten((i.symbolic_bounds, i.step) for i in outer)
        dynamic_parameters.extend([i.step for i in inner
                                   if not is_integer(i.step)])
        efunc = make_efunc(name, body, dynamic_parameters)
        efuncs.append(efunc)

        # Create the ElementalCalls
        calls = []
        for p in product(*ranges):
            dynamic_args_mapper = {}
            for i, (m, M, b) in zip(outer, p):
                dynamic_args_mapper[i.symbolic_min] = m
                dynamic_args_mapper[i.symbolic_max] = M
                dynamic_args_mapper[i.step] = b
                for j in inner:
                    if j.dim.root is i.dim.root and not is_integer(j.step):
                        value = j.step if b is i.step else b
                        dynamic_args_mapper[j.step] = (value,)
            calls.append(efunc.make_call(dynamic_args_mapper))

        mapper[root] = List(body=calls)

    iet = Transformer(mapper).visit(iet)

    return iet, {'efuncs': efuncs}
def cbk_search(expr):
    found, others = split(expr.args, lambda a: a in basextr)
    ret = [expr] if found else []
    for a in others:
        ret.extend(cbk_search(a))
    return ret
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args = ReducerMap(args.reduce_all())
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = args.reduce_all()

    # All DiscreteFunctions should be defined on the same Grid
    grids = {getattr(p, 'grid', None) for p in self.input} - {None}
    if len(grids) > 1 and configuration['mpi']:
        raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
    except KeyError:
        grid = None

    # Process Dimensions (derived go after as they might need/affect their parents)
    derived, main = split(self.dimensions, lambda i: i.is_Derived)
    for d in main:
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))
    for d in derived:
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

    # Process Objects (which may need some `args`)
    for o in self.objects:
        args.update(o._arg_values(args, **kwargs))

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
            args.update(p._arg_as_ctype(args, alias=p))

    # Add in the profiler argument
    args[self._profiler.name] = self._profiler.timer.reset()

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    args = self._autotune(args,
                          kwargs.pop('autotune', configuration['autotuning']))

    # Check all user-provided keywords are known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    return args
def _prepare_arguments(self, autotune=None, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    # Sanity check -- all user-provided keywords must be known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = kwargs['args'] = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args.reduce_inplace()
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = kwargs['args'] = args.reduce_all()

    # DiscreteFunctions may be created from CartesianDiscretizations, which in
    # turn could be Grids or SubDomains. Both may provide arguments
    discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
    discretizations.update({getattr(p, 'grid', None) for p in defaults})
    discretizations.discard(None)
    for i in discretizations:
        args.update(i._arg_values(**kwargs))

    # There can only be one Grid from which DiscreteFunctions were created
    grids = {i for i in discretizations if isinstance(i, Grid)}
    if len(grids) > 1:
        # We loosely tolerate multiple Grids for backwards compatibility with
        # spatial subsampling, which should however be revisited; with MPI it
        # would definitely break!
        if configuration['mpi']:
            raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
    except KeyError:
        grid = None

    # An ArgumentsMap carries additional metadata that may be used by
    # the subsequent phases of the arguments processing
    args = kwargs['args'] = ArgumentsMap(args, grid, self._allocator,
                                         self._platform)

    # Process Dimensions
    # A topological sorting is used so that derived Dimensions are processed after
    # their parents (note that a leaf Dimension can have an arbitrarily long list
    # of ancestors)
    dag = DAG(self.dimensions,
              [(i, i.parent) for i in self.dimensions if i.is_Derived])
    for d in reversed(dag.topological_sort()):
        args.update(d._arg_values(self._dspace[d], grid, **kwargs))

    # Process Objects
    for o in self.objects:
        args.update(o._arg_values(grid=grid, **kwargs))

    # In some "lower-level" Operators implementing a random piece of C, such as
    # one or more calls to third-party library functions, there could still be
    # at this point unprocessed arguments (e.g., scalars)
    kwargs.pop('args')
    args.update({k: v for k, v in kwargs.items() if k not in args})

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])
    for d in self.dimensions:
        if d.is_Derived:
            d._arg_check(args, self._dspace[d])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_finalize(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_finalize`
            args.update(p._arg_finalize(args, alias=p))

    # Execute autotuning and adjust arguments accordingly
    args.update(self._autotune(args, autotune or configuration['autotuning']))

    return args
def detect_accesses(exprs):
    """
    Return a mapper `M : F -> S`, where F are Functions appearing in `exprs`
    and S are Stencils. `M[f]` represents all data accesses to `f` within
    `exprs`. Also map `M[None]` to all Dimensions used in `exprs` as plain
    symbols, rather than as array indices.
    """
    # Compute M : F -> S
    mapper = defaultdict(Stencil)
    for e in retrieve_indexed(exprs, deep=True):
        f = e.function

        for a, d0 in zip(e.indices, f.dimensions):
            if isinstance(a, ModuloDimension) and a.parent.is_Stepping:
                # Explicitly unfold SteppingDimension-induced ModuloDimensions
                mapper[f][a.root].update([a.offset - a.root])
            elif isinstance(a, Dimension):
                mapper[f][a].update([0])
            elif a.is_Add:
                dims = {i for i in a.free_symbols if isinstance(i, Dimension)}
                if not dims:
                    continue
                elif len(dims) > 1:
                    # There are two reasons we may end up here: 1) indirect
                    # accesses (e.g., a[b[x, y] + 1, y]) or 2) as a result of
                    # skewing-based optimizations, such as time skewing (e.g.,
                    # `x - time + 1`) or CIRE rotation (e.g., `x + xx - 4`)
                    d, others = split(dims, lambda i: d0 in i._defines)
                    if any(i.is_Indexed for i in a.args) or len(d) != 1:
                        # Case 1) -- with indirect accesses there's not much
                        # we can infer
                        continue
                    else:
                        # Case 2)
                        d, = d
                        _, o = split(others, lambda i: i.is_Custom)
                        off = sum(i for i in a.args
                                  if i.is_integer or i.free_symbols & o)
                else:
                    d, = dims

                    # At this point, typically, the offset will be an integer.
                    # In some cases though it could be an expression, e.g.
                    # `db0 + time_m - 1` (from CustomDimensions due to buffering)
                    # or `x + o_x` (from MPI routines) or `time - ns` (from
                    # guarded accesses to TimeFunctions) or ... In all these
                    # cases, what really matters is the integer part of the
                    # offset, as any other symbols may resolve to zero at
                    # runtime, which is the base case scenario we fall back to
                    off = sum(i for i in a.args if i.is_integer)

                # NOTE: `d in a.args` is too restrictive because of guarded
                # accesses such as `time / factor - 1`
                assert d in a.free_symbols

                if (d.is_Custom or d.is_Default) and d.symbolic_size.is_integer:
                    # Explicitly unfold Default and CustomDimensions
                    mapper[f][d].update(range(off, d.symbolic_size + off))
                else:
                    mapper[f][d].add(off)

    # Compute M[None]
    other_dims = set()
    for e in as_tuple(exprs):
        other_dims.update(i for i in e.free_symbols if isinstance(i, Dimension))
        other_dims.update(e.implicit_dims)
    mapper[None] = Stencil([(i, 0) for i in other_dims])

    return mapper
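
# A scaled-down illustration (plain SymPy, made-up array and dimensions) of
# the mapper this function builds: per-Function, per-Dimension sets of
# constant access offsets.
import sympy
from collections import defaultdict

x, y = sympy.symbols('x y')
u = sympy.IndexedBase('u')

exprs = [u[x + 1, y], u[x - 1, y], u[x, y + 2]]

mapper = defaultdict(lambda: defaultdict(set))
for e in exprs:
    for a, d in zip(e.indices, (x, y)):
        off = a - d  # e.g., (x + 1) - x -> 1
        if off.is_integer:
            mapper[e.base][d].add(off)

assert mapper[u][x] == {-1, 0, 1} and mapper[u][y] == {0, 2}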
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive default values
    for any remaining arguments.
    """
    # Process data-carriers (first overrides, then fill up with whatever is needed)
    args = ReducerMap()
    args.update([p._arg_values(**kwargs) for p in self.input if p.name in kwargs])
    args.update([p._arg_values() for p in self.input if p.name not in args])
    args = args.reduce_all()

    # All TensorFunctions should be defined on the same Grid
    functions = [kwargs.get(p, p) for p in self.input if p.is_TensorFunction]
    mapper = ReducerMap([('grid', i.grid) for i in functions if i.grid])
    try:
        grid = mapper.unique('grid')
    except (KeyError, ValueError):
        if mapper and configuration['mpi']:
            raise RuntimeError("Multiple `Grid`s found before `apply`")
        grid = None

    # Process dimensions (derived go after as they might need/affect their parents)
    derived, main = split(self.dimensions, lambda i: i.is_Derived)
    for p in main:
        args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))
    for p in derived:
        args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))

    # Sanity check
    for p in self.input:
        p._arg_check(args, self._dspace[p])

    # Derive additional values for DLE arguments
    # TODO: This is not pretty, but it works for now. Ideally, the
    # DLE arguments would be massaged into the IET so as to comply
    # with the rest of the argument derivation procedure.
    for arg in self._dle_args:
        dim = arg.argument
        osize = (1 + arg.original_dim.symbolic_end -
                 arg.original_dim.symbolic_start).subs(args)
        if arg.value is None:
            args[dim.symbolic_size.name] = osize
        elif isinstance(arg.value, int):
            args[dim.symbolic_size.name] = arg.value
        else:
            args[dim.symbolic_size.name] = arg.value(osize)

    # Add in the profiler argument
    args[self.profiler.name] = self.profiler.timer.reset()

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    if kwargs.pop('autotune', configuration['autotuning'].level):
        args = self._autotune(args)

    # Check all user-provided keywords are known to the Operator
    for k, v in kwargs.items():
        if k not in self._known_arguments:
            raise ValueError("Unrecognized argument %s=%s passed to `apply`"
                             % (k, v))

    return args