def map_product(self, expr, derivatives):
    """Rewrite a product containing at most one non-scalar factor.

    Scalar factors are pulled out into a common *factor*; the single
    non-scalar factor is recursed into, and any derivatives collected
    from it are multiplied by *factor* and merged into *derivatives*.
    If there is not exactly one non-scalar factor, the whole product is
    handed to DerivativeJoiner unchanged.

    Removed: an inner function ``do_map`` that was defined but never
    called (dead code).
    """
    from grudge.symbolic.tools import is_scalar
    from pytools import partition

    scalars, nonscalars = partition(is_scalar, expr.children)

    if len(nonscalars) != 1:
        # Zero or several non-scalar factors: give up on factoring
        # scalars out and process the product as an opaque unit.
        return DerivativeJoiner()(expr)
    else:
        from pymbolic import flattened_product
        factor = flattened_product(scalars)
        nonscalar, = nonscalars

        sub_derivatives = {}
        nonscalar = self.rec(nonscalar, sub_derivatives)

        # Re-attribute derivatives found inside the non-scalar factor
        # to the enclosing product by multiplying in the scalar factor.
        for operator, operands in sub_derivatives.items():
            for operand in operands:
                derivatives.setdefault(operator, []).append(
                        factor * operand)

        return factor * nonscalar
def __init__(self, vec_expr_info_list, result_dtype_getter):
    # Compile-time preprocessing for a batch of vector expressions:
    # classify external dependencies into vector vs. scalar, assign them
    # stable kernel-argument names ("hedge_v<i>"/"hedge_s<i>"), and
    # rewrite each expression in terms of those argument names
    # subscripted by the loop variable "i".
    self.result_dtype_getter = result_dtype_getter

    from hedge.optemplate.primitives import ScalarParameter
    from hedge.optemplate.mappers import (DependencyMapper,
            GeometricFactorCollector)
    from operator import or_
    from pymbolic import var

    dep_mapper = DependencyMapper(include_subscripts=True,
            include_lookups=True, include_calls="descend_args")
    gfc = GeometricFactorCollector()

    # gather all dependencies
    deps = (reduce(or_, (dep_mapper(vei.expr)
                for vei in vec_expr_info_list))
            | reduce(or_, (gfc(vei.expr)
                for vei in vec_expr_info_list)))

    # We're compiling a batch of vector expressions, some of which may
    # depend on results generated in this same batch. These dependencies
    # are also captured above, but they're not genuine external dependencies.
    # Hence we remove them here:
    deps -= set(var(vei.name) for vei in vec_expr_info_list)

    from pytools import partition

    def is_vector_pred(dep):
        # Anything that is not an explicit ScalarParameter is treated
        # as a vector (per-element) dependency.
        return not isinstance(dep, ScalarParameter)

    vdeps, sdeps = partition(is_vector_pred, deps)

    # Sort by string form so the generated argument order (and hence
    # the argument names assigned below) is deterministic across runs.
    vdeps = [(str(vdep), vdep) for vdep in vdeps]
    sdeps = [(str(sdep), sdep) for sdep in sdeps]
    vdeps.sort()
    sdeps.sort()
    self.vector_deps = [vdep for key, vdep in vdeps]
    self.scalar_deps = [sdep for key, sdep in sdeps]

    self.vector_dep_names = [
            "hedge_v%d" % i for i in range(len(self.vector_deps))
            ]
    self.scalar_dep_names = [
            "hedge_s%d" % i for i in range(len(self.scalar_deps))
            ]

    # dtypes of every constant occurring in the expressions;
    # presumably consumed by result_dtype_getter later -- TODO confirm
    self.constant_dtypes = [
            numpy.array(const).dtype
            for vei in vec_expr_info_list
            for const in ConstantGatherMapper()(vei.expr)
            ]

    var_i = var("i")
    # Map each external dependency -- and each in-batch result that is
    # returned -- to its kernel-argument form: vectors are subscripted
    # by the loop index, scalars are passed through by name.
    subst_map = dict(
            list(
                zip(self.vector_deps,
                    [var(vecname)[var_i]
                        for vecname in self.vector_dep_names]))
            + list(
                zip(self.scalar_deps,
                    [var(scaname)
                        for scaname in self.scalar_dep_names]))
            + [(var(vei.name), var(vei.name)[var_i])
                for vei in vec_expr_info_list
                if not vei.do_not_return])

    def subst_func(expr):
        # Return None for unknown expressions so the substitution
        # mapper falls back to its default behavior.
        try:
            return subst_map[expr]
        except KeyError:
            return None

    self.vec_expr_info_list = [
            vei.copy(expr=DefaultingSubstitutionMapper(subst_func)(vei.expr))
            for vei in vec_expr_info_list
            ]

    self.result_vec_expr_info_list = [
            vei for vei in vec_expr_info_list if not vei.do_not_return
            ]
def aggregate_assignments(inf_mapper, instructions, result,
        max_vectors_in_batch_expr):
    """Greedily merge compatible :class:`Assign` instructions into
    multi-assignments, then topologically order the expressions inside
    each merged assignment.

    :arg inf_mapper: a type-inference mapper; used here for its
        ``function_registry`` and ``infer_for_name``.
    :arg instructions: the instruction list to transform.
    :arg result: the overall result expression (or object array of
        expressions); anything it references must be returned by the
        generated assignments.
    :arg max_vectors_in_batch_expr: if not None, an upper bound on
        assignees + vector dependencies allowed in one merged batch.
    :returns: a new instruction list with aggregated assignments
        followed by the untouched non-assignment instructions.
    """
    from pymbolic.primitives import Variable

    function_registry = inf_mapper.function_registry

    # {{{ aggregation helpers

    def get_complete_origins_set(insn, skip_levels=0):
        # Transitively collect the instructions whose results *insn*
        # depends on, omitting the first *skip_levels* levels of the
        # dependency chain. Memoized in insn_to_origins_cache.
        try:
            return insn_to_origins_cache[insn]
        except KeyError:
            pass

        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels-1)

        insn_to_origins_cache[insn] = result

        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        # Cached set of an instruction's assignees as Variable nodes.
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = {Variable(assignee)
                    for assignee in insn.get_assignees()}
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        # Fuse two Assigns into one; dependencies on names assigned
        # within the fused instruction become internal and are dropped.
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - {Variable(name) for name in names}

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                priority=max(ass_1.priority, ass_2.priority))

    # }}}

    # {{{ main aggregation pass

    insn_to_origins_cache = {}

    origins_map = {
            assignee: insn
            for insn in instructions
            for assignee in insn.get_assignees()}

    from pytools import partition
    from grudge.symbolic.primitives import DTAG_SCALAR

    # Only plain, vector-valued, non-external-call Assigns are
    # candidates for aggregation; everything else passes through.
    unprocessed_assigns, other_insns = partition(
            lambda insn: (
                isinstance(insn, Assign)
                and not isinstance(insn, ToDiscretizationScopedAssign)
                and not isinstance(insn, FromDiscretizationScopedAssign)
                and not is_external_call(insn.exprs[0], function_registry)
                and not any(
                    inf_mapper.infer_for_name(n).domain_tag == DTAG_SCALAR
                    for n in insn.names)),
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from grudge.tools import is_zero

    i = 0

    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        # Candidates share data (dependency or assignee overlap) and
        # have equal priority.
        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if max_vectors_in_batch_expr is not None:
                    # Respect the batch-size limit on total vector
                    # arguments of the fused assignment.
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(
                                each_vector=True)
                            | other_assign.get_dependencies(
                                each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                # Only fuse if neither (indirectly) depends on the
                # other; otherwise a cycle would result.
                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    # Names needed by other instructions or by the final result must be
    # returned; everything else can be marked do_not_return below.
    externally_used_names = {
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies()}

    if isinstance(result, np.ndarray) and result.dtype.char == "O":
        externally_used_names |= {expr for expr in result}
    else:
        externally_used_names |= {result}

    def schedule_and_finalize_assignment(ass):
        # Topologically order the (name, expr) pairs within one merged
        # assignment so intra-assignment dependencies are satisfied.
        dep_mapper = _make_dep_mapper(include_subscripts=False)

        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = {name for name, expr in names_exprs}
        names_exprs_deps = [
                (name, expr,
                    {dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)} & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                # No progress possible: a dependency cycle inside the
                # merged assignment.
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return Assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
        for ass in processed_assigns] + other_insns
def __init__(self, vec_expr_info_list, result_dtype_getter):
    # Compile-time preprocessing for a batch of vector expressions:
    # classify external dependencies into vector vs. scalar, assign them
    # stable kernel-argument names ("hedge_v<i>"/"hedge_s<i>"), and
    # rewrite each expression in terms of those argument names indexed
    # (via .index()) by the loop variable "i".
    self.result_dtype_getter = result_dtype_getter

    from hedge.optemplate.primitives import ScalarParameter
    from hedge.optemplate.mappers import (
            DependencyMapper, GeometricFactorCollector)
    from operator import or_
    from pymbolic import var

    dep_mapper = DependencyMapper(
            include_subscripts=True,
            include_lookups=True,
            include_calls="descend_args")
    gfc = GeometricFactorCollector()

    # gather all dependencies
    deps = (reduce(or_, (dep_mapper(vei.expr)
                for vei in vec_expr_info_list))
            | reduce(or_, (gfc(vei.expr)
                for vei in vec_expr_info_list)))

    # We're compiling a batch of vector expressions, some of which may
    # depend on results generated in this same batch. These dependencies
    # are also captured above, but they're not genuine external dependencies.
    # Hence we remove them here:
    deps -= set(var(vei.name) for vei in vec_expr_info_list)

    from pytools import partition

    def is_vector_pred(dep):
        # Anything that is not an explicit ScalarParameter is treated
        # as a vector (per-element) dependency.
        return not isinstance(dep, ScalarParameter)

    vdeps, sdeps = partition(is_vector_pred, deps)

    # Sort by string form so the generated argument order (and hence
    # the argument names assigned below) is deterministic across runs.
    vdeps = [(str(vdep), vdep) for vdep in vdeps]
    sdeps = [(str(sdep), sdep) for sdep in sdeps]
    vdeps.sort()
    sdeps.sort()
    self.vector_deps = [vdep for key, vdep in vdeps]
    self.scalar_deps = [sdep for key, sdep in sdeps]

    self.vector_dep_names = ["hedge_v%d" % i
            for i in range(len(self.vector_deps))]
    self.scalar_dep_names = ["hedge_s%d" % i
            for i in range(len(self.scalar_deps))]

    # dtypes of every constant occurring in the expressions;
    # presumably consumed by result_dtype_getter later -- TODO confirm
    self.constant_dtypes = [
            numpy.array(const).dtype
            for vei in vec_expr_info_list
            for const in ConstantGatherMapper()(vei.expr)]

    var_i = var("i")
    # Map each external dependency -- and each in-batch result that is
    # returned -- to its kernel-argument form: vectors are indexed by
    # the loop variable, scalars are passed through by name.
    subst_map = dict(
            list(zip(self.vector_deps,
                [var(vecname).index(var_i)
                    for vecname in self.vector_dep_names]))
            + list(zip(self.scalar_deps,
                [var(scaname)
                    for scaname in self.scalar_dep_names]))
            + [(var(vei.name), var(vei.name).index(var_i))
                for vei in vec_expr_info_list
                if not vei.do_not_return])

    def subst_func(expr):
        # Return None for unknown expressions so the substitution
        # mapper falls back to its default behavior.
        try:
            return subst_map[expr]
        except KeyError:
            return None

    self.vec_expr_info_list = [
            vei.copy(expr=DefaultingSubstitutionMapper(subst_func)(vei.expr))
            for vei in vec_expr_info_list]

    self.result_vec_expr_info_list = [
            vei for vei in vec_expr_info_list if not vei.do_not_return]
def aggregate_assignments(self, instructions, result):
    """Greedily merge compatible :class:`Assign` instructions into
    multi-assignments, then topologically order the expressions inside
    each merged assignment.

    :arg instructions: the instruction list to transform.
    :arg result: the overall result expression (or object array of
        expressions); anything it references must be returned by the
        generated assignments.
    :returns: a new instruction list with aggregated assignments
        followed by the untouched non-assignment instructions.

    Fixes relative to the previous revision:

    * the zero-assignment filter popped the *last* list element
      (``pop()``) instead of the element under inspection (``pop(i)``),
      removing an unrelated, possibly nonzero assignment from
      aggregation;
    * ``zip(ass.names, ass.exprs)`` was iterated twice, which yields an
      empty second pass under Python 3 -- it is now materialized into a
      list first.
    """
    from pymbolic.primitives import Variable

    # aggregation helpers -------------------------------------------------
    def get_complete_origins_set(insn, skip_levels=0):
        # Transitively collect the instructions whose results *insn*
        # depends on, omitting the first *skip_levels* levels.
        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels-1)

        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        # Cached set of an instruction's assignees as Variable nodes.
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = set(Variable(assignee)
                    for assignee in insn.get_assignees())
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        # Fuse two Assigns into one; dependencies on names assigned
        # within the fused instruction become internal and are dropped.
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - set(Variable(name) for name in names)

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                dep_mapper_factory=self.dep_mapper_factory,
                priority=max(ass_1.priority, ass_2.priority))

    # main aggregation pass -----------------------------------------------
    origins_map = dict(
            (assignee, insn)
            for insn in instructions
            for assignee in insn.get_assignees())

    from pytools import partition
    unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign),
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from pytools import any
    from hedge.tools import is_zero

    i = 0

    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            # BUGFIX: pop the assignment under inspection (index i), not
            # the tail of the list.
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        # Candidates share data (dependency or assignee overlap) and
        # have equal priority.
        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if self.max_vectors_in_batch_expr is not None:
                    # Respect the batch-size limit on total vector
                    # arguments of the fused assignment.
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(
                                each_vector=True)
                            | other_assign.get_dependencies(
                                each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > self.max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                # Only fuse if neither (indirectly) depends on the
                # other; otherwise a cycle would result.
                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    # Names needed by other instructions or by the final result must be
    # returned; everything else can be marked do_not_return below.
    externally_used_names = set(
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies())

    from hedge.tools import is_obj_array
    if is_obj_array(result):
        externally_used_names |= set(expr for expr in result)
    else:
        externally_used_names |= set([result])

    def schedule_and_finalize_assignment(ass):
        # Topologically order the (name, expr) pairs within one merged
        # assignment so intra-assignment dependencies are satisfied.
        dep_mapper = self.dep_mapper_factory()

        # BUGFIX: materialize--the bare zip iterator would be exhausted
        # after its first traversal under Python 3.
        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = set(name for name, expr in names_exprs)
        names_exprs_deps = [
                (name, expr,
                    set(dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)) & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                # No progress possible: a dependency cycle inside the
                # merged assignment.
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
        for ass in processed_assigns] + other_insns
def aggregate_assignments(self, instructions, result):
    """Greedily merge compatible vector-valued :class:`Assign`
    instructions into multi-assignments, then topologically order the
    expressions inside each merged assignment.

    :arg instructions: the instruction list to transform.
    :arg result: the overall result expression (or object array of
        expressions); anything it references must be returned by the
        generated assignments.
    :returns: a new instruction list with aggregated assignments
        followed by the untouched non-assignment instructions.

    Fixes relative to the previous revision:

    * the zero-assignment filter popped the *last* list element
      (``pop()``) instead of the element under inspection (``pop(i)``),
      removing an unrelated, possibly nonzero assignment from
      aggregation;
    * ``zip(ass.names, ass.exprs)`` was iterated twice, which yields an
      empty second pass under Python 3 -- it is now materialized into a
      list first.
    """
    from pymbolic.primitives import Variable

    # {{{ aggregation helpers

    def get_complete_origins_set(insn, skip_levels=0):
        # Transitively collect the instructions whose results *insn*
        # depends on, omitting the first *skip_levels* levels.
        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels - 1)

        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        # Cached set of an instruction's assignees as Variable nodes.
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = set(
                    Variable(assignee)
                    for assignee in insn.get_assignees())
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        # Fuse two Assigns into one; dependencies on names assigned
        # within the fused instruction become internal and are dropped.
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - set(Variable(name) for name in names)

        return Assign(names=names,
                exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                dep_mapper_factory=self.dep_mapper_factory,
                priority=max(ass_1.priority, ass_2.priority))

    # }}}

    # {{{ main aggregation pass

    origins_map = dict((assignee, insn)
            for insn in instructions
            for assignee in insn.get_assignees())

    from pytools import partition
    # Scalar-valued assignments are not aggregated; they pass through.
    unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign)
                and not insn.is_scalar_valued,
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from pytools import any
    from hedge.tools import is_zero

    i = 0

    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            # BUGFIX: pop the assignment under inspection (index i), not
            # the tail of the list.
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        # Candidates share data (dependency or assignee overlap) and
        # have equal priority.
        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(my_assign,
                    skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if self.max_vectors_in_batch_expr is not None:
                    # Respect the batch-size limit on total vector
                    # arguments of the fused assignment.
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(each_vector=True)
                            | other_assign.get_dependencies(each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > self.max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                # Only fuse if neither (indirectly) depends on the
                # other; otherwise a cycle would result.
                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    # Names needed by other instructions or by the final result must be
    # returned; everything else can be marked do_not_return below.
    externally_used_names = set(expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies())

    from hedge.tools import is_obj_array
    if is_obj_array(result):
        externally_used_names |= set(expr for expr in result)
    else:
        externally_used_names |= set([result])

    def schedule_and_finalize_assignment(ass):
        # Topologically order the (name, expr) pairs within one merged
        # assignment so intra-assignment dependencies are satisfied.
        dep_mapper = self.dep_mapper_factory()

        # BUGFIX: materialize--the bare zip iterator would be exhausted
        # after its first traversal under Python 3.
        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = set(name for name, expr in names_exprs)
        names_exprs_deps = [
                (name, expr,
                    set(dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)) & my_assignees)
                for name, expr in names_exprs
                ]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                # No progress possible: a dependency cycle inside the
                # merged assignment.
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[
                    Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs
                    ],
                priority=ass.priority)

    return [
        schedule_and_finalize_assignment(ass)
        for ass in processed_assigns
        ] + other_insns