def diff_kernel(kernel, diff_outputs, by, diff_iname_prefix="diff_i",
        batch_axes_in_by=frozenset(), copy_outputs=frozenset()):
    """Differentiate the variables named in *diff_outputs* with respect to
    the kernel argument named *by*.

    :arg diff_outputs: either a list of variable names or a single string of
        comma-separated names (surrounding whitespace and empty entries are
        dropped).
    :arg by: name of the kernel argument to differentiate by. Its shape is
        handed to the differentiation context as *additional_shape*.
    :arg diff_iname_prefix: forwarded unchanged to
        :class:`DifferentiationContext`.
    :arg batch_axes_in_by: a :class:`set` of axis indices in the variable
        named *by* that are not part of the differentiation.
    :arg copy_outputs: names that are passed, one by one, to
        ``import_output_var`` of the differentiation context.

    :return: a tuple ``(new_kernel, result)``. *new_kernel* is what the
        differentiation context's ``get_new_kernel()`` returns. *result* is
        the value ``get_diff_var`` returned for the *last* entry of
        *diff_outputs*, or an empty :class:`dict` if *diff_outputs* is empty.
    """
    # NOTE(review): 'batch_axes_in_by' is accepted but never read in this
    # function body -- confirm whether it is meant to reach the context.

    assert isinstance(kernel, LoopKernel)

    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=True)

    if isinstance(diff_outputs, str):
        stripped_names = (name.strip() for name in diff_outputs.split(","))
        diff_outputs = [name for name in stripped_names if name]

    additional_shape = kernel.arg_dict[by].shape
    name_generator = kernel.get_var_name_generator()

    # {{{ differentiate instructions

    diff_context = DifferentiationContext(
            kernel, name_generator, by,
            diff_iname_prefix=diff_iname_prefix,
            additional_shape=additional_shape)

    # NOTE(review): every iteration rebinds 'result', so only the value for
    # the last requested output survives. This looks like it was meant to
    # populate the dict -- confirm with callers before changing.
    result = {}
    for output_name in diff_outputs:
        result = diff_context.get_diff_var(output_name)

    for output_name in copy_outputs:
        diff_context.import_output_var(output_name)

    # }}}

    return diff_context.get_new_kernel(), result
def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i",
        batch_axes_in_by=frozenset(), copy_outputs=frozenset()):
    """Differentiate the variables named in *diff_outputs* with respect to
    the kernel argument named *by*.

    :arg knl: the kernel to differentiate.
    :arg diff_outputs: either a list of variable names or a single string of
        comma-separated names (surrounding whitespace and empty entries are
        dropped).
    :arg by: name of the kernel argument to differentiate by. Its shape is
        handed to the differentiation context as *additional_shape*.
    :arg diff_iname_prefix: forwarded unchanged to
        :class:`DifferentiationContext`.
    :arg batch_axes_in_by: a :class:`set` of axis indices in the variable
        named *by* that are not part of the differentiation.
    :arg copy_outputs: an iterable of names that are passed, one by one, to
        ``import_output_var`` of the differentiation context. The default is
        an immutable empty set, so no state is shared between calls.

    :return: a tuple ``(new_kernel, result)``. *new_kernel* is what the
        differentiation context's ``get_new_kernel()`` returns. *result* is
        the value ``get_diff_var`` returned for the *last* entry of
        *diff_outputs*, or an empty :class:`dict` if *diff_outputs* is empty.
    """
    # NOTE(review): 'batch_axes_in_by' is accepted but never read in this
    # function body -- confirm whether it is meant to reach the context.

    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=True)

    if isinstance(diff_outputs, str):
        diff_outputs = [
                dout.strip() for dout in diff_outputs.split(",")
                if dout.strip()]

    by_arg = knl.arg_dict[by]
    additional_shape = by_arg.shape

    var_name_gen = knl.get_var_name_generator()

    # {{{ differentiate instructions

    diff_context = DifferentiationContext(
            knl, var_name_gen, by,
            diff_iname_prefix=diff_iname_prefix,
            additional_shape=additional_shape)

    # NOTE(review): every iteration rebinds 'result', so only the value for
    # the last requested output survives. Left unchanged because altering
    # the return shape could break existing callers -- confirm intent.
    result = {}
    for dout in diff_outputs:
        result = diff_context.get_diff_var(dout)

    for cout in copy_outputs:
        diff_context.import_output_var(cout)

    # }}}

    return diff_context.get_new_kernel(), result
def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
        force_retain_argument=False):
    """Turn the assignment(s) to *lhs_name* (a temporary variable or an
    argument) into a :ref:`substitution-rule`.

    If the assignee is subscripted, the subscript indices (which must be
    plain variables) become the leading arguments of the substitution rule,
    followed by *extra_arguments*.

    :arg extra_arguments: a tuple of additional rule argument names, or a
        string of comma-separated names.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg force_retain_argument: If True and if *lhs_name* is an argument,
        it is kept even if it is no longer referenced.

    All usage sites of *lhs_name* matched by *within* are changed. If usage
    sites remain that were not matched, the original assignment to
    *lhs_name* and the variable itself are left in place.

    :raises LoopyError: if a writer of *lhs_name* also reads it, if a usage
        site has zero or more than one reachable writer, if nothing assigns
        to *lhs_name*, or if an assignee index is not a plain variable.
    """
    if isinstance(extra_arguments, str):
        extra_arguments = tuple(
                piece.strip() for piece in extra_arguments.split(","))

    # {{{ establish the relevant definition of lhs_name for each usage site

    expanded_kernel = expand_subst(kernel)

    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    dep_kernel = apply_single_writer_depencency_heuristic(expanded_kernel)

    insn_by_id = dep_kernel.id_to_insn

    def find_defining_insn_id(usage_insn_id):
        # Walks 'depends_on' recursively and collects the nearest writers of
        # lhs_name. Returns the single writer id, or None if there is none.
        usage_insn = insn_by_id[usage_insn_id]
        writer_ids = set()

        for dep_id in usage_insn.depends_on:
            dep_insn = insn_by_id[dep_id]

            if lhs_name not in dep_insn.write_dependency_names():
                # Not a writer itself: look further up the dependency chain.
                inherited_id = find_defining_insn_id(dep_id)
                if inherited_id is not None:
                    writer_ids.add(inherited_id)
                continue

            if lhs_name in dep_insn.read_dependency_names():
                raise LoopyError("instruction '%s' both reads *and* "
                        "writes '%s'--cannot transcribe to substitution "
                        "rule" % (dep_id, lhs_name))

            writer_ids.add(dep_id)

        if len(writer_ids) > 1:
            raise LoopyError("more than one write to '%s' found in "
                    "depdendencies of '%s'--definition cannot be resolved "
                    "(writer instructions ids: %s)"
                    % (lhs_name, usage_insn_id, ", ".join(writer_ids)))

        if writer_ids:
            sole_writer_id, = writer_ids
            return sole_writer_id

        return None

    usage_to_definition = {}

    for insn in dep_kernel.instructions:
        if lhs_name in insn.read_dependency_names():
            writer_id = find_defining_insn_id(insn.id)
            if writer_id is None:
                raise LoopyError("no write to '%s' found in dependency tree "
                        "of '%s'--definition cannot be resolved"
                        % (lhs_name, insn.id))

            usage_to_definition[insn.id] = writer_id

    # Writers are collected from the original kernel, not from dep_kernel.
    definition_insn_ids = set(
            insn.id for insn in kernel.instructions
            if lhs_name in insn.write_dependency_names())

    # }}}

    if not definition_insn_ids:
        raise LoopyError("no assignments to variable '%s' found"
                % lhs_name)

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    tts = AssignmentToSubstChanger(rule_mapping_context,
            lhs_name, definition_insn_ids,
            usage_to_definition, extra_arguments, within)

    kernel = rule_mapping_context.finish_kernel(tts.map_kernel(kernel))

    from loopy.kernel.data import SubstitutionRule, Assignment
    from pymbolic.primitives import Variable, Subscript

    # {{{ create new substitution rules

    new_substs = kernel.substitutions.copy()

    for def_id, subst_name in six.iteritems(
            tts.definition_insn_id_to_subst_name):
        def_insn = kernel.id_to_insn[def_id]
        assert isinstance(def_insn, Assignment)

        assignee = def_insn.assignee
        if isinstance(assignee, Subscript):
            indices = assignee.index_tuple
        elif isinstance(assignee, Variable):
            indices = ()
        else:
            raise LoopyError(
                    "Unrecognized LHS type: %s"
                    % type(assignee).__name__)

        rule_arg_names = []
        for index_expr in indices:
            if not isinstance(index_expr, Variable):
                raise LoopyError("In defining instruction '%s': "
                        "asignee index '%s' is not a plain variable. "
                        "Perhaps use loopy.affine_map_inames() "
                        "to perform substitution."
                        % (def_id, index_expr))

            rule_arg_names.append(index_expr.name)

        new_substs[subst_name] = SubstitutionRule(
                name=subst_name,
                arguments=tuple(rule_arg_names) + extra_arguments,
                expression=def_insn.expression)

    # }}}

    # {{{ delete temporary variable if possible

    # (copied below if modified)
    new_temp_vars = kernel.temporary_variables
    new_args = kernel.args

    if (lhs_name in kernel.temporary_variables
            and not any(six.itervalues(tts.saw_unmatched_usage_sites))):
        # All usage sites matched--they're now substitution rules.
        # We can get rid of the variable.
        new_temp_vars = new_temp_vars.copy()
        del new_temp_vars[lhs_name]

    if (lhs_name in kernel.arg_dict and not force_retain_argument
            and not any(six.itervalues(tts.saw_unmatched_usage_sites))):
        # All usage sites matched--they're now substitution rules.
        # We can get rid of the argument.
        new_args = new_args[:]
        for arg_index, arg in enumerate(new_args):
            if arg.name == lhs_name:
                # Safe: iteration stops right after the deletion.
                del new_args[arg_index]
                break

    # }}}

    import loopy as lp
    obsolete_insn_ids = set(
            insn_id
            for insn_id, still_used in six.iteritems(
                tts.saw_unmatched_usage_sites)
            if not still_used)
    kernel = lp.remove_instructions(kernel, obsolete_insn_ids)

    return kernel.copy(
            substitutions=new_substs,
            temporary_variables=new_temp_vars,
            args=new_args,
            )
def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False):
    """Return a string in the `dot <http://graphviz.org/>`_ language depicting
    dependencies among kernel instructions.

    :arg iname_cluster: if true, group instructions into ``subgraph
        cluster_<iname>`` blocks following the kernel's schedule. If the
        kernel has no schedule, one is computed first; if that raises a
        :class:`RuntimeError`, a warning is issued and clustering is skipped.
    :arg use_insn_id: if true, nodes are labeled with the instruction id and
        carry the instruction text as tooltip; otherwise the two are swapped.

    Only the transitive reduction of the dependency relation is drawn.
    """

    # make sure all automatically added stuff shows up
    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False)

    if iname_cluster and not kernel.schedule:
        try:
            from loopy.schedule import get_one_scheduled_kernel
            kernel = get_one_scheduled_kernel(kernel)
        except RuntimeError as e:
            iname_cluster = False
            from warnings import warn
            warn("error encountered during scheduling for dep graph -- "
                    "cannot perform iname clustering: %s(%s)"
                    % (type(e).__name__, e))

    # Maps an instruction id to the set of instruction ids it depends on.
    dep_graph = {}
    lines = []

    node_template = "\"%s\" [label=\"%s\",shape=\"box\",tooltip=\"%s\"];"

    from loopy.kernel.data import MultiAssignmentBase, CInstruction
    for insn in kernel.instructions:
        if isinstance(insn, MultiAssignmentBase):
            op = "%s <- %s" % (insn.assignees, insn.expression)
            if len(op) > 200:
                # Keep node text manageable.
                op = op[:200] + "..."

        elif isinstance(insn, CInstruction):
            op = "<C instruction %s>" % insn.id
        else:
            op = "<instruction %s>" % insn.id

        if use_insn_id:
            insn_label, tooltip = insn.id, op
        else:
            insn_label, tooltip = op, insn.id

        # repr(...)[1:-1] escapes the text and drops the surrounding quotes.
        lines.append(node_template % (
            insn.id,
            repr(insn_label)[1:-1],
            repr(tooltip)[1:-1],
            ))

        for dep in insn.depends_on:
            dep_graph.setdefault(insn.id, set()).add(dep)

    # {{{ O(n^3) transitive reduction

    # first, compute transitive closure by fixed point iteration
    changed_something = True
    while changed_something:
        changed_something = False

        for src in dep_graph:
            reachable = dep_graph[src]
            for mid in reachable.copy():
                for dst in dep_graph.get(mid, set()).copy():
                    if dst not in reachable:
                        changed_something = True
                        reachable.add(dst)

    # then drop every edge that is implied by a two-step path
    for src in dep_graph:
        direct = dep_graph[src]
        for mid in direct.copy():
            for dst in dep_graph.get(mid, set()).copy():
                if dst in direct:
                    direct.remove(dst)

    # }}}

    # Edges point from the dependency to the dependent instruction.
    for dependent in dep_graph:
        for dependency in dep_graph.get(dependent, set()):
            lines.append("%s -> %s" % (dependency, dependent))

    if iname_cluster:
        from loopy.schedule import (
                EnterLoop, LeaveLoop, RunInstruction, Barrier,
                CallKernel, ReturnFromKernel)

        for sched_item in kernel.schedule:
            if isinstance(sched_item, EnterLoop):
                lines.append("subgraph cluster_%s { label=\"%s\""
                        % (sched_item.iname, sched_item.iname))
            elif isinstance(sched_item, LeaveLoop):
                lines.append("}")
            elif isinstance(sched_item, RunInstruction):
                lines.append(sched_item.insn_id)
            elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)):
                # These schedule items produce no output.
                pass
            else:
                raise LoopyError("schedule item not unterstood: %r" % sched_item)

    return "digraph %s {\n%s\n}" % (
            kernel.name,
            "\n".join(lines))
def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
        force_retain_argument=False):
    """Extract an assignment (to a temporary variable or an argument)
    as a :ref:`substitution-rule`. The variable may be an array, in
    which case the (plain-variable) array indices become arguments to the
    substitution rule, ahead of any *extra_arguments*.

    :arg extra_arguments: additional argument names for the rule, given as
        a tuple or as one comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg force_retain_argument: If True and if *lhs_name* is an argument,
        it is kept even if it is no longer referenced.

    This operation changes all usage sites of *lhs_name* matched by
    *within*. If further, unmatched usage sites of *lhs_name* exist, the
    original assignment to *lhs_name* and the variable are left in place.

    :raises LoopyError: when the defining instruction of a usage site cannot
        be determined unambiguously, when a writer also reads *lhs_name*,
        when no instruction writes *lhs_name*, or when an assignee index is
        not a plain variable.
    """
    if isinstance(extra_arguments, str):
        extra_arguments = tuple(
                name.strip() for name in extra_arguments.split(","))

    # {{{ establish the relevant definition of lhs_name for each usage site

    dep_kernel = expand_subst(kernel)

    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel)

    id_to_insn = dep_kernel.id_to_insn

    def resolve_definition(usage_insn_id):
        # Returns the id of the one writer of lhs_name reachable through the
        # dependencies of 'usage_insn_id', or None if no writer is reachable.
        candidates = set()

        for dep_id in id_to_insn[usage_insn_id].depends_on:
            dep_insn = id_to_insn[dep_id]
            writes_lhs = lhs_name in dep_insn.write_dependency_names()

            if writes_lhs:
                if lhs_name in dep_insn.read_dependency_names():
                    raise LoopyError(
                            "instruction '%s' both reads *and* "
                            "writes '%s'--cannot transcribe to substitution "
                            "rule" % (dep_id, lhs_name))

                candidates.add(dep_id)
            else:
                # Search the dependencies of the dependency instead.
                upstream = resolve_definition(dep_id)
                if upstream is not None:
                    candidates.add(upstream)

        if len(candidates) > 1:
            raise LoopyError(
                    "more than one write to '%s' found in "
                    "depdendencies of '%s'--definition cannot be resolved "
                    "(writer instructions ids: %s)"
                    % (lhs_name, usage_insn_id, ", ".join(candidates)))

        if not candidates:
            return None

        the_definition, = candidates
        return the_definition

    usage_to_definition = {}

    for usage_insn in dep_kernel.instructions:
        if lhs_name not in usage_insn.read_dependency_names():
            continue

        resolved_id = resolve_definition(usage_insn.id)
        if resolved_id is None:
            raise LoopyError("no write to '%s' found in dependency tree "
                    "of '%s'--definition cannot be resolved"
                    % (lhs_name, usage_insn.id))

        usage_to_definition[usage_insn.id] = resolved_id

    # Note that this scans the kernel as passed in, not dep_kernel.
    definition_insn_ids = set()
    for candidate_insn in kernel.instructions:
        if lhs_name in candidate_insn.write_dependency_names():
            definition_insn_ids.add(candidate_insn.id)

    # }}}

    if not definition_insn_ids:
        raise LoopyError("no assignments to variable '%s' found"
                % lhs_name)

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    tts = AssignmentToSubstChanger(rule_mapping_context,
            lhs_name, definition_insn_ids,
            usage_to_definition, extra_arguments, within)

    mapped_kernel = tts.map_kernel(kernel)
    kernel = rule_mapping_context.finish_kernel(mapped_kernel)

    from loopy.kernel.data import SubstitutionRule

    # {{{ create new substitution rules

    new_substs = kernel.substitutions.copy()
    subst_names_by_def = tts.definition_insn_id_to_subst_name

    for def_id, subst_name in six.iteritems(subst_names_by_def):
        def_insn = kernel.id_to_insn[def_id]

        from loopy.kernel.data import Assignment
        assert isinstance(def_insn, Assignment)

        from pymbolic.primitives import Variable, Subscript
        lhs = def_insn.assignee

        if isinstance(lhs, Subscript):
            indices = lhs.index_tuple
        elif isinstance(lhs, Variable):
            # Scalar assignee: the rule takes no index arguments.
            indices = ()
        else:
            raise LoopyError("Unrecognized LHS type: %s"
                    % type(lhs).__name__)

        arguments = []
        for idx in indices:
            if isinstance(idx, Variable):
                arguments.append(idx.name)
                continue

            raise LoopyError("In defining instruction '%s': "
                    "asignee index '%s' is not a plain variable. "
                    "Perhaps use loopy.affine_map_inames() "
                    "to perform substitution."
                    % (def_id, idx))

        new_substs[subst_name] = SubstitutionRule(
                name=subst_name,
                arguments=tuple(arguments) + extra_arguments,
                expression=def_insn.expression)

    # }}}

    # {{{ delete temporary variable if possible

    # (copied below if modified)
    new_temp_vars = kernel.temporary_variables
    new_args = kernel.args

    if lhs_name in kernel.temporary_variables:
        if not any(six.itervalues(tts.saw_unmatched_usage_sites)):
            # Every usage site was matched and now refers to a substitution
            # rule, so the temporary is no longer needed.
            new_temp_vars = new_temp_vars.copy()
            del new_temp_vars[lhs_name]

    if not force_retain_argument and lhs_name in kernel.arg_dict:
        if not any(six.itervalues(tts.saw_unmatched_usage_sites)):
            # Every usage site was matched and now refers to a substitution
            # rule, so the argument is no longer needed.
            new_args = new_args[:]
            for position, arg in enumerate(new_args):
                if arg.name == lhs_name:
                    # Deleting during iteration is fine: we leave at once.
                    del new_args[position]
                    break

    # }}}

    import loopy as lp
    insn_ids_to_remove = set(
            insn_id
            for insn_id, still_used in six.iteritems(
                tts.saw_unmatched_usage_sites)
            if not still_used)

    kernel = lp.remove_instructions(kernel, insn_ids_to_remove)

    return kernel.copy(
            substitutions=new_substs,
            temporary_variables=new_temp_vars,
            args=new_args,
            )
def preprocess_kernel(kernel, device=None):
    """Run the preprocessing passes on *kernel* and return the resulting
    kernel, whose state is ``kernel_state.PREPROCESSED``.

    A kernel whose state is already at least ``PREPROCESSED`` is returned
    as-is. When ``loopy.CACHING_ENABLED`` is true, results are looked up in
    and stored to ``preprocess_cache``, keyed by the input kernel.

    :arg device: deprecated. Passing anything other than *None* only triggers
        a :class:`DeprecationWarning`; the value is not otherwise used here.

    :raises LoopyError: if an iname of the kernel still carries an
        automatically-assigned local axis tag.
    """
    if device is not None:
        from warnings import warn
        warn("passing 'device' to preprocess_kernel() is deprecated",
                DeprecationWarning, stacklevel=2)

    from loopy.kernel import kernel_state
    if kernel.state >= kernel_state.PREPROCESSED:
        return kernel

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # Remember the unprocessed kernel; it is the cache key further down.
        input_kernel = kernel

        try:
            cached_kernel = preprocess_cache[kernel]
            logger.debug("%s: preprocess cache hit" % kernel.name)
            return cached_kernel
        except KeyError:
            pass

    # }}}

    logger.info("%s: preprocess start" % kernel.name)

    from loopy.check import check_identifiers_in_subst_rules
    check_identifiers_in_subst_rules(kernel)

    # {{{ check that there are no l.auto-tagged inames

    from loopy.kernel.data import AutoLocalIndexTagBase
    if any(
            isinstance(tag, AutoLocalIndexTagBase)
            and iname in kernel.all_inames()
            for iname, tag in six.iteritems(kernel.iname_to_tag)):
        raise LoopyError("kernel with automatically-assigned "
                "local axes passed to preprocessing")

    # }}}

    from loopy.transform.subst import expand_subst
    kernel = expand_subst(kernel)

    # Ordering restriction:
    # Type inference and reduction iname uniqueness don't handle substitutions.
    # Get them out of the way.

    kernel = infer_unknown_types(kernel, expect_completion=False)

    check_for_writes_to_predicates(kernel)
    check_reduction_iname_uniqueness(kernel)

    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
    kernel = apply_single_writer_depencency_heuristic(kernel)

    # Ordering restrictions:
    #
    # - realize_reduction must happen after type inference because it needs
    #   to be able to determine the types of the reduced expressions.
    #
    # - realize_reduction must happen after default dependencies are added
    #   because it manipulates the depends_on field, which could prevent
    #   defaults from being applied.

    kernel = realize_reduction(kernel, unknown_types_ok=False)

    # Ordering restriction:
    # add_axes_to_temporaries_for_ilp because reduction accumulators
    # need to be duplicated by this.

    from loopy.transform.ilp import add_axes_to_temporaries_for_ilp_and_vec
    kernel = add_axes_to_temporaries_for_ilp_and_vec(kernel)

    # Applied strictly in this order.
    # boostability (find_idempotence/limit_boostability) should be removed
    # in 2017.x.
    for kernel_pass in (
            find_temporary_scope,
            find_idempotence,
            limit_boostability):
        kernel = kernel_pass(kernel)

    kernel = kernel.target.preprocess(kernel)

    logger.info("%s: preprocess done" % kernel.name)

    kernel = kernel.copy(
            state=kernel_state.PREPROCESSED)

    if CACHING_ENABLED:
        # {{{ prepare for caching

        # PicklableDtype instances for example need to know the target they're
        # working towards in order to pickle and unpickle them. This is the
        # first pass that uses caching, so we need to be ready to pickle. This
        # means propagating this target information.

        input_kernel = prepare_for_caching(input_kernel)
        kernel = prepare_for_caching(kernel)

        # }}}

        preprocess_cache[input_kernel] = kernel

    return kernel