def _get_topological_order(kernel):
    """
    Return a :class:`list` holding the ids of the instructions of *kernel*,
    arranged in a topological sort order of their ``depends_on`` relation.

    :raises loopy.diagnostic.DependencyCycleFound: if some strongly connected
        component of the dependency graph does not consist of exactly one
        instruction. The exception argument is the comma-separated list of
        the ids in that component.
    """
    from pytools.graph import compute_sccs
    from loopy.diagnostic import DependencyCycleFound

    depends_on_graph = {}
    for stmt in kernel.instructions:
        depends_on_graph[stmt.id] = stmt.depends_on

    # The SCC decomposition does double duty: the order in which the
    # components come back is the sort order we are after, and any component
    # that is not a singleton exposes a cycle in the graph.
    insn_ids = []
    for component in compute_sccs(depends_on_graph):
        if len(component) == 1:
            insn_ids.append(component[0])
        else:
            raise DependencyCycleFound(", ".join(component))

    return insn_ids
def _find_boostable_insn_ids(kernel):
    """Compatibility shim left over from the retired "boostability" heuristic,
    which used to allow instructions to be pushed into hardware-parallel loops.

    Returns the :class:`set` of ids of those instructions of *kernel* that

    * are alone in their strongly connected component of the "reads a
      variable written by" graph and do not depend on themselves, and
    * do not reference any variable assigned by an instruction failing the
      first criterion.
    """
    logger.debug("%s: idempotence" % kernel.name)

    from collections import defaultdict
    from pytools.graph import compute_sccs

    writers_of = kernel.writer_map()

    known_vars = {arg.name for arg in kernel.args}
    known_vars |= set(kernel.temporary_variables.keys())

    # insn id -> ids of all instructions writing a variable which it reads
    dep_graph = defaultdict(set)
    for stmt in kernel.instructions:
        writer_ids = set()
        for var in stmt.read_dependency_names() & known_vars:
            writer_ids.update(writers_of.get(var, set()))
        dep_graph[stmt.id] = writer_ids

    # insn id -> the SCC of dep_graph containing it. Used below to decide
    # whether an instruction takes part in a dependency cycle.
    scc_of = {}
    for component in compute_sccs(dep_graph):
        for member in component:
            scc_of[member] = component

    non_idempotently_updated_vars = set()
    candidate_ids = set()

    for stmt in kernel.instructions:
        in_cycle = (
            len(scc_of[stmt.id]) != 1
            or stmt.id in dep_graph[stmt.id])

        if in_cycle:
            non_idempotently_updated_vars.update(stmt.assignee_var_names())
        else:
            candidate_ids.add(stmt.id)

    # {{{ remove boostability from insns that access non-idempotently updated vars

    boostable_insn_ids = {
        insn_id
        for insn_id in candidate_ids
        if not (
            non_idempotently_updated_vars
            & kernel.id_to_insn[insn_id].dependency_names())
    }

    # }}}

    return boostable_insn_ids
def test_compute_sccs():
    """Check :func:`pytools.graph.compute_sccs` on seeded random graphs."""
    from pytools.graph import compute_sccs
    import random

    rng = random.Random(0)

    def generate_random_graph(nnodes):
        # Edge probability 2/n: Generates decently interesting inputs.
        return {
            src: {
                dst
                for dst in range(nnodes)
                if rng.randint(0, nnodes - 1) <= 1
            }
            for src in range(nnodes)
        }

    def verify_sccs(graph, sccs):
        seen = set()

        def collect(node):
            # Post-order list of the nodes newly reached from *node*;
            # everything already in *seen* is skipped.
            if node in seen:
                return []

            seen.add(node)
            reached = []
            for succ in graph[node]:
                reached.extend(collect(succ))
            reached.append(node)
            return reached

        for component in sccs:
            members = set(component)
            assert not members & seen

            # Whichever member the walk starts from, the nodes it newly
            # reaches have to be exactly the members of the component.
            for root in members:
                seen.difference_update(members)
                reached = set(collect(root))
                assert reached == members, (reached, members)

    for nnodes in range(10, 20):
        for _ in range(40):
            graph = generate_random_graph(nnodes)
            verify_sccs(graph, compute_sccs(graph))
def infer_unknown_types_for_a_single_kernel(kernel, clbl_inf_ctx):
    """Infer types on temporaries and arguments.

    Every temporary variable and argument of *kernel* whose ``dtype`` is
    *None* is run through type inference. Names whose type cannot be
    determined are left untouched; no error is raised for them.

    :arg kernel: the kernel to work on. If it has substitution rules, they
        are expanded for the purposes of inference only; the returned kernel
        is derived from the unexpanded one.
    :arg clbl_inf_ctx: the callables inference context handed to
        :class:`TypeInferenceMapper`.
    :returns: a tuple ``(type_specialized_kernel, clbl_inf_ctx)`` consisting
        of a copy of the (unexpanded) kernel carrying the updated temporaries
        and arguments, passed through :func:`change_names_of_pymbolic_calls`,
        and the inference context held by the type inference mapper after the
        final pass over the instructions.
    :raises LoopyError: if an inferred name is neither a temporary variable
        nor a kernel argument.
    :raises NotImplementedError: if an instruction of an unknown kind is
        encountered.
    """

    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    import loopy as lp

    # contains both arguments and temporaries
    names_for_type_inference = []

    for tv in kernel.temporary_variables.values():
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
            count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # Membership is tested once per read variable below, so use a set for
    # that; the list keeps determining the order of the graph's keys.
    unknown_names = frozenset(names_for_type_inference)

    # name with unknown type -> names with unknown type read by its writers
    dep_graph = {
            written_var: {
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
                if read_var in unknown_names
            }
            for written_var in names_for_type_inference
    }

    from pytools.graph import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([new_temp_vars, new_arg_dict])
    type_inf_mapper = TypeInferenceMapper(kernel, clbl_inf_ctx, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    old_calls_to_new_calls = {}

    for var_chain in sccs:
        changed_during_last_queue_run = False
        var_queue = var_chain[:]
        failed_names = set()

        while var_queue or changed_during_last_queue_run:
            if not var_queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                var_queue = var_chain[:]

            name = var_queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)
            try:
                (result, symbols_with_unknown_types,
                        new_old_calls_to_new_calls,
                        clbl_inf_ctx) = (
                        _infer_var_type(
                                kernel, item.name, type_inf_mapper,
                                subst_expander))
            except DependencyTypeInferenceFailure:
                # Treated like an ordinary failure below. *clbl_inf_ctx*
                # keeps the value it had before this attempt.
                result = ()
                symbols_with_unknown_types = ()

            type_inf_mapper = type_inf_mapper.copy(clbl_inf_ctx=clbl_inf_ctx)

            if result:
                new_dtype, = result
                debug(" success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug(" changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                if isinstance(item, TemporaryVariable):
                    new_temp_vars[name] = item.copy(dtype=new_dtype)
                elif isinstance(item, KernelArgument):
                    new_arg_dict[name] = item.copy(dtype=new_dtype)
                else:
                    raise LoopyError(
                            "unexpected item type in type inference")

                old_calls_to_new_calls.update(new_old_calls_to_new_calls)

                # we've made progress, reset failure markers
                failed_names = set()
            else:
                debug(" failure")

                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unknown_types:
                        advice += (
                                " (need type of '%s'--check for missing arguments)"
                                % ", ".join(symbols_with_unknown_types))

                    debug("could not determine type of '%s'%s"
                            % (item.name, advice))
                    # We're done here
                    break

                # remember that this item failed
                failed_names.add(item.name)

                if set(var_queue) == failed_names:
                    # We did what we could... (reported through the debug
                    # log rather than printed to stdout)
                    debug("giving up on '%s': queue %s, failed names %s",
                            item.name, var_queue, failed_names)
                    break

                # can't infer type yet, put back into var_queue
                var_queue.append(name)

    # }}}

    # {{{ check if insn missed during type inference

    def _written_name(assignee):
        # Name of the variable that *assignee* ultimately writes to.
        if isinstance(assignee, Lookup):
            assignee = assignee.aggregate

        if isinstance(assignee, Variable):
            return assignee.name
        elif isinstance(assignee, (Subscript, LinearSubscript)):
            return assignee.aggregate.name
        else:
            assert isinstance(assignee, SubArrayRef)
            return assignee.subscript.aggregate.name

    def _instruction_missed_during_inference(insn):
        # An instruction was looked at by the queue above only if one of the
        # variables it writes started out with an unknown dtype. Every kind
        # of assignee is checked the same way, by the dtype of the variable
        # written.
        for assignee in insn.assignees:
            written_name = _written_name(assignee)
            if written_name in kernel.arg_dict:
                if kernel.arg_dict[written_name].dtype is None:
                    return False
            else:
                assert written_name in kernel.temporary_variables
                if kernel.temporary_variables[written_name].dtype is None:
                    return False

        return True

    # }}}

    for insn in kernel.instructions:
        if isinstance(insn, lp.MultiAssignmentBase):
            # just a dummy run over the expression, to pass over all the
            # functions
            if _instruction_missed_during_inference(insn):
                type_inf_mapper(insn.expression,
                        return_tuple=len(insn.assignees) != 1,
                        return_dtype_set=True)
        elif isinstance(insn, (_DataObliviousInstruction, lp.CInstruction)):
            pass
        else:
            raise NotImplementedError("Unknown instructions type %s." % (
                type(insn).__name__))

    clbl_inf_ctx = type_inf_mapper.clbl_inf_ctx
    old_calls_to_new_calls.update(type_inf_mapper.old_calls_to_new_calls)

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(
            dur=end_time - start_time))

    pre_type_specialized_knl = unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )

    type_specialized_kernel = change_names_of_pymbolic_calls(
            pre_type_specialized_knl, old_calls_to_new_calls)

    return type_specialized_kernel, clbl_inf_ctx
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Every temporary variable and argument of *kernel* whose ``dtype`` is
    *None* is run through type inference.

    :arg kernel: the kernel to work on. If it has substitution rules, they
        are expanded for the purposes of inference only; the returned kernel
        is derived from the unexpanded one.
    :arg expect_completion: if true, failing to determine the type of a
        name is an error. Otherwise such names are left untouched.
    :returns: a copy of the (unexpanded) kernel carrying the updated
        temporaries and arguments.
    :raises LoopyError: if *expect_completion* is true and the type of some
        name could not be determined, or if an inferred name is neither a
        temporary variable nor a kernel argument.
    """

    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    import loopy as lp

    # contains both arguments and temporaries
    names_for_type_inference = []

    for tv in kernel.temporary_variables.values():
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
            count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # Membership is tested once per read variable below, so use a set for
    # that; the list keeps determining the order of the graph's keys.
    unknown_names = frozenset(names_for_type_inference)

    # name with unknown type -> names with unknown type read by its writers
    dep_graph = {
            written_var: {
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
                if read_var in unknown_names}
            for written_var in names_for_type_inference}

    from pytools.graph import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([
            new_temp_vars,
            new_arg_dict
            ])

    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    def _failure_message(name, symbols_with_unavailable_types):
        # Error text for a name whose type could not be determined.
        advice = ""
        if symbols_with_unavailable_types:
            advice += (
                    " (need type of '%s'--check for missing arguments)"
                    % ", ".join(symbols_with_unavailable_types))
        return "could not determine type of '%s'%s" % (name, advice)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (
                    _infer_var_type(
                            kernel, item.name, type_inf_mapper, subst_expander))

            if result:
                new_dtype, = result
                if new_dtype.target is None:
                    new_dtype = new_dtype.with_target(kernel.target)

                debug(" success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug(" changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                if isinstance(item, TemporaryVariable):
                    new_temp_vars[name] = item.copy(dtype=new_dtype)
                elif isinstance(item, KernelArgument):
                    new_arg_dict[name] = item.copy(dtype=new_dtype)
                else:
                    raise LoopyError("unexpected item type in type inference")

                # we've made progress, reset failure markers
                failed_names = set()
                continue

            debug(" failure")

            if item.name in failed_names:
                # this item has failed before, give up.
                if expect_completion:
                    raise LoopyError(_failure_message(
                        item.name, symbols_with_unavailable_types))

                # We're done here.
                break

            # remember that this item failed
            failed_names.add(item.name)

            if set(queue) == failed_names:
                # We did what we could... (reported through the debug log
                # rather than printed to stdout)
                debug("giving up on '%s': queue %s, failed names %s",
                        item.name, queue, failed_names)

                # Raise explicitly: an assert would vanish under -O and let
                # an incomplete result through despite expect_completion.
                if expect_completion:
                    raise LoopyError(_failure_message(
                        item.name, symbols_with_unavailable_types))
                break

            # can't infer type yet, put back into queue
            queue.append(name)

    # }}}

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(
            dur=end_time - start_time))

    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )