def iet_insert_C_decls(iet, func_table):
    """
    Given an Iteration/Expression tree ``iet``, build a new tree with the
    necessary symbol declarations. Declarations are placed as close as
    possible to the first symbol use.

    :param iet: The input Iteration/Expression tree.
    :param func_table: A mapper from callable names to :class:`Callable`s
                       called from within ``iet``. Entries flagged as
                       ``local`` are overwritten in place with a transformed
                       copy of their root. Calls whose name is not in the
                       mapper are ignored.
    :returns: The transformed tree. If at least one heap allocation was
              scheduled, the tree is wrapped in a :class:`List` whose header
              holds the declarations and allocations and whose footer holds
              the frees.
    """
    # Resolve function calls first: a Call is never scheduled itself, it is
    # replaced by the expressions found inside its (local) callee.
    scopes = []
    me = MapExpressions()
    for k, v in me.visit(iet).items():
        if k.is_Call:
            # ``.get`` rather than indexing, so that a Call to a callable
            # that is not registered in ``func_table`` is skipped instead of
            # raising ``KeyError``.
            func = func_table.get(k.name)
            if func is not None and func.local:
                scopes.extend(me.visit(func.root, queue=list(v)).items())
        else:
            scopes.append((k, v))

    # Determine all required declarations
    allocator = Allocator()
    mapper = OrderedDict()
    for k, v in scopes:
        if k.is_scalar:
            # Inline declaration
            mapper[k] = LocalExpression(**k.args)
            continue

        written = k.write
        if written is None or written._mem_external:
            # Nothing to do, e.g., variable passed as kernel argument
            continue

        if written._mem_stack:
            # On the stack; fall back to the whole tree when no enclosing
            # iteration qualifies as a declaration site
            site = filter_iterations(v, key=lambda i: not i.is_Parallel,
                                     stop='asap') or [iet]
            allocator.push_stack(site[-1], written)
        else:
            # On the heap, as a tensor that must be globally accessible
            allocator.push_heap(written)

    # Introduce declarations on the stack
    for k, v in allocator.onstack:
        mapper[k] = tuple(Element(i) for i in v)
    iet = NestedTransformer(mapper).visit(iet)
    # Iterate over a snapshot, since the table is updated while looping
    for k, v in list(func_table.items()):
        if v.local:
            func_table[k] = MetaCall(Transformer(mapper).visit(v.root), v.local)

    # Introduce declarations on the heap (if any)
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        iet = List(header=decls + allocs, body=iet, footer=frees)

    return iet
def _insert_declarations(self, dle_state, parameters):
    """
    Populate the Operator's body with the required array and variable
    declarations, to generate a legal C file.

    The tree is read from ``dle_state.nodes``; ``parameters`` is accepted
    but not used by this routine. Returns the pair
    ``(nodes, elemental_functions)``, both rebuilt with the stack
    declarations; ``nodes`` is additionally wrapped in a :class:`List` when
    heap allocations are needed.
    """
    nodes = dle_state.nodes

    # Flatten function calls: a call contributes the scopes found inside
    # the callee, continuing from the call's own enclosing section
    scopes = []
    for node, section in FindScopes().visit(nodes).items():
        if not node.is_FunCall:
            scopes.append((node, section))
            continue
        function = dle_state.func_table[node.name]
        inner = FindScopes().visit(function, queue=list(section))
        scopes.extend(inner.items())

    # Work out where each written object must be declared
    allocator = Allocator()
    mapper = OrderedDict()
    for node, section in scopes:
        if node.is_scalar:
            # Scalars are declared inline, where they are assigned
            mapper[node] = LocalExpression(**node.args)
            continue

        target = node.output_function
        if target._mem_external:
            # Passed in as a kernel argument; no declaration required
            continue

        if not target._mem_stack:
            # Heap-allocated tensor that must be globally accessible
            allocator.push_heap(target)
            continue

        # Stack-allocated, as established by the DLE: pick the last
        # iteration whose dimension is not an index of the target
        candidates = filter_iterations(
            section, key=lambda it: it.dim not in target.indices,
            stop='consecutive')
        allocator.push_stack(candidates[-1], target)

    # Rebuild each hosting iteration with the declarations prepended
    for site, objs in allocator.onstack:
        declared = as_tuple([Element(obj) for obj in objs])
        mapper[site] = Iteration(declared + site.nodes, **site.args_frozen)
    nodes = Transformer(mapper).visit(nodes)
    elemental_functions = Transformer(mapper).visit(
        dle_state.elemental_functions)

    # Heap declarations/allocations go first, the matching frees last
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        nodes = List(header=decls + allocs, body=nodes, footer=frees)

    return nodes, elemental_functions
def _insert_declarations(self, nodes):
    """
    Populate the Operator's body with the required array and variable
    declarations, to generate a legal C file.

    Besides returning the rebuilt ``nodes``, every ``local`` entry of
    ``self.func_table`` is replaced by a :class:`FunMeta` wrapping its
    transformed root.
    """
    # Flatten function calls: only local callees are inspected, and calls
    # themselves are never scheduled
    scopes = []
    for node, section in FindScopes().visit(nodes).items():
        if not node.is_FunCall:
            scopes.append((node, section))
            continue
        callee = self.func_table[node.name]
        if callee.local:
            inner = FindScopes().visit(callee.root, queue=list(section))
            scopes.extend(inner.items())

    # Work out where each written object must be declared
    allocator = Allocator()
    mapper = OrderedDict()
    for node, section in scopes:
        if node.is_scalar:
            # Scalars are declared inline, where they are assigned
            mapper[node] = LocalExpression(**node.args)
            continue

        target = node.output_function
        if target._mem_external:
            # Passed in as a kernel argument; no declaration required
            continue

        if not target._mem_stack:
            # Heap-allocated tensor that must be globally accessible
            allocator.push_heap(target)
            continue

        # Stack-allocated, as established by the DLE; when no enclosing
        # iteration is selected, declare at the top of ``nodes``
        candidates = filter_iterations(
            section, key=lambda it: not it.is_Parallel, stop='asap')
        candidates = candidates or [nodes]
        allocator.push_stack(candidates[-1], target)

    # Attach the stack declarations to their hosting nodes
    for site, objs in allocator.onstack:
        mapper[site] = tuple(Element(obj) for obj in objs)
    nodes = NestedTransformer(mapper).visit(nodes)
    # Loop over a snapshot, since the table is rewritten while iterating
    for name, meta in list(self.func_table.items()):
        if not meta.local:
            continue
        rebuilt = Transformer(mapper).visit(meta.root)
        self.func_table[name] = FunMeta(rebuilt, meta.local)

    # Heap declarations/allocations go first, the matching frees last
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        nodes = List(header=decls + allocs, body=nodes, footer=frees)

    return nodes
def iet_insert_C_decls(iet, external=None):
    """
    Given an IET, build a new tree with the necessary symbol declarations.
    Declarations are placed as close as possible to the first symbol occurrence.

    Parameters
    ----------
    iet : Node
        The input Iteration/Expression tree.
    external : tuple, optional
        The symbols defined in some outer Callable, which therefore must not
        be re-defined.

    Returns
    -------
    Node
        The transformed tree. If at least one heap allocation was scheduled,
        the tree is wrapped in a List whose header holds the declarations and
        allocations and whose footer holds the frees.
    """
    external = external or []

    # Classify and then schedule declarations to stack/heap
    allocator = Allocator()
    mapper = OrderedDict()
    for k, v in MapExpressions().visit(iet).items():
        if k.is_Expression:
            if k.is_scalar_assign:
                # Inline declaration
                mapper[k] = LocalExpression(**k.args)
                continue
            objs = [k.write]
        elif k.is_Call:
            objs = k.params
        else:
            # Neither an Expression nor a Call: there is nothing to declare.
            # Without this guard ``objs`` would be unbound (first iteration)
            # or would still hold the previous node's objects, which would
            # then be scheduled again at the wrong site.
            continue

        for i in objs:
            # NOTE: the handler below also hides any AttributeError raised
            # while pushing to the allocator, not only the attribute probes.
            try:
                if i.is_LocalObject:
                    # On the stack
                    site = v[-1] if v else iet
                    allocator.push_stack(site, i)
                elif i.is_Array:
                    if i in external:
                        # The Array is to be defined in some foreign IET
                        continue
                    elif i._mem_stack:
                        # On the stack; fall back to the whole tree when no
                        # enclosing iteration qualifies
                        site = filter_iterations(
                            v, key=lambda n: not n.is_Parallel,
                            stop='asap') or [iet]
                        allocator.push_stack(site[-1], i)
                    else:
                        # On the heap, as a tensor that must be globally
                        # accessible
                        allocator.push_heap(i)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Introduce declarations on the stack
    for k, v in allocator.onstack:
        mapper[k] = tuple(Element(i) for i in v)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Introduce declarations on the heap (if any)
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        iet = List(header=decls + allocs, body=iet, footer=frees)

    return iet
def iet_insert_decls(iet, external=None):
    """
    Transform the input IET inserting the necessary symbol declarations.
    Declarations are placed as close as possible to the first symbol occurrence.

    Parameters
    ----------
    iet : Node
        The input Iteration/Expression tree.
    external : tuple, optional
        The symbols defined in some outer Callable, which therefore must not
        be re-defined. Defaults to no external symbols.

    Returns
    -------
    tuple or List
        The transformed input (normalised through ``as_tuple`` before being
        visited). If at least one heap allocation was scheduled, it is
        wrapped in a List whose header holds the declarations and allocations
        and whose footer holds the frees.
    """
    iet = as_tuple(iet)
    # Normalise once, up front, instead of on every Array that is inspected.
    # ``None`` is mapped explicitly so that the documented default does not
    # rely on how ``as_tuple`` treats it.
    external = () if external is None else as_tuple(external)

    # Classify and then schedule declarations to stack/heap
    allocator = Allocator()
    mapper = OrderedDict()
    for k, v in MapSections().visit(iet).items():
        if k.is_Expression:
            if k.is_scalar_assign:
                # Inline declaration
                mapper[k] = LocalExpression(**k.args)
                continue
            objs = [k.write]
        elif k.is_Call:
            objs = k.arguments
        else:
            # Neither an Expression nor a Call: there is nothing to declare.
            # Without this guard ``objs`` would be unbound (first iteration)
            # or would still hold the previous node's objects, which would
            # then be scheduled again at the wrong site.
            continue

        for i in objs:
            # NOTE: the handler below also hides any AttributeError raised
            # while pushing to the allocator, not only the attribute probes.
            try:
                if i.is_LocalObject:
                    # On the stack
                    site = v if v else iet
                    allocator.push_stack(site[-1], i)
                elif i.is_Array:
                    if i in external:
                        # The Array is defined in some other IET
                        continue
                    elif i._mem_stack:
                        # On the stack; fall back to the input tuple when no
                        # enclosing iteration qualifies
                        site = filter_iterations(
                            v, key=lambda n: not n.is_Parallel) or iet
                        allocator.push_stack(site[-1], i)
                    else:
                        # On the heap, as a tensor that must be globally
                        # accessible
                        allocator.push_heap(i)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Introduce declarations on the stack
    for k, v in allocator.onstack:
        mapper[k] = tuple(Element(i) for i in v)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Introduce declarations on the heap (if any)
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        iet = List(header=decls + allocs, body=iet, footer=frees)

    return iet
def iet_insert_C_decls(iet, func_table=None):
    """
    Given an Iteration/Expression tree ``iet``, build a new tree with the
    necessary symbol declarations. Declarations are placed as close as
    possible to the first symbol use.

    :param iet: The input Iteration/Expression tree.
    :param func_table: (Optional) a mapper from callable names within ``iet``
                       to :class:`Callable`s. Entries flagged as ``local``
                       are overwritten in place with a transformed copy of
                       their root.
    :raises NotImplementedError: if a collected node is neither an
                                 Expression nor a Call.
    """
    func_table = func_table or {}
    allocator = Allocator()
    mapper = OrderedDict()

    # Collect every node that may touch a symbol in need of a declaration.
    # For a Call to a local callable, the callee's own nodes come first,
    # then the Call itself.
    scopes = []
    me = MapExpressions()
    for node, section in me.visit(iet).items():
        if node.is_Call:
            callee = func_table.get(node.name)
            if callee is not None and callee.local:
                inner = me.visit(callee.root, queue=list(section))
                scopes.extend(inner.items())
        scopes.append((node, section))

    # Classify the touched objects and schedule them to stack/heap
    for node, section in scopes:
        if node.is_Expression:
            if node.is_scalar:
                # Scalars are declared inline, where they are assigned
                mapper[node] = LocalExpression(**node.args)
                continue
            objs = [node.write]
        elif node.is_Call:
            objs = node.params
        else:
            raise NotImplementedError("Cannot schedule declarations for IET "
                                      "node of type `%s`" % type(node))

        for obj in objs:
            # The whole classification sits in the ``try`` so that objects
            # lacking the probed attributes are simply skipped
            try:
                if obj.is_LocalObject:
                    # Stack: innermost enclosing node, else the whole tree
                    site = section[-1] if section else iet
                    allocator.push_stack(site, obj)
                    continue
                if not obj.is_Array or obj._mem_external:
                    # Not an Array, or e.g. a user-provided Function
                    continue
                if obj._mem_stack:
                    # Stack: fall back to the whole tree when no enclosing
                    # iteration qualifies
                    candidates = filter_iterations(
                        section, key=lambda it: not it.is_Parallel,
                        stop='asap')
                    candidates = candidates or [iet]
                    allocator.push_stack(candidates[-1], obj)
                else:
                    # Heap: a tensor that must be globally accessible
                    allocator.push_heap(obj)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Attach the stack declarations to their hosting nodes
    for site, objs in allocator.onstack:
        mapper[site] = tuple(Element(obj) for obj in objs)
    iet = Transformer(mapper, nested=True).visit(iet)
    # Loop over a snapshot, since the table is rewritten while iterating
    for name, meta in list(func_table.items()):
        if not meta.local:
            continue
        rebuilt = Transformer(mapper).visit(meta.root)
        func_table[name] = MetaCall(rebuilt, meta.local)

    # Heap declarations/allocations go first, the matching frees last
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        iet = List(header=decls + allocs, body=iet, footer=frees)

    return iet
def iet_insert_C_decls(iet, func_table=None):
    """
    Given an Iteration/Expression tree ``iet``, build a new tree with the
    necessary symbol declarations. Declarations are placed as close as
    possible to the first symbol use.

    :param iet: The input Iteration/Expression tree.
    :param func_table: (Optional) a mapper from callable names within ``iet``
                       to :class:`Callable`s. Entries flagged as ``local``
                       are overwritten in place with a transformed copy of
                       their root.
    """
    func_table = func_table or {}
    allocator = Allocator()
    mapper = OrderedDict()

    # Pass 1: Expressions. A Call is not scheduled here; if its callee is
    # local, the callee's own nodes are collected in its place.
    scopes = []
    me = MapExpressions()
    for node, section in me.visit(iet).items():
        if not node.is_Call:
            scopes.append((node, section))
            continue
        callee = func_table.get(node.name)
        if callee is not None and callee.local:
            inner = me.visit(callee.root, queue=list(section))
            scopes.extend(inner.items())

    for node, section in scopes:
        if node.is_scalar:
            # Scalars are declared inline, where they are assigned
            mapper[node] = LocalExpression(**node.args)
            continue

        written = node.write
        if written is None or written._mem_external:
            # Nothing to do, e.g., variable passed as kernel argument
            continue

        if not written._mem_stack:
            # Heap: a tensor that must be globally accessible
            allocator.push_heap(written)
            continue

        # Stack: fall back to the whole tree when no enclosing iteration
        # qualifies
        candidates = filter_iterations(
            section, key=lambda it: not it.is_Parallel, stop='asap')
        candidates = candidates or [iet]
        allocator.push_stack(candidates[-1], written)

    # Pass 2: arguments of Calls passed by reference/pointer (as modified
    # internally by the callable)
    for call, section in me.visit(iet).items():
        if not call.is_Call:
            continue
        site = section[-1] if section else iet
        for param in call.params:
            # Parameters lacking the probed attributes are skipped
            try:
                if param.is_LocalObject:
                    # On the stack
                    allocator.push_stack(site, param)
                elif param.is_Array:
                    if param._mem_stack:
                        # On the stack
                        allocator.push_stack(site, param)
                    elif param._mem_heap:
                        # On the heap
                        allocator.push_heap(param)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Attach the stack declarations to their hosting nodes
    for site, objs in allocator.onstack:
        mapper[site] = tuple(Element(obj) for obj in objs)
    iet = NestedTransformer(mapper).visit(iet)
    # Loop over a snapshot, since the table is rewritten while iterating
    for name, meta in list(func_table.items()):
        if not meta.local:
            continue
        rebuilt = Transformer(mapper).visit(meta.root)
        func_table[name] = MetaCall(rebuilt, meta.local)

    # Heap declarations/allocations go first, the matching frees last
    if allocator.onheap:
        decls, allocs, frees = zip(*allocator.onheap)
        iet = List(header=decls + allocs, body=iet, footer=frees)

    return iet