def place_definitions(self, iet, **kwargs):
    """
    Create a new IET with symbols allocated/deallocated in some memory space.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    tuple
        The IET returned by ``self._dump_storage`` and an empty dict.
    """
    storage = Storage()

    # Objects that must not be allocated here; seeded with the Callable parameters
    already_defined = list(iet.parameters)

    for k, v in MapExprStmts().visit(iet).items():
        if k.is_Expression:
            if k.is_definition:
                # Use the last node in `v` as allocation site, or the whole
                # IET if `v` is empty
                site = v[-1] if v else iet
                self._alloc_scalar_on_low_lat_mem(site, k, storage)
                continue
            objs = [k.write]
        elif k.is_Dereference:
            already_defined.append(k.array)
            if k.parray in already_defined:
                objs = []
            else:
                objs = [k.parray]
        elif k.is_Call:
            objs = k.arguments
        else:
            # Any other kind of node carries nothing to allocate. Without this
            # branch `objs` would be unbound on the first iteration, or would
            # still hold the objects of the previously visited node
            continue

        for i in objs:
            if i in already_defined:
                continue

            try:
                if i.is_LocalObject:
                    site = v[-1] if v else iet
                    self._alloc_object_on_low_lat_mem(site, i, storage)
                elif i.is_Array:
                    site = iet
                    if i._mem_local:
                        # If inside a ParallelBlock, make sure we allocate
                        # inside of it
                        for n in v:
                            if n.is_ParallelBlock:
                                site = n
                                break
                    if i._mem_heap:
                        self._alloc_array_on_high_bw_mem(site, i, storage)
                    else:
                        self._alloc_array_on_low_lat_mem(site, i, storage)
                elif i.is_PointerArray:
                    self._alloc_pointed_array_on_high_bw_mem(iet, i, storage)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    iet = self._dump_storage(iet, storage)

    return iet, {}
def _(iet):
    """
    Map onto the high bandwidth memory the discrete functions accessed by the
    Expressions of `iet` that sit under a device Iteration.

    Returns the IET produced by ``self._dump_storage`` together with a dict
    carrying the `DeviceRM` symbol under the ``'args'`` key.
    """

    def is_offloaded(nodes):
        # True if at least one of `nodes` is a device Iteration
        return any(isinstance(n, self.lang.DeviceIteration) for n in nodes)

    # Special symbol which gives user code control over data deallocations
    devicerm = DeviceRM()

    # Gather what gets written and what gets read
    written = set()
    accessed = set()
    for stmt, nodes in MapExprStmts().visit(iet).items():
        # Only Expressions within an offloaded Iteration tree are relevant
        if stmt.is_Expression and is_offloaded(nodes):
            if stmt.write.is_DiscreteFunction:
                written.add(stmt.write)
            accessed.update({r for r in stmt.reads if r.is_DiscreteFunction})

    # Fill up `storage`: first the written symbols, then the read-only ones
    storage = Storage()
    for f in filter_sorted(written):
        if is_on_device(f, self.gpu_fit):
            self._map_function_on_high_bw_mem(iet, f, storage, devicerm)
    read_only = accessed - written
    for f in filter_sorted(read_only):
        if is_on_device(f, self.gpu_fit):
            self._map_function_on_high_bw_mem(iet, f, storage, devicerm, True)

    iet = self._dump_storage(iet, storage)

    return iet, {'args': devicerm}
def place_definitions(self, iet):
    """
    Create a new IET with symbols allocated/deallocated in some memory space.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.

    Returns
    -------
    tuple
        The rebuilt IET and an empty dict.
    """
    storage = Storage()

    for k, v in MapExprStmts().visit(iet).items():
        if k.is_Expression:
            if k.is_definition:
                # Use the last node in `v` as allocation site, or the whole
                # IET if `v` is empty
                site = v[-1] if v else iet
                self._alloc_scalar_on_low_lat_mem(site, k, storage)
                continue
            objs = [k.write]
        elif k.is_Call:
            objs = k.arguments
        else:
            # Any other kind of node carries nothing to allocate. Without this
            # branch `objs` would be unbound on the first iteration, or would
            # still hold the objects of the previously visited node
            continue

        for i in objs:
            try:
                if i.is_LocalObject:
                    site = v[-1] if v else iet
                    self._alloc_object_on_low_lat_mem(site, i, storage)
                elif i.is_Array:
                    if i in iet.parameters:
                        # The Array is passed as a Callable argument
                        continue
                    elif i._mem_stack:
                        self._alloc_array_on_low_lat_mem(iet, i, storage)
                    else:
                        self._alloc_array_on_high_bw_mem(i, storage)
                elif i.is_Function:
                    self._map_function_on_high_bw_mem(i, storage)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Introduce symbol definitions going in the low latency memory
    mapper = dict(storage._on_low_lat_mem)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Introduce symbol definitions going in the high bandwidth memory
    if storage._on_high_bw_mem:
        # Each entry is a (declaration, allocation, deallocation) triple
        decls, allocs, frees = zip(*storage._on_high_bw_mem)
        body = List(header=decls + allocs, body=iet.body, footer=frees)
        iet = iet._rebuild(body=body)

    return iet, {}
def place_definitions(self, iet, **kwargs):
    """
    Build a new IET in which every symbol is declared, allocated, and
    deallocated in one or more memory spaces.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.
    """
    # First pass: inline definitions
    inline = Storage()
    for expr, nodes in MapExprStmts().visit(iet).items():
        if not expr.is_Expression:
            continue
        if not expr.is_initializable:
            continue
        self._alloc_scalar_on_low_lat_mem((iet, ) + nodes, expr, inline)

    iet = self._inject_definitions(iet, inline)

    # Second pass: everything else, that is the temporary objects created by
    # the compiler up to this point (Array, LocalObject, etc.)
    storage = Storage()
    defines = FindSymbols('defines-aliases').visit(iet)

    for sym in FindSymbols().visit(iet):
        if sym in defines:
            continue

        # Pick the allocator matching the kind of symbol, if any
        if sym.is_LocalObject:
            alloc = self._alloc_object_on_low_lat_mem
        elif sym.is_Array:
            if sym._mem_heap:
                alloc = self._alloc_array_on_high_bw_mem
            else:
                alloc = self._alloc_array_on_low_lat_mem
        elif sym.is_ObjectArray:
            alloc = self._alloc_object_array_on_low_lat_mem
        elif sym.is_PointerArray:
            alloc = self._alloc_pointed_array_on_high_bw_mem
        else:
            continue

        alloc(iet, sym, storage)

    iet = self._inject_definitions(iet, storage)

    return iet, {}
def derive_transfers(self, iet):
    """
    Collect the functions read and written by the statements of `iet` that
    qualify for a transfer.

    A statement is inspected if it sits under a device Iteration, or if `iet`
    itself is a DeviceFunction.

    Returns
    -------
    tuple
        The pair of sets ``(reads, writes)``.
    """

    def needs_transfer(f):
        if not isinstance(f, AbstractFunction):
            return False
        if not is_on_device(f, self.gpu_fit):
            return False
        return f._mem_mapped

    writes = set()
    reads = set()
    for stmt, nodes in MapExprStmts().visit(iet).items():
        offloaded = any(isinstance(n, self.lang.DeviceIteration) for n in nodes)
        if not (offloaded or isinstance(iet, DeviceFunction)):
            # Not an offloaded Iteration tree
            continue

        for w in stmt.writes:
            if needs_transfer(w):
                writes.add(w)

        # Functions already known to be written are not recorded as reads
        for f in stmt.functions:
            if needs_transfer(f) and f not in writes:
                reads.add(f)

    return (reads, writes)
def place_definitions(self, iet, **kwargs):
    """
    Create a new IET with symbols allocated/deallocated in some memory space.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    tuple
        The IET returned by ``self._dump_storage`` and an empty dict.
    """
    storage = Storage()

    for k, v in MapExprStmts().visit(iet).items():
        if k.is_Expression:
            if k.is_definition:
                # Use the last node in `v` as allocation site, or the whole
                # IET if `v` is empty
                site = v[-1] if v else iet
                self._alloc_scalar_on_low_lat_mem(site, k, storage)
                continue
            objs = [k.write]
        elif k.is_Call:
            objs = k.arguments
        else:
            # Any other kind of node carries nothing to allocate. Without this
            # branch `objs` would be unbound on the first iteration, or would
            # still hold the objects of the previously visited node
            continue

        for i in objs:
            try:
                if i.is_LocalObject:
                    site = v[-1] if v else iet
                    self._alloc_object_on_low_lat_mem(site, i, storage)
                elif i.is_Array:
                    if i in iet.parameters:
                        # The Array is passed as a Callable argument
                        continue
                    elif i._mem_stack:
                        self._alloc_array_on_low_lat_mem(iet, i, storage)
                    else:
                        self._alloc_array_on_high_bw_mem(i, storage)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    iet = self._dump_storage(iet, storage)

    return iet, {}
def derive_transfers(self, iet):
    """
    Collect the objects read and written by the Expressions of `iet` that sit
    under a device Iteration and qualify for a transfer.

    Returns
    -------
    tuple
        The pair of sets ``(reads, writes)``.
    """
    transferable = (Array, Function, AbstractSparseFunction)

    def needs_transfer(f):
        return is_on_device(f, self.gpu_fit) and isinstance(f, transferable)

    def is_offloaded(nodes):
        return any(isinstance(n, self.lang.DeviceIteration) for n in nodes)

    writes = set()
    reads = set()
    for expr, nodes in MapExprStmts().visit(iet).items():
        # Skip anything that is not an Expression (no-op) as well as anything
        # that is not part of an offloaded Iteration tree
        if not expr.is_Expression or not is_offloaded(nodes):
            continue

        target = expr.write
        if needs_transfer(target):
            writes.add(target)

        for r in expr.reads:
            if needs_transfer(r):
                reads.add(r)

    return (reads, writes)
def place_definitions(self, iet, **kwargs):
    """
    Create a new IET where all symbols have been declared, allocated, and
    deallocated in one or more memory spaces.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    tuple
        The IET returned by ``self._dump_storage`` and an empty dict.
    """
    storage = Storage()

    refmap = FindSymbols().visit(iet).mapper
    # Objects that must not be allocated here; seeded with the Callable parameters
    placed = list(iet.parameters)

    for k, v in MapExprStmts().visit(iet).items():
        if k.is_LocalExpression:
            placed.append(k.write)
            objs = []
        elif k.is_Expression:
            if k.is_definition:
                # Use the last node in `v` as allocation site, or the whole
                # IET if `v` is empty
                site = v[-1] if v else iet
                self._alloc_scalar_on_low_lat_mem(site, k, storage)
                continue
            objs = [k.write]
        elif k.is_Dereference:
            placed.append(k.array)
            if k.parray in placed:
                objs = []
            else:
                objs = [k.parray]
        elif k.is_Call:
            objs = list(k.functions)
            if k.retobj is not None:
                objs.append(k.retobj.function)
        elif k.is_PointerCast:
            placed.append(k.function)
            objs = []
        else:
            # Any other kind of node carries nothing to allocate. Without this
            # branch `objs` would be unbound on the first iteration, or would
            # still hold the objects of the previously visited node
            continue

        for i in objs:
            if i in placed:
                continue

            try:
                if i.is_LocalObject:
                    # LocalObject's get placed as close as possible to
                    # their first appearance
                    site = iet
                    for n in v:
                        if i in refmap[n]:
                            break
                        site = n
                    self._alloc_object_on_low_lat_mem(site, i, storage)
                elif i.is_Array:
                    # Array's get placed as far as possible from their
                    # first appearance
                    site = iet
                    if i._mem_local:
                        # If inside a ParallelBlock, make sure we allocate
                        # inside of it
                        for n in v:
                            if n.is_ParallelBlock:
                                site = n
                                break
                    if i._mem_heap:
                        self._alloc_array_on_high_bw_mem(site, i, storage)
                    else:
                        self._alloc_array_on_low_lat_mem(site, i, storage)
                elif i.is_ObjectArray:
                    # ObjectArray's get placed at the top of the IET
                    self._alloc_object_array_on_low_lat_mem(iet, i, storage)
                elif i.is_PointerArray:
                    # PointerArray's get placed at the top of the IET
                    self._alloc_pointed_array_on_high_bw_mem(iet, i, storage)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    iet = self._dump_storage(iet, storage)

    return iet, {}
def place_definitions(self, iet, **kwargs):
    """
    Create a new IET with symbols allocated/deallocated in some memory space.

    Parameters
    ----------
    iet : Callable
        The input Iteration/Expression tree.
    **kwargs
        Only the optional ``efuncs`` entry is read: an iterable of objects
        whose Expressions are scanned for written and read Functions.

    Returns
    -------
    tuple
        The rebuilt IET and an empty dict.
    """
    storage = Storage()

    # Collect and declare symbols
    for k, v in MapExprStmts().visit(iet).items():
        if k.is_Expression:
            if k.is_definition:
                # Use the last node in `v` as allocation site, or the whole
                # IET if `v` is empty
                site = v[-1] if v else iet
                self._alloc_scalar_on_low_lat_mem(site, k, storage)
                continue
            objs = [k.write]
        elif k.is_Call:
            objs = k.arguments
        else:
            # Any other kind of node carries nothing to allocate. Without this
            # branch `objs` would be unbound on the first iteration, or would
            # still hold the objects of the previously visited node
            continue

        for i in objs:
            try:
                if i.is_LocalObject:
                    site = v[-1] if v else iet
                    self._alloc_object_on_low_lat_mem(site, i, storage)
                elif i.is_Array:
                    if i in iet.parameters:
                        # The Array is passed as a Callable argument
                        continue
                    elif i._mem_stack:
                        self._alloc_array_on_low_lat_mem(iet, i, storage)
                    else:
                        self._alloc_array_on_high_bw_mem(i, storage)
            except AttributeError:
                # E.g., a generic SymPy expression
                pass

    # Place symbols in a memory space
    if not iet.is_ElementalFunction:
        writes = set()
        reads = set()
        for efunc in kwargs.get('efuncs', []):
            for i in FindNodes(Expression).visit(efunc):
                if i.write.is_Function:
                    writes.add(i.write)
                # Keep `reads` free of anything known to be written so far
                reads = (reads | {r for r in i.reads if r.is_Function}) - writes

        for i in filter_sorted(writes):
            self._map_function_on_high_bw_mem(i, storage)
        for i in filter_sorted(reads):
            self._map_function_on_high_bw_mem(i, storage, read_only=True)

    # Introduce symbol definitions going in the low latency memory
    mapper = dict(storage._on_low_lat_mem)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Introduce symbol definitions going in the high bandwidth memory
    header = []
    footer = []
    for decl, alloc, free in storage._on_high_bw_mem:
        # Entries without a declaration contribute the allocation only
        if decl is None:
            header.append(alloc)
        else:
            header.extend([decl, alloc])
        footer.append(free)
    if header or footer:
        body = List(header=header, body=iet.body, footer=footer)
        iet = iet._rebuild(body=body)

    return iet, {}