def on_assign(self, states, assign):
    """Rewrite the value of ``assign`` to refer to its reaching definition.

    ``self._fix_var`` is asked for the definition that should replace uses
    of ``states["varname"]`` on the right-hand side.  When it yields a
    definition whose target is a real variable with a different name, a
    new ``ir.Assign`` is returned:

    * for an ``ir.Inst`` value, a copy of the value with the variable
      substituted;
    * for a bare ``ir.Var`` value, the replacement variable itself.

    In every other case ``assign`` is returned unchanged.
    """
    rhs = assign.value
    if isinstance(rhs, ir.Inst):
        newdef = self._fix_var(
            states, assign, assign.value.list_vars(),
        )
        # Has a replacement that is not the current variable
        if newdef is not None and newdef.target is not ir.UNDEFINED:
            if states["varname"] != newdef.target.name:
                replmap = {states["varname"]: newdef.target}
                # Work on a copy so the original expression is not mutated.
                rhs = copy(rhs)
                ir_utils.replace_vars_inner(rhs, replmap)
                return ir.Assign(
                    target=assign.target,
                    value=rhs,
                    loc=assign.loc,
                )
    elif isinstance(rhs, ir.Var):
        newdef = self._fix_var(states, assign, [rhs])
        # Has a replacement that is not the current variable.  The target
        # may be the ``ir.UNDEFINED`` sentinel (the Inst branch above
        # already guards against it); it is presumably not a variable and
        # has no ``name``, so leave the statement alone in that case.
        if newdef is not None and newdef.target is not ir.UNDEFINED:
            if states["varname"] != newdef.target.name:
                return ir.Assign(
                    target=assign.target,
                    value=newdef.target,
                    loc=assign.loc,
                )
    return assign
def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope, loc):
    """Return the variable (or value) to use as the last stencil index.

    When ``end_length`` is zero, ``dim_size`` is returned unchanged and
    nothing is generated.  Otherwise IR statements are appended to
    ``gen_nodes`` that call the jitted ``_compute_last_ind`` helper with
    ``dim_size`` and ``end_length``, and the fresh variable receiving the
    result is returned.  Types for every new variable and for the call are
    recorded in ``self.typemap`` / ``self.calltypes``.
    """
    needs_clamp = end_length != 0
    if not needs_clamp:
        return dim_size

    # set last index to size minus stencil size to avoid invalid
    # memory access
    index_const = ir.Var(scope, mk_unique_var("stencil_const_var"), loc)
    self.typemap[index_const.name] = types.intp
    # A plain number has to be wrapped in a constant node; anything else
    # is assigned as-is.
    if isinstance(end_length, numbers.Number):
        const_value = ir.Const(end_length, loc)
    else:
        const_value = end_length
    gen_nodes.append(ir.Assign(const_value, index_const, loc))

    last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
    self.typemap[last_ind.name] = types.intp

    # Expose the jitted helper to the IR through a global.
    g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
    check_func = numba.njit(_compute_last_ind)
    func_typ = types.functions.Dispatcher(check_func)
    self.typemap[g_var.name] = func_typ
    gen_nodes.append(
        ir.Assign(ir.Global("_compute_last_ind", check_func, loc), g_var, loc)
    )

    index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
    self.calltypes[index_call] = func_typ.get_call_type(
        self.typingctx, [types.intp, types.intp], {})
    gen_nodes.append(ir.Assign(index_call, last_ind, loc))
    return last_ind
def run_pass(self, state):
    """Insert a ``free_omnisci_buffer`` call before returns.

    The pass only acts when some assignment in the function loads a free
    variable whose name is listed in ``BufferMeta.class_names``.  In that
    case, for every block that contains a return, a global load of
    ``free_omnisci_buffer`` and a call to it with the returned value are
    inserted before the block terminator.

    Returns ``True`` when the IR was changed, ``False`` otherwise.
    """
    func_ir = state.func_ir  # the FunctionIR object

    uses_buffer = any(
        isinstance(stmt.value, ir.FreeVar)
        and stmt.value.name in BufferMeta.class_names
        for blk in func_ir.blocks.values()
        for stmt in blk.find_insts(ir.Assign)
    )
    if not uses_buffer:
        return False  # the IR is left untouched

    for blk in func_ir.blocks.values():
        loc = blk.loc
        scope = blk.scope
        # Only the first return found in a block is handled.
        ret = next(iter(blk.find_insts(ir.Return)), None)
        if ret is None:
            continue

        name = "free_omnisci_buffer_fn"
        free_fn = ir.Global(name, free_omnisci_buffer, loc)
        fn_var = scope.make_temp(loc)
        blk.insert_before_terminator(ir.Assign(free_fn, fn_var, loc))

        fn_call = ir.Expr.call(func=fn_var, args=[ret.value], kws=(), loc=loc)
        result_var = scope.make_temp(loc)
        blk.insert_before_terminator(ir.Assign(fn_call, result_var, blk.loc))
    return True  # we changed the IR
def test_assign(self):
    """Check ``ir.Assign`` equality via ``self.check``.

    The reference statement is expected to be the same as an identical
    statement and as one differing only in location, and different from
    statements whose value or target differs.
    """
    reference = ir.Assign(self.var_a, self.var_b, self.loc1)
    identical = ir.Assign(self.var_a, self.var_b, self.loc1)
    other_loc = ir.Assign(self.var_a, self.var_b, self.loc2)
    other_value = ir.Assign(self.var_c, self.var_b, self.loc1)
    other_target = ir.Assign(self.var_a, self.var_c, self.loc1)
    self.check(
        reference,
        same=[identical, other_loc],
        different=[other_value, other_target],
    )
def gen_block():
    """Build a fresh block holding three fixed assignments.

    Relies on ``self`` from the enclosing scope for the variables and
    locations.
    """
    block = ir.Block(ir.Scope(None, self.loc1), self.loc2)
    # (value, target) pairs, in the order they are appended.
    pairs = (
        (self.var_a, self.var_b),
        (self.var_a, self.var_c),
        (self.var_c, self.var_b),
    )
    statements = [ir.Assign(value, target, self.loc3) for value, target in pairs]
    for stmt in statements:
        block.append(stmt)
    return block
def handle_border(slice_fn_ty, dim, scope, loc, slice_func_var, stmts,
                  border_inds, border_tuple_items, other_arg, other_first):
    """Emit IR that writes ``zero_var`` into one border slice of ``out_arr``.

    Handles the border for the start or the end of the index range of
    dimension ``dim``.  The generated statements are appended to ``stmts``
    and ``border_tuple_items[dim]`` is replaced by the new slice variable.
    ``self``, ``out_arr``, ``zero_var`` and ``ndims`` come from the
    enclosing scope.
    """
    # ---- Generate call to slice func.
    sig = self.typingctx.resolve_function_type(
        slice_fn_ty, (types.intp,) * 2, {})
    si = border_inds[dim]
    assert(isinstance(si, (int, ir.Var)))
    si_var = ir.Var(scope, mk_unique_var("$border_ind"), loc)
    self.typemap[si_var.name] = types.intp
    # An integer border index becomes a constant node; a variable is
    # assigned directly.
    si_value = ir.Const(si, loc) if isinstance(si, int) else si
    stmts.append(ir.Assign(si_value, si_var, loc))

    # ``other_first`` decides on which side of the slice the border index goes.
    call_args = (other_arg, si_var) if other_first else (si_var, other_arg)
    slice_callexpr = ir.Expr.call(
        func=slice_func_var, args=call_args, kws=(), loc=loc)
    self.calltypes[slice_callexpr] = sig

    # ---- Generate slice var
    border_slice_var = ir.Var(scope, mk_unique_var("$slice"), loc)
    self.typemap[border_slice_var.name] = types.slice2_type
    stmts.append(ir.Assign(slice_callexpr, border_slice_var, loc))
    border_tuple_items[dim] = border_slice_var

    # ---- Build the index tuple out of all per-dimension slices
    border_ind_var = ir.Var(
        scope, mk_unique_var("$border_index_tuple_var"), loc)
    self.typemap[border_ind_var.name] = types.containers.UniTuple(
        types.slice2_type, ndims)
    tuple_call = ir.Expr.build_tuple(border_tuple_items, loc)
    stmts.append(ir.Assign(tuple_call, border_ind_var, loc))

    # ---- Store into the output array through that tuple
    setitem_call = ir.SetItem(out_arr, border_ind_var, zero_var, loc)
    out_arr_typ = self.typemap[out_arr.name]
    self.calltypes[setitem_call] = signature(
        types.none,
        out_arr_typ,
        self.typemap[border_ind_var.name],
        self.typemap[out_arr.name].dtype,
    )
    stmts.append(setitem_call)
def _dbgprint_after_each_array_assignments(lowerer, loop_body, typemap):
    """Insert a debug print after every numeric assignment in ``loop_body``.

    Each block in ``loop_body`` is replaced by a copy in which every
    assignment whose target type is in ``types.number_domain`` is followed
    by a string constant ``"<name> ="`` and a print of that string and the
    target.  ``typemap`` and ``lowerer.fndesc.calltypes`` are updated for
    the new nodes.
    """
    for label, block in loop_body.items():
        rebuilt = block.copy()
        rebuilt.clear()
        loc = block.loc
        scope = block.scope

        for inst in block.body:
            rebuilt.append(inst)
            # Append print after assignment
            if not isinstance(inst, ir.Assign):
                continue
            # Only apply to numbers
            if typemap[inst.target.name] not in types.number_domain:
                continue

            # Make constant string
            strval = "{} =".format(inst.target.name)
            strconsttyp = types.StringLiteral(strval)
            label_var = ir.Var(scope, mk_unique_var("str_const"), loc)
            label_assign = ir.Assign(
                value=ir.Const(value=strval, loc=loc),
                target=label_var,
                loc=loc,
            )
            typemap[label_var.name] = strconsttyp
            rebuilt.append(label_assign)

            # Make print node
            print_node = ir.Print(
                args=[label_var, inst.target], vararg=None, loc=loc)
            rebuilt.append(print_node)
            sig = numba.typing.signature(
                types.none,
                typemap[label_var.name],
                typemap[inst.target.name],
            )
            lowerer.fndesc.calltypes[print_node] = sig

        loop_body[label] = rebuilt
def replace_return_with_setitem(self, blocks, exit_value_var,
                                parfor_body_exit_label):
    """
    Find return statements in the IR and replace each of them with an
    assignment of the value "returned" by the kernel to ``exit_value_var``
    followed by a jump to ``parfor_body_exit_label``.

    Despite the name, no SetItem node is generated here.  The statement
    preceding each return must be a ``cast`` assignment; it is dropped and
    the value that was being cast is what gets assigned.

    ``blocks`` is modified in place (each block receives a new ``body``
    list); nothing is returned.
    """
    for block in blocks.values():
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Return):
                # previous stmt should have been a cast
                prev_stmt = new_body.pop()
                assert (isinstance(prev_stmt, ir.Assign) and
                        isinstance(prev_stmt.value, ir.Expr) and
                        prev_stmt.value.op == 'cast')

                # Assign the pre-cast value, then leave through the exit
                # label instead of returning.
                new_body.append(
                    ir.Assign(prev_stmt.value.value, exit_value_var, loc))
                new_body.append(ir.Jump(parfor_body_exit_label, loc))
            else:
                new_body.append(stmt)
        block.body = new_body
def replace_var_with_array_in_block(vars, block, typemap, calltypes):
    """Return the body of ``block`` with assignments to ``vars`` rewritten.

    Every assignment whose target name is in ``vars`` becomes a constant
    ``0`` index assignment followed by a SetItem storing the value at that
    index of the target.  Nested parfors have their init block and loop
    body processed through ``replace_var_with_array_internal`` and are
    then kept.  All other statements are copied over unchanged.  New
    entries are added to ``typemap`` and ``calltypes``.
    """
    rewritten = []
    for inst in block.body:
        is_tracked = isinstance(inst, ir.Assign) and inst.target.name in vars
        if not is_tracked:
            if isinstance(inst, parfor.Parfor):
                replace_var_with_array_internal(
                    vars, {0: inst.init_block}, typemap, calltypes)
                replace_var_with_array_internal(
                    vars, inst.loop_body, typemap, calltypes)
            rewritten.append(inst)
            continue

        # Index constant used to address the single array slot.
        zero_node = ir.Const(0, inst.loc)
        zero_var = ir.Var(
            inst.target.scope, mk_unique_var("$const_ind_0"), inst.loc)
        typemap[zero_var.name] = types.uintp
        rewritten.append(ir.Assign(zero_node, zero_var, inst.loc))

        store = ir.SetItem(inst.target, zero_var, inst.value, inst.loc)
        calltypes[store] = signature(
            types.none,
            types.npytypes.Array(typemap[inst.target.name], 1, "C"),
            types.intp,
            typemap[inst.target.name],
        )
        rewritten.append(store)
    return rewritten
def _strip_phi_nodes(self, func_ir):
    """Strip Phi nodes from ``func_ir``

    Every phi assignment is removed and, for each of its incoming
    (block, value) pairs, an assignment of that value to the phi target is
    placed in the incoming block: right after the last assignment to the
    value within that block, or at the top of the block when there is
    none.  An undefined incoming value is replaced by a null expression.

    ``func_ir.blocks`` is replaced by a dict of copied blocks and
    ``func_ir`` is returned.
    """
    exporters = defaultdict(list)
    phis = set()
    # Find all variables that needs to be exported
    for block in func_ir.blocks.values():
        for assign in block.find_insts(ir.Assign):
            expr = assign.value
            if not isinstance(expr, ir.Expr):
                continue
            if expr.op != 'phi':
                continue
            phis.add(assign)
            for ib, iv in zip(expr.incoming_blocks, expr.incoming_values):
                exporters[ib].append((assign.target, iv))

    # Rewrite the blocks with the new exporting assignments
    newblocks = {}
    for label, block in func_ir.blocks.items():
        newblk = copy(block)
        newblocks[label] = newblk
        # strip phis
        newblk.body = [stmt for stmt in block.body if stmt not in phis]

        # insert exporters
        for target, rhs in exporters[label]:
            # If RHS is undefined
            if rhs is ir.UNDEFINED:
                # Put in a NULL initializer, set the location to be in what
                # will eventually materialize as the prologue.
                rhs = ir.Expr.null(loc=func_ir.loc)

            export = ir.Assign(target=target, value=rhs, loc=rhs.loc)

            # Place it after the last assignment to rhs in this block, if
            # there is one.
            last_def = None
            for stmt in newblk.find_insts(ir.Assign):
                if stmt.target == rhs:
                    last_def = stmt
            if last_def is None:
                newblk.prepend(export)
            else:
                newblk.insert_after(export, last_def)

    func_ir.blocks = newblocks
    return func_ir
def _insert_outgoing_phis(self):
    """
    Add assignments to forward requested outgoing values to subsequent
    blocks.

    For every ``phiname -> varname`` entry of
    ``self.dfainfo.outgoing_phis`` the current value of ``varname`` is
    assigned to ``phiname`` in the current block, ahead of the terminator
    if the block already has one.  The assigned value is also recorded in
    ``self.definitions``.
    """
    for phiname, varname in self.dfainfo.outgoing_phis.items():
        target = self.current_scope.get_or_define(phiname, loc=self.loc)
        source = self.get(varname)
        stmt = ir.Assign(value=source, target=target, loc=self.loc)
        self.definitions[target.name].append(stmt.value)

        block = self.current_block
        if block.is_terminated:
            block.insert_before_terminator(stmt)
        else:
            block.append(stmt)
def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                     body_blocks, dispatcher_factory, extra):
    """Wrap the with-body in ``numba.set_parallel_chunksize`` calls.

    ``extra["args"]`` must hold exactly one argument.  The start block
    loses its first statement and, just before its last statement, gains
    IR that loads ``numba.set_parallel_chunksize`` and calls it with that
    argument, keeping the call's result.  The end block is prefixed with a
    second call that passes that kept result back in.  Finally
    ``func_ir._definitions`` is rebuilt from ``blocks``.
    """
    ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
    assert extra is not None
    args = extra["args"]
    assert len(args) == 1
    arg = args[0]

    head = blocks[blk_start]
    scope = head.scope
    loc = head.loc
    if isinstance(arg, ir.Arg):
        arg = ir.Var(scope, arg.name, loc)

    # global for Numba itself
    gvar = scope.redefine("$ngvar", loc)
    load_numba = ir.Assign(ir.Global('numba', numba, loc), gvar, loc)

    # getattr for set chunksize function in Numba
    spcattr = ir.Expr.getattr(gvar, 'set_parallel_chunksize', loc)
    spcvar = scope.redefine("$spc", loc)
    load_setter = ir.Assign(spcattr, spcvar, loc)

    # call set_parallel_chunksize
    orig_pc_var = scope.redefine("$save_pc", loc)
    cs_var = scope.redefine("$cs_var", loc)
    store_arg = ir.Assign(arg, cs_var, loc)
    spc_call = ir.Expr.call(spcvar, [cs_var], (), loc)
    call_setter = ir.Assign(spc_call, orig_pc_var, loc)

    restore_spc_call = ir.Expr.call(spcvar, [orig_pc_var], (), loc)
    call_restore = ir.Assign(restore_spc_call, orig_pc_var, loc)

    set_state = [load_numba, load_setter, store_arg, call_setter]
    restore_state = [call_restore]

    head_body = blocks[blk_start].body
    blocks[blk_start].body = head_body[1:-1] + set_state + [head_body[-1]]
    blocks[blk_end].body = restore_state + blocks[blk_end].body
    func_ir._definitions = build_definitions(blocks)
    ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
def on_assign(self, states, assign):
    """Give each assignment to ``states["varname"]`` its own target.

    Assignments to other variables are returned as they are.  The first
    recorded assignment keeps its target; later ones get a target from
    ``scope.redefine``.  The resulting statement is appended to the
    definition list of the current label in ``states["defmap"]`` and
    returned.
    """
    if assign.target.name != states["varname"]:
        return assign

    scope = states["scope"]
    defmap = states["defmap"]
    if defmap:
        newtarget = scope.redefine(assign.target.name, loc=assign.loc)
    else:
        # Allow first assignment to retain the name
        newtarget = assign.target
        _logger.debug("first assign: %s", newtarget)

    assign = ir.Assign(target=newtarget, value=assign.value, loc=assign.loc)
    defmap[states["label"]].append(assign)
    return assign
def _find_def_from_top(self, states, label, loc):
    """Find definition reaching block of ``label``.

    If ``label`` lies in the dominance frontier of any block that holds a
    definition, a phi node for a fresh version of the variable is inserted
    at ``label``, its incoming values are resolved from every predecessor
    and the phi assignment is returned.

    Otherwise the search continues from the immediate dominator.  When
    ``label`` is its own immediate dominator there is nothing left to
    search: a warning about the uninitialized variable is issued and
    ``UndefinedVariable`` is returned.
    """
    _logger.debug("find_def_from_top label %r", label)
    cfg = states["cfg"]
    defmap = states["defmap"]
    phimap = states["phimap"]
    domfronts = states["df+"]

    needs_phi = any(label in domfronts[deflabel] for deflabel in defmap)
    if not needs_phi:
        idom = cfg.immediate_dominators()[label]
        if idom == label:
            # We have searched to the top of the idom tree.
            # Since we still cannot find a definition,
            # we will warn.
            _warn_about_uninitialized_variable(states["varname"], loc)
            return UndefinedVariable
        _logger.debug("idom %s from label %s", idom, label)
        return self._find_def_from_bottom(states, idom, loc=loc)

    scope = states["scope"]
    # From here on the location of the current block is used.
    loc = states["block"].loc
    # fresh variable
    freshvar = scope.redefine(states["varname"], loc=loc)
    # insert phi
    phinode = ir.Assign(
        target=freshvar,
        value=ir.Expr.phi(loc=loc),
        loc=loc,
    )
    _logger.debug("insert phi node %s at %s", phinode, label)
    defmap[label].insert(0, phinode)
    phimap[label].append(phinode)

    # Find incoming values for the Phi node
    for pred, _ in cfg.predecessors(label):
        incoming_def = self._find_def_from_bottom(states, pred, loc=loc)
        _logger.debug("incoming_def %s", incoming_def)
        phinode.value.incoming_values.append(incoming_def.target)
        phinode.value.incoming_blocks.append(pred)
    return phinode
def on_assign(self, states, assign):
    """Give each assignment to ``states['varname']`` its own target.

    Assignments to other variables are returned as they are.  The first
    recorded assignment keeps its target, which is asserted to be one of
    the scope's local variables; later ones get a target from
    ``scope.redefine``.  The resulting statement is appended to the
    definition list of the current label in ``states['defmap']`` and
    returned.
    """
    if assign.target.name != states['varname']:
        return assign

    scope = states['scope']
    defmap = states['defmap']
    if defmap:
        newtarget = scope.redefine(assign.target.name, loc=assign.loc)
    else:
        # Allow first assignment to retain the name
        newtarget = assign.target
        _logger.debug("first assign: %s", newtarget)
        assert newtarget.name in scope.localvars

    assign = ir.Assign(target=newtarget, value=assign.value, loc=assign.loc)
    defmap[states['label']].append(assign)
    return assign
def _bypass_with_context(blocks, blk_start, blk_end, forwardvars):
    """Replace the head block of a with-context by a jump to its end.

    The new head block first copies, for every ``source -> destination``
    name pair in *forwardvars*, the source variable into the destination
    variable, and then jumps to *blk_end*.

    *blocks* is modified inplace.
    """
    head = blocks[blk_start]
    scope, loc = head.scope, head.loc

    replacement = ir.Block(scope=scope, loc=loc)
    for src_name, dst_name in forwardvars.items():
        src = scope.get_exact(src_name)
        dst = scope.get_exact(dst_name)
        replacement.append(ir.Assign(value=src, target=dst, loc=loc))
    replacement.append(ir.Jump(target=blk_end, loc=loc))

    blocks[blk_start] = replacement
def store(self, value, name, redefine=False):
    """
    Emit an assignment of *value* (a Expr or Var instance) to the variable
    named *name* (a str object) in the current block.

    The target is redefined when *redefine* is set or when the current
    block offset is part of ``self.cfa.backbone``; names listed in
    ``self.code_cellvars`` are redefined without renaming.  Otherwise the
    existing variable is reused or a new one is defined.

    Returns the target variable.
    """
    must_redefine = redefine or self.current_block_offset in self.cfa.backbone
    if must_redefine:
        rename = name not in self.code_cellvars
        target = self.current_scope.redefine(
            name, loc=self.loc, rename=rename)
    else:
        target = self.current_scope.get_or_define(name, loc=self.loc)

    # Variables are passed through the assigner before being stored.
    if isinstance(value, ir.Var):
        value = self.assigner.assign(value, target)

    self.current_block.append(
        ir.Assign(value=value, target=target, loc=self.loc))
    self.definitions[target.name].append(value)
    return target
def test_block(self):
    """Check ``ir.Block`` equality via ``self.check``.

    Two blocks built from the same statements are expected to be the same;
    a block carrying one additional statement is expected to be different.
    """
    def gen_block():
        # Fresh block holding three fixed assignments.
        parent = ir.Scope(None, self.loc1)
        tmp = ir.Block(parent, self.loc2)
        assign1 = ir.Assign(self.var_a, self.var_b, self.loc3)
        assign2 = ir.Assign(self.var_a, self.var_c, self.loc3)
        assign3 = ir.Assign(self.var_c, self.var_b, self.loc3)
        tmp.append(assign1)
        tmp.append(assign2)
        tmp.append(assign3)
        return tmp

    a = gen_block()
    b = gen_block()
    # Build the differing block in two steps.  Chaining ``.append(...)``
    # onto ``gen_block()`` would bind the return value of ``append``
    # (expected to be ``None``, as for ``list.append`` -- confirm against
    # ``ir.Block``) instead of the block itself.
    c = gen_block()
    c.append(ir.Assign(self.var_a, self.var_b, self.loc3))
    self.check(a, same=[b], different=[c])
def get_column_read_nodes(c_type, cvar, arrow_readers_var, i):
    """Generate the IR nodes that read parquet column ``i`` into ``cvar``.

    The source of a small function ``f(arrow_readers)`` is assembled as
    text, executed to obtain the function object, compiled to Numba IR and
    its single block is spliced: the argument node is replaced by
    ``arrow_readers_var`` and a final assignment copies the generated
    ``column*`` variable into ``cvar``.

    Returns the list of IR statements.
    """
    loc = cvar.loc

    # Source text of the reader function; the column index and element
    # type are substituted into it.
    func_text = 'def f(arrow_readers):\n'
    func_text += ' col_size = get_column_size_parquet(arrow_readers, {})\n'.format(
        i)
    # generate strings differently
    if c_type == string_type:
        # pass size for easier allocation and distributed analysis
        func_text += ' column = read_parquet_str(arrow_readers, {}, col_size)\n'.format(
            i)
    else:
        el_type = get_element_type(c_type)
        if el_type == repr(types.NPDatetime('ns')):
            # Datetime columns are allocated as int64 and then converted.
            func_text += ' column_tmp = np.empty(col_size, dtype=np.int64)\n'
            # TODO: fix alloc
            func_text += ' column = sdc.hiframes.api.ts_series_to_arr_typ(column_tmp)\n'
        else:
            func_text += ' column = np.empty(col_size, dtype=np.{})\n'.format(
                el_type)
        func_text += ' status = read_parquet(arrow_readers, {}, column, np.int32({}))\n'.format(
            i, _type_to_pq_dtype_number[el_type])
    # Execute the generated source to obtain ``f``; the text is built only
    # from the fixed templates above plus ``i`` / ``el_type``.
    loc_vars = {}
    exec(func_text, {'sdc': sdc, 'np': np}, loc_vars)
    size_func = loc_vars['f']
    # The compiled IR is taken from whichever block ``popitem`` yields
    # (presumably the function has a single block -- TODO confirm).
    _, f_block = compile_to_numba_ir(
        size_func, {
            'get_column_size_parquet': get_column_size_parquet,
            'read_parquet': read_parquet,
            'read_parquet_str': read_parquet_str,
            'np': np,
            'sdc': sdc,
            'StringArray': StringArray
        }).blocks.popitem()

    replace_arg_nodes(f_block, [arrow_readers_var])
    # Drop the last three statements of the block (presumably the
    # implicit return sequence -- TODO confirm).
    out_nodes = f_block.body[:-3]
    # Find the last statement whose target is a ``column*`` variable and
    # forward that variable to ``cvar``.
    # NOTE(review): every statement is assumed to have a ``target``, and
    # ``assign`` stays unbound if no target name starts with "column".
    for stmt in reversed(out_nodes):
        if stmt.target.name.startswith("column"):
            assign = ir.Assign(stmt.target, cvar, loc)
            break

    out_nodes.append(assign)
    return out_nodes
def replace_return_with_setitem(self, blocks, index_vars, out_name):
    """
    Replace every return statement in ``blocks`` with a SetItem that stores
    the value "returned" by the kernel into the result array named
    ``out_name``.

    With a single index variable the array is indexed by it directly;
    with several, a tuple of the index variables is built first and used
    as the index.  Block bodies are replaced in place.

    Returns the block labels that contained return statements.
    """
    ret_blocks = []

    for label, block in blocks.items():
        scope = block.scope
        loc = block.loc
        rewritten = []
        for stmt in block.body:
            if not isinstance(stmt, ir.Return):
                rewritten.append(stmt)
                continue

            ret_blocks.append(label)
            # If 1D array then avoid the tuple construction.
            if len(index_vars) == 1:
                rvar = ir.Var(scope, out_name, loc)
                ivar = ir.Var(scope, index_vars[0], loc)
                rewritten.append(ir.SetItem(rvar, ivar, stmt.value, loc))
                continue

            # Convert the string names of the index variables into
            # ir.Var's.
            var_index_vars = [
                ir.Var(scope, one_var, loc) for one_var in index_vars
            ]
            s_index_var = ir.Var(
                scope, ir_utils.mk_unique_var("stencil_index"), loc)
            # Build a tuple from the index ir.Var's.
            tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
            rewritten.append(ir.Assign(tuple_call, s_index_var, loc))
            rvar = ir.Var(scope, out_name, loc)
            # Write the return statements original value into
            # the array using the tuple index.
            rewritten.append(ir.SetItem(rvar, s_index_var, stmt.value, loc))
        block.body = rewritten
    return ret_blocks
def on_assign(self, states, assign):
    """Give each assignment to ``states['varname']`` its own target.

    Assignments to other variables are returned as they are.  The first
    recorded assignment keeps its target; a ``NumbaIRAssumptionWarning``
    is emitted if that target is not among the scope's local variables.
    Later assignments get a target from ``scope.redefine``.  The resulting
    statement is appended to the definition list of the current label in
    ``states['defmap']`` and returned.
    """
    if assign.target.name != states['varname']:
        return assign

    scope = states['scope']
    defmap = states['defmap']
    if defmap:
        newtarget = scope.redefine(assign.target.name, loc=assign.loc)
    else:
        # Allow first assignment to retain the name
        newtarget = assign.target
        _logger.debug("first assign: %s", newtarget)
        if newtarget.name not in scope.localvars:
            wmsg = f"variable {newtarget.name!r} is not in scope."
            warnings.warn(
                errors.NumbaIRAssumptionWarning(wmsg, loc=assign.loc))

    assign = ir.Assign(target=newtarget, value=assign.value, loc=assign.loc)
    defmap[states['label']].append(assign)
    return assign
def apply(self):
    """
    Return a copy of the block in which every assignment whose value is a
    key of ``self.getitems`` has that getitem replaced by a
    static_getitem using the recorded constant as index.
    """
    rewritten = self.block.copy()
    rewritten.clear()

    for inst in self.block.body:
        if isinstance(inst, ir.Assign) and inst.value in self.getitems:
            expr = inst.value
            static_expr = ir.Expr.static_getitem(
                value=expr.value,
                index=self.getitems[expr],
                index_var=expr.index,
                loc=expr.loc,
            )
            inst = ir.Assign(
                value=static_expr, target=inst.target, loc=inst.loc)
        rewritten.append(inst)
    return rewritten
def apply(self):
    """
    Return a new block in which every assignment whose value is a key of
    ``self.getitems`` has that getitem replaced by a static_getitem whose
    index is the literal value of the string.  The call type of the
    original expression is carried over to the new one.
    """
    rewritten = ir.Block(self.block.scope, self.block.loc)

    for inst in self.block.body:
        if isinstance(inst, ir.Assign) and inst.value in self.getitems:
            expr = inst.value
            # Only the literal value of the recorded pair is needed here.
            _, lit_val = self.getitems[expr]
            static_expr = ir.Expr.static_getitem(
                value=expr.value,
                index=lit_val,
                index_var=expr.index,
                loc=expr.loc,
            )
            self.calltypes[static_expr] = self.calltypes[expr]
            inst = ir.Assign(
                value=static_expr, target=inst.target, loc=inst.loc)
        rewritten.append(inst)
    return rewritten
def apply(self):
    """
    Return a copy of the block in which each statement recorded in
    ``self.prints`` (`var = call <print function>(...)`) is replaced by
    the pair `print(...)` and `var = const(None)`.
    """
    rewritten = self.block.copy()
    rewritten.clear()

    for inst in self.block.body:
        if inst not in self.prints:
            rewritten.append(inst)
            continue

        call = self.prints[inst]
        rewritten.append(
            ir.Print(args=call.args, vararg=call.vararg, loc=call.loc))
        # The original target still has to be defined; it receives None.
        rewritten.append(
            ir.Assign(
                value=ir.Const(None, loc=call.loc),
                target=inst.target,
                loc=inst.loc,
            )
        )
    return rewritten
def assign(self, rhs, typ, name="pf_assign") -> ir.Var:
    """Lower an assignment of ``rhs`` to a freshly redefined variable.

    Parameters
    ----------
    rhs : object
        The value to store.
    typ : types.Type
        Type recorded in the typemap for the new variable.
    name : str
        Name passed to the scope when redefining the variable.

    Returns
    -------
    res : ir.Var
        The variable that was assigned.
    """
    loc = self._loc
    new_var = self._scope.redefine(name, loc)
    self._typemap[new_var.name] = typ
    self._lowerer.lower_inst(ir.Assign(rhs, new_var, loc))
    return new_var
def assign_inplace(self, rhs, typ, name) -> ir.Var:
    """Lower an assignment of ``rhs`` to the variable called ``name``.

    The variable is not redefined; its typemap entry is only set when no
    entry exists yet.

    Parameters
    ----------
    rhs : object
        The value to store.
    typ : types.Type
        Type recorded for the variable if it has none so far.
    name : str
        Name of the variable to store to.

    Returns
    -------
    res : ir.Var
        The variable that was assigned.
    """
    loc = self._loc
    target = ir.Var(self._scope, name, loc)
    stmt = ir.Assign(rhs, target, loc)
    self._typemap.setdefault(target.name, typ)
    self._lowerer.lower_inst(stmt)
    return target
def run_pass(self, state):
    """Replace dtype comparisons typed as boolean literals by constants.

    Partial type inference is run first as a sub-pipeline.  Every binop
    assignment for which ``self.is_dtype_comparison`` holds and whose
    assignee is typed as a ``BooleanLiteral`` is replaced by an assignment
    of that literal value.  If anything was replaced, a second
    sub-pipeline (dead code elimination, semantic constant rewriting, dead
    branch pruning) is run.

    Returns ``True`` when the IR was mutated.
    """
    # run as subpipeline
    from numba.core.compiler_machinery import PassManager
    inference = PassManager("subpipeline")
    inference.add_pass(PartialTypeInference, "performs partial type inference")
    inference.finalize()
    inference.run(state)

    mutated = False
    func_ir = state.func_ir
    for block in func_ir.blocks.values():
        for assign in block.find_insts(ir.Assign):
            binop = assign.value
            if not isinstance(binop, ir.Expr) or binop.op != 'binop':
                continue
            if not self.is_dtype_comparison(func_ir, binop):
                continue

            var = func_ir.get_assignee(binop)
            typ = state.typemap.get(var.name, None)
            if not isinstance(typ, types.BooleanLiteral):
                continue

            loc = binop.loc
            new_assign = ir.Assign(ir.Const(typ.literal_value, loc), var, loc)
            # replace instruction
            block.insert_after(new_assign, assign)
            block.remove(assign)
            mutated = True

    if mutated:
        # rewrite consts / dead branch pruning
        cleanup = PassManager("subpipeline")
        cleanup.add_pass(DeadCodeElimination, "dead code elimination")
        cleanup.add_pass(RewriteSemanticConstants, "rewrite semantic constants")
        cleanup.add_pass(DeadBranchPrune, "dead branch pruning")
        cleanup.finalize()
        cleanup.run(state)
    return mutated
def _handle_matches(self):
    """Work out which matched instructions to rewrite, delete or keep.

    Every matched array assignment is mapped to a new ``arrayexpr``
    assignment.  Operands that are themselves matched array assignments
    are folded into the parent expression; if their target is a temporary
    they are marked dead and mapped to ``None``.  Constant operands are
    taken from ``self.const_assigns``.

    Returns ``(replace_map, dead_vars, used_vars)``.
    """
    replace_map = {}
    dead_vars = set()
    used_vars = defaultdict(int)

    for instr in self.array_assigns.values():
        expr = instr.value
        arr_inps = []
        # The input list is filled in below, after the expression is built.
        arr_expr = (self._get_array_operator(expr), arr_inps)
        new_expr = ir.Expr(
            op="arrayexpr",
            loc=expr.loc,
            expr=arr_expr,
            ty=self.typemap[instr.target.name],
        )
        new_instr = ir.Assign(new_expr, instr.target, instr.loc)
        replace_map[instr] = new_instr
        self.array_assigns[instr.target.name] = new_instr

        for operand in self._get_operands(expr):
            operand_name = operand.name
            if operand_name in self.array_assigns:
                child_assign = self.array_assigns[operand_name]
                child_expr = child_assign.value
                for child_var in child_expr.list_vars():
                    used_vars[child_var.name] += 1
                arr_inps.append(self._translate_expr(child_expr))
                if child_assign.target.is_temp:
                    dead_vars.add(child_assign.target.name)
                    replace_map[child_assign] = None
            elif operand_name in self.const_assigns:
                arr_inps.append(self.const_assigns[operand_name])
            else:
                used_vars[operand.name] += 1
                arr_inps.append(operand)
    return replace_map, dead_vars, used_vars
def add_indices_to_kernel(self, kernel, index_names, ndim,
                          neighborhood, standard_indexed, typemap, calltypes):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Every getitem / static_getitem on a kernel argument that is not listed
    in ``standard_indexed`` is rewritten in place (block bodies of
    ``kernel.blocks`` are replaced).  New variables and calls are recorded
    in ``typemap`` and ``calltypes``.

    If ``neighborhood`` is None it is computed from the constant indices
    used by the kernel; otherwise its length must equal ``ndim``.

    Returns a tuple ``(neighborhood, relatively_indexed)`` where
    ``relatively_indexed`` is the set of argument names that were accessed
    with relative indexing.

    Raises ValueError when the neighborhood length does not match ``ndim``,
    when the kernel assigns into one of its argument arrays, when an index
    is not constant and no neighborhood was given, when no relatively
    indexed access exists, or when an index does not match ``ndim``.
    """
    # Constant values seen so far, keyed by the variable name they are
    # assigned to.
    const_dict = {}
    # Index constants used by relative accesses (only collected when the
    # neighborhood has to be computed).
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT >= 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    if neighborhood is None:
        need_to_calc_kernel = True
    else:
        need_to_calc_kernel = False
        if len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d " \
                "dimensional input array" % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)

    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT >= 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value
            # Writing into a kernel argument is rejected.
            if ((isinstance(stmt, ir.Assign) and
                 isinstance(stmt.value, ir.Expr) and
                 stmt.value.op in ['setitem', 'static_setitem'] and
                 stmt.value.value.name in kernel.arg_names) or
                (isinstance(stmt, ir.SetItem) and
                 stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil " \
                    "kernels is not allowed.")
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Expr) and
                    stmt.value.op in ['getitem', 'static_getitem'] and
                    stmt.value.value.name in kernel.arg_names and
                    stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    stmt_index_var = stmt.value.index_var
                    # allow static_getitem since rewrite passes are applied
                    #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts += [tuple_table[stmt_index_var.name]]
                    elif stmt_index_var.name in const_dict:
                        kernel_consts += [const_dict[stmt_index_var.name]]
                    else:
                        raise ValueError(
                            "stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in stmt.value.index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # If the array is indexed with a slice then we
                    # have to add the index value with a call to
                    # slice_addition.
                    if isinstance(stmt_index_var_typ, types.misc.SliceType):
                        sa_var = ir.Var(
                            scope,
                            ir_utils.mk_unique_var("slice_addition"), loc)
                        sa_func = numba.njit(slice_addition)
                        sa_func_typ = types.functions.Dispatcher(sa_func)
                        typemap[sa_var.name] = sa_func_typ
                        g_sa = ir.Global("slice_addition", sa_func, loc)
                        new_body.append(ir.Assign(g_sa, sa_var, loc))
                        slice_addition_call = ir.Expr.call(
                            sa_var, [stmt_index_var, index_var], (), loc)
                        calltypes[
                            slice_addition_call] = sa_func_typ.get_call_type(
                            self._typingctx,
                            [stmt_index_var_typ, types.intp], {})
                        new_body.append(
                            ir.Assign(slice_addition_call, tmpvar, loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                    else:
                        acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                else:
                    index_vars = []
                    sum_results = []
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    const_index_vars = []
                    ind_stencils = []

                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        # Constant holding the position within the index tuple.
                        tmpname = ir_utils.mk_unique_var("const_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc), tmpvar, loc))
                        const_index_vars += [tmpvar]
                        index_var = ir.Var(scope, index_names[dim], loc)
                        index_vars += [index_var]

                        # Variable receiving the absolute index of this
                        # dimension.
                        tmpname = ir_utils.mk_unique_var(
                            "ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils += [tmpvar]
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_vars[dim], loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))
                        # Get the type of this particular part of the index tuple.
                        one_index_typ = stmt_index_var_typ[dim]
                        # If the array is indexed with a slice then we
                        # have to add the index value with a call to
                        # slice_addition.
                        if isinstance(one_index_typ, types.misc.SliceType):
                            sa_var = ir.Var(
                                scope,
                                ir_utils.mk_unique_var("slice_addition"), loc)
                            sa_func = numba.njit(slice_addition)
                            sa_func_typ = types.functions.Dispatcher(
                                sa_func)
                            typemap[sa_var.name] = sa_func_typ
                            g_sa = ir.Global("slice_addition", sa_func, loc)
                            new_body.append(ir.Assign(g_sa, sa_var, loc))
                            slice_addition_call = ir.Expr.call(
                                sa_var, [getitemvar, index_vars[dim]], (), loc)
                            calltypes[
                                slice_addition_call] = sa_func_typ.get_call_type(
                                self._typingctx,
                                [one_index_typ, types.intp], {})
                            new_body.append(
                                ir.Assign(slice_addition_call, tmpvar, loc))
                        else:
                            acc_call = ir.Expr.binop(
                                operator.add, getitemvar, index_vars[dim], loc)
                            new_body.append(
                                ir.Assign(acc_call, tmpvar, loc))

                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(
                        ir.Assign(
                            ir.Expr.getitem(stmt.value.value, s_index_var, loc),
                            stmt.target, loc))
            else:
                # Anything that is not a relative getitem is kept as-is.
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the size of the kernel by finding the maximum absolute value
        # index used in the kernel specification.
        neighborhood = [[0, 0] for _ in range(ndim)]
        if len(kernel_consts) == 0:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, tuple) or isinstance(index, list):
                for i in range(len(index)):
                    te = index[i]
                    # Tuple elements may still be variables bound to a
                    # known constant.
                    if isinstance(te, ir.Var) and te.name in const_dict:
                        te = const_dict[te.name]
                    if isinstance(te, int):
                        neighborhood[i][0] = min(neighborhood[i][0], te)
                        neighborhood[i][1] = max(neighborhood[i][1], te)
                    else:
                        raise ValueError(
                            "stencil kernel index is not constant,"
                            "'neighborhood' option required")
                index_len = len(index)
            elif isinstance(index, int):
                neighborhood[0][0] = min(neighborhood[0][0], index)
                neighborhood[0][1] = max(neighborhood[0][1], index)
                index_len = 1
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")
            if index_len != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

    return (neighborhood, relatively_indexed)
def gen_getitem(out_var, in_var, ind, calltypes, nodes):
    """Append ``out_var = static_getitem(in_var, ind)`` to ``nodes``.

    The expression is created at the location of ``out_var`` without an
    index variable, and its entry in ``calltypes`` is set to ``None``.
    """
    loc = out_var.loc
    item_expr = ir.Expr.static_getitem(
        value=in_var, index=ind, index_var=None, loc=loc)
    calltypes[item_expr] = None
    nodes.append(ir.Assign(value=item_expr, target=out_var, loc=loc))