def op_STORE_SUBSCR(self, inst, target, index, value):
    """Emit the IR statement for a ``STORE_SUBSCR`` instruction.

    The ``index``, ``target`` and ``value`` names are resolved through
    ``self.get`` (in that order), wrapped in an ``ir.SetItem`` located at
    ``self.loc``, and appended to the block currently being built.
    ``inst`` is not used by this handler.
    """
    index_var = self.get(index)
    target_var = self.get(target)
    value_var = self.get(value)
    setitem = ir.SetItem(target=target_var, index=index_var,
                         value=value_var, loc=self.loc)
    self.current_block.append(setitem)
def inline_array(array_var, expr, stmts, list_vars, dels):
    """Inline ``array_var = numpy.array(<list>)`` as allocate-then-fill.

    When ``expr`` is a call to ``numpy.array`` whose first argument is one of
    ``list_vars`` and whose typed result is a 1-D array-compatible type, the
    call is replaced by a ``np.empty`` allocation followed by one
    ``ir.SetItem`` per element of the list's build sequence. All generated
    statements, followed by ``dels``, are appended to ``stmts``.

    Any unmet precondition is reported through ``require``; on success the
    function returns True.
    """
    call_id = guard(find_callname, func_ir, expr)
    require(call_id)
    require(call_id[1] == 'numpy' and call_id[0] == 'array')
    list_var = expr.args[0]
    require(list_var.name in list_vars)
    result_typ = calltypes[expr].return_type
    require(isinstance(result_typ, types.ArrayCompatible)
            and result_typ.ndim == 1)

    loc = expr.loc
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    elements, _ = find_build_sequence(func_ir, list_var)
    count = len(elements)

    # Variables holding the element count and its 1-tuple form.
    intp = types.intp
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    typemap[size_var.name] = intp
    typemap[size_tuple_var.name] = types.UniTuple(intp, 1)
    stmts.append(
        _new_definition(func_ir, size_var, ir.Const(count, loc=loc), loc))
    stmts.append(
        _new_definition(func_ir, size_tuple_var,
                        ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # array_var = np.empty(size_var)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    empty_fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature itself is not used; the call is kept as-is.
    context.resolve_function_type(empty_fnty, (intp,), {})
    typemap[empty_func.name] = empty_fnty
    stmts.append(
        _new_definition(func_ir, empty_func,
                        ir.Global('empty', np.empty, loc=loc), loc))
    alloc_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
    calltypes[alloc_call] = typing.signature(array_typ, intp)
    stmts.append(_new_definition(func_ir, array_var, alloc_call, loc))

    # array_var[position] = element, one store per list element.
    for position, element in enumerate(elements):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = intp
        stmts.append(
            _new_definition(func_ir, index_var, ir.Const(position, loc), loc))
        store = ir.SetItem(array_var, index_var, element, loc)
        calltypes[store] = typing.signature(types.none, array_typ, intp,
                                            elem_typ)
        stmts.append(store)

    stmts.extend(dels)
    return True
def replace_return_with_setitem(self, blocks, index_vars, out_name):
    """Rewrite every ``ir.Return`` in ``blocks`` into a store to the output.

    Each return statement is replaced by an ``ir.SetItem`` that writes the
    returned value into the variable named ``out_name``, indexed by the
    variables named in ``index_vars``. Block bodies are replaced in place.

    Returns the list of block labels in which a return statement was found.
    """
    labels_with_return = []

    for label, block in blocks.items():
        scope, loc = block.scope, block.loc
        rewritten = []
        for stmt in block.body:
            if not isinstance(stmt, ir.Return):
                rewritten.append(stmt)
                continue

            labels_with_return.append(label)
            if len(index_vars) == 1:
                # Single index: use it directly, no tuple needed.
                out_var = ir.Var(scope, out_name, loc)
                position = ir.Var(scope, index_vars[0], loc)
            else:
                # Turn the index variable names into ir.Var's and pack
                # them into a freshly named tuple variable.
                parts = [ir.Var(scope, name, loc) for name in index_vars]
                position = ir.Var(
                    scope, ir_utils.mk_unique_var("stencil_index"), loc)
                packed = ir.Expr.build_tuple(parts, loc)
                rewritten.append(ir.Assign(packed, position, loc))
                out_var = ir.Var(scope, out_name, loc)
            # Store the value that used to be returned into the output.
            rewritten.append(ir.SetItem(out_var, position, stmt.value, loc))
        block.body = rewritten

    return labels_with_return
def test_setitem(self):
    """Check ``ir.SetItem`` comparison through ``self.check``.

    Nodes built from the same target/index/value are passed as ``same`` even
    when only the location differs; nodes that differ in target, index or
    value are passed as ``different``.
    """
    def build(target=self.var_a, index=self.var_b, value=self.var_c,
              loc=self.loc1):
        return ir.SetItem(target, index, value, loc)

    reference = build()
    twin = build()
    other_loc = build(loc=self.loc2)
    other_target = build(target=self.var_d)
    other_index = build(index=self.var_d)
    other_value = build(value=self.var_d)

    self.check(reference,
               same=[twin, other_loc],
               different=[other_target, other_index, other_value])
def op_STORE_SLICE_3(self, inst, base, start, stop, value, slicevar,
                     indexvar):
    """Emit IR for ``base[start:stop] = value``.

    The builtin ``slice`` is stored under ``slicevar``, the call
    ``slice(start, stop)`` is stored under ``indexvar``, and an
    ``ir.SetItem`` using that index is appended to the current block.
    ``inst`` is not used by this handler.
    """
    base_var = self.get(base)
    lower = self.get(start)
    upper = self.get(stop)

    # slicevar = slice
    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)

    # indexvar = slicevar(start, stop)
    slice_call = ir.Expr.call(self.get(slicevar), (lower, upper), (),
                              loc=self.loc)
    self.store(value=slice_call, name=indexvar)

    # base[indexvar] = value
    setitem = ir.SetItem(base_var, self.get(indexvar), self.get(value),
                         loc=self.loc)
    self.current_block.append(setitem)
def op_STORE_SLICE_0(self, inst, base, value, slicevar, indexvar, nonevar):
    """Emit IR for ``base[:] = value``.

    The builtin ``slice`` is stored under ``slicevar`` and a ``None``
    constant under ``nonevar``; the call ``slice(None, None)`` is stored
    under ``indexvar`` and an ``ir.SetItem`` using that index is appended to
    the current block. ``inst`` is not used by this handler.
    """
    base_var = self.get(base)

    # slicevar = slice
    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)

    # nonevar = None
    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    bound = self.get(nonevar)

    # indexvar = slicevar(None, None)
    slice_call = ir.Expr.call(self.get(slicevar), (bound, bound), (),
                              loc=self.loc)
    self.store(value=slice_call, name=indexvar)

    # base[indexvar] = value
    setitem = ir.SetItem(base_var, self.get(indexvar), self.get(value),
                         loc=self.loc)
    self.current_block.append(setitem)
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """Converts a set of stencil kernel blocks to a parfor.

    Args:
        label: block label used to look up the array-analysis equivalence
            set.
        in_args: stencil input variables; ``in_args[0]`` is the array whose
            dimensionality, scope and location drive the generated loops.
        out_arr: output array variable, or None to have one allocated with
            ``np.full`` in the parfor's init block.
        stencil_ir: kernel IR; its ``blocks`` are rewritten in place and
            become the parfor body.
        index_offsets, arg_to_arr_dict: forwarded unchanged to
            ``_replace_stencil_accesses``.
        target: variable that receives the output array after the parfor.
        return_type: typed result of the stencil; its ``dtype`` is the
            element type of the output.
        stencil_func: stencil object; forwarded to
            ``_replace_stencil_accesses`` and queried for the ``"cval"``
            option.

    Returns:
        The list of generated IR nodes: whatever the start/last index
        helpers append, then the parfor, then ``target = out_arr``.

    Raises:
        ValueError: if a ``cval`` option is given whose type differs from
            ``return_type.dtype``.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)
    ir_utils.apply_copy_propagate(
        stencil_blocks, in_cps, name_var_table, self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(
            start_lengths[i], gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                               start_ind, last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    # Statements collected here are placed at the start of the exit block,
    # ahead of the SetItem that uses the index.
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        parfor_ind_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a missing output array triggers the allocation,
    # independent of any equality semantics of the IR variable.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # shape_var = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                             in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        # zero_var = fill value (cval if given, else 0) in the result dtype
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        # The output array has the input's ndim/layout and the result dtype.
        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        # dtype_attr_var = np.<result dtype name>
        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        # out_arr = np.full(shape_var, zero_var, dtype_attr_var)
        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # out_arr[parfor_ind_var] = exit_value_var, placed in the exit block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype
    )
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(0, ir.Loc("stencilparfor_dummy", -1)))
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # Drop the dummy return again from the highest-labelled block.
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set, pattern,
                                 self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_blocks,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """Converts a set of stencil kernel blocks to a parfor.

    Args:
        label: block label used to look up the array-analysis equivalence
            set.
        in_args: stencil input variables; ``in_args[0]`` is the array whose
            dimensionality, scope and location drive the generated loops.
        out_arr: output array variable, or None to have one allocated with
            ``np.full`` in the parfor's init block.
        stencil_blocks: kernel blocks; rewritten in place and used as the
            parfor body.
        index_offsets, arg_to_arr_dict: forwarded unchanged to
            ``_replace_stencil_accesses``.
        target: variable that receives the output array after the parfor.
        return_type: typed result of the stencil; its ``dtype`` is the
            element type of the output.
        stencil_func: stencil object; forwarded to
            ``_replace_stencil_accesses`` and queried for the ``"cval"``
            option.

    Returns:
        The list of generated IR nodes: whatever the start/last index
        helpers append, then the parfor, then ``target = out_arr``.

    Raises:
        ValueError: if a ``cval`` option is given whose type differs from
            ``return_type.dtype``.
    """
    gen_nodes = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)
    ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                  self.typemap, self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr.name)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(start_lengths[i], gen_nodes,
                                                scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(
            numba.parfor.LoopNest(parfor_vars[i], start_ind, last_ind, 1))

    # replace return value to setitem to output array
    # The highest-labelled block must end in a return; remove it.
    return_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(return_node, ir.Return)

    # Keep popping trailing statements until the 'cast' assignment is
    # reached; its operand is the value the kernel returns.
    last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    while not isinstance(last_node, ir.Assign) or not isinstance(
            last_node.value, ir.Expr) or not last_node.value.op == 'cast':
        last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(last_node, ir.Assign)
    assert isinstance(last_node.value, ir.Expr)
    assert last_node.value.op == 'cast'
    return_val = last_node.value.value

    # create parfor index var
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        parfor_ind_var = ir.Var(scope,
                                mk_unique_var("$parfor_index_tuple_var"),
                                loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        stencil_blocks[max(
            stencil_blocks.keys())].body.append(tuple_assign)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a missing output array triggers the allocation,
    # independent of any equality semantics of the IR variable.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # shape_var = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        # zero_var = fill value (cval if given, else 0) in the result dtype
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        # The output array has the input's ndim/layout and the result dtype.
        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        # dtype_attr_var = np.<result dtype name>
        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        # out_arr = np.full(shape_var, zero_var, dtype_attr_var)
        stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx, self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    # out_arr[parfor_ind_var] = return_val, appended to the last block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, return_val, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)
    stencil_blocks[max(stencil_blocks.keys())].body.append(setitem_call)

    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks, loc,
                                 parfor_ind_var, equiv_set)
    parfor.patterns = [('stencil', [start_lengths, end_lengths])]
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def _inline_arraycall(func_ir, cfg, visited, loop, enable_prange=False):
    """Look for array(list) call in the exit block of a given loop, and turn
    list operations into array operations in the loop if the following
    conditions are met:
      1. The exit block contains an array call on the list;
      2. The list variable is no longer live after array call;
      3. The list is created in the loop entry block;
      4. The loop is created from an range iterator whose length is known
         prior to the loop;
      5. There is only one list_append operation on the list variable in the
         loop body;
      6. The block that contains list_append dominates the loop head, which
         ensures list length is the same as loop length;
    If any condition check fails, no modification will be made to the
    incoming IR.

    Args:
        func_ir: function IR whose blocks and definitions are updated in
            place.
        cfg: control-flow graph, queried for enclosing loops and for the
            predecessors of the loop header.
        visited: collection of loop headers already processed; blocks that
            belong to a loop whose header is not in it are skipped.
        loop: the loop to transform (``exits``, ``body``, ``entries`` and
            ``header`` are read).
        enable_prange: when True and the iterator comes from a range call
            defined by an ``ir.Global``, that global is retargeted to
            ``internal_prange``.

    Returns:
        True once the rewrite has been applied. Unmet conditions are
        reported through ``require``.
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(
        func_ir, func_ir.blocks[exit_block])

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if 'dtype' in array_kws:
        require(isinstance(array_kws['dtype'], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr,
        # and we'll remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws['dtype'])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr')
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    # Look through a cast to reach the underlying list definition.
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast':
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(isinstance(list_var_def, ir.Expr)
            and list_var_def.op == 'build_list')

    # Look for list_append in "last" block in loop body, which should be a
    # block that is a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be
        # visited loops.
        in_visited_loops = [l.header in visited for l in cfg.in_loops(label)]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == 'call':
                func_def = get_definition(func_ir, expr.func)
                if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \
                        and func_def.attr == 'append':
                    list_def = get_definition(func_ir, func_def.value)
                    debug_print("list_def = ", list_def,
                                list_def == list_var_def)
                    if list_def == list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = set(l for l, b in cfg.predecessors(loop.header))
    expected_preds = loop.entries | set([append_block_label])
    debug_print("preds = ", preds, expected_preds)
    require(preds == expected_preds)

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == 'iternext':
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == 'pair_first':
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    iter_var = iter_vars[0]  # variable that holds the iterator object
    # variable that holds the value out of iterator
    iter_first_var = iter_first_vars[0]

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []

    def is_removed(val, removed):
        # True when ``val`` is a variable whose name matches a removed one.
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False

    # Skip list construction and skip terminator, add the rest to stmts.
    # The comparison is against list_var_def (the list being inlined), not
    # the scan loop's leftover ``list_def``: that variable holds the
    # definition behind whichever ``.append`` call was examined last, which
    # can belong to an unrelated list.
    for stmt in loop_entry.body[:-1]:
        if isinstance(stmt, ir.Assign) and (
                stmt.value == list_var_def
                or is_removed(stmt.value, removed)):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or
    # range(m, n), then the index_var correlates to iterator index;
    # otherwise we'll have to define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var)
    index_var = ir.Var(scope, mk_unique_var("index"), loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1
        # starting the index with -1 since it will incremented in loop header
        stmts.append(_new_definition(func_ir, index_var,
                                     ir.Const(value=-1, loc=loc), loc))

    # Insert statement to get the size of the loop iterator
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(fn='-', lhs=stop, rhs=start, loc=loc)
        # we can parallelize this loop if enable_prange = True, by changing
        # range function from range, to prange.
        if enable_prange and isinstance(range_func_def, ir.Global):
            range_func_def.name = 'internal_prange'
            range_func_def.value = internal_prange
    else:
        len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc)
        stmts.append(_new_definition(
            func_ir, len_func_var,
            ir.Global('range_iter_len', range_iter_len, loc=loc), loc))
        size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    stmts.append(_new_definition(
        func_ir, size_tuple_var,
        ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Insert array allocation
    array_var = ir.Var(scope, mk_unique_var("array"), loc)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call empty with dtype
        dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc)
        dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc)
        stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def,
                                     loc))
        stmts.append(_new_definition(
            func_ir, dtype_var,
            ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc))
        stmts.append(_new_definition(
            func_ir, empty_func,
            ir.Global('empty', np.empty, loc=loc), loc))
        array_kws = [('dtype', dtype_var)]
    else:
        # otherwise we'll call unsafe_empty_inferred
        stmts.append(_new_definition(
            func_ir, empty_func,
            ir.Global('unsafe_empty_inferred', unsafe_empty_inferred,
                      loc=loc), loc))
        array_kws = []
    # array_var = empty_func(size_tuple_var)
    stmts.append(_new_definition(
        func_ir, array_var,
        ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws),
                     loc=loc), loc))

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert(isinstance(terminator, ir.Branch))
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(0, _new_definition(
                func_ir, index_var,
                ir.Expr.binop(fn='-', lhs=iter_first_var,
                              rhs=range_def[0], loc=loc),
                loc))
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc)
        one = ir.Var(scope, mk_unique_var("one"), loc)
        # one = 1
        stmts.append(_new_definition(func_ir, one,
                                     ir.Const(value=1, loc=loc), loc))
        # next_index_var = index_var + 1
        stmts.append(_new_definition(
            func_ir, next_index_var,
            ir.Expr.binop(fn='+', lhs=index_var, rhs=one, loc=loc), loc))
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var,
                                     loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i, body_stmt in enumerate(append_block.body):
        if body_stmt == append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(
                target=array_var, index=index_var,
                value=append_stmt.value.args[0], loc=append_stmt.loc)

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True
def inline_array(array_var, expr, stmts, list_vars, dels):
    """Inline ``array_var = numpy.array(<list>)`` as allocate-then-fill.

    When ``expr`` is a call to ``numpy.array`` whose first argument is one of
    ``list_vars`` and whose typed result is a 1-D array-compatible type, the
    call is replaced by ``np.empty(size, np.<dtype>)`` followed by one
    ``ir.SetItem`` per element of the list's build sequence. All generated
    statements, followed by ``dels``, are appended to ``stmts``.

    Any unmet precondition is reported through ``require``; on success the
    function returns True.
    """
    call_id = guard(find_callname, func_ir, expr)
    require(call_id)
    require(call_id[1] == 'numpy' and call_id[0] == 'array')
    list_var = expr.args[0]
    require(list_var.name in list_vars)
    result_typ = calltypes[expr].return_type
    require(isinstance(result_typ, types.ArrayCompatible)
            and result_typ.ndim == 1)
    loc = expr.loc

    # Type of the array being created and of its elements.
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    # Values that will populate the new array.
    elements, _ = find_build_sequence(func_ir, list_var)
    count = len(elements)

    # Variables holding the element count and its 1-tuple form.
    intp = types.intp
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    typemap[size_var.name] = intp
    typemap[size_tuple_var.name] = types.UniTuple(intp, 1)
    stmts.append(
        _new_definition(func_ir, size_var, ir.Const(count, loc=loc), loc))
    stmts.append(
        _new_definition(func_ir, size_tuple_var,
                        ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Strategy: allocate an empty array, then store the elements one by one.
    np_dtype_typ = types.DType(elem_typ)

    # empty_func = np.empty
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    empty_fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature itself is not used; the call is kept as-is.
    context.resolve_function_type(empty_fnty, (intp,),
                                  {'dtype': np_dtype_typ})
    typemap[empty_func.name] = empty_fnty
    stmts.append(
        _new_definition(func_ir, empty_func,
                        ir.Global('empty', np.empty, loc=loc), loc))

    # The dtype argument is obtained as a getattr on the numpy module:
    # first a variable for the module itself ...
    np_mod_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    typemap[np_mod_var.name] = types.misc.Module(np)
    np_global = ir.Global('np', np, loc)
    stmts.append(_new_definition(func_ir, np_mod_var, np_global, loc))

    # ... then a variable for numpy.<dtype>; 'bool' is looked up as 'bool_'.
    dtype_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
    typemap[dtype_var.name] = np_dtype_typ
    attr_name = str(elem_typ)
    attr_name = 'bool_' if attr_name == 'bool' else attr_name
    dtype_getattr = ir.Expr.getattr(np_mod_var, attr_name, loc)
    stmts.append(_new_definition(func_ir, dtype_var, dtype_getattr, loc))

    # array_var = empty_func(size_var, dtype_var)
    alloc_call = ir.Expr.call(empty_func, [size_var, dtype_var], {}, loc=loc)
    calltypes[alloc_call] = typing.signature(array_typ, intp, np_dtype_typ)
    stmts.append(_new_definition(func_ir, array_var, alloc_call, loc))

    # array_var[position] = element, one store per list element.
    for position, element in enumerate(elements):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = intp
        stmts.append(
            _new_definition(func_ir, index_var, ir.Const(position, loc), loc))
        store = ir.SetItem(array_var, index_var, element, loc)
        calltypes[store] = typing.signature(types.none, array_typ, intp,
                                            elem_typ)
        stmts.append(store)

    stmts.extend(dels)
    return True