示例#1
0
    def _handle_pq_table(self, lhs, rhs):
        if guard(find_callname, self.func_ir,
                 rhs) == ('read_table', 'pyarrow.parquet'):
            if len(rhs.args) != 1:
                raise ValueError("Invalid read_table() arguments")
            self.arrow_tables[lhs.name] = rhs.args[0]
            return []
        # match t.to_pandas()
        func_def = guard(get_definition, self.func_ir, rhs.func)
        assert func_def is not None
        # rare case where function variable is assigned to a new variable
        if isinstance(func_def, ir.Var):
            rhs.func = func_def
            return self._handle_pq_table(lhs, rhs)
        if (isinstance(func_def, ir.Expr) and func_def.op == 'getattr'
                and func_def.value.name in self.arrow_tables
                and func_def.attr == 'to_pandas'):
            table_types = None
            if lhs.name in self.locals:
                table_types = self.locals[lhs.name]
                self.locals.pop(lhs.name)
            if lhs.name in self.reverse_copies and self.reverse_copies[
                    lhs.name] in self.locals:
                table_types = self.locals[self.reverse_copies[lhs.name]]
                self.locals.pop(self.reverse_copies[lhs.name])

            col_items, nodes = self.pq_handler.gen_parquet_read(
                self.arrow_tables[func_def.value.name], table_types)
            self.df_vars[lhs.name] = self._process_df_build_map(col_items)
            self._update_df_cols()
            return nodes
        return None
示例#2
0
    def _handle_str_contains(self, lhs, rhs):
        """
        Handle string contains like:
          B = df.column.str.contains('oo*', regex=True)
        """
        func_def = guard(get_definition, self.func_ir, rhs.func)
        assert func_def is not None
        # rare case where function variable is assigned to a new variable
        if isinstance(func_def, ir.Var):
            rhs.func = func_def
            return self._handle_str_contains(lhs, rhs)
        str_col = guard(self._get_str_contains_col, func_def)
        if str_col is None:
            return None
        kws = dict(rhs.kws)
        pat = rhs.args[0]
        regex = True  # default regex arg is True
        if 'regex' in kws:
            regex = get_constant(self.func_ir, kws['regex'], regex)
        if regex:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_regex(str_arr, pat)
        else:

            def f(str_arr, pat):
                hpat.hiframes_api.str_contains_noregex(str_arr, pat)

        f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [str_col, pat])
        nodes = f_block.body[:-3]  # remove none return
        nodes[-1].target = lhs
        return nodes
示例#3
0
    def test_find_const_global(self):
        """
        Test find_const() for values in globals (ir.Global) and freevars
        (ir.FreeVar) that are considered constants for compilation.
        """
        FREEVAR_C = 12

        def foo(a):
            b = GLOBAL_B
            c = FREEVAR_C
            return a + b + c

        f_ir = compiler.run_frontend(foo)
        block = f_ir.blocks[0]
        const_b = None
        const_c = None

        for inst in block.body:
            if isinstance(inst, ir.Assign) and inst.target.name == 'b':
                const_b = ir_utils.guard(
                    ir_utils.find_const, f_ir, inst.target)
            if isinstance(inst, ir.Assign) and inst.target.name == 'c':
                const_c = ir_utils.guard(
                    ir_utils.find_const, f_ir, inst.target)

        self.assertEqual(const_b, GLOBAL_B)
        self.assertEqual(const_c, FREEVAR_C)
def _get_const_two_irs(ir1, ir2, var):
    """get constant in either of two IRs if available
    otherwise, throw GuardException
    """
    var_const = guard(find_const, ir1, var)
    if var_const is not None:
        return var_const
    var_const = guard(find_const, ir2, var)
    if var_const is not None:
        return var_const
    raise GuardException
示例#5
0
def _get_const_two_irs(ir1, ir2, var):
    """get constant in either of two IRs if available
    otherwise, throw GuardException
    """
    var_const = guard(find_const, ir1, var)
    if var_const is not None:
        return var_const
    var_const = guard(find_const, ir2, var)
    if var_const is not None:
        return var_const
    raise GuardException
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i in range(len(block.body)):
                instr = block.body[i]
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        func_def = guard(get_definition, self.func_ir, expr.func)
                        debug_print("found call to ", expr.func, " def = ", func_def)
                        if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                            new_blocks = self.inline_closure_call(block, i, func_def)
                            for block in new_blocks:
                                work_list.append(block)
                            modified = True
                            # current block is modified, skip the rest
                            break

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                        self.flags.auto_parallel):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#7
0
文件: hiframes.py 项目: zmyer/hpat
 def _handle_pd_DataFrame(self, lhs, rhs):
     if guard(find_callname, self.func_ir, rhs) == ('DataFrame', 'pandas'):
         if len(rhs.args) != 1:
             raise ValueError("Invalid DataFrame() arguments (one expected)")
         arg_def = guard(get_definition, self.func_ir, rhs.args[0])
         if not isinstance(arg_def, ir.Expr) or arg_def.op != 'build_map':
             raise ValueError("Invalid DataFrame() arguments (map expected)")
         out, items = self._fix_df_arrays(arg_def.items)
         self.df_vars[lhs.name] = self._process_df_build_map(items)
         self._update_df_cols()
         # remove DataFrame call
         return out
     return None
示例#8
0
def _find_arraycall(func_ir, block):
    """Look for statement like "x = numpy.array(y)" or "x[..] = y"
    immediately after the closure call that creates list y (the i-th
    statement in block).  Return the statement index if found, or
    raise GuardException.
    """
    array_var = None
    array_call_index = None
    list_var_dead_after_array_call = False
    list_var = None

    i = 0
    while i < len(block.body):
        instr = block.body[i]
        if isinstance(instr, ir.Del):
            # Stop the process if list_var becomes dead
            if list_var and array_var and instr.value == list_var.name:
                list_var_dead_after_array_call = True
                break
            pass
        elif isinstance(instr, ir.Assign):
            # Found array_var = array(list_var)
            lhs  = instr.target
            expr = instr.value
            if (guard(find_callname, func_ir, expr) == ('array', 'numpy') and
                isinstance(expr.args[0], ir.Var)):
                list_var = expr.args[0]
                array_var = lhs
                array_stmt_index = i
                array_kws = dict(expr.kws)
        elif (isinstance(instr, ir.SetItem) and
              isinstance(instr.value, ir.Var) and
              not list_var):
            list_var = instr.value
            # Found array_var[..] = list_var, the case for nested array
            array_var = instr.target
            array_def = get_definition(func_ir, array_var)
            require(guard(_find_unsafe_empty_inferred, func_ir, array_def))
            array_stmt_index = i
            array_kws = {}
        else:
            # Bail out otherwise
            break
        i = i + 1
    # require array_var is found, and list_var is dead after array_call.
    require(array_var and list_var_dead_after_array_call)
    _make_debug_print("find_array_call")(block.body[array_stmt_index])
    return list_var, array_stmt_index, array_kws
示例#9
0
 def match(self, func_ir, block, typemap, calltypes):
     if len(calltypes) == 0:
         return False
     self.crnt_block = block
     self.new_body = guard(_inline_const_arraycall, block, func_ir,
                           self.typingctx, typemap, calltypes)
     return self.new_body != None
示例#10
0
 def _handle_merge(self, lhs, rhs):
     if guard(find_callname, self.func_ir, rhs) == ('merge', 'pandas'):
         if len(rhs.args) < 2:
             raise ValueError("left and right arguments required for merge")
         left_df = rhs.args[0]
         right_df = rhs.args[1]
         kws = dict(rhs.kws)
         if 'on' in kws:
             left_on = get_constant(self.func_ir, kws['on'], None)
             right_on = left_on
         else:  # pragma: no cover
             if 'left_on' not in kws or 'right_on' not in kws:
                 raise ValueError("merge 'on' or 'left_on'/'right_on'"
                                  "arguments required")
             left_on = get_constant(self.func_ir, kws['left_on'], None)
             right_on = get_constant(self.func_ir, kws['right_on'], None)
         if left_on is None or right_on is None:
             raise ValueError("merge key values should be constant strings")
         scope = lhs.scope
         loc = lhs.loc
         self.df_vars[lhs.name] = {}
         # add columns from left to output
         for col, _ in self.df_vars[left_df.name].items():
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         # add columns from right to output
         for col, _ in self.df_vars[right_df.name].items():
             self.df_vars[lhs.name][col] = ir.Var(scope, mk_unique_var(col),
                                                  loc)
         self._update_df_cols()
         return [
             hiframes_join.Join(lhs.name, left_df.name, right_df.name,
                                left_on, right_on, self.df_vars, lhs.loc)
         ]
     return None
示例#11
0
    def test_inline_update_target_def(self):

        def test_impl(a):
            if a == 1:
                b = 2
            else:
                b = 3
            return b

        func_ir = compiler.run_frontend(test_impl)
        blocks = list(func_ir.blocks.values())
        for block in blocks:
            for i, stmt in enumerate(block.body):
                # match b = 2 and replace with lambda: 2
                if (isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Var)
                        and guard(find_const, func_ir, stmt.value) == 2):
                    # replace expr with a dummy call
                    func_ir._definitions[stmt.target.name].remove(stmt.value)
                    stmt.value = ir.Expr.call(ir.Var(block.scope, "myvar", loc=stmt.loc), (), (), stmt.loc)
                    func_ir._definitions[stmt.target.name].append(stmt.value)
                    #func = g.py_func#
                    inline_closure_call(func_ir, {}, block, i, lambda: 2)
                    break

        self.assertEqual(len(func_ir._definitions['b']), 2)
示例#12
0
    def test_inline_var_dict_ret(self):
        # make sure inline_closure_call returns the variable replacement dict
        # and it contains the original variable name used in locals
        @numba.njit(locals={'b': numba.float64})
        def g(a):
            b = a + 1
            return b

        def test_impl():
            return g(1)

        func_ir = compiler.run_frontend(test_impl)
        blocks = list(func_ir.blocks.values())
        for block in blocks:
            for i, stmt in enumerate(block.body):
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'):
                    func_def = guard(get_definition, func_ir, stmt.value.func)
                    if (isinstance(func_def, (ir.Global, ir.FreeVar))
                            and isinstance(func_def.value, CPUDispatcher)):
                        py_func = func_def.value.py_func
                        _, var_map = inline_closure_call(
                            func_ir, py_func.__globals__, block, i, py_func)
                        break

        self.assertTrue('b' in var_map)
示例#13
0
    def gen_parquet_read(self, file_name, table_types):
        import pyarrow.parquet as pq
        scope = file_name.scope
        loc = file_name.loc

        if table_types is None:
            fname_def = guard(get_definition, self.func_ir, file_name)
            if not isinstance(fname_def, ir.Const) or not isinstance(
                    fname_def.value, str):
                raise ValueError("Parquet schema not available")
            file_name_str = fname_def.value
            col_names, col_types = parquet_file_schema(file_name_str)
        else:
            col_names = list(table_types.keys())
            col_types = list(table_types.values())

        out_nodes = []
        col_items = []
        for i, cname in enumerate(col_names):
            # get column type from schema
            c_type = col_types[i]
            # create a variable for column and assign type
            varname = mk_unique_var(cname)
            #self.locals[varname] = c_type
            cvar = ir.Var(scope, varname, loc)
            col_items.append((cname, cvar))

            out_nodes += get_column_read_nodes(c_type, cvar, file_name, i)

        return col_items, out_nodes
示例#14
0
def get_tuple_items(var, block, func_ir):
    """
    Returns tuple items. If tuple is constant creates and returns constants
    """
    def wrap_into_var(value, block, func_ir, loc):
        stmt = declare_constant(value, block, func_ir, loc)

        return stmt.target

    val = guard(find_const, func_ir, var)
    if val is not None:
        if isinstance(val, tuple):
            return [wrap_into_var(v, block, func_ir, var.loc) for v in val]

        return None

    try:
        rhs = func_ir.get_definition(var)
        if isinstance(rhs, Expr):
            if rhs.op == 'build_tuple':
                return list(rhs.items)
    except Exception:
        pass

    return None
示例#15
0
def inline_calls(func_ir):
    work_list = list(func_ir.blocks.items())
    while work_list:
        label, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    func_def = guard(get_definition, func_ir, expr.func)
                    if isinstance(func_def, ir.Global) and isinstance(
                            func_def.value, CPUDispatcher):
                        py_func = func_def.value.py_func
                        new_blocks = inline_closure_call(
                            func_ir,
                            func_ir.func_id.func.__globals__,
                            block,
                            i,
                            py_func,
                            work_list=work_list)
                        # for block in new_blocks:
                        #     work_list.append(block)
                        # current block is modified, skip the rest
                        # (included in new blocks)
                        break
示例#16
0
文件: transforms.py 项目: esc/numba
    def get_ctxmgr_obj(var_ref):
        """Return the context-manager object and extra info.

        The extra contains the arguments if the context-manager is used
        as a call.
        """
        # If the contextmanager used as a Call
        dfn = func_ir.get_definition(var_ref)
        if isinstance(dfn, ir.Expr) and dfn.op == 'call':
            args = [get_var_dfn(x) for x in dfn.args]
            kws = {k: get_var_dfn(v) for k, v in dfn.kws}
            extra = {'args': args, 'kwargs': kws}
            var_ref = dfn.func
        else:
            extra = None

        ctxobj = ir_utils.guard(ir_utils.find_global_value, func_ir, var_ref)

        # check the contextmanager object
        if ctxobj is ir.UNDEFINED:
            raise errors.CompilerError(
                "Undefined variable used as context manager",
                loc=blocks[blk_start].loc,
                )

        if ctxobj is None:
            raise errors.CompilerError(_illegal_cm_msg, loc=dfn.loc)

        return ctxobj, extra
示例#17
0
    def test_inline_var_dict_ret(self):
        # make sure inline_closure_call returns the variable replacement dict
        # and it contains the original variable name used in locals
        @numba.njit(locals={'b': numba.float64})
        def g(a):
            b = a + 1
            return b

        def test_impl():
            return g(1)

        func_ir = compiler.run_frontend(test_impl)
        blocks = list(func_ir.blocks.values())
        for block in blocks:
            for i, stmt in enumerate(block.body):
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'):
                    func_def = guard(get_definition, func_ir, stmt.value.func)
                    if (isinstance(func_def, (ir.Global, ir.FreeVar))
                            and isinstance(func_def.value, CPUDispatcher)):
                        py_func = func_def.value.py_func
                        _, var_map = inline_closure_call(
                            func_ir, py_func.__globals__, block, i, py_func)
                        break

        self.assertTrue('b' in var_map)
示例#18
0
    def match(self, func_ir, block, typemap, calltypes):
        # TODO: 1. save instructions of build_map, build_list for read_csv params
        # 2. check that vars are used only in read_csv
        # 3. replace vars with build_tuple inplace

        self.func_ir = func_ir
        self.block = block
        self.consts = consts = {}

        # Find all assignments with a right-hand read_csv() call
        for inst in find_operations(block=block, op_name='call'):
            expr = inst.value
            call = guard(find_callname, func_ir, expr)
            if call not in self._pandas_read_csv_calls:
                continue
            # collect constant parameters with type list and dict
            # in order to replace with tuple
            for key, var in expr.kws:
                if key not in self._read_csv_const_args:
                    continue
                try:
                    const = func_ir.infer_constant(var)
                except errors.ConstantInferenceError:
                    try:
                        const = ConstantInference(func_ir).infer_constant(
                            var.name)
                    except errors.ConstantInferenceError:
                        continue
                if isinstance(const, (list, dict)):
                    consts.setdefault(inst, {})[key] = const

        return len(consts) > 0
示例#19
0
    def _analyze_call_array(self, lhs, arr, func_name, args, array_dists):
        """analyze distributions of array functions (arr.func_name)
        """
        if func_name == 'transpose':
            if len(args) == 0:
                raise ValueError("Transpose with no arguments is not"
                                 " supported")
            in_arr_name = arr.name
            arg0 = guard(get_constant, self.func_ir, args[0])
            if isinstance(arg0, tuple):
                arg0 = arg0[0]
            if arg0 != 0:
                raise ValueError("Transpose with non-zero first argument"
                                 " is not supported")
            self._meet_array_dists(lhs, in_arr_name, array_dists)
            return

        if func_name in ('astype', 'reshape', 'copy'):
            in_arr_name = arr.name
            self._meet_array_dists(lhs, in_arr_name, array_dists)
            # TODO: support 1D_Var reshape
            if func_name == 'reshape' and array_dists[
                    lhs] == Distribution.OneD_Var:
                self._analyze_call_set_REP(lhs, args, array_dists)
            return

        # Array.tofile() is supported for all distributions
        if func_name == 'tofile':
            return

        # set REP if not found
        self._analyze_call_set_REP(lhs, args, array_dists)
示例#20
0
    def run(self):
        """ Finds all calls to StencilFuncs in the IR and converts them to parfor.
        """
        from numba.stencil import StencilFunc

        # Get all the calls in the function IR.
        call_table, _ = get_call_table(self.func_ir.blocks)
        stencil_calls = []
        stencil_dict = {}
        for call_varname, call_list in call_table.items():
            if isinstance(call_list[0], StencilFunc):
                # Remember all calls to StencilFuncs.
                stencil_calls.append(call_varname)
                stencil_dict[call_varname] = call_list[0]
        if not stencil_calls:
            return  # return early if no stencil calls found

        # find and transform stencil calls
        for label, block in self.func_ir.blocks.items():
            for i, stmt in reversed(list(enumerate(block.body))):
                # Found a call to a StencilFunc.
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'
                        and stmt.value.func.name in stencil_calls):
                    kws = dict(stmt.value.kws)
                    # Create dictionary of input argument number to
                    # the argument itself.
                    input_dict = {
                        i: stmt.value.args[i]
                        for i in range(len(stmt.value.args))
                    }
                    in_args = stmt.value.args
                    arg_typemap = tuple(self.typemap[i.name] for i in in_args)
                    for arg_type in arg_typemap:
                        if isinstance(arg_type, types.BaseTuple):
                            raise ValueError("Tuple parameters not supported " \
                                "for stencil kernels in parallel=True mode.")

                    out_arr = kws.get('out')

                    # Get the StencilFunc object corresponding to this call.
                    sf = stencil_dict[stmt.value.func.name]
                    stencil_blocks, rt, arg_to_arr_dict = get_stencil_blocks(
                        sf, self.typingctx, arg_typemap, block.scope,
                        block.loc, input_dict, self.typemap, self.calltypes)
                    index_offsets = sf.options.get('index_offsets', None)
                    gen_nodes = self._mk_stencil_parfor(
                        label, in_args, out_arr, stencil_blocks, index_offsets,
                        stmt.target, rt, sf, arg_to_arr_dict)
                    block.body = block.body[:i] + gen_nodes + block.body[i +
                                                                         1:]
                # Found a call to a stencil via numba.stencil().
                elif (isinstance(stmt, ir.Assign)
                      and isinstance(stmt.value, ir.Expr)
                      and stmt.value.op == 'call'
                      and guard(find_callname, self.func_ir,
                                stmt.value) == ('stencil', 'numba')):
                    # remove dummy stencil() call
                    stmt.value = ir.Const(0, stmt.loc)
示例#21
0
 def _handle_rolling_setup(self, lhs, rhs):
     """
     Handle Series rolling calls like:
       r = df.column.rolling(3)
     """
     func_def = guard(get_definition, self.func_ir, rhs.func)
     assert func_def is not None
     # rare case where function variable is assigned to a new variable
     if isinstance(func_def, ir.Var):
         rhs.func = func_def
         return self._handle_rolling_setup(lhs, rhs)
     # df.column.rolling
     if (isinstance(func_def, ir.Expr) and func_def.op == 'getattr'
             and func_def.value.name in self.df_cols
             and func_def.attr == 'rolling'):
         center = False
         kws = dict(rhs.kws)
         if rhs.args:
             window = rhs.args[0]
         elif 'window' in kws:
             window = kws['window']
         else:
             raise ValueError("window argument to rolling() required")
         window = get_constant(self.func_ir, window, window)
         if 'center' in kws:
             center = get_constant(self.func_ir, kws['center'], center)
         self.rolling_calls[lhs.name] = [func_def.value, window, center]
         return []  # remove
     return None
示例#22
0
文件: compiler.py 项目: samaid/sdc
def inline_calls(func_ir, _locals):
    work_list = list(func_ir.blocks.items())
    while work_list:
        label, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    func_def = guard(get_definition, func_ir, expr.func)
                    if (isinstance(func_def, (ir.Global, ir.FreeVar))
                            and isinstance(func_def.value, CPUDispatcher)):
                        py_func = func_def.value.py_func
                        inline_out = inline_closure_call(
                            func_ir, py_func.__globals__, block, i, py_func,
                            work_list=work_list)

                        # TODO remove if when inline_closure_call() output fix
                        # is merged in Numba
                        if isinstance(inline_out, tuple):
                            var_dict = inline_out[1]
                            # TODO: update '##distributed' and '##threaded' in _locals
                            _locals.update((var_dict[k].name, v)
                                           for k, v in func_def.value.locals.items()
                                           if k in var_dict)
                        # for block in new_blocks:
                        #     work_list.append(block)
                        # current block is modified, skip the rest
                        # (included in new blocks)
                        break

    # sometimes type inference fails after inlining since blocks are inserted
    # at the end and there are agg constraints (categorical_split case)
    # CFG simplification fixes this case
    func_ir.blocks = ir_utils.simplify_CFG(func_ir.blocks)
示例#23
0
文件: pio.py 项目: feitianyiren/hpat
    def _infer_h5_typ(self, rhs):
        # infer the type if it is of the from f['A']['B'][:] or f['A'][b,:]
        # with constant filename
        # TODO: static_getitem has index_var for sure?
        # make sure it's slice, TODO: support non-slice like integer
        require(rhs.op in ('getitem', 'static_getitem'))
        # XXX can't know the type of index here especially if it is bool arr
        # make sure it is not string (we're not in the middle a select chain)
        index_var = rhs.index if rhs.op == 'getitem' else rhs.index_var
        index_val = guard(find_const, self.func_ir, index_var)
        require(not isinstance(index_val, str))
        # index_def = get_definition(self.func_ir, index_var)
        # require(isinstance(index_def, ir.Expr) and index_def.op == 'call')
        # require(find_callname(self.func_ir, index_def) == ('slice', 'builtins'))
        # collect object names until the call
        val_def = rhs
        obj_name_list = []
        while True:
            val_def = get_definition(self.func_ir, val_def.value)
            require(isinstance(val_def, ir.Expr))
            if val_def.op == 'call':
                return self._get_h5_type_file(val_def, obj_name_list)

            # object_name should be constant str
            require(val_def.op in ('getitem', 'static_getitem'))
            val_index_var = val_def.index if val_def.op == 'getitem' else val_def.index_var
            obj_name = find_str_const(self.func_ir, val_index_var)
            obj_name_list.append(obj_name)
示例#24
0
文件: pio.py 项目: zmyer/hpat
 def _handle_f_close_call(self, stmt, lhs_var, rhs):
     func_def = guard(get_definition, self.func_ir, rhs.func)
     assert func_def is not None
     # rare case where function variable is assigned to a new variable
     if isinstance(func_def, ir.Var):
         rhs.func = func_def
         return self._handle_f_close_call(stmt, lhs_var, rhs)
     if (isinstance(func_def, ir.Expr) and func_def.op == 'getattr'
             and func_def.value.name in self.h5_files
             and func_def.attr == 'close'):
         f_id = func_def.value
         scope = lhs_var.scope
         loc = lhs_var.loc
         # g_pio_var = Global(hpat.pio_api)
         g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
         g_pio = ir.Global('pio_api', hpat.pio_api, loc)
         g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
         # attr call: h5close_attr = getattr(g_pio_var, h5close)
         h5close_attr_call = ir.Expr.getattr(g_pio_var, "h5close", loc)
         attr_var = ir.Var(scope, mk_unique_var("$h5close_attr"), loc)
         attr_assign = ir.Assign(h5close_attr_call, attr_var, loc)
         # h5close(f_id)
         close_call = ir.Expr.call(attr_var, [f_id], (), loc)
         close_assign = ir.Assign(close_call, lhs_var, loc)
         return [g_pio_assign, attr_assign, close_assign]
     return None
示例#25
0
    def _handle_str_contains(self, lhs, rhs, assign, call_table):
        fname = guard(find_callname, self.func_ir, rhs)
        if fname is None:
            return None

        if fname == ('str_contains_regex', 'hpat.hiframes_api'):
            comp_func = 'hpat.str_ext.contains_regex'
        elif fname == ('str_contains_noregex', 'hpat.hiframes_api'):
            comp_func = 'hpat.str_ext.contains_noregex'
        else:
            return None

        str_arr = rhs.args[0]
        pat = rhs.args[1]
        func_text = 'def f(str_arr, pat):\n'
        func_text += '  l = len(str_arr)\n'
        func_text += '  S = np.empty(l, dtype=np.bool_)\n'
        func_text += '  for i in numba.parfor.internal_prange(l):\n'
        func_text += '    S[i] = {}(str_arr[i], pat)\n'.format(comp_func)
        loc_vars = {}
        exec(func_text, {}, loc_vars)
        f = loc_vars['f']
        f_blocks = compile_to_numba_ir(
            f, {
                'numba': numba,
                'np': np,
                'hpat': hpat
            }, self.typingctx,
            (self.typemap[str_arr.name], self.typemap[pat.name]), self.typemap,
            self.calltypes).blocks
        replace_arg_nodes(f_blocks[min(f_blocks.keys())], [str_arr, pat])
        # replace call with result of parfor (S)
        # S is target of last statement in 1st block of f
        assign.value = f_blocks[min(f_blocks.keys())].body[-2].target
        return (f_blocks, [assign])
示例#26
0
    def test_inline_update_target_def(self):
        def test_impl(a):
            if a == 1:
                b = 2
            else:
                b = 3
            return b

        func_ir = compiler.run_frontend(test_impl)
        blocks = list(func_ir.blocks.values())
        for block in blocks:
            for i, stmt in enumerate(block.body):
                # match b = 2 and replace with lambda: 2
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Var)
                        and guard(find_const, func_ir, stmt.value) == 2):
                    # replace expr with a dummy call
                    func_ir._definitions[stmt.target.name].remove(stmt.value)
                    stmt.value = ir.Expr.call(
                        ir.Var(block.scope, "myvar", loc=stmt.loc), (), (),
                        stmt.loc)
                    func_ir._definitions[stmt.target.name].append(stmt.value)
                    #func = g.py_func#
                    inline_closure_call(func_ir, {}, block, i, lambda: 2)
                    break

        self.assertEqual(len(func_ir._definitions['b']), 2)
示例#27
0
    def get_ctxmgr_obj(var_ref):
        """Return the context-manager object and extra info.

        The extra contains the arguments if the context-manager is used
        as a call.
        """
        # If the contextmanager used as a Call
        dfn = func_ir.get_definition(var_ref)
        if isinstance(dfn, ir.Expr) and dfn.op == 'call':
            args = [get_var_dfn(x) for x in dfn.args]
            kws = {k: get_var_dfn(v) for k, v in dfn.kws}
            extra = {'args': args, 'kwargs': kws}
            var_ref = dfn.func
        else:
            extra = None

        ctxobj = ir_utils.guard(ir_utils.find_global_value, func_ir, var_ref)

        # check the contextmanager object
        if ctxobj is ir.UNDEFINED:
            raise errors.CompilerError(
                "Undefined variable used as context manager",
                loc=blocks[blk_start].loc,
            )

        if ctxobj is None:
            raise errors.CompilerError(_illegal_cm_msg, loc=dfn.loc)

        return ctxobj, extra
示例#28
0
 def _inline_stencil(self, instr, call_name, func_def):
     from numba.stencil import StencilFunc
     lhs = instr.target
     expr = instr.value
     # We keep the escaping variables of the stencil kernel
     # alive by adding them to the actual kernel call as extra
     # keyword arguments, which is ignored anyway.
     if (isinstance(func_def, ir.Global) and func_def.name == 'stencil'
             and isinstance(func_def.value, StencilFunc)):
         if expr.kws:
             expr.kws += func_def.value.kws
         else:
             expr.kws = func_def.value.kws
         return True
     # Otherwise we proceed to check if it is a call to numba.stencil
     require(call_name == ('stencil', 'numba.stencil')
             or call_name == ('stencil', 'numba'))
     require(expr not in self._processed_stencils)
     self._processed_stencils.append(expr)
     if not len(expr.args) == 1:
         raise ValueError("As a minimum Stencil requires"
                          " a kernel as an argument")
     stencil_def = guard(get_definition, self.func_ir, expr.args[0])
     require(
         isinstance(stencil_def, ir.Expr)
         and stencil_def.op == "make_function")
     kernel_ir = get_ir_of_code(self.func_ir.func_id.func.__globals__,
                                stencil_def.code)
     options = dict(expr.kws)
     if 'neighborhood' in options:
         fixed = guard(self._fix_stencil_neighborhood, options)
         if not fixed:
             raise ValueError(
                 "stencil neighborhood option should be a tuple"
                 " with constant structure such as ((-w, w),)")
     if 'index_offsets' in options:
         fixed = guard(self._fix_stencil_index_offsets, options)
         if not fixed:
             raise ValueError(
                 "stencil index_offsets option should be a tuple"
                 " with constant structure such as (offset, )")
     sf = StencilFunc(kernel_ir, 'constant', options)
     sf.kws = expr.kws  # hack to keep variables live
     sf_global = ir.Global('stencil', sf, expr.loc)
     self.func_ir._definitions[lhs.name] = [sf_global]
     instr.value = sf_global
     return True
示例#29
0
    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        dtype = array_typ.dtype
        seq, op = find_build_sequence(func_ir, list_var)
        size = len(seq)
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)

        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ

        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))

        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {})
        typemap[empty_func.name] = fnty #

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True
示例#30
0
    def _analyze_assign(self, inst, array_dists, parfor_dists):
        lhs = inst.target.name
        rhs = inst.value
        # treat return casts like assignments
        if isinstance(rhs, ir.Expr) and rhs.op == 'cast':
            rhs = rhs.value

        if isinstance(rhs, ir.Var) and (is_array(self.typemap, lhs) or
                                        is_array_container(self.typemap, lhs)):
            self._meet_array_dists(lhs, rhs.name, array_dists)
            return
        elif (is_array(self.typemap, lhs) and isinstance(rhs, ir.Expr)
              and rhs.op == 'inplace_binop'):
            # distributions of all 3 variables should meet (lhs, arg1, arg2)
            arg1 = rhs.lhs.name
            arg2 = rhs.rhs.name
            dist = self._meet_array_dists(arg1, arg2, array_dists)
            dist = self._meet_array_dists(arg1, lhs, array_dists, dist)
            self._meet_array_dists(arg1, arg2, array_dists, dist)
            return
        elif isinstance(rhs,
                        ir.Expr) and rhs.op in ['getitem', 'static_getitem']:
            self._analyze_getitem(inst, lhs, rhs, array_dists)
            return
        elif isinstance(rhs, ir.Expr) and rhs.op == 'build_tuple':
            # parallel arrays can be packed and unpacked from tuples
            # e.g. boolean array index in test_getitem_multidim
            return
        elif (isinstance(rhs, ir.Expr) and rhs.op == 'getattr'
              and rhs.attr == 'T' and is_array(self.typemap, lhs)):
            # array and its transpose have same distributions
            arr = rhs.value.name
            self._meet_array_dists(lhs, arr, array_dists)
            # keep lhs in table for dot() handling
            self._T_arrs.add(lhs)
            return
        elif (isinstance(rhs, ir.Expr) and rhs.op == 'getattr'
              and rhs.attr in [
                  'shape', 'ndim', 'size', 'strides', 'dtype', 'itemsize',
                  'astype', 'reshape', 'ctypes', 'transpose', 'tofile', 'copy'
              ]):
            pass  # X.shape doesn't affect X distribution
        elif isinstance(rhs, ir.Expr) and rhs.op == 'call':
            self._analyze_call(lhs, rhs, rhs.func.name, rhs.args, array_dists)
        # handle for A in arr_container: ...
        # A = pair_first(iternext(getiter(arr_container)))
        # TODO: support getitem of container
        elif isinstance(rhs, ir.Expr) and rhs.op == 'pair_first' and is_array(
                self.typemap, lhs):
            arr_container = guard(_get_pair_first_container, self.func_ir, rhs)
            if arr_container is not None:
                self._meet_array_dists(lhs, arr_container.name, array_dists)
                return
        elif isinstance(rhs, ir.Expr) and rhs.op in ('getiter', 'iternext'):
            # analyze array container access in pair_first
            return
        else:
            self._set_REP(inst.list_vars(), array_dists)
        return
示例#31
0
 def _handle_ros(self, lhs, rhs):
     if guard(find_callname, self.func_ir,
              rhs) == ('read_ros_images', 'hpat.ros'):
         if len(rhs.args) != 1:  # pragma: no cover
             raise ValueError("Invalid read_ros_images() arguments")
         import hpat.ros
         return hpat.ros._handle_read_images(lhs, rhs)
     return None
示例#32
0
    def run(self):
        """ Finds all calls to StencilFuncs in the IR and converts them to parfor.
        """
        from numba.stencil import StencilFunc

        # Get all the calls in the function IR.
        call_table, _ = get_call_table(self.func_ir.blocks)
        stencil_calls = []
        stencil_dict = {}
        for call_varname, call_list in call_table.items():
            if isinstance(call_list[0], StencilFunc):
                # Remember all calls to StencilFuncs.
                stencil_calls.append(call_varname)
                stencil_dict[call_varname] = call_list[0]
        if not stencil_calls:
            return  # return early if no stencil calls found

        # find and transform stencil calls
        for label, block in self.func_ir.blocks.items():
            for i, stmt in reversed(list(enumerate(block.body))):
                # Found a call to a StencilFunc.
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'
                        and stmt.value.func.name in stencil_calls):
                    kws = dict(stmt.value.kws)
                    # Create dictionary of input argument number to
                    # the argument itself.
                    input_dict = {i: stmt.value.args[i] for i in
                                                    range(len(stmt.value.args))}
                    in_args = stmt.value.args
                    arg_typemap = tuple(self.typemap[i.name] for i in in_args)
                    for arg_type in arg_typemap:
                        if isinstance(arg_type, types.BaseTuple):
                            raise ValueError("Tuple parameters not supported " \
                                "for stencil kernels in parallel=True mode.")

                    out_arr = kws.get('out')

                    # Get the StencilFunc object corresponding to this call.
                    sf = stencil_dict[stmt.value.func.name]
                    stencil_ir, rt, arg_to_arr_dict = get_stencil_ir(sf,
                            self.typingctx, arg_typemap,
                            block.scope, block.loc, input_dict,
                            self.typemap, self.calltypes)
                    index_offsets = sf.options.get('index_offsets', None)
                    gen_nodes = self._mk_stencil_parfor(label, in_args, out_arr,
                            stencil_ir, index_offsets, stmt.target, rt, sf,
                            arg_to_arr_dict)
                    block.body = block.body[:i] + gen_nodes + block.body[i+1:]
                # Found a call to a stencil via numba.stencil().
                elif (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'
                        and guard(find_callname, self.func_ir, stmt.value)
                                    == ('stencil', 'numba')):
                    # remove dummy stencil() call
                    stmt.value = ir.Const(0, stmt.loc)
示例#33
0
 def _inline_stencil(self, instr, call_name, func_def):
     from numba.stencil import StencilFunc
     lhs = instr.target
     expr = instr.value
     # We keep the escaping variables of the stencil kernel
     # alive by adding them to the actual kernel call as extra
     # keyword arguments, which is ignored anyway.
     if (isinstance(func_def, ir.Global) and
         func_def.name == 'stencil' and
         isinstance(func_def.value, StencilFunc)):
         if expr.kws:
             expr.kws += func_def.value.kws
         else:
             expr.kws = func_def.value.kws
         return True
     # Otherwise we proceed to check if it is a call to numba.stencil
     require(call_name == ('stencil', 'numba.stencil') or
             call_name == ('stencil', 'numba'))
     require(expr not in self._processed_stencils)
     self._processed_stencils.append(expr)
     if not len(expr.args) == 1:
         raise ValueError("As a minimum Stencil requires"
             " a kernel as an argument")
     stencil_def = guard(get_definition, self.func_ir, expr.args[0])
     require(isinstance(stencil_def, ir.Expr) and
             stencil_def.op == "make_function")
     kernel_ir = get_ir_of_code(self.func_ir.func_id.func.__globals__,
             stencil_def.code)
     options = dict(expr.kws)
     if 'neighborhood' in options:
         fixed = guard(self._fix_stencil_neighborhood, options)
         if not fixed:
            raise ValueError("stencil neighborhood option should be a tuple"
                     " with constant structure such as ((-w, w),)")
     if 'index_offsets' in options:
         fixed = guard(self._fix_stencil_index_offsets, options)
         if not fixed:
            raise ValueError("stencil index_offsets option should be a tuple"
                     " with constant structure such as (offset, )")
     sf = StencilFunc(kernel_ir, 'constant', options)
     sf.kws = expr.kws # hack to keep variables live
     sf_global = ir.Global('stencil', sf, expr.loc)
     self.func_ir._definitions[lhs.name] = [sf_global]
     instr.value = sf_global
     return True
示例#34
0
 def stage_inline_test_pass(self):
     # assuming the function has one block with one call inside
     assert len(self.func_ir.blocks) == 1
     block = list(self.func_ir.blocks.values())[0]
     for i, stmt in enumerate(block.body):
         if guard(find_callname,self.func_ir, stmt.value) is not None:
             inline_closure_call(self.func_ir, {}, block, i, lambda: None,
                 self.typingctx, (), self.typemap, self.calltypes)
             break
示例#35
0
    def _analyze_parfor(self, parfor, array_dists, parfor_dists):
        if parfor.id not in parfor_dists:
            parfor_dists[parfor.id] = Distribution.OneD

        # analyze init block first to see array definitions
        self._analyze_block(parfor.init_block, array_dists, parfor_dists)
        out_dist = Distribution.OneD
        if self.in_parallel_parfor != -1:
            out_dist = Distribution.REP

        parfor_arrs = set()  # arrays this parfor accesses in parallel
        array_accesses = ir_utils.get_array_accesses(parfor.loop_body)
        par_index_var = parfor.loop_nests[0].index_variable.name
        #stencil_accesses, _ = get_stencil_accesses(parfor, self.typemap)
        for (arr, index) in array_accesses:
            if index == par_index_var:  #or index in stencil_accesses:
                parfor_arrs.add(arr)
                self._parallel_accesses.add((arr, index))

            # multi-dim case
            tup_list = guard(find_build_tuple, self.func_ir, index)
            if tup_list is not None:
                index_tuple = [var.name for var in tup_list]
                if index_tuple[0] == par_index_var:
                    parfor_arrs.add(arr)
                    self._parallel_accesses.add((arr, index))
                if par_index_var in index_tuple[1:]:
                    out_dist = Distribution.REP
            # TODO: check for index dependency

        for arr in parfor_arrs:
            if arr in array_dists:
                out_dist = Distribution(
                    min(out_dist.value, array_dists[arr].value))
        parfor_dists[parfor.id] = out_dist
        for arr in parfor_arrs:
            if arr in array_dists:
                array_dists[arr] = out_dist

        # TODO: find prange actually coming from user
        # for pattern in parfor.patterns:
        #     if pattern[0] == 'prange' and not self.in_parallel_parfor:
        #         parfor_dists[parfor.id] = Distribution.OneD

        # run analysis recursively on parfor body
        if self.second_pass and out_dist in [
                Distribution.OneD, Distribution.OneD_Var
        ]:
            self.in_parallel_parfor = parfor.id
        blocks = wrap_parfor_blocks(parfor)
        for b in blocks.values():
            self._analyze_block(b, array_dists, parfor_dists)
        unwrap_parfor_blocks(parfor)
        if self.in_parallel_parfor == parfor.id:
            self.in_parallel_parfor = -1
        return
示例#36
0
    def apply(self):
        new_block = self.block.copy()
        new_block.clear()
        vars_to_remove = []

        for inst in self.block.body:
            if inst in self.consts:
                consts = self.consts[inst]

                for key, value in consts.items():
                    if key not in dict(inst.value.kws):
                        continue

                    # collecting data from current variable
                    current_var = [
                        var for name, var in inst.value.kws if name == key
                    ][0]
                    loc = current_var.loc

                    seq, _ = guard(find_build_sequence, self.func_ir,
                                   current_var)
                    if not seq:
                        continue
                    if isinstance(value, list):
                        items = seq
                    elif isinstance(value, dict):
                        items = sum(map(list, seq), [])
                    else:
                        continue

                    # create tuple variable
                    stmt = make_assign(ir.Expr.build_tuple(items=items,
                                                           loc=loc),
                                       new_block.scope,
                                       self.func_ir,
                                       loc,
                                       name=f"{key}_tuple")
                    new_block.append(stmt)

                    # replace variable in call
                    inst.value.kws = [(kw[0],
                                       stmt.target) if kw[0] == key else kw
                                      for kw in inst.value.kws]

                    # save old variable for removing
                    vars_to_remove.append(current_var)

            new_block.append(inst)

        # remove old variables
        for var in vars_to_remove:
            # unsused variables are removed after new block is created b/c
            # remove_unused_recursively should see all del statements of variables
            remove_unused_recursively(var, new_block, self.func_ir)

        return new_block
示例#37
0
 def stage_inline_test_pass(self):
     # assuming the function has one block with one call inside
     assert len(self.func_ir.blocks) == 1
     block = list(self.func_ir.blocks.values())[0]
     for i, stmt in enumerate(block.body):
         if guard(find_callname, self.func_ir, stmt.value) is not None:
             inline_closure_call(self.func_ir, {}, block, i, lambda: None,
                                 self.typingctx, (), self.typemap,
                                 self.calltypes)
             break
示例#38
0
def _get_const_index_expr(stencil_ir, func_ir, index_var):
    """
    infer index_var as constant if it is of a expression form like c-1 where c
    is a constant in the outer function.
    index_var is assumed to be inside stencil kernel
    """
    const_val = guard(
        _get_const_index_expr_inner, stencil_ir, func_ir, index_var)
    if const_val is not None:
        return const_val
    return index_var
示例#39
0
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i, instr in enumerate(block.body):
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        call_name = guard(find_callname, self.func_ir, expr)
                        func_def = guard(get_definition, self.func_ir, expr.func)

                        if guard(self._inline_reduction,
                                 work_list, block, i, expr, call_name):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_closure,
                                work_list, block, i, func_def):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_stencil,
                                instr, call_name, func_def):
                            modified = True

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir.blocks)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                         self.parallel_options.comprehension):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                                                                self.func_ir)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#40
0
def _get_const_index_expr_inner(stencil_ir, func_ir, index_var):
    """inner constant inference function that calls constant, unary and binary
    cases.
    """
    require(isinstance(index_var, ir.Var))
    # case where the index is a const itself in outer function
    var_const =  guard(_get_const_two_irs, stencil_ir, func_ir, index_var)
    if var_const is not None:
        return var_const
    # get index definition
    index_def = ir_utils.get_definition(stencil_ir, index_var)
    # match inner_var = unary(index_var)
    var_const = guard(
        _get_const_unary_expr, stencil_ir, func_ir, index_def)
    if var_const is not None:
        return var_const
    # match inner_var = arg1 + arg2
    var_const = guard(
        _get_const_binary_expr, stencil_ir, func_ir, index_def)
    if var_const is not None:
        return var_const
    raise GuardException
示例#41
0
def check_reduce_func(func_ir, func_var):
    reduce_func = guard(get_definition, func_ir, func_var)
    if reduce_func is None:
        raise ValueError("Reduce function cannot be found for njit \
                            analysis")
    if not (hasattr(reduce_func, 'code')
            or hasattr(reduce_func, '__code__')):
        raise ValueError("Invalid reduction function")
    f_code = (reduce_func.code if hasattr(reduce_func, 'code')
                                    else reduce_func.__code__)
    if not f_code.co_argcount == 2:
        raise TypeError("Reduction function should take 2 arguments")
    return
示例#42
0
 def find_array_def(arr):
     """Find numpy array definition such as
         arr = numba.unsafe.ndarray.empty_inferred(...).
     If it is arr = b[...], find array definition of b recursively.
     """
     arr_def = func_ir.get_definition(arr)
     _make_debug_print("find_array_def")(arr, arr_def)
     if isinstance(arr_def, ir.Expr):
         if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
             return arr_def
         elif arr_def.op == 'getitem':
             return find_array_def(arr_def.value)
     raise GuardException
示例#43
0
def _inline_const_arraycall(block, func_ir, context, typemap, calltypes):
    """Look for array(list) call where list is a constant list created by build_list,
    and turn them into direct array creation and initialization, if the following
    conditions are met:
      1. The build_list call immediate preceeds the array call;
      2. The list variable is no longer live after array call;
    If any condition check fails, no modification will be made.
    """
    debug_print = _make_debug_print("inline_const_arraycall")
    scope = block.scope

    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        dtype = array_typ.dtype
        seq, op = find_build_sequence(func_ir, list_var)
        size = len(seq)
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)

        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ

        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))

        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {})
        typemap[empty_func.name] = fnty #

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True

    # list_vars keep track of the variable created from the latest
    # build_list instruction, as well as its synonyms.
    list_vars = []
    # dead_vars keep track of those in list_vars that are considered dead.
    dead_vars = []
    # list_items keep track of the elements used in build_list.
    list_items = []
    stmts = []
    # dels keep track of the deletion of list_items, which will need to be
    # moved after array initialization.
    dels = []
    modified = False
    for inst in block.body:
        if isinstance(inst, ir.Assign):
            if isinstance(inst.value, ir.Var):
                if inst.value.name in list_vars:
                    list_vars.append(inst.target.name)
                    stmts.append(inst)
                    continue
            elif isinstance(inst.value, ir.Expr):
                expr = inst.value
                if expr.op == 'build_list':
                    list_vars = [inst.target.name]
                    list_items = [x.name for x in expr.items]
                    stmts.append(inst)
                    continue
                elif expr.op == 'call' and expr in calltypes:
                    arr_var = inst.target
                    if guard(inline_array, inst.target, expr,
                                           stmts, list_vars, dels):
                        modified = True
                        continue
        elif isinstance(inst, ir.Del):
            removed_var = inst.value
            if removed_var in list_items:
                dels.append(inst)
                continue
            elif removed_var in list_vars:
                # one of the list_vars is considered dead.
                dead_vars.append(removed_var)
                list_vars.remove(removed_var)
                stmts.append(inst)
                if list_vars == []:
                    # if all list_vars are considered dead, we need to filter
                    # them out from existing stmts to completely remove
                    # build_list.
                    # Note that if a translation didn't take place, dead_vars
                    # will also be empty when we reach this point.
                    body = []
                    for inst in stmts:
                        if ((isinstance(inst, ir.Assign) and
                             inst.target.name in dead_vars) or
                             (isinstance(inst, ir.Del) and
                             inst.value in dead_vars)):
                            continue
                        body.append(inst)
                    stmts = body
                    dead_vars = []
                    modified = True
                    continue
        stmts.append(inst)

        # If the list is used in any capacity between build_list and array
        # call, then we must call off the translation for this list because
        # it could be mutated and list_items would no longer be applicable.
        list_var_used = any([ x.name in list_vars for x in inst.list_vars() ])
        if list_var_used:
            list_vars = []
            dead_vars = []
            list_items = []
            dels = []

    return stmts if modified else None
示例#44
0
def _inline_arraycall(func_ir, cfg, visited, loop, enable_prange=False):
    """Look for array(list) call in the exit block of a given loop, and turn list operations into
    array operations in the loop if the following conditions are met:
      1. The exit block contains an array call on the list;
      2. The list variable is no longer live after array call;
      3. The list is created in the loop entry block;
      4. The loop is created from an range iterator whose length is known prior to the loop;
      5. There is only one list_append operation on the list variable in the loop body;
      6. The block that contains list_append dominates the loop head, which ensures list
         length is the same as loop length;
    If any condition check fails, no modification will be made to the incoming IR.
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(func_ir, func_ir.blocks[exit_block])

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if 'dtype' in array_kws:
        require(isinstance(array_kws['dtype'], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr, and we'll
        # remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws['dtype'])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr')
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast':
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(isinstance(list_var_def, ir.Expr) and list_var_def.op ==  'build_list')

    # Look for list_append in "last" block in loop body, which should be a block that is
    # a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be visited loops.
        in_visited_loops = [l.header in visited for l in cfg.in_loops(label)]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            lhs = stmt.target
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == 'call':
                func_def = get_definition(func_ir, expr.func)
                if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \
                  and func_def.attr == 'append':
                    list_def = get_definition(func_ir, func_def.value)
                    debug_print("list_def = ", list_def, list_def == list_var_def)
                    if list_def == list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = set(l for l, b in cfg.predecessors(loop.header))
    debug_print("preds = ", preds, (loop.entries | set([append_block_label])))
    require(preds == (loop.entries | set([append_block_label])))

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == 'iternext':
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == 'pair_first':
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    iter_var = iter_vars[0] # variable that holds the iterator object
    iter_first_var = iter_first_vars[0] # variable that holds the value out of iterator

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []
    def is_removed(val, removed):
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False
    # Skip list construction and skip terminator, add the rest to stmts
    for i in range(len(loop_entry.body) - 1):
        stmt = loop_entry.body[i]
        if isinstance(stmt, ir.Assign) and (stmt.value == list_def or is_removed(stmt.value, removed)):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or range(m, n),
    # then the index_var correlates to iterator index; otherwise we'll have to
    # define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var)
    index_var = ir.Var(scope, mk_unique_var("index"), loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will incremented in loop header
        stmts.append(_new_definition(func_ir, index_var, ir.Const(value=-1, loc=loc), loc))

    # Insert statement to get the size of the loop iterator
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(fn='-', lhs=stop, rhs=start, loc=loc)
        # we can parallelize this loop if enable_prange = True, by changing
        # range function from range, to prange.
        if enable_prange and isinstance(range_func_def, ir.Global):
            range_func_def.name = 'internal_prange'
            range_func_def.value = internal_prange

    else:
        len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc)
        stmts.append(_new_definition(func_ir, len_func_var,
                     ir.Global('range_iter_len', range_iter_len, loc=loc), loc))
        size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Insert array allocation
    array_var = ir.Var(scope, mk_unique_var("array"), loc)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call emtpy with dtype
        dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc)
        dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc)
        stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc))
        stmts.append(_new_definition(func_ir, dtype_var,
                         ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc))
        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))
        array_kws = [('dtype', dtype_var)]
    else:
        # otherwise we'll call unsafe_empty_inferred
        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('unsafe_empty_inferred',
                             unsafe_empty_inferred, loc=loc), loc))
        array_kws = []
    # array_var = empty_func(size_tuple_var)
    stmts.append(_new_definition(func_ir, array_var,
                 ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws), loc=loc), loc))

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert(isinstance(terminator, ir.Branch))
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(0, _new_definition(func_ir, index_var,
                ir.Expr.binop(fn='-', lhs=iter_first_var,
                                      rhs=range_def[0], loc=loc),
                loc))
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc)
        one = ir.Var(scope, mk_unique_var("one"), loc)
        # one = 1
        stmts.append(_new_definition(func_ir, one,
                     ir.Const(value=1,loc=loc), loc))
        # next_index_var = index_var + 1
        stmts.append(_new_definition(func_ir, next_index_var,
                     ir.Expr.binop(fn='+', lhs=index_var, rhs=one, loc=loc), loc))
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var, loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i in range(len(append_block.body)):
        if append_block.body[i] == append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(target=array_var, index=index_var,
                                              value=append_stmt.value.args[0], loc=append_stmt.loc)

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True
示例#45
0
    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        # Get the type of the array to be created.
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        # Get the element type of the array to be created.
        dtype = array_typ.dtype
        # Get the sequence of operations to provide values to the new array.
        seq, _ = find_build_sequence(func_ir, list_var)
        size = len(seq)
        # Create a tuple to pass to empty below to specify the new array size.
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)
        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ
        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))
        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        # The general approach is to create an empty array and then fill
        # the elements in one-by-one from their specificiation.

        # Get the numpy type to pass to empty.
        nptype = types.DType(dtype)

        # Create a variable to hold the numpy empty function.
        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {'dtype':nptype})

        typemap[empty_func.name] = fnty

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        # We pass two arguments to empty, first the size tuple and second
        # the dtype of the new array.  Here, we created typ_var which is
        # the dtype argument of the new array.  typ_var in turn is created
        # by getattr of the dtype string on the numpy module.

        # Create var for numpy module.
        g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        typemap[g_np_var.name] = types.misc.Module(np)
        g_np = ir.Global('np', np, loc)
        stmts.append(_new_definition(func_ir, g_np_var, g_np, loc))

        # Create var for result of numpy.<dtype>.
        typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
        typemap[typ_var.name] = nptype
        dtype_str = str(dtype)
        if dtype_str == 'bool':
            dtype_str = 'bool_'
        # Get dtype attribute of numpy module.
        np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc)
        stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc))

        # Create the call to numpy.empty passing the size tuple and dtype var.
        empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        # Fill in the new empty array one-by-one.
        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True
示例#46
0
def _fix_nested_array(func_ir):
    """Look for assignment like: a[..] = b, where both a and b are numpy arrays, and
    try to eliminate array b by expanding a with an extra dimension.
    """
    blocks = func_ir.blocks
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    empty_deadmap = dict([(label, set()) for label in blocks.keys()])
    livemap = compute_live_variables(cfg, blocks, usedefs.defmap, empty_deadmap)

    def find_array_def(arr):
        """Find numpy array definition such as
            arr = numba.unsafe.ndarray.empty_inferred(...).
        If it is arr = b[...], find array definition of b recursively.
        """
        arr_def = func_ir.get_definition(arr)
        _make_debug_print("find_array_def")(arr, arr_def)
        if isinstance(arr_def, ir.Expr):
            if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
                return arr_def
            elif arr_def.op == 'getitem':
                return find_array_def(arr_def.value)
        raise GuardException

    def fix_dependencies(expr, varlist):
        """Double check if all variables in varlist are defined before
        expr is used. Try to move constant definition when the check fails.
        Bails out by raising GuardException if it can't be moved.
        """
        debug_print = _make_debug_print("fix_dependencies")
        for label, block in blocks.items():
            scope = block.scope
            body = block.body
            defined = set()
            for i in range(len(body)):
                inst = body[i]
                if isinstance(inst, ir.Assign):
                    defined.add(inst.target.name)
                    if inst.value == expr:
                        new_varlist = []
                        for var in varlist:
                            # var must be defined before this inst, or live
                            # and not later defined.
                            if (var.name in defined or
                                (var.name in livemap[label] and
                                 not (var.name in usedefs.defmap[label]))):
                                debug_print(var.name, " already defined")
                                new_varlist.append(var)
                            else:
                                debug_print(var.name, " not yet defined")
                                var_def = get_definition(func_ir, var.name)
                                if isinstance(var_def, ir.Const):
                                    loc = var.loc
                                    new_var = ir.Var(scope, mk_unique_var("new_var"), loc)
                                    new_const = ir.Const(var_def.value, loc)
                                    new_vardef = _new_definition(func_ir,
                                                    new_var, new_const, loc)
                                    new_body = []
                                    new_body.extend(body[:i])
                                    new_body.append(new_vardef)
                                    new_body.extend(body[i:])
                                    block.body = new_body
                                    new_varlist.append(new_var)
                                else:
                                    raise GuardException
                        return new_varlist
        # when expr is not found in block
        raise GuardException

    def fix_array_assign(stmt):
        """For assignment like lhs[idx] = rhs, where both lhs and rhs are arrays, do the
        following:
        1. find the definition of rhs, which has to be a call to numba.unsafe.ndarray.empty_inferred
        2. find the source array creation for lhs, insert an extra dimension of size of b.
        3. replace the definition of rhs = numba.unsafe.ndarray.empty_inferred(...) with rhs = lhs[idx]
        """
        require(isinstance(stmt, ir.SetItem))
        require(isinstance(stmt.value, ir.Var))
        debug_print = _make_debug_print("fix_array_assign")
        debug_print("found SetItem: ", stmt)
        lhs = stmt.target
        # Find the source array creation of lhs
        lhs_def = find_array_def(lhs)
        debug_print("found lhs_def: ", lhs_def)
        rhs_def = get_definition(func_ir, stmt.value)
        debug_print("found rhs_def: ", rhs_def)
        require(isinstance(rhs_def, ir.Expr))
        if rhs_def.op == 'cast':
            rhs_def = get_definition(func_ir, rhs_def.value)
            require(isinstance(rhs_def, ir.Expr))
        require(_find_unsafe_empty_inferred(func_ir, rhs_def))
        # Find the array dimension of rhs
        dim_def = get_definition(func_ir, rhs_def.args[0])
        require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple')
        debug_print("dim_def = ", dim_def)
        extra_dims = [ get_definition(func_ir, x, lhs_only=True) for x in dim_def.items ]
        debug_print("extra_dims = ", extra_dims)
        # Expand size tuple when creating lhs_def with extra_dims
        size_tuple_def = get_definition(func_ir, lhs_def.args[0])
        require(isinstance(size_tuple_def, ir.Expr) and size_tuple_def.op == 'build_tuple')
        debug_print("size_tuple_def = ", size_tuple_def)
        extra_dims = fix_dependencies(size_tuple_def, extra_dims)
        size_tuple_def.items += extra_dims
        # In-place modify rhs_def to be getitem
        rhs_def.op = 'getitem'
        rhs_def.value = get_definition(func_ir, lhs, lhs_only=True)
        rhs_def.index = stmt.index
        del rhs_def._kws['func']
        del rhs_def._kws['args']
        del rhs_def._kws['vararg']
        del rhs_def._kws['kws']
        # success
        return True

    for label in find_topo_order(func_ir.blocks):
        block = func_ir.blocks[label]
        for stmt in block.body:
            if guard(fix_array_assign, stmt):
                block.body.remove(stmt)