示例#1
0
    def test1(self):
        typingctx = typing.Context()
        targetctx = cpu.CPUContext(typingctx)
        test_ir = compiler.run_frontend(test_will_propagate)
        #print("Num blocks = ", len(test_ir.blocks))
        #print(test_ir.dump())
        with cpu_target.nested_context(typingctx, targetctx):
            typingctx.refresh()
            targetctx.refresh()
            args = (types.int64, types.int64, types.int64)
            typemap, return_type, calltypes = compiler.type_inference_stage(typingctx, test_ir, args, None)
            #print("typemap = ", typemap)
            #print("return_type = ", return_type)
            type_annotation = type_annotations.TypeAnnotation(
                func_ir=test_ir,
                typemap=typemap,
                calltypes=calltypes,
                lifted=(),
                lifted_from=None,
                args=args,
                return_type=return_type,
                html_output=config.HTML)
            remove_dels(test_ir.blocks)
            in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
            apply_copy_propagate(test_ir.blocks, in_cps, get_name_var_table(test_ir.blocks), typemap, calltypes)

            remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
            self.assertFalse(findLhsAssign(test_ir, "x"))
示例#2
0
    def test1(self):
        typingctx = typing.Context()
        targetctx = cpu.CPUContext(typingctx)
        test_ir = compiler.run_frontend(test_will_propagate)
        #print("Num blocks = ", len(test_ir.blocks))
        #print(test_ir.dump())
        with cpu_target.nested_context(typingctx, targetctx):
            typingctx.refresh()
            targetctx.refresh()
            args = (types.int64, types.int64, types.int64)
            typemap, return_type, calltypes = compiler.type_inference_stage(typingctx, test_ir, args, None)
            #print("typemap = ", typemap)
            #print("return_type = ", return_type)
            type_annotation = type_annotations.TypeAnnotation(
                func_ir=test_ir,
                typemap=typemap,
                calltypes=calltypes,
                lifted=(),
                lifted_from=None,
                args=args,
                return_type=return_type,
                html_output=config.HTML)
            remove_dels(test_ir.blocks)
            in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
            apply_copy_propagate(test_ir.blocks, in_cps, get_name_var_table(test_ir.blocks), typemap, calltypes)

            remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
            self.assertFalse(findLhsAssign(test_ir, "x"))
示例#3
0
 def compare_ir(self, ir_list):
     outputs = []
     for func_ir in ir_list:
         remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
         output = utils.StringIO()
         func_ir.dump(file=output)
         outputs.append(output.getvalue())
     self.assertTrue(len(set(outputs)) == 1)  # assert all outputs are equal
 def compare_ir(self, ir_list):
     outputs = []
     for func_ir in ir_list:
         remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
         output = utils.StringIO()
         func_ir.dump(file=output)
         outputs.append(output.getvalue())
     self.assertTrue(len(set(outputs)) == 1)  # assert all outputs are equal
示例#5
0
 def run_pass(self, state):
     parfor_pass = numba.parfor.ParforPass(
         state.func_ir, state.type_annotation.typemap,
         state.type_annotation.calltypes, state.return_type,
         state.typingctx, state.flags.auto_parallel, state.flags,
         state.parfor_diagnostics)
     remove_dels(state.func_ir.blocks)
     parfor_pass.array_analysis.run(state.func_ir.blocks)
     parfor_pass._convert_loop(state.func_ir.blocks)
     remove_dead(state.func_ir.blocks, state.func_ir.arg_names,
                 state.func_ir, state.type_annotation.typemap)
     numba.parfor.get_parfor_params(state.func_ir.blocks,
                                    parfor_pass.options.fusion,
                                    parfor_pass.nested_fusion_info)
     return True
示例#6
0
 def run(self):
     """Run inline closure call pass.
     """
     modified = False
     work_list = list(self.func_ir.blocks.items())
     _debug_print("START InlineClosureCall")
     while work_list:
         label, block = work_list.pop()
         for i in range(len(block.body)):
             instr = block.body[i]
             if isinstance(instr, ir.Assign):
                 lhs  = instr.target
                 expr = instr.value
                 if isinstance(expr, ir.Expr) and expr.op == 'call':
                     try:
                         func_def = self.func_ir.get_definition(expr.func)
                     except KeyError:
                         func_def = None
                     _debug_print("found call to ", expr.func, " def = ", func_def)
                     if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                         new_blocks = self.inline_closure_call(block, i, func_def)
                         for block in new_blocks:
                             work_list.append(block)
                         modified = True
                         # current block is modified, skip the rest
                         break
     if modified:
         remove_dels(self.func_ir.blocks)
         # repeat dead code elimintation until nothing can be further removed
         while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
             pass
         self.func_ir.blocks = rename_labels(self.func_ir.blocks)
示例#7
0
    def test2(self):
        def call_np_random_seed():
            np.random.seed(2)

        def seed_call_exists(func_ir):
            for inst in func_ir.blocks[0].body:
                if (isinstance(inst, ir.Assign) and
                    isinstance(inst.value, ir.Expr) and
                    inst.value.op == 'call' and
                    func_ir.get_definition(inst.value.func).attr == 'seed'):
                    return True
            return False

        test_ir = compiler.run_frontend(call_np_random_seed)
        remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
        self.assertTrue(seed_call_exists(test_ir))
示例#8
0
 def run(self):
     dprint_func_ir(self.func_ir, "starting IO")
     topo_order = find_topo_order(self.func_ir.blocks)
     for label in topo_order:
         new_body = []
         # copies are collected before running the pass since
         # variables typed in locals are assigned late
         self._get_reverse_copies(self.func_ir.blocks[label].body)
         for inst in self.func_ir.blocks[label].body:
             if isinstance(inst, ir.Assign):
                 inst_list = self._run_assign(inst)
                 new_body.extend(inst_list)
             elif isinstance(inst, ir.StaticSetItem):
                 inst_list = self._run_static_setitem(inst)
                 new_body.extend(inst_list)
             else:
                 new_body.append(inst)
         self.func_ir.blocks[label].body = new_body
     # iterative remove dead to make sure all extra code (e.g. df vars) is removed
     while remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                       self.func_ir):
         pass
     self.func_ir._definitions = get_definitions(self.func_ir.blocks)
     dprint_func_ir(self.func_ir, "after IO")
     if debug_prints():
         print("h5 files: ", self.h5_files)
         print("h5 dsets: ", self.h5_dsets)
示例#9
0
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i, instr in enumerate(block.body):
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        call_name = guard(find_callname, self.func_ir, expr)
                        func_def = guard(get_definition, self.func_ir, expr.func)

                        if guard(self._inline_reduction,
                                 work_list, block, i, expr, call_name):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_closure,
                                work_list, block, i, func_def):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_stencil,
                                instr, call_name, func_def):
                            modified = True

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir.blocks)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                         self.parallel_options.comprehension):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                                                                self.func_ir)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#10
0
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i, instr in enumerate(block.body):
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        call_name = guard(find_callname, self.func_ir, expr)
                        func_def = guard(get_definition, self.func_ir, expr.func)

                        if guard(self._inline_reduction,
                                 work_list, block, i, expr, call_name):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_closure,
                                work_list, block, i, func_def):
                            modified = True
                            break # because block structure changed

                        if guard(self._inline_stencil,
                                instr, call_name, func_def):
                            modified = True

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir.blocks)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                         self.swapped, self.parallel_options.comprehension):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                                                                self.func_ir)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#11
0
    def test2(self):
        def call_np_random_seed():
            np.random.seed(2)

        def seed_call_exists(func_ir):
            for inst in func_ir.blocks[0].body:
                if (isinstance(inst, ir.Assign) and
                    isinstance(inst.value, ir.Expr) and
                    inst.value.op == 'call' and
                    func_ir.get_definition(inst.value.func).attr == 'seed'):
                    return True
            return False

        test_ir = compiler.run_frontend(call_np_random_seed)
        remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
        self.assertTrue(seed_call_exists(test_ir))
示例#12
0
    def run(self):
        dprint_func_ir(self.func_ir, "starting hiframes")
        topo_order = find_topo_order(self.func_ir.blocks)
        for label in topo_order:
            new_body = []
            for inst in self.func_ir.blocks[label].body:
                # df['col'] = arr
                if isinstance(
                        inst,
                        ir.StaticSetItem) and inst.target.name in self.df_vars:
                    df_name = inst.target.name
                    self.df_vars[df_name][inst.index] = inst.value
                    self._update_df_cols()
                elif isinstance(inst, ir.Assign):
                    out_nodes = self._run_assign(inst)
                    if isinstance(out_nodes, list):
                        new_body.extend(out_nodes)
                    if isinstance(out_nodes, dict):
                        label = include_new_blocks(self.func_ir.blocks,
                                                   out_nodes, label, new_body)
                        new_body = []
                else:
                    new_body.append(inst)
            self.func_ir.blocks[label].body = new_body

        self.func_ir._definitions = _get_definitions(self.func_ir.blocks)
        #remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
        if config._has_h5py:
            io_pass = pio.PIO(self.func_ir, self.locals)
            io_pass.run()
        remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
        DummyFlags = namedtuple('DummyFlags', 'auto_parallel')
        inline_pass = InlineClosureCallPass(self.func_ir, DummyFlags(True))
        inline_pass.run()
        self.typemap, self.return_type, self.calltypes = numba_compiler.type_inference_stage(
            self.typingctx, self.func_ir, self.args, None)
        self.fix_series_filter(self.func_ir.blocks)
        self.func_ir._definitions = _get_definitions(self.func_ir.blocks)
        dprint_func_ir(self.func_ir, "after hiframes")
        if numba.config.DEBUG_ARRAY_OPT == 1:
            print("df_vars: ", self.df_vars)
        return
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i in range(len(block.body)):
                instr = block.body[i]
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        func_def = guard(get_definition, self.func_ir, expr.func)
                        debug_print("found call to ", expr.func, " def = ", func_def)
                        if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                            new_blocks = self.inline_closure_call(block, i, func_def)
                            for block in new_blocks:
                                work_list.append(block)
                            modified = True
                            # current block is modified, skip the rest
                            break

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                        self.flags.auto_parallel):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#14
0
    def run(self):
        dprint_func_ir(self.func_ir, "starting hiframes")
        topo_order = find_topo_order(self.func_ir.blocks)
        for label in topo_order:
            new_body = []
            for inst in self.func_ir.blocks[label].body:
                # df['col'] = arr
                if isinstance(inst, ir.StaticSetItem) and inst.target.name in self.df_vars:
                    df_name = inst.target.name
                    self.df_vars[df_name][inst.index] = inst.value
                    self._update_df_cols()
                elif isinstance(inst, ir.Assign):
                    out_nodes = self._run_assign(inst)
                    if isinstance(out_nodes, list):
                        new_body.extend(out_nodes)
                    if isinstance(out_nodes, dict):
                        inner_blocks = add_offset_to_labels(out_nodes, ir_utils._max_label+1)
                        self.func_ir.blocks.update(inner_blocks)
                        ir_utils._max_label = max(self.func_ir.blocks.keys())
                        scope = self.func_ir.blocks[label].scope
                        loc = self.func_ir.blocks[label].loc
                        inner_topo_order = find_topo_order(inner_blocks)
                        inner_first_label = inner_topo_order[0]
                        inner_last_label = inner_topo_order[-1]
                        remove_none_return_from_block(inner_blocks[inner_last_label])
                        new_body.append(ir.Jump(inner_first_label, loc))
                        self.func_ir.blocks[label].body = new_body
                        label = ir_utils.next_label()
                        self.func_ir.blocks[label] = ir.Block(scope, loc)
                        inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
                        new_body = []
                else:
                    new_body.append(inst)
            self.func_ir.blocks[label].body = new_body

        remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
        dprint_func_ir(self.func_ir, "after hiframes")
        if config.DEBUG_ARRAY_OPT==1:
            print("df_vars: ", self.df_vars)
        return
示例#15
0
文件: pio.py 项目: Quantonomist/hpat
 def run(self):
     dprint_func_ir(self.func_ir, "starting IO")
     topo_order = find_topo_order(self.func_ir.blocks)
     for label in topo_order:
         new_body = []
         # copies are collected before running the pass since
         # variables typed in locals are assigned late
         self._get_reverse_copies(self.func_ir.blocks[label].body)
         for inst in self.func_ir.blocks[label].body:
             if isinstance(inst, ir.Assign):
                 inst_list = self._run_assign(inst)
                 new_body.extend(inst_list)
             elif isinstance(inst, ir.StaticSetItem):
                 inst_list = self._run_static_setitem(inst)
                 new_body.extend(inst_list)
             else:
                 new_body.append(inst)
         self.func_ir.blocks[label].body = new_body
     remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
     dprint_func_ir(self.func_ir, "after IO")
     if config.DEBUG_ARRAY_OPT == 1:
         print("h5 files: ", self.h5_files)
         print("h5 dsets: ", self.h5_dsets)
示例#16
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_blocks,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []

        if config.DEBUG_ARRAY_OPT == 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                  return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                      self.typemap, self.calltypes)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                             self.typemap)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
            stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
            arg_to_arr_dict)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr.name)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                                  end_lengths[i], gen_nodes,
                                                  scope, loc)
            start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                    gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(
                numba.parfor.LoopNest(parfor_vars[i], start_ind, last_ind, 1))

        # replace return value to setitem to output array
        return_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        assert isinstance(return_node, ir.Return)

        last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        while not isinstance(last_node, ir.Assign) or not isinstance(
                last_node.value, ir.Expr) or not last_node.value.op == 'cast':
            last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        assert isinstance(last_node, ir.Assign)
        assert isinstance(last_node.value, ir.Expr)
        assert last_node.value.op == 'cast'
        return_val = last_node.value.value

        # create parfor index var
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope,
                                    mk_unique_var("$parfor_index_tuple_var"),
                                    loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            stencil_blocks[max(
                stencil_blocks.keys())].body.append(tuple_assign)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr == None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(
                types.intp, in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.types.npytypes.Array(
                return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                                 return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
                return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                          loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                         [shape_var, zero_var, dtype_attr_var],
                                         self.typingctx, self.typemap,
                                         self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, return_val, loc)
        self.calltypes[setitem_call] = signature(
            types.none, self.typemap[out_arr.name],
            self.typemap[parfor_ind_var.name],
            self.typemap[out_arr.name].dtype)
        stencil_blocks[max(stencil_blocks.keys())].body.append(setitem_call)

        parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                     loc, parfor_ind_var, equiv_set)
        parfor.patterns = [('stencil', [start_lengths, end_lengths])]
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes
示例#17
0
    def run(self):
        """Run inline closure call pass.
        """
        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print("START")
        while work_list:
            label, block = work_list.pop()
            for i in range(len(block.body)):
                instr = block.body[i]
                if isinstance(instr, ir.Assign):
                    lhs = instr.target
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == 'call':
                        # inline reduce() when parallel is off
                        if not self.flags.auto_parallel:
                            call_name = guard(find_callname, self.func_ir, expr)
                            if (call_name == ('reduce', 'builtin')
                                    or call_name == ('reduce', 'functools')):
                                if len(expr.args) != 3:
                                    raise TypeError("invalid reduce call, "
                                        "three arguments including initial "
                                        "value required")
                                check_reduce_func(self.func_ir, expr.args[0])
                                def reduce_func(f, A, v):
                                    s = v
                                    it = iter(A)
                                    for a in it:
                                        s = f(s, a)
                                    return s
                                new_blocks = inline_closure_call(self.func_ir,
                                        self.func_ir.func_id.func.__globals__,
                                        block, i, reduce_func)
                                for block in new_blocks:
                                    work_list.append(block)
                                modified = True
                                # current block is modified, skip the rest
                                break
                        func_def = guard(get_definition, self.func_ir, expr.func)
                        debug_print("found call to ", expr.func, " def = ", func_def)
                        if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                            new_blocks = inline_closure_call(self.func_ir,
                                        self.func_ir.func_id.func.__globals__,
                                        block, i, func_def)
                            for block in new_blocks:
                                work_list.append(block)
                            modified = True
                            # current block is modified, skip the rest
                            break

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
                visited.append(k)
                if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k],
                        self.flags.auto_parallel):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            remove_dels(self.func_ir.blocks)
            # repeat dead code elimintation until nothing can be further
            # removed
            while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
                pass
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)
        debug_print("END")
示例#18
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []
        stencil_blocks = stencil_ir.blocks

        if config.DEBUG_ARRAY_OPT == 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                   return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(
            stencil_blocks,
            in_cps,
            name_var_table,
            self.typemap,
            self.calltypes)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                             self.typemap)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
             stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
             arg_to_arr_dict)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replace stencil accesses")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                        end_lengths[i], gen_nodes, scope, loc)
            start_ind = self._get_stencil_start_ind(
                                        start_lengths[i], gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                start_ind, last_ind, 1))

        # We have to guarantee that the exit block has maximum label and that
        # there's only one exit block for the parfor body.
        # So, all return statements will change to jump to the parfor exit block.
        parfor_body_exit_label = max(stencil_blocks.keys()) + 1
        stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
        exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
        self.typemap[exit_value_var.name] = return_type.dtype

        # create parfor index var
        for_replacing_ret = []
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_tuple_var"), loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            for_replacing_ret.append(tuple_assign)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after creating parfor index var")
            ir_utils.dump_blocks(stencil_blocks)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr == None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                               in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError("cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.types.npytypes.Array(
                                                           return_type.dtype,
                                                           in_arr_typ.ndim,
                                                           in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full",
                                       np.full,
                                       out_arr,
                                       [shape_var, zero_var, dtype_attr_var],
                                       self.typingctx,
                                       self.typemap,
                                       self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)

        self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                         parfor_body_exit_label)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replacing return")
            ir_utils.dump_blocks(stencil_blocks)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
        self.calltypes[setitem_call] = signature(
                                        types.none, self.typemap[out_arr.name],
                                        self.typemap[parfor_ind_var.name],
                                        self.typemap[out_arr.name].dtype
                                        )
        stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
        stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

        # simplify CFG of parfor body (exit block could be simplified often)
        # add dummy return to enable CFG
        stencil_blocks[parfor_body_exit_label].body.append(ir.Return(0,
                                            ir.Loc("stencilparfor_dummy", -1)))
        stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
        stencil_blocks[max(stencil_blocks.keys())].body.pop()

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after adding SetItem")
            ir_utils.dump_blocks(stencil_blocks)

        pattern = ('stencil', [start_lengths, end_lengths])
        parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                     loc, parfor_ind_var, equiv_set, pattern, self.flags)
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes