Exemplo n.º 1
0
def include_new_blocks(blocks,
                       new_blocks,
                       label,
                       new_body,
                       remove_non_return=True,
                       work_list=None,
                       func_ir=None):
    inner_blocks = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(inner_blocks)
    ir_utils._max_label = max(blocks.keys())
    scope = blocks[label].scope
    loc = blocks[label].loc
    inner_topo_order = find_topo_order(inner_blocks)
    inner_first_label = inner_topo_order[0]
    inner_last_label = inner_topo_order[-1]
    if remove_non_return:
        remove_return_from_block(inner_blocks[inner_last_label])
    new_body.append(ir.Jump(inner_first_label, loc))
    blocks[label].body = new_body
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    if remove_non_return:
        inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
    # new_body.clear()
    if work_list is not None:
        topo_order = find_topo_order(inner_blocks)
        for _label in topo_order:
            block = inner_blocks[_label]
            block.scope = scope
            numba.inline_closurecall._add_definitions(func_ir, block)
            work_list.append((_label, block))
    return label
Exemplo n.º 2
0
def _loop_lift_modify_call_block(liftedloop, block, inputs, outputs, returnto):
    """
    Transform calling block from top-level function to call the lifted loop.
    """
    scope = block.scope
    loc = block.loc
    blk = ir.Block(scope=scope, loc=loc)
    # load loop
    fn = ir.Const(value=liftedloop, loc=loc)
    fnvar = scope.make_temp(loc=loc)
    blk.append(ir.Assign(target=fnvar, value=fn, loc=loc))
    # call loop
    args = [scope.get_exact(name) for name in inputs]
    callexpr = ir.Expr.call(func=fnvar, args=args, kws=(), loc=loc)
    # temp variable for the return value
    callres = scope.make_temp(loc=loc)
    blk.append(ir.Assign(target=callres, value=callexpr, loc=loc))
    # unpack return value
    for i, out in enumerate(outputs):
        target = scope.get_exact(out)
        getitem = ir.Expr.static_getitem(value=callres,
                                         index=i,
                                         index_var=None,
                                         loc=loc)
        blk.append(ir.Assign(target=target, value=getitem, loc=loc))
    # jump to next block
    blk.append(ir.Jump(target=returnto, loc=loc))
    return blk
Exemplo n.º 3
0
def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
    """make a block that initializes loop range and iteration variables.
    target label in jump needs to be set.
    """
    # g_range_var = Global(range)
    g_range_var = ir.Var(scope, mk_unique_var("$range_g_var"), loc)
    typemap[g_range_var.name] = get_global_func_typ(range)
    g_range = ir.Global('range', range, loc)
    g_range_assign = ir.Assign(g_range, g_range_var, loc)
    arg_nodes, args = _mk_range_args(typemap, start, stop, step, scope, loc)
    # range_call_var = call g_range_var(start, stop, step)
    range_call = ir.Expr.call(g_range_var, args, (), loc)
    calltypes[range_call] = typemap[g_range_var.name].get_call_type(
        typing.Context(), [types.intp] * len(args), {})
    #signature(types.range_state64_type, types.intp)
    range_call_var = ir.Var(scope, mk_unique_var("$range_c_var"), loc)
    typemap[range_call_var.name] = types.iterators.RangeType(types.intp)
    range_call_assign = ir.Assign(range_call, range_call_var, loc)
    # iter_var = getiter(range_call_var)
    iter_call = ir.Expr.getiter(range_call_var, loc)
    calltypes[iter_call] = signature(types.range_iter64_type,
                                     types.range_state64_type)
    iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
    typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp)
    iter_call_assign = ir.Assign(iter_call, iter_var, loc)
    # $phi = iter_var
    phi_var = ir.Var(scope, mk_unique_var("$phi"), loc)
    typemap[phi_var.name] = types.iterators.RangeIteratorType(types.intp)
    phi_assign = ir.Assign(iter_var, phi_var, loc)
    # jump to header
    jump_header = ir.Jump(-1, loc)
    range_block = ir.Block(scope, loc)
    range_block.body = arg_nodes + [g_range_assign, range_call_assign,
                                    iter_call_assign, phi_assign, jump_header]
    return range_block
    def replace_return_with_setitem(self, blocks, exit_value_var,
                                    parfor_body_exit_label):
        """
        Find return statements in the IR and replace them with a SetItem
        call of the value "returned" by the kernel into the result array.
        Returns the block labels that contained return statements.
        """
        for label, block in blocks.items():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if isinstance(stmt, ir.Return):
                    # previous stmt should have been a cast
                    prev_stmt = new_body.pop()
                    assert (isinstance(prev_stmt, ir.Assign)
                            and isinstance(prev_stmt.value, ir.Expr)
                            and prev_stmt.value.op == 'cast')

                    new_body.append(
                        ir.Assign(prev_stmt.value.value, exit_value_var, loc))
                    new_body.append(ir.Jump(parfor_body_exit_label, loc))
                else:
                    new_body.append(stmt)
            block.body = new_body
Exemplo n.º 5
0
def include_new_blocks(blocks, new_blocks, label, new_body):
    inner_blocks = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(inner_blocks)
    ir_utils._max_label = max(blocks.keys())
    scope = blocks[label].scope
    loc = blocks[label].loc
    inner_topo_order = find_topo_order(inner_blocks)
    inner_first_label = inner_topo_order[0]
    inner_last_label = inner_topo_order[-1]
    remove_none_return_from_block(inner_blocks[inner_last_label])
    new_body.append(ir.Jump(inner_first_label, loc))
    blocks[label].body = new_body
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
    #new_body.clear()
    return label
Exemplo n.º 6
0
def _replace_returns(blocks, target, return_label):
    """
    Return return statement by assigning directly to target, and a jump.
    """
    for label, block in blocks.items():
        for i in range(len(block.body)):
            stmt = block.body[i]
            if isinstance(stmt, ir.Return):
                assert(i + 1 == len(block.body))
                block.body[i] = ir.Assign(stmt.value, target, stmt.loc)
                block.body.append(ir.Jump(return_label, stmt.loc))
Exemplo n.º 7
0
 def _start_new_block(self, inst):
     self.loc = ir.Loc(filename=self.bytecode.filename, line=inst.lineno)
     oldblock = self.current_block
     self.insert_block(inst.offset)
     # Ensure the last block is terminated
     if oldblock is not None and not oldblock.is_terminated:
         jmp = ir.Jump(inst.offset, loc=self.loc)
         oldblock.append(jmp)
     # Get DFA block info
     self.dfainfo = self.dfa.infos[self.current_block_offset]
     self.assigner = Assigner()
Exemplo n.º 8
0
    def run(self):
        dprint_func_ir(self.func_ir, "starting hiframes")
        topo_order = find_topo_order(self.func_ir.blocks)
        for label in topo_order:
            new_body = []
            for inst in self.func_ir.blocks[label].body:
                # df['col'] = arr
                if isinstance(inst, ir.StaticSetItem) and inst.target.name in self.df_vars:
                    df_name = inst.target.name
                    self.df_vars[df_name][inst.index] = inst.value
                    self._update_df_cols()
                elif isinstance(inst, ir.Assign):
                    out_nodes = self._run_assign(inst)
                    if isinstance(out_nodes, list):
                        new_body.extend(out_nodes)
                    if isinstance(out_nodes, dict):
                        inner_blocks = add_offset_to_labels(out_nodes, ir_utils._max_label+1)
                        self.func_ir.blocks.update(inner_blocks)
                        ir_utils._max_label = max(self.func_ir.blocks.keys())
                        scope = self.func_ir.blocks[label].scope
                        loc = self.func_ir.blocks[label].loc
                        inner_topo_order = find_topo_order(inner_blocks)
                        inner_first_label = inner_topo_order[0]
                        inner_last_label = inner_topo_order[-1]
                        remove_none_return_from_block(inner_blocks[inner_last_label])
                        new_body.append(ir.Jump(inner_first_label, loc))
                        self.func_ir.blocks[label].body = new_body
                        label = ir_utils.next_label()
                        self.func_ir.blocks[label] = ir.Block(scope, loc)
                        inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
                        new_body = []
                else:
                    new_body.append(inst)
            self.func_ir.blocks[label].body = new_body

        remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
        dprint_func_ir(self.func_ir, "after hiframes")
        if config.DEBUG_ARRAY_OPT==1:
            print("df_vars: ", self.df_vars)
        return
Exemplo n.º 9
0
    def replace_target(term, src, dst):
        def replace(target):
            return (dst if target == src else target)

        if isinstance(term, ir.Branch):
            return ir.Branch(cond=term.cond,
                             truebr=replace(term.truebr),
                             falsebr=replace(term.falsebr),
                             loc=term.loc)
        elif isinstance(term, ir.Jump):
            return ir.Jump(target=replace(term.target), loc=term.loc)
        else:
            assert not term.get_targets()
            return term
Exemplo n.º 10
0
 def _start_new_block(self, inst):
     self.loc = ir.Loc(filename=self.bytecode.filename, line=inst.lineno)
     oldblock = self.current_block
     self.insert_block(inst.offset)
     # Ensure the last block is terminated
     if oldblock is not None and not oldblock.is_terminated:
         jmp = ir.Jump(inst.offset, loc=self.loc)
         oldblock.append(jmp)
         # Get DFA block info
     self.dfainfo = self.dfa.infos[self.current_block_offset]
     # Insert PHI
     self._insert_phi()
     # Notify listeners for the new block
     for fn in utils.dict_itervalues(self._block_actions):
         fn(self.current_block_offset, self.current_block)
Exemplo n.º 11
0
def _bypass_with_context(blocks, blk_start, blk_end, forwardvars):
    """Given the starting and ending block of the with-context,
    replaces the head block with a new block that jumps to the end.

    *blocks* is modified inplace.
    """
    sblk = blocks[blk_start]
    scope = sblk.scope
    loc = sblk.loc
    newblk = ir.Block(scope=scope, loc=loc)
    for k, v in forwardvars.items():
        newblk.append(ir.Assign(value=scope.get_exact(k),
                                target=scope.get_exact(v),
                                loc=loc))
    newblk.append(ir.Jump(target=blk_end, loc=loc))
    blocks[blk_start] = newblk
Exemplo n.º 12
0
def inline_calls_inner(func_ir, block, stmt, i, py_func):
    call_expr = stmt.value
    scope = block.scope
    callee_ir = numba.compiler.run_frontend(py_func)

    # relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_ir.blocks, max_label + 1)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())

    #  init _max_label global in ir_utils before using next_label()
    ir_utils._max_label = max_label

    # rename all variables in callee blocks
    var_table = get_name_var_table(callee_ir.blocks)
    new_var_dict = {}
    for name, var in var_table.items():
        new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
        new_var_dict[name] = new_var
    replace_vars(callee_ir.blocks, new_var_dict)

    # replace callee arguments
    args = list(call_expr.args)
    # TODO: replace defaults (add to args)
    _replace_args(callee_ir.blocks, args)

    # split caller blocks into two
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = ir_utils.next_label()
    func_ir.blocks[new_label] = new_block
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, stmt.loc))

    # replace Return with assignment to LHS
    _replace_returns(callee_ir.blocks, stmt.target, new_label)

    # insert all new blocks
    for label, bl in callee_ir.blocks.items():
        func_ir.blocks[label] = bl

    # run inline_calls recursively to transform other calls
    inline_calls(func_ir)
    return
Exemplo n.º 13
0
def inline_new_blocks(func_ir, block, i, callee_blocks, work_list=None):
    # adopted from inline_closure_call
    scope = block.scope
    instr = block.body[i]

    # 1. relabel callee_ir by adding an offset
    callee_blocks = add_offset_to_labels(callee_blocks,
                                         ir_utils._max_label + 1)
    callee_blocks = ir_utils.simplify_CFG(callee_blocks)
    max_label = max(callee_blocks.keys())
    #    reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    topo_order = find_topo_order(callee_blocks)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = ir_utils.next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    min_label = topo_order[0]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    numba.inline_closurecall._replace_returns(callee_blocks, instr.target,
                                              new_label)
    #    remove the old definition of instr.target too
    if (instr.target.name in func_ir._definitions):
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        block = callee_blocks[label]
        block.scope = scope
        numba.inline_closurecall._add_definitions(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))

    if work_list is not None:
        for block in new_blocks:
            work_list.append(block)
    return callee_blocks
Exemplo n.º 14
0
 def replace_return_with_setitem(self, blocks, exit_value_var,
                                 parfor_body_exit_label):
     """
     Find return statements in the IR and replace them with a SetItem
     call of the value "returned" by the kernel into the result array.
     Returns the block labels that contained return statements.
     """
     for label, block in blocks.items():
         scope = block.scope
         loc = block.loc
         new_body = []
         for stmt in block.body:
             if isinstance(stmt, ir.Return):
                 new_body.append(ir.Assign(stmt.value, exit_value_var, loc))
                 new_body.append(ir.Jump(parfor_body_exit_label, loc))
             else:
                 new_body.append(stmt)
         block.body = new_body
Exemplo n.º 15
0
def _replace_returns(blocks, target, return_label):
    """
    Return return statement by assigning directly to target, and a jump.
    """
    for label, block in blocks.items():
        casts = []
        for i in range(len(block.body)):
            stmt = block.body[i]
            if isinstance(stmt, ir.Return):
                assert(i + 1 == len(block.body))
                block.body[i] = ir.Assign(stmt.value, target, stmt.loc)
                block.body.append(ir.Jump(return_label, stmt.loc))
                # remove cast of the returned value
                for cast in casts:
                    if cast.target.name == stmt.value.name:
                        cast.value = cast.value.value
            elif isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'cast':
                casts.append(stmt)
Exemplo n.º 16
0
    def make_prologue():
        """
        Make a new block that unwraps the argument and jump to the loop entry.
        This block is the entry block of the function.
        """
        entry_block = blocks[loopinfo.callfrom]
        scope = entry_block.scope
        loc = entry_block.loc

        block = ir.Block(scope=scope, loc=loc)
        # load args
        args = [ir.Arg(name=k, index=i, loc=loc)
                for i, k in enumerate(loopinfo.inputs)]
        for aname, aval in zip(loopinfo.inputs, args):
            tmp = ir.Var(scope=scope, name=aname, loc=loc)
            block.append(ir.Assign(target=tmp, value=aval, loc=loc))
        # jump to loop entry
        block.append(ir.Jump(target=loopinfo.callfrom, loc=loc))
        return block
Exemplo n.º 17
0
 def rewrite_single_backedge(loop):
     """
     Add new tail block that gathers all the backedges
     """
     header = loop.header
     tailkey = new_block_id()
     for blkkey in loop.body:
         blk = newblocks[blkkey]
         if header in blk.terminator.get_targets():
             newblk = blk.copy()
             # rewrite backedge into jumps to new tail block
             newblk.body[-1] = replace_target(blk.terminator, header,
                                              tailkey)
             newblocks[blkkey] = newblk
     # create new tail block
     entryblk = newblocks[header]
     tailblk = ir.Block(scope=entryblk.scope, loc=entryblk.loc)
     # add backedge
     tailblk.append(ir.Jump(target=header, loc=tailblk.loc))
     newblocks[tailkey] = tailblk
Exemplo n.º 18
0
    def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes,
                         *args):
        # Overall approach:
        # 1) Construct a string containing a function definition for the stencil function
        #    that will execute the stencil kernel.  This function definition includes a
        #    unique stencil function name, the parameters to the stencil kernel, loop
        #    nests across the dimenions of the input array.  Those loop nests use the
        #    computed stencil kernel size so as not to try to compute elements where
        #    elements outside the bounds of the input array would be needed.
        # 2) The but of the loop nest in this new function is a special sentinel
        #    assignment.
        # 3) Get the IR of this new function.
        # 4) Split the block containing the sentinel assignment and remove the sentinel
        #    assignment.  Insert the stencil kernel IR into the stencil function IR
        #    after label and variable renaming of the stencil kernel IR to prevent
        #    conflicts with the stencil function IR.
        # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

        # Copy the kernel so that our changes for this callsite
        # won't effect other callsites.
        (kernel_copy,
         copy_calltypes) = self.copy_ir_with_calltypes(self.kernel_ir,
                                                       calltypes)
        # The stencil kernel body becomes the body of a loop, for which args aren't needed.
        ir_utils.remove_args(kernel_copy.blocks)
        first_arg = kernel_copy.arg_names[0]

        in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
        name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
        ir_utils.apply_copy_propagate(kernel_copy.blocks, in_cps,
                                      name_var_table, typemap, copy_calltypes)

        if "out" in name_var_table:
            raise ValueError(
                "Cannot use the reserved word 'out' in stencil kernels.")

        sentinel_name = ir_utils.get_unused_var_name("__sentinel__",
                                                     name_var_table)
        if config.DEBUG_ARRAY_OPT == 1:
            print("name_var_table", name_var_table, sentinel_name)

        the_array = args[0]

        if config.DEBUG_ARRAY_OPT == 1:
            print("_stencil_wrapper", return_type, return_type.dtype,
                  type(return_type.dtype), args)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # We generate a Numba function to execute this stencil and here
        # create the unique name of this function.
        stencil_func_name = "__numba_stencil_%s_%s" % (hex(
            id(the_array)).replace("-", "_"), self.id)

        # We will put a loop nest in the generated function for each
        # dimension in the input array.  Here we create the name for
        # the index variable for each dimension.  index0, index1, ...
        index_vars = []
        for i in range(the_array.ndim):
            index_var_name = ir_utils.get_unused_var_name(
                "index" + str(i), name_var_table)
            index_vars += [index_var_name]

        # Create extra signature for out and neighborhood.
        out_name = ir_utils.get_unused_var_name("out", name_var_table)
        neighborhood_name = ir_utils.get_unused_var_name(
            "neighborhood", name_var_table)
        sig_extra = ""
        if result is not None:
            sig_extra += ", {}=None".format(out_name)
        if "neighborhood" in dict(self.kws):
            sig_extra += ", {}=None".format(neighborhood_name)

        # Get a list of the standard indexed array names.
        standard_indexed = self.options.get("standard_indexing", [])

        if first_arg in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must "
                             "use relative indexing, not standard indexing.")

        if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
            raise ValueError("Standard indexing requested for an array name "
                             "not present in the stencil kernel definition.")

        # Add index variables to getitems in the IR to transition the accesses
        # in the kernel from relative to regular Python indexing.  Returns the
        # computed size of the stencil kernel and a list of the relatively indexed
        # arrays.
        kernel_size, relatively_indexed = self.add_indices_to_kernel(
            kernel_copy, index_vars, the_array.ndim, self.neighborhood,
            standard_indexed)
        if self.neighborhood is None:
            self.neighborhood = kernel_size

        if config.DEBUG_ARRAY_OPT == 1:
            print("After add_indices_to_kernel")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # The return in the stencil kernel becomes a setitem for that
        # particular point in the iteration space.
        ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                      index_vars, out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("After replace_return_with_setitem", ret_blocks)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Start to form the new function to execute the stencil kernel.
        func_text = "def {}({}{}):\n".format(stencil_func_name,
                                             ",".join(kernel_copy.arg_names),
                                             sig_extra)

        # Get loop ranges for each dimension, which could be either int
        # or variable. In the latter case we'll use the extra neighborhood
        # argument to the function.
        ranges = []
        for i in range(the_array.ndim):
            if isinstance(kernel_size[i][0], int):
                lo = kernel_size[i][0]
                hi = kernel_size[i][1]
            else:
                lo = "{}[{}][0]".format(neighborhood_name, i)
                hi = "{}[{}][1]".format(neighborhood_name, i)
            ranges.append((lo, hi))

        # If there are more than one relatively indexed arrays, add a call to
        # a function that will raise an error if any of the relatively indexed
        # arrays are of different size than the first input array.
        if len(relatively_indexed) > 1:
            func_text += "    raise_if_incompatible_array_sizes(" + first_arg
            for other_array in relatively_indexed:
                if other_array != first_arg:
                    func_text += "," + other_array
            func_text += ")\n"

        # Get the shape of the first input array.
        shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
        func_text += "    {} = {}.shape\n".format(shape_name, first_arg)

        # If we have to allocate the output array (the out argument was not used)
        # then us numpy.full if the user specified a cval stencil decorator option
        # or np.zeros if they didn't to allocate the array.
        if result is None:
            return_type_name = numpy_support.as_dtype(
                return_type.dtype).type.__name__
            if "cval" in self.options:
                cval = self.options["cval"]
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")
                out_init = "{} = np.full({}, {}, dtype=np.{})\n".format(
                    out_name, shape_name, cval, return_type_name)
            else:
                out_init = "{} = np.zeros({}, dtype=np.{})\n".format(
                    out_name, shape_name, return_type_name)
            func_text += "    " + out_init

        offset = 1
        # Add the loop nests to the new function.
        for i in range(the_array.ndim):
            for j in range(offset):
                func_text += "    "
            # ranges[i][0] is the minimum index used in the i'th dimension
            # but minimum's greater than 0 don't preclude any entry in the array.
            # So, take the minimum of 0 and the minimum index found in the kernel
            # and this will be a negative number (potentially -0).  Then, we do
            # unary - on that to get the positive offset in this dimension whose
            # use is precluded.
            # ranges[i][1] is the maximum of 0 and the observed maximum index
            # in this dimension because negative maximums would not cause us to
            # preclude any entry in the array from being used.
            func_text += ("for {} in range(-min(0,{}),"
                          "{}[{}]-max(0,{})):\n").format(
                              index_vars[i], ranges[i][0], shape_name, i,
                              ranges[i][1])
            offset += 1

        for j in range(offset):
            func_text += "    "
        # Put a sentinel in the code so we can locate it in the IR.  We will
        # remove this sentinel assignment and replace it with the IR for the
        # stencil kernel body.
        func_text += "{} = 0\n".format(sentinel_name)
        func_text += "    return {}\n".format(out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new stencil func text")
            print(func_text)

        # Force the new stencil function into existence.
        exec_(func_text) in globals(), locals()
        stencil_func = eval(stencil_func_name)
        if sigret is not None:
            pysig = utils.pysignature(stencil_func)
            sigret.pysig = pysig
        # Get the IR for the newly created stencil function.
        stencil_ir = compiler.run_frontend(stencil_func)
        ir_utils.remove_dels(stencil_ir.blocks)

        # rename all variables in stencil_ir afresh
        var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
        new_var_dict = {}
        reserved_names = (
            [sentinel_name, out_name, neighborhood_name, shape_name] +
            kernel_copy.arg_names + index_vars)
        for name, var in var_table.items():
            if not name in reserved_names:
                new_var_dict[name] = ir_utils.mk_unique_var(name)
        ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

        stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

        # Shift lables in the kernel copy so they are guaranteed unique
        # and don't conflict with any labels in the stencil_ir.
        kernel_copy.blocks = ir_utils.add_offset_to_labels(
            kernel_copy.blocks, stencil_stub_last_label)
        new_label = max(kernel_copy.blocks.keys()) + 1
        # Adjust ret_blocks to account for addition of the offset.
        ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

        if config.DEBUG_ARRAY_OPT == 1:
            print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
            print("before replace sentinel stencil_ir")
            ir_utils.dump_blocks(stencil_ir.blocks)
            print("before replace sentinel kernel_copy")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Search all the block in the stencil outline for the sentinel.
        for label, block in stencil_ir.blocks.items():
            for i, inst in enumerate(block.body):
                if (isinstance(inst, ir.Assign)
                        and inst.target.name == sentinel_name):
                    # We found the sentinel assignment.
                    loc = inst.loc
                    scope = block.scope
                    # split block across __sentinel__
                    # A new block is allocated for the statements prior to the
                    # sentinel but the new block maintains the current block
                    # label.
                    prev_block = ir.Block(scope, loc)
                    prev_block.body = block.body[:i]
                    # The current block is used for statements after sentinel.
                    block.body = block.body[i + 1:]
                    # But the current block gets a new label.
                    body_first_label = min(kernel_copy.blocks.keys())

                    # The previous block jumps to the minimum labelled block of
                    # the parfor body.
                    prev_block.append(ir.Jump(body_first_label, loc))
                    # Add all the parfor loop body blocks to the gufunc
                    # function's IR.
                    for (l, b) in kernel_copy.blocks.items():
                        stencil_ir.blocks[l] = b

                    stencil_ir.blocks[new_label] = block
                    stencil_ir.blocks[label] = prev_block
                    # Add a jump from all the blocks that previously contained
                    # a return in the stencil kernel to the block
                    # containing statements after the sentinel.
                    for ret_block in ret_blocks:
                        stencil_ir.blocks[ret_block].append(
                            ir.Jump(new_label, loc))
                    break
            else:
                continue
            break

        stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
        ir_utils.remove_dels(stencil_ir.blocks)

        assert (isinstance(the_array, types.Type))
        array_types = args

        new_stencil_param_types = list(array_types)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new_stencil_param_types", new_stencil_param_types)
            ir_utils.dump_blocks(stencil_ir.blocks)

        # Compile the combined stencil function with the replaced loop
        # body in it.
        new_func = compiler.compile_ir(self._typingctx, self._targetctx,
                                       stencil_ir, new_stencil_param_types,
                                       None, compiler.DEFAULT_FLAGS, {})
        return new_func
Exemplo n.º 19
0
 def test_jump(self):
     a = ir.Jump(1, self.loc1)
     b = ir.Jump(1, self.loc1)
     c = ir.Jump(1, self.loc2)
     d = ir.Jump(2, self.loc1)
     self.check(a, same=[b, c], different=[d])
Exemplo n.º 20
0
 def inline_closure_call(self, block, i, callee):
     """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)
     """
     scope = block.scope
     instr = block.body[i]
     call_expr = instr.value
     _debug_print("Found closure call: ", instr, " with callee = ", callee)
     func_ir = self.func_ir
     # first, get the IR of the callee
     from_ir = self.get_ir_of_code(callee.code)
     from_blocks = from_ir.blocks
     # 1. relabel from_ir by adding an offset
     max_label = max(func_ir.blocks.keys())
     from_blocks = add_offset_to_labels(from_blocks, max_label + 1)
     from_ir.blocks = from_blocks
     min_label = min(from_blocks.keys())
     max_label = max(from_blocks.keys())
     #    reset globals in ir_utils before we use it
     ir_utils._max_label = max_label 
     ir_utils.visit_vars_extensions = {}
     # 2. rename all local variables in from_ir with new locals created in func_ir
     from_scopes = _get_all_scopes(from_blocks)
     _debug_print("obj_IR has scopes: ", from_scopes)
     #    one function should only have one local scope
     assert(len(from_scopes) == 1)
     from_scope = from_scopes[0]
     var_dict = {}
     for var in from_scope.localvars._con.values():
         if not (var.name in callee.code.co_freevars):
             var_dict[var.name] = scope.make_temp(var.loc)
     _debug_print("Before local var rename: var_dict = ", var_dict)
     _debug_dump(from_ir)
     replace_vars(from_blocks, var_dict)
     _debug_print("After local var rename: ")
     _debug_dump(from_ir)
     # 3. replace formal parameters with actual arguments
     args = list(call_expr.args)
     if callee.defaults:
         _debug_print("defaults", callee.defaults)
         if isinstance(callee.defaults, tuple): # Python 3.5
             args = args + list(callee.defaults)
         elif isinstance(callee.defaults, ir.Var) or isinstance(callee.defaults, str):
             defaults = func_ir.get_definition(callee.defaults)
             assert(isinstance(defaults, ir.Const))
             loc = defaults.loc
             args = args + [ ir.Const(value=v, loc=loc) for v in defaults.value ]
         else:
             raise NotImplementedError("Unsupported defaults to make_function: {}".format(defaults))
     _replace_args_with(from_blocks, args)
     _debug_print("After arguments rename: ")
     _debug_dump(from_ir)
     # 4. replace freevar with actual closure var
     if callee.closure:
         closure = func_ir.get_definition(callee.closure)
         assert(isinstance(closure, ir.Expr) and closure.op == 'build_tuple')
         assert(len(callee.code.co_freevars) == len(closure.items))
         _debug_print("callee's closure = ", closure)
         _replace_freevars(from_blocks, closure.items)
         _debug_print("After closure rename: ")
         _debug_dump(from_ir)
     # 5. split caller blocks into two
     new_blocks = []
     new_block = ir.Block(scope, block.loc)
     new_block.body = block.body[i+1:]
     new_label = next_label()
     func_ir.blocks[new_label] = new_block
     new_blocks.append((new_label, new_block))
     block.body = block.body[:i]
     block.body.append(ir.Jump(min_label, instr.loc))
     # 6. replace Return with assignment to LHS
     _replace_returns(from_blocks, instr.target, new_label)
     # 7. insert all new blocks, and add back definitions
     for label, block in from_blocks.items():
         # block scope must point to parent's
         block.scope = scope
         _add_definition(func_ir, block)
         func_ir.blocks[label] = block
         new_blocks.append((label, block))
     _debug_print("After merge: ")
     _debug_dump(func_ir)
     return new_blocks
Exemplo n.º 21
0
 def op_JUMP_FORWARD(self, inst):
     jmp = ir.Jump(inst.get_jump_target(), loc=self.loc)
     self.current_block.append(jmp)
Exemplo n.º 22
0
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None,
                        arg_typs=None, typemap=None, calltypes=None,
                        work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, 'code') else callee.__code__
    callee_defaults = callee.defaults if hasattr(callee, 'defaults') else callee.__defaults__
    callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__
    # first, get the IR of the callee
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    #    reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    #    one function should only have one local scope
    assert(len(callee_scopes) == 1)
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        if not (var.name in callee_code.co_freevars):
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple): # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, ir.Var) or isinstance(callee_defaults, str):
            defaults = func_ir.get_definition(callee_defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage(
                typingctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)
    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    #    remove the old definition of instr.target too
    if (instr.target.name in func_ir._definitions):
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        block = callee_blocks[label]
        block.scope = scope
        _add_definitions(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list != None:
        for block in new_blocks:
            work_list.append(block)
    return callee_blocks
Exemplo n.º 23
0
 def op_BREAK_LOOP(self, inst):
     loop = self.syntax_blocks[-1]
     assert isinstance(loop, ir.Loop)
     jmp = ir.Jump(target=loop.exit, loc=self.loc)
     self.current_block.append(jmp)
Exemplo n.º 24
0
 def do_prune(take_truebr, blk):
     keep = branch.truebr if take_truebr else branch.falsebr
     # replace the branch with a direct jump
     jmp = ir.Jump(keep, loc=branch.loc)
     blk.body[-1] = jmp
     return 1 if keep == branch.truebr else 0