Exemplo n.º 1
0
    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced with be appended to "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible) and
                           ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        dtype = array_typ.dtype
        seq, op = find_build_sequence(func_ir, list_var)
        size = len(seq)
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)

        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ

        stmts.append(_new_definition(func_ir, size_var,
                 ir.Const(size, loc=loc), loc))

        stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,), {})
        typemap[empty_func.name] = fnty #

        stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('empty', np.empty, loc=loc), loc))

        empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        for i in range(size):
            index_var = ir.Var(scope, mk_unique_var("index"), loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(_new_definition(func_ir, index_var,
                    ir.Const(i, loc), loc))
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(types.none, array_typ,
                                                  index_typ, dtype)
            stmts.append(setitem)

        stmts.extend(dels)
        return True
Exemplo n.º 2
0
    def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope,
                                                                        loc):
        last_ind = dim_size
        if end_length != 0:
            # set last index to size minus stencil size to avoid invalid
            # memory access
            index_const = ir.Var(scope, mk_unique_var("stencil_const_var"),
                                                                        loc)
            self.typemap[index_const.name] = types.intp
            if isinstance(end_length, numbers.Number):
                const_assign = ir.Assign(ir.Const(end_length, loc),
                                                        index_const, loc)
            else:
                const_assign = ir.Assign(end_length, index_const, loc)

            gen_nodes.append(const_assign)
            last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
            self.typemap[last_ind.name] = types.intp

            g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
            check_func = numba.njit(_compute_last_ind)
            func_typ = types.functions.Dispatcher(check_func)
            self.typemap[g_var.name] = func_typ
            g_obj = ir.Global("_compute_last_ind", check_func, loc)
            g_assign = ir.Assign(g_obj, g_var, loc)
            gen_nodes.append(g_assign)
            index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
            self.calltypes[index_call] = func_typ.get_call_type(
                self.typingctx, [types.intp, types.intp], {})
            index_assign = ir.Assign(index_call, last_ind, loc)
            gen_nodes.append(index_assign)

        return last_ind
Exemplo n.º 3
0
    def run(self):
        """ Finds all calls to StencilFuncs in the IR and converts them to parfor.
        """
        from numba.stencil import StencilFunc

        # Get all the calls in the function IR.
        call_table, _ = get_call_table(self.func_ir.blocks)
        stencil_calls = []
        stencil_dict = {}
        for call_varname, call_list in call_table.items():
            if isinstance(call_list[0], StencilFunc):
                # Remember all calls to StencilFuncs.
                stencil_calls.append(call_varname)
                stencil_dict[call_varname] = call_list[0]
        if not stencil_calls:
            return  # return early if no stencil calls found

        # find and transform stencil calls
        for label, block in self.func_ir.blocks.items():
            for i, stmt in reversed(list(enumerate(block.body))):
                # Found a call to a StencilFunc.
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'call'
                        and stmt.value.func.name in stencil_calls):
                    kws = dict(stmt.value.kws)
                    # Create dictionary of input argument number to
                    # the argument itself.
                    input_dict = {
                        i: stmt.value.args[i]
                        for i in range(len(stmt.value.args))
                    }
                    in_args = stmt.value.args
                    arg_typemap = tuple(self.typemap[i.name] for i in in_args)
                    for arg_type in arg_typemap:
                        if isinstance(arg_type, types.BaseTuple):
                            raise ValueError("Tuple parameters not supported " \
                                "for stencil kernels in parallel=True mode.")

                    out_arr = kws.get('out')

                    # Get the StencilFunc object corresponding to this call.
                    sf = stencil_dict[stmt.value.func.name]
                    stencil_blocks, rt, arg_to_arr_dict = get_stencil_blocks(
                        sf, self.typingctx, arg_typemap, block.scope,
                        block.loc, input_dict, self.typemap, self.calltypes)
                    index_offsets = sf.options.get('index_offsets', None)
                    gen_nodes = self._mk_stencil_parfor(
                        label, in_args, out_arr, stencil_blocks, index_offsets,
                        stmt.target, rt, sf, arg_to_arr_dict)
                    block.body = block.body[:i] + gen_nodes + block.body[i +
                                                                         1:]
                # Found a call to a stencil via numba.stencil().
                elif (isinstance(stmt, ir.Assign)
                      and isinstance(stmt.value, ir.Expr)
                      and stmt.value.op == 'call'
                      and guard(find_callname, self.func_ir,
                                stmt.value) == ('stencil', 'numba')):
                    # remove dummy stencil() call
                    stmt.value = ir.Const(0, stmt.loc)
Exemplo n.º 4
0
def _loop_lift_modify_call_block(liftedloop, block, inputs, outputs, returnto):
    """
    Transform calling block from top-level function to call the lifted loop.
    """
    scope = block.scope
    loc = block.loc
    blk = ir.Block(scope=scope, loc=loc)
    # load loop
    fn = ir.Const(value=liftedloop, loc=loc)
    fnvar = scope.make_temp(loc=loc)
    blk.append(ir.Assign(target=fnvar, value=fn, loc=loc))
    # call loop
    args = [scope.get_exact(name) for name in inputs]
    callexpr = ir.Expr.call(func=fnvar, args=args, kws=(), loc=loc)
    # temp variable for the return value
    callres = scope.make_temp(loc=loc)
    blk.append(ir.Assign(target=callres, value=callexpr, loc=loc))
    # unpack return value
    for i, out in enumerate(outputs):
        target = scope.get_exact(out)
        getitem = ir.Expr.static_getitem(value=callres,
                                         index=i,
                                         index_var=None,
                                         loc=loc)
        blk.append(ir.Assign(target=target, value=getitem, loc=loc))
    # jump to next block
    blk.append(ir.Jump(target=returnto, loc=loc))
    return blk
Exemplo n.º 5
0
 def _gen_col_var(self, out_var, args, col_var):
     loc = out_var.loc
     scope = out_var.scope
     # calculate mean first
     mean_var = ir.Var(scope, mk_unique_var("mean_val"), loc)
     f_mean_blocks = self._gen_col_mean(mean_var, args, col_var)
     f_mean_blocks = add_offset_to_labels(f_mean_blocks, ir_utils._max_label+1)
     ir_utils._max_label = max(f_mean_blocks.keys())
     m_last_label = find_topo_order(f_mean_blocks)[-1]
     remove_none_return_from_block(f_mean_blocks[m_last_label])
     def f(A, s, m):
         count = 0
         for i in numba.parfor.prange(len(A)):
             val = A[i]
             if not np.isnan(val):
                 s += (val-m)**2
                 count += 1
         if count <= 1:
             s = np.nan
         else:
             s = s/(count-1)
     f_blocks = get_inner_ir(f)
     replace_var_names(f_blocks, {'A': col_var.name})
     replace_var_names(f_blocks, {'s': out_var.name})
     replace_var_names(f_blocks, {'m': mean_var.name})
     f_blocks[0].body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
     # attach first var block to last mean block
     f_mean_blocks[m_last_label].body.extend(f_blocks[0].body)
     f_blocks.pop(0)
     f_blocks = add_offset_to_labels(f_blocks, ir_utils._max_label+1)
     # add offset to jump of first f_block since it didn't go through call
     f_mean_blocks[m_last_label].body[-1].target += ir_utils._max_label+1
     ir_utils._max_label = max(f_blocks.keys())
     f_mean_blocks.update(f_blocks)
     return f_mean_blocks
Exemplo n.º 6
0
 def _gen_size_call(self, var, i):
     out = []
     ndims = self._get_ndims(var.name)
     # attr call: A_sh_attr = getattr(A, shape)
     shape_attr_call = ir.Expr.getattr(var, "shape", var.loc)
     attr_var = ir.Var(var.scope,
                       mk_unique_var(var.name + "_sh_attr" + str(i)),
                       var.loc)
     self.typemap[attr_var.name] = types.containers.UniTuple(
         types.intp, ndims)
     attr_assign = ir.Assign(shape_attr_call, attr_var, var.loc)
     out.append(attr_assign)
     # const var for dim: $constA0 = Const(0)
     const_node = ir.Const(i, var.loc)
     const_var = ir.Var(var.scope,
                        mk_unique_var("$const" + var.name + str(i)),
                        var.loc)
     self.typemap[const_var.name] = types.intp
     const_assign = ir.Assign(const_node, const_var, var.loc)
     out.append(const_assign)
     # get size: Asize0 = A_sh_attr[0]
     size_var = ir.Var(var.scope, mk_unique_var(var.name + "size" + str(i)),
                       var.loc)
     self.typemap[size_var.name] = types.intp
     getitem_node = ir.Expr.static_getitem(attr_var, i, const_var, var.loc)
     self.calltypes[getitem_node] = None
     getitem_assign = ir.Assign(getitem_node, size_var, var.loc)
     out.append(getitem_assign)
     return out
Exemplo n.º 7
0
 def rewrite_statement(func_ir, stmt, new_val):
     """
     Rewrites the stmt as a ir.Const new_val and fixes up the entries in
     func_ir._definitions
     """
     stmt.value = ir.Const(new_val, stmt.loc)
     defns = func_ir._definitions[stmt.target.name]
     repl_idx = defns.index(val)
     defns[repl_idx] = stmt.value
Exemplo n.º 8
0
    def _gen_rolling_init(self, win_size, func, center):
        nodes = []
        right_length = 0
        scope = win_size.scope
        loc = win_size.loc
        right_length = ir.Var(scope, mk_unique_var('zero_var'), scope)
        nodes.append(ir.Assign(ir.Const(0, loc), right_length, win_size.loc))

        def f(w):
            return -w + 1

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [win_size])
        nodes.extend(f_block.body[:-2])  # remove none return
        left_length = nodes[-1].target

        if center:

            def f(w):
                return -(w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            left_length = nodes[-1].target

            def f(w):
                return (w // 2)

            f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [win_size])
            nodes.extend(f_block.body[:-2])  # remove none return
            right_length = nodes[-1].target

        def f(a, b):
            return ((a, b), )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [left_length, right_length])
        nodes.extend(f_block.body[:-2])  # remove none return
        win_tuple = nodes[-1].target

        index_offsets = [right_length]

        if func == 'apply':
            index_offsets = [left_length]

        def f(a):
            return (a, )

        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, index_offsets)
        nodes.extend(f_block.body[:-2])  # remove none return
        index_offsets = nodes[-1].target

        return index_offsets, win_tuple, nodes
Exemplo n.º 9
0
def convert_size_to_var(size_var, typemap, scope, loc, nodes):
    if isinstance(size_var, int):
        new_size = ir.Var(scope, mk_unique_var("$alloc_size"), loc)
        if typemap:
            typemap[new_size.name] = types.intp
        size_assign = ir.Assign(ir.Const(size_var, loc), new_size, loc)
        nodes.append(size_assign)
        return new_size
    assert isinstance(size_var, ir.Var)
    return size_var
Exemplo n.º 10
0
 def _handle_h5_File_call(self, assign, lhs, rhs):
     """
     Handle h5py.File calls like:
       f = h5py.File(file_name, mode)
     """
     # parallel arg = False for this stage
     loc = lhs.loc
     scope = lhs.scope
     parallel_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
     parallel_assign = ir.Assign(ir.Const(0, loc), parallel_var, loc)
     rhs.args.append(parallel_var)
     return [parallel_assign, assign]
Exemplo n.º 11
0
Arquivo: pio.py Projeto: zmyer/hpat
    def _gen_h5write(self, f_id, dset_var, arr_var):
        scope = dset_var.scope
        loc = dset_var.loc

        # g_pio_var = Global(hpat.pio_api)
        g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
        g_pio = ir.Global('pio_api', hpat.pio_api, loc)
        g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
        # attr call: h5write_attr = getattr(g_pio_var, h5write)
        h5write_attr_call = ir.Expr.getattr(g_pio_var, "h5write", loc)
        attr_var = ir.Var(scope, mk_unique_var("$h5write_attr"), loc)
        attr_assign = ir.Assign(h5write_attr_call, attr_var, loc)
        out = [g_pio_assign, attr_assign]

        # ndims args
        ndims = len(self.h5_dsets_sizes[dset_var.name])
        ndims_var = ir.Var(scope, mk_unique_var("$h5_ndims"), loc)
        ndims_assign = ir.Assign(ir.Const(np.int32(ndims), loc), ndims_var,
                                 loc)
        # sizes arg
        sizes_var = ir.Var(scope, mk_unique_var("$h5_sizes"), loc)
        tuple_call = ir.Expr.getattr(arr_var, 'shape', loc)
        sizes_assign = ir.Assign(tuple_call, sizes_var, loc)

        zero_var = ir.Var(scope, mk_unique_var("$const_zero"), loc)
        zero_assign = ir.Assign(ir.Const(0, loc), zero_var, loc)
        # starts: assign to zeros
        starts_var = ir.Var(scope, mk_unique_var("$h5_starts"), loc)
        start_tuple_call = ir.Expr.build_tuple([zero_var] * ndims, loc)
        starts_assign = ir.Assign(start_tuple_call, starts_var, loc)
        out += [ndims_assign, zero_assign, starts_assign, sizes_assign]

        # err = h5write(f_id)
        err_var = ir.Var(scope, mk_unique_var("$pio_ret_var"), loc)
        write_call = ir.Expr.call(attr_var, [
            f_id, dset_var, ndims_var, starts_var, sizes_var, zero_var, arr_var
        ], (), loc)
        write_assign = ir.Assign(write_call, err_var, loc)
        out.append(write_assign)
        return out
Exemplo n.º 12
0
Arquivo: pio.py Projeto: zmyer/hpat
    def _gen_h5read_call(self, f_id, dset, start_vars, size_vars, lhs_var,
                         scope, loc, out):
        # g_pio_var = Global(hpat.pio_api)
        g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
        g_pio = ir.Global('pio_api', hpat.pio_api, loc)
        g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
        # attr call: h5size_attr = getattr(g_pio_var, h5read)
        h5size_attr_call = ir.Expr.getattr(g_pio_var, "h5read", loc)
        attr_var = ir.Var(scope, mk_unique_var("$h5read_attr"), loc)
        attr_assign = ir.Assign(h5size_attr_call, attr_var, loc)
        out += [g_pio_assign, attr_assign]

        # ndims args
        ndims = len(size_vars)
        ndims_var = ir.Var(scope, mk_unique_var("$h5_ndims"), loc)
        ndims_assign = ir.Assign(ir.Const(np.int32(ndims), loc), ndims_var,
                                 loc)
        # sizes arg
        sizes_var = ir.Var(scope, mk_unique_var("$h5_sizes"), loc)
        tuple_call = ir.Expr.build_tuple(size_vars, loc)
        sizes_assign = ir.Assign(tuple_call, sizes_var, loc)

        zero_var = ir.Var(scope, mk_unique_var("$const_zero"), loc)
        zero_assign = ir.Assign(ir.Const(0, loc), zero_var, loc)
        # starts: assign to zeros
        if not start_vars:
            start_vars = [zero_var] * ndims
        starts_var = ir.Var(scope, mk_unique_var("$h5_starts"), loc)
        start_tuple_call = ir.Expr.build_tuple(start_vars, loc)
        starts_assign = ir.Assign(start_tuple_call, starts_var, loc)
        out += [ndims_assign, zero_assign, starts_assign, sizes_assign]

        err_var = ir.Var(scope, mk_unique_var("$h5_err_var"), loc)
        read_call = ir.Expr.call(
            attr_var,
            [f_id, dset, ndims_var, starts_var, sizes_var, zero_var, lhs_var],
            (), loc)
        out.append(ir.Assign(read_call, err_var, loc))
        return
Exemplo n.º 13
0
def _replace_freevars(blocks, args):
    """
    Replace ir.FreeVar(...) with real variables from parent function
    """
    for label, block in blocks.items():
        assigns = block.find_insts(ir.Assign)
        for stmt in assigns:
            if isinstance(stmt.value, ir.FreeVar):
                idx = stmt.value.index
                assert(idx < len(args))
                if isinstance(args[idx], ir.Var):
                    stmt.value = args[idx]
                else:
                    stmt.value = ir.Const(args[idx], stmt.loc)
Exemplo n.º 14
0
def _mk_range_args(typemap, start, stop, step, scope, loc):
    nodes = []
    if isinstance(stop, ir.Var):
        g_stop_var = stop
    else:
        assert isinstance(stop, int)
        g_stop_var = ir.Var(scope, mk_unique_var("$range_stop"), loc)
        if typemap:
            typemap[g_stop_var.name] = types.intp
        stop_assign = ir.Assign(ir.Const(stop, loc), g_stop_var, loc)
        nodes.append(stop_assign)
    if start == 0 and step == 1:
        return nodes, [g_stop_var]

    if isinstance(start, ir.Var):
        g_start_var = start
    else:
        assert isinstance(start, int)
        g_start_var = ir.Var(scope, mk_unique_var("$range_start"), loc)
        if typemap:
            typemap[g_start_var.name] = types.intp
        start_assign = ir.Assign(ir.Const(start, loc), g_start_var)
        nodes.append(start_assign)
    if step == 1:
        return nodes, [g_start_var, g_stop_var]

    if isinstance(step, ir.Var):
        g_step_var = step
    else:
        assert isinstance(step, int)
        g_step_var = ir.Var(scope, mk_unique_var("$range_step"), loc)
        if typemap:
            typemap[g_step_var.name] = types.intp
        step_assign = ir.Assign(ir.Const(step, loc), g_step_var)
        nodes.append(step_assign)

    return nodes, [g_start_var, g_stop_var, g_step_var]
Exemplo n.º 15
0
Arquivo: pio.py Projeto: zmyer/hpat
 def _handle_h5_File_call(self, assign, lhs, rhs):
     """
     Handle h5py.File calls like:
       f = h5py.File(file_name, mode)
     """
     if guard(find_callname, self.func_ir, rhs) == ('File', 'h5py'):
         self.h5_files[lhs.name] = rhs.args[0]
         # parallel arg = False for this stage
         loc = lhs.loc
         scope = lhs.scope
         parallel_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
         parallel_assign = ir.Assign(ir.Const(0, loc), parallel_var, loc)
         rhs.args.append(parallel_var)
         return [parallel_assign, assign]
     return None
Exemplo n.º 16
0
    def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar):
        base = self.get(base)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc)
        self.store(value=index, name=indexvar)

        expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc)
        self.store(value=expr, name=res)
Exemplo n.º 17
0
 def _gen_col_sum(self, out_var, args, col_var):
     def f(A, s):
         count = 0
         for i in numba.parfor.prange(len(A)):
             val = A[i]
             if not np.isnan(val):
                 s += val
                 count += 1
         if not count:
             s = np.nan
     f_blocks = get_inner_ir(f)
     replace_var_names(f_blocks, {'A': col_var.name})
     replace_var_names(f_blocks, {'s': out_var.name})
     loc = out_var.loc
     f_blocks[0].body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
     return f_blocks
Exemplo n.º 18
0
    def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar):
        base = self.get(base)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        index = ir.Expr.call(self.get(slicevar), (none, none), (),
                             loc=self.loc)
        self.store(value=index, name=indexvar)

        stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc)
        self.current_block.append(stmt)
Exemplo n.º 19
0
 def fix_dependencies(expr, varlist):
     """Double check if all variables in varlist are defined before
     expr is used. Try to move constant definition when the check fails.
     Bails out by raising GuardException if it can't be moved.
     """
     debug_print = _make_debug_print("fix_dependencies")
     for label, block in blocks.items():
         scope = block.scope
         body = block.body
         defined = set()
         for i in range(len(body)):
             inst = body[i]
             if isinstance(inst, ir.Assign):
                 defined.add(inst.target.name)
                 if inst.value == expr:
                     new_varlist = []
                     for var in varlist:
                         # var must be defined before this inst, or live
                         # and not later defined.
                         if (var.name in defined or
                             (var.name in livemap[label]
                              and not (var.name in usedefs.defmap[label]))):
                             debug_print(var.name, " already defined")
                             new_varlist.append(var)
                         else:
                             debug_print(var.name, " not yet defined")
                             var_def = get_definition(func_ir, var.name)
                             if isinstance(var_def, ir.Const):
                                 loc = var.loc
                                 new_var = ir.Var(scope,
                                                  mk_unique_var("new_var"),
                                                  loc)
                                 new_const = ir.Const(var_def.value, loc)
                                 new_vardef = _new_definition(
                                     func_ir, new_var, new_const, loc)
                                 new_body = []
                                 new_body.extend(body[:i])
                                 new_body.append(new_vardef)
                                 new_body.extend(body[i:])
                                 block.body = new_body
                                 new_varlist.append(new_var)
                             else:
                                 raise GuardException
                     return new_varlist
     # when expr is not found in block
     raise GuardException
Exemplo n.º 20
0
    def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar,
                         indexvar):
        base = self.get(base)
        start = self.get(start)

        nonegv = ir.Const(None, loc=self.loc)
        self.store(value=nonegv, name=nonevar)
        none = self.get(nonevar)

        slicegv = ir.Global("slice", slice, loc=self.loc)
        self.store(value=slicegv, name=slicevar)

        index = ir.Expr.call(self.get(slicevar), (start, none), (),
                             loc=self.loc)
        self.store(value=index, name=indexvar)

        stmt = ir.SetItem(base, self.get(indexvar), self.get(value),
                          loc=self.loc)
        self.current_block.append(stmt)
Exemplo n.º 21
0
 def apply(self):
     """
     Rewrite `var = call <print function>(...)` as a sequence of
     `print(...)` and `var = const(None)`.
     """
     new_block = self.block.copy()
     new_block.clear()
     for inst in self.block.body:
         if inst in self.prints:
             expr = self.prints[inst]
             print_node = ir.Print(args=expr.args, vararg=expr.vararg,
                                   loc=expr.loc)
             new_block.append(print_node)
             assign_node = ir.Assign(value=ir.Const(None, loc=expr.loc),
                                     target=inst.target,
                                     loc=inst.loc)
             new_block.append(assign_node)
         else:
             new_block.append(inst)
     return new_block
Exemplo n.º 22
0
Arquivo: pio.py Projeto: zmyer/hpat
    def _gen_h5size(self, f_id, dset, ndims, scope, loc, out):
        # g_pio_var = Global(hpat.pio_api)
        g_pio_var = ir.Var(scope, mk_unique_var("$pio_g_var"), loc)
        g_pio = ir.Global('pio_api', hpat.pio_api, loc)
        g_pio_assign = ir.Assign(g_pio, g_pio_var, loc)
        # attr call: h5size_attr = getattr(g_pio_var, h5size)
        h5size_attr_call = ir.Expr.getattr(g_pio_var, "h5size", loc)
        attr_var = ir.Var(scope, mk_unique_var("$h5size_attr"), loc)
        attr_assign = ir.Assign(h5size_attr_call, attr_var, loc)
        out += [g_pio_assign, attr_assign]

        size_vars = []
        for i in range(ndims):
            dim_var = ir.Var(scope, mk_unique_var("$h5_dim_var"), loc)
            dim_assign = ir.Assign(ir.Const(np.int32(i), loc), dim_var, loc)
            out.append(dim_assign)
            size_var = ir.Var(scope, mk_unique_var("$h5_size_var"), loc)
            size_vars.append(size_var)
            size_call = ir.Expr.call(attr_var, [f_id, dset, dim_var], (), loc)
            size_assign = ir.Assign(size_call, size_var, loc)
            out.append(size_assign)
        return size_vars
Exemplo n.º 23
0
 def op_LOAD_CONST(self, inst, res):
     value = self.code_consts[inst.arg]
     const = ir.Const(value, loc=self.loc)
     self.store(const, res)
Exemplo n.º 24
0
    def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                              standard_indexed):
        """
        Transforms the stencil kernel as specified by the user into one
        that includes each dimension's index variable as part of the getitem
        calls.  So, in effect array[-1] becomes array[index0-1].
        """
        const_dict = {}
        kernel_consts = []

        if config.DEBUG_ARRAY_OPT == 1:
            print("add_indices_to_kernel", ndim, neighborhood)
            ir_utils.dump_blocks(kernel.blocks)

        if neighborhood is None:
            need_to_calc_kernel = True
        else:
            need_to_calc_kernel = False
            if len(neighborhood) != ndim:
                raise ValueError("%d dimensional neighborhood specified for %d " \
                    "dimensional input array" % (len(neighborhood), ndim))

        tuple_table = ir_utils.get_tuple_table(kernel.blocks)

        relatively_indexed = set()

        for block in kernel.blocks.values():
            scope = block.scope
            loc = block.loc
            new_body = []
            for stmt in block.body:
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Const)):
                    if config.DEBUG_ARRAY_OPT == 1:
                        print("remembering in const_dict", stmt.target.name,
                              stmt.value.value)
                    # Remember consts for use later.
                    const_dict[stmt.target.name] = stmt.value.value
                if ((isinstance(stmt, ir.Assign)
                     and isinstance(stmt.value, ir.Expr)
                     and stmt.value.op in ['setitem', 'static_setitem']
                     and stmt.value.value.name in kernel.arg_names)
                        or (isinstance(stmt, ir.SetItem)
                            and stmt.target.name in kernel.arg_names)):
                    raise ValueError("Assignments to arrays passed to stencil " \
                        "kernels is not allowed.")
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op in ['getitem', 'static_getitem']
                        and stmt.value.value.name in kernel.arg_names
                        and stmt.value.value.name not in standard_indexed):
                    # We found a getitem from the input array.
                    if stmt.value.op == 'getitem':
                        stmt_index_var = stmt.value.index
                    else:
                        stmt_index_var = stmt.value.index_var
                        # allow static_getitem since rewrite passes are applied
                        #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                    relatively_indexed.add(stmt.value.value.name)

                    # Store the index used after looking up the variable in
                    # the const dictionary.
                    if need_to_calc_kernel:
                        assert hasattr(stmt_index_var, 'name')

                        if stmt_index_var.name in tuple_table:
                            kernel_consts += [tuple_table[stmt_index_var.name]]
                        elif stmt_index_var.name in const_dict:
                            kernel_consts += [const_dict[stmt_index_var.name]]
                        else:
                            raise ValueError(
                                "stencil kernel index is not "
                                "constant, 'neighborhood' option required")

                    if ndim == 1:
                        # Single dimension always has index variable 'index0'.
                        # tmpvar will hold the real index and is computed by
                        # adding the relative offset in stmt.value.index to
                        # the current absolute location in index0.
                        index_var = ir.Var(scope, index_names[0], loc)
                        tmpname = ir_utils.mk_unique_var("stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                    else:
                        index_vars = []
                        sum_results = []
                        s_index_name = ir_utils.mk_unique_var("stencil_index")
                        s_index_var = ir.Var(scope, s_index_name, loc)
                        const_index_vars = []
                        ind_stencils = []

                        # Same idea as above but you have to extract
                        # individual elements out of the tuple indexing
                        # expression and add the corresponding index variable
                        # to them and then reconstitute as a tuple that can
                        # index the array.
                        for dim in range(ndim):
                            tmpname = ir_utils.mk_unique_var("const_index")
                            tmpvar = ir.Var(scope, tmpname, loc)
                            new_body.append(
                                ir.Assign(ir.Const(dim, loc), tmpvar, loc))
                            const_index_vars += [tmpvar]
                            index_var = ir.Var(scope, index_names[dim], loc)
                            index_vars += [index_var]

                            tmpname = ir_utils.mk_unique_var(
                                "ind_stencil_index")
                            tmpvar = ir.Var(scope, tmpname, loc)
                            ind_stencils += [tmpvar]
                            getitemname = ir_utils.mk_unique_var("getitem")
                            getitemvar = ir.Var(scope, getitemname, loc)
                            getitemcall = ir.Expr.getitem(
                                stmt_index_var, const_index_vars[dim], loc)
                            new_body.append(
                                ir.Assign(getitemcall, getitemvar, loc))
                            acc_call = ir.Expr.binop(operator.add, getitemvar,
                                                     index_vars[dim], loc)
                            new_body.append(ir.Assign(acc_call, tmpvar, loc))

                        tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                        new_body.append(ir.Assign(tuple_call, s_index_var,
                                                  loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, s_index_var,
                                                loc), stmt.target, loc))
                else:
                    new_body.append(stmt)
            block.body = new_body

        if need_to_calc_kernel:
            # Find the size of the kernel by finding the maximum absolute value
            # index used in the kernel specification.
            neighborhood = [[0, 0] for _ in range(ndim)]
            if len(kernel_consts) == 0:
                raise ValueError("Stencil kernel with no accesses to "
                                 "relatively indexed arrays.")

            for index in kernel_consts:
                if isinstance(index, tuple) or isinstance(index, list):
                    for i in range(len(index)):
                        te = index[i]
                        if isinstance(te, ir.Var) and te.name in const_dict:
                            te = const_dict[te.name]
                        if isinstance(te, int):
                            neighborhood[i][0] = min(neighborhood[i][0], te)
                            neighborhood[i][1] = max(neighborhood[i][1], te)
                        else:
                            raise ValueError(
                                "stencil kernel index is not constant,"
                                "'neighborhood' option required")
                    index_len = len(index)
                elif isinstance(index, int):
                    neighborhood[0][0] = min(neighborhood[0][0], index)
                    neighborhood[0][1] = max(neighborhood[0][1], index)
                    index_len = 1
                else:
                    raise ValueError(
                        "Non-tuple or non-integer used as stencil index.")
                if index_len != ndim:
                    raise ValueError(
                        "Stencil index does not match array dimensionality.")

        return (neighborhood, relatively_indexed)
Exemplo n.º 25
0
    def _gen_rolling_call(self, args, col_var, win_size, center, func,
                          out_var):
        loc = col_var.loc
        scope = col_var.scope
        if func == 'apply':
            if len(args) != 1:
                raise ValueError("One argument expected for rolling apply")
            kernel_func = guard(get_definition, self.func_ir, args[0])
        elif func in ['sum', 'mean', 'min', 'max', 'std', 'var']:
            if len(args) != 0:
                raise ValueError(
                    "No argument expected for rolling {}".format(func))
            g_pack = "np"
            if func in ['std', 'var', 'mean']:
                g_pack = "hpat.hiframes_api"
            if isinstance(win_size, int) and win_size < LARGE_WIN_SIZE:
                # unroll if size is less than 5
                kernel_args = ','.join(
                    ['a[{}]'.format(-i) for i in range(win_size)])
                kernel_expr = '{}.{}(np.array([{}]))'.format(
                    g_pack, func, kernel_args)
                if func == 'sum':  # simplify sum
                    kernel_expr = '+'.join(
                        ['a[{}]'.format(-i) for i in range(win_size)])
            else:
                kernel_expr = '{}.{}(a[(-w+1):1])'.format(g_pack, func)
            func_text = 'def g(a, w):\n  return {}\n'.format(kernel_expr)
            loc_vars = {}
            exec(func_text, {}, loc_vars)
            kernel_func = loc_vars['g']

        init_nodes = []
        col_var, init_nodes = self._fix_rolling_array(col_var, func)

        if isinstance(win_size, int):
            win_size_var = ir.Var(scope, mk_unique_var("win_size"), loc)
            init_nodes.append(
                ir.Assign(ir.Const(win_size, loc), win_size_var, loc))
            win_size = win_size_var

        index_offsets, win_tuple, option_nodes = self._gen_rolling_init(
            win_size, func, center)

        init_nodes += option_nodes
        other_args = [win_size]
        if func == 'apply':
            other_args = None
        options = {'neighborhood': win_tuple}
        fir_globals = self.func_ir.func_id.func.__globals__
        stencil_nodes = gen_stencil_call(col_var, out_var, kernel_func,
                                         index_offsets, fir_globals,
                                         other_args, options)

        def f(A, w):
            A[0:w - 1] = np.nan

        f_block = compile_to_numba_ir(f, {'np': np}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [out_var, win_size])
        setitem_nodes = f_block.body[:-3]  # remove none return

        if center:

            def f1(A, w):
                A[0:w // 2] = np.nan

            def f2(A, w):
                n = len(A)
                A[n - (w // 2):n] = np.nan

            f_block = compile_to_numba_ir(f1, {'np': np}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [out_var, win_size])
            setitem_nodes1 = f_block.body[:-3]  # remove none return
            f_block = compile_to_numba_ir(f2, {'np': np}).blocks.popitem()[1]
            replace_arg_nodes(f_block, [out_var, win_size])
            setitem_nodes2 = f_block.body[:-3]  # remove none return
            setitem_nodes = setitem_nodes1 + setitem_nodes2

        return init_nodes + stencil_nodes + setitem_nodes
Exemplo n.º 26
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_blocks,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []

        if config.DEBUG_ARRAY_OPT == 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                  return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                      self.typemap, self.calltypes)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                             self.typemap)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
            stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
            arg_to_arr_dict)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr.name)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                                  end_lengths[i], gen_nodes,
                                                  scope, loc)
            start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                    gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(
                numba.parfor.LoopNest(parfor_vars[i], start_ind, last_ind, 1))

        # replace return value to setitem to output array
        return_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        assert isinstance(return_node, ir.Return)

        last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        while not isinstance(last_node, ir.Assign) or not isinstance(
                last_node.value, ir.Expr) or not last_node.value.op == 'cast':
            last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
        assert isinstance(last_node, ir.Assign)
        assert isinstance(last_node.value, ir.Expr)
        assert last_node.value.op == 'cast'
        return_val = last_node.value.value

        # create parfor index var
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope,
                                    mk_unique_var("$parfor_index_tuple_var"),
                                    loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            stencil_blocks[max(
                stencil_blocks.keys())].body.append(tuple_assign)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr == None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(
                types.intp, in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.types.npytypes.Array(
                return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                                 return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
                return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                          loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                         [shape_var, zero_var, dtype_attr_var],
                                         self.typingctx, self.typemap,
                                         self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, return_val, loc)
        self.calltypes[setitem_call] = signature(
            types.none, self.typemap[out_arr.name],
            self.typemap[parfor_ind_var.name],
            self.typemap[out_arr.name].dtype)
        stencil_blocks[max(stencil_blocks.keys())].body.append(setitem_call)

        parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                     loc, parfor_ind_var, equiv_set)
        parfor.patterns = [('stencil', [start_lengths, end_lengths])]
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes
Exemplo n.º 27
0
 def inline_closure_call(self, block, i, callee):
     """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)
     """
     scope = block.scope
     instr = block.body[i]
     call_expr = instr.value
     _debug_print("Found closure call: ", instr, " with callee = ", callee)
     func_ir = self.func_ir
     # first, get the IR of the callee
     from_ir = self.get_ir_of_code(callee.code)
     from_blocks = from_ir.blocks
     # 1. relabel from_ir by adding an offset
     max_label = max(func_ir.blocks.keys())
     from_blocks = add_offset_to_labels(from_blocks, max_label + 1)
     from_ir.blocks = from_blocks
     min_label = min(from_blocks.keys())
     max_label = max(from_blocks.keys())
     #    reset globals in ir_utils before we use it
     ir_utils._max_label = max_label 
     ir_utils.visit_vars_extensions = {}
     # 2. rename all local variables in from_ir with new locals created in func_ir
     from_scopes = _get_all_scopes(from_blocks)
     _debug_print("obj_IR has scopes: ", from_scopes)
     #    one function should only have one local scope
     assert(len(from_scopes) == 1)
     from_scope = from_scopes[0]
     var_dict = {}
     for var in from_scope.localvars._con.values():
         if not (var.name in callee.code.co_freevars):
             var_dict[var.name] = scope.make_temp(var.loc)
     _debug_print("Before local var rename: var_dict = ", var_dict)
     _debug_dump(from_ir)
     replace_vars(from_blocks, var_dict)
     _debug_print("After local var rename: ")
     _debug_dump(from_ir)
     # 3. replace formal parameters with actual arguments
     args = list(call_expr.args)
     if callee.defaults:
         _debug_print("defaults", callee.defaults)
         if isinstance(callee.defaults, tuple): # Python 3.5
             args = args + list(callee.defaults)
         elif isinstance(callee.defaults, ir.Var) or isinstance(callee.defaults, str):
             defaults = func_ir.get_definition(callee.defaults)
             assert(isinstance(defaults, ir.Const))
             loc = defaults.loc
             args = args + [ ir.Const(value=v, loc=loc) for v in defaults.value ]
         else:
             raise NotImplementedError("Unsupported defaults to make_function: {}".format(defaults))
     _replace_args_with(from_blocks, args)
     _debug_print("After arguments rename: ")
     _debug_dump(from_ir)
     # 4. replace freevar with actual closure var
     if callee.closure:
         closure = func_ir.get_definition(callee.closure)
         assert(isinstance(closure, ir.Expr) and closure.op == 'build_tuple')
         assert(len(callee.code.co_freevars) == len(closure.items))
         _debug_print("callee's closure = ", closure)
         _replace_freevars(from_blocks, closure.items)
         _debug_print("After closure rename: ")
         _debug_dump(from_ir)
     # 5. split caller blocks into two
     new_blocks = []
     new_block = ir.Block(scope, block.loc)
     new_block.body = block.body[i+1:]
     new_label = next_label()
     func_ir.blocks[new_label] = new_block
     new_blocks.append((new_label, new_block))
     block.body = block.body[:i]
     block.body.append(ir.Jump(min_label, instr.loc))
     # 6. replace Return with assignment to LHS
     _replace_returns(from_blocks, instr.target, new_label)
     # 7. insert all new blocks, and add back definitions
     for label, block in from_blocks.items():
         # block scope must point to parent's
         block.scope = scope
         _add_definition(func_ir, block)
         func_ir.blocks[label] = block
         new_blocks.append((label, block))
     _debug_print("After merge: ")
     _debug_dump(func_ir)
     return new_blocks
Exemplo n.º 28
0
def dead_branch_prune(func_ir, called_args):
    """
    Removes dead branches based on constant inference from function args.
    This directly mutates the IR.

    func_ir is the IR
    called_args are the actual arguments with which the function is called
    """
    from .ir_utils import get_definition, guard, find_const, GuardException

    DEBUG = 0

    def find_branches(func_ir):
        # find *all* branches
        branches = []
        for blk in func_ir.blocks.values():
            branch_or_jump = blk.body[-1]
            if isinstance(branch_or_jump, ir.Branch):
                branch = branch_or_jump
                condition = guard(get_definition, func_ir, branch.cond.name)
                if condition is not None:
                    branches.append((branch, condition, blk))
        return branches

    def do_prune(take_truebr, blk):
        keep = branch.truebr if take_truebr else branch.falsebr
        # replace the branch with a direct jump
        jmp = ir.Jump(keep, loc=branch.loc)
        blk.body[-1] = jmp
        return 1 if keep == branch.truebr else 0

    def prune_by_type(branch, condition, blk, *conds):
        # this prunes a given branch and fixes up the IR
        # at least one needs to be a NoneType
        lhs_cond, rhs_cond = conds
        lhs_none = isinstance(lhs_cond, types.NoneType)
        rhs_none = isinstance(rhs_cond, types.NoneType)
        if lhs_none or rhs_none:
            take_truebr = condition.fn(lhs_cond, rhs_cond)
            if DEBUG > 0:
                kill = branch.falsebr if take_truebr else branch.truebr
                print("Pruning %s" % kill, branch, lhs_cond, rhs_cond,
                      condition.fn)
            taken = do_prune(take_truebr, blk)
            return True, taken
        return False, None

    def prune_by_value(branch, condition, blk, *conds):
        lhs_cond, rhs_cond = conds
        take_truebr = condition.fn(lhs_cond, rhs_cond)
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, lhs_cond, rhs_cond,
                  condition.fn)
        taken = do_prune(take_truebr, blk)
        return True, taken

    class Unknown(object):
        pass

    def resolve_input_arg_const(input_arg):
        """
        Resolves an input arg to a constant (if possible)
        """
        idx = func_ir.arg_names.index(input_arg)
        input_arg_ty = called_args[idx]

        # comparing to None?
        if isinstance(input_arg_ty, types.NoneType):
            return input_arg_ty

        # is it a kwarg default
        if isinstance(input_arg_ty, types.Omitted):
            val = input_arg_ty.value
            if isinstance(val, types.NoneType):
                return val
            elif val is None:
                return types.NoneType('none')

        # literal type, return the type itself so comparisons like `x == None`
        # still work as e.g. x = types.int64 will never be None/NoneType so
        # the branch can still be pruned
        return getattr(input_arg_ty, 'literal_type', Unknown())

    if DEBUG > 1:
        print("before".center(80, '-'))
        print(func_ir.dump())

    # This looks for branches where:
    # at least one arg of the condition is in input args and const
    # at least one an arg of the condition is a const
    # if the condition is met it will replace the branch with a jump
    branch_info = find_branches(func_ir)
    nullified_conditions = [
    ]  # stores conditions that have no impact post prune
    for branch, condition, blk in branch_info:
        const_conds = []
        if isinstance(condition, ir.Expr) and condition.op == 'binop':
            prune = prune_by_value
            for arg in [condition.lhs, condition.rhs]:
                resolved_const = Unknown()
                if arg.name in func_ir.arg_names:
                    # it's an e.g. literal argument to the function
                    resolved_const = resolve_input_arg_const(arg.name)
                    prune = prune_by_type
                else:
                    # it's some const argument to the function, cannot use guard
                    # here as the const itself may be None
                    try:
                        resolved_const = find_const(func_ir, arg)
                        if resolved_const is None:
                            resolved_const = types.NoneType('none')
                    except GuardException:
                        pass

                if not isinstance(resolved_const, Unknown):
                    const_conds.append(resolved_const)

            # lhs/rhs are consts
            if len(const_conds) == 2:
                # prune the branch, switch the branch for an unconditional jump
                prune_stat, taken = prune(branch, condition, blk, *const_conds)
                if (prune_stat):
                    # add the condition to the list of nullified conditions
                    nullified_conditions.append((condition, taken))

    # 'ERE BE DRAGONS...
    # It is the evaluation of the condition expression that often trips up type
    # inference, so ideally it would be removed as it is effectively rendered
    # dead by the unconditional jump if a branch was pruned. However, there may
    # be references to the condition that exist in multiple places (e.g. dels)
    # and we cannot run DCE here as typing has not taken place to give enough
    # information to run DCE safely. Upshot of all this is the condition gets
    # rewritten below into a benign const that typing will be happy with and DCE
    # can remove it and its reference post typing when it is safe to do so
    # (if desired). It is required that the const is assigned a value that
    # indicates the branch taken as its mutated value would be read in the case
    # of object mode fall back in place of the condition itself. For
    # completeness the func_ir._definitions and ._consts are also updated to
    # make the IR state self consistent.

    deadcond = [x[0] for x in nullified_conditions]
    for _, cond, blk in branch_info:
        if cond in deadcond:
            for x in blk.body:
                if isinstance(x, ir.Assign) and x.value is cond:
                    # rewrite the condition as a true/false bit
                    branch_bit = nullified_conditions[deadcond.index(cond)][1]
                    x.value = ir.Const(branch_bit, loc=x.loc)
                    # update the specific definition to the new const
                    defns = func_ir._definitions[x.target.name]
                    repl_idx = defns.index(cond)
                    defns[repl_idx] = x.value

    # Remove dead blocks, this is safe as it relies on the CFG only.
    cfg = compute_cfg_from_blocks(func_ir.blocks)
    for dead in cfg.dead_nodes():
        del func_ir.blocks[dead]

    # if conditions were nullified then consts were rewritten, update
    if nullified_conditions:
        func_ir._consts = consts.ConstantInference(func_ir)

    if DEBUG > 1:
        print("after".center(80, '-'))
        print(func_ir.dump())
Exemplo n.º 29
0
    def _add_index_offsets(self, index_list, index_offsets, new_body,
                           scope, loc):
        """ Does the actual work of adding loop index variables to the
            relative index constants or variables.
        """
        assert len(index_list) == len(index_offsets)

        # shortcut if all values are integer
        if all([isinstance(v, int) for v in index_list+index_offsets]):
            # add offsets in all dimensions
            return list(map(add, index_list, index_offsets))

        out_nodes = []
        index_vars = []
        for i in range(len(index_list)):
            # new_index = old_index + offset
            old_index_var = index_list[i]
            if isinstance(old_index_var, int):
                old_index_var = ir.Var(scope,
                                mk_unique_var("old_index_var"), loc)
                self.typemap[old_index_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_list[i], loc),
                                                    old_index_var, loc)
                out_nodes.append(const_assign)

            offset_var = index_offsets[i]
            if isinstance(offset_var, int):
                offset_var = ir.Var(scope,
                                mk_unique_var("offset_var"), loc)
                self.typemap[offset_var.name] = types.intp
                const_assign = ir.Assign(ir.Const(index_offsets[i], loc),
                                                offset_var, loc)
                out_nodes.append(const_assign)

            if (isinstance(old_index_var, slice)
                    or isinstance(self.typemap[old_index_var.name],
                                    types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[offset_var.name] == types.intp
                index_var = self._add_offset_to_slice(old_index_var, offset_var,
                                                        out_nodes, scope, loc)
                index_vars.append(index_var)
                continue

            if (isinstance(offset_var, slice)
                    or isinstance(self.typemap[offset_var.name],
                                    types.misc.SliceType)):
                # only one arg can be slice
                assert self.typemap[old_index_var.name] == types.intp
                index_var = self._add_offset_to_slice(offset_var, old_index_var,
                                                        out_nodes, scope, loc)
                index_vars.append(index_var)
                continue

            index_var = ir.Var(scope,
                            mk_unique_var("offset_stencil_index"), loc)
            self.typemap[index_var.name] = types.intp
            index_call = ir.Expr.binop('+', old_index_var,
                                                offset_var, loc)
            self.calltypes[index_call] = ir_utils.find_op_typ('+',
                                        [types.intp, types.intp])
            index_assign = ir.Assign(index_call, index_var, loc)
            out_nodes.append(index_assign)
            index_vars.append(index_var)

        new_body.extend(out_nodes)
        return index_vars
Exemplo n.º 30
0
    def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                           index_offsets, target, return_type, stencil_func,
                           arg_to_arr_dict):
        """ Converts a set of stencil kernel blocks to a parfor.
        """
        gen_nodes = []
        stencil_blocks = stencil_ir.blocks

        if config.DEBUG_ARRAY_OPT == 1:
            print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
                   return_type, stencil_func, stencil_blocks)
            ir_utils.dump_blocks(stencil_blocks)

        in_arr = in_args[0]
        # run copy propagate to replace in_args copies (e.g. a = A)
        in_arr_typ = self.typemap[in_arr.name]
        in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
        name_var_table = ir_utils.get_name_var_table(stencil_blocks)

        ir_utils.apply_copy_propagate(
            stencil_blocks,
            in_cps,
            name_var_table,
            self.typemap,
            self.calltypes)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after copy_propagate")
            ir_utils.dump_blocks(stencil_blocks)
        ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                             self.typemap)
        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after removing dead code")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor vars
        ndims = self.typemap[in_arr.name].ndim
        scope = in_arr.scope
        loc = in_arr.loc
        parfor_vars = []
        for i in range(ndims):
            parfor_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_var"), loc)
            self.typemap[parfor_var.name] = types.intp
            parfor_vars.append(parfor_var)

        start_lengths, end_lengths = self._replace_stencil_accesses(
             stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
             arg_to_arr_dict)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replace stencil accesses")
            ir_utils.dump_blocks(stencil_blocks)

        # create parfor loop nests
        loopnests = []
        equiv_set = self.array_analysis.get_equiv_set(label)
        in_arr_dim_sizes = equiv_set.get_shape(in_arr)

        assert ndims == len(in_arr_dim_sizes)
        for i in range(ndims):
            last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                        end_lengths[i], gen_nodes, scope, loc)
            start_ind = self._get_stencil_start_ind(
                                        start_lengths[i], gen_nodes, scope, loc)
            # start from stencil size to avoid invalid array access
            loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                start_ind, last_ind, 1))

        # We have to guarantee that the exit block has maximum label and that
        # there's only one exit block for the parfor body.
        # So, all return statements will change to jump to the parfor exit block.
        parfor_body_exit_label = max(stencil_blocks.keys()) + 1
        stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
        exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
        self.typemap[exit_value_var.name] = return_type.dtype

        # create parfor index var
        for_replacing_ret = []
        if ndims == 1:
            parfor_ind_var = parfor_vars[0]
        else:
            parfor_ind_var = ir.Var(scope, mk_unique_var(
                "$parfor_index_tuple_var"), loc)
            self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
                types.intp, ndims)
            tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
            tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
            for_replacing_ret.append(tuple_assign)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after creating parfor index var")
            ir_utils.dump_blocks(stencil_blocks)

        # empty init block
        init_block = ir.Block(scope, loc)
        if out_arr == None:
            in_arr_typ = self.typemap[in_arr.name]

            shape_name = ir_utils.mk_unique_var("in_arr_shape")
            shape_var = ir.Var(scope, shape_name, loc)
            shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
            self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                               in_arr_typ.ndim)
            init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

            zero_name = ir_utils.mk_unique_var("zero_val")
            zero_var = ir.Var(scope, zero_name, loc)
            if "cval" in stencil_func.options:
                cval = stencil_func.options["cval"]
                # TODO: Loosen this restriction to adhere to casting rules.
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError("cval type does not match stencil return type.")

                temp2 = return_type.dtype(cval)
            else:
                temp2 = return_type.dtype(0)
            full_const = ir.Const(temp2, loc)
            self.typemap[zero_name] = return_type.dtype
            init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

            so_name = ir_utils.mk_unique_var("stencil_output")
            out_arr = ir.Var(scope, so_name, loc)
            self.typemap[out_arr.name] = numba.types.npytypes.Array(
                                                           return_type.dtype,
                                                           in_arr_typ.ndim,
                                                           in_arr_typ.layout)
            dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
            self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
            dtype_g_np = ir.Global('np', np, loc)
            dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
            init_block.body.append(dtype_g_np_assign)

            dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type.dtype.name, loc)
            dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
            self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype)
            dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc)
            init_block.body.append(dtype_attr_assign)

            stmts = ir_utils.gen_np_call("full",
                                       np.full,
                                       out_arr,
                                       [shape_var, zero_var, dtype_attr_var],
                                       self.typingctx,
                                       self.typemap,
                                       self.calltypes)
            equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
            init_block.body.extend(stmts)

        self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                         parfor_body_exit_label)

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after replacing return")
            ir_utils.dump_blocks(stencil_blocks)

        setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
        self.calltypes[setitem_call] = signature(
                                        types.none, self.typemap[out_arr.name],
                                        self.typemap[parfor_ind_var.name],
                                        self.typemap[out_arr.name].dtype
                                        )
        stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
        stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

        # simplify CFG of parfor body (exit block could be simplified often)
        # add dummy return to enable CFG
        stencil_blocks[parfor_body_exit_label].body.append(ir.Return(0,
                                            ir.Loc("stencilparfor_dummy", -1)))
        stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
        stencil_blocks[max(stencil_blocks.keys())].body.pop()

        if config.DEBUG_ARRAY_OPT == 1:
            print("stencil_blocks after adding SetItem")
            ir_utils.dump_blocks(stencil_blocks)

        pattern = ('stencil', [start_lengths, end_lengths])
        parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                     loc, parfor_ind_var, equiv_set, pattern, self.flags)
        gen_nodes.append(parfor)
        gen_nodes.append(ir.Assign(out_arr, target, loc))
        return gen_nodes