Example #1
def arraylen(context, builder, tpe, val, totpe=None):
    if isinstance(tpe, numba.types.Array):
        out = numba.targets.arrayobj.array_len(context, builder,
                                               numba.intp(tpe), (val, ))
    else:
        out = tpe.lower_len(context, builder, numba.intp(tpe), (val, ))
    if totpe is None:
        return out
    else:
        return cast(context, builder, numba.intp, totpe, out)
Example #2
def generic(self, args, kwargs):
    if (
        len(args) == 1
        and len(kwargs) == 0
        and isinstance(args[0], ArrayBuilderType)
    ):
        return numba.intp(args[0])
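
In numba's typing layer, calling a type object with argument types builds a call signature, so numba.intp(args[0]) above declares an operation that takes an ArrayBuilderType and returns a machine-width integer. A quick sketch of that convention (plain numba, nothing project-specific):

import numba

# Calling a numba type with argument types yields a Signature object.
sig = numba.intp(numba.float64)
print(sig.return_type)  # int64 on a 64-bit platform
print(sig.args)         # (float64,)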
Example #3
        def four_way_scan(data, sm_masks, sm_blocksum, blksz, valid):
            sm_chunkoffset = hsa.shared.array(4, dtype=int32)

            tid = hsa.get_local_id(0)

            laneid = tid & (_WARPSIZE - 1)  # lane index within the wavefront
            warpid = tid >> 6               # wavefront index (assumes _WARPSIZE == 64)

            my_digit = -1

            for digit in range(RADIX):
                sm_masks[digit, tid] = 0
                if valid and data == digit:
                    sm_masks[digit, tid] = 1
                    my_digit = digit

            hsa.barrier()

            offset = 0
            base = 0
            while offset < blksz:
                # Exclusive scan
                if warpid < RADIX:
                    val = intp(sm_masks[warpid, offset + laneid])
                    cur, psum = shuf_wave_exclusive_scan(val)
                    sm_masks[warpid, offset + laneid] = cur + base
                    base += psum

                hsa.barrier()
                offset += _WARPSIZE

            hsa.barrier()

            # Store blocksum from the exclusive scan
            if warpid < RADIX and laneid == 0:
                sm_blocksum[warpid] = base

            hsa.barrier()
            # Calc chunk offset (a short exclusive scan)
            if tid == 0:
                sm_chunkoffset[0] = 0
                sm_chunkoffset[1] = sm_blocksum[0]
                sm_chunkoffset[2] = sm_chunkoffset[1] + sm_blocksum[1]
                sm_chunkoffset[3] = sm_chunkoffset[2] + sm_blocksum[2]

            hsa.barrier()
            # Prepare output
            chunk_offset = -1
            scanval = -1

            if my_digit != -1:
                chunk_offset = sm_chunkoffset[my_digit]
                scanval = sm_masks[my_digit, tid]

            hsa.wavebarrier()
            hsa.barrier()

            return chunk_offset, scanval
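
The digit loop above builds 0/1 masks, and the while loop then computes an exclusive prefix sum of each digit's mask one wavefront-sized chunk at a time, carrying the running total in base. For reference, here is the same exclusive-scan semantics in plain NumPy (a host-side sketch, not the device code; shuf_wave_exclusive_scan is assumed to return the scanned value together with the chunk's total):

import numpy as np

def exclusive_scan(mask):
    # out[i] = sum(mask[:i]); the total is returned separately,
    # mirroring the (cur, psum) pair in the kernel above.
    out = np.cumsum(mask) - mask
    return out, int(mask.sum())

scanned, total = exclusive_scan(np.array([0, 1, 1, 0, 1]))
# scanned == [0, 0, 1, 2, 2], total == 3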
Example #4
def lower_len(context, builder, sig, args):
    tpe, = sig.args
    val, = args
    proxyin = numba.cgutils.create_struct_proxy(tpe)(context,
                                                     builder,
                                                     value=val)
    return numba.targets.arrayobj.array_len(context, builder,
                                            numba.intp(tpe.arraytpe),
                                            (proxyin.array, ))
Example #5
def lower_len(context, builder, sig, args):
    rettpe, (tpe, ) = sig.return_type, sig.args
    val, = args
    proxyin = numba.cgutils.create_struct_proxy(tpe)(context,
                                                     builder,
                                                     value=val)
    indexlen = numba.targets.arrayobj.array_len(context, builder,
                                                numba.intp(tpe.indextpe),
                                                (proxyin.index, ))
    return indexlen
Example #6
def lower_len(context, builder, sig, args):
    rettpe, (tpe, ) = sig.return_type, sig.args
    val, = args
    proxyin = numba.cgutils.create_struct_proxy(tpe)(context,
                                                     builder,
                                                     value=val)
    offsetlen = numba.targets.arrayobj.array_len(context, builder,
                                                 numba.intp(tpe.offsetstpe),
                                                 (proxyin.offsets, ))
    return builder.sub(offsetlen, context.get_constant(rettpe, 1))
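
This lowering encodes the usual offsets convention for list arrays: an array with n elements stores n + 1 offsets, so its length is len(offsets) - 1. In plain NumPy terms (a sketch of the data layout only, not of the lowering):

import numpy as np

offsets = np.array([0, 3, 3, 5])     # boundaries of 3 sublists
content = np.array([1, 2, 3, 4, 5])  # flattened sublist contents
length = len(offsets) - 1            # 3 sublists: [1,2,3], [], [4,5]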
Example #7
def tri_root(t):
    """tri_root(t)

	Numpy ufunc. Get n such that t is the nth triangular number.

	This is the fastest version of this function. Behavior is undefined when
	t is not a triangular number.

	:param int t: Triangular number.
	:rtype: int
	"""
    s = 8 * t + 1
    rs = nb.intp(np.sqrt(s) + .5)
    return (rs - 1) // 2
Example #8
def compute_tree_weights(nodes, node_count, step):
    """Compute tree weights required to apply aggregation with exponential weights
    over all subtrees for the predictions

    Parameters
    ----------
    nodes : ndarray
        A numpy array containing the nodes data

    node_count : int
        Number of nodes in the tree

    step : float
        Step-size used for the computation of the aggregation weights

    References
    ----------
    This corresponds to Algorithm 1 in WildWood's paper
    TODO: Insert reference here
    """
    for node_idx in range(node_count - 1, -1, -1):
        node = nodes[node_idx]
        if node["is_leaf"]:
            # If the node is a leaf, the logarithm of its tree weight is simply
            #   -step * loss
            node["log_weight_tree"] = -step * node["loss_valid"]
        else:
            # If the node is not a leaf, then we apply context tree weighting
            loss = -step * node["loss_valid"]
            # TODO: why do we cast here?
            left_child = intp(node["left_child"])
            right_child = intp(node["right_child"])
            log_weight_tree_left = nodes[left_child]["log_weight_tree"]
            log_weight_tree_right = nodes[right_child]["log_weight_tree"]
            node["log_weight_tree"] = log_sum_2_exp(
                loss, log_weight_tree_left + log_weight_tree_right
            )
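
log_sum_2_exp is assumed here to be a numerically stable log(exp(a) + exp(b)); a minimal sketch of such a helper (WildWood's actual implementation may differ):

import numpy as np
from numba import njit

@njit
def log_sum_2_exp(a, b):
    # Stable log(exp(a) + exp(b)): factor out the larger exponent
    # so that neither exp() can overflow.
    m = max(a, b)
    return m + np.log(np.exp(a - m) + np.exp(b - m))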
Example #9
def tri_root_trunc(t):
    """tri_root_trunc(t)

	Numpy ufunc. Get n such that t is >= the nth triangular number and < the
	(n+1)th triangular number.

	:param int t: Nonnegative integer.
	:rtype: int
	"""
    s = 8 * t + 1
    rs = nb.intp(np.sqrt(s) + .5)
    if rs**2 > s:
        rs -= 1
    return (rs - 1) // 2
Example #10
def tri_root_strict(t):
    """tri_root_stric(t)

	Numpy ufunc. Get n such that t is the nth triangular number, or raise an
	exception if t is not triangular.

	:param int t: Triangular number.
	:rtype: int
	:raises ValueError: If t is not a triangular number.
	"""
    s = 8 * t + 1
    rs = nb.intp(np.sqrt(s) + .5)
    if rs**2 != s:
        raise ValueError('Not a triangular number')
    return (rs - 1) // 2
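
The three variants differ only in how they treat non-triangular input; a quick comparison, assuming the ufuncs above are in scope (tri_n(4) == 10, so 10 is triangular and 11 is not):

tri_root(10)         # -> 4
tri_root_trunc(10)   # -> 4
tri_root_trunc(11)   # -> 4, the largest n with tri_n(n) <= 11
tri_root_strict(10)  # -> 4
tri_root_strict(11)  # raises ValueError
# tri_root(11) is undefined behavior: any value may come back.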
Example #11
def lower_getitem_range(context, builder, sig, args):
    rettpe, (tpe, wheretpe) = sig.return_type, sig.args
    val, whereval = args

    proxyin = numba.cgutils.create_struct_proxy(tpe)(context,
                                                     builder,
                                                     value=val)

    proxyslicein = numba.cgutils.create_struct_proxy(wheretpe)(context,
                                                               builder,
                                                               value=whereval)
    numba.targets.slicing.fix_slice(
        builder, proxyslicein,
        tpe.lower_len(context, builder, numba.intp(tpe), (val, )))

    proxysliceout = numba.cgutils.create_struct_proxy(numba.types.slice2_type)(
        context, builder)
    proxysliceout.start = proxyslicein.start
    proxysliceout.stop = builder.add(proxyslicein.stop,
                                     context.get_constant(numba.intp, 1))
    proxysliceout.step = context.get_constant(numba.intp, 1)

    proxyout = numba.cgutils.create_struct_proxy(tpe)(context, builder)
    proxyout.offsets = numba.targets.arrayobj.getitem_arraynd_intp(
        context, builder,
        tpe.offsetstpe(tpe.offsetstpe, numba.types.slice2_type),
        (proxyin.offsets, proxysliceout._getvalue()))
    proxyout.content = proxyin.content
    if tpe.identitiestpe != numba.none:
        proxyout.identities = awkward1._numba.identities.lower_getitem_any(
            context, builder, tpe.identitiestpe, wheretpe, proxyin.identities,
            whereval)

    out = proxyout._getvalue()
    if context.enable_nrt:
        context.nrt.incref(builder, rettpe, out)
    return out
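
The stop + 1 adjustment exists because a slice of a list array needs one more offset than it has elements. A plain-NumPy sketch of the layout being sliced (illustration only, not the awkward1 API):

import numpy as np

offsets = np.array([0, 3, 3, 5, 8])    # 4 sublists into a flat content array
start, stop = 1, 3                     # keep sublists 1 and 2
sub_offsets = offsets[start:stop + 1]  # [3, 3, 5]: stop + 1 boundaries
# content is shared rather than copied, just as
# proxyout.content = proxyin.content above.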
Example #12
from numba import (
    intp,
    uintp,
    float32,
    void,
    optional,
)
from numba.experimental import jitclass

from ._utils import get_type, resize, resize2d
from ._node import node_type, node_dtype

IS_FIRST = 1
IS_NOT_FIRST = 0
IS_LEFT = 1
IS_NOT_LEFT = 0
TREE_LEAF = intp(-1)
TREE_UNDEFINED = intp(-2)

tree_type = [
    # Number of features
    ("n_features", uintp),
    # Number of classes
    ("n_classes", uintp),
    # Maximum depth allowed in the tree
    ("max_depth", uintp),
    # Number of nodes in the tree
    ("node_count", uintp),
    # Maximum number of nodes storable in the tree
    ("capacity", uintp),
    # A numpy array containing the nodes data
    ("nodes", node_type[::1]),
Example #13
"""Math stuff."""

import numpy as np
import numba as nb


@nb.vectorize([nb.intp(nb.intp)], nopython=True)
def tri_n(n):
    """tri_n(n)

	Numpy ufunc. Get the nth triangular number.

	:param int n: Nonnegative integer.
	:rtype: int
	"""
    return n * (n + 1) // 2


@nb.vectorize([nb.intp(nb.intp)], nopython=True)
def tri_root(t):
    """tri_root(t)

	Numpy ufunc. Get n such that t is the nth triangular number.

	This is the fastest version of this function. Behavior is undefined when
	t is not a triangular number.

	:param int t: Triangular number.
	:rtype: int
	"""
    s = 8 * t + 1
    rs = nb.intp(np.sqrt(s) + .5)
    return (rs - 1) // 2
Example #14
def generic(self, args, kwargs):
    if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], ArrayViewType):
        return numba.intp(args[0])
Example #15
def grow(tree, tree_context, node_context):
    # Initialize the tree capacity
    init_capacity = 2047
    resize_tree(tree, init_capacity)
    # Create the stack of node records
    records = Records(INITIAL_STACK_SIZE)

    # Let us first define all the attributes of the root node
    parent = TREE_UNDEFINED
    depth = 0
    is_left = False
    impurity = np.inf
    start_train = 0
    end_train = tree_context.n_samples_train
    start_valid = 0
    end_valid = tree_context.n_samples_valid

    push_record(
        records,
        parent,
        depth,
        is_left,
        impurity,
        start_train,
        end_train,
        start_valid,
        end_valid,
    )

    # TODO: this option will come for the forest later
    min_samples_split = 2

    while not has_records(records):
        # Get information about the current node
        (
            parent,
            depth,
            is_left,
            impurity,
            start_train,
            end_train,
            start_valid,
            end_valid,
        ) = pop_node_record(records)

        # Initialize the node context; this computes the node statistics
        compute_node_context(tree_context, node_context, start_train,
                             end_train, start_valid, end_valid)

        # TODO: add the max_depth option using something like
        # is_leaf = is_leaf or (depth >= max_depth)
        # This node is a terminal leaf; we won't try to split it

        # We don't split a node if it contains fewer than min_samples_split training
        # or validation samples
        is_leaf = (node_context.n_samples_train < min_samples_split) or (
            node_context.n_samples_valid < min_samples_split)

        # We don't split a node if it's pure: whenever its impurity computed on
        # training samples is at most min_impurity_split
        min_impurity_split = 0.0
        is_leaf = is_leaf or (impurity <= min_impurity_split)

        # TODO: put back the min_impurity_split option

        if is_leaf:
            split = None
            bin = 0
            feature = 0
            found_split = False
            # TODO: why would we set impurity = infinity here?
        else:
            split = find_node_split(tree_context, node_context)
            bin = split.bin_threshold
            feature = split.feature
            found_split = split.found_split

        # If we did not find a split then the node is a leaf, since we can't split it
        is_leaf = is_leaf or not found_split
        # TODO: correct this when actually using the threshold instead of
        #  bin_threshold
        threshold = 0.42
        weighted_n_samples_valid = 42.0

        node_id = add_node_tree(
            # The tree
            tree,
            # Index of the parent node
            parent,
            # Depth of the node
            depth,
            # Is the node a left child?
            is_left,
            # Is the node a leaf?
            is_leaf,
            # The feature used for splitting
            feature,
            # NOT USED FOR NOW
            threshold,
            # The bin threshold used for splitting
            bin,
            # Impurity of the node
            impurity,
            # Number of training samples
            node_context.n_samples_train,
            # Number of validation samples
            node_context.n_samples_valid,
            # Weighted number of training samples
            node_context.w_samples_train,
            # NOT USED FOR NOW
            weighted_n_samples_valid,
            # Index of the first training sample in the node
            start_train,
            # End-index of the slice containing the node's training samples
            end_train,
            # Index of the first validation (out-of-bag) sample in the node
            start_valid,
            # End-index of the slice containing the node's validation sample indices
            end_valid,
            # Validation loss of the node, computed on validation samples
            node_context.loss_valid,
        )

        # Save in the tree the predictions of the node
        tree.y_pred[node_id, :] = node_context.y_pred

        if not is_leaf:
            # If the node is not a leaf, we update partition_train and
            # partition_valid so that they contain training and validation indices of
            # nodes in a contiguous way.
            pos_train, pos_valid = split_indices(tree_context, split,
                                                 start_train, end_train,
                                                 start_valid, end_valid)

            # If the node is not a leaf, we push both children onto the node records,
            # so that they can be added to the tree and eventually be split as well.

            # This adds the left child
            push_record(
                # The stack containing the node records
                records,
                # The parent is the previous node_id
                node_id,
                # depth is increased by one
                depth + 1,
                # This is a left child (is_left=True)
                True,
                # Impurities of the children are kept in the split information
                split.impurity_left,
                # start_train of the left child is the same as the parent's
                start_train,
                # end_train of the left child is at the split's position
                pos_train,
                # start_valid of the left child is the same as the parent's
                start_valid,
                # end_valid of the left child is at the split's position
                pos_valid,
            )

            # This adds the right child
            push_record(
                # The stack containing the node records
                records,
                # The parent is the previous node_id
                node_id,
                # depth is increased by one
                depth + 1,
                # This is a right child (is_left=False)
                False,
                # Impurities of the children are kept in the split information
                split.impurity_right,
                # start_train of the right child is at the split's position
                pos_train,
                # end_train of the right child is the same as the parent's
                end_train,
                # start_valid of the right child is at the split's position
                pos_valid,
                # end_valid of the right child is the same as the parent's
                end_valid,
            )

    # We have finished growing the tree. Now we can compute the tree's aggregation weights.
    aggregation = tree_context.aggregation
    step = tree_context.step

    # Since the tree is grown in a depth-first fashion, we know that if we iterate
    # through the nodes in reverse order, we'll always visit children before
    # their parents.
    node_count = tree.node_count

    # TODO: move this into a separate function...
    if aggregation:
        for node_idx in range(node_count - 1, -1, -1):
            node = tree.nodes[node_idx]
            if node["is_leaf"]:
                # If the node is a leaf, the logarithm of its tree weight is simply
                #   step * loss
                node["log_weight_tree"] = step * node["loss_valid"]
            else:
                # If the node is not a leaf, then we apply context tree weighting
                weight = step * node["loss_valid"]
                left_child = intp(node["left_child"])
                right_child = intp(node["right_child"])
                # print("left_child: ", left_child, ", right_child: ", right_child)
                log_weight_tree_left = tree.nodes[left_child][
                    "log_weight_tree"]
                log_weight_tree_right = tree.nodes[right_child][
                    "log_weight_tree"]
                node["log_weight_tree"] = log_sum_2_exp(
                    weight, log_weight_tree_left + log_weight_tree_right)

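
The reverse loop is correct because of the invariant stated in the comment above: during a depth-first grow, nodes are appended in creation order, so every child has a larger index than its parent. A small plain-Python illustration of that invariant (not part of the tree code):

nodes = []  # entries are (node_idx, parent_idx)

def add_node(parent_idx):
    nodes.append((len(nodes), parent_idx))
    return len(nodes) - 1

root = add_node(-1)
left = add_node(root)
right = add_node(root)

# Iterating node_count - 1 .. 0 therefore visits both children before
# their parent, which the aggregation recursion requires.
for idx in range(len(nodes) - 1, -1, -1):
    node_idx, parent_idx = nodes[idx]
    assert parent_idx < node_idx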
Example #16
def lower_getiter(context, builder, sig, args):
    rettpe, (tpe,) = sig.return_type, sig.args
    val, = args
    proxyout = context.make_helper(builder, rettpe)
    proxyout.array = val
    proxyout.length = util.cast(
        context, builder, numba.intp, numba.int64,
        tpe.lower_len(context, builder, numba.intp(tpe), (val,)))
    proxyout.at = numba.cgutils.alloca_once_value(
        builder, context.get_constant(numba.int64, 0))
    if context.enable_nrt:
        context.nrt.incref(builder, tpe, val)
    return numba.targets.imputils.impl_ret_new_ref(context, builder, rettpe,
                                                   proxyout._getvalue())