Exemplo n.º 1
0
def mode(keys,
         axis=semantics.axis_default,
         weights=None,
         return_indices=False):
    """Find the mode, i.e. the key with the largest (weighted) count.

    Parameters
    ----------
    keys : ndarray, [n_keys, ...]
        input array. elements of 'keys' can have arbitrary shape or dtype
    axis : int, optional
        forwarded to as_index together with 'keys'
    weights : ndarray, [n_keys], optional
        if given, the per-key weights are summed per group and the group
        with the largest sum is selected; otherwise plain counts are used
    return_indices : bool
        if True, also return all indices such that keys[indices]==mode holds

    Returns
    -------
    mode : ndarray, [...]
        the most frequently occurring key in the key sequence
    indices : ndarray, [mode_multiplicity], int, optional
        only if return_indices is True;
        all indices such that keys[indices]==mode holds
    """
    index = as_index(keys, axis)
    if weights is not None:
        unique, weights = group_by(index).sum(weights)
    else:
        unique, weights = count(index)

    # position of the heaviest group among the unique keys
    winner = np.argmax(weights)
    # FIXME: replace with index.take for lexindex compatibility?
    _mode = unique[winner]

    if not return_indices:
        return _mode

    # slice of the sorter that belongs to the winning group
    sorter = index.sorter
    first = index.start[winner]
    last = index.stop[winner]
    return _mode, sorter[first:last]
Exemplo n.º 2
0
def count_table(*keys):
    """count the number of times each key-combination occurs in the input set

    Arguments
    ---------
    keys : tuple of indexable objects, each having the same number of items

    Returns
    -------
    unique : tuple of ndarray, [groups, ...]
        unique keys for each input item
        they form the axes labels of the table
    table : ndarray, [keys[0].groups, ... keys[n].groups], int
        the number of times each key-combination occurs in the input set

    Notes
    -----
    Equivalent to R's pivot table or pandas 'crosstab'
    Alternatively, dense equivalent of the count function
    Should we add weights option?
    Or better yet; what about general reductions over key-grids?
    """
    indices = [as_index(k, axis=0) for k in keys]
    uniques = tuple(i.unique for i in indices)
    # This has to be a tuple: recent NumPy versions interpret a *list* of
    # index arrays as one array index on the first axis, rather than as one
    # index array per table axis.
    inverses = tuple(i.inverse for i in indices)
    shape = [i.groups for i in indices]
    # builtin int replaces the removed alias np.int (same resulting dtype)
    table = np.zeros(shape, int)
    # unbuffered add, so repeated key-combinations are all accumulated
    np.add.at(table, inverses, 1)
    return uniques, table
Exemplo n.º 3
0
 def get_inverses(self, keys):
     """Compute the inverse indices of every key sequence in 'keys'.

     Each element of 'keys' is passed through as_index with axis=0.

     Returns
     -------
     Tuple of inverse indices, one per element of 'keys'
     """
     inverses = []
     for key in keys:
         inverses.append(as_index(key, axis=0).inverse)
     return tuple(inverses)
Exemplo n.º 4
0
    def __init__(self, keys, axis=0):
        """Build the index for 'keys' and store it on the instance.

        Parameters
        ----------
        keys : indexable object
            sequence of keys to group by
        axis : int, optional
            axis to regard as the key-sequence, in case keys is multi-dimensional

        See Also
        --------
        numpy_indexed.as_index : for information regarding the casting rules to a valid Index object
        """
        index = as_index(keys, axis=axis)
        self.index = index
Exemplo n.º 5
0
def multiplicity(keys, axis=semantics.axis_default):
    """For every input key, report how many times that key occurs in the set.

    Parameters
    ----------
    keys : indexable object
    axis : int, optional
        forwarded to as_index together with 'keys'

    Returns
    -------
    ndarray, [keys.size], int
        the number of times each input item occurs in the set
    """
    index = as_index(keys, axis)
    # look up each item's group count through the inverse mapping
    per_group = index.count
    return per_group[index.inverse]
Exemplo n.º 6
0
def rank(keys, axis=semantics.axis_default):
    """Return the position of each item in the sorting order.

    Parameters
    ----------
    keys : indexable object
    axis : int, optional
        forwarded to as_index together with 'keys'

    Returns
    -------
    ndarray, [keys.size], int
        unique integers, ranking the sorting order

    Notes
    -----
    we should have that index.sorted[index.rank] == keys
    """
    return as_index(keys, axis).rank
Exemplo n.º 7
0
    def argmax(self, values):
        """Return, per group, the index into values at which the group's maximum occurs.

        Parameters
        ----------
        values : array_like, [keys]
            values to pick the argmax of per group

        Returns
        -------
        unique: ndarray, [groups]
            unique keys
        argmax : ndarray, [groups]
            index into value array, representing the argmax per group
        """
        # self.max presumably yields the unique keys and the maximum per group
        keys, maxima = self.max(values)
        # expand the per-group maxima to one entry per input item
        # (assumes self.inverse maps each item to its group -- TODO confirm)
        maxima = maxima[self.inverse]
        # select the first occurrence of the maximum in each group
        # NOTE(review): taking the last self.groups entries of index.start
        # relies on as_index ordering the compound (group, is-maximum) keys so
        # that the is-maximum entries come last, one per group -- confirm
        # against the Index implementation before changing this.
        index = as_index((self.inverse, values == maxima))
        return keys, index.sorter[index.start[-self.groups:]]
Exemplo n.º 8
0
def count(keys, axis=semantics.axis_default):
    """Tally how often each distinct key appears in the input set.

    Arguments
    ---------
    keys : indexable object
    axis : int, optional
        forwarded to as_index together with 'keys'

    Returns
    -------
    unique : ndarray, [groups, ...]
        unique keys
    count : ndarray, [groups], int
        the number of times each key occurs in the input set

    Notes
    -----
    Can be seen as numpy work-alike of collections.Counter
    Alternatively, as sparse equivalent of count_table
    """
    index = as_index(keys, axis, base=True)
    unique_keys = index.unique
    occurrences = index.count
    return unique_keys, occurrences
Exemplo n.º 9
0
 def __init__(self, *keys):
     """Index every key sequence along axis 0 and cache the derived data.

     Stores the keys as a tuple, their index objects, the unique keys of
     each index, and the number of groups of each index (as 'shape').
     """
     self.keys = tuple(keys)
     built = [as_index(key, axis=0) for key in keys]
     self.indices = built
     self.uniques = [idx.unique for idx in built]
     self.shape = [idx.groups for idx in built]
Exemplo n.º 10
0
def is_uniform(keys, axis=semantics.axis_default):
    """Return the 'uniform' flag of the index built from keys.

    Per the original description: true if all keys have equal multiplicity.
    """
    return as_index(keys, axis).uniform
Exemplo n.º 11
0
def all_equal(keys, axis=semantics.axis_default):
    """Return true if all keys are equal, i.e. they form exactly one group."""
    n_groups = as_index(keys, axis).groups
    return n_groups == 1
Exemplo n.º 12
0
def any_unique(keys, axis=semantics.axis_default):
    """Return true if at least one key occurs exactly once."""
    counts = as_index(keys, axis).count
    occurs_once = counts == 1
    return np.any(occurs_once)
Exemplo n.º 13
0
def all_unique(keys, axis=semantics.axis_default):
    """Return true if every key is unique, i.e. there are as many groups as items."""
    index = as_index(keys, axis)
    n_groups = index.groups
    n_items = index.size
    return n_groups == n_items
Exemplo n.º 14
0
def argsort(keys, axis=semantics.axis_default):
    """Return the indices that place the keys in sorted order.

    This is the 'sorter' of the index built from keys.
    """
    index = as_index(keys, axis)
    return index.sorter
Exemplo n.º 15
0
def sort(keys, axis=semantics.axis_default):
    """Sort an indexable object and return the sorted keys.

    This is the 'sorted_keys' of the index built from keys.
    """
    index = as_index(keys, axis)
    return index.sorted_keys
Exemplo n.º 16
0
 def get_inverses(self, keys):
     """Return a tuple holding the inverse indices of each element of 'keys'.

     Each element is passed through as_index with axis=0.
     """
     inverses = []
     for key in keys:
         inverses.append(as_index(key, axis=0).inverse)
     return tuple(inverses)