Exemplo n.º 1
0
 def test_is_sorted(self):
     """Run the same sorted/unsorted inputs through `is_sorted` and `is_sorted_nb`."""
     ascending_inputs = ([0, 1, 2, 3, 4], [0, 1], [0])
     descending_inputs = ([1, 0], [0, 1, 2, 4, 3])
     # Look each checker up by name right before use, so the plain variant is
     # fully exercised before the "nb" variant is even resolved.
     for checker_name in ("is_sorted", "is_sorted_nb"):
         checker = getattr(array, checker_name)
         for values in ascending_inputs:
             assert checker(np.array(values))
         for values in descending_inputs:
             assert not checker(np.array(values))
Exemplo n.º 2
0
def group_index(index, group_by, return_dict=False, nb_compatible=False, assert_sorted=False):
    """Factorize a grouping of `index` into group codes and the index of unique groups.

    `group_by` is first converted by `group_by_to_index` and then passed to
    `pd.factorize`, which yields one integer code per element and the unique values.

    Args:
        index: Original index that is being grouped.
        group_by: Mapper handed to `group_by_to_index` together with `index`.
        return_dict: If True, return a dict that maps each group code to the list of
            positions holding that code, instead of the array of codes.
        nb_compatible: Only used when `return_dict` is True. If True, the dict is
            rebuilt as a `Dict` whose values are NumPy arrays
            (presumably Numba's typed dict; confirm against the file's imports).
        assert_sorted: If True, require the group codes to pass `is_sorted`.

    Returns:
        Tuple of (groups, new index), where groups is the array of codes, a plain dict,
        or a `Dict`, depending on `return_dict` and `nb_compatible`.

    Raises:
        ValueError: If `assert_sorted` is True and the group codes are not increasing.
    """
    mapper = group_by_to_index(index, group_by)
    codes, uniques = pd.factorize(mapper)
    grouped_index = uniques if isinstance(uniques, pd.Index) else pd.Index(uniques)

    # Carry the name(s) of the mapper over to the grouped index.
    if isinstance(mapper, pd.MultiIndex):
        grouped_index.names = mapper.names
    elif isinstance(mapper, (pd.Index, pd.Series)):
        grouped_index.name = mapper.name

    if assert_sorted and not is_sorted(codes):
        raise ValueError("Group indices are not increasing. Use .sort_values() on the index.")

    if not return_dict:
        return codes, grouped_index

    # Collect, per group code, the positions in the original index that carry it.
    positions_by_code = {}
    for position, code in enumerate(codes):
        positions_by_code.setdefault(code, []).append(position)

    if not nb_compatible:
        return positions_by_code, grouped_index

    array_groups = Dict()
    for code, positions in positions_by_code.items():
        array_groups[code] = np.array(positions)
    return array_groups, grouped_index
Exemplo n.º 3
0
def get_groups_and_index(index, group_by):
    """Return the array of group codes for `index` and the grouped index.

    When `group_by` is None or False, no grouping is applied: every element gets its
    own group (`np.arange(len(index))`) and `index` is returned unchanged.

    Otherwise `group_by` is converted by `group_by_to_index` and factorized with
    `pd.factorize`; the unique values become the grouped index, which takes over the
    name(s) of the mapper.

    Raises:
        ValueError: If the resulting group codes do not pass `is_sorted`.
    """
    grouping_disabled = group_by is None or group_by is False
    if grouping_disabled:
        return np.arange(len(index)), index

    mapper = group_by_to_index(index, group_by)
    codes, uniques = pd.factorize(mapper)
    grouped_index = uniques if isinstance(uniques, pd.Index) else pd.Index(uniques)

    # Carry the name(s) of the mapper over to the grouped index.
    if isinstance(mapper, pd.MultiIndex):
        grouped_index.names = mapper.names
    elif isinstance(mapper, (pd.Index, pd.Series)):
        grouped_index.name = mapper.name

    if is_sorted(codes):
        return codes, grouped_index
    raise ValueError("Groups must be coherent and sorted")
Exemplo n.º 4
0
 def is_sorted(self, group_by: tp.GroupByLike = None, **kwargs) -> bool:
     """Return whether the groups for `group_by` are coherent and sorted.

     `group_by` and any extra keyword arguments are first passed through
     `resolve_group_by`; the groups obtained for the resolved value are then
     checked with the module-level `is_sorted` function.
     """
     resolved_group_by = self.resolve_group_by(group_by=group_by, **kwargs)
     return is_sorted(self.get_groups(group_by=resolved_group_by))