def test_is_sorted(self):
    """Check `array.is_sorted` and `array.is_sorted_nb` on the same inputs.

    Both functions must accept increasing arrays (including a single
    element) and reject arrays containing a decreasing step.
    """
    increasing = ([0, 1, 2, 3, 4], [0, 1], [0])
    not_increasing = ([1, 0], [0, 1, 2, 4, 3])
    # Plain version first, then the `_nb` variant (presumably the Numba
    # counterpart -- confirm against the `array` module).
    for func_name in ("is_sorted", "is_sorted_nb"):
        check = getattr(array, func_name)
        for values in increasing:
            assert check(np.array(values))
        for values in not_increasing:
            assert not check(np.array(values))
def group_index(index, group_by, return_dict=False, nb_compatible=False, assert_sorted=False):
    """Group index by some mapper.

    The mapper is resolved with `group_by_to_index` and factorized with
    `pd.factorize`. The name(s) of the mapper are copied to the new index.

    Args:
        index: Original index, passed to `group_by_to_index`.
        group_by: Mapper, passed to `group_by_to_index`.
        return_dict: If True, return a dict that maps each group index to
            the list of positions belonging to it, instead of the array.
        nb_compatible: Only used together with `return_dict`. If True, the
            dict is rebuilt as a `Dict` whose values are NumPy arrays, to
            make it Numba-compatible.
        assert_sorted: If True, verify with `is_sorted` that the group
            indices are increasing.

    Returns:
        Tuple of the groups (array, dict or `Dict`, depending on the flags)
        and the new index.

    Raises:
        ValueError: If `assert_sorted` is True and the check fails.
    """
    mapper = group_by_to_index(index, group_by)
    codes, uniques = pd.factorize(mapper)
    if not isinstance(uniques, pd.Index):
        uniques = pd.Index(uniques)

    # Carry the mapper's name(s) over to the new index.
    if isinstance(mapper, pd.MultiIndex):
        uniques.names = mapper.names
    elif isinstance(mapper, (pd.Index, pd.Series)):
        uniques.name = mapper.name

    if assert_sorted and not is_sorted(codes):
        raise ValueError("Group indices are not increasing. Use .sort_values() on the index.")

    if not return_dict:
        return codes, uniques

    # Collect, per group index, the positions in first-seen order.
    positions = {}
    for pos, code in enumerate(codes):
        positions.setdefault(code, []).append(pos)
    if not nb_compatible:
        return positions, uniques

    nb_positions = Dict()
    for code, members in positions.items():
        nb_positions[code] = np.array(members)
    return nb_positions, uniques
def get_groups_and_index(index, group_by):
    """Return array of group indices pointing to the original index, and grouped index.

    Args:
        index: Original index.
        group_by: Mapper, passed to `group_by_to_index`. When it is `None`
            or `False`, no grouping is done: every position gets its own
            group and `index` is returned as-is.

    Returns:
        Tuple of the group array and the grouped index.

    Raises:
        ValueError: If the factorized groups do not pass `is_sorted`.
    """
    # Identity checks on purpose: only the literal None/False disable grouping.
    grouping_disabled = group_by is None or group_by is False
    if grouping_disabled:
        return np.arange(len(index)), index

    mapper = group_by_to_index(index, group_by)
    codes, grouped_index = pd.factorize(mapper)
    if not isinstance(grouped_index, pd.Index):
        grouped_index = pd.Index(grouped_index)

    # Carry the mapper's name(s) over to the grouped index.
    if isinstance(mapper, pd.MultiIndex):
        grouped_index.names = mapper.names
    elif isinstance(mapper, (pd.Index, pd.Series)):
        grouped_index.name = mapper.name

    if is_sorted(codes):
        return codes, grouped_index
    raise ValueError("Groups must be coherent and sorted")
def is_sorted(self, group_by: tp.GroupByLike = None, **kwargs) -> bool:
    """Return whether groups are coherent and sorted.

    `group_by` and any extra keyword arguments are first resolved through
    `resolve_group_by`; the groups obtained for the resolved value are then
    checked with the module-level `is_sorted` function.
    """
    resolved = self.resolve_group_by(group_by=group_by, **kwargs)
    return is_sorted(self.get_groups(group_by=resolved))