def __init__( self, index: Index, grouper=None, obj: FrameOrSeries | None = None, name: Hashable = None, level=None, sort: bool = True, observed: bool = False, in_axis: bool = False, dropna: bool = True, ): self.name = name self.level = level self.grouper = _convert_grouper(index, grouper) self.all_grouper = None self.index = index self.sort = sort self.obj = obj self.observed = observed self.in_axis = in_axis self.dropna = dropna # right place for this? if isinstance(grouper, (Series, Index)) and name is None: self.name = grouper.name if isinstance(grouper, MultiIndex): self.grouper = grouper._values # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level if level is not None: if not isinstance(level, int): if level not in index.names: raise AssertionError(f"Level {level} not in index") level = index.names.index(level) if self.name is None: self.name = index.names[level] ( self.grouper, self._codes, self._group_index, ) = index._get_grouper_for_level(self.grouper, level) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes elif isinstance(self.grouper, Grouper): # get the new grouper; we already have disambiguated # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) _, grouper, _ = self.grouper._get_grouper( # error: Value of type variable "FrameOrSeries" of "_get_grouper" # of "Grouper" cannot be "Optional[FrameOrSeries]" self.obj, # type: ignore[type-var] validate=False, ) if self.name is None: self.name = grouper.result_index.name self.obj = self.grouper.obj self.grouper = grouper._get_grouper() else: if self.grouper is None and self.name is not None and self.obj is not None: self.grouper = self.obj[self.name] elif isinstance(self.grouper, (list, tuple)): self.grouper = com.asarray_tuplesafe(self.grouper) # a passed Categorical elif is_categorical_dtype(self.grouper): self.grouper, self.all_grouper = recode_for_groupby( self.grouper, self.sort, observed) categories = self.grouper.categories # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes self._codes = self.grouper.codes if observed: codes = algorithms.unique1d(self.grouper.codes) codes = codes[codes != -1] if sort or self.grouper.ordered: codes = np.sort(codes) else: codes = np.arange(len(categories)) self._group_index = CategoricalIndex( Categorical.from_codes(codes=codes, categories=categories, ordered=self.grouper.ordered), name=self.name, ) # we are done if isinstance(self.grouper, Grouping): self.grouper = self.grouper.grouper # no level passed elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)): if getattr(self.grouper, "ndim", 1) != 1: t = self.name or str(type(self.grouper)) raise ValueError(f"Grouper for '{t}' not 1-dimensional") self.grouper = self.index.map(self.grouper) if not (hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index)): grper = pprint_thing(self.grouper) errmsg = ("Grouper result violates len(labels) == " f"len(data)\nresult: {grper}") self.grouper = None # Try for sanity raise AssertionError(errmsg) # if we have a date/time-like grouper, make sure that we have # Timestamps like if getattr(self.grouper, "dtype", None) is not None: if is_datetime64_dtype(self.grouper): self.grouper = self.grouper.astype("datetime64[ns]") elif is_timedelta64_dtype(self.grouper): self.grouper = self.grouper.astype("timedelta64[ns]")
def __init__(self, index, grouper=None, obj=None, name=None, level=None, sort=True, observed=False, in_axis=False): self.name = name self.level = level self.grouper = _convert_grouper(index, grouper) self.all_grouper = None self.index = index self.sort = sort self.obj = obj self.observed = observed self.in_axis = in_axis # right place for this? if isinstance(grouper, (Series, Index)) and name is None: self.name = grouper.name if isinstance(grouper, MultiIndex): self.grouper = grouper.values # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level if level is not None: if not isinstance(level, int): if level not in index.names: raise AssertionError('Level {} not in index'.format(level)) level = index.names.index(level) if self.name is None: self.name = index.names[level] self.grouper, self._labels, self._group_index = \ index._get_grouper_for_level(self.grouper, level) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get labels elif isinstance(self.grouper, Grouper): # get the new grouper; we already have disambiguated # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False) if self.name is None: self.name = grouper.result_index.name self.obj = self.grouper.obj self.grouper = grouper._get_grouper() else: if self.grouper is None and self.name is not None: self.grouper = self.obj[self.name] elif isinstance(self.grouper, (list, tuple)): self.grouper = com.asarray_tuplesafe(self.grouper) # a passed Categorical elif is_categorical_dtype(self.grouper): from pandas.core.groupby.categorical import recode_for_groupby self.grouper, self.all_grouper = recode_for_groupby( self.grouper, self.sort, observed) categories = self.grouper.categories # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes self._labels = self.grouper.codes if observed: codes = algorithms.unique1d(self.grouper.codes) codes = codes[codes != -1] if sort or self.grouper.ordered: codes = np.sort(codes) else: codes = np.arange(len(categories)) self._group_index = CategoricalIndex( Categorical.from_codes( codes=codes, categories=categories, ordered=self.grouper.ordered)) # we are done if isinstance(self.grouper, Grouping): self.grouper = self.grouper.grouper # no level passed elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)): if getattr(self.grouper, 'ndim', 1) != 1: t = self.name or str(type(self.grouper)) raise ValueError( "Grouper for '{}' not 1-dimensional".format(t)) self.grouper = self.index.map(self.grouper) if not (hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index)): errmsg = ('Grouper result violates len(labels) == ' 'len(data)\nresult: %s' % pprint_thing(self.grouper)) self.grouper = None # Try for sanity raise AssertionError(errmsg) # if we have a date/time-like grouper, make sure that we have # Timestamps like if getattr(self.grouper, 'dtype', None) is not None: if is_datetime64_dtype(self.grouper): from pandas import to_datetime self.grouper = to_datetime(self.grouper) elif is_timedelta64_dtype(self.grouper): from pandas import to_timedelta self.grouper = to_timedelta(self.grouper)
def __init__( self, index: Index, grouper=None, obj: FrameOrSeries | None = None, level=None, sort: bool = True, observed: bool = False, in_axis: bool = False, dropna: bool = True, ): self.level = level self._orig_grouper = grouper self.grouper = _convert_grouper(index, grouper) self.all_grouper = None self.index = index self.sort = sort self.obj = obj self.observed = observed self.in_axis = in_axis self.dropna = dropna self._passed_categorical = False # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level ilevel = self._ilevel if ilevel is not None: ( self.grouper, # Index self._codes, self._group_index, ) = index._get_grouper_for_level(self.grouper, ilevel) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes elif isinstance(self.grouper, Grouper): # get the new grouper; we already have disambiguated # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) _, newgrouper, newobj = self.grouper._get_grouper( # error: Value of type variable "FrameOrSeries" of "_get_grouper" # of "Grouper" cannot be "Optional[FrameOrSeries]" self.obj, # type: ignore[type-var] validate=False, ) self.obj = newobj ng = newgrouper._get_grouper() if isinstance(newgrouper, ops.BinGrouper): # in this case we have `ng is newgrouper` self.grouper = ng else: # ops.BaseGrouper # use Index instead of ndarray so we can recover the name self.grouper = Index(ng, name=newgrouper.result_index.name) elif is_categorical_dtype(self.grouper): # a passed Categorical self._passed_categorical = True self.grouper, self.all_grouper = recode_for_groupby( self.grouper, self.sort, observed ) elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)): # no level passed if getattr(self.grouper, "ndim", 1) != 1: t = self.name or str(type(self.grouper)) raise ValueError(f"Grouper for '{t}' not 1-dimensional") self.grouper = self.index.map(self.grouper) if not ( hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index) ): grper = pprint_thing(self.grouper) errmsg = ( "Grouper result violates len(labels) == " f"len(data)\nresult: {grper}" ) self.grouper = None # Try for sanity raise AssertionError(errmsg) if isinstance(self.grouper, np.ndarray): # if we have a date/time-like grouper, make sure that we have # Timestamps like self.grouper = sanitize_to_nanoseconds(self.grouper)