def test_agg_grouping_is_list_tuple(ts):
    df = tm.makeTimeDataFrame()
    grouped = df.groupby(lambda x: x.year)
    grouper = grouped.grouper.groupings[0].grouper

    # swap the internal grouper for a plain list and check that
    # agg(np.mean) still matches mean()
    grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))
    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)

    # same check with a tuple grouper
    grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper))
    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)
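# A minimal public-API sketch of the same behaviour (a hypothetical test, not
# part of the original suite): grouping by a plain list of year labels
# aggregates the same way as grouping by the equivalent callable. Assumes only
# pandas and numpy; the frame, the `years` list and the test name below are
# illustrative.
def test_groupby_list_of_labels_matches_callable_sketch():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(100, 3), columns=list("ABC"),
                      index=pd.date_range("1999-06-01", periods=100, freq="W"))
    years = [ts.year for ts in df.index]

    by_callable = df.groupby(lambda ts: ts.year).mean()
    by_list = df.groupby(years).mean()

    pd.testing.assert_frame_equal(by_callable, by_list)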
def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
    """
    Create and return a BaseGrouper, which is an internal
    mapping of how to create the grouper indexers.
    This may be composed of multiple Grouping objects, indicating
    multiple groupers.

    Groupers are ultimately index mappings. They can originate as:
    index mappings, keys to columns, functions, or Groupers.

    Groupers enable local references to axis, level, sort, while
    the passed in axis, level, and sort are 'global'.

    This routine tries to figure out what the passed in references
    are and then creates a Grouping for each one, combined into
    a BaseGrouper.
    """
    # The implementation is essentially the same as pandas.core.groupby
    group_axis = obj._get_axis(axis)

    # validate that the passed level is compatible with the passed
    # axis of the object
    if level is not None:
        if not isinstance(group_axis, MultiIndex):
            if isinstance(level, compat.string_types):
                if obj.index.name != level:
                    raise ValueError('level name %s is not the name of the '
                                     'index' % level)
            elif level > 0:
                raise ValueError('level > 0 only valid with MultiIndex')

            level = None
            key = group_axis

    # a passed in Grouper, directly convert
    if isinstance(key, Grouper):
        binner, grouper, obj = key._get_grouper(obj)
        if key.key is None:
            return grouper, [], obj
        else:
            return grouper, set([key.key]), obj

    # already have a BaseGrouper, just return it
    elif isinstance(key, BaseGrouper):
        return key, [], obj

    if not isinstance(key, (tuple, list)):
        keys = [key]
    else:
        keys = key

    # what are we after, exactly?
    match_axis_length = len(keys) == len(group_axis)
    any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
    any_arraylike = any(
        isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys)

    try:
        if isinstance(obj, DataFrame):
            all_in_columns = all(g in obj.columns for g in keys)
        else:
            all_in_columns = False
    except Exception:
        all_in_columns = False

    if (not any_callable and not all_in_columns and not any_arraylike and
            match_axis_length and level is None):
        keys = [com._asarray_tuplesafe(keys)]

    if isinstance(level, (tuple, list)):
        if key is None:
            keys = [None] * len(level)
        levels = level
    else:
        levels = [level] * len(keys)

    groupings = []
    exclusions = []

    # if the actual grouper should be obj[key]
    def is_in_axis(key):
        if not _is_label_like(key):
            try:
                obj._data.items.get_loc(key)
            except Exception:
                return False
        return True

    # if the grouper is obj[name]
    def is_in_obj(gpr):
        try:
            return id(gpr) == id(obj[gpr.name])
        except Exception:
            return False

    for i, (gpr, level) in enumerate(zip(keys, levels)):

        if is_in_obj(gpr):  # df.groupby(df['name'])
            in_axis, name = True, gpr.name
            exclusions.append(name)

        elif is_in_axis(gpr):  # df.groupby('name')
            in_axis, name, gpr = True, gpr, obj[gpr]
            exclusions.append(name)

        else:
            in_axis, name = False, None

        if com.is_categorical_dtype(gpr) and len(gpr) != len(obj):
            raise ValueError(
                "Categorical dtype grouper must have "
                "len(grouper) == len(data)")

        ping = Grouping(group_axis, gpr, obj=obj, name=name,
                        level=level, sort=sort, in_axis=in_axis)

        groupings.append(ping)

    if len(groupings) == 0:
        raise ValueError('No group keys passed!')

    # create the internals grouper
    # Modified to insert CustomGrouper
    grouper = CustomGrouper(group_axis, groupings, sort=sort)

    return grouper, exclusions, obj
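# A minimal sketch using only the public pandas API (illustrative names) of
# the four grouper origins the docstring above lists: a key to a column, an
# index mapping (a Series), a function of the axis labels, and an explicit
# Grouper object. It does not call the modified _get_grouper directly; it only
# illustrates the kinds of inputs that routine is expected to resolve.
def _grouper_origins_sketch():
    import pandas as pd

    df = pd.DataFrame({"key": ["a", "b", "a", "b"], "val": [1, 2, 3, 4]})

    by_column_key = df.groupby("key")["val"].sum()               # key to a column
    by_mapping = df.groupby(df["key"])["val"].sum()              # index mapping
    by_function = df.groupby(lambda i: df.loc[i, "key"])["val"].sum()  # function
    by_grouper = df.groupby(pd.Grouper(key="key"))["val"].sum()  # Grouper object

    pd.testing.assert_series_equal(by_column_key, by_mapping)
    pd.testing.assert_series_equal(by_column_key, by_grouper)
    # The function-based grouping yields the same values but an unnamed index.
    assert list(by_function.values) == list(by_column_key.values)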