def _maybe_make_multi_index_columns( self, columns: Sequence[Hashable], col_names: Sequence[Hashable] | None = None, ) -> Sequence[Hashable] | MultiIndex: # possibly create a column mi here if _is_potential_multi_index(columns): list_columns = cast(List[Tuple], columns) return MultiIndex.from_tuples(list_columns, names=col_names) return columns
def _get_index_subset_to_coord_dict(index, subset, sort_labels=False): ilabels = list(zip(*[index._get_level_values(i) for i in subset])) labels_to_i = _get_label_to_i_dict(ilabels, sort_labels=sort_labels) labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) labels_to_i.index.names = [index.names[i] for i in subset] else: labels_to_i.index = Index(x[0] for x in labels_to_i.index) labels_to_i.index.name = index.names[subset[0]] labels_to_i.name = "value" return labels_to_i
def series_with_multilevel_index(): """ Fixture with a Series with a 2-level MultiIndex. """ arrays = [ ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) data = np.random.randn(8) ser = Series(data, index=index) ser[3] = np.NaN return ser
def index_with_missing(request): """ Fixture for indices with missing values """ if request.param in ["int", "uint", "range", "empty", "repeats"]: pytest.xfail("missing values not supported") # GH 35538. Use deep copy to avoid illusive bug on np-dev # Azure pipeline that writes into indices_dict despite copy ind = indices_dict[request.param].copy(deep=True) vals = ind.values if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: # For setting missing values in the top level of MultiIndex vals = ind.tolist() vals[0] = (None, ) + vals[0][1:] vals[-1] = (None, ) + vals[-1][1:] return MultiIndex.from_tuples(vals) else: vals[0] = None vals[-1] = None return type(ind)(vals)
def index_with_missing(request): """ Fixture for indices with missing values. Integer-dtype and empty cases are excluded because they cannot hold missing values. MultiIndex is excluded because isna() is not defined for MultiIndex. """ # GH 35538. Use deep copy to avoid illusive bug on np-dev # Azure pipeline that writes into indices_dict despite copy ind = indices_dict[request.param].copy(deep=True) vals = ind.values if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: # For setting missing values in the top level of MultiIndex vals = ind.tolist() vals[0] = (None, ) + vals[0][1:] vals[-1] = (None, ) + vals[-1][1:] return MultiIndex.from_tuples(vals) else: vals[0] = None vals[-1] = None return type(ind)(vals)
def _stack_multi_columns(frame, level_num=-1, dropna=True): def _convert_level_number(level_num, columns): """ Logic for converting the level number to something we can safely pass to swaplevel: We generally want to convert the level number into a level name, except when columns do not have names, in which case we must leave as a level number """ if level_num in columns.names: return columns.names[level_num] else: if columns.names[level_num] is None: return level_num else: return columns.names[level_num] this = frame.copy() # this makes life much simpler if level_num != frame.columns.nlevels - 1: # roll levels to put selected level at end roll_columns = this.columns for i in range(level_num, frame.columns.nlevels - 1): # Need to check if the ints conflict with level names lev1 = _convert_level_number(i, roll_columns) lev2 = _convert_level_number(i + 1, roll_columns) roll_columns = roll_columns.swaplevel(lev1, lev2) this.columns = roll_columns if not this.columns.is_lexsorted(): # Workaround the edge case where 0 is one of the column names, # which interferes with trying to sort based on the first # level level_to_sort = _convert_level_number(0, this.columns) this = this.sort_index(level=level_to_sort, axis=1) # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: tuples = list( zip(*[ lev.take(level_codes) for lev, level_codes in zip( this.columns.levels[:-1], this.columns.codes[:-1]) ])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) else: new_columns = this.columns.levels[0]._shallow_copy( name=this.columns.names[0]) unique_groups = new_columns # time to ravel the values new_data = {} level_vals = this.columns.levels[-1] level_codes = sorted(set(this.columns.codes[-1])) level_vals_used = level_vals[level_codes] levsize = len(level_codes) drop_cols = [] for key in unique_groups: try: loc = this.columns.get_loc(key) except KeyError: drop_cols.append(key) continue # can make more efficient? # we almost always return a slice # but if unsorted can get a boolean # indexer if not isinstance(loc, slice): slice_len = len(loc) else: slice_len = loc.stop - loc.start if slice_len != levsize: chunk = this.loc[:, this.columns[loc]] chunk.columns = level_vals.take(chunk.columns.codes[-1]) value_slice = chunk.reindex(columns=level_vals_used).values else: if frame._is_homogeneous_type and is_extension_array_dtype( frame.dtypes.iloc[0]): dtype = this[this.columns[loc]].dtypes.iloc[0] subset = this[this.columns[loc]] value_slice = dtype.construct_array_type()._concat_same_type( [x._values for _, x in subset.items()]) N, K = this.shape idx = np.arange(N * K).reshape(K, N).T.ravel() value_slice = value_slice.take(idx) elif frame._is_mixed_type: value_slice = this[this.columns[loc]].values else: value_slice = this.values[:, loc] if value_slice.ndim > 1: # i.e. not extension value_slice = value_slice.ravel() new_data[key] = value_slice if len(drop_cols) > 0: new_columns = new_columns.difference(drop_cols) N = len(this) if isinstance(this.index, MultiIndex): new_levels = list(this.index.levels) new_names = list(this.index.names) new_codes = [lab.repeat(levsize) for lab in this.index.codes] else: old_codes, old_levels = factorize_from_iterable(this.index) new_levels = [old_levels] new_codes = [old_codes.repeat(levsize)] new_names = [this.index.name] # something better? new_levels.append(level_vals) new_codes.append(np.tile(level_codes, N)) new_names.append(frame.columns.names[level_num]) new_index = MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) result = frame._constructor(new_data, index=new_index, columns=new_columns) # more efficient way to go about this? can do the whole masking biz but # will only save a small amount of time... if dropna: result = result.dropna(axis=0, how="all") return result
indices_dict = { "unicode": tm.makeUnicodeIndex(100), "string": tm.makeStringIndex(100), "datetime": tm.makeDateIndex(100), "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), "period": tm.makePeriodIndex(100), "timedelta": tm.makeTimedeltaIndex(100), "int": tm.makeIntIndex(100), "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), "multi": _create_multiindex(), "repeats": Index([0, 0, 1, 1, 2, 2]), } @pytest.fixture(params=indices_dict.keys()) def index(request): """ Fixture for many "simple" kinds of indices. These indices are unlikely to cover corner cases, e.g. - no names - no NaTs/NaNs - no values near implementation bounds
import pandas.util.testing as tm @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), tm.makePeriodIndex(100), tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), Index([]), MultiIndex.from_tuples(lzip( ['foo', 'bar', 'baz'], [1, 2, 3])), Index([0, 0, 1, 1, 2, 2])], ids=lambda x: type(x).__name__) def indices(request): return request.param @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer return request.param zeros = [box([0] * 5, dtype=dtype) for box in [pd.Index, np.array] for dtype in [np.int64, np.uint64, np.float64]]
def _maybe_make_multi_index_columns(self, columns, col_names=None): # possibly create a column mi here if _is_potential_multi_index(columns): columns = MultiIndex.from_tuples(columns, names=col_names) return columns
import pandas.util.testing as tm @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), tm.makePeriodIndex(100), tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), Index([]), MultiIndex.from_tuples(zip( ['foo', 'bar', 'baz'], [1, 2, 3])), Index([0, 0, 1, 1, 2, 2])], ids=lambda x: type(x).__name__) def indices(request): return request.param @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer return request.param zeros = [box([0] * 5, dtype=dtype) for box in [pd.Index, np.array] for dtype in [np.int64, np.uint64, np.float64]]