def test_set_index_names(self): df = pd.util.testing.makeDataFrame() df.index.name = 'name' assert df.set_index(df.index).index.names == ['name'] mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, names=['A', 'B', 'C', 'D']) df = df.set_index(['A', 'B']) assert df.set_index(df.index).index.names == ['A', 'B'] # Check that set_index isn't converting a MultiIndex into an Index assert isinstance(df.set_index(df.index).index, MultiIndex) # Check actual equality tm.assert_index_equal(df.set_index(df.index).index, mi) idx2 = df.index.rename(['C', 'D']) # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather # than a pair of tuples assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex) # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
def test_set_index_names(self): df = pd.util.testing.makeDataFrame() df.index.name = 'name' self.assertEqual(df.set_index(df.index).index.names, ['name']) mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, names=['A', 'B', 'A', 'B']) df = df.set_index(['A', 'B']) self.assertEqual(df.set_index(df.index).index.names, ['A', 'B']) # Check that set_index isn't converting a MultiIndex into an Index self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex)) # Check actual equality tm.assert_index_equal(df.set_index(df.index).index, mi) # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather # than a pair of tuples self.assertTrue(isinstance(df.set_index( [df.index, df.index]).index, MultiIndex)) # Check equality tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2)
def test_append_mixed_dtypes(): # GH 13660 dti = date_range('2011-01-01', freq='M', periods=3, ) dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') pi = period_range('2011-01', freq='M', periods=3) mi = MultiIndex.from_arrays([[1, 2, 3], [1.1, np.nan, 3.3], ['a', 'b', 'c'], dti, dti_tz, pi]) assert mi.nlevels == 6 res = mi.append(mi) exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], ['a', 'b', 'c', 'a', 'b', 'c'], dti.append(dti), dti_tz.append(dti_tz), pi.append(pi)]) tm.assert_index_equal(res, exp) other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], ['x', 'y', 'z'], ['x', 'y', 'z'], ['x', 'y', 'z'], ['x', 'y', 'z']]) res = mi.append(other) exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], [1.1, np.nan, 3.3, 'x', 'y', 'z'], ['a', 'b', 'c', 'x', 'y', 'z'], dti.append(pd.Index(['x', 'y', 'z'])), dti_tz.append(pd.Index(['x', 'y', 'z'])), pi.append(pd.Index(['x', 'y', 'z']))]) tm.assert_index_equal(res, exp)
def test_dropna(): # GH 6194 idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5], [1, 2, np.nan, np.nan, 5], ['a', 'b', 'c', np.nan, 'e']]) exp = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ['a', 'e']]) tm.assert_index_equal(idx.dropna(), exp) tm.assert_index_equal(idx.dropna(how='any'), exp) exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], [1, 2, np.nan, 5], ['a', 'b', 'c', 'e']]) tm.assert_index_equal(idx.dropna(how='all'), exp) msg = "invalid how option: xxx" with pytest.raises(ValueError, match=msg): idx.dropna(how='xxx') # GH26408 # test if missing values are dropped for mutiindex constructed # from codes and values idx = MultiIndex(levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]]) expected = MultiIndex.from_arrays([["128", 2], ["128", 2]]) tm.assert_index_equal(idx.dropna(), expected) tm.assert_index_equal(idx.dropna(how='any'), expected) expected = MultiIndex.from_arrays([[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]) tm.assert_index_equal(idx.dropna(how='all'), expected)
def test_get_duplicates(): # GH5873 for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) assert not mi.has_duplicates with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []])) tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype='bool')) for n in range(1, 6): # 1st level shape for m in range(1, 5): # 2nd level shape # all possible unique combinations, including nan codes = product(range(-1, n), range(-1, m)) mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], codes=np.random.permutation(list(codes)).T) assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 assert mi.get_duplicates().equals(MultiIndex.from_arrays( [[], []])) tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype='bool'))
def test_pivot_datetime_tz(self): dates1 = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00', '2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00'] dates2 = ['2013-01-01 15:00:00', '2013-01-01 15:00:00', '2013-01-01 15:00:00', '2013-02-01 15:00:00', '2013-02-01 15:00:00', '2013-02-01 15:00:00'] df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], 'dt1': dates1, 'dt2': dates2, 'value1': range(6), 'value2': [1, 2] * 3}) df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific')) df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo')) exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00'], tz='US/Pacific', name='dt1') exp_col1 = Index(['value1', 'value1']) exp_col2 = Index(['a', 'b'], name='label') exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col) result = pivot_table(df, index=['dt1'], columns=['label'], values=['value1']) tm.assert_frame_equal(result, expected) exp_col1 = Index(['sum', 'sum', 'sum', 'sum', 'mean', 'mean', 'mean', 'mean']) exp_col2 = Index(['value1', 'value1', 'value2', 'value2'] * 2) exp_col3 = pd.DatetimeIndex(['2013-01-01 15:00:00', '2013-02-01 15:00:00'] * 4, tz='Asia/Tokyo', name='dt2') exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) expected = DataFrame(np.array([[0, 3, 1, 2, 0, 3, 1, 2], [1, 4, 2, 1, 1, 4, 2, 1], [2, 5, 1, 2, 2, 5, 1, 2]]), index=exp_idx, columns=exp_col) result = pivot_table(df, index=['dt1'], columns=['dt2'], values=['value1', 'value2'], aggfunc=[np.sum, np.mean]) tm.assert_frame_equal(result, expected)
def test_join_multiindex(self): index1 = MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b'], [1, 2, 3, 1, 2, 3]], names=['first', 'second']) index2 = MultiIndex.from_arrays([['b', 'b', 'b', 'c', 'c', 'c'], [1, 2, 3, 1, 2, 3]], names=['first', 'second']) df1 = DataFrame(data=np.random.randn(6), index=index1, columns=['var X']) df2 = DataFrame(data=np.random.randn(6), index=index2, columns=['var Y']) df1 = df1.sort_index(level=0) df2 = df2.sort_index(level=0) joined = df1.join(df2, how='outer') ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) assert joined.index.names == index1.names df1 = df1.sort_index(level=1) df2 = df2.sort_index(level=1) joined = df1.join(df2, how='outer').sort_index(level=0) ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) assert joined.index.names == index1.names
def test_concat_with_group_keys(self): df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randn(4, 4)) # axis=0 df = DataFrame(np.random.randn(3, 4)) df2 = DataFrame(np.random.randn(4, 4)) result = concat([df, df2], keys=[0, 1]) exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]]) expected = DataFrame(np.r_[df.values, df2.values], index=exp_index) tm.assert_frame_equal(result, expected) result = concat([df, df], keys=[0, 1]) exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) expected = DataFrame(np.r_[df.values, df.values], index=exp_index2) tm.assert_frame_equal(result, expected) # axis=1 df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randn(4, 4)) result = concat([df, df2], keys=[0, 1], axis=1) expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) tm.assert_frame_equal(result, expected) result = concat([df, df], keys=[0, 1], axis=1) expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2) tm.assert_frame_equal(result, expected)
def test_unstack_bool(self): df = DataFrame([False, False], index=MultiIndex.from_arrays([["a", "b"], ["c", "l"]]), columns=["col"]) rs = df.unstack() xp = DataFrame( np.array([[False, np.nan], [np.nan, False]], dtype=object), index=["a", "b"], columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]), ) assert_frame_equal(rs, xp)
def test_unstack_bool(self): df = DataFrame([False, False], index=MultiIndex.from_arrays([['a', 'b'], ['c', 'l']]), columns=['col']) rs = df.unstack() xp = DataFrame(np.array([[False, np.nan], [np.nan, False]], dtype=object), index=['a', 'b'], columns=MultiIndex.from_arrays([['col', 'col'], ['c', 'l']])) assert_frame_equal(rs, xp)
def setup(self): n = 1182720 low, high = -4096, 4096 arrs = [np.repeat(np.random.randint(low, high, (n // k)), k) for k in [11, 7, 5, 3, 1]] self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)] a = np.repeat(np.arange(100), 1000) b = np.tile(np.arange(1000), 100) self.mi = MultiIndex.from_arrays([a, b]) self.mi = self.mi.take(np.random.permutation(np.arange(100000)))
def _to_narrow(self, data, mask, dates, assets): """ Convert raw computed pipeline results into a DataFrame for public APIs. Parameters ---------- data : dict[str -> ndarray[ndim=2]] Dict mapping column names to computed results. mask : ndarray[bool, ndim=2] Mask array of values to keep. dates : ndarray[datetime64, ndim=1] Row index for arrays `data` and `mask` assets : ndarray[int64, ndim=2] Column index for arrays `data` and `mask` Returns ------- results : pd.DataFrame The indices of `results` are as follows: index : two-tiered MultiIndex of (date, asset). Contains an entry for each (date, asset) pair corresponding to a `True` value in `mask`. columns : Index of str One column per entry in `data`. If mask[date, asset] is True, then result.loc[(date, asset), colname] will contain the value of data[colname][date, asset]. """ if not mask.any(): # Manually handle the empty DataFrame case. This is a workaround # to pandas failing to tz_localize an empty dataframe with a # MultiIndex. It also saves us the work of applying a known-empty # mask to each array. # # Slicing `dates` here to preserve pandas metadata. empty_dates = dates[:0] empty_assets = array([], dtype=object) return DataFrame( data={ name: array([], dtype=arr.dtype) for name, arr in iteritems(data) }, index=MultiIndex.from_arrays([empty_dates, empty_assets]), ) resolved_assets = array(self._finder.retrieve_all(assets)) dates_kept = repeat_last_axis(dates.values, len(assets))[mask] assets_kept = repeat_first_axis(resolved_assets, len(dates))[mask] return DataFrame( data={name: arr[mask] for name, arr in iteritems(data)}, index=MultiIndex.from_arrays([dates_kept, assets_kept]), ).tz_localize('UTC', level=0)
def test_from_arrays(idx): arrays = [np.asarray(lev).take(level_codes) for lev, level_codes in zip(idx.levels, idx.codes)] # list of arrays as input result = MultiIndex.from_arrays(arrays, names=idx.names) tm.assert_index_equal(result, idx) # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], ['a', 'b']]) assert result.levels[0].equals(Index([Timestamp('20130101')])) assert result.levels[1].equals(Index(['a', 'b']))
def test_from_arrays_iterator(idx): # GH 18434 arrays = [np.asarray(lev).take(level_codes) for lev, level_codes in zip(idx.levels, idx.codes)] # iterator as input result = MultiIndex.from_arrays(iter(arrays), names=idx.names) tm.assert_index_equal(result, idx) # invalid iterator input msg = "Input must be a list / sequence of array-likes." with pytest.raises(TypeError, match=msg): MultiIndex.from_arrays(0)
def test_pivot_datetime_tz(self): dates1 = [ "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", ] dates2 = [ "2013-01-01 15:00:00", "2013-01-01 15:00:00", "2013-01-01 15:00:00", "2013-02-01 15:00:00", "2013-02-01 15:00:00", "2013-02-01 15:00:00", ] df = DataFrame( { "label": ["a", "a", "a", "b", "b", "b"], "dt1": dates1, "dt2": dates2, "value1": np.arange(6, dtype="int64"), "value2": [1, 2] * 3, } ) df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) exp_idx = pd.DatetimeIndex( ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], tz="US/Pacific", name="dt1" ) exp_col1 = Index(["value1", "value1"]) exp_col2 = Index(["a", "b"], name="label") exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col) result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"]) tm.assert_frame_equal(result, expected) exp_col1 = Index(["sum", "sum", "sum", "sum", "mean", "mean", "mean", "mean"]) exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2) exp_col3 = pd.DatetimeIndex(["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4, tz="Asia/Tokyo", name="dt2") exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) expected = DataFrame( np.array([[0, 3, 1, 2, 0, 3, 1, 2], [1, 4, 2, 1, 1, 4, 2, 1], [2, 5, 1, 2, 2, 5, 1, 2]], dtype="int64"), index=exp_idx, columns=exp_col, ) result = pivot_table(df, index=["dt1"], columns=["dt2"], values=["value1", "value2"], aggfunc=[np.sum, np.mean]) tm.assert_frame_equal(result, expected)
def test_unique_datetimelike(): idx1 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', '2015-01-01', 'NaT', 'NaT']) idx2 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', '2015-01-02', 'NaT', '2015-01-01'], tz='Asia/Tokyo') result = MultiIndex.from_arrays([idx1, idx2]).unique() eidx1 = DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) eidx2 = DatetimeIndex(['2015-01-01', '2015-01-02', 'NaT', '2015-01-01'], tz='Asia/Tokyo') exp = MultiIndex.from_arrays([eidx1, eidx2]) tm.assert_index_equal(result, exp)
def test_from_arrays_iterator(idx): # GH 18434 arrays = [] for lev, lab in zip(idx.levels, idx.labels): arrays.append(np.asarray(lev).take(lab)) # iterator as input result = MultiIndex.from_arrays(iter(arrays), names=idx.names) tm.assert_index_equal(result, idx) # invalid iterator input with tm.assert_raises_regex( TypeError, "Input must be a list / sequence of array-likes."): MultiIndex.from_arrays(0)
def test_isin(): values = [('foo', 2), ('bar', 3), ('quux', 4)] idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) result = idx.isin(values) expected = np.array([False, False, True, True]) tm.assert_numpy_array_equal(result, expected) # empty, return dtype bool idx = MultiIndex.from_arrays([[], []]) result = idx.isin(values) assert len(result) == 0 assert result.dtype == np.bool_
def deserialize(self, item, force_bytes_to_unicode=False): index = self._index_from_records(item) column_fields = [x for x in item.dtype.names if x not in item.dtype.metadata['index']] multi_column = item.dtype.metadata.get('multi_column') if len(item) == 0: rdata = item[column_fields] if len(column_fields) > 0 else None if multi_column is not None: columns = MultiIndex.from_arrays(multi_column["values"], names=multi_column["names"]) return DataFrame(rdata, index=index, columns=columns) else: return DataFrame(rdata, index=index) columns = item.dtype.metadata['columns'] df = DataFrame(data=item[column_fields], index=index, columns=columns) if multi_column is not None: df.columns = MultiIndex.from_arrays(multi_column["values"], names=multi_column["names"]) if force_bytes_to_unicode: # This is needed due to 'str' type in py2 when read back in py3 is 'bytes' which breaks the workflow # of people migrating to py3. # https://github.com/manahl/arctic/issues/598 # This should not be used for a normal flow, and you should instead of writing unicode strings # if you want to work with str in py3., for c in df.select_dtypes(object): # The conversion is not using astype similar to the index as pandas has a bug where it tries to convert # the data columns to a unicode string, and the object in this case would be bytes, eg. b'abc' # which is converted to u"b'abc'" i.e it includes the b character as well! This generally happens # when there is a str conversion without specifying the encoding. eg. str(b'abc') -> "b'abc'" and the # fix for this is to tell it the encoding to use: i.e str(b'abc', 'utf-8') -> "abc" if type(df[c].iloc[0]) == bytes: df[c] = df[c].str.decode('utf-8') if isinstance(df.index, MultiIndex): unicode_indexes = [] # MultiIndex requires a conversion at each level. for level in range(len(df.index.levels)): _index = df.index.get_level_values(level) if isinstance(_index[0], bytes): _index = _index.astype('unicode') unicode_indexes.append(_index) df.index = unicode_indexes else: if type(df.index[0]) == bytes: df.index = df.index.astype('unicode') if type(df.columns[0]) == bytes: df.columns = df.index.astype('unicode') return df
def setup(self, axis): self.df = DataFrame(np.random.randn(10000, 1000)) self.df.ix[50:1000, 20:50] = np.nan self.df.ix[2000:3000] = np.nan self.df.ix[:, 60:70] = np.nan self.df_mixed = self.df.copy() self.df_mixed['foo'] = 'bar' self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index]) self.df.columns = MultiIndex.from_arrays([self.df.columns, self.df.columns]) self.df_mixed.index = MultiIndex.from_arrays([self.df_mixed.index, self.df_mixed.index]) self.df_mixed.columns = MultiIndex.from_arrays([self.df_mixed.columns, self.df_mixed.columns])
def test_query_with_partially_named_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, 'rating'] df = DataFrame(np.random.randn(10, 2), index=index) res = df.query('rating == 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind == 1] assert_frame_equal(res, exp) res = df.query('rating != 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind != 1] assert_frame_equal(res, exp) res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind == "red"] assert_frame_equal(res, exp) res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind != "red"] assert_frame_equal(res, exp)
def test_to_excel_multiindex_dates(self): _skip_if_no_xlrd() ext = self.ext path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext # try multiindex with dates tsframe = self.tsframe old_index = tsframe.index new_index = [old_index, np.arange(len(old_index))] tsframe.index = MultiIndex.from_arrays(new_index) with ensure_clean(path) as path: tsframe.to_excel(path, 'test1', index_label=['time', 'foo']) reader = ExcelFile(path) recons = reader.parse('test1', index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons, check_names=False) self.assertEquals(recons.index.names, ('time', 'foo')) # infer index tsframe.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1') tm.assert_frame_equal(tsframe, recons) self.tsframe.index = old_index # needed if setUP becomes classmethod
def test_constructor(self): assert self.ts.index.is_all_dates # Pass in Series derived = Series(self.ts) assert derived.index.is_all_dates assert tm.equalContents(derived.index, self.ts.index) # Ensure new index is not created assert id(self.ts.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ assert mixed[1] is np.NaN assert not self.empty.index.is_all_dates assert not Series({}).index.is_all_dates pytest.raises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) pytest.raises(NotImplementedError, Series, m)
def test_nsmallest(): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) b = Series(list('a' * 5 + 'b' * 5)) gb = a.groupby(b) r = gb.nsmallest(3) e = Series([ 1, 2, 3, 0, 4, 6 ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]])) tm.assert_series_equal(r, e) a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) gb = a.groupby(b) e = Series([ 0, 1, 1, 0, 1, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) tm.assert_series_equal(gb.nsmallest(3, keep='last'), e)
def test_get_loc_missing_nan(): # GH 8569 idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) assert isinstance(idx.get_loc(1), slice) pytest.raises(KeyError, idx.get_loc, 3) pytest.raises(KeyError, idx.get_loc, np.nan) pytest.raises(KeyError, idx.get_loc, [np.nan])
def test_isin_level_kwarg(): idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) vals_0 = ['foo', 'bar', 'quux'] vals_1 = [2, 3, 10] expected = np.array([False, False, True, True]) tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) msg = "Too many levels: Index has only 2 levels, not 6" with pytest.raises(IndexError, match=msg): idx.isin(vals_0, level=5) msg = ("Too many levels: Index has only 2 levels, -5 is not a valid level" " number") with pytest.raises(IndexError, match=msg): idx.isin(vals_0, level=-5) with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"): idx.isin(vals_0, level=1.0) with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"): idx.isin(vals_1, level=-1.0) with pytest.raises(KeyError, match="'Level A not found'"): idx.isin(vals_1, level='A') idx.names = ['A', 'B'] tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) with pytest.raises(KeyError, match="'Level C not found'"): idx.isin(vals_1, level='C')
def test_constructor(self): self.assertTrue(self.ts.index.is_all_dates) # Pass in Series derived = Series(self.ts) self.assertTrue(derived.index.is_all_dates) self.assertTrue(tm.equalContents(derived.index, self.ts.index)) # Ensure new index is not created self.assertEqual(id(self.ts.index), id(derived.index)) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) self.assertEqual(mixed.dtype, np.object_) self.assertIs(mixed[1], np.NaN) self.assertFalse(self.empty.index.is_all_dates) self.assertFalse(Series({}).index.is_all_dates) self.assertRaises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' self.assertEqual(rs, xp) # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) self.assertRaises(NotImplementedError, Series, m)
def test_constructor(self, datetime_series, empty_series): assert datetime_series.index.is_all_dates # Pass in Series derived = Series(datetime_series) assert derived.index.is_all_dates assert tm.equalContents(derived.index, datetime_series.index) # Ensure new index is not created assert id(datetime_series.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ assert mixed[1] is np.NaN assert not empty_series.index.is_all_dates assert not Series({}).index.is_all_dates # exception raised is of type Exception with pytest.raises(Exception, match="Data must be 1-dimensional"): Series(np.random.randn(3, 3), index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) msg = "initializing a Series from a MultiIndex is not supported" with pytest.raises(NotImplementedError, match=msg): Series(m)
def setup(self): size = 65536 arrays = [np.random.randint(0, 8192, size), np.random.randint(0, 1024, size)] mask = np.random.rand(size) < 0.1 self.mi_unused_levels = MultiIndex.from_arrays(arrays) self.mi_unused_levels = self.mi_unused_levels[mask]
def test_empty_with_reversed_multiindex(self): data = 'x,y,z' result = self.read_csv(StringIO(data), index_col=[1, 0]) expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays( [[]] * 2, names=['y', 'x'])) tm.assert_frame_equal(result, expected, check_index_type=False)
def test_from_arrays_different_lengths(idx1, idx2): # see gh-13599 msg = "^all arrays must be same length$" with pytest.raises(ValueError, match=msg): MultiIndex.from_arrays([idx1, idx2])
"columns": ["x", "z"], "index": Index([], name="y") }), ("x", { "columns": ["y", "z"], "index": Index([], name="x") }), ("y", { "columns": ["x", "z"], "index": Index([], name="y") }), ( [0, 1], { "columns": ["z"], "index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]), }, ), ( ["x", "y"], { "columns": ["z"], "index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]), }, ), ( [1, 0], { "columns": ["z"], "index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]), },
def test_is_monotonic_decreasing(): i = MultiIndex.from_product( [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"] ) assert i.is_monotonic_decreasing is True assert i._is_strictly_monotonic_decreasing is True assert Index(i.values).is_monotonic_decreasing is True assert i._is_strictly_monotonic_decreasing is True i = MultiIndex.from_product( [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] ) assert i.is_monotonic_decreasing is False assert i._is_strictly_monotonic_decreasing is False assert Index(i.values).is_monotonic_decreasing is False assert Index(i.values)._is_strictly_monotonic_decreasing is False i = MultiIndex.from_product( [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] ) assert i.is_monotonic_decreasing is False assert i._is_strictly_monotonic_decreasing is False assert Index(i.values).is_monotonic_decreasing is False assert Index(i.values)._is_strictly_monotonic_decreasing is False i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]) assert i.is_monotonic_decreasing is False assert i._is_strictly_monotonic_decreasing is False assert Index(i.values).is_monotonic_decreasing is False assert Index(i.values)._is_strictly_monotonic_decreasing is False # string ordering i = MultiIndex( levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) assert i.is_monotonic_decreasing is False assert Index(i.values).is_monotonic_decreasing is False assert i._is_strictly_monotonic_decreasing is False assert Index(i.values)._is_strictly_monotonic_decreasing is False i = MultiIndex( levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) assert i.is_monotonic_decreasing is True assert Index(i.values).is_monotonic_decreasing is True assert i._is_strictly_monotonic_decreasing is True assert Index(i.values)._is_strictly_monotonic_decreasing is True # mixed levels, hits the TypeError i = MultiIndex( levels=[ [4, 3, 2, 1], [ "nl0000301109", "nl0000289965", "nl0000289783", "lu0197800237", "gb00b03mlx29", ], ], codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], names=["household_id", "asset_id"], ) assert i.is_monotonic_decreasing is False assert i._is_strictly_monotonic_decreasing is False # empty i = MultiIndex.from_arrays([[], []]) assert i.is_monotonic_decreasing is True assert Index(i.values).is_monotonic_decreasing is True assert i._is_strictly_monotonic_decreasing is True assert Index(i.values)._is_strictly_monotonic_decreasing is True
def test_index_equal_empty_iterable(): # #16844 a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(a, b)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): # issue 19132 idx = MultiIndex.from_arrays(index_arr) with tm.assert_produces_warning(FutureWarning, match="'kind' argument"): result = idx.get_slice_bound(target, side=algo, kind="loc") assert result == expected
def _do_test(df, r_dtype=None, c_dtype=None, rnlvl=None, cnlvl=None, dupe_col=False): kwargs = dict(parse_dates=False) if cnlvl: if rnlvl is not None: kwargs['index_col'] = lrange(rnlvl) kwargs['header'] = lrange(cnlvl) with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize) recons = self.read_csv(path, **kwargs) else: kwargs['header'] = 0 with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize) recons = self.read_csv(path, **kwargs) def _to_uni(x): if not isinstance(x, compat.text_type): return x.decode('utf8') return x if dupe_col: # read_Csv disambiguates the columns by # labeling them dupe.1,dupe.2, etc'. monkey patch columns recons.columns = df.columns if rnlvl and not cnlvl: delta_lvl = [ recons.iloc[:, i].values for i in range(rnlvl - 1) ] ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl) recons.index = ix recons = recons.iloc[:, rnlvl - 1:] type_map = dict(i='i', f='f', s='O', u='O', dt='O', p='O') if r_dtype: if r_dtype == 'u': # unicode r_dtype = 'O' recons.index = np.array(lmap(_to_uni, recons.index), dtype=r_dtype) df.index = np.array(lmap(_to_uni, df.index), dtype=r_dtype) elif r_dtype == 'dt': # unicode r_dtype = 'O' recons.index = np.array(lmap(Timestamp, recons.index), dtype=r_dtype) df.index = np.array(lmap(Timestamp, df.index), dtype=r_dtype) elif r_dtype == 'p': r_dtype = 'O' recons.index = np.array(list( map(Timestamp, to_datetime(recons.index))), dtype=r_dtype) df.index = np.array(list( map(Timestamp, df.index.to_timestamp())), dtype=r_dtype) else: r_dtype = type_map.get(r_dtype) recons.index = np.array(recons.index, dtype=r_dtype) df.index = np.array(df.index, dtype=r_dtype) if c_dtype: if c_dtype == 'u': c_dtype = 'O' recons.columns = np.array(lmap(_to_uni, recons.columns), dtype=c_dtype) df.columns = np.array(lmap(_to_uni, df.columns), dtype=c_dtype) elif c_dtype == 'dt': c_dtype = 'O' recons.columns = np.array(lmap(Timestamp, recons.columns), dtype=c_dtype) df.columns = np.array(lmap(Timestamp, df.columns), dtype=c_dtype) elif c_dtype == 'p': c_dtype = 'O' recons.columns = np.array(lmap(Timestamp, to_datetime( recons.columns)), dtype=c_dtype) df.columns = np.array(lmap(Timestamp, df.columns.to_timestamp()), dtype=c_dtype) else: c_dtype = type_map.get(c_dtype) recons.columns = np.array(recons.columns, dtype=c_dtype) df.columns = np.array(df.columns, dtype=c_dtype) assert_frame_equal(df, recons, check_names=False, check_less_precise=True)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.get_slice_bound(target, side=algo, kind="loc") assert result == expected
def test_get_indexer_with_missing_value(index_arr, labels, expected): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.get_indexer(labels) tm.assert_numpy_array_equal(result, expected)
def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper # gh-14942 (implement) # gh-10132 (back-compat) # gh-8138 (back-compat) # gh-8869 cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) df["C"] = ["foo", "bar"] * 2 # multiple groupers with a non-cat gb = df.groupby(["A", "B", "C"], observed=observed) exp_index = MultiIndex.from_arrays( [cat1, cat2, ["foo", "bar"] * 2], names=["A", "B", "C"] ) expected = DataFrame({"values": Series([1, 2, 3, 4], index=exp_index)}).sort_index() result = gb.sum() if not observed: expected = cartesian_product_for_groupers( expected, [cat1, cat2, ["foo", "bar"]], list("ABC") ) tm.assert_frame_equal(result, expected) gb = df.groupby(["A", "B"], observed=observed) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) result = gb.sum() if not observed: expected = cartesian_product_for_groupers(expected, [cat1, cat2], list("AB")) tm.assert_frame_equal(result, expected) # https://github.com/pandas-dev/pandas/issues/8138 d = { "cat": Categorical( ["a", "b", "a", "b"], categories=["a", "b", "c"], ordered=True ), "ints": [1, 1, 2, 2], "val": [10, 20, 30, 40], } df = DataFrame(d) # Grouping on a single column groups_single_key = df.groupby("cat", observed=observed) result = groups_single_key.mean() exp_index = CategoricalIndex( list("ab"), name="cat", categories=list("abc"), ordered=True ) expected = DataFrame({"ints": [1.5, 1.5], "val": [20.0, 30]}, index=exp_index) if not observed: index = CategoricalIndex( list("abc"), name="cat", categories=list("abc"), ordered=True ) expected = expected.reindex(index) tm.assert_frame_equal(result, expected) # Grouping on two columns groups_double_key = df.groupby(["cat", "ints"], observed=observed) result = groups_double_key.agg("mean") expected = DataFrame( { "val": [10, 30, 20, 40], "cat": Categorical( ["a", "a", "b", "b"], categories=["a", "b", "c"], ordered=True ), "ints": [1, 2, 1, 2], } ).set_index(["cat", "ints"]) if not observed: expected = cartesian_product_for_groupers( expected, [df.cat.values, [1, 2]], ["cat", "ints"] ) tm.assert_frame_equal(result, expected) # GH 10132 for key in [("a", 1), ("b", 2), ("b", 1), ("a", 2)]: c, i = key result = groups_double_key.get_group(key) expected = df[(df.cat == c) & (df.ints == i)] tm.assert_frame_equal(result, expected) # gh-8869 # with as_index d = { "foo": [10, 8, 4, 8, 4, 1, 1], "bar": [10, 20, 30, 40, 50, 60, 70], "baz": ["d", "c", "e", "a", "a", "d", "c"], } df = DataFrame(d) cat = pd.cut(df["foo"], np.linspace(0, 10, 3)) df["range"] = cat groups = df.groupby(["range", "baz"], as_index=False, observed=observed) result = groups.agg("mean") groups2 = df.groupby(["range", "baz"], as_index=True, observed=observed) expected = groups2.agg("mean").reset_index() tm.assert_frame_equal(result, expected)
tm.assert_numpy_array_equal(end, np.array(expected_end), check_dtype=False) assert len(start) == len(end) @pytest.mark.parametrize( "df, window_size, expected", [ ( DataFrame({ "b": [0, 1, 2], "a": [1, 2, 2] }), 2, Series( [0, 1.5, 2.0], index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]), name="b", dtype=np.float64, ), ), ( DataFrame({ "b": [np.nan, 1, 2, np.nan] + list(range(4, 18)), "a": [1] * 7 + [2] * 11, "c": range(18), }), 12, Series( [ 3.6, 3.6,
def test_set_index2(self): df = DataFrame({ 'A': ['foo', 'foo', 'foo', 'bar', 'bar'], 'B': ['one', 'two', 'three', 'one', 'two'], 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5) }) # new object, single-column result = df.set_index('C') result_nodrop = df.set_index('C', drop=False) index = Index(df['C'], name='C') expected = df.loc[:, ['A', 'B', 'D', 'E']] expected.index = index expected_nodrop = df.copy() expected_nodrop.index = index assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) self.assertEqual(result.index.name, index.name) # inplace, single df2 = df.copy() df2.set_index('C', inplace=True) assert_frame_equal(df2, expected) df3 = df.copy() df3.set_index('C', drop=False, inplace=True) assert_frame_equal(df3, expected_nodrop) # create new object, multi-column result = df.set_index(['A', 'B']) result_nodrop = df.set_index(['A', 'B'], drop=False) index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) expected = df.loc[:, ['C', 'D', 'E']] expected.index = index expected_nodrop = df.copy() expected_nodrop.index = index assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) self.assertEqual(result.index.names, index.names) # inplace df2 = df.copy() df2.set_index(['A', 'B'], inplace=True) assert_frame_equal(df2, expected) df3 = df.copy() df3.set_index(['A', 'B'], drop=False, inplace=True) assert_frame_equal(df3, expected_nodrop) # corner case with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) # append result = df.set_index(['A', 'B'], append=True) xp = df.reset_index().set_index(['index', 'A', 'B']) xp.index.names = [None, 'A', 'B'] assert_frame_equal(result, xp) # append to existing multiindex rdf = df.set_index(['A'], append=True) rdf = rdf.set_index(['B', 'C'], append=True) expected = df.set_index(['A', 'B', 'C'], append=True) assert_frame_equal(rdf, expected) # Series result = df.set_index(df.C) self.assertEqual(result.index.name, 'C')
def test_to_csv_multiindex(self): frame = self.frame old_index = frame.index arrays = np.arange(len(old_index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index with ensure_clean('__tmp_to_csv_multiindex__') as path: frame.to_csv(path, header=False) frame.to_csv(path, columns=['A', 'B']) # round trip frame.to_csv(path) df = self.read_csv(path, index_col=[0, 1], parse_dates=False) # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) assert frame.index.names == df.index.names # needed if setUp becomes a class method self.frame.index = old_index # try multiindex with dates tsframe = self.tsframe old_index = tsframe.index new_index = [old_index, np.arange(len(old_index))] tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=['time', 'foo']) recons = self.read_csv(path, index_col=[0, 1]) # TODO to_csv drops column name assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) recons = self.read_csv(path, index_col=None) assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) recons = self.read_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) # needed if setUp becomes class method self.tsframe.index = old_index with ensure_clean('__tmp_to_csv_multiindex__') as path: # GH3571, GH1651, GH3141 def _make_frame(names=None): if names is True: names = ['first', 'second'] return DataFrame(np.random.randint(0, 10, size=(3, 3)), columns=MultiIndex.from_tuples( [('bah', 'foo'), ('bah', 'bar'), ('ban', 'baz')], names=names), dtype='int64') # column & index are multi-index df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) assert_frame_equal(df, result) # column is mi df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=0) assert_frame_equal(df, result) # dup column names? df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) df.to_csv(path) result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) assert_frame_equal(df, result) # writing with no index df = _make_frame() df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) assert com._all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) # tupleize_cols=True and index=False df = _make_frame(True) with tm.assert_produces_warning(FutureWarning): df.to_csv(path, tupleize_cols=True, index=False) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = read_csv(path, header=0, tupleize_cols=True, index_col=None) result.columns = df.columns assert_frame_equal(df, result) # whatsnew example df = _make_frame() df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) assert_frame_equal(df, result) df = _make_frame(True) df.to_csv(path) result = read_csv(path, header=[0, 1], index_col=[0]) assert_frame_equal(df, result) # column & index are multi-index (compatibility) df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) with tm.assert_produces_warning(FutureWarning): df.to_csv(path, tupleize_cols=True) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = read_csv(path, header=0, index_col=[0, 1], tupleize_cols=True) result.columns = df.columns assert_frame_equal(df, result) # invalid options df = _make_frame(True) df.to_csv(path) for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) with pytest.raises(ParserError, match=msg): read_csv(path, header=lrange(i), index_col=0) # write with cols msg = 'cannot specify cols with a MultiIndex' with pytest.raises(TypeError, match=msg): df.to_csv(path, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty tsframe[:0].to_csv(path) recons = self.read_csv(path) exp = tsframe[:0] exp.index = [] tm.assert_index_equal(recons.columns, exp.columns) assert len(recons) == 0
def test_slice_locs_with_missing_value(index_arr, expected, start_idx, end_idx): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.slice_locs(start=start_idx, end=end_idx) assert result == expected
def test_multiindex_name_and_level_raising(): # GH#20421 mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) with pytest.raises(TypeError, match="Can not pass level for dictlike `names`."): mi.set_names(names={"x": "z"}, level={"x": "z"})
def sims_data(**kwargs): """ Gets trace element data grouped by sample and minerals. The data is formatted as a nested ordered dictionary, for iterability. This data can be used as-is or averaged, as appropriate. kwargs: exclude_bad: Exclude bad items (True) whole_rock: Return recalculated whole-rock data (False) raw: Return raw data not normalized to CI chondrite (False) ree_only: Return only rare-earth element data (False) """ exclude_bad = kwargs.pop('exclude_bad',True) whole_rock = kwargs.pop('whole_rock',False) raw = kwargs.pop('raw',False) if exclude_bad: bad_data = datum.bad.isnot(True) else: # A meaningless filter bad_data = True if raw: ppm, std = (datum.raw_ppm, datum.raw_std) else: ppm, std = (datum.norm_ppm, datum.norm_std) q = (db.session.query(datum) .filter(datum.element != 14) .filter(bad_data) .join(meas) .group_by( meas.sample_id, meas.mineral, datum.element) .with_entities( meas.sample_id, meas.mineral, datum.element, func.array_agg(ppm) .label('ppm'), func.array_agg(std) .label('std'), func.count(ppm) .label('n'))) df = read_sql(q.statement,q.session.bind) # Apply uncertainty def uncertainty(row): vals = tuple(zip(row['ppm'],row['std'])) return tuple(uval(n,s) for n,s in vals) df['norm'] = df.apply(uncertainty,axis=1) df = (df .drop('ppm', axis=1) .drop('std', axis=1)) fn = lambda x: elements[x['element']].symbol df['symbol'] = df.apply(fn, axis=1) df['average'] = df['norm'].apply(N.mean) names = ('sample_id','mineral','element','symbol') df.index = MultiIndex.from_arrays( [df.pop(i) for i in names]) df.sortlevel(inplace=True) if kwargs.pop('ree_only',True): df = ree_only(df) # Add recalculated whole-rock data if requested if not whole_rock: return df xenoliths = (db.session.query(Sample) .filter_by(xenolith=True)) modes = {s.id:s.modes() for s in xenoliths.all()} modes = DataFrame.from_dict(modes, orient='index') modes = modes.stack() modes.name = 'mode' ix = ['sample_id','mineral'] modes.index.names = ix d1 = df.reset_index().join(modes,on=ix) d1['average'] *= d1.pop('mode') d1 = d1.groupby(['sample_id','element','symbol']) d1 = d1.aggregate(dict(average=sum)) d1['mineral'] = 'whole_rock' d1.set_index('mineral',append=True, inplace=True) d1 = d1.reorder_levels(df.index.names) return concat([df,d1])
def test_large_mi_contains(self): # GH#10645 result = MultiIndex.from_arrays([range(10**6), range(10**6)]) assert not (10**6, 0) in result
def test_isin_multi_index_with_missing_value(labels, expected, level): # GH 19132 midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]]) tm.assert_numpy_array_equal(midx.isin(labels, level=level), expected)
def test_is_monotonic_increasing(): i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=['one', 'two']) assert i.is_monotonic is True assert i._is_strictly_monotonic_increasing is True assert Index(i.values).is_monotonic is True assert i._is_strictly_monotonic_increasing is True i = MultiIndex.from_product( [np.arange(10, 0, -1), np.arange(10)], names=['one', 'two']) assert i.is_monotonic is False assert i._is_strictly_monotonic_increasing is False assert Index(i.values).is_monotonic is False assert Index(i.values)._is_strictly_monotonic_increasing is False i = MultiIndex.from_product( [np.arange(10), np.arange(10, 0, -1)], names=['one', 'two']) assert i.is_monotonic is False assert i._is_strictly_monotonic_increasing is False assert Index(i.values).is_monotonic is False assert Index(i.values)._is_strictly_monotonic_increasing is False i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) assert i.is_monotonic is False assert i._is_strictly_monotonic_increasing is False assert Index(i.values).is_monotonic is False assert Index(i.values)._is_strictly_monotonic_increasing is False # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic is False assert Index(i.values).is_monotonic is False assert i._is_strictly_monotonic_increasing is False assert Index(i.values)._is_strictly_monotonic_increasing is False i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic is True assert Index(i.values).is_monotonic is True assert i._is_strictly_monotonic_increasing is True assert Index(i.values)._is_strictly_monotonic_increasing is True # mixed levels, hits the TypeError i = MultiIndex(levels=[[1, 2, 3, 4], [ 'gb00b03mlx29', 'lu0197800237', 'nl0000289783', 'nl0000289965', 'nl0000301109' ]], codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], names=['household_id', 'asset_id']) assert i.is_monotonic is False assert i._is_strictly_monotonic_increasing is False # empty i = MultiIndex.from_arrays([[], []]) assert i.is_monotonic is True assert Index(i.values).is_monotonic is True assert i._is_strictly_monotonic_increasing is True assert Index(i.values)._is_strictly_monotonic_increasing is True
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names): # GH#20421 mi = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=["x", "y", "x"]) result = getattr(mi, func)(rename_dict) expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names) tm.assert_index_equal(result, expected)
def test_from_tuples_empty(): # GH 16777 result = MultiIndex.from_tuples([], names=["a", "b"]) expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(result, expected)
def _groupby_reduce(self, ops=None): columns = self._df._get_columns() dtypes = self._df._frame.dtypes valid_ops = {} valid_columns = [] size_reduction = len(ops) == 1 and ops[0][0] == "size" # If the ops are given as a list, apply them across all the columns # with compatible data types if isinstance(ops, list): key_indices = set(self._keys) for col_idx, col in enumerate(columns): if col_idx in key_indices: continue if reduction.incompatible_ops(ops, dtypes[col_idx]): continue valid_ops[col_idx] = [desc[0] for desc in ops] valid_columns.append(col) # Special case with the size reduction, which produces a single # output regardless of the number of input columns if size_reduction: break # If the ops are passed in a dictionary, it also specifies the input # columns on which the aggregation are performed else: assert is_dict_like(ops) for col, descs in ops.items(): col_idx = columns.get_indexer_for([col]) if len(col_idx) > 1: raise KeyError(f"ambiguous column name {col}") if col_idx[0] == -1: raise KeyError(col) if reduction.incompatible_ops(descs, dtypes[col_idx[0]]): continue valid_ops[col_idx[0]] = [desc[0] for desc in descs] valid_columns.append(col) frame = self._df._frame.groupby_reduce(self._keys, valid_ops, self._method, self._sort) # If more than one aggregation is requested for a column, # the output column names should use MultiIndex multi_aggs = any(len(set(ops)) > 1 for ops in valid_ops.values()) def _generate_columns(columns, all_ops): if multi_aggs: from pandas import MultiIndex pairs = [] for idx, ops in all_ops.items(): pairs.extend([(columns[idx], op) for op in ops]) index = MultiIndex.from_tuples(pairs) if self._is_series_groupby: index = index.droplevel(0) return index else: from pandas import Index return Index([columns[idx] for idx in all_ops.keys()]) from .dataframe import DataFrame if self._as_index: # Groupby keys are rearranged to come first in the frame, # no matter where they were in the input frame, so the # indexer should be picking the first N keys in the frame, # where N is the number of keys indexer = list(range(len(self._keys))) index_columns = frame.select_columns(indexer) # However, the index names are derived from the input # dataframe, which is not rearranged, so we use the original # indexer to select the names index_names = columns[self._keys] value_names = _generate_columns(columns, valid_ops) # Once we find the index columns, we drop them from the frame frame = frame.drop_columns(indexer) frame = frame.set_index(index_columns, index_names) if size_reduction or (self._is_series_groupby and not multi_aggs): # Size reduction always produces a series from .series import Series return Series(frame=frame, name=value_names[0]) else: return DataFrame(frame=frame, columns=value_names) else: # Index levels don't survive in the output when as_index is False levels = set(self._levels) keys = [key for key in self._keys if key not in levels] key_names = columns[keys] value_names = _generate_columns(columns, valid_ops) # If the column names are stored in a MultiIndex, # we should extend the key names to match the shape if multi_aggs: from pandas import MultiIndex key_names = MultiIndex.from_arrays( [key_names, [""] * len(key_names)]) value_names = key_names.append(value_names) frame = frame.drop_columns(self._levels) return DataFrame(frame=frame, columns=value_names)
Categorical(pd.date_range("2018-06-01 00", freq="1T", periods=3)), ], names=["key1", "key2"], ) expected = DataFrame({"values": [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "as_index, expected", [ ( True, Series( index=MultiIndex.from_arrays( [Series([1, 1, 2], dtype="category"), [1, 2, 2]], names=["a", "b"] ), data=[1, 2, 3], name="x", ), ), ( False, DataFrame( { "a": Series([1, 1, 2], dtype="category"), "b": [1, 2, 2], "x": [1, 2, 3], } ), ),
print(data.unstack()) # unstack之后就是一个dataframe print(data.unstack().stack()) # stack之后是一个series print('DataFrame的层次索引') frame = DataFrame(np.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) print(frame) frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] print(frame) print(frame.ix['a', 1]) # a表示key1,1表示key2,这条语句表示key1为a,key2为1的那一行 print(frame.ix['a', 2]['Colorado']) # 第一个[]表示行索引index,第二个[]表示列索引column print(frame.ix['a', 2]['Ohio']['Red']) print(frame.ix['a',2]['Ohio','Red']) print('直接用MultiIndex创建层次索引结构') print(MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Gree', 'Red', 'Green']], names = ['state', 'color'])) # 这里索引的先后顺序会按照字母大小来排列 # reordering_and_sorting_levels 重新分级顺序 print('索引层级交换') frame = DataFrame(np.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame_swapped = frame.swaplevel('key1', 'key2') print(frame_swapped) print(frame_swapped.swaplevel(0, 1)) print('根据索引排序') # print(frame.sortlevel('key2')) # print(frame.swaplevel(0, 1).sortlevel(0)) print(frame.sort_index(level= 'key2')) # 根据key2排序 print(frame.swaplevel(0, 1).sort_index(level = 0)) # 交换index 0和1,再对index为0的进行排序
def _to_narrow(self, terms, data, mask, dates, assets): """ Convert raw computed pipeline results into a DataFrame for public APIs. Parameters ---------- terms : dict[str -> Term] Dict mapping column names to terms. data : dict[str -> ndarray[ndim=2]] Dict mapping column names to computed results for those names. mask : ndarray[bool, ndim=2] Mask array of values to keep. dates : ndarray[datetime64, ndim=1] Row index for arrays `data` and `mask` assets : ndarray[int64, ndim=2] Column index for arrays `data` and `mask` Returns ------- results : pd.DataFrame The indices of `results` are as follows: index : two-tiered MultiIndex of (date, asset). Contains an entry for each (date, asset) pair corresponding to a `True` value in `mask`. columns : Index of str One column per entry in `data`. If mask[date, asset] is True, then result.loc[(date, asset), colname] will contain the value of data[colname][date, asset]. """ if not mask.any(): # Manually handle the empty DataFrame case. This is a workaround # to pandas failing to tz_localize an empty dataframe with a # MultiIndex. It also saves us the work of applying a known-empty # mask to each array. # # Slicing `dates` here to preserve pandas metadata. empty_dates = dates[:0] empty_assets = array([], dtype=object) return DataFrame( data={ name: array([], dtype=arr.dtype) for name, arr in iteritems(data) }, index=MultiIndex.from_arrays([empty_dates, empty_assets]), ) resolved_assets = array(self._finder.retrieve_all(assets)) dates_kept = repeat_last_axis(dates.values, len(assets))[mask] assets_kept = repeat_first_axis(resolved_assets, len(dates))[mask] final_columns = {} for name in data: # Each term that computed an output has its postprocess method # called on the filtered result. # # As of Mon May 2 15:38:47 2016, we only use this to convert # LabelArrays into categoricals. final_columns[name] = terms[name].postprocess(data[name][mask]) return DataFrame( data=final_columns, index=MultiIndex.from_arrays([dates_kept, assets_kept]), ).tz_localize('UTC', level=0)
def test_empty_index_col_scenarios(self): data = 'x,y,z' # None, no index index_col, expected = None, DataFrame([], columns=list('xyz')), tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # False, no index index_col, expected = False, DataFrame([], columns=list('xyz')), tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # int, first column index_col, expected = 0, DataFrame([], columns=['y', 'z'], index=Index([], name='x')) tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # int, not first column index_col, expected = 1, DataFrame([], columns=['x', 'z'], index=Index([], name='y')) tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # str, first column index_col, expected = 'x', DataFrame([], columns=['y', 'z'], index=Index([], name='x')) tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # str, not the first column index_col, expected = 'y', DataFrame([], columns=['x', 'z'], index=Index([], name='y')) tm.assert_frame_equal( self.read_csv(StringIO(data), index_col=index_col), expected) # list of int index_col, expected = [0, 1], DataFrame( [], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, check_index_type=False) # list of str index_col = ['x', 'y'] expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, check_index_type=False) # list of int, reversed sequence index_col = [1, 0] expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, check_index_type=False) # list of str, reversed sequence index_col = ['y', 'x'] expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, check_index_type=False)
def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx = MultiIndex.from_arrays([idx1, idx2]) df = DataFrame( { "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"] }, index=idx, ) expected = DataFrame( { "idx1": [ datetime(2011, 1, 1), datetime(2011, 1, 2), datetime(2011, 1, 3), datetime(2011, 1, 4), datetime(2011, 1, 5), ], "idx2": np.arange(5, dtype="int64"), "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"], }, columns=["idx1", "idx2", "a", "b"], ) expected["idx1"] = expected["idx1"].apply( lambda d: Timestamp(d, tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) idx3 = pd.date_range("1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3") idx = MultiIndex.from_arrays([idx1, idx2, idx3]) df = DataFrame( { "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"] }, index=idx, ) expected = DataFrame( { "idx1": [ datetime(2011, 1, 1), datetime(2011, 1, 2), datetime(2011, 1, 3), datetime(2011, 1, 4), datetime(2011, 1, 5), ], "idx2": np.arange(5, dtype="int64"), "idx3": [ datetime(2012, 1, 1), datetime(2012, 2, 1), datetime(2012, 3, 1), datetime(2012, 4, 1), datetime(2012, 5, 1), ], "a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"], }, columns=["idx1", "idx2", "idx3", "a", "b"], ) expected["idx1"] = expected["idx1"].apply( lambda d: Timestamp(d, tz=tz)) expected["idx3"] = expected["idx3"].apply( lambda d: Timestamp(d, tz="Europe/Paris")) tm.assert_frame_equal(df.reset_index(), expected) # GH#7793 idx = MultiIndex.from_product([["a", "b"], pd.date_range("20130101", periods=3, tz=tz)]) df = DataFrame(np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx) expected = DataFrame( { "level_0": "a a a b b b".split(), "level_1": [ datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3), ] * 2, "a": np.arange(6, dtype="int64"), }, columns=["level_0", "level_1", "a"], ) expected["level_1"] = expected["level_1"].apply( lambda d: Timestamp(d, freq="D", tz=tz)) result = df.reset_index() tm.assert_frame_equal(result, expected)
def test_tz_convert_and_localize(self, fn): l0 = date_range('20140701', periods=5, freq='D') l1 = date_range('20140701', periods=5, freq='D') int_idx = Index(range(5)) if fn == 'tz_convert': l0 = l0.tz_localize('UTC') l1 = l1.tz_localize('UTC') for idx in [l0, l1]: l0_expected = getattr(idx, fn)('US/Pacific') l1_expected = getattr(idx, fn)('US/Pacific') df1 = DataFrame(np.ones(5), index=l0) df1 = getattr(df1, fn)('US/Pacific') assert_index_equal(df1.index, l0_expected) # MultiIndex # GH7846 df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) df3 = getattr(df2, fn)('US/Pacific', level=0) assert not df3.index.levels[0].equals(l0) assert_index_equal(df3.index.levels[0], l0_expected) assert_index_equal(df3.index.levels[1], l1) assert not df3.index.levels[1].equals(l1_expected) df3 = getattr(df2, fn)('US/Pacific', level=1) assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) # TODO: untested df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) # Bad Inputs # Not DatetimeIndex / PeriodIndex with pytest.raises(TypeError, match='DatetimeIndex'): df = DataFrame(index=int_idx) df = getattr(df, fn)('US/Pacific') # Not DatetimeIndex / PeriodIndex with pytest.raises(TypeError, match='DatetimeIndex'): df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) df = getattr(df, fn)('US/Pacific', level=0) # Invalid level with pytest.raises(ValueError, match='not valid'): df = DataFrame(index=l0) df = getattr(df, fn)('US/Pacific', level=1)
def test_isin_nan_not_pypy(): idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, False])) tm.assert_numpy_array_equal(idx.isin([("bar", float("nan"))]), np.array([False, False]))
def check_query_with_unnamed_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # ## LEVEL 1 ind = Series(df.index.get_level_values(1).values, index=index) res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) exp = df[ind == 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) exp = df[ind != 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays): msg = "Input must be a list / sequence of array-likes" with pytest.raises(TypeError, match=msg): MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)