def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): # GH: 25165 date_idx = date_range("2019", periods=2, freq="MS") df = DataFrame( list(range(4)), index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), ) expected = DataFrame( exp_values, index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]), ) result = df[indexer] tm.assert_frame_equal(result, expected) result = df.loc[indexer] tm.assert_frame_equal(result, expected) result = df.loc(axis=0)[indexer] tm.assert_frame_equal(result, expected) result = df.loc[indexer, :] tm.assert_frame_equal(result, expected) df2 = df.swaplevel(0, 1).sort_index() expected = expected.swaplevel(0, 1).sort_index() result = df2.loc[:, indexer, :] tm.assert_frame_equal(result, expected)
def practice_five(): data = Series(np.randomrandn(10), index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'], [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]]) data.index data['b'] data['b':'c'] data.ix[['b', 'd']] data[:, 2] data.unstack() data.unstack().stack() # 重排分级顺序 frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['O', 'O', 'C'], ['G', 'R', 'G']]) frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame.swaplevel('key1', 'key2') frame.sortlevel(1) frame.swaplevel(0, 1).sortlevel(0) # 根据级别汇总统计 frame.sum(level='key2') frame.sum(level='color', axis=1) pass
def get_relevance_scores(player_stats: pd.DataFrame, roster_stats: pd.DataFrame) -> pd.DataFrame: num_players = len( player_stats.index.get_level_values(level=1).unique()) diffs = np.transpose(sigmoid(player_stats.swaplevel().values).reshape( (num_players, -1, 4)), axes=(1, 2, 0)) - sigmoid( roster_stats.values[..., None]) return (pd.DataFrame( data=np.transpose(diffs, axes=(2, 0, 1)).reshape( (27 * num_players, 4)), index=player_stats.swaplevel().index, columns=player_stats.swaplevel().columns, ).groupby(level=0).apply(lambda df: df.sum(axis=0)))
def genTable(self, col=['result', 'CPU (s)'], caption=None, label='tab'): ''' col is a subset of ['result', 'CPU (s)', 'CPU (ms)', 'Abs error', 'log error'] ''' tab = DataFrame(index=self.nlist, columns=[['result' for _ in self.fNameList], self.fNameList]).fillna(0) for key in col: tab = tab.add(self.frameDict[key], fill_value=0) if 'result' not in col: tab = tab.drop('result', axis=1, level=0) frame = tab.swaplevel(0, 1, axis=1).sortlevel(0, axis=1) print '\\begin{figure}[!t]' print '\\begin{center}' print frame.to_latex().encode('ascii', 'replace') print '\\end{center}' print '\\caption{' print caption print '}\label{' + label + '}' print '\\end{figure}' print
def _calculate_force_index(data: pd.DataFrame, period: int) -> pd.Series: data = data.swaplevel(0, 1, 1) return (data.groupby( level=0, axis=1).apply(lambda stock_data: volume.force_index( stock_data.xs("Close", level=1, axis=1).squeeze(), stock_data.xs("Volume", level=1, axis=1).squeeze(), n=period, )).iloc[-1])
def hierachicalIndexingDataFrame(): #Allows to have multiple (two or more) index levels on an axis. Provides a way for you to work with higher dimensional data in lower dimensional form df = DataFrame(np.arange(12).reshape(4,3), index=[['a','a','b','b'],[1,2,1,2]], columns=[['Ohio','Ohio','Colorado'], ['Green', 'Red','Green']]) print (df) print (df['Ohio']) print (df.unstack()) df.index.names = ['key1','key2'] df.columns.names = ['state','color'] print (df) print (df.swaplevel('key1','key2'))
def get_moments(df_betas: pd.DataFrame): """ Calculates moments for each series of betas and returns as a dataframe. """ df_select = df_betas.swaplevel("stat", "win_type", axis=1)["beta_1"] records = [] for s in df_select.items(): moments = stats.describe(s[1].dropna()) for stat in ["mean", "variance", "skewness", "kurtosis"]: record = {"win_type": s[0][0], "win_length": s[0][1]} record["stat"] = stat record["stat_value"] = moments._asdict()[stat] records.append(record) return pd.DataFrame(records)
def _calculate_keltner_bands(self, data: pd.DataFrame, period: int, multiplier) -> tuple: ema = self._calculate_ema(data, period) data = data.swaplevel(0, 1, 1) atr = (data.groupby( level=0, axis=1).apply(lambda stock_data: volatility.average_true_range( stock_data.xs("High", level=1, axis=1).squeeze(), stock_data.xs("Low", level=1, axis=1).squeeze(), stock_data.xs("Adj Close", level=1, axis=1).squeeze(), n=period, )).iloc[-1]) upper_band = ema + multiplier * atr lower_band = ema - multiplier * atr return upper_band, lower_band
index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'], [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]]) import numpy as np data = Series(np.random.randn(10), index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'], [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]]) data data.index data[b] data['b'] data['b': 'c'] data.ix[['b', 'd']] # selection in an inner level data[:, 2] data.unstack() data.unstack().stack() from pandas import DataFrame frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame # Reordering and Sorting Levels frame.swaplevel('key1', 'key2') frame.sortlevel(1) # Summary Ststistics by Level frame.sum(level='key2')
['Seoul', 'Busan', 'Kwangju'], ['red', 'green', 'red'], ], ) print(df) # column index의 이름 정하기 df.columns.name = ['city', 'color'] print(df) df.index.names = ['key1', 'key2'] print(df) print(df['Seoul']) # 색인 계층의 순서를 바꾸기 # swaplevel() 메서드를 이용해서 바꾼다. print(df.swaplevel('key1', 'key2')) # key1과 key2를 바꾸겠다. # 사전식으로 계층을 바꾸어서 정렬하기 # sortlevel() 메서드를 이용해서 정렬한다. df2 = df.swaplevel('key1', 'key2') print(df2.sortlevel(0)) # 어느 깊이의 계층을 할것인지 정해준다. df3 = df.sortlevel(1) print(df3.swaplevel(0, 1)) # 숫자를 이용해도 가능 print(df.swaplevel(0, 1).sortlevel(0)) print(df.sortlevel(1).swaplevel(0, 1)) # 전부 다 같은 결과를 낸다. # sum
index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['A', 'B', 'C'], ['d', 'e', 'f']]) print(test_df) # 而后行列的索引都可以具有名字 test_df.index.names = ['cha', 'num'] print(test_df) # 我们尝试对这个DataFrame通过索引进行调用 print(test_df[['A']]) print(test_df.ix[['a']]) print(test_df.ix['a']) print(test_df.ix['a', 'A']) # 交换索引次序 print(test_df.swaplevel('cha', 'num')) test_df.columns.names = ['big', 'small'] print(test_df) print(test_df.swaplevel('big', 'small', axis=1)) # 根据级别进行汇总统计 print(test_df.sum(level='cha')) # 列与行索引的相互转换 test_df_2 = DataFrame({ 'a': range(7), 'b': range(7, 0, -1), 'c': ['a', 'b', 'c', 'd', 'e', 'f', 'g'] })
df8 = df5.dropna(axis = 1, thresh = 2) # to scan across columns; thresh = min count of NA's before deleting df9 = df5.fillna(value = 0, axis = 1) # .fillna() scans columns for NAs, replaces them with 0 df10 = df5.fillna({0:0,1:1,2:2,3:3}) # use a dictionary to fill diff values for diff columns df5.fillna('NA', inplace= True ) # use inplace=True for permanent processing ############################################################################################################# # 5. Index Hierarchies: pandas allows you to create multiple levels of indexes ############################################################################################################# city = DataFrame(np.arange(16).reshape(4,4), index=[['a','a','b','b'],[1,2,1,2]], columns=[['NY','NY','LA','SF'],['cold','hot','hot','cold']]) city.index.names = ['Level-0','Level-1'] # name the indexes city.columns.names = ['Cities','Temp'] # name the columns city2 = city.swaplevel('Cities','Temp',axis=1) # swap level for columns city.sortlevel(1) # sort by Level-1 city3 = city.sum(level='Temp', axis=1) # math operations by levels - sums the rows city4 = city.sum(level='Level-0', axis=0) # math operations by levels - sums on columns ############################################################################################################# # 6. Importing Data ############################################################################################################# # Getting data intp Python like SAS's CARDS system from StringIO import StringIO data ="""\ Sample Type Intelligence 1 Dog Smart 2 Dog Smart 3 Cat Dumb
dframe # construct dataframe with multiple index levels dframe2 = DataFrame(np.arange(16).reshape(4,4), index = [['a','a','b','b'],[1,2,1,2]], columns = [['NY','NY','LA','SF'],['cold','hot','hot','cold']]) dframe2 # naming indexes and columns dframe2.index.names = ['INDEX_1', 'INDEX_2'] dframe2.columns.names = ['Cities','Temp'] dframe2 # Interchange index level orders dframe2.swaplevel('Cities','Temp',axis = 1) # sorting levels dframe2.sortlevel(level = 1, axis = 0) dframe2.sortlevel(level = 0, axis = 0) dframe2.sortlevel(level = 1, axis = 1) dframe2.sortlevel(level = 0, axis = 1) # summing on a specific level dframe2.sum(level = 'Temp', axis = 1)
frame['Ohio'] # ### Reordering and sorting levels(选讲) # In[ ]: frame # In[ ]: frame.swaplevel('key1', 'key2') # In[ ]: frame # In[ ]: frame.sort_index(1) #按照第1个index排序--注意index的序号是从0开始 # In[ ]:
['Green', 'Red', 'Green']]) frame # In[315]: frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame # In[316]: frame['Ohio'] # In[320]: frame.swaplevel('key1', 'key2') # In[322]: frame.sort_index(level=1) # In[326]: frame.swaplevel(0, 1).sort_index(1) # In[327]: frame.sum(level='key2') # In[328]:
print '索引层交换' frame = DataFrame(numpy.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['A', 'A', 'B'], ['A1', 'A2', 'B1']]) frame.index.names = ['key1', 'key2'] print frame ''' A B A1 A2 B1 key1 key2 a 1 0 1 2 2 3 4 5 b 1 6 7 8 2 9 10 11 ''' frame_swapped = frame.swaplevel('key1', 'key2') # 交互索引层 print frame_swapped ''' A B A1 A2 B1 key2 key1 1 a 0 1 2 2 a 3 4 5 1 b 6 7 8 2 b 9 10 11 ''' print frame_swapped.swaplevel(0, 1) # 交换回来 ''' A B A1 A2 B1 key1 key2
frame = DataFrame(numpy.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['A', 'A', 'B'], ['A1', 'A2', 'B1']]) frame.index.names = ['key1', 'key2'] print frame ''' A B A1 A2 B1 key1 key2 a 1 0 1 2 2 3 4 5 b 1 6 7 8 2 9 10 11 ''' frame_swapped = frame.swaplevel('key1', 'key2') # 交互索引层 print frame_swapped ''' A B A1 A2 B1 key2 key1 1 a 0 1 2 2 a 3 4 5 1 b 6 7 8 2 b 9 10 11 ''' print frame_swapped.swaplevel(0, 1) # 交换回来 ''' A B
df3.index df3 df3.ix["b", ].ix[1:, ] df3.ix["a":"b", ].ix[1:, ] df3.unstack().unstack() frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame frame.swaplevel("key1", "key2") frame frame.sortlevel(0) frame frame.sum(level="key1") frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame.sum(level="color", axis=1) frame.sum(level="state", axis=1) ser = Series(np.arange(3.), index=list("abc"))
from numpy.random import randn ser = Series(randn(6),index = [[1,1,1,2,2,2],['a','b','c','a','b','c']]) # 2 lists of indexes ser ser.index ser[1] ser[2] ser[:,'a'] df=ser.unstack() df df2 = DataFrame(np.arange(16).reshape(4,4),index = [['a','a','b','b'],[1,2,1,2]], columns = [['NY','NY','LA','SF'],['cold','hot','cold','hot']]) df2 #naming indices df2.index.names = ['Index_1','Index_2'] df2.columns.names = ['Cities','Temp'] df2 df2.swaplevel('Cities','Temp',axis = 1) #interchange the indices df2.sortlevel(1) df2.sortlevel(0) df2 df2.sortlevel(0,axis=1) df2.sum(axis=1) df2.sum(level = 'Temp',axis =1) #provide the sum for each level of the index separately
print(frame) print(frame.ix['a', 1]) # a表示key1,1表示key2,这条语句表示key1为a,key2为1的那一行 print(frame.ix['a', 2]['Colorado']) # 第一个[]表示行索引index,第二个[]表示列索引column print(frame.ix['a', 2]['Ohio']['Red']) print(frame.ix['a',2]['Ohio','Red']) print('直接用MultiIndex创建层次索引结构') print(MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Gree', 'Red', 'Green']], names = ['state', 'color'])) # 这里索引的先后顺序会按照字母大小来排列 # reordering_and_sorting_levels 重新分级顺序 print('索引层级交换') frame = DataFrame(np.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame_swapped = frame.swaplevel('key1', 'key2') print(frame_swapped) print(frame_swapped.swaplevel(0, 1)) print('根据索引排序') # print(frame.sortlevel('key2')) # print(frame.swaplevel(0, 1).sortlevel(0)) print(frame.sort_index(level= 'key2')) # 根据key2排序 print(frame.swaplevel(0, 1).sort_index(level = 0)) # 交换index 0和1,再对index为0的进行排序 # 根据级别汇总统计 summary_statistics_by_level print('根据指定的key计算统计信息') frame = DataFrame(np.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] print(frame)
dframe = DataFrame(np.arange(16).reshape(4, 4), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['SF', 'SF', 'NY', 'NY'], ['Hot', 'Cold', 'Hot', 'Cold']]) dframe # name the Indexes dframe.index.names = ['INDEX1', 'INDEX2'] dframe.columns.names = ['City', 'Temp'] dframe # In[604]: # Swap levels dframe.swaplevel('City', 'Temp', axis=1) # In[611]: # Sort by level 0 index dframe.sort_index(level=0) # In[610]: #sort by level 1 index dframe.sort_index(level=1) # In[614]: dframe.sum() dframe.sum(level='Temp', axis=1)
• 通过stack与unstack变换DataFrame 层次化索引 重新分级顺序 • 索引交换 • 索引重新排序 ''' print('索引层级交换') frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame_swapped = frame.swaplevel('key1', 'key2') print(frame_swapped) print(frame_swapped.swaplevel(0, 1)) print('根据索引排序') print(frame.sortlevel('key2')) print(frame.swaplevel(0, 1).sortlevel(0)) ''' 层次化索引 根据级别汇总统计 • 指定索引级别和轴 层次化索引 使用DataFrame的列 • 将指定列变为索引 • 移除或保留对象
class TestMultiLevel(unittest.TestCase): def setUp(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN self.series = s tm.N = 100 self.tdf = tm.makeTimeDataFrame() self.ymd = self.tdf.groupby( [lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() # use Int64Index, to make sure things work self.ymd.index.levels = [ lev.astype('i8') for lev in self.ymd.index.levels ] self.ymd.index.names = ['year', 'month', 'day'] def test_append(self): a, b = self.frame[:5], self.frame[5:] result = a.append(b) tm.assert_frame_equal(result, self.frame) result = a['A'].append(b['A']) tm.assert_series_equal(result, self.frame['A']) def test_reindex_level(self): # axis=0 month_sums = self.ymd.sum(level='month') result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum) assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) assert_series_equal(result, expected) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): op = getattr(DataFrame, opname) month_sums = self.ymd.sum(level='month') result = op(self.ymd, month_sums, level='month') broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) assert_frame_equal(result, expected) # Series op = getattr(Series, opname) result = op(self.ymd['A'], month_sums['A'], level='month') broadcasted = self.ymd['A'].groupby(level='month').transform( np.sum) expected = op(self.ymd['A'], broadcasted) assert_series_equal(result, expected) _check_op('sub') _check_op('add') _check_op('mul') _check_op('div') def test_pickle(self): import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) unpickled = cPickle.loads(pickled) assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) _test_roundtrip(self.ymd) _test_roundtrip(self.ymd.T) def test_reindex(self): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] expected = self.frame.ix[[0, 3]] assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) self.assert_(chunk.index is new_index) chunk = self.ymd.ix[new_index] self.assert_(chunk.index is new_index) ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) self.assert_(chunk.columns is new_index) chunk = ymdT.ix[:, new_index] self.assert_(chunk.columns is new_index) def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEquals(result.index.names, self.frame.index.names) def test_repr_to_string(self): repr(self.frame) repr(self.ymd) repr(self.frame.T) repr(self.ymd.T) buf = StringIO() self.frame.to_string(buf=buf) self.ymd.to_string(buf=buf) self.frame.T.to_string(buf=buf) self.ymd.T.to_string(buf=buf) def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] assert_almost_equal(col.values, df.values[:, 0]) self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) self.assertRaises(KeyError, df.__getitem__, 'foobar') def test_series_getitem(self): s = self.ymd['A'] result = s[2000, 3] result2 = s.ix[2000, 3] expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] self.assertEquals(result, expected) # fancy result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] expected = s.reindex(s.index[49:51]) assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_getitem_corner(self): s = self.ymd['A'] # don't segfault, GH #495 # out of bounds access self.assertRaises(IndexError, s.__getitem__, len(self.ymd)) # generator result = s[(x > 0 for x in s)] expected = s[s > 0] assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd['A'] s[2000, 3] = np.nan self.assert_(isnull(s.values[42:65]).all()) self.assert_(notnull(s.values[:42]).all()) self.assert_(notnull(s.values[65:]).all()) s[2000, 3, 10] = np.nan self.assert_(isnull(s[49])) def test_series_slice_partial(self): pass def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.ix[:4] expected = self.frame[:4] assert_frame_equal(result, expected) # setitem cp = self.frame.copy() cp.ix[:4] = 0 self.assert_((cp.values[:4] == 0).all()) self.assert_((cp.values[4:] != 0).all()) def test_frame_getitem_setitem_multislice(self): levels = [['t1', 't2'], ['a', 'b', 'c']] labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id']) df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx) result = df.ix[:, 'value'] assert_series_equal(df['value'], result) result = df.ix[1:3, 'value'] assert_series_equal(df['value'][1:3], result) result = df.ix[:, :] assert_frame_equal(df, result) result = df df.ix[:, 'value'] = 10 result['value'] = 10 assert_frame_equal(df, result) df.ix[:, :] = 10 assert_frame_equal(df, result) def test_getitem_tuple_plus_slice(self): # GH #671 df = DataFrame({ 'a': range(10), 'b': range(10), 'c': np.random.randn(10), 'd': np.random.randn(10) }) idf = df.set_index(['a', 'b']) result = idf.ix[(0, 0), :] expected = idf.ix[0, 0] expected2 = idf.xs((0, 0)) assert_series_equal(result, expected) assert_series_equal(result, expected2) def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.ix[('bar', 'two')] assert_series_equal(xs, xs2) assert_almost_equal(xs.values, self.frame.values[4]) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.ix['foo'] expected = self.frame.T['foo'].T assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] expected.index = expected.index.droplevel(1) assert_frame_equal(result, expected) index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ('p', 'q', 'r')]) df = DataFrame(np.random.randn(3, 5), index=index) result = df.xs('c', level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) def test_xs_level_multiple(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep='\s+') result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') assert_frame_equal(result, expected) def test_xs_level0(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep='\s+') result = df.xs('a', level=0) expected = df.xs('a') self.assertEqual(len(result), 2) assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame['A'] result = s[:, 'two'] expected = self.frame.xs('two', level=1)['A'] assert_series_equal(result, expected) s = self.ymd['A'] result = s[2000, 5] expected = self.ymd.ix[2000, 5]['A'] assert_series_equal(result, expected) # not implementing this for now self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4))) # result = s[2000, 3:4] # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) # assert_series_equal(result, expected) # can do this though def test_get_loc_single_level(self): s = Series(np.random.randn(len(self.single_level)), index=self.single_level) for k in self.single_level.values: s[k] def test_getitem_toplevel(self): df = self.frame.T result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) result = df['bar'] result2 = df.ix[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=['a', 'b', 'c', 'd']) res = frame.ix[1:2] exp = frame.reindex(frame.index[2:]) assert_frame_equal(res, exp) frame.ix[1:2] = 7 self.assert_((frame.ix[1:2] == 7).values.all()) series = Series(np.random.randn(len(index)), index=index) res = series.ix[1:2] exp = series.reindex(series.index[2:]) assert_series_equal(res, exp) series.ix[1:2] = 7 self.assert_((series.ix[1:2] == 7).values.all()) def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, labels=labels) frame = DataFrame(np.random.randn(6, 2), index=index) result = frame.ix[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.ix.__getitem__, 3) # however this will work result = self.frame.ix[2] expected = self.frame.xs(self.frame.index[2]) assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) assert_frame_equal(result, expected) def test_getitem_slice_not_sorted(self): df = self.frame.sortlevel(1).T # buglet with int typechecking result = df.ix[:, :np.int32(3)] expected = df.reindex(columns=df.columns[:3]) assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.ix[('bar', 'two'), 'B'] = 5 self.assertEquals(self.frame.ix[('bar', 'two'), 'B'], 5) # with integer labels df = self.frame.copy() df.columns = range(3) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) def test_fancy_slice_partial(self): result = self.frame.ix['bar':'baz'] expected = self.frame[3:7] assert_frame_equal(result, expected) result = self.ymd.ix[(2000, 2):(2000, 4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] assert_frame_equal(result, expected) def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) self.assertRaises(Exception, df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) self.assertRaises(Exception, self.frame.reset_index()['A'].sortlevel) # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) def test_delevel_infer_dtype(self): tuples = [ tuple for tuple in cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]) ] index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2']) df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() self.assert_(com.is_integer_dtype(deleveled['prm1'])) self.assert_(com.is_float_dtype(deleveled['prm2'])) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) self.assertEquals(len(deleveled.columns), len(self.ymd.columns)) deleveled = self.series.reset_index() self.assert_(isinstance(deleveled, DataFrame)) self.assert_( len(deleveled.columns) == len(self.series.index.levels) + 1) deleveled = self.series.reset_index(drop=True) self.assert_(isinstance(deleveled, Series)) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] result = self.frame.sortlevel(level='second') expected = self.frame.sortlevel(level=1) assert_frame_equal(result, expected) def test_sortlevel_mixed(self): sorted_before = self.frame.sortlevel(1) df = self.frame.copy() df['foo'] = 'bar' sorted_after = df.sortlevel(1) assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) dft = self.frame.T sorted_before = dft.sortlevel(1, axis=1) dft['foo', 'three'] = 'bar' sorted_after = dft.sortlevel(1, axis=1) assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), sorted_after.drop([('foo', 'three')], axis=1)) def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count(axis=axis) expected = expected.reindex_like(result).astype('i8') assert_frame_equal(result, expected) self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan self.ymd.ix[1, [1, 2]] = np.nan self.ymd.ix[7, [0, 1]] = np.nan _check_counts(self.frame) _check_counts(self.ymd) _check_counts(self.frame.T, axis=1) _check_counts(self.ymd.T, axis=1) # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() self.assertRaises(Exception, df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) assert_almost_equal(result.columns, ['A', 'B', 'C']) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', 'three', 'four']], labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) result = s.count(level=0) expected = s.groupby(level=0).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0]) assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(int) assert_frame_equal(result, expected) def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() unstacked2 = unstacked.unstack() # test that ints work unstacked = self.ymd.astype(int).unstack() def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sortlevel(2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).ix[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) expected = self.ymd.unstack(0).stack(0) def test_stack_mixed_dtype(self): df = self.frame.T df['foo', 'four'] = 'foo' df = df.sortlevel(1, axis=1) stacked = df.stack() assert_series_equal(stacked['foo'], df['foo'].stack()) self.assert_(stacked['bar'].dtype == np.float_) def test_unstack_bug(self): df = DataFrame({ 'state': ['naive', 'naive', 'naive', 'activ', 'activ', 'activ'], 'exp': ['a', 'b', 'b', 'b', 'a', 'a'], 'barcode': [1, 2, 3, 4, 1, 3], 'v': ['hi', 'hi', 'bye', 'bye', 'bye', 'peace'], 'extra': np.arange(6.) }) result = df.groupby(['state', 'exp', 'barcode', 'v']).apply(len) unstacked = result.unstack() restacked = unstacked.stack() assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() self.assertEquals(unstacked.index.name, 'first') self.assertEquals(unstacked.columns.names, ['exp', 'second']) restacked = unstacked.stack() self.assertEquals(restacked.index.names, self.frame.index.names) def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') assert_frame_equal(unstacked, expected) self.assertEquals(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sortlevel(0) assert_frame_equal(restacked, self.ymd) self.assertEquals(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1) assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1) assert_frame_equal(unstacked, expected) def test_groupby_transform(self): s = self.frame['A'] grouper = s.index.get_level_values(0) grouped = s.groupby(grouper) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) assert_series_equal(applied.reindex(expected.index), expected) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], labels=[[0], [0], [0]], names=['one', 'two', 'three']) df = DataFrame([np.random.rand(4)], columns=['a', 'b', 'c', 'd'], index=midx) # should work df.groupby(level='three') def test_join(self): a = self.frame.ix[:5, ['A']] b = self.frame.ix[2:, ['B', 'C']] joined = a.join(b, how='outer').reindex(self.frame.index) expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan self.assert_(not np.isnan(joined.values).all()) assert_frame_equal(joined, expected) def test_swaplevel(self): swapped = self.frame['A'].swaplevel(0, 1) swapped2 = self.frame['A'].swaplevel('first', 'second') self.assert_(not swapped.index.equals(self.frame.index)) assert_series_equal(swapped, swapped2) back = swapped.swaplevel(0, 1) back2 = swapped.swaplevel('second', 'first') self.assert_(back.index.equals(self.frame.index)) assert_series_equal(back, back2) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) result = panel.swaplevel(0, 1, axis='major') expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) tm.assert_panel_equal(result, expected) def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) self.assertRaises(Exception, self.ymd.index.reorder_levels, [1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] self.assert_(isinstance(df.columns, MultiIndex)) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])) y = Series(data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])) res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assert_(index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assert_(not index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assert_(not index.is_lexsorted()) self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): df = self.frame.T df['foo'].values[:] = 0 self.assert_((df['foo'].values == 0).all()) # but not if it's mixed-type df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) df['foo']['one'] = 2 self.assert_((df['foo', 'one'] == 0).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())] result = df['foo'] result2 = df.ix[:, 'foo'] expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) df = df.T result = df.xs('foo') result2 = df.ix['foo'] expected = df.reindex(df.index[arrays[0] == 'foo']) expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) arrays = [np.array(x) for x in zip(*index.get_tuple_index())] result = s['qux'] result2 = s.ix['qux'] expected = s[arrays[0] == 'qux'] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) AGG_FUNCTIONS = [ 'sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var' ] def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, range(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, range(2), range(2), [False, True]): if axis == 0: frame = self.frame else: frame = self.frame.T grouped = frame.groupby(level=level, axis=axis) aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] self.assert_(leftside._get_axis(axis).equals(level_index)) self.assert_(rightside._get_axis(axis).equals(level_index)) assert_frame_equal(leftside, rightside) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() k1 = self.ymd.index.get_level_values(0) k2 = self.ymd.index.get_level_values(1) expected = self.ymd.groupby([k1, k2]).mean() assert_frame_equal(result, expected) self.assertEquals(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) df = df.consolidate() def test_ix_preserve_names(self): result = self.ymd.ix[2000] result2 = self.ymd['A'].ix[2000] self.assertEquals(result.index.names, self.ymd.index.names[1:]) self.assertEquals(result2.index.names, self.ymd.index.names[1:]) result = self.ymd.ix[2000, 2] result2 = self.ymd['A'].ix[2000, 2] self.assertEquals(result.index.name, self.ymd.index.names[2]) self.assertEquals(result2.index.name, self.ymd.index.names[2]) def test_partial_set(self): # GH #397 df = self.ymd.copy() exp = self.ymd.copy() df.ix[2000, 4] = 0 exp.ix[2000, 4].values[:] = 0 assert_frame_equal(df, exp) df['A'].ix[2000, 4] = 1 exp['A'].ix[2000, 4].values[:] = 1 assert_frame_equal(df, exp) df.ix[2000] = 5 exp.ix[2000].values[:] = 5 assert_frame_equal(df, exp) # this works...for now df['A'].ix[14] = 5 self.assertEquals(df['A'][14], 5) def test_unstack_preserve_types(self): # GH #403 self.ymd['E'] = 'foo' self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') self.assert_(unstacked['A', 1].dtype == np.float64) self.assert_(unstacked['E', 1].dtype == np.object_) self.assert_(unstacked['F', 1].dtype == np.float64) def test_getitem_lowerdim_corner(self): self.assertRaises(KeyError, self.frame.ix.__getitem__, (('bar', 'three'), 'B')) self.assertRaises(KeyError, self.frame.ix.__setitem__, (('bar', 'three'), 'B'), 0) #---------------------------------------------------------------------- # AMBIGUOUS CASES! def test_partial_ix_missing(self): raise nose.SkipTest result = self.ymd.ix[2000, 0] expected = self.ymd.ix[2000]['A'] assert_series_equal(result, expected) # need to put in some work here # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) def test_fancy_2d(self): raise nose.SkipTest result = self.frame.ix['foo', 'B'] expected = self.frame.xs('foo')['B'] assert_series_equal(result, expected) ft = self.frame.T result = ft.ix['B', 'foo'] expected = ft.xs('B')['foo'] assert_series_equal(result, expected) #---------------------------------------------------------------------- def test_to_html(self): self.ymd.columns.name = 'foo' self.ymd.to_html() self.ymd.T.to_html() def test_level_with_tuples(self): index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0), ('foo', 'qux', 0)], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[('foo', 'bar', 0)] result2 = series.ix[('foo', 'bar', 0)] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) result = frame.ix[('foo', 'bar', 0)] result2 = frame.xs(('foo', 'bar', 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ('foo', 'qux')], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[('foo', 'bar')] result2 = series.ix[('foo', 'bar')] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = frame.ix[('foo', 'bar')] result2 = frame.xs(('foo', 'bar')) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd['A'] result = s[5:] expected = s.reindex(s.index[5:]) assert_series_equal(result, expected) exp = self.ymd['A'].copy() s[5:] = 0 exp.values[5:] = 0 self.assert_(np.array_equal(s.values, exp.values)) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) assert_frame_equal(result, expected) def test_mixed_depth_get(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df['a'] expected = df['a', '', ''] assert_series_equal(result, expected) self.assertEquals(result.name, 'a') result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] assert_series_equal(result, expected) self.assertEquals(result.name, ('routine1', 'result1')) def test_mixed_depth_insert(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df.copy() expected = df.copy() result['b'] = [1, 2, 3, 4] expected['b', '', ''] = [1, 2, 3, 4] assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df.drop('a', axis=1) expected = df.drop([('a', '', '')], axis=1) assert_frame_equal(expected, result) result = df.drop(['top'], axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) expected = expected.drop([('top', 'OD', 'wy')], axis=1) assert_frame_equal(expected, result) def test_mixed_depth_pop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) df1 = df.copy() df2 = df.copy() result = df1.pop('a') expected = df2.pop(('a', '', '')) assert_series_equal(expected, result) assert_frame_equal(df1, df2) self.assertEquals(result.name, 'a') expected = df1['top'] df1 = df1.drop(['top'], axis=1) result = df2.pop('top') assert_frame_equal(expected, result) assert_frame_equal(df1, df2)
class TestMultiLevel(unittest.TestCase): def setUp(self): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp")) self.single_level = MultiIndex(levels=[["foo", "bar", "baz", "qux"]], labels=[[0, 1, 2, 3]], names=["first"]) # create test series object arrays = [ ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN self.series = s tm.N = 100 self.tdf = tm.makeTimeDataFrame() self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() # use Int64Index, to make sure things work self.ymd.index.levels = [lev.astype("i8") for lev in self.ymd.index.levels] self.ymd.index.names = ["year", "month", "day"] def test_append(self): a, b = self.frame[:5], self.frame[5:] result = a.append(b) tm.assert_frame_equal(result, self.frame) result = a["A"].append(b["A"]) tm.assert_series_equal(result, self.frame["A"]) def test_reindex_level(self): # axis=0 month_sums = self.ymd.sum(level="month") result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level="month").transform(np.sum) assert_frame_equal(result, expected) # Series result = month_sums["A"].reindex(self.ymd.index, level=1) expected = self.ymd["A"].groupby(level="month").transform(np.sum) assert_series_equal(result, expected) # axis=1 month_sums = self.ymd.T.sum(axis=1, level="month") result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level="month").transform(np.sum).T assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): op = getattr(DataFrame, opname) month_sums = self.ymd.sum(level="month") result = op(self.ymd, month_sums, level="month") broadcasted = self.ymd.groupby(level="month").transform(np.sum) expected = op(self.ymd, broadcasted) assert_frame_equal(result, expected) # Series op = getattr(Series, opname) result = op(self.ymd["A"], month_sums["A"], level="month") broadcasted = self.ymd["A"].groupby(level="month").transform(np.sum) expected = op(self.ymd["A"], broadcasted) assert_series_equal(result, expected) _check_op("sub") _check_op("add") _check_op("mul") _check_op("div") def test_pickle(self): import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) unpickled = cPickle.loads(pickled) assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) _test_roundtrip(self.ymd) _test_roundtrip(self.ymd.T) def test_reindex(self): reindexed = self.frame.ix[[("foo", "one"), ("bar", "one")]] expected = self.frame.ix[[0, 3]] assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) self.assert_(chunk.index is new_index) chunk = self.ymd.ix[new_index] self.assert_(chunk.index is new_index) ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) self.assert_(chunk.columns is new_index) chunk = ymdT.ix[:, new_index] self.assert_(chunk.columns is new_index) def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEquals(result.index.names, self.frame.index.names) def test_repr_to_string(self): repr(self.frame) repr(self.ymd) repr(self.frame.T) repr(self.ymd.T) buf = StringIO() self.frame.to_string(buf=buf) self.ymd.to_string(buf=buf) self.frame.T.to_string(buf=buf) self.ymd.T.to_string(buf=buf) def test_getitem_simple(self): df = self.frame.T col = df["foo", "one"] assert_almost_equal(col.values, df.values[:, 0]) self.assertRaises(KeyError, df.__getitem__, ("foo", "four")) self.assertRaises(KeyError, df.__getitem__, "foobar") def test_series_getitem(self): s = self.ymd["A"] result = s[2000, 3] result2 = s.ix[2000, 3] expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] self.assertEquals(result, expected) # fancy result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] expected = s.reindex(s.index[49:51]) assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_getitem_corner(self): s = self.ymd["A"] # don't segfault, GH #495 # out of bounds access self.assertRaises(IndexError, s.__getitem__, len(self.ymd)) # generator result = s[(x > 0 for x in s)] expected = s[s > 0] assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd["A"] s[2000, 3] = np.nan self.assert_(isnull(s.values[42:65]).all()) self.assert_(notnull(s.values[:42]).all()) self.assert_(notnull(s.values[65:]).all()) s[2000, 3, 10] = np.nan self.assert_(isnull(s[49])) def test_series_slice_partial(self): pass def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.ix[:4] expected = self.frame[:4] assert_frame_equal(result, expected) # setitem cp = self.frame.copy() cp.ix[:4] = 0 self.assert_((cp.values[:4] == 0).all()) self.assert_((cp.values[4:] != 0).all()) def test_frame_getitem_setitem_multislice(self): levels = [["t1", "t2"], ["a", "b", "c"]] labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] midx = MultiIndex(labels=labels, levels=levels, names=[None, "id"]) df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) result = df.ix[:, "value"] assert_series_equal(df["value"], result) result = df.ix[1:3, "value"] assert_series_equal(df["value"][1:3], result) result = df.ix[:, :] assert_frame_equal(df, result) result = df df.ix[:, "value"] = 10 result["value"] = 10 assert_frame_equal(df, result) df.ix[:, :] = 10 assert_frame_equal(df, result) def test_getitem_tuple_plus_slice(self): # GH #671 df = DataFrame({"a": range(10), "b": range(10), "c": np.random.randn(10), "d": np.random.randn(10)}) idf = df.set_index(["a", "b"]) result = idf.ix[(0, 0), :] expected = idf.ix[0, 0] expected2 = idf.xs((0, 0)) assert_series_equal(result, expected) assert_series_equal(result, expected2) def test_xs(self): xs = self.frame.xs(("bar", "two")) xs2 = self.frame.ix[("bar", "two")] assert_series_equal(xs, xs2) assert_almost_equal(xs.values, self.frame.values[4]) def test_xs_partial(self): result = self.frame.xs("foo") result2 = self.frame.ix["foo"] expected = self.frame.T["foo"].T assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_xs_level(self): result = self.frame.xs("two", level="second") expected = self.frame[self.frame.index.get_level_values(1) == "two"] expected.index = expected.index.droplevel(1) assert_frame_equal(result, expected) index = MultiIndex.from_tuples([("x", "y", "z"), ("a", "b", "c"), ("p", "q", "r")]) df = DataFrame(np.random.randn(3, 5), index=index) result = df.xs("c", level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) def test_xs_level_multiple(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep="\s+") result = df.xs(("a", 4), level=["one", "four"]) expected = df.xs("a").xs(4, level="four") assert_frame_equal(result, expected) def test_xs_level0(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep="\s+") result = df.xs("a", level=0) expected = df.xs("a") self.assertEqual(len(result), 2) assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame["A"] result = s[:, "two"] expected = self.frame.xs("two", level=1)["A"] assert_series_equal(result, expected) s = self.ymd["A"] result = s[2000, 5] expected = self.ymd.ix[2000, 5]["A"] assert_series_equal(result, expected) # not implementing this for now self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4))) # result = s[2000, 3:4] # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) # assert_series_equal(result, expected) # can do this though def test_get_loc_single_level(self): s = Series(np.random.randn(len(self.single_level)), index=self.single_level) for k in self.single_level.values: s[k] def test_getitem_toplevel(self): df = self.frame.T result = df["foo"] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) result = df["bar"] result2 = df.ix[:, "bar"] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"]) res = frame.ix[1:2] exp = frame.reindex(frame.index[2:]) assert_frame_equal(res, exp) frame.ix[1:2] = 7 self.assert_((frame.ix[1:2] == 7).values.all()) series = Series(np.random.randn(len(index)), index=index) res = series.ix[1:2] exp = series.reindex(series.index[2:]) assert_series_equal(res, exp) series.ix[1:2] = 7 self.assert_((series.ix[1:2] == 7).values.all()) def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, labels=labels) frame = DataFrame(np.random.randn(6, 2), index=index) result = frame.ix[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.ix.__getitem__, 3) # however this will work result = self.frame.ix[2] expected = self.frame.xs(self.frame.index[2]) assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) assert_frame_equal(result, expected) def test_getitem_slice_not_sorted(self): df = self.frame.sortlevel(1).T # buglet with int typechecking result = df.ix[:, : np.int32(3)] expected = df.reindex(columns=df.columns[:3]) assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft["foo", "two"] dft["foo", "two"] = s > s.median() assert_series_equal(dft["foo", "two"], s > s.median()) self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) reindexed = dft.reindex(columns=[("foo", "two")]) assert_series_equal(reindexed["foo", "two"], s > s.median()) def test_frame_setitem_ix(self): self.frame.ix[("bar", "two"), "B"] = 5 self.assertEquals(self.frame.ix[("bar", "two"), "B"], 5) # with integer labels df = self.frame.copy() df.columns = range(3) df.ix[("bar", "two"), 1] = 7 self.assertEquals(df.ix[("bar", "two"), 1], 7) def test_fancy_slice_partial(self): result = self.frame.ix["bar":"baz"] expected = self.frame[3:7] assert_frame_equal(result, expected) result = self.ymd.ix[(2000, 2):(2000, 4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] assert_frame_equal(result, expected) def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) self.assertRaises(Exception, df.sortlevel, 0) # axis=1 # series a_sorted = self.frame["A"].sortlevel(0) self.assertRaises(Exception, self.frame.reset_index()["A"].sortlevel) # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product(["foo", "bar"], [10, 20], [1.0, 1.1])] index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) deleveled = df.reset_index() self.assert_(com.is_integer_dtype(deleveled["prm1"])) self.assert_(com.is_float_dtype(deleveled["prm2"])) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) self.assertEquals(len(deleveled.columns), len(self.ymd.columns)) deleveled = self.series.reset_index() self.assert_(isinstance(deleveled, DataFrame)) self.assert_(len(deleveled.columns) == len(self.series.index.levels) + 1) deleveled = self.series.reset_index(drop=True) self.assert_(isinstance(deleveled, Series)) def test_sortlevel_by_name(self): self.frame.index.names = ["first", "second"] result = self.frame.sortlevel(level="second") expected = self.frame.sortlevel(level=1) assert_frame_equal(result, expected) def test_sortlevel_mixed(self): sorted_before = self.frame.sortlevel(1) df = self.frame.copy() df["foo"] = "bar" sorted_after = df.sortlevel(1) assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1)) dft = self.frame.T sorted_before = dft.sortlevel(1, axis=1) dft["foo", "three"] = "bar" sorted_after = dft.sortlevel(1, axis=1) assert_frame_equal( sorted_before.drop([("foo", "three")], axis=1), sorted_after.drop([("foo", "three")], axis=1) ) def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count(axis=axis) expected = expected.reindex_like(result).astype("i8") assert_frame_equal(result, expected) self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan self.ymd.ix[1, [1, 2]] = np.nan self.ymd.ix[7, [0, 1]] = np.nan _check_counts(self.frame) _check_counts(self.ymd) _check_counts(self.frame.T, axis=1) _check_counts(self.ymd.T, axis=1) # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() self.assertRaises(Exception, df.count, level=0) self.frame["D"] = "foo" result = self.frame.count(level=0, numeric_only=True) assert_almost_equal(result.columns, ["A", "B", "C"]) def test_count_level_series(self): index = MultiIndex( levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]] ) s = Series(np.random.randn(len(index)), index=index) result = s.count(level=0) expected = s.groupby(level=0).count() assert_series_equal(result.astype("f8"), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() assert_series_equal(result.astype("f8"), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame["A"][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0]) assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(int) assert_frame_equal(result, expected) def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() unstacked2 = unstacked.unstack() # test that ints work unstacked = self.ymd.astype(int).unstack() def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sortlevel(2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).ix[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) expected = self.ymd.unstack(0).stack(0) def test_stack_mixed_dtype(self): df = self.frame.T df["foo", "four"] = "foo" df = df.sortlevel(1, axis=1) stacked = df.stack() assert_series_equal(stacked["foo"], df["foo"].stack()) self.assert_(stacked["bar"].dtype == np.float_) def test_unstack_bug(self): df = DataFrame( { "state": ["naive", "naive", "naive", "activ", "activ", "activ"], "exp": ["a", "b", "b", "b", "a", "a"], "barcode": [1, 2, 3, 4, 1, 3], "v": ["hi", "hi", "bye", "bye", "bye", "peace"], "extra": np.arange(6.0), } ) result = df.groupby(["state", "exp", "barcode", "v"]).apply(len) unstacked = result.unstack() restacked = unstacked.stack() assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() self.assertEquals(unstacked.index.name, "first") self.assertEquals(unstacked.columns.names, ["exp", "second"]) restacked = unstacked.stack() self.assertEquals(restacked.index.names, self.frame.index.names) def test_unstack_level_name(self): result = self.frame.unstack("second") expected = self.frame.unstack(level=1) assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack("second") result = unstacked.stack("exp") expected = self.frame.unstack().stack(0) assert_frame_equal(result, expected) result = self.frame.stack("exp") expected = self.frame.stack() assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(["year", "month"]) expected = self.ymd.unstack("year").unstack("month") assert_frame_equal(unstacked, expected) self.assertEquals(unstacked.columns.names, expected.columns.names) # series s = self.ymd["A"] s_unstacked = s.unstack(["year", "month"]) assert_frame_equal(s_unstacked, expected["A"]) restacked = unstacked.stack(["year", "month"]) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sortlevel(0) assert_frame_equal(restacked, self.ymd) self.assertEquals(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1) assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1) assert_frame_equal(unstacked, expected) def test_groupby_transform(self): s = self.frame["A"] grouper = s.index.get_level_values(0) grouped = s.groupby(grouper) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) assert_series_equal(applied.reindex(expected.index), expected) def test_groupby_corner(self): midx = MultiIndex(levels=[["foo"], ["bar"], ["baz"]], labels=[[0], [0], [0]], names=["one", "two", "three"]) df = DataFrame([np.random.rand(4)], columns=["a", "b", "c", "d"], index=midx) # should work df.groupby(level="three") def test_join(self): a = self.frame.ix[:5, ["A"]] b = self.frame.ix[2:, ["B", "C"]] joined = a.join(b, how="outer").reindex(self.frame.index) expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan self.assert_(not np.isnan(joined.values).all()) assert_frame_equal(joined, expected) def test_swaplevel(self): swapped = self.frame["A"].swaplevel(0, 1) swapped2 = self.frame["A"].swaplevel("first", "second") self.assert_(not swapped.index.equals(self.frame.index)) assert_series_equal(swapped, swapped2) back = swapped.swaplevel(0, 1) back2 = swapped.swaplevel("second", "first") self.assert_(back.index.equals(self.frame.index)) assert_series_equal(back, back2) ft = self.frame.T swapped = ft.swaplevel("first", "second", axis=1) exp = self.frame.swaplevel("first", "second").T assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({"ItemA": self.frame, "ItemB": self.frame * 2}) result = panel.swaplevel(0, 1, axis="major") expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) tm.assert_panel_equal(result, expected) def test_reorder_levels(self): result = self.ymd.reorder_levels(["month", "day", "year"]) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) assert_frame_equal(result, expected) result = self.ymd["A"].reorder_levels(["month", "day", "year"]) expected = self.ymd["A"].swaplevel(0, 1).swaplevel(1, 2) assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(["month", "day", "year"], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) self.assertRaises(Exception, self.ymd.index.reorder_levels, [1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] self.assert_(isinstance(df.columns, MultiIndex)) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])) y = Series(data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])) res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assert_(index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assert_(not index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assert_(not index.is_lexsorted()) self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): df = self.frame.T df["foo"].values[:] = 0 self.assert_((df["foo"].values == 0).all()) # but not if it's mixed-type df["foo", "four"] = "foo" df = df.sortlevel(0, axis=1) df["foo"]["one"] = 2 self.assert_((df["foo", "one"] == 0).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T df["foo", "four"] = "foo" arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())] result = df["foo"] result2 = df.ix[:, "foo"] expected = df.reindex(columns=df.columns[arrays[0] == "foo"]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) df = df.T result = df.xs("foo") result2 = df.ix["foo"] expected = df.reindex(df.index[arrays[0] == "foo"]) expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_series_getitem_not_sorted(self): arrays = [ ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) arrays = [np.array(x) for x in zip(*index.get_tuple_index())] result = s["qux"] result2 = s.ix["qux"] expected = s[arrays[0] == "qux"] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) def test_count(self): frame = self.frame.copy() frame.index.names = ["a", "b"] result = frame.count(level="b") expect = self.frame.count(level=1) assert_frame_equal(result, expect) result = frame.count(level="a") expect = self.frame.count(level=0) assert_frame_equal(result, expect) series = self.series.copy() series.index.names = ["a", "b"] result = series.count(level="b") expect = self.series.count(level=1) assert_series_equal(result, expect) result = series.count(level="a") expect = self.series.count(level=0) assert_series_equal(result, expect) self.assertRaises(Exception, series.count, "x") self.assertRaises(Exception, frame.count, level="x") AGG_FUNCTIONS = ["sum", "prod", "min", "max", "median", "mean", "skew", "mad", "std", "var"] def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, range(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, range(2), range(2), [False, True]): if axis == 0: frame = self.frame else: frame = self.frame.T grouped = frame.groupby(level=level, axis=axis) aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] self.assert_(leftside._get_axis(axis).equals(level_index)) self.assert_(rightside._get_axis(axis).equals(level_index)) assert_frame_equal(leftside, rightside) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=["year", "month"]) expected = self.ymd.groupby(level=["year", "month"]).sum() assert_frame_equal(result, expected) result = self.ymd["A"].sum(level=["year", "month"]) expected = self.ymd["A"].groupby(level=["year", "month"]).sum() assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() k1 = self.ymd.index.get_level_values(0) k2 = self.ymd.index.get_level_values(1) expected = self.ymd.groupby([k1, k2]).mean() assert_frame_equal(result, expected) self.assertEquals(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): index = MultiIndex.from_tuples([("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df["Totals", ""] = df.sum(1) df = df.consolidate() def test_ix_preserve_names(self): result = self.ymd.ix[2000] result2 = self.ymd["A"].ix[2000] self.assertEquals(result.index.names, self.ymd.index.names[1:]) self.assertEquals(result2.index.names, self.ymd.index.names[1:]) result = self.ymd.ix[2000, 2] result2 = self.ymd["A"].ix[2000, 2] self.assertEquals(result.index.name, self.ymd.index.names[2]) self.assertEquals(result2.index.name, self.ymd.index.names[2]) def test_partial_set(self): # GH #397 df = self.ymd.copy() exp = self.ymd.copy() df.ix[2000, 4] = 0 exp.ix[2000, 4].values[:] = 0 assert_frame_equal(df, exp) df["A"].ix[2000, 4] = 1 exp["A"].ix[2000, 4].values[:] = 1 assert_frame_equal(df, exp) df.ix[2000] = 5 exp.ix[2000].values[:] = 5 assert_frame_equal(df, exp) # this works...for now df["A"].ix[14] = 5 self.assertEquals(df["A"][14], 5) def test_unstack_preserve_types(self): # GH #403 self.ymd["E"] = "foo" self.ymd["F"] = 2 unstacked = self.ymd.unstack("month") self.assert_(unstacked["A", 1].dtype == np.float64) self.assert_(unstacked["E", 1].dtype == np.object_) self.assert_(unstacked["F", 1].dtype == np.float64) def test_getitem_lowerdim_corner(self): self.assertRaises(KeyError, self.frame.ix.__getitem__, (("bar", "three"), "B")) self.assertRaises(KeyError, self.frame.ix.__setitem__, (("bar", "three"), "B"), 0) # ---------------------------------------------------------------------- # AMBIGUOUS CASES! def test_partial_ix_missing(self): raise nose.SkipTest result = self.ymd.ix[2000, 0] expected = self.ymd.ix[2000]["A"] assert_series_equal(result, expected) # need to put in some work here # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) def test_fancy_2d(self): raise nose.SkipTest result = self.frame.ix["foo", "B"] expected = self.frame.xs("foo")["B"] assert_series_equal(result, expected) ft = self.frame.T result = ft.ix["B", "foo"] expected = ft.xs("B")["foo"] assert_series_equal(result, expected) # ---------------------------------------------------------------------- def test_to_html(self): self.ymd.columns.name = "foo" self.ymd.to_html() self.ymd.T.to_html() def test_level_with_tuples(self): index = MultiIndex( levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[("foo", "bar", 0)] result2 = series.ix[("foo", "bar", 0)] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) self.assertRaises(KeyError, series.__getitem__, (("foo", "bar", 0), 2)) result = frame.ix[("foo", "bar", 0)] result2 = frame.xs(("foo", "bar", 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) index = MultiIndex( levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[("foo", "bar")] result2 = series.ix[("foo", "bar")] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = frame.ix[("foo", "bar")] result2 = frame.xs(("foo", "bar")) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd["A"] result = s[5:] expected = s.reindex(s.index[5:]) assert_series_equal(result, expected) exp = self.ymd["A"].copy() s[5:] = 0 exp.values[5:] = 0 self.assert_(np.array_equal(s.values, exp.values)) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) assert_frame_equal(result, expected) def test_mixed_depth_get(self): arrays = [ ["a", "top", "top", "routine1", "routine1", "routine2"], ["", "OD", "OD", "result1", "result2", "result1"], ["", "wx", "wy", "", "", ""], ] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df["a"] expected = df["a", "", ""] assert_series_equal(result, expected) self.assertEquals(result.name, "a") result = df["routine1", "result1"] expected = df["routine1", "result1", ""] assert_series_equal(result, expected) self.assertEquals(result.name, ("routine1", "result1")) def test_mixed_depth_insert(self): arrays = [ ["a", "top", "top", "routine1", "routine1", "routine2"], ["", "OD", "OD", "result1", "result2", "result1"], ["", "wx", "wy", "", "", ""], ] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df.copy() expected = df.copy() result["b"] = [1, 2, 3, 4] expected["b", "", ""] = [1, 2, 3, 4] assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [ ["a", "top", "top", "routine1", "routine1", "routine2"], ["", "OD", "OD", "result1", "result2", "result1"], ["", "wx", "wy", "", "", ""], ] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) result = df.drop("a", axis=1) expected = df.drop([("a", "", "")], axis=1) assert_frame_equal(expected, result) result = df.drop(["top"], axis=1) expected = df.drop([("top", "OD", "wx")], axis=1) expected = expected.drop([("top", "OD", "wy")], axis=1) assert_frame_equal(expected, result) def test_mixed_depth_pop(self): arrays = [ ["a", "top", "top", "routine1", "routine1", "routine2"], ["", "OD", "OD", "result1", "result2", "result1"], ["", "wx", "wy", "", "", ""], ] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) df1 = df.copy() df2 = df.copy() result = df1.pop("a") expected = df2.pop(("a", "", "")) assert_series_equal(expected, result) assert_frame_equal(df1, df2) self.assertEquals(result.name, "a") expected = df1["top"] df1 = df1.drop(["top"], axis=1) result = df2.pop("top") assert_frame_equal(expected, result) assert_frame_equal(df1, df2)
class TestMultiLevel(unittest.TestCase): def setUp(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN self.series = s tm.N = 100 self.tdf = tm.makeTimeDataFrame() self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() self.ymd.index.names = ['year', 'month', 'day'] def test_append(self): a, b = self.frame[:5], self.frame[5:] result = a.append(b) tm.assert_frame_equal(result, self.frame) result = a['A'].append(b['A']) tm.assert_series_equal(result, self.frame['A']) def test_pickle(self): import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) unpickled = cPickle.loads(pickled) assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) _test_roundtrip(self.ymd) _test_roundtrip(self.ymd.T) def test_reindex(self): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] expected = self.frame.ix[[0, 3]] assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) self.assert_(chunk.index is new_index) chunk = self.ymd.ix[new_index] self.assert_(chunk.index is new_index) ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) self.assert_(chunk.columns is new_index) chunk = ymdT.ix[:, new_index] self.assert_(chunk.columns is new_index) def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEquals(result.index.names, self.frame.index.names) def test_repr_to_string(self): repr(self.frame) repr(self.ymd) repr(self.frame.T) repr(self.ymd.T) buf = StringIO() self.frame.to_string(buf=buf) self.ymd.to_string(buf=buf) self.frame.T.to_string(buf=buf) self.ymd.T.to_string(buf=buf) def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] assert_almost_equal(col.values, df.values[:, 0]) self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) self.assertRaises(KeyError, df.__getitem__, 'foobar') def test_series_getitem(self): s = self.ymd['A'] result = s[2000, 3] result2 = s.ix[2000, 3] expected = s[42:65] expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] self.assertEquals(result, expected) # fancy result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] expected = s[49:51] assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_setitem(self): s = self.ymd['A'] s[2000, 3] = np.nan self.assert_(isnull(s[42:65]).all()) self.assert_(notnull(s[:42]).all()) self.assert_(notnull(s[65:]).all()) s[2000, 3, 10] = np.nan self.assert_(isnull(s[49])) def test_series_slice_partial(self): pass def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.ix[('bar', 'two')] assert_series_equal(xs, xs2) assert_almost_equal(xs.values, self.frame.values[4]) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.ix['foo'] expected = self.frame.T['foo'].T assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_fancy_2d(self): result = self.frame.ix['foo', 'B'] expected = self.frame.xs('foo')['B'] assert_series_equal(result, expected) ft = self.frame.T result = ft.ix['B', 'foo'] expected = ft.xs('B')['foo'] assert_series_equal(result, expected) def test_get_loc_single_level(self): s = Series(np.random.randn(len(self.single_level)), index=self.single_level) for k in self.single_level.values: s[k] def test_getitem_toplevel(self): df = self.frame.T result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) result = df['bar'] result2 = df.ix[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, labels=labels) frame = DataFrame(np.random.randn(6, 2), index=index) result = frame.ix[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.ix.__getitem__, 3) # however this will work result = self.frame.ix[2] expected = self.frame.xs(self.frame.index[2]) assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_fancy_slice_partial(self): result = self.frame.ix['bar':'baz'] expected = self.frame[3:7] assert_frame_equal(result, expected) result = self.ymd.ix[(2000,2):(2000,4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] assert_frame_equal(result, expected) def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) self.assertRaises(Exception, df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) self.assertRaises(Exception, self.frame.delevel()['A'].sortlevel) # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1])] index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2']) df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'], index=index) deleveled = df.delevel() self.assert_(com.is_integer_dtype(deleveled['prm1'])) self.assert_(com.is_float_dtype(deleveled['prm2'])) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] result = self.frame.sortlevel(level='second') expected = self.frame.sortlevel(level=1) assert_frame_equal(result, expected) def test_sortlevel_mixed(self): sorted_before = self.frame.sortlevel(1) df = self.frame.copy() df['foo'] = 'bar' sorted_after = df.sortlevel(1) assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) dft = self.frame.T sorted_before = dft.sortlevel(1, axis=1) dft['foo', 'three'] = 'bar' sorted_after = dft.sortlevel(1, axis=1) assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), sorted_after.drop([('foo', 'three')], axis=1)) def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count(axis=axis) expected = expected.reindex_like(result).astype('i8') assert_frame_equal(result, expected) self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan self.ymd.ix[1, [1, 2]] = np.nan self.ymd.ix[7, [0, 1]] = np.nan _check_counts(self.frame) _check_counts(self.ymd) _check_counts(self.frame.T, axis=1) _check_counts(self.ymd.T, axis=1) # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() self.assertRaises(Exception, df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) assert_almost_equal(result.columns, ['A', 'B', 'C']) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', 'three', 'four']], labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) result = s.count(level=0) expected = s.groupby(level=0).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0]) assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(int) assert_frame_equal(result, expected) def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() unstacked2 = unstacked.unstack() # test that ints work unstacked = self.ymd.astype(int).unstack() def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sortlevel(2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).ix[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) expected = self.ymd.unstack(0).stack(0) def test_stack_mixed_dtype(self): df = self.frame.T df['foo', 'four'] = 'foo' df = df.sortlevel(1, axis=1) stacked = df.stack() assert_series_equal(stacked['foo'], df['foo'].stack()) self.assert_(stacked['bar'].dtype == np.float_) def test_unstack_bug(self): df = DataFrame({'state': ['naive','naive','naive', 'activ','activ','activ'], 'exp':['a','b','b','b','a','a'], 'barcode':[1,2,3,4,1,3], 'v':['hi','hi','bye','bye','bye','peace'], 'extra': np.arange(6.)}) result = df.groupby(['state','exp','barcode','v']).apply(len) unstacked = result.unstack() restacked = unstacked.stack() assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() self.assertEquals(unstacked.index.name, 'first') self.assertEquals(unstacked.columns.names, ['exp', 'second']) restacked = unstacked.stack() self.assertEquals(restacked.index.names, self.frame.index.names) def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') assert_frame_equal(unstacked, expected) self.assertEquals(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sortlevel(0) assert_frame_equal(restacked, self.ymd) self.assertEquals(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1) assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1) assert_frame_equal(unstacked, expected) def test_groupby_transform(self): s = self.frame['A'] grouper = s.index.get_level_values(0) grouped = s.groupby(grouper) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) assert_series_equal(applied.reindex(expected.index), expected) def test_join(self): a = self.frame.ix[:5, ['A']] b = self.frame.ix[2:, ['B', 'C']] joined = a.join(b, how='outer').reindex(self.frame.index) expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan self.assert_(not np.isnan(joined.values).all()) assert_frame_equal(joined, expected) def test_swaplevel(self): swapped = self.frame['A'].swaplevel(0, 1) swapped2 = self.frame['A'].swaplevel('first', 'second') self.assert_(not swapped.index.equals(self.frame.index)) assert_series_equal(swapped, swapped2) back = swapped.swaplevel(0, 1) back2 = swapped.swaplevel('second', 'first') self.assert_(back.index.equals(self.frame.index)) assert_series_equal(back, back2) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA' : self.frame, 'ItemB' : self.frame * 2}) result = panel.swaplevel(0, 1, axis='major') expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) tm.assert_panel_equal(result, expected) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] self.assert_(isinstance(df.columns, MultiIndex)) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): x = Series(data=[1,2,3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B",3)])) y = Series(data=[4,5,6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B",3)])) res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assert_(index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assert_(not index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assert_(not index.is_lexsorted()) self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): df = self.frame.T df['foo'].values[:] = 0 self.assert_((df['foo'].values == 0).all()) # but not if it's mixed-type df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) df['foo']['one'] = 2 self.assert_((df['foo', 'one'] == 0).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())] result = df['foo'] result2 = df.ix[:, 'foo'] expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) df = df.T result = df.xs('foo') result2 = df.ix['foo'] expected = df.reindex(df.index[arrays[0] == 'foo']) expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) arrays = [np.array(x) for x in zip(*index.get_tuple_index())] result = s['qux'] result2 = s.ix['qux'] expected = s[arrays[0] == 'qux'] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var'] def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, range(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, range(2), range(2), [False, True]): if axis == 0: frame = self.frame else: frame = self.frame.T grouped = frame.groupby(level=level, axis=axis) aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) assert_frame_equal(leftside, rightside) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() k1 = self.ymd.index.get_level_values(0) k2 = self.ymd.index.get_level_values(1) expected = self.ymd.groupby([k1, k2]).mean() assert_frame_equal(result, expected) self.assertEquals(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) df = df.consolidate() def test_ix_preserve_names(self): result = self.ymd.ix[2000] result2 = self.ymd['A'].ix[2000] self.assertEquals(result.index.names, self.ymd.index.names[1:]) self.assertEquals(result2.index.names, self.ymd.index.names[1:]) result = self.ymd.ix[2000, 2] result2 = self.ymd['A'].ix[2000, 2] self.assertEquals(result.index.name, self.ymd.index.names[2]) self.assertEquals(result2.index.name, self.ymd.index.names[2]) def test_partial_set(self): # GH #397 df = self.ymd.copy() exp = self.ymd.copy() df.ix[2000, 4] = 0 exp.ix[2000, 4].values[:] = 0 assert_frame_equal(df, exp) df['A'].ix[2000, 4] = 1 exp['A'].ix[2000, 4].values[:] = 1 assert_frame_equal(df, exp) df.ix[2000] = 5 exp.ix[2000].values[:] = 5 assert_frame_equal(df, exp) # this works...for now df['A'].ix[14] = 5 self.assertEquals(df['A'][14], 5) def test_unstack_preserve_types(self): # GH #403 self.ymd['E'] = 'foo' self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') self.assert_(unstacked['A', 1].dtype == np.float64) self.assert_(unstacked['E', 1].dtype == np.object_) self.assert_(unstacked['F', 1].dtype == np.float64) def test_partial_ix_missing(self): result = self.ymd.ix[2000, 0] expected = self.ymd.ix[2000]['A'] assert_series_equal(result, expected) # need to put in some work here # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) def test_to_html(self): self.ymd.columns.name = 'foo' self.ymd.to_html() self.ymd.T.to_html()
data['b':'c'] data.ix['b','d'] data.ix[['b','d']] data[:,2] data.unstack() data.unstack().stacl() data.unstack().stack() frame = DataFrame(np.arange(12).reshape((4,3)),index=[['a','a','b','b'],[1,2,1,2]],columns=[['Ohio','Ohio','Colorado'],['Green','Red','Green']]) frame frame.index.names=['key1','key2'] frame.columns.names['state','color'] frame.columns.names=['state','color'] frame frame['Ohio'] MultiIndex.from_arrays frame.swaplevel('key1','key2') frame.sortlevel(1) frame.swaplevel(0,1).sortlevel(0) frame.sum(level='key2') frame.sum(level='color',axis=1) frame = DataFrame({'a':range(7),'b':range(7,0,-1), 'c':['one','one','one','two','two','two','two'],'d':[0,1,2,0,1,2,3]}) frame frame2 = frame.set_index('c','d']) frame2 = frame.set_index(['c','d']) frame2 frame2 = frame.set_index(['c','d'],drop=False) frame2 frame2.reset_index() ser = Series(np.arange(3)) ser ser[-1]
frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] print(frame) print('\n') ############################################################### ''' MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']], names = ['state','color']) ''' ############################################################### print(frame.swaplevel('key1', 'key2')) print('\n') print(frame.sortlevel(1)) print('\n') print(frame.sortlevel(0)) print('\n') print(frame.swaplevel(0, 1).sortlevel(0)) print('\n') ############################################################### print(frame.sum(level='key2')) print('\n') print(frame.sum(level='color',axis=1)) print('\n')
# -*- coding: utf-8 -*- import numpy as np from pandas import Series, DataFrame print('索引层级交换') frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) print(frame) frame.index.names = ['key1', 'key2'] frame_swapped = frame.swaplevel('key1', 'key2') print(frame_swapped) print(frame_swapped.swaplevel(0, 1)) print() print('根据索引排序') print(frame.sort_index(1)) print(frame.swaplevel(0, 1).sort_index(1))
#Now we can sleect specific subsets ser1[1] ser1[2] # We can also select from an internal index level ser1[1st level,2nd level] ser1[:, "a"] # We can also create Data Frames from Series with multiple levels dframe = ser1.unstack() dframe # We can also apply multiple level indexing to DataFrames dframe2 = DataFrame(np.arange(16).reshape(4, 4), index=[["a", "a", "b", "b"], [1, 2, 1, 2]], columns=[["NY", "NY", "LA", "SF"], ["cold", "hot", "hot", "cold"]]) dframe2 # We can also give these index levels names #Name the index levels dframe2.index.names = ["index_1", "index_2"] #Name the column levels dframe2.columns.names = ["Cities", "Temp"] dframe2 # We can also interchange level orders (note the axis=1 for columns) dframe2.swaplevel("Cities", "Temp", axis=1) #We can also sort levels dframe2.sort_index(level=1) #workaround sortlevel is removed #Note the change in sorting, now the Dframe index is sorted by the INDEX_2 #We can also perform operations on particular levels dframe2.sum(level=1, axis=1) #we can use number or name in level
class TestMultiLevel(unittest.TestCase): def setUp(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN self.series = s tm.N = 100 self.tdf = tm.makeTimeDataFrame() self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() # use Int64Index, to make sure things work self.ymd.index.levels = [lev.astype('i8') for lev in self.ymd.index.levels] self.ymd.index.names = ['year', 'month', 'day'] def test_append(self): a, b = self.frame[:5], self.frame[5:] result = a.append(b) tm.assert_frame_equal(result, self.frame) result = a['A'].append(b['A']) tm.assert_series_equal(result, self.frame['A']) def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) self.assert_(isinstance(multi.index, MultiIndex)) self.assert_(not isinstance(multi.columns, MultiIndex)) multi = DataFrame(np.random.randn(4, 4), columns=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) self.assert_(isinstance(multi.columns, MultiIndex)) def test_series_constructor(self): multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) self.assert_(isinstance(multi.index, MultiIndex)) multi = Series(1., index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) self.assert_(isinstance(multi.index, MultiIndex)) multi = Series(range(4), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) self.assert_(isinstance(multi.index, MultiIndex)) def test_reindex_level(self): # axis=0 month_sums = self.ymd.sum(level='month') result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum) assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) assert_series_equal(result, expected) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): op = getattr(DataFrame, opname) month_sums = self.ymd.sum(level='month') result = op(self.ymd, month_sums, level='month') broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) assert_frame_equal(result, expected) # Series op = getattr(Series, opname) result = op(self.ymd['A'], month_sums['A'], level='month') broadcasted = self.ymd['A'].groupby(level='month').transform(np.sum) expected = op(self.ymd['A'], broadcasted) assert_series_equal(result, expected) _check_op('sub') _check_op('add') _check_op('mul') _check_op('div') def test_pickle(self): import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) unpickled = cPickle.loads(pickled) assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) _test_roundtrip(self.ymd) _test_roundtrip(self.ymd.T) def test_reindex(self): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] expected = self.frame.ix[[0, 3]] assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) self.assert_(chunk.index is new_index) chunk = self.ymd.ix[new_index] self.assert_(chunk.index is new_index) ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) self.assert_(chunk.columns is new_index) chunk = ymdT.ix[:, new_index] self.assert_(chunk.columns is new_index) def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEquals(result.index.names, self.frame.index.names) def test_repr_to_string(self): repr(self.frame) repr(self.ymd) repr(self.frame.T) repr(self.ymd.T) buf = StringIO() self.frame.to_string(buf=buf) self.ymd.to_string(buf=buf) self.frame.T.to_string(buf=buf) self.ymd.T.to_string(buf=buf) def test_repr_name_coincide(self): index = MultiIndex.from_tuples([('a', 0, 'foo'), ('b', 1, 'bar')], names=['a', 'b', 'c']) df = DataFrame({'value': [0, 1]}, index=index) lines = repr(df).split('\n') self.assert_(lines[2].startswith('a 0 foo')) def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] assert_almost_equal(col.values, df.values[:, 0]) self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) self.assertRaises(KeyError, df.__getitem__, 'foobar') def test_series_getitem(self): s = self.ymd['A'] result = s[2000, 3] result2 = s.ix[2000, 3] expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] self.assertEquals(result, expected) # fancy result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] expected = s.reindex(s.index[49:51]) assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_getitem_corner(self): s = self.ymd['A'] # don't segfault, GH #495 # out of bounds access self.assertRaises(IndexError, s.__getitem__, len(self.ymd)) # generator result = s[(x > 0 for x in s)] expected = s[s > 0] assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd['A'] s[2000, 3] = np.nan self.assert_(isnull(s.values[42:65]).all()) self.assert_(notnull(s.values[:42]).all()) self.assert_(notnull(s.values[65:]).all()) s[2000, 3, 10] = np.nan self.assert_(isnull(s[49])) def test_series_slice_partial(self): pass def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.ix[:4] expected = self.frame[:4] assert_frame_equal(result, expected) # setitem cp = self.frame.copy() cp.ix[:4] = 0 self.assert_((cp.values[:4] == 0).all()) self.assert_((cp.values[4:] != 0).all()) def test_frame_getitem_setitem_multislice(self): levels = [['t1', 't2'], ['a','b','c']] labels = [[0,0,0,1,1], [0,1,2,0,1]] midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id']) df = DataFrame({'value':[1,2,3,7,8]}, index=midx) result = df.ix[:,'value'] assert_series_equal(df['value'], result) result = df.ix[1:3,'value'] assert_series_equal(df['value'][1:3], result) result = df.ix[:,:] assert_frame_equal(df, result) result = df df.ix[:, 'value'] = 10 result['value'] = 10 assert_frame_equal(df, result) df.ix[:,:] = 10 assert_frame_equal(df, result) def test_getitem_tuple_plus_slice(self): # GH #671 df = DataFrame({'a' : range(10), 'b' : range(10), 'c' : np.random.randn(10), 'd' : np.random.randn(10)}) idf = df.set_index(['a', 'b']) result = idf.ix[(0, 0), :] expected = idf.ix[0, 0] expected2 = idf.xs((0, 0)) assert_series_equal(result, expected) assert_series_equal(result, expected2) def test_getitem_setitem_tuple_plus_columns(self): # GH #1013 df = self.ymd[:5] result = df.ix[(2000, 1, 6), ['A', 'B', 'C']] expected = df.ix[2000, 1, 6][['A', 'B', 'C']] assert_series_equal(result, expected) def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.ix[('bar', 'two')] assert_series_equal(xs, xs2) assert_almost_equal(xs.values, self.frame.values[4]) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.ix['foo'] expected = self.frame.T['foo'].T assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] expected.index = expected.index.droplevel(1) assert_frame_equal(result, expected) index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ('p', 'q', 'r')]) df = DataFrame(np.random.randn(3, 5), index=index) result = df.xs('c', level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) def test_xs_level_multiple(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep='\s+') result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') assert_frame_equal(result, expected) def test_xs_level0(self): from pandas import read_table from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = read_table(StringIO(text), sep='\s+') result = df.xs('a', level=0) expected = df.xs('a') self.assertEqual(len(result), 2) assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame['A'] result = s[:, 'two'] expected = self.frame.xs('two', level=1)['A'] assert_series_equal(result, expected) s = self.ymd['A'] result = s[2000, 5] expected = self.ymd.ix[2000, 5]['A'] assert_series_equal(result, expected) # not implementing this for now self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4))) # result = s[2000, 3:4] # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) # assert_series_equal(result, expected) # can do this though def test_get_loc_single_level(self): s = Series(np.random.randn(len(self.single_level)), index=self.single_level) for k in self.single_level.values: s[k] def test_getitem_toplevel(self): df = self.frame.T result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) result = df['bar'] result2 = df.ix[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=['a', 'b', 'c', 'd']) res = frame.ix[1:2] exp = frame.reindex(frame.index[2:]) assert_frame_equal(res, exp) frame.ix[1:2] = 7 self.assert_((frame.ix[1:2] == 7).values.all()) series = Series(np.random.randn(len(index)), index=index) res = series.ix[1:2] exp = series.reindex(series.index[2:]) assert_series_equal(res, exp) series.ix[1:2] = 7 self.assert_((series.ix[1:2] == 7).values.all()) def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, labels=labels) frame = DataFrame(np.random.randn(6, 2), index=index) result = frame.ix[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.ix.__getitem__, 3) # however this will work result = self.frame.ix[2] expected = self.frame.xs(self.frame.index[2]) assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) assert_frame_equal(result, expected) def test_getitem_slice_not_sorted(self): df = self.frame.sortlevel(1).T # buglet with int typechecking result = df.ix[:, :np.int32(3)] expected = df.reindex(columns=df.columns[:3]) assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.ix[('bar', 'two'), 'B'] = 5 self.assertEquals(self.frame.ix[('bar', 'two'), 'B'], 5) # with integer labels df = self.frame.copy() df.columns = range(3) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) def test_fancy_slice_partial(self): result = self.frame.ix['bar':'baz'] expected = self.frame[3:7] assert_frame_equal(result, expected) result = self.ymd.ix[(2000,2):(2000,4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] assert_frame_equal(result, expected) def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) self.assertRaises(Exception, df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) self.assertRaises(Exception, self.frame.reset_index()['A'].sortlevel) # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1])] index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2']) df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() self.assert_(com.is_integer_dtype(deleveled['prm1'])) self.assert_(com.is_float_dtype(deleveled['prm2'])) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop = True) self.assertEquals(len(deleveled.columns), len(self.ymd.columns)) deleveled = self.series.reset_index() self.assert_(isinstance(deleveled, DataFrame)) self.assert_(len(deleveled.columns) == len(self.series.index.levels)+1) deleveled = self.series.reset_index(drop = True) self.assert_(isinstance(deleveled, Series)) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] result = self.frame.sortlevel(level='second') expected = self.frame.sortlevel(level=1) assert_frame_equal(result, expected) def test_sortlevel_mixed(self): sorted_before = self.frame.sortlevel(1) df = self.frame.copy() df['foo'] = 'bar' sorted_after = df.sortlevel(1) assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) dft = self.frame.T sorted_before = dft.sortlevel(1, axis=1) dft['foo', 'three'] = 'bar' sorted_after = dft.sortlevel(1, axis=1) assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), sorted_after.drop([('foo', 'three')], axis=1)) def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count(axis=axis) expected = expected.reindex_like(result).astype('i8') assert_frame_equal(result, expected) self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan self.ymd.ix[1, [1, 2]] = np.nan self.ymd.ix[7, [0, 1]] = np.nan _check_counts(self.frame) _check_counts(self.ymd) _check_counts(self.frame.T, axis=1) _check_counts(self.ymd.T, axis=1) # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() self.assertRaises(Exception, df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) assert_almost_equal(result.columns, ['A', 'B', 'C']) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', 'three', 'four']], labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) result = s.count(level=0) expected = s.groupby(level=0).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0]) assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(int) assert_frame_equal(result, expected) def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() unstacked2 = unstacked.unstack() # test that ints work unstacked = self.ymd.astype(int).unstack() def test_unstack_multiple_no_empty_columns(self): index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0), (1, 'baz', 1), (1, 'qux', 1)]) s = Series(np.random.randn(4), index=index) unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how='all') assert_frame_equal(unstacked, expected) def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sortlevel(2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).ix[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) expected = self.ymd.unstack(0).stack(0) def test_stack_mixed_dtype(self): df = self.frame.T df['foo', 'four'] = 'foo' df = df.sortlevel(1, axis=1) stacked = df.stack() assert_series_equal(stacked['foo'], df['foo'].stack()) self.assert_(stacked['bar'].dtype == np.float_) def test_unstack_bug(self): df = DataFrame({'state': ['naive','naive','naive', 'activ','activ','activ'], 'exp':['a','b','b','b','a','a'], 'barcode':[1,2,3,4,1,3], 'v':['hi','hi','bye','bye','bye','peace'], 'extra': np.arange(6.)}) result = df.groupby(['state','exp','barcode','v']).apply(len) unstacked = result.unstack() restacked = unstacked.stack() assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() self.assertEquals(unstacked.index.name, 'first') self.assertEquals(unstacked.columns.names, ['exp', 'second']) restacked = unstacked.stack() self.assertEquals(restacked.index.names, self.frame.index.names) def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') assert_frame_equal(unstacked, expected) self.assertEquals(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sortlevel(0) assert_frame_equal(restacked, self.ymd) self.assertEquals(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how='all') assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all') assert_frame_equal(unstacked, expected.ix[:, unstacked.columns]) def test_groupby_transform(self): s = self.frame['A'] grouper = s.index.get_level_values(0) grouped = s.groupby(grouper) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) assert_series_equal(applied.reindex(expected.index), expected) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'],['bar'],['baz']], labels=[[0],[0],[0]], names=['one','two','three']) df = DataFrame([np.random.rand(4)], columns=['a','b','c','d'], index=midx) # should work df.groupby(level='three') def test_join(self): a = self.frame.ix[:5, ['A']] b = self.frame.ix[2:, ['B', 'C']] joined = a.join(b, how='outer').reindex(self.frame.index) expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan self.assert_(not np.isnan(joined.values).all()) assert_frame_equal(joined, expected) def test_swaplevel(self): swapped = self.frame['A'].swaplevel(0, 1) swapped2 = self.frame['A'].swaplevel('first', 'second') self.assert_(not swapped.index.equals(self.frame.index)) assert_series_equal(swapped, swapped2) back = swapped.swaplevel(0, 1) back2 = swapped.swaplevel('second', 'first') self.assert_(back.index.equals(self.frame.index)) assert_series_equal(back, back2) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA' : self.frame, 'ItemB' : self.frame * 2}) result = panel.swaplevel(0, 1, axis='major') expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) tm.assert_panel_equal(result, expected) def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) self.assertRaises(Exception, self.ymd.index.reorder_levels, [1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] self.assert_(isinstance(df.columns, MultiIndex)) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): x = Series(data=[1,2,3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B",3)])) y = Series(data=[4,5,6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B",3)])) res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assert_(index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assert_(not index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assert_(not index.is_lexsorted()) self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): df = self.frame.T df['foo'].values[:] = 0 self.assert_((df['foo'].values == 0).all()) # but not if it's mixed-type df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) df['foo']['one'] = 2 self.assert_((df['foo', 'one'] == 0).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())] result = df['foo'] result2 = df.ix[:, 'foo'] expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) df = df.T result = df.xs('foo') result2 = df.ix['foo'] expected = df.reindex(df.index[arrays[0] == 'foo']) expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) arrays = [np.array(x) for x in zip(*index.get_tuple_index())] result = s['qux'] result2 = s.ix['qux'] expected = s[arrays[0] == 'qux'] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) def test_count(self): frame = self.frame.copy() frame.index.names = ['a', 'b'] result = frame.count(level='b') expect = self.frame.count(level=1) assert_frame_equal(result, expect) result = frame.count(level='a') expect = self.frame.count(level=0) assert_frame_equal(result, expect) series = self.series.copy() series.index.names = ['a', 'b'] result = series.count(level='b') expect = self.series.count(level=1) assert_series_equal(result, expect) result = series.count(level='a') expect = self.series.count(level=0) assert_series_equal(result, expect) self.assertRaises(Exception, series.count, 'x') self.assertRaises(Exception, frame.count, level='x') AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var'] def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, range(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, range(2), range(2), [False, True]): if axis == 0: frame = self.frame else: frame = self.frame.T grouped = frame.groupby(level=level, axis=axis) pieces = [] def aggf(x): pieces.append(x) return getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] self.assert_(leftside._get_axis(axis).equals(level_index)) self.assert_(rightside._get_axis(axis).equals(level_index)) assert_frame_equal(leftside, rightside) def test_std_var_pass_ddof(self): index = MultiIndex.from_arrays([np.arange(5).repeat(10), np.tile(np.arange(10), 5)]) df = DataFrame(np.random.randn(len(index), 5), index=index) for meth in ['var', 'std']: ddof = 4 alt = lambda x: getattr(x, meth)(ddof=ddof) result = getattr(df[0], meth)(level=0, ddof=ddof) expected = df[0].groupby(level=0).agg(alt) assert_series_equal(result, expected) result = getattr(df, meth)(level=0, ddof=ddof) expected = df.groupby(level=0).agg(alt) assert_frame_equal(result, expected) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() k1 = self.ymd.index.get_level_values(0) k2 = self.ymd.index.get_level_values(1) expected = self.ymd.groupby([k1, k2]).mean() assert_frame_equal(result, expected) self.assertEquals(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) df = df.consolidate() def test_ix_preserve_names(self): result = self.ymd.ix[2000] result2 = self.ymd['A'].ix[2000] self.assertEquals(result.index.names, self.ymd.index.names[1:]) self.assertEquals(result2.index.names, self.ymd.index.names[1:]) result = self.ymd.ix[2000, 2] result2 = self.ymd['A'].ix[2000, 2] self.assertEquals(result.index.name, self.ymd.index.names[2]) self.assertEquals(result2.index.name, self.ymd.index.names[2]) def test_partial_set(self): # GH #397 df = self.ymd.copy() exp = self.ymd.copy() df.ix[2000, 4] = 0 exp.ix[2000, 4].values[:] = 0 assert_frame_equal(df, exp) df['A'].ix[2000, 4] = 1 exp['A'].ix[2000, 4].values[:] = 1 assert_frame_equal(df, exp) df.ix[2000] = 5 exp.ix[2000].values[:] = 5 assert_frame_equal(df, exp) # this works...for now df['A'].ix[14] = 5 self.assertEquals(df['A'][14], 5) def test_unstack_preserve_types(self): # GH #403 self.ymd['E'] = 'foo' self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') self.assert_(unstacked['A', 1].dtype == np.float64) self.assert_(unstacked['E', 1].dtype == np.object_) self.assert_(unstacked['F', 1].dtype == np.float64) def test_getitem_lowerdim_corner(self): self.assertRaises(KeyError, self.frame.ix.__getitem__, (('bar', 'three'), 'B')) self.assertRaises(KeyError, self.frame.ix.__setitem__, (('bar', 'three'), 'B'), 0) #---------------------------------------------------------------------- # AMBIGUOUS CASES! def test_partial_ix_missing(self): raise nose.SkipTest result = self.ymd.ix[2000, 0] expected = self.ymd.ix[2000]['A'] assert_series_equal(result, expected) # need to put in some work here # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) #---------------------------------------------------------------------- def test_to_html(self): self.ymd.columns.name = 'foo' self.ymd.to_html() self.ymd.T.to_html() def test_level_with_tuples(self): index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0), ('foo', 'qux', 0)], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[('foo', 'bar', 0)] result2 = series.ix[('foo', 'bar', 0)] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) result = frame.ix[('foo', 'bar', 0)] result2 = frame.xs(('foo', 'bar', 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ('foo', 'qux')], [0, 1]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) result = series[('foo', 'bar')] result2 = series.ix[('foo', 'bar')] expected = series[:2] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = frame.ix[('foo', 'bar')] result2 = frame.xs(('foo', 'bar')) expected = frame[:2] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd['A'] result = s[5:] expected = s.reindex(s.index[5:]) assert_series_equal(result, expected) exp = self.ymd['A'].copy() s[5:] = 0 exp.values[5:] = 0 self.assert_(np.array_equal(s.values, exp.values)) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) assert_frame_equal(result, expected) def test_mixed_depth_get(self): arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'], [ '', 'OD', 'OD', 'result1', 'result2', 'result1'], [ '', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4,6),columns = index) result = df['a'] expected = df['a','',''] assert_series_equal(result, expected) self.assertEquals(result.name, 'a') result = df['routine1','result1'] expected = df['routine1','result1',''] assert_series_equal(result, expected) self.assertEquals(result.name, ('routine1', 'result1')) def test_mixed_depth_insert(self): arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'], [ '', 'OD', 'OD', 'result1', 'result2', 'result1'], [ '', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4,6),columns = index) result = df.copy() expected = df.copy() result['b'] = [1,2,3,4] expected['b','',''] = [1,2,3,4] assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'], [ '', 'OD', 'OD', 'result1', 'result2', 'result1'], [ '', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4,6),columns = index) result = df.drop('a',axis=1) expected = df.drop([('a','','')],axis=1) assert_frame_equal(expected, result) result = df.drop(['top'],axis=1) expected = df.drop([('top','OD','wx')], axis=1) expected = expected.drop([('top','OD','wy')], axis=1) assert_frame_equal(expected, result) result = df.drop(('top', 'OD', 'wx'), axis=1) expected = df.drop([('top','OD','wx')], axis=1) assert_frame_equal(expected, result) expected = df.drop([('top','OD','wy')], axis=1) expected = df.drop('top', axis=1) result = df.drop('result1', level=1, axis=1) expected = df.drop([('routine1', 'result1', ''), ('routine2', 'result1', '')], axis=1) assert_frame_equal(expected, result) def test_mixed_depth_pop(self): arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'], [ '', 'OD', 'OD', 'result1', 'result2', 'result1'], [ '', 'wx', 'wy', '', '', '']] tuples = zip(*arrays) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4,6),columns = index) df1 = df.copy() df2 = df.copy() result = df1.pop('a') expected = df2.pop(('a','','')) assert_series_equal(expected, result) assert_frame_equal(df1, df2) self.assertEquals(result.name,'a') expected = df1['top'] df1 = df1.drop(['top'],axis=1) result = df2.pop('top') assert_frame_equal(expected, result) assert_frame_equal(df1, df2) def test_reindex_level_partial_selection(self): result = self.frame.reindex(['foo', 'qux'], level=0) expected = self.frame.ix[[0, 1, 2, 7, 8, 9]] assert_frame_equal(result, expected) result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0) assert_frame_equal(result, expected.T) result = self.frame.ix[['foo', 'qux']] assert_frame_equal(result, expected) result = self.frame['A'].ix[['foo', 'qux']] assert_series_equal(result, expected['A']) result = self.frame.T.ix[:, ['foo', 'qux']] assert_frame_equal(result, expected.T) def test_setitem_multiple_partial(self): expected = self.frame.copy() result = self.frame.copy() result.ix[['foo', 'bar']] = 0 expected.ix['foo'] = 0 expected.ix['bar'] = 0 assert_frame_equal(result, expected) expected = self.frame.copy() result = self.frame.copy() result.ix['foo':'bar'] = 0 expected.ix['foo'] = 0 expected.ix['bar'] = 0 assert_frame_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.ix[['foo', 'bar']] = 0 expected.ix['foo'] = 0 expected.ix['bar'] = 0 assert_series_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.ix['foo':'bar'] = 0 expected.ix['foo'] = 0 expected.ix['bar'] = 0 assert_series_equal(result, expected) def test_drop_level(self): result = self.frame.drop(['bar', 'qux'], level='first') expected = self.frame.ix[[0, 1, 2, 5, 6]] assert_frame_equal(result, expected) result = self.frame.drop(['two'], level='second') expected = self.frame.ix[[0, 2, 3, 6, 7, 9]] assert_frame_equal(result, expected) result = self.frame.T.drop(['bar', 'qux'], axis=1, level='first') expected = self.frame.ix[[0, 1, 2, 5, 6]].T assert_frame_equal(result, expected) result = self.frame.T.drop(['two'], axis=1, level='second') expected = self.frame.ix[[0, 2, 3, 6, 7, 9]].T assert_frame_equal(result, expected) def test_unicode_repr_issues(self): levels = [Index([u'a/\u03c3', u'b/\u03c3',u'c/\u03c3']), Index([0, 1])] labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] index = MultiIndex(levels=levels, labels=labels) repr(index.levels)
df2 = DataFrame(np.arange(16).reshape(4,4),index=[['a','a','b','b'],[1,2,1,2]],columns=[['NY','NY','SF','SF'],['cold','hot','cold','hot']]) # a more elegant way of creating hierarchical indexes using MultiIndex hier_cols = pd.MultiIndex.from_arrays([['Ny','Ny','La','La'],[1,2,1,2]], names = ['city','some_values']) df2 = DataFrame(np.arange(16).reshape(4,4), columns = hier_cols) df2 df2.columns.values # output column names df2.index.names = ['INDEX_1','INDEX_2'] # name or rename an index df2.columns.names = ['Cities','Temp'] # name or rename column indexes # other ways to rename indexes or columns df.rename(index = {'Old_Name': 'New_Name'}, columns = {'Old_Col_Name': 'New_Col_Name'}, inplace = True) df2.swaplevel('Cities','Temp', axis = 1) # swamp index relationships. (axis =1 is for columns) df2.swaplevel('Cities','Temp', axis = 1) # swamp index relationships. (axis =1 is for columns) df2.sortlevel(0) #sorts according to index. df2.sortlevel(1) will sort according to secondary index df2.sum(level = 'Temp', axis = 1) # sum across 'columnar' index = "Temp" (columnar because of axis = 1) df.reset_index(level=0, inplace = True) # converts index into a column. # This is useful if index has relevant information like date. In which case reset_index(level = 0) converts that date into a column # Using loc (location) function df['a_column'].loc[df['a_column'] > 0] = 'I am not zero' # loc(ate) rows where df['a_column'] > 0, and set the value of those rows to 'I am not zero' df['a_column'].loc[df['a_column'] == 0] = 'I am zero' ############################################################### ### ###
print(data['a':'b']) print(data[['a', 'd']]) print(data['a'][[1, 3]]) print(data[:, 2]) print(data[:, 3]) df = DataFrame(np.arange(12).reshape(4, 3), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['seoul', 'busan', 'jeju'], ['red', 'green', 'red']]) print(df) df.columns.names = ['cityName', 'color'] print(df) df.index.names = ['key1', 'key2'] print(df) print(df['seoul']) df2 = df.swaplevel('key1', 'key2') print(df2) print(df.swaplevel(0, 1)) print(df.sum(level='key2')) print(df.sum(level='color', axis=1))
df = DataFrame(np.arange(12).reshape(4, 3), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['seoul', 'busan', 'kwangu'], ['red', 'green', 'blue']]) print(df) # 다중 색인의 이름을 표시하고 싶을때 df.columns.names = ['city', 'color'] df.index.names = ['key1', 'key2'] print(df) print(df['seoul']) # 색인 위치 바꾸기, 색인 계층의 순서를 바꾸기 # swaplevel() : 색인 순서를 바꾸는 메서드 df2 = df.swaplevel('key1', 'key2') print(df2) # key1과 key2를 바꾸겠다는 의미 # sortlevel() : 사전식으로 계층을 바꾸어 정렬하기 # 라고 강의에선 설명하였으나 더이상 sortlevel을 지원하지 않는다 # sort_index, sort_values를 활용 #print(df2.sortlevel(0)) print(df2.sort_index()) # index 상위계층 값이 1 2 1 2 가 아닌 1 1 2 2 로 묶여 출력된다. print(df2.sort_index(level=1)) # level값을 1을 줌으로써 2번째 계층을 기준으로 정렬하겠다는 의미 # 활용 print(df.sum(level='key2')) # key2를 기준으로 묶어서 합산 df3 = DataFrame(np.arange(12).reshape(4, 3), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
class TestMultiLevel(unittest.TestCase): def setUp(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN self.series = s tm.N = 100 self.tdf = tm.makeTimeDataFrame() self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() # use Int64Index, to make sure things work self.ymd.index.levels = [lev.astype('i8') for lev in self.ymd.index.levels] self.ymd.index.names = ['year', 'month', 'day'] def test_append(self): a, b = self.frame[:5], self.frame[5:] result = a.append(b) tm.assert_frame_equal(result, self.frame) result = a['A'].append(b['A']) tm.assert_series_equal(result, self.frame['A']) def test_reindex_level(self): # axis=0 month_sums = self.ymd.sum(level='month') result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum) assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) assert_series_equal(result, expected) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): op = getattr(DataFrame, opname) month_sums = self.ymd.sum(level='month') result = op(self.ymd, month_sums, level='month') broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) assert_frame_equal(result, expected) # Series op = getattr(Series, opname) result = op(self.ymd['A'], month_sums['A'], level='month') broadcasted = self.ymd['A'].groupby(level='month').transform(np.sum) expected = op(self.ymd['A'], broadcasted) assert_series_equal(result, expected) _check_op('sub') _check_op('add') _check_op('mul') _check_op('div') def test_pickle(self): import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) unpickled = cPickle.loads(pickled) assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) _test_roundtrip(self.ymd) _test_roundtrip(self.ymd.T) def test_reindex(self): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] expected = self.frame.ix[[0, 3]] assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) self.assert_(chunk.index is new_index) chunk = self.ymd.ix[new_index] self.assert_(chunk.index is new_index) ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) self.assert_(chunk.columns is new_index) chunk = ymdT.ix[:, new_index] self.assert_(chunk.columns is new_index) def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEquals(result.index.names, self.frame.index.names) def test_repr_to_string(self): repr(self.frame) repr(self.ymd) repr(self.frame.T) repr(self.ymd.T) buf = StringIO() self.frame.to_string(buf=buf) self.ymd.to_string(buf=buf) self.frame.T.to_string(buf=buf) self.ymd.T.to_string(buf=buf) def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] assert_almost_equal(col.values, df.values[:, 0]) self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) self.assertRaises(KeyError, df.__getitem__, 'foobar') def test_series_getitem(self): s = self.ymd['A'] result = s[2000, 3] result2 = s.ix[2000, 3] expected = s[42:65] expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] self.assertEquals(result, expected) # fancy result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] expected = s[49:51] assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_setitem(self): s = self.ymd['A'] s[2000, 3] = np.nan self.assert_(isnull(s[42:65]).all()) self.assert_(notnull(s[:42]).all()) self.assert_(notnull(s[65:]).all()) s[2000, 3, 10] = np.nan self.assert_(isnull(s[49])) def test_series_slice_partial(self): pass def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.ix[('bar', 'two')] assert_series_equal(xs, xs2) assert_almost_equal(xs.values, self.frame.values[4]) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.ix['foo'] expected = self.frame.T['foo'].T assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_fancy_2d(self): result = self.frame.ix['foo', 'B'] expected = self.frame.xs('foo')['B'] assert_series_equal(result, expected) ft = self.frame.T result = ft.ix['B', 'foo'] expected = ft.xs('B')['foo'] assert_series_equal(result, expected) def test_get_loc_single_level(self): s = Series(np.random.randn(len(self.single_level)), index=self.single_level) for k in self.single_level.values: s[k] def test_getitem_toplevel(self): df = self.frame.T result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) result = df['bar'] result2 = df.ix[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result, result2) def test_getitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=['a', 'b', 'c', 'd']) res = frame.ix[1:2] exp = frame[2:] assert_frame_equal(res, exp) series = Series(np.random.randn(len(index)), index=index) res = series.ix[1:2] exp = series[2:] assert_series_equal(res, exp) def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, labels=labels) frame = DataFrame(np.random.randn(6, 2), index=index) result = frame.ix[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.ix.__getitem__, 3) # however this will work result = self.frame.ix[2] expected = self.frame.xs(self.frame.index[2]) assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) assert_frame_equal(result, expected) def test_getitem_slice_not_sorted(self): df = self.frame.sortlevel(1).T # buglet with int typechecking result = df.ix[:, :np.int32(3)] expected = df.reindex(columns=df.columns[:3]) assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.ix[('bar', 'two'), 'B'] = 5 self.assertEquals(self.frame.ix[('bar', 'two'), 'B'], 5) # with integer labels df = self.frame.copy() df.columns = range(3) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) def test_fancy_slice_partial(self): result = self.frame.ix['bar':'baz'] expected = self.frame[3:7] assert_frame_equal(result, expected) result = self.ymd.ix[(2000,2):(2000,4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] assert_frame_equal(result, expected) def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) self.assertRaises(Exception, df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) self.assertRaises(Exception, self.frame.reset_index()['A'].sortlevel) # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1])] index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2']) df = DataFrame(np.random.randn(8,3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() self.assert_(com.is_integer_dtype(deleveled['prm1'])) self.assert_(com.is_float_dtype(deleveled['prm2'])) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] result = self.frame.sortlevel(level='second') expected = self.frame.sortlevel(level=1) assert_frame_equal(result, expected) def test_sortlevel_mixed(self): sorted_before = self.frame.sortlevel(1) df = self.frame.copy() df['foo'] = 'bar' sorted_after = df.sortlevel(1) assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) dft = self.frame.T sorted_before = dft.sortlevel(1, axis=1) dft['foo', 'three'] = 'bar' sorted_after = dft.sortlevel(1, axis=1) assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), sorted_after.drop([('foo', 'three')], axis=1)) def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count(axis=axis) expected = expected.reindex_like(result).astype('i8') assert_frame_equal(result, expected) self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan self.ymd.ix[1, [1, 2]] = np.nan self.ymd.ix[7, [0, 1]] = np.nan _check_counts(self.frame) _check_counts(self.ymd) _check_counts(self.frame.T, axis=1) _check_counts(self.ymd.T, axis=1) # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() self.assertRaises(Exception, df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) assert_almost_equal(result.columns, ['A', 'B', 'C']) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', 'three', 'four']], labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) result = s.count(level=0) expected = s.groupby(level=0).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() assert_series_equal(result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0]) assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(int) assert_frame_equal(result, expected) def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() unstacked2 = unstacked.unstack() # test that ints work unstacked = self.ymd.astype(int).unstack() def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sortlevel(2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) assert_frame_equal(restacked.sortlevel(0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).ix[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) expected = self.ymd.unstack(0).stack(0) def test_stack_mixed_dtype(self): df = self.frame.T df['foo', 'four'] = 'foo' df = df.sortlevel(1, axis=1) stacked = df.stack() assert_series_equal(stacked['foo'], df['foo'].stack()) self.assert_(stacked['bar'].dtype == np.float_) def test_unstack_bug(self): df = DataFrame({'state': ['naive','naive','naive', 'activ','activ','activ'], 'exp':['a','b','b','b','a','a'], 'barcode':[1,2,3,4,1,3], 'v':['hi','hi','bye','bye','bye','peace'], 'extra': np.arange(6.)}) result = df.groupby(['state','exp','barcode','v']).apply(len) unstacked = result.unstack() restacked = unstacked.stack() assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() self.assertEquals(unstacked.index.name, 'first') self.assertEquals(unstacked.columns.names, ['exp', 'second']) restacked = unstacked.stack() self.assertEquals(restacked.index.names, self.frame.index.names) def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') assert_frame_equal(unstacked, expected) self.assertEquals(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sortlevel(0) assert_frame_equal(restacked, self.ymd) self.assertEquals(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1) assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1) assert_frame_equal(unstacked, expected) def test_groupby_transform(self): s = self.frame['A'] grouper = s.index.get_level_values(0) grouped = s.groupby(grouper) applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) assert_series_equal(applied.reindex(expected.index), expected) def test_join(self): a = self.frame.ix[:5, ['A']] b = self.frame.ix[2:, ['B', 'C']] joined = a.join(b, how='outer').reindex(self.frame.index) expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan self.assert_(not np.isnan(joined.values).all()) assert_frame_equal(joined, expected) def test_swaplevel(self): swapped = self.frame['A'].swaplevel(0, 1) swapped2 = self.frame['A'].swaplevel('first', 'second') self.assert_(not swapped.index.equals(self.frame.index)) assert_series_equal(swapped, swapped2) back = swapped.swaplevel(0, 1) back2 = swapped.swaplevel('second', 'first') self.assert_(back.index.equals(self.frame.index)) assert_series_equal(back, back2) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA' : self.frame, 'ItemB' : self.frame * 2}) result = panel.swaplevel(0, 1, axis='major') expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) tm.assert_panel_equal(result, expected) def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) self.assertRaises(Exception, self.ymd.index.reorder_levels, [1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] self.assert_(isinstance(df.columns, MultiIndex)) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): x = Series(data=[1,2,3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B",3)])) y = Series(data=[4,5,6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B",3)])) res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) assert_series_equal(res, exp) def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assert_(index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assert_(not index.is_lexsorted()) index = MultiIndex(levels=levels, labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assert_(not index.is_lexsorted()) self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): df = self.frame.T df['foo'].values[:] = 0 self.assert_((df['foo'].values == 0).all()) # but not if it's mixed-type df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) df['foo']['one'] = 2 self.assert_((df['foo', 'one'] == 0).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' arrays = [np.array(x) for x in zip(*df.columns.get_tuple_index())] result = df['foo'] result2 = df.ix[:, 'foo'] expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) expected.columns = expected.columns.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) df = df.T result = df.xs('foo') result2 = df.ix['foo'] expected = df.reindex(df.index[arrays[0] == 'foo']) expected.index = expected.index.droplevel(0) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) arrays = [np.array(x) for x in zip(*index.get_tuple_index())] result = s['qux'] result2 = s.ix['qux'] expected = s[arrays[0] == 'qux'] expected.index = expected.index.droplevel(0) assert_series_equal(result, expected) assert_series_equal(result2, expected) AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var'] def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, range(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.ix[1, [1, 2]] = np.nan self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, range(2), range(2), [False, True]): if axis == 0: frame = self.frame else: frame = self.frame.T grouped = frame.groupby(level=level, axis=axis) aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis) leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] self.assert_(leftside._get_axis(axis).equals(level_index)) self.assert_(rightside._get_axis(axis).equals(level_index)) assert_frame_equal(leftside, rightside) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() k1 = self.ymd.index.get_level_values(0) k2 = self.ymd.index.get_level_values(1) expected = self.ymd.groupby([k1, k2]).mean() assert_frame_equal(result, expected) self.assertEquals(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) df = df.consolidate() def test_ix_preserve_names(self): result = self.ymd.ix[2000] result2 = self.ymd['A'].ix[2000] self.assertEquals(result.index.names, self.ymd.index.names[1:]) self.assertEquals(result2.index.names, self.ymd.index.names[1:]) result = self.ymd.ix[2000, 2] result2 = self.ymd['A'].ix[2000, 2] self.assertEquals(result.index.name, self.ymd.index.names[2]) self.assertEquals(result2.index.name, self.ymd.index.names[2]) def test_partial_set(self): # GH #397 df = self.ymd.copy() exp = self.ymd.copy() df.ix[2000, 4] = 0 exp.ix[2000, 4].values[:] = 0 assert_frame_equal(df, exp) df['A'].ix[2000, 4] = 1 exp['A'].ix[2000, 4].values[:] = 1 assert_frame_equal(df, exp) df.ix[2000] = 5 exp.ix[2000].values[:] = 5 assert_frame_equal(df, exp) # this works...for now df['A'].ix[14] = 5 self.assertEquals(df['A'][14], 5) def test_unstack_preserve_types(self): # GH #403 self.ymd['E'] = 'foo' self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') self.assert_(unstacked['A', 1].dtype == np.float64) self.assert_(unstacked['E', 1].dtype == np.object_) self.assert_(unstacked['F', 1].dtype == np.float64) def test_partial_ix_missing(self): result = self.ymd.ix[2000, 0] expected = self.ymd.ix[2000]['A'] assert_series_equal(result, expected) # need to put in some work here # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) def test_to_html(self): self.ymd.columns.name = 'foo' self.ymd.to_html() self.ymd.T.to_html()
import numpy as np from pandas import Series, DataFrame import pandas as pd from numpy.random import randn ser = Series(np.random.randn(6), index=[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']]) ser[2] print(ser[:, 'a']) dframe = ser.unstack() #Seriesのindexを使ってデータフレームにする dframe.T.unstack() #データフレームからシリーズに dframe2 = DataFrame(np.arange(16).reshape((4, 4)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['NY', 'NY', 'LA', 'SF'], ['cold', 'hot', 'hot', 'cold']]) dframe2.index.names = ['INDEX_1', 'INDEX_2'] #行方向のindexに名前をつける dframe2.columns.names = ['Cities', 'Temp'] #カラムの名前をつける dframe2.swaplevel('Cities', 'Temp', axis=1) #行方向のIndexを入れ替える dframe2.sortlevel(1) #2列目のindexをソートする(並べ替える) dframe2.sum(level='Temp', axis=1) #Tempに注目して都市名を無視して合計を計算してくれる
yc19['b'] #可以更方便選取子集 yc19['b':'c'] yc19.loc[['b', 'd']] yc19[:, 2] #可以在內存中進行選取 yc19.unstack() yc19.unstack().stack() frame16 = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame16 frame16.index.name = ['key1', 'key2'] frame16['Ohio'] #重排分級順列 frame16.swaplevel(0, 1) #page156的命名無法找到,在找原因 frame16.sort_index(level=0) #對單個級別進行排序 frame16.swaplevel(0, 1).sort_index(level=0) #使用DataFrame的列 frame17 = DataFrame({ 'a': range(7), 'b': range(7, 0, -1), 'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'], 'd': [0, 1, 2, 0, 1, 2, 3] }) frame17 frame17.set_index(['c', 'd']) frame17.set_index(['c', 'd'], drop=False)
d['a'] #%% d[:3] #%% d[::-1] #%% d['a','a1'] #%% d[:,'a1'] # 对于多级系列这样的操作是可以的,但对于数据框则行不通 #%% 对数据多级索引起名字 d.index.names = ['row1','row2'] d.columns.names = ['col1','col2'] d #%% 交换行索引的级别 d.swaplevel(0,1) #%% d.swaplevel('row1','row2') #%% 交换列索引顺序 d.swaplevel(0,1,axis=1) #%% 将行索引的最低一级转化为列索引 d.unstack() #%% 将列索引的最低一级转化为行索引 d.stack() # 疑问: # d.stack().stack() 变成一个系列是可以理解的,但为何d.unstack().unstack() # 也变成一个系列了 #%% 互换行和列 d.swapaxes(0,1)
df3.index df3 df3.ix["b", ].ix[1:, ] df3.ix["a":"b",].ix[1:, ] df3.unstack().unstack() frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame frame.swaplevel("key1", "key2") frame frame.sortlevel(0) frame frame.sum(level="key1") frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame.sum(level="color", axis=1) frame.sum(level="state", axis=1) ser = Series(np.arange(3.), index=list("abc"))
data['b'] data['b':'c'] data.ix[['b','d']] data[:, 2] data.unstack() #unstack() 默认应用于内层 #for DataFrame frame = DataFrame(np.arange(12).reshape((4, 3)),index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] frame frame['Ohio'] frame.swaplevel(0, 1).sortlevel(0) frame.sum(level='key2') frame.sum(level='color', axis=1) #use column to index!!!!! frame = DataFrame({'a': range(7), 'b': range(7, 0, -1),'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],'d': [0, 1, 2, 0, 1, 2, 3]}) frame frame2 = frame.set_index(['c', 'd']) frame2 frame2.reset_index() #inverse operation of set_index #Sorting and ranking #Axis indexes with duplicate values #Unique values, value counts, and membership #Handling missing data
# -*- coding: utf-8 -*- import numpy as np from pandas import Series, DataFrame print '索引层级交换' frame = DataFrame(np.arange(12).reshape((4, 3)), index = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) frame.index.names = ['key1', 'key2'] frame_swapped = frame.swaplevel('key1', 'key2') print frame_swapped print frame_swapped.swaplevel(0, 1) print print '根据索引排序' print frame.sortlevel('key2') print frame.swaplevel(0, 1).sortlevel(0)
dframe.unstack() # We can also apply multiple level indexing to DataFrames dframe2 = DataFrame(np.arange(16).reshape(4, 4), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], columns=[['NY', 'NY', 'LA', 'SF'], ['cold', 'hot', 'hot', 'cold']]) dframe2 # We can also give these index levels names #Name the index levels dframe2.index.names = ['INDEX_1', 'INDEX_2'] #Name the column levels dframe2.columns.names = ['Cities', 'Temp'] dframe2 # We can also interchange level orders (note the axis=1 for columns) dframe2.swaplevel('Cities', 'Temp', axis=1) #We can also sort levels dframe2.sort_index(level='INDEX_2') #Note the change in sorting, now the Dframe index is sorted by the INDEX_2 #We can also perform operations on particular levels dframe2.sum(level='Temp', axis=1) #Thats the end of this section! Next up, Section 5: Working with Data Part 1 !!!
frame.corrwith(frame2) frame.corrwith(um) frame.corrwith(um.to_frame()) frame.ix[:, 'Washu':'UMST'].apply(lambda x: x.mean()) frame.set_index('UM', drop=True, inplace=True) keys = frame.index frame.reset_index(level=keys) df = DataFrame(np.random.randn(6, 5), columns=['Ohio', 'Dallas', 'Michigan', 'Miami', 'DC'], index=[['a', 'a', 'b', 'b', 'c', 'd'], [1, 2, 3, 1, 2, 3]]) df.index df.ix['a'] df.sortlevel(level=0, axis=0) df.sortlevel(level=1, axis=0) df.swaplevel(0, 1) df_unstack = df.swaplevel(0, 1).unstack() #turn the column into Multiindex import pandas.io.data as web data = {} for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']: data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010') p = pd.Panel(data) p2 = p.swapaxes('items', 'major') """ Extra """ obj.values np.exp(obj) 'b' in obj pd.isnull(obj)
#单独创建MultiIndex可以复用。如: #MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']], # names=['state','color']) print '##############################################' print frame # state Ohio Colorado # color Green Red Green # key1 key2 # a 1 0 1 2 # 2 3 4 5 # b 1 6 7 8 # 2 9 10 11 #互换级别 print frame.swaplevel('key1', 'key2') # state Ohio Colorado # color Green Red Green # key2 key1 # 1 a 0 1 2 # 2 a 3 4 5 # 1 b 6 7 8 # 2 b 9 10 11 #sortlevel根据单个级别中的值对数据进行排序。 #按key2排序 print frame.sortlevel(1) # state Ohio Colorado # color Green Red Green # key1 key2
def test_set_index_datetime(self): # GH#3950 df = DataFrame( { "label": ["a", "a", "a", "b", "b", "b"], "datetime": [ "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", ], "value": range(6), } ) df.index = to_datetime(df.pop("datetime"), utc=True) df.index = df.index.tz_convert("US/Pacific") expected = DatetimeIndex( ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], name="datetime", ) expected = expected.tz_localize("UTC").tz_convert("US/Pacific") df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) assert df.index.names == ["datetime", "label"] df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) assert df.index.names == ["label", "datetime"] df = DataFrame(np.random.random(6)) idx1 = DatetimeIndex( [ "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00", ], tz="US/Eastern", ) idx2 = DatetimeIndex( [ "2012-04-01 09:00", "2012-04-01 09:00", "2012-04-01 09:00", "2012-04-02 09:00", "2012-04-02 09:00", "2012-04-02 09:00", ], tz="US/Eastern", ) idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") idx3 = idx3._with_freq(None) df = df.set_index(idx1) df = df.set_index(idx2, append=True) df = df.set_index(idx3, append=True) expected1 = DatetimeIndex( ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], tz="US/Eastern", ) expected2 = DatetimeIndex( ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" ) tm.assert_index_equal(df.index.levels[0], expected1) tm.assert_index_equal(df.index.levels[1], expected2) tm.assert_index_equal(df.index.levels[2], idx3) # GH#7092 tm.assert_index_equal(df.index.get_level_values(0), idx1) tm.assert_index_equal(df.index.get_level_values(1), idx2) tm.assert_index_equal(df.index.get_level_values(2), idx3)
def main(): """ Handling of not applicable values """ data = Series(np.random.randn(10), index=[list('aaabbbccdd'), map(int, list('1231231223'))]) print data print data.index print type(data.index) print data['b'] print data['b':'c'] print data.ix[['b', 'd']] print data[:, 2] print data.unstack() print data.unstack().stack() print '','' frame = DataFrame(np.arange(12).reshape((4, 3)), index=[['a','a','b','b'], [1,2,1,2]], columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]) print frame frame.index.names = ['key1', 'key2'] frame.columns.names = ['state', 'color'] print frame print frame['Ohio'] print MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']], names=['state', 'color']) # change hierarchy and sort print '','' print frame.swaplevel('key1', 'key2') print '','' print frame.sortlevel(1) # sorted by key2 print '','' print frame.swaplevel(0, 1).sortlevel(0) # swap and sorted by key 2 # summary statistics for each hierarchy print '','' print frame.sum(level='key2') print '','' print frame.sum(level='color', axis=1) print '','' # Using column of the DataFrame for index print '','-------------------------' frame = DataFrame({ 'a': range(7), 'b': range(7, 0, -1), 'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'], 'd': [0, 1, 2, 0, 1, 2, 3], }) print frame frame2 = frame.set_index(['c', 'd']) print '','' print frame2 print '','' print frame.set_index(['c', 'd'], drop=False) print '','' print frame2.reset_index()