def test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) wp = tm.makePanel() store.append('wp1', wp.ix[:,:10,:]) store.append('wp1', wp.ix[:,10:,:]) tm.assert_panel_equal(store['wp1'], wp) except: raise finally: store.close() os.remove(pth)
def test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) wp = tm.makePanel() store.append('wp1', wp.ix[:, :10, :]) store.append('wp1', wp.ix[:, 10:, :]) tm.assert_panel_equal(store['wp1'], wp) except: raise finally: store.close() os.remove(pth)
def _check_roundtrip_table(self, obj, comparator): store = HDFStore(self.scratchpath, 'w') try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath)
def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath)
def hsave(self,fname): from pandas.io.pytables import HDFStore LOGGER.debug('Saving HDF in %s\n%s',fname,self.report()) store = HDFStore(fname,mode='w') for k,v in self.items(): if re.match('^__',k): continue if isinstance(v,np.ndarray): v = Series(v) LOGGER.debug('Saving HDF for %s',k) store.put(k,v) if self._missing: store['_MISSING']=Series(self._missing) store.close()
def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] # sorted_obj = _test_sort(obj) comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath)
def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath)
def test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) store.append('/df3', df[:10]) store.append('/df3', df[10:]) tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it import warnings import tables warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) # panel wp = tm.makePanel() store.append('wp1', wp.ix[:,:10,:]) store.append('wp1', wp.ix[:,10:,:]) tm.assert_panel_equal(store['wp1'], wp) # ndim p4d = tm.makePanel4D() store.append('p4d', p4d.ix[:,:,:10,:]) store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(store['p4d'], p4d) except: raise finally: store.close() os.remove(pth)
def test_walk_groups(self): with tm.ensure_clean('walk_groups.hdf') as filename: store = HDFStore(filename, 'w') dfs = { 'df1': pd.DataFrame([1, 2, 3]), 'df2': pd.DataFrame([4, 5, 6]), 'df3': pd.DataFrame([6, 7, 8]), 'df4': pd.DataFrame([9, 10, 11]), } store.put('/first_group/df1', dfs['df1']) store.put('/first_group/df2', dfs['df2']) store.put('/second_group/df3', dfs['df3']) store.put('/second_group/third_group/df4', dfs['df4']) expect = { '/': ({'first_group', 'second_group'}, set()), '/first_group': (set(), {'df1', 'df2'}), '/second_group': ({'third_group'}, {'df3'}), '/second_group/third_group': (set(), {'df4'}), } for path, groups, frames in store.walk_groups(): self.assertIn(path, expect) expect_groups, expect_frames = expect[path] self.assertEqual(expect_groups, set(groups)) self.assertEqual(expect_frames, set(frames)) for frame in frames: frame_path = '/'.join([path, frame]) df = store.get(frame_path) self.assert_(df.equals(dfs[frame]))
def test_walk_groups(self): with tm.ensure_clean('walk_groups.hdf') as filename: store = HDFStore(filename, 'w') dfs = { 'df1': pd.DataFrame([1,2,3]), 'df2': pd.DataFrame([4,5,6]), 'df3': pd.DataFrame([6,7,8]), 'df4': pd.DataFrame([9,10,11]), } store.put('/first_group/df1', dfs['df1']) store.put('/first_group/df2', dfs['df2']) store.put('/second_group/df3', dfs['df3']) store.put('/second_group/third_group/df4', dfs['df4']) expect = { '/': ({'first_group', 'second_group'}, set()), '/first_group': (set(), {'df1', 'df2'}), '/second_group': ({'third_group'}, {'df3'}), '/second_group/third_group': (set(), {'df4'}), } for path, groups, frames in store.walk_groups(): self.assertIn(path, expect) expect_groups, expect_frames = expect[path] self.assertEqual(expect_groups, set(groups)) self.assertEqual(expect_frames, set(frames)) for frame in frames: frame_path = '/'.join([path, frame]) df = store.get(frame_path) self.assert_(df.equals(dfs[frame]))
def test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) store.append('/df3', df[:10]) store.append('/df3', df[10:]) tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it import warnings import tables warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) wp = tm.makePanel() store.append('wp1', wp.ix[:, :10, :]) store.append('wp1', wp.ix[:, 10:, :]) tm.assert_panel_equal(store['wp1'], wp) except: raise finally: store.close() os.remove(pth)
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_len_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.assertEquals(len(self.store), 4) self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd'])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() repr(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) self.assertRaises(AttributeError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.put('c', df[:10], table=True) self.store.append('c', df[10:]) tm.assert_frame_equal(self.store['c'], df) def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df 
self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) def test_remove_where_not_exist(self): crit1 = { 'field' : 'index', 'op' : '>', 'value' : 'foo' } self.store.remove('a', where=[crit1]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) def test_sparse_panel(self): items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame()) for i in items)) sp = p.to_sparse() self._check_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) sp2 = p.to_sparse(kind='integer') self._check_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) sp3 = p.to_sparse(fill_value=0) self._check_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0.,1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): values = np.random.randn(2) func = lambda l, r : tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) ser = Series(values, [datetime.today(), 0]) self._check_roundtrip(ser, func) ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) from datetime import date ser = Series(values, [date.today(), 'a']) self._check_roundtrip(ser, func) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some 
windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self.assertRaises(ValueError, self._check_roundtrip, df[:0], tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # storing in Table not yet supported self.assertRaises(Exception, self.store.put, 'foo', df1, table=True) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], 
tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty self.assertRaises(ValueError, self._check_roundtrip, wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } crit3 = { 'field' : 'column', 'value' : 'A' } result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = { 'field' : 'column', 'value' : df.columns[:75] } result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series self.assertEquals(self.store['a'].index[0], dt) def 
test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): unicode_values = [u'\u03c3', u'\u03c3\u03c3'] s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal)
class TesttHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_len(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.assertEquals(len(self.store), 4) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() repr(self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) self.assertRaises(AttributeError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.put('c', df[:10], table=True) self.store.append('c', df[10:]) tm.assert_frame_equal(self.store['c'], df) def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) def test_remove_where_not_exist(self): crit1 = { 'field' : 'index', 'op' : '>', 'value' : 'foo' } self.store.remove('a', where=[crit1]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } self.store.remove('wp', where=[crit1]) self.store.remove('wp', 
where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = DateRange('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self.assertRaises(ValueError, self._check_roundtrip, df[:0], tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in DateRange('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # storing in Table not yet supported self.assertRaises(Exception, self.store.put, 'foo', df1, table=True) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) 
self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_wide(), right.to_wide()) wp = tm.makePanel() self._check_roundtrip(wp.to_long(), _check) # empty self.assertRaises(ValueError, self._check_roundtrip, wp.to_long()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } crit3 = { 'field' : 'column', 'value' : 'A' } result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = { 'field' : 'column', 'value' : df.columns[:75] } result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] 
store['c'] store['d'] store.close()
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) self.assert_(set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) repr(self.store) str(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.store['foo/bar'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) self.assert_('foo/bar' in self.store) self.assert_('/foo/bar' in self.store) self.assert_('/foo/b' not in self.store) self.assert_('bar' not in self.store) def test_versioning(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) self.assert_(self.store.root.a._v_attrs.pandas_version == '0.10') self.assert_(self.store.root.b._v_attrs.pandas_version == '0.10') self.assert_(self.store.root.df1._v_attrs.pandas_version == '0.10') # write a file and wipe its versioning self.store.remove('df2') self.store.append('df2', df) self.store.get_node('df2')._v_attrs.pandas_version = None self.store.select('df2') self.store.select('df2', [ Term('index','>',df.index[2]) ]) def test_meta(self): raise nose.SkipTest('no meta') meta = { 'foo' : [ 'I love pandas ' ] } s = tm.makeTimeSeries() s.meta = meta self.store['a'] = s self.assert_(self.store['a'].meta == meta) df = tm.makeDataFrame() df.meta = meta self.store['b'] = df self.assert_(self.store['b'].meta == meta) # this should work, but because slicing doesn't propgate meta it doesn self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) results = self.store['df1'] #self.assert_(getattr(results,'meta',None) == meta) # no meta df = tm.makeDataFrame() self.store['b'] = df self.assert_(hasattr(self.store['b'],'meta') == False) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) left = self.store.get('/a') right = self.store['/a'] tm.assert_series_equal(left, right) self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] 
self.store['foo/bar/bah'] = df[:10] self.store['foo'] = df[:10] self.store['/foo'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_string_index(self): index = Index([ "I am a very long string index: %s" % i for i in range(20) ]) s = Series(np.arange(20), index = index) df = DataFrame({ 'A' : s, 'B' : s }) self.store['a'] = s tm.assert_series_equal(self.store['a'], s) self.store['b'] = df tm.assert_frame_equal(self.store['b'], df) # mixed length index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + [ "I am a very long string index: %s" % i for i in range(20) ]) s = Series(np.arange(21), index = index) df = DataFrame({ 'A' : s, 'B' : s }) self.store['a'] = s tm.assert_series_equal(self.store['a'], s) self.store['b'] = df tm.assert_frame_equal(self.store['b'], df) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) tm.assert_frame_equal(self.store['df1'], df) self.store.remove('df2') self.store.put('df2', df[:10], table=True) self.store.append('df2', df[10:]) tm.assert_frame_equal(self.store['df2'], df) self.store.remove('df3') self.store.append('/df3', df[:10]) self.store.append('/df3', df[10:]) tm.assert_frame_equal(self.store['df3'], df) # this is allowed by almost always don't want to do it warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) self.store.remove('/df3 foo') self.store.append('/df3 foo', df[:10]) self.store.append('/df3 foo', df[10:]) tm.assert_frame_equal(self.store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) # panel wp = tm.makePanel() self.store.remove('wp1') self.store.append('wp1', wp.ix[:,:10,:]) self.store.append('wp1', wp.ix[:,10:,:]) tm.assert_panel_equal(self.store['wp1'], wp) # ndim p4d = tm.makePanel4D() self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:]) self.store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(self.store['p4d'], p4d) # test using axis labels self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:], axes=['items','major_axis','minor_axis']) self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['items','major_axis','minor_axis']) tm.assert_panel4d_equal(self.store['p4d'], p4d) # test using differnt number of items on each axis p4d2 = p4d.copy() p4d2['l4'] = p4d['l1'] p4d2['l5'] 
= p4d['l1'] self.store.remove('p4d2') self.store.append('p4d2', p4d2, axes=['items','major_axis','minor_axis']) tm.assert_panel4d_equal(self.store['p4d2'], p4d2) # test using differt order of items on the non-index axes self.store.remove('wp1') wp_append1 = wp.ix[:,:10,:] self.store.append('wp1', wp_append1) wp_append2 = wp.ix[:,10:,:].reindex(items = wp.items[::-1]) self.store.append('wp1', wp_append2) tm.assert_panel_equal(self.store['wp1'], wp) def test_append_frame_column_oriented(self): # column oriented df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df.ix[:,:2], axes = ['columns']) self.store.append('df1', df.ix[:,2:]) tm.assert_frame_equal(self.store['df1'], df) result = self.store.select('df1', 'columns=A') expected = df.reindex(columns=['A']) tm.assert_frame_equal(expected, result) # this isn't supported self.assertRaises(Exception, self.store.select, 'df1', ('columns=A', Term('index','>',df.index[4]))) # selection on the non-indexable result = self.store.select('df1', ('columns=A', Term('index','=',df.index[0:4]))) expected = df.reindex(columns=['A'],index=df.index[0:4]) tm.assert_frame_equal(expected, result) def test_ndim_indexables(self): """ test using ndim tables in new ways""" p4d = tm.makePanel4D() def check_indexers(key, indexers): for i,idx in enumerate(indexers): self.assert_(getattr(getattr(self.store.root,key).table.description,idx)._v_pos == i) # append then change (will take existing schema) indexers = ['items','major_axis','minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers) self.store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(self.store.select('p4d'),p4d) check_indexers('p4d',indexers) # same as above, but try to append with differnt axes self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers) self.store.append('p4d', p4d.ix[:,:,10:,:], axes=['labels','items','major_axis']) tm.assert_panel4d_equal(self.store.select('p4d'),p4d) check_indexers('p4d',indexers) # pass incorrect number of axes self.store.remove('p4d') self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[:,:,:10,:], axes=['major_axis','minor_axis']) # different than default indexables #1 indexers = ['labels','major_axis','minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers) self.store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(self.store['p4d'], p4d) check_indexers('p4d',indexers) # different than default indexables #2 indexers = ['major_axis','labels','minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:,:,:10,:], axes=indexers) self.store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(self.store['p4d'], p4d) check_indexers('p4d',indexers) # partial selection result = self.store.select('p4d',['labels=l1']) expected = p4d.reindex(labels = ['l1']) tm.assert_panel4d_equal(result, expected) # partial selection2 result = self.store.select('p4d',[Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) expected = p4d.reindex(labels = ['l1'], items = ['ItemA'], minor_axis = ['B']) tm.assert_panel4d_equal(result, expected) # non-existant partial selection result = self.store.select('p4d',[Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) expected = p4d.reindex(labels = ['l1'], items = [], minor_axis = ['B']) tm.assert_panel4d_equal(result, expected) def test_append_with_strings(self): wp = tm.makePanel() wp2 = wp.rename_axis(dict([ (x,"%s_extra" % x) for x in wp.minor_axis ]), 
axis = 2) self.store.append('s1', wp, min_itemsize = 20) self.store.append('s1', wp2) expected = concat([ wp, wp2], axis = 2) expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s1'], expected) # test dict format self.store.append('s2', wp, min_itemsize = { 'minor_axis' : 20 }) self.store.append('s2', wp2) expected = concat([ wp, wp2], axis = 2) expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s2'], expected) # apply the wrong field (similar to #1) self.store.append('s3', wp, min_itemsize = { 'major_axis' : 20 }) self.assertRaises(Exception, self.store.append, 's3') # test truncation of bigger strings self.store.append('s4', wp) self.assertRaises(Exception, self.store.append, 's4', wp2) # avoid truncation on elements df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']]) self.store.append('df_big',df, min_itemsize = { 'values' : 1024 }) tm.assert_frame_equal(self.store.select('df_big'), df) # appending smaller string ok df2 = DataFrame([[124,'asdqy'], [346,'dggnhefbdfb']]) self.store.append('df_big',df2) expected = concat([ df, df2 ]) tm.assert_frame_equal(self.store.select('df_big'), expected) # avoid truncation on elements df = DataFrame([[123,'asdqwerty'], [345,'dggnhebbsdfbdfb']]) self.store.append('df_big2',df, min_itemsize = { 'values' : 10 }) tm.assert_frame_equal(self.store.select('df_big2'), df) # bigger string on next append self.store.append('df_new',df, min_itemsize = { 'values' : 16 }) df_new = DataFrame([[124,'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) self.assertRaises(Exception, self.store.append, 'df_new',df_new) def test_create_table_index(self): wp = tm.makePanel() self.store.append('p5', wp) self.store.create_table_index('p5') assert(self.store.handle.root.p5.table.cols.major_axis.is_indexed == True) assert(self.store.handle.root.p5.table.cols.minor_axis.is_indexed == False) # default optlevels assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') # let's change the indexing scheme self.store.create_table_index('p5') assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') self.store.create_table_index('p5', optlevel=9) assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') self.store.create_table_index('p5', kind='full') assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'full') self.store.create_table_index('p5', optlevel=1, kind='light') assert(self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 1) assert(self.store.handle.root.p5.table.cols.major_axis.index.kind == 'light') df = tm.makeTimeDataFrame() self.store.append('f', df[:10]) self.store.append('f', df[10:]) self.store.create_table_index('f') # try to index a non-table self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') # try to change the version supports flag from pandas.io import pytables pytables._table_supports_index = False self.assertRaises(Exception, self.store.create_table_index, 'f') # test out some versions original = tables.__version__ for v in ['2.2','2.2b']: pytables._table_mod = None pytables._table_supports_index = False 
tables.__version__ = v self.assertRaises(Exception, self.store.create_table_index, 'f') for v in ['2.3.1','2.3.1b','2.4dev','2.4',original]: pytables._table_mod = None pytables._table_supports_index = False tables.__version__ = v self.store.create_table_index('f') pytables._table_mod = None pytables._table_supports_index = False tables.__version__ = original def test_big_table(self): raise nose.SkipTest('no big table') # create and write a big table wp = Panel(np.random.randn(20, 1000, 1000), items= [ 'Item%s' % i for i in xrange(20) ], major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%s' % i for i in xrange(1000) ]) wp.ix[:,100:200,300:400] = np.nan try: store = HDFStore(self.scratchpath) store._debug_memory = True store.append('wp',wp) recons = store.select('wp') finally: store.close() os.remove(self.scratchpath) def test_append_diff_item_order(self): raise nose.SkipTest('append diff item order') wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) df2 = DataFrame({'a': [4, 5, 6]}, index=date_range('1/1/2000', periods=3)) self.store.put('frame', df1, table=True) self.assertRaises(Exception, self.store.put, 'frame', df2, table=True, append=True) def test_table_values_dtypes_roundtrip(self): df1 = DataFrame({'a': [1, 2, 3]}, dtype = 'f8') self.store.append('df1', df1) assert df1.dtypes == self.store['df1'].dtypes df2 = DataFrame({'a': [1, 2, 3]}, dtype = 'i8') self.store.append('df2', df2) assert df2.dtypes == self.store['df2'].dtypes # incompatible dtype self.assertRaises(Exception, self.store.append, 'df2', df1) def test_table_mixed_dtypes(self): # frame def _make_one_df(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['bool3'] = True df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one_df() self.store.append('df1_mixed', df1) tm.assert_frame_equal(self.store.select('df1_mixed'), df1) # panel def _make_one_panel(): wp = tm.makePanel() wp['obj1'] = 'foo' wp['obj2'] = 'bar' wp['bool1'] = wp['ItemA'] > 0 wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 return wp.consolidate() p1 = _make_one_panel() self.store.append('p1_mixed', p1) tm.assert_panel_equal(self.store.select('p1_mixed'), p1) # ndim def _make_one_p4d(): wp = tm.makePanel4D() wp['obj1'] = 'foo' wp['obj2'] = 'bar' wp['bool1'] = wp['l1'] > 0 wp['bool2'] = wp['l2'] > 0 wp['int1'] = 1 wp['int2'] = 2 return wp.consolidate() p4d = _make_one_p4d() self.store.append('p4d_mixed', p4d) tm.assert_panel4d_equal(self.store.select('p4d_mixed'), p4d) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) # pathing self.store['a'] = ts self.store['b/foo'] = df self.store.remove('foo') self.store.remove('b/foo') self.assertEquals(len(self.store), 1) self.store['a'] = ts self.store['b/foo'] = df self.store.remove('b') self.assertEquals(len(self.store), 1) # __delitem__ self.store['a'] = ts self.store['b'] = df del self.store['a'] del self.store['b'] self.assertEquals(len(self.store), 0) def test_remove_where(self): # non-existance crit1 = Term('index','>','foo') self.store.remove('a', 
where=[crit1]) # try to remove non-table (with crit) # non-table ok (where = None) wp = tm.makePanel() self.store.put('wp', wp, table=True) self.store.remove('wp', [('minor_axis', ['A', 'D'])]) rs = self.store.select('wp') expected = wp.reindex(minor_axis = ['B','C']) tm.assert_panel_equal(rs,expected) # empty where self.store.remove('wp') self.store.put('wp', wp, table=True) # deleted number (entire table) n = self.store.remove('wp', []) assert(n == 120) # non - empty where self.store.remove('wp') self.store.put('wp', wp, table=True) self.assertRaises(Exception, self.store.remove, 'wp', ['foo']) # selectin non-table with a where #self.store.put('wp2', wp, table=False) #self.assertRaises(Exception, self.store.remove, # 'wp2', [('column', ['A', 'D'])]) def test_remove_crit(self): wp = tm.makePanel() # group row removal date4 = wp.major_axis.take([ 0,1,2,4,5,6,8,9,10 ]) crit4 = Term('major_axis',date4) self.store.put('wp3', wp, table=True) n = self.store.remove('wp3', where=[crit4]) assert(n == 36) result = self.store.select('wp3') expected = wp.reindex(major_axis = wp.major_axis-date4) tm.assert_panel_equal(result, expected) # upper half self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = Term('major_axis','>',date) crit2 = Term('minor_axis',['A', 'D']) n = self.store.remove('wp', where=[crit1]) assert(n == 56) n = self.store.remove('wp', where=[crit2]) assert(n == 32) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) # individual row elements self.store.put('wp2', wp, table=True) date1 = wp.major_axis[1:3] crit1 = Term('major_axis',date1) self.store.remove('wp2', where=[crit1]) result = self.store.select('wp2') expected = wp.reindex(major_axis=wp.major_axis-date1) tm.assert_panel_equal(result, expected) date2 = wp.major_axis[5] crit2 = Term('major_axis',date2) self.store.remove('wp2', where=[crit2]) result = self.store['wp2'] expected = wp.reindex(major_axis=wp.major_axis-date1-Index([date2])) tm.assert_panel_equal(result, expected) date3 = [wp.major_axis[7],wp.major_axis[9]] crit3 = Term('major_axis',date3) self.store.remove('wp2', where=[crit3]) result = self.store['wp2'] expected = wp.reindex(major_axis=wp.major_axis-date1-Index([date2])-Index(date3)) tm.assert_panel_equal(result, expected) # corners self.store.put('wp4', wp, table=True) n = self.store.remove('wp4', where=[Term('major_axis','>',wp.major_axis[-1])]) result = self.store.select('wp4') tm.assert_panel_equal(result, wp) def test_terms(self): wp = tm.makePanel() p4d = tm.makePanel4D() self.store.put('wp', wp, table=True) self.store.put('p4d', p4d, table=True) # some invalid terms terms = [ [ 'minor', ['A','B'] ], [ 'index', ['20121114'] ], [ 'index', ['20121114', '20121114'] ], ] for t in terms: self.assertRaises(Exception, self.store.select, 'wp', t) self.assertRaises(Exception, Term.__init__) self.assertRaises(Exception, Term.__init__, 'blah') self.assertRaises(Exception, Term.__init__, 'index') self.assertRaises(Exception, Term.__init__, 'index', '==') self.assertRaises(Exception, Term.__init__, 'index', '>', 5) # panel result = self.store.select('wp',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']) ]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) # p4d result = self.store.select('p4d',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']), Term('items', '=', ['ItemA','ItemB']) ]) expected = 
p4d.truncate(after='20000108').reindex(minor=['A', 'B'],items=['ItemA','ItemB']) tm.assert_panel4d_equal(result, expected) # valid terms terms = [ dict(field = 'major_axis', op = '>', value = '20121114'), ('major_axis', '20121114'), ('major_axis', '>', '20121114'), (('major_axis', ['20121114','20121114']),), ('major_axis', datetime(2012,11,14)), 'major_axis>20121114', 'major_axis>20121114', 'major_axis>20121114', (('minor_axis', ['A','B']),), (('minor_axis', ['A','B']),), ((('minor_axis', ['A','B']),),), (('items', ['ItemA','ItemB']),), ('items=ItemA'), ] for t in terms: self.store.select('wp', t) self.store.select('p4d', t) # valid for p4d only terms = [ (('labels', '=', ['l1','l2']),), Term('labels', '=', ['l1','l2']), ] for t in terms: self.store.select('p4d', t) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) ts2 = Series(ts.index, Index(ts.index, dtype=object)) self._check_roundtrip(ts2, tm.assert_series_equal) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) self._check_roundtrip(ts3, tm.assert_series_equal) def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_double_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) ss2 = s.to_sparse(kind='integer') self._check_double_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) ss3 = s.to_sparse(fill_value=0) self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) def test_sparse_panel(self): items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) sp = p.to_sparse() self._check_double_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) sp2 = p.to_sparse(kind='integer') self._check_double_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) sp3 = p.to_sparse(fill_value=0) self._check_double_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0.,1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): values = np.random.randn(2) func = lambda l, r : tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) ser = Series(values, [datetime.today(), 0]) self._check_roundtrip(ser, func) ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) from datetime import date ser = Series(values, [date.today(), 'a']) self._check_roundtrip(ser, func) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) 
def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) def test_empty_series_frame(self): s0 = Series() s1 = Series(name='myseries') df0 = DataFrame() df1 = DataFrame(index=['a', 'b', 'c']) df2 = DataFrame(columns=['d', 'e', 'f']) self._check_roundtrip(s0, tm.assert_series_equal) self._check_roundtrip(s1, tm.assert_series_equal) self._check_roundtrip(df0, tm.assert_frame_equal) self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_fixed_offset_tz(self): rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name 
== 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty # self._check_roundtrip(wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_select(self): wp = tm.makePanel() # put/select ok self.store.remove('wp') self.store.put('wp', wp, table=True) self.store.select('wp') # non-table ok (where = None) self.store.remove('wp') self.store.put('wp2', wp, table=False) self.store.select('wp2') # selection on the non-indexable with a large number of columns wp = Panel(np.random.randn(100, 100, 100), items = [ 'Item%03d' % i for i in xrange(100) ], major_axis=date_range('1/1/2000', periods=100), minor_axis = [ 'E%03d' % i for i in xrange(100) ]) self.store.remove('wp') self.store.append('wp', wp) items = [ 'Item%03d' % i for i in xrange(80) ] result = self.store.select('wp', Term('items', items)) expected = wp.reindex(items = items) tm.assert_panel_equal(expected, result) # selectin non-table with a where #self.assertRaises(Exception, self.store.select, # 'wp2', ('column', ['A', 'D'])) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = ('major_axis','>=',date) crit2 = ('minor_axis', '=', ['A', 'D']) result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) result = self.store.select('wp', [ 'major_axis>=20000124', ('minor_axis', '=', ['A','B']) ]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = 
df.index[len(df) // 2] crit1 = ('index','>=',date) crit2 = ('columns',['A', 'D']) crit3 = ('columns','A') result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # other indicies for a frame # integer df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20))) self.store.append('df_int', df) self.store.select('df_int', [ Term("index<10"), Term("columns", "=", ["A"]) ]) df = DataFrame(dict(A = np.random.rand(20), B = np.random.rand(20), index = np.arange(20,dtype='f8'))) self.store.append('df_float', df) self.store.select('df_float', [ Term("index<10.0"), Term("columns", "=", ["A"]) ]) # can't select if not written as table #self.store['frame'] = df #self.assertRaises(Exception, self.store.select, # 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = Term('columns', df.columns[:75]) result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) store['obj'] = retrieved again = store['obj'] comparator(again, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] # sorted_obj = _test_sort(obj) comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_legacy_table_read(self): # legacy table types pth = curpath() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r') store.select('df1') store.select('df2') store.select('wp1') # force the frame store.select('df2', typ = 'legacy_frame') # old version (this still throws an exception though) import warnings warnings.filterwarnings('ignore', category=IncompatibilityWarning) self.assertRaises(Exception, store.select, 'wp1', Term('minor_axis','=','B')) warnings.filterwarnings('always', category=IncompatibilityWarning) store.close() def test_legacy_table_write(self): # legacy table types pth = curpath() df = tm.makeDataFrame() wp = tm.makePanel() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a') self.assertRaises(Exception, store.append, 'df1', df) self.assertRaises(Exception, store.append, 'wp1', wp) store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series 
self.assertEquals(self.store['a'].index[0], dt) def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): unicode_values = [u'\u03c3', u'\u03c3\u03c3'] s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) def test_store_datetime_mixed(self): df = DataFrame({'a': [1,2,3], 'b': [1.,2.,3.], 'c': ['a', 'b', 'c']}) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) def test_cant_write_multiindex_table(self): # for now, #1848 df = DataFrame(np.random.randn(10, 4), index=[np.arange(5).repeat(2), np.tile(np.arange(2), 5)]) self.assertRaises(Exception, self.store.put, 'foo', df, table=True)
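# A minimal sketch of the table-format query API exercised by the tests above,
# assuming a scratch file name of its own; the put/select/remove calls with Term
# criteria are the same ones the tests use.
def _sketch_table_queries():
    wp = tm.makePanel()
    store = HDFStore('__sketch__.h5')  # illustrative scratch file, assumption only
    try:
        # store as a queryable table, then select rows/columns by criteria
        store.put('wp', wp, table=True)
        subset = store.select('wp', [Term('major_axis', '>', wp.major_axis[5]),
                                     Term('minor_axis', '=', ['A', 'B'])])
        # remove only the rows matching a criterion; non-matching rows remain
        store.remove('wp', where=[Term('minor_axis', ['A', 'D'])])
        return subset
    finally:
        store.close()
        os.remove('__sketch__.h5')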
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_len(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makeWidePanel() self.assertEquals(len(self.store), 4) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makeWidePanel() repr(self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) self.assertRaises(AttributeError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.put('c', df[:10], table=True) self.store.append('c', df[10:]) tm.assert_frame_equal(self.store['c'], df) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) def test_remove_crit(self): wp = tm.makeWidePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = DateRange('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) self._check_roundtrip(ts, tm.assert_series_equal) def test_frame(self): df = tm.makeDataFrame() # put 
in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # storing in Table not yet supported self.assertRaises(Exception, self.store.put, 'foo', df1, table=True) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) def test_wide(self): wp = tm.makeWidePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makeWidePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_wide(), right.to_wide()) wp = tm.makeWidePanel() self._check_roundtrip(wp.to_long(), _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_panel_select(self): wp = tm.makeWidePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } crit3 = { 'field' : 'column', 'value' : 'A' } result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = { 'field' : 'column', 'value' : df.columns[:75] } result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator): store = HDFStore(self.scratchpath, 'w') try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator): store = HDFStore(self.scratchpath, 'w') try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath) def 
test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close()
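# The class above is an earlier revision of this suite: where-criteria are passed
# as plain dicts, e.g. {'field': 'index', 'op': '>', 'value': date}, and columns
# are addressed as 'column'. The revisions that follow express the same criteria
# with Term objects, e.g. Term('major_axis', '>', date).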
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) self.assert_( set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar' ])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) repr(self.store) str(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.store['foo/bar'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) self.assert_('foo/bar' in self.store) self.assert_('/foo/bar' in self.store) self.assert_('/foo/b' not in self.store) self.assert_('bar' not in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) left = self.store.get('/a') right = self.store['/a'] tm.assert_series_equal(left, right) self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store['foo/bar/bah'] = df[:10] self.store['foo'] = df[:10] self.store['/foo'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def 
test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) store.append('/df3', df[:10]) store.append('/df3', df[10:]) tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it import warnings import tables warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) wp = tm.makePanel() store.append('wp1', wp.ix[:, :10, :]) store.append('wp1', wp.ix[:, 10:, :]) tm.assert_panel_equal(store['wp1'], wp) except: raise finally: store.close() os.remove(pth) def test_append_with_strings(self): wp = tm.makePanel() wp2 = wp.rename_axis(dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) self.store.append('s1', wp, min_itemsize=20) self.store.append('s1', wp2) expected = concat([wp, wp2], axis=2) expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s1'], expected) # test dict format self.store.append('s2', wp, min_itemsize={'column': 20}) self.store.append('s2', wp2) expected = concat([wp, wp2], axis=2) expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s2'], expected) # apply the wrong field (similar to #1) self.store.append('s3', wp, min_itemsize={'index': 20}) self.assertRaises(Exception, self.store.append, 's3') # test truncation of bigger strings self.store.append('s4', wp) self.assertRaises(Exception, self.store.append, 's4', wp2) def test_create_table_index(self): wp = tm.makePanel() self.store.append('p5', wp) self.store.create_table_index('p5') assert (self.store.handle.root.p5.table.cols.index.is_indexed == True) assert ( self.store.handle.root.p5.table.cols.column.is_indexed == False) df = tm.makeTimeDataFrame() self.store.append('f', df[:10]) self.store.append('f', df[10:]) self.store.create_table_index('f') # create twice self.store.create_table_index('f') # try to index a non-table self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') # try to change the version supports flag from pandas.io import pytables pytables._table_supports_index = False self.assertRaises(Exception, self.store.create_table_index, 'f') def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) df2 = DataFrame({'a': [4, 5, 6]}, index=date_range('1/1/2000', periods=3)) self.store.put('frame', df1, table=True) self.assertRaises(Exception, self.store.put, 'frame', df2, table=True, append=True) def test_table_values_dtypes_roundtrip(self): df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8') self.store.append('df1', df1) assert df1.dtypes == self.store['df1'].dtypes df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8') self.store.append('df2', df2) assert df2.dtypes == self.store['df2'].dtypes # incompatible dtype self.assertRaises(Exception, self.store.append, 'df2', df1) def test_table_mixed_dtypes(self): # frame def _make_one_df(): df = tm.makeDataFrame() 
df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['bool3'] = True df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one_df() self.store.append('df1_mixed', df1) tm.assert_frame_equal(self.store.select('df1_mixed'), df1) # panel def _make_one_panel(): wp = tm.makePanel() wp['obj1'] = 'foo' wp['obj2'] = 'bar' wp['bool1'] = wp['ItemA'] > 0 wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 return wp.consolidate() p1 = _make_one_panel() self.store.append('p1_mixed', p1) tm.assert_panel_equal(self.store.select('p1_mixed'), p1) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) # pathing self.store['a'] = ts self.store['b/foo'] = df self.store.remove('foo') self.store.remove('b/foo') self.assertEquals(len(self.store), 1) self.store['a'] = ts self.store['b/foo'] = df self.store.remove('b') self.assertEquals(len(self.store), 1) # __delitem__ self.store['a'] = ts self.store['b'] = df del self.store['a'] del self.store['b'] self.assertEquals(len(self.store), 0) def test_remove_where(self): # non-existance crit1 = Term('index', '>', 'foo') self.store.remove('a', where=[crit1]) # try to remove non-table (with crit) # non-table ok (where = None) wp = tm.makePanel() self.store.put('wp', wp, table=True) self.store.remove('wp', [('column', ['A', 'D'])]) rs = self.store.select('wp') expected = wp.reindex(minor_axis=['B', 'C']) tm.assert_panel_equal(rs, expected) # empty where self.store.remove('wp') self.store.put('wp', wp, table=True) self.store.remove('wp', []) # non - empty where self.store.remove('wp') self.store.put('wp', wp, table=True) self.assertRaises(Exception, self.store.remove, 'wp', ['foo']) # selectin non-table with a where self.store.put('wp2', wp, table=False) self.assertRaises(Exception, self.store.remove, 'wp2', [('column', ['A', 'D'])]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = Term('index', '>', date) crit2 = Term('column', ['A', 'D']) self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) # test non-consecutive row removal wp = tm.makePanel() self.store.put('wp2', wp, table=True) date1 = wp.major_axis[1:3] date2 = wp.major_axis[5] date3 = [wp.major_axis[7], wp.major_axis[9]] crit1 = Term('index', date1) crit2 = Term('index', date2) crit3 = Term('index', date3) self.store.remove('wp2', where=[crit1]) self.store.remove('wp2', where=[crit2]) self.store.remove('wp2', where=[crit3]) result = self.store['wp2'] ma = list(wp.major_axis) for d in date1: ma.remove(d) ma.remove(date2) for d in date3: ma.remove(d) expected = wp.reindex(major=ma) tm.assert_panel_equal(result, expected) def test_terms(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) # some invalid terms terms = [ ['minor', ['A', 'B']], ['index', ['20121114']], ['index', ['20121114', '20121114']], ] for t in terms: self.assertRaises(Exception, self.store.select, 'wp', t) self.assertRaises(Exception, Term.__init__) self.assertRaises(Exception, Term.__init__, 'blah') self.assertRaises(Exception, Term.__init__, 'index') self.assertRaises(Exception, Term.__init__, 'index', '==') 
self.assertRaises(Exception, Term.__init__, 'index', '>', 5) result = self.store.select( 'wp', [Term('major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) # valid terms terms = [ dict(field='index', op='>', value='20121114'), ('index', '20121114'), ('index', '>', '20121114'), (('index', ['20121114', '20121114']), ), ('index', datetime(2012, 11, 14)), 'index>20121114', 'major>20121114', 'major_axis>20121114', (('minor', ['A', 'B']), ), (('minor_axis', ['A', 'B']), ), ((('minor_axis', ['A', 'B']), ), ), (('column', ['A', 'B']), ), ] for t in terms: self.store.select('wp', t) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) ts2 = Series(ts.index, Index(ts.index, dtype=object)) self._check_roundtrip(ts2, tm.assert_series_equal) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) self._check_roundtrip(ts3, tm.assert_series_equal) def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_double_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) ss2 = s.to_sparse(kind='integer') self._check_double_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) ss3 = s.to_sparse(fill_value=0) self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) def test_sparse_panel(self): items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) sp = p.to_sparse() self._check_double_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) sp2 = p.to_sparse(kind='integer') self._check_double_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) sp3 = p.to_sparse(fill_value=0) self._check_double_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0., 1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): values = np.random.randn(2) func = lambda l, r: tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) ser = Series(values, [datetime.today(), 0]) self._check_roundtrip(ser, func) ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) from datetime import date ser = Series(values, [date.today(), 'a']) self._check_roundtrip(ser, func) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and 
sys.version_info[1] < 7: raise nose.SkipTest dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) def test_empty_series_frame(self): s0 = Series() s1 = Series(name='myseries') df0 = DataFrame() df1 = DataFrame(index=['a', 'b', 'c']) df2 = DataFrame(columns=['d', 'e', 'f']) self._check_roundtrip(s0, tm.assert_series_equal) self._check_roundtrip(s1, tm.assert_series_equal) self._check_roundtrip(df0, tm.assert_frame_equal) self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_fixed_offset_tz(self): rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert (recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert (recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert (recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def 
test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty # self._check_roundtrip(wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_select(self): wp = tm.makePanel() # put/select ok self.store.put('wp', wp, table=True) self.store.select('wp') # non-table ok (where = None) self.store.put('wp2', wp, table=False) self.store.select('wp2') # selectin non-table with a where self.assertRaises(Exception, self.store.select, 'wp2', ('column', ['A', 'D'])) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = ('index', '>=', date) crit2 = ('column', '=', ['A', 'D']) result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) result = self.store.select( 'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = ('index', '>=', date) crit2 = ('column', ['A', 'D']) crit3 = ('column', 'A') result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = 
['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = Term('column', df.columns[:75]) result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) store['obj'] = retrieved again = store['obj'] comparator(again, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] # sorted_obj = _test_sort(obj) comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_legacy_table_read(self): # legacy table types pth = curpath() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r') store.select('df1') store.select('df2') store.select('wp1') store.close() def test_legacy_table_write(self): # legacy table types pth = curpath() df = tm.makeDataFrame() wp = tm.makePanel() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a') self.assertRaises(Exception, store.append, 'df1', df) self.assertRaises(Exception, store.append, 'wp1', wp) store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series self.assertEquals(self.store['a'].index[0], dt) def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): unicode_values = [u'\u03c3', u'\u03c3\u03c3'] s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) def test_store_datetime_mixed(self): df = DataFrame({ 'a': 
[1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c'] }) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) def test_cant_write_multiindex_table(self): # for now, #1848 df = DataFrame( np.random.randn(10, 4), index=[np.arange(5).repeat(2), np.tile(np.arange(2), 5)]) self.assertRaises(Exception, self.store.put, 'foo', df, table=True)
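# A minimal sketch of appending string-bearing panels and indexing the result,
# mirroring test_append_with_strings and test_create_table_index below; the file
# name and key are illustrative assumptions, while the append/min_itemsize/
# create_table_index calls are the ones those tests exercise.
def _sketch_append_and_index():
    wp = tm.makePanel()
    store = HDFStore('__sketch_append__.h5')  # illustrative scratch file
    try:
        # reserve 20 characters for minor_axis labels so longer labels appended
        # later still fit in the same string column
        store.append('wp_strings', wp, min_itemsize={'minor_axis': 20})
        # build a PyTables index on the table to speed up where-based queries
        store.create_table_index('wp_strings')
        return store['wp_strings']
    finally:
        store.close()
        os.remove('__sketch_append__.h5')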
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) self.assert_(set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) repr(self.store) str(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.store['foo/bar'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) self.assert_('foo/bar' in self.store) self.assert_('/foo/bar' in self.store) self.assert_('/foo/b' not in self.store) self.assert_('bar' not in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) left = self.store.get('/a') right = self.store['/a'] tm.assert_series_equal(left, right) self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store['foo/bar/bah'] = df[:10] self.store['foo'] = df[:10] self.store['/foo'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def 
test_append(self): pth = '__test_append__.h5' try: store = HDFStore(pth) df = tm.makeTimeDataFrame() store.append('df1', df[:10]) store.append('df1', df[10:]) tm.assert_frame_equal(store['df1'], df) store.put('df2', df[:10], table=True) store.append('df2', df[10:]) tm.assert_frame_equal(store['df2'], df) store.append('/df3', df[:10]) store.append('/df3', df[10:]) tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it import warnings import tables warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) # panel wp = tm.makePanel() store.append('wp1', wp.ix[:,:10,:]) store.append('wp1', wp.ix[:,10:,:]) tm.assert_panel_equal(store['wp1'], wp) # ndim p4d = tm.makePanel4D() store.append('p4d', p4d.ix[:,:,:10,:]) store.append('p4d', p4d.ix[:,:,10:,:]) tm.assert_panel4d_equal(store['p4d'], p4d) except: raise finally: store.close() os.remove(pth) def test_append_with_strings(self): wp = tm.makePanel() wp2 = wp.rename_axis(dict([ (x,"%s_extra" % x) for x in wp.minor_axis ]), axis = 2) self.store.append('s1', wp, min_itemsize = 20) self.store.append('s1', wp2) expected = concat([ wp, wp2], axis = 2) expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s1'], expected) # test dict format self.store.append('s2', wp, min_itemsize = { 'minor_axis' : 20 }) self.store.append('s2', wp2) expected = concat([ wp, wp2], axis = 2) expected = expected.reindex(minor_axis = sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s2'], expected) # apply the wrong field (similar to #1) self.store.append('s3', wp, min_itemsize = { 'major_axis' : 20 }) self.assertRaises(Exception, self.store.append, 's3') # test truncation of bigger strings self.store.append('s4', wp) self.assertRaises(Exception, self.store.append, 's4', wp2) def test_create_table_index(self): wp = tm.makePanel() self.store.append('p5', wp) self.store.create_table_index('p5') assert(self.store.handle.root.p5.table.cols.major_axis.is_indexed == True) assert(self.store.handle.root.p5.table.cols.minor_axis.is_indexed == False) df = tm.makeTimeDataFrame() self.store.append('f', df[:10]) self.store.append('f', df[10:]) self.store.create_table_index('f') # create twice self.store.create_table_index('f') # try to index a non-table self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') # try to change the version supports flag from pandas.io import pytables pytables._table_supports_index = False self.assertRaises(Exception, self.store.create_table_index, 'f') def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) df2 = DataFrame({'a': [4, 5, 6]}, index=date_range('1/1/2000', periods=3)) self.store.put('frame', df1, table=True) self.assertRaises(Exception, self.store.put, 'frame', df2, table=True, append=True) def test_table_values_dtypes_roundtrip(self): df1 = DataFrame({'a': [1, 2, 3]}, dtype = 'f8') self.store.append('df1', df1) assert df1.dtypes == self.store['df1'].dtypes df2 = DataFrame({'a': [1, 2, 3]}, dtype = 'i8') self.store.append('df2', df2) assert df2.dtypes 
== self.store['df2'].dtypes # incompatible dtype self.assertRaises(Exception, self.store.append, 'df2', df1) def test_table_mixed_dtypes(self): # frame def _make_one_df(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['bool3'] = True df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one_df() self.store.append('df1_mixed', df1) tm.assert_frame_equal(self.store.select('df1_mixed'), df1) # panel def _make_one_panel(): wp = tm.makePanel() wp['obj1'] = 'foo' wp['obj2'] = 'bar' wp['bool1'] = wp['ItemA'] > 0 wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 return wp.consolidate() p1 = _make_one_panel() self.store.append('p1_mixed', p1) tm.assert_panel_equal(self.store.select('p1_mixed'), p1) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) # pathing self.store['a'] = ts self.store['b/foo'] = df self.store.remove('foo') self.store.remove('b/foo') self.assertEquals(len(self.store), 1) self.store['a'] = ts self.store['b/foo'] = df self.store.remove('b') self.assertEquals(len(self.store), 1) # __delitem__ self.store['a'] = ts self.store['b'] = df del self.store['a'] del self.store['b'] self.assertEquals(len(self.store), 0) def test_remove_where(self): # non-existance crit1 = Term('index','>','foo') self.store.remove('a', where=[crit1]) # try to remove non-table (with crit) # non-table ok (where = None) wp = tm.makePanel() self.store.put('wp', wp, table=True) self.store.remove('wp', [('minor_axis', ['A', 'D'])]) rs = self.store.select('wp') expected = wp.reindex(minor_axis = ['B','C']) tm.assert_panel_equal(rs,expected) # empty where self.store.remove('wp') self.store.put('wp', wp, table=True) self.store.remove('wp', []) # non - empty where self.store.remove('wp') self.store.put('wp', wp, table=True) self.assertRaises(Exception, self.store.remove, 'wp', ['foo']) # selectin non-table with a where self.store.put('wp2', wp, table=False) self.assertRaises(Exception, self.store.remove, 'wp2', [('column', ['A', 'D'])]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = Term('major_axis','>',date) crit2 = Term('minor_axis',['A', 'D']) self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) # test non-consecutive row removal wp = tm.makePanel() self.store.put('wp2', wp, table=True) date1 = wp.major_axis[1:3] date2 = wp.major_axis[5] date3 = [wp.major_axis[7],wp.major_axis[9]] crit1 = Term('major_axis',date1) crit2 = Term('major_axis',date2) crit3 = Term('major_axis',date3) self.store.remove('wp2', where=[crit1]) self.store.remove('wp2', where=[crit2]) self.store.remove('wp2', where=[crit3]) result = self.store['wp2'] ma = list(wp.major_axis) for d in date1: ma.remove(d) ma.remove(date2) for d in date3: ma.remove(d) expected = wp.reindex(major = ma) tm.assert_panel_equal(result, expected) def test_terms(self): wp = tm.makePanel() p4d = tm.makePanel4D() self.store.put('wp', wp, table=True) self.store.put('p4d', p4d, table=True) # some invalid terms terms = [ [ 'minor', ['A','B'] ], [ 'index', ['20121114'] ], [ 'index', ['20121114', '20121114'] ], ] for t in 
terms: self.assertRaises(Exception, self.store.select, 'wp', t) self.assertRaises(Exception, Term.__init__) self.assertRaises(Exception, Term.__init__, 'blah') self.assertRaises(Exception, Term.__init__, 'index') self.assertRaises(Exception, Term.__init__, 'index', '==') self.assertRaises(Exception, Term.__init__, 'index', '>', 5) # panel result = self.store.select('wp',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']) ]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) # p4d result = self.store.select('p4d',[ Term('major_axis<20000108'), Term('minor_axis', '=', ['A','B']) ]) expected = p4d.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel4d_equal(result, expected) # valid terms terms = [ dict(field = 'major_axis', op = '>', value = '20121114'), ('major_axis', '20121114'), ('major_axis', '>', '20121114'), (('major_axis', ['20121114','20121114']),), ('major_axis', datetime(2012,11,14)), 'major_axis>20121114', 'major_axis>20121114', 'major_axis>20121114', (('minor_axis', ['A','B']),), (('minor_axis', ['A','B']),), ((('minor_axis', ['A','B']),),), ] for t in terms: self.store.select('wp', t) self.store.select('p4d', t) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) ts2 = Series(ts.index, Index(ts.index, dtype=object)) self._check_roundtrip(ts2, tm.assert_series_equal) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) self._check_roundtrip(ts3, tm.assert_series_equal) def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_double_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) ss2 = s.to_sparse(kind='integer') self._check_double_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) ss3 = s.to_sparse(fill_value=0) self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) def test_sparse_panel(self): items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) sp = p.to_sparse() self._check_double_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) sp2 = p.to_sparse(kind='integer') self._check_double_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) sp3 = p.to_sparse(fill_value=0) self._check_double_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0.,1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): values = np.random.randn(2) func = lambda l, r : tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) ser = Series(values, [datetime.today(), 0]) self._check_roundtrip(ser, func) ser = 
Series(values, ['y', 0]) self._check_roundtrip(ser, func) from datetime import date ser = Series(values, [date.today(), 'a']) self._check_roundtrip(ser, func) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) def test_empty_series_frame(self): s0 = Series() s1 = Series(name='myseries') df0 = DataFrame() df1 = DataFrame(index=['a', 'b', 'c']) df2 = DataFrame(columns=['d', 'e', 'f']) self._check_roundtrip(s0, tm.assert_series_equal) self._check_roundtrip(s1, tm.assert_series_equal) self._check_roundtrip(df0, tm.assert_frame_equal) self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_fixed_offset_tz(self): rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() 
os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty # self._check_roundtrip(wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_select(self): wp = tm.makePanel() # put/select ok self.store.put('wp', wp, table=True) self.store.select('wp') # non-table ok (where = None) self.store.put('wp2', wp, table=False) self.store.select('wp2') # selectin non-table with a where self.assertRaises(Exception, self.store.select, 'wp2', ('column', ['A', 'D'])) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = ('major_axis','>=',date) crit2 = ('minor_axis', '=', ['A', 'D']) result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) result = self.store.select('wp', [ 'major_axis>=20000124', ('minor_axis', '=', ['A','B']) ]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = ('index','>=',date) crit2 = ('columns',['A', 'D']) crit3 
= ('columns','A') result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = Term('columns', df.columns[:75]) result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) store['obj'] = retrieved again = store['obj'] comparator(again, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] # sorted_obj = _test_sort(obj) comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_legacy_table_read(self): # legacy table types pth = curpath() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r') store.select('df1') store.select('df2') store.select('wp1') store.close() def test_legacy_table_write(self): # legacy table types pth = curpath() df = tm.makeDataFrame() wp = tm.makePanel() store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a') self.assertRaises(Exception, store.append, 'df1', df) self.assertRaises(Exception, store.append, 'wp1', wp) store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series self.assertEquals(self.store['a'].index[0], dt) def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, 
df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): unicode_values = [u'\u03c3', u'\u03c3\u03c3'] s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) def test_store_datetime_mixed(self): df = DataFrame({'a': [1,2,3], 'b': [1.,2.,3.], 'c': ['a', 'b', 'c']}) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) def test_cant_write_multiindex_table(self): # for now, #1848 df = DataFrame(np.random.randn(10, 4), index=[np.arange(5).repeat(2), np.tile(np.arange(2), 5)]) self.assertRaises(Exception, self.store.put, 'foo', df, table=True)
class TesttHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_len_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.assertEquals(len(self.store), 4) self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd'])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() repr(self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) self.assertRaises(AttributeError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.put('c', df[:10], table=True) self.store.append('c', df[10:]) tm.assert_frame_equal(self.store['c'], df) def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) def test_remove_where_not_exist(self): crit1 = { 'field' : 'index', 'op' : '>', 'value' : 'foo' } self.store.remove('a', where=[crit1]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 
'D'] } self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0.,1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = DateRange('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self.assertRaises(ValueError, self._check_roundtrip, df[:0], tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in DateRange('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) 
self._check_roundtrip(df2, tm.assert_frame_equal) self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # storing in Table not yet supported self.assertRaises(Exception, self.store.put, 'foo', df1, table=True) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty self.assertRaises(ValueError, self._check_roundtrip, wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } crit3 = { 'field' : 'column', 'value' : 'A' } result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = { 'field' : 'column', 'value' : df.columns[:75] } result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj) finally: store.close() os.remove(self.scratchpath) def 
_check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series self.assertEquals(self.store['a'].index[0], dt)
text = file.read() reads the JSON file into a string; text = json.loads(text) parses that string into Python objects. json_normalize(text, 'books') flattens the records stored under the 'books' key into a table of book information. ---- 5.8 The HDF5 format (binary) ===== for analyzing large amounts of data ====== HDF5 library: from pandas.io.pytables import HDFStore. Create an .h5 file and store a DataFrame object in it: store = HDFStore('mydata.h5'); store['obj1'] = frame. Several data structures can be stored in a single HDF5 file. There are two storage formats, fixed and table; the latter is somewhat slower but supports queries: store.put('obj2', frame, format='table') followed by store.select('obj2', where=['index >= 10 and index <= 15']). The reverse operation is simply store['obj2']. 5.9 pickle: Python object serialization ====== Serialization is the process of converting an object hierarchy into a byte stream. 5.9.1 Using cPickle for Python object serialization: import cPickle as pickle. The dumps() function: pickled_data = pickle.dumps(data)
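A minimal sketch tying the two notes together, assuming a recent pandas with PyTables installed; the 'mydata.h5' path and the small frame are hypothetical and only for illustration (Python 3 ships pickle directly, where the notes above use cPickle):

import pickle
import numpy as np
import pandas as pd

# hypothetical example data, just to have something to store
frame = pd.DataFrame({'a': np.arange(20), 'b': np.arange(20, 40)})

# write in 'table' format so where-clause queries are possible (needs PyTables)
store = pd.HDFStore('mydata.h5')
store.put('obj2', frame, format='table')
subset = store.select('obj2', where='index >= 10 and index <= 15')
whole = store['obj2']   # the reverse operation: read the stored object back
store.close()

# pickle round trip: dumps() serializes the object to bytes, loads() restores it
pickled_data = pickle.dumps(frame)
restored = pickle.loads(pickled_data)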
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_len_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.assertEquals(len(self.store), 4) self.assert_(set(self.store.keys()) == set(['a', 'b', 'c', 'd'])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() repr(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.put('c', df[:10], table=True) self.store.append('c', df[10:]) tm.assert_frame_equal(self.store['c'], df) def test_append_diff_item_order(self): wp = tm.makePanel() wp1 = wp.ix[:, :10, :] wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :] self.store.put('panel', wp1, table=True) self.assertRaises(Exception, self.store.put, 'panel', wp2, append=True) def test_remove(self): ts = tm.makeTimeSeries() df = tm.makeDataFrame() self.store['a'] = ts self.store['b'] = df 
self.store.remove('a') self.assertEquals(len(self.store), 1) tm.assert_frame_equal(df, self.store['b']) self.store.remove('b') self.assertEquals(len(self.store), 0) def test_remove_where_not_exist(self): crit1 = { 'field' : 'index', 'op' : '>', 'value' : 'foo' } self.store.remove('a', where=[crit1]) def test_remove_crit(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } self.store.remove('wp', where=[crit1]) self.store.remove('wp', where=[crit2]) result = self.store['wp'] expected = wp.truncate(after=date).reindex(minor=['B', 'C']) tm.assert_panel_equal(result, expected) def test_series(self): s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) ts2 = Series(ts.index, Index(ts.index, dtype=object)) self._check_roundtrip(ts2, tm.assert_series_equal) ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) self._check_roundtrip(ts3, tm.assert_series_equal) def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_double_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) ss2 = s.to_sparse(kind='integer') self._check_double_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) ss3 = s.to_sparse(fill_value=0) self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) def test_sparse_panel(self): items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame()) for i in items)) sp = p.to_sparse() self._check_double_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) sp2 = p.to_sparse(kind='integer') self._check_double_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) sp3 = p.to_sparse(fill_value=0) self._check_double_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) def test_tuple_index(self): # GH #492 col = np.arange(10) idx = [(0.,1.), (2., 3.), (4., 5.)] data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): values = np.random.randn(2) func = lambda l, r : tm.assert_series_equal(l, r, True, True, True) ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) ser = Series(values, [datetime.today(), 0]) self._check_roundtrip(ser, func) ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) from datetime import date ser = Series(values, [date.today(), 'a']) self._check_roundtrip(ser, func) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)]) self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): if 
sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') def test_frame(self): df = tm.makeDataFrame() # put in some random NAs df.values[0, 0] = np.nan df.values[5, 3] = np.nan self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) # not consolidated df['foo'] = np.random.randn(len(df)) self.store['df'] = df recons = self.store['df'] self.assert_(recons._data.is_consolidated()) # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) def test_empty_series_frame(self): s0 = Series() s1 = Series(name='myseries') df0 = DataFrame() df1 = DataFrame(index=['a', 'b', 'c']) df2 = DataFrame(columns=['d', 'e', 'f']) self._check_roundtrip(s0, tm.assert_series_equal) self._check_roundtrip(s1, tm.assert_series_equal) self._check_roundtrip(df0, tm.assert_frame_equal) self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) def test_can_serialize_dates(self): rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) finally: store.close() os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) self._check_roundtrip(frame, tm.assert_frame_equal) self._check_roundtrip(frame.T, tm.assert_frame_equal) self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored try: store = HDFStore(self.scratchpath) store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) finally: store.close() os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' try: store = HDFStore(self.scratchpath) store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') finally: store.close() os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] try: store = HDFStore(self.scratchpath) store['series'] = series recons = store['series'] assert(recons.name == 'A') finally: store.close() os.remove(self.scratchpath) def test_store_mixed(self): def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' df['bool1'] = df['A'] > 0 df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 return df.consolidate() df1 = _make_one() df2 = _make_one() self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) 
self.store['obj'] = df1 tm.assert_frame_equal(self.store['obj'], df1) self.store['obj'] = df2 tm.assert_frame_equal(self.store['obj'], df2) # storing in Table not yet supported self.assertRaises(Exception, self.store.put, 'foo', df1, table=True) # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['bool1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=True) self._check_roundtrip(df1, tm.assert_frame_equal, compression=True) def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() try: store = HDFStore(self.scratchpath) store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) finally: store.close() os.remove(self.scratchpath) def test_long(self): def _check(left, right): tm.assert_panel_equal(left.to_panel(), right.to_panel()) wp = tm.makePanel() self._check_roundtrip(wp.to_frame(), _check) # empty # self._check_roundtrip(wp.to_frame()[:0], _check) def test_longpanel(self): pass def test_overwrite_node(self): self.store['a'] = tm.makeTimeDataFrame() ts = tm.makeTimeSeries() self.store['a'] = ts tm.assert_series_equal(self.store['a'], ts) def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } result = self.store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) def test_frame_select(self): df = tm.makeTimeDataFrame() self.store.put('frame', df, table=True) date = df.index[len(df) // 2] crit1 = { 'field' : 'index', 'op' : '>=', 'value' : date } crit2 = { 'field' : 'column', 'value' : ['A', 'D'] } crit3 = { 'field' : 'column', 'value' : 'A' } result = self.store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) result = self.store.select('frame', [crit3]) expected = df.ix[:, ['A']] tm.assert_frame_equal(result, expected) # can't select if not written as table self.store['frame'] = df self.assertRaises(Exception, self.store.select, 'frame', [crit1, crit2]) def test_select_filter_corner(self): df = DataFrame(np.random.randn(50, 100)) df.index = ['%.3d' % c for c in df.index] df.columns = ['%.3d' % c for c in df.columns] self.store.put('frame', df, table=True) crit = { 'field' : 'column', 'value' : df.columns[:75] } result = self.store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_double_roundtrip(self, obj, comparator, compression=False, **kwargs): 
options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store['obj'] = obj retrieved = store['obj'] comparator(retrieved, obj, **kwargs) store['obj'] = retrieved again = store['obj'] comparator(again, obj, **kwargs) finally: store.close() os.remove(self.scratchpath) def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: options['complib'] = _default_compressor store = HDFStore(self.scratchpath, 'w', **options) try: store.put('obj', obj, table=True) retrieved = store['obj'] sorted_obj = _test_sort(obj) comparator(retrieved, sorted_obj) finally: store.close() os.remove(self.scratchpath) def test_legacy_read(self): pth = curpath() store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r') store['a'] store['b'] store['c'] store['d'] store.close() def test_store_datetime_fractional_secs(self): dt = datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) self.store['a'] = series self.assertEquals(self.store['a'].index[0], dt) def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): unicode_values = [u'\u03c3', u'\u03c3\u03c3'] s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) def test_store_datetime_mixed(self): df = DataFrame({'a': [1,2,3], 'b': [1.,2.,3.], 'c': ['a', 'b', 'c']}) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) def test_cant_write_multiindex_table(self): # for now, #1848 df = DataFrame(np.random.randn(10, 4), index=[np.arange(5).repeat(2), np.tile(np.arange(2), 5)]) self.assertRaises(Exception, self.store.put, 'foo', df, table=True)
class TestHDFStore(unittest.TestCase): path = '__test__.h5' scratchpath = '__scratch__.h5' def setUp(self): self.store = HDFStore(self.path) def tearDown(self): self.store.close() os.remove(self.path) def test_factory_fun(self): try: with get_store(self.scratchpath) as tbl: raise ValueError('blah') except ValueError: pass with get_store(self.scratchpath) as tbl: tbl['a'] = tm.makeDataFrame() with get_store(self.scratchpath) as tbl: self.assertEquals(len(tbl), 1) self.assertEquals(type(tbl['a']), DataFrame) os.remove(self.scratchpath) def test_keys(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.assertEquals(len(self.store), 5) self.assert_( set(self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar' ])) def test_repr(self): repr(self.store) self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeStringSeries() self.store['c'] = tm.makeDataFrame() self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) repr(self.store) str(self.store) def test_contains(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() self.store['foo/bar'] = tm.makeDataFrame() self.assert_('a' in self.store) self.assert_('b' in self.store) self.assert_('c' not in self.store) self.assert_('foo/bar' in self.store) self.assert_('/foo/bar' in self.store) self.assert_('/foo/b' not in self.store) self.assert_('bar' not in self.store) def test_versioning(self): self.store['a'] = tm.makeTimeSeries() self.store['b'] = tm.makeDataFrame() df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) self.assert_(self.store.root.a._v_attrs.pandas_version == '0.10') self.assert_(self.store.root.b._v_attrs.pandas_version == '0.10') self.assert_(self.store.root.df1._v_attrs.pandas_version == '0.10') # write a file and wipe its versioning self.store.remove('df2') self.store.append('df2', df) self.store.get_node('df2')._v_attrs.pandas_version = None self.store.select('df2') self.store.select('df2', [Term('index', '>', df.index[2])]) def test_meta(self): raise nose.SkipTest('no meta') meta = {'foo': ['I love pandas ']} s = tm.makeTimeSeries() s.meta = meta self.store['a'] = s self.assert_(self.store['a'].meta == meta) df = tm.makeDataFrame() df.meta = meta self.store['b'] = df self.assert_(self.store['b'].meta == meta) # this should work, but because slicing doesn't propgate meta it doesn self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) results = self.store['df1'] #self.assert_(getattr(results,'meta',None) == meta) # no meta df = tm.makeDataFrame() self.store['b'] = df self.assert_(hasattr(self.store['b'], 'meta') == False) def test_reopen_handle(self): self.store['a'] = tm.makeTimeSeries() self.store.open('w', warn=False) self.assert_(self.store.handle.isopen) self.assertEquals(len(self.store), 0) def test_flush(self): self.store['a'] = tm.makeTimeSeries() self.store.flush() def test_get(self): self.store['a'] = tm.makeTimeSeries() left = self.store.get('a') right = self.store['a'] tm.assert_series_equal(left, right) left = self.store.get('/a') right = self.store['/a'] tm.assert_series_equal(left, right) self.assertRaises(KeyError, self.store.get, 'b') def test_put(self): ts = tm.makeTimeSeries() df = tm.makeTimeDataFrame() self.store['a'] = ts self.store['b'] = df[:10] 
self.store['foo/bar/bah'] = df[:10] self.store['foo'] = df[:10] self.store['/foo'] = df[:10] self.store.put('c', df[:10], table=True) # not OK, not a table self.assertRaises(ValueError, self.store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False in # this case self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) # OK self.store.put('c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) tm.assert_frame_equal(df[:10], self.store['c']) def test_put_string_index(self): index = Index( ["I am a very long string index: %s" % i for i in range(20)]) s = Series(np.arange(20), index=index) df = DataFrame({'A': s, 'B': s}) self.store['a'] = s tm.assert_series_equal(self.store['a'], s) self.store['b'] = df tm.assert_frame_equal(self.store['b'], df) # mixed length index = Index( ['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)]) s = Series(np.arange(21), index=index) df = DataFrame({'A': s, 'B': s}) self.store['a'] = s tm.assert_series_equal(self.store['a'], s) self.store['b'] = df tm.assert_frame_equal(self.store['b'], df) def test_put_compression(self): df = tm.makeTimeDataFrame() self.store.put('c', df, table=True, compression='zlib') tm.assert_frame_equal(self.store['c'], df) # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() # can't compress if table=False self.assertRaises(ValueError, self.store.put, 'b', df, table=False, compression='blosc') self.store.put('c', df, table=True, compression='blosc') tm.assert_frame_equal(self.store['c'], df) def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) self._check_roundtrip(df, tm.assert_frame_equal) def test_append(self): df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df[:10]) self.store.append('df1', df[10:]) tm.assert_frame_equal(self.store['df1'], df) self.store.remove('df2') self.store.put('df2', df[:10], table=True) self.store.append('df2', df[10:]) tm.assert_frame_equal(self.store['df2'], df) self.store.remove('df3') self.store.append('/df3', df[:10]) self.store.append('/df3', df[10:]) tm.assert_frame_equal(self.store['df3'], df) # this is allowed by almost always don't want to do it warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) self.store.remove('/df3 foo') self.store.append('/df3 foo', df[:10]) self.store.append('/df3 foo', df[10:]) tm.assert_frame_equal(self.store['df3 foo'], df) warnings.filterwarnings('always', category=tables.NaturalNameWarning) # panel wp = tm.makePanel() self.store.remove('wp1') self.store.append('wp1', wp.ix[:, :10, :]) self.store.append('wp1', wp.ix[:, 10:, :]) tm.assert_panel_equal(self.store['wp1'], wp) # ndim p4d = tm.makePanel4D() self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :]) self.store.append('p4d', p4d.ix[:, :, 10:, :]) tm.assert_panel4d_equal(self.store['p4d'], p4d) # test using axis labels self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :], axes=['items', 'major_axis', 'minor_axis']) self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=['items', 'major_axis', 'minor_axis']) tm.assert_panel4d_equal(self.store['p4d'], p4d) # test using differnt number of items on each axis p4d2 = p4d.copy() p4d2['l4'] = p4d['l1'] 
p4d2['l5'] = p4d['l1'] self.store.remove('p4d2') self.store.append('p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis']) tm.assert_panel4d_equal(self.store['p4d2'], p4d2) # test using differt order of items on the non-index axes self.store.remove('wp1') wp_append1 = wp.ix[:, :10, :] self.store.append('wp1', wp_append1) wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1]) self.store.append('wp1', wp_append2) tm.assert_panel_equal(self.store['wp1'], wp) def test_append_frame_column_oriented(self): # column oriented df = tm.makeTimeDataFrame() self.store.remove('df1') self.store.append('df1', df.ix[:, :2], axes=['columns']) self.store.append('df1', df.ix[:, 2:]) tm.assert_frame_equal(self.store['df1'], df) result = self.store.select('df1', 'columns=A') expected = df.reindex(columns=['A']) tm.assert_frame_equal(expected, result) # this isn't supported self.assertRaises(Exception, self.store.select, 'df1', ('columns=A', Term('index', '>', df.index[4]))) # selection on the non-indexable result = self.store.select( 'df1', ('columns=A', Term('index', '=', df.index[0:4]))) expected = df.reindex(columns=['A'], index=df.index[0:4]) tm.assert_frame_equal(expected, result) def test_ndim_indexables(self): """ test using ndim tables in new ways""" p4d = tm.makePanel4D() def check_indexers(key, indexers): for i, idx in enumerate(indexers): self.assert_( getattr( getattr(self.store.root, key).table.description, idx)._v_pos == i) # append then change (will take existing schema) indexers = ['items', 'major_axis', 'minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) self.store.append('p4d', p4d.ix[:, :, 10:, :]) tm.assert_panel4d_equal(self.store.select('p4d'), p4d) check_indexers('p4d', indexers) # same as above, but try to append with differnt axes self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=['labels', 'items', 'major_axis']) tm.assert_panel4d_equal(self.store.select('p4d'), p4d) check_indexers('p4d', indexers) # pass incorrect number of axes self.store.remove('p4d') self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[:, :, :10, :], axes=['major_axis', 'minor_axis']) # different than default indexables #1 indexers = ['labels', 'major_axis', 'minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) self.store.append('p4d', p4d.ix[:, :, 10:, :]) tm.assert_panel4d_equal(self.store['p4d'], p4d) check_indexers('p4d', indexers) # different than default indexables #2 indexers = ['major_axis', 'labels', 'minor_axis'] self.store.remove('p4d') self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) self.store.append('p4d', p4d.ix[:, :, 10:, :]) tm.assert_panel4d_equal(self.store['p4d'], p4d) check_indexers('p4d', indexers) # partial selection result = self.store.select('p4d', ['labels=l1']) expected = p4d.reindex(labels=['l1']) tm.assert_panel4d_equal(result, expected) # partial selection2 result = self.store.select( 'p4d', [Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) expected = p4d.reindex(labels=['l1'], items=['ItemA'], minor_axis=['B']) tm.assert_panel4d_equal(result, expected) # non-existant partial selection result = self.store.select( 'p4d', [Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B']) tm.assert_panel4d_equal(result, expected) def test_append_with_strings(self): wp = tm.makePanel() wp2 = 
wp.rename_axis(dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) self.store.append('s1', wp, min_itemsize=20) self.store.append('s1', wp2) expected = concat([wp, wp2], axis=2) expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s1'], expected) # test dict format self.store.append('s2', wp, min_itemsize={'minor_axis': 20}) self.store.append('s2', wp2) expected = concat([wp, wp2], axis=2) expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) tm.assert_panel_equal(self.store['s2'], expected) # apply the wrong field (similar to #1) self.store.append('s3', wp, min_itemsize={'major_axis': 20}) self.assertRaises(Exception, self.store.append, 's3') # test truncation of bigger strings self.store.append('s4', wp) self.assertRaises(Exception, self.store.append, 's4', wp2) # avoid truncation on elements df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) self.store.append('df_big', df, min_itemsize={'values': 1024}) tm.assert_frame_equal(self.store.select('df_big'), df) # appending smaller string ok df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) self.store.append('df_big', df2) expected = concat([df, df2]) tm.assert_frame_equal(self.store.select('df_big'), expected) # avoid truncation on elements df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) self.store.append('df_big2', df, min_itemsize={'values': 10}) tm.assert_frame_equal(self.store.select('df_big2'), df) # bigger string on next append self.store.append('df_new', df, min_itemsize={'values': 16}) df_new = DataFrame([[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) self.assertRaises(Exception, self.store.append, 'df_new', df_new) def test_create_table_index(self): wp = tm.makePanel() self.store.append('p5', wp) self.store.create_table_index('p5') assert ( self.store.handle.root.p5.table.cols.major_axis.is_indexed == True) assert (self.store.handle.root.p5.table.cols.minor_axis.is_indexed == False) # default optlevels assert (self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) assert (self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') # let's change the indexing scheme self.store.create_table_index('p5') assert (self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 6) assert (self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') self.store.create_table_index('p5', optlevel=9) assert (self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) assert (self.store.handle.root.p5.table.cols.major_axis.index.kind == 'medium') self.store.create_table_index('p5', kind='full') assert (self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 9) assert (self.store.handle.root.p5.table.cols.major_axis.index.kind == 'full') self.store.create_table_index('p5', optlevel=1, kind='light') assert (self.store.handle.root.p5.table.cols.major_axis.index.optlevel == 1) assert (self.store.handle.root.p5.table.cols.major_axis.index.kind == 'light') df = tm.makeTimeDataFrame() self.store.append('f', df[:10]) self.store.append('f', df[10:]) self.store.create_table_index('f') # try to index a non-table self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') # try to change the version supports flag from pandas.io import pytables pytables._table_supports_index = False self.assertRaises(Exception, self.store.create_table_index, 'f') # test out some versions original = tables.__version__ for v in ['2.2', '2.2b']: pytables._table_mod = None 
            pytables._table_supports_index = False
            tables.__version__ = v
            self.assertRaises(Exception, self.store.create_table_index, 'f')

        for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]:
            pytables._table_mod = None
            pytables._table_supports_index = False
            tables.__version__ = v
            self.store.create_table_index('f')

        pytables._table_mod = None
        pytables._table_supports_index = False
        tables.__version__ = original

    def test_big_table(self):
        raise nose.SkipTest('no big table')

        # create and write a big table
        wp = Panel(np.random.randn(20, 1000, 1000),
                   items=['Item%s' % i for i in xrange(20)],
                   major_axis=date_range('1/1/2000', periods=1000),
                   minor_axis=['E%s' % i for i in xrange(1000)])
        wp.ix[:, 100:200, 300:400] = np.nan

        try:
            store = HDFStore(self.scratchpath)
            store._debug_memory = True
            store.append('wp', wp)
            recons = store.select('wp')
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_append_diff_item_order(self):
        raise nose.SkipTest('append diff item order')

        wp = tm.makePanel()
        wp1 = wp.ix[:, :10, :]
        wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]

        self.store.put('panel', wp1, table=True)
        self.assertRaises(Exception, self.store.put, 'panel', wp2,
                          append=True)

    def test_table_index_incompatible_dtypes(self):
        df1 = DataFrame({'a': [1, 2, 3]})
        df2 = DataFrame({'a': [4, 5, 6]},
                        index=date_range('1/1/2000', periods=3))

        self.store.put('frame', df1, table=True)
        self.assertRaises(Exception, self.store.put, 'frame', df2,
                          table=True, append=True)

    def test_table_values_dtypes_roundtrip(self):
        df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
        self.store.append('df1', df1)
        assert df1.dtypes == self.store['df1'].dtypes

        df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
        self.store.append('df2', df2)
        assert df2.dtypes == self.store['df2'].dtypes

        # incompatible dtype
        self.assertRaises(Exception, self.store.append, 'df2', df1)

    def test_table_mixed_dtypes(self):

        # frame
        def _make_one_df():
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            return df.consolidate()

        df1 = _make_one_df()
        self.store.append('df1_mixed', df1)
        tm.assert_frame_equal(self.store.select('df1_mixed'), df1)

        # panel
        def _make_one_panel():
            wp = tm.makePanel()
            wp['obj1'] = 'foo'
            wp['obj2'] = 'bar'
            wp['bool1'] = wp['ItemA'] > 0
            wp['bool2'] = wp['ItemB'] > 0
            wp['int1'] = 1
            wp['int2'] = 2
            return wp.consolidate()

        p1 = _make_one_panel()
        self.store.append('p1_mixed', p1)
        tm.assert_panel_equal(self.store.select('p1_mixed'), p1)

        # ndim
        def _make_one_p4d():
            wp = tm.makePanel4D()
            wp['obj1'] = 'foo'
            wp['obj2'] = 'bar'
            wp['bool1'] = wp['l1'] > 0
            wp['bool2'] = wp['l2'] > 0
            wp['int1'] = 1
            wp['int2'] = 2
            return wp.consolidate()

        p4d = _make_one_p4d()
        self.store.append('p4d_mixed', p4d)
        tm.assert_panel4d_equal(self.store.select('p4d_mixed'), p4d)

    def test_remove(self):
        ts = tm.makeTimeSeries()
        df = tm.makeDataFrame()
        self.store['a'] = ts
        self.store['b'] = df
        self.store.remove('a')
        self.assertEquals(len(self.store), 1)
        tm.assert_frame_equal(df, self.store['b'])

        self.store.remove('b')
        self.assertEquals(len(self.store), 0)

        # pathing
        self.store['a'] = ts
        self.store['b/foo'] = df
        self.store.remove('foo')
        self.store.remove('b/foo')
        self.assertEquals(len(self.store), 1)

        self.store['a'] = ts
        self.store['b/foo'] = df
        self.store.remove('b')
        self.assertEquals(len(self.store), 1)

        # __delitem__
        self.store['a'] = ts
        self.store['b'] = df
        del self.store['a']
        del self.store['b']
        self.assertEquals(len(self.store), 0)

    def test_remove_where(self):
        # non-existence
        crit1 = Term('index', '>', 'foo')
        self.store.remove('a', where=[crit1])

        # try to remove non-table (with crit)
        # non-table ok (where = None)
        wp = tm.makePanel()
        self.store.put('wp', wp, table=True)
        self.store.remove('wp', [('minor_axis', ['A', 'D'])])
        rs = self.store.select('wp')
        expected = wp.reindex(minor_axis=['B', 'C'])
        tm.assert_panel_equal(rs, expected)

        # empty where
        self.store.remove('wp')
        self.store.put('wp', wp, table=True)

        # deleted number (entire table)
        n = self.store.remove('wp', [])
        assert(n == 120)

        # non-empty where
        self.store.remove('wp')
        self.store.put('wp', wp, table=True)
        self.assertRaises(Exception, self.store.remove, 'wp', ['foo'])

        # selecting a non-table with a where
        # self.store.put('wp2', wp, table=False)
        # self.assertRaises(Exception, self.store.remove,
        #                   'wp2', [('column', ['A', 'D'])])

    def test_remove_crit(self):
        wp = tm.makePanel()

        # group row removal
        date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
        crit4 = Term('major_axis', date4)
        self.store.put('wp3', wp, table=True)
        n = self.store.remove('wp3', where=[crit4])
        assert(n == 36)
        result = self.store.select('wp3')
        expected = wp.reindex(major_axis=wp.major_axis - date4)
        tm.assert_panel_equal(result, expected)

        # upper half
        self.store.put('wp', wp, table=True)
        date = wp.major_axis[len(wp.major_axis) // 2]

        crit1 = Term('major_axis', '>', date)
        crit2 = Term('minor_axis', ['A', 'D'])
        n = self.store.remove('wp', where=[crit1])
        assert(n == 56)
        n = self.store.remove('wp', where=[crit2])
        assert(n == 32)
        result = self.store['wp']
        expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
        tm.assert_panel_equal(result, expected)

        # individual row elements
        self.store.put('wp2', wp, table=True)

        date1 = wp.major_axis[1:3]
        crit1 = Term('major_axis', date1)
        self.store.remove('wp2', where=[crit1])
        result = self.store.select('wp2')
        expected = wp.reindex(major_axis=wp.major_axis - date1)
        tm.assert_panel_equal(result, expected)

        date2 = wp.major_axis[5]
        crit2 = Term('major_axis', date2)
        self.store.remove('wp2', where=[crit2])
        result = self.store['wp2']
        expected = wp.reindex(
            major_axis=wp.major_axis - date1 - Index([date2]))
        tm.assert_panel_equal(result, expected)

        date3 = [wp.major_axis[7], wp.major_axis[9]]
        crit3 = Term('major_axis', date3)
        self.store.remove('wp2', where=[crit3])
        result = self.store['wp2']
        expected = wp.reindex(
            major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
        tm.assert_panel_equal(result, expected)

        # corners
        self.store.put('wp4', wp, table=True)
        n = self.store.remove(
            'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
        result = self.store.select('wp4')
        tm.assert_panel_equal(result, wp)

    def test_terms(self):
        wp = tm.makePanel()
        p4d = tm.makePanel4D()
        self.store.put('wp', wp, table=True)
        self.store.put('p4d', p4d, table=True)

        # some invalid terms
        terms = [
            ['minor', ['A', 'B']],
            ['index', ['20121114']],
            ['index', ['20121114', '20121114']],
        ]
        for t in terms:
            self.assertRaises(Exception, self.store.select, 'wp', t)

        self.assertRaises(Exception, Term.__init__)
        self.assertRaises(Exception, Term.__init__, 'blah')
        self.assertRaises(Exception, Term.__init__, 'index')
        self.assertRaises(Exception, Term.__init__, 'index', '==')
        self.assertRaises(Exception, Term.__init__, 'index', '>', 5)

        # panel
        result = self.store.select(
            'wp', [Term('major_axis<20000108'),
                   Term('minor_axis', '=', ['A', 'B'])])
        expected = wp.truncate(after='20000108').reindex(minor=['A', 'B'])
        tm.assert_panel_equal(result, expected)

        # p4d
        result = self.store.select(
            'p4d', [Term('major_axis<20000108'),
                    Term('minor_axis', '=', ['A', 'B']),
                    Term('items', '=', ['ItemA', 'ItemB'])])
        expected = p4d.truncate(after='20000108').reindex(
            minor=['A', 'B'], items=['ItemA', 'ItemB'])
        tm.assert_panel4d_equal(result, expected)

        # valid terms
        terms = [
            dict(field='major_axis', op='>', value='20121114'),
            ('major_axis', '20121114'),
            ('major_axis', '>', '20121114'),
            (('major_axis', ['20121114', '20121114']),),
            ('major_axis', datetime(2012, 11, 14)),
            'major_axis>20121114',
            'major_axis>20121114',
            'major_axis>20121114',
            (('minor_axis', ['A', 'B']),),
            (('minor_axis', ['A', 'B']),),
            ((('minor_axis', ['A', 'B']),),),
            (('items', ['ItemA', 'ItemB']),),
            ('items=ItemA'),
        ]
        for t in terms:
            self.store.select('wp', t)
            self.store.select('p4d', t)

        # valid for p4d only
        terms = [
            (('labels', '=', ['l1', 'l2']),),
            Term('labels', '=', ['l1', 'l2']),
        ]
        for t in terms:
            self.store.select('p4d', t)

    def test_series(self):
        s = tm.makeStringSeries()
        self._check_roundtrip(s, tm.assert_series_equal)

        ts = tm.makeTimeSeries()
        self._check_roundtrip(ts, tm.assert_series_equal)

        ts2 = Series(ts.index, Index(ts.index, dtype=object))
        self._check_roundtrip(ts2, tm.assert_series_equal)

        ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
                                      dtype=object))
        self._check_roundtrip(ts3, tm.assert_series_equal)

    def test_sparse_series(self):
        s = tm.makeStringSeries()
        s[3:5] = np.nan
        ss = s.to_sparse()
        self._check_roundtrip(ss, tm.assert_series_equal,
                              check_series_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_roundtrip(ss2, tm.assert_series_equal,
                              check_series_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_roundtrip(ss3, tm.assert_series_equal,
                              check_series_type=True)

    def test_sparse_frame(self):
        s = tm.makeDataFrame()
        s.ix[3:5, 1:3] = np.nan
        s.ix[8:10, -2] = np.nan
        ss = s.to_sparse()
        self._check_double_roundtrip(ss, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_double_roundtrip(ss2, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_double_roundtrip(ss3, tm.assert_frame_equal,
                                     check_frame_type=True)

    def test_sparse_panel(self):
        items = ['x', 'y', 'z']
        p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items))
        sp = p.to_sparse()
        self._check_double_roundtrip(sp, tm.assert_panel_equal,
                                     check_panel_type=True)

        sp2 = p.to_sparse(kind='integer')
        self._check_double_roundtrip(sp2, tm.assert_panel_equal,
                                     check_panel_type=True)

        sp3 = p.to_sparse(fill_value=0)
        self._check_double_roundtrip(sp3, tm.assert_panel_equal,
                                     check_panel_type=True)

    def test_float_index(self):
        # GH #454
        index = np.random.randn(10)
        s = Series(np.random.randn(10), index=index)
        self._check_roundtrip(s, tm.assert_series_equal)

    def test_tuple_index(self):
        # GH #492
        col = np.arange(10)
        idx = [(0., 1.), (2., 3.), (4., 5.)]
        data = np.random.randn(30).reshape((3, 10))
        DF = DataFrame(data, index=idx, columns=col)
        self._check_roundtrip(DF, tm.assert_frame_equal)

    def test_index_types(self):
        values = np.random.randn(2)

        func = lambda l, r: tm.assert_series_equal(l, r, True, True, True)

        ser = Series(values, [0, 'y'])
        self._check_roundtrip(ser, func)

        ser = Series(values, [datetime.today(), 0])
        self._check_roundtrip(ser, func)

        ser = Series(values, ['y', 0])
        self._check_roundtrip(ser, func)

        from datetime import date
        ser = Series(values, [date.today(), 'a'])
        self._check_roundtrip(ser, func)

        ser = Series(values, [1.23, 'b'])
        self._check_roundtrip(ser, func)

        ser = Series(values, [1, 1.53])
        self._check_roundtrip(ser, func)

        ser = Series(values, [1, 5])
        self._check_roundtrip(ser, func)

        ser = Series(values, [datetime(2012, 1, 1), datetime(2012, 1, 2)])
        self._check_roundtrip(ser, func)

    def test_timeseries_preepoch(self):
        if sys.version_info[0] == 2 and sys.version_info[1] < 7:
            raise nose.SkipTest

        dr = bdate_range('1/1/1940', '1/1/1960')
        ts = Series(np.random.randn(len(dr)), index=dr)
        try:
            self._check_roundtrip(ts, tm.assert_series_equal)
        except OverflowError:
            raise nose.SkipTest('known failure on some windows platforms')

    def test_frame(self):
        df = tm.makeDataFrame()

        # put in some random NAs
        df.values[0, 0] = np.nan
        df.values[5, 3] = np.nan

        self._check_roundtrip_table(df, tm.assert_frame_equal)
        self._check_roundtrip(df, tm.assert_frame_equal)

        self._check_roundtrip_table(df, tm.assert_frame_equal,
                                    compression=True)
        self._check_roundtrip(df, tm.assert_frame_equal,
                              compression=True)

        tdf = tm.makeTimeDataFrame()
        self._check_roundtrip(tdf, tm.assert_frame_equal)
        self._check_roundtrip(tdf, tm.assert_frame_equal,
                              compression=True)

        # not consolidated
        df['foo'] = np.random.randn(len(df))
        self.store['df'] = df
        recons = self.store['df']
        self.assert_(recons._data.is_consolidated())

        # empty
        self._check_roundtrip(df[:0], tm.assert_frame_equal)

    def test_empty_series_frame(self):
        s0 = Series()
        s1 = Series(name='myseries')
        df0 = DataFrame()
        df1 = DataFrame(index=['a', 'b', 'c'])
        df2 = DataFrame(columns=['d', 'e', 'f'])

        self._check_roundtrip(s0, tm.assert_series_equal)
        self._check_roundtrip(s1, tm.assert_series_equal)
        self._check_roundtrip(df0, tm.assert_frame_equal)
        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

    def test_can_serialize_dates(self):
        rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
        self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_timezones(self):
        rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
        try:
            store = HDFStore(self.scratchpath)
            store['frame'] = frame
            recons = store['frame']
            self.assert_(recons.index.equals(rng))
            self.assertEquals(rng.tz, recons.index.tz)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_fixed_offset_tz(self):
        rng = date_range('1/1/2000 00:00:00-07:00',
                         '1/30/2000 00:00:00-07:00')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
        try:
            store = HDFStore(self.scratchpath)
            store['frame'] = frame
            recons = store['frame']
            self.assert_(recons.index.equals(rng))
            self.assertEquals(rng.tz, recons.index.tz)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_store_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=['A', 'B', 'C'])

        self._check_roundtrip(frame, tm.assert_frame_equal)
        self._check_roundtrip(frame.T, tm.assert_frame_equal)
        self._check_roundtrip(frame['A'], tm.assert_series_equal)

        # check that the names are stored
        try:
            store = HDFStore(self.scratchpath)
            store['frame'] = frame
            recons = store['frame']
            assert(recons.index.names == ['foo', 'bar'])
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_store_index_name(self):
        df = tm.makeDataFrame()
        df.index.name = 'foo'
        try:
            store = HDFStore(self.scratchpath)
            store['frame'] = df
            recons = store['frame']
            assert(recons.index.name == 'foo')
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_store_series_name(self):
        df = tm.makeDataFrame()
        series = df['A']
        try:
            store = HDFStore(self.scratchpath)
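            # the Series name should round-trip through the store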
            store['series'] = series
            recons = store['series']
            assert(recons.name == 'A')
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_store_mixed(self):

        def _make_one():
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['int1'] = 1
            df['int2'] = 2
            return df.consolidate()

        df1 = _make_one()
        df2 = _make_one()

        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

        self.store['obj'] = df1
        tm.assert_frame_equal(self.store['obj'], df1)
        self.store['obj'] = df2
        tm.assert_frame_equal(self.store['obj'], df2)

        # check that can store Series of all of these types
        self._check_roundtrip(df1['obj1'], tm.assert_series_equal)
        self._check_roundtrip(df1['bool1'], tm.assert_series_equal)
        self._check_roundtrip(df1['int1'], tm.assert_series_equal)

        # try with compression
        self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
                              compression=True)
        self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
                              compression=True)
        self._check_roundtrip(df1['int1'], tm.assert_series_equal,
                              compression=True)
        self._check_roundtrip(df1, tm.assert_frame_equal, compression=True)

    def test_wide(self):
        wp = tm.makePanel()
        self._check_roundtrip(wp, tm.assert_panel_equal)

    def test_wide_table(self):
        wp = tm.makePanel()
        self._check_roundtrip_table(wp, tm.assert_panel_equal)

    def test_wide_table_dups(self):
        wp = tm.makePanel()
        try:
            store = HDFStore(self.scratchpath)
            store._quiet = True
            store.put('panel', wp, table=True)
            store.put('panel', wp, table=True, append=True)
            recons = store['panel']
            tm.assert_panel_equal(recons, wp)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_long(self):
        def _check(left, right):
            tm.assert_panel_equal(left.to_panel(), right.to_panel())

        wp = tm.makePanel()
        self._check_roundtrip(wp.to_frame(), _check)

        # empty
        # self._check_roundtrip(wp.to_frame()[:0], _check)

    def test_longpanel(self):
        pass

    def test_overwrite_node(self):
        self.store['a'] = tm.makeTimeDataFrame()
        ts = tm.makeTimeSeries()
        self.store['a'] = ts
        tm.assert_series_equal(self.store['a'], ts)

    def test_select(self):
        wp = tm.makePanel()

        # put/select ok
        self.store.remove('wp')
        self.store.put('wp', wp, table=True)
        self.store.select('wp')

        # non-table ok (where = None)
        self.store.remove('wp')
        self.store.put('wp2', wp, table=False)
        self.store.select('wp2')

        # selection on the non-indexable with a large number of columns
        wp = Panel(np.random.randn(100, 100, 100),
                   items=['Item%03d' % i for i in xrange(100)],
                   major_axis=date_range('1/1/2000', periods=100),
                   minor_axis=['E%03d' % i for i in xrange(100)])

        self.store.remove('wp')
        self.store.append('wp', wp)
        items = ['Item%03d' % i for i in xrange(80)]
        result = self.store.select('wp', Term('items', items))
        expected = wp.reindex(items=items)
        tm.assert_panel_equal(expected, result)

        # selecting a non-table with a where
        # self.assertRaises(Exception, self.store.select,
        #                   'wp2', ('column', ['A', 'D']))

    def test_panel_select(self):
        wp = tm.makePanel()
        self.store.put('wp', wp, table=True)
        date = wp.major_axis[len(wp.major_axis) // 2]

        crit1 = ('major_axis', '>=', date)
        crit2 = ('minor_axis', '=', ['A', 'D'])

        result = self.store.select('wp', [crit1, crit2])
        expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
        tm.assert_panel_equal(result, expected)

        result = self.store.select(
            'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])])
        expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
        tm.assert_panel_equal(result, expected)

    def test_frame_select(self):
        df = tm.makeTimeDataFrame()
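        # write as a table so that where-criteria can be used on select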
        self.store.put('frame', df, table=True)
        date = df.index[len(df) // 2]

        crit1 = ('index', '>=', date)
        crit2 = ('columns', ['A', 'D'])
        crit3 = ('columns', 'A')

        result = self.store.select('frame', [crit1, crit2])
        expected = df.ix[date:, ['A', 'D']]
        tm.assert_frame_equal(result, expected)

        result = self.store.select('frame', [crit3])
        expected = df.ix[:, ['A']]
        tm.assert_frame_equal(result, expected)

        # other indices for a frame

        # integer
        df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
        self.store.append('df_int', df)
        self.store.select(
            'df_int', [Term("index<10"), Term("columns", "=", ["A"])])

        # float
        df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)),
                       index=np.arange(20, dtype='f8'))
        self.store.append('df_float', df)
        self.store.select(
            'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])

        # can't select if not written as table
        # self.store['frame'] = df
        # self.assertRaises(Exception, self.store.select,
        #                   'frame', [crit1, crit2])

    def test_select_filter_corner(self):
        df = DataFrame(np.random.randn(50, 100))
        df.index = ['%.3d' % c for c in df.index]
        df.columns = ['%.3d' % c for c in df.columns]
        self.store.put('frame', df, table=True)

        crit = Term('columns', df.columns[:75])
        result = self.store.select('frame', [crit])
        tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])

    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        store = HDFStore(self.scratchpath, 'w', **options)
        try:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def _check_double_roundtrip(self, obj, comparator, compression=False,
                                **kwargs):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        store = HDFStore(self.scratchpath, 'w', **options)
        try:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
            store['obj'] = retrieved
            again = store['obj']
            comparator(again, obj, **kwargs)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def _check_roundtrip_table(self, obj, comparator, compression=False):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        store = HDFStore(self.scratchpath, 'w', **options)
        try:
            store.put('obj', obj, table=True)
            retrieved = store['obj']
            # sorted_obj = _test_sort(obj)
            comparator(retrieved, obj)
        finally:
            store.close()
            os.remove(self.scratchpath)

    def test_legacy_read(self):
        pth = curpath()
        store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
        store['a']
        store['b']
        store['c']
        store['d']
        store.close()

    def test_legacy_table_read(self):
        # legacy table types
        pth = curpath()
        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
        store.select('df1')
        store.select('df2')
        store.select('wp1')

        # force the frame
        store.select('df2', typ='legacy_frame')

        # old version (this still throws an exception though)
        import warnings
        warnings.filterwarnings('ignore', category=IncompatibilityWarning)
        self.assertRaises(Exception, store.select, 'wp1',
                          Term('minor_axis', '=', 'B'))
        warnings.filterwarnings('always', category=IncompatibilityWarning)

        store.close()

    def test_legacy_table_write(self):
        # legacy table types
        pth = curpath()
        df = tm.makeDataFrame()
        wp = tm.makePanel()

        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a')
        self.assertRaises(Exception, store.append, 'df1', df)
        self.assertRaises(Exception, store.append, 'wp1', wp)
        store.close()

    def test_store_datetime_fractional_secs(self):
        dt = datetime(2012, 1, 2, 3, 4, 5, 123456)
        series = Series([0], [dt])
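        # fractional seconds (microseconds) should survive the round trip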
        self.store['a'] = series
        self.assertEquals(self.store['a'].index[0], dt)

    def test_tseries_indices_series(self):
        idx = tm.makeDateIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)

        idx = tm.makePeriodIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)

    def test_tseries_indices_frame(self):
        idx = tm.makeDateIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), index=idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)

        idx = tm.makePeriodIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)

    def test_unicode_index(self):
        unicode_values = [u'\u03c3', u'\u03c3\u03c3']
        s = Series(np.random.randn(len(unicode_values)), unicode_values)
        self._check_roundtrip(s, tm.assert_series_equal)

    def test_store_datetime_mixed(self):
        df = DataFrame({'a': [1, 2, 3], 'b': [1., 2., 3.],
                        'c': ['a', 'b', 'c']})
        ts = tm.makeTimeSeries()
        df['d'] = ts.index[:3]
        self._check_roundtrip(df, tm.assert_frame_equal)

    def test_cant_write_multiindex_table(self):
        # for now, #1848
        df = DataFrame(np.random.randn(10, 4),
                       index=[np.arange(5).repeat(2),
                              np.tile(np.arange(2), 5)])

        self.assertRaises(Exception, self.store.put,
                          'foo', df, table=True)
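
    # A minimal sketch (not a test) of the table workflow exercised above:
    # append in table format, build an index on the datetime axis, then
    # select with a Term query.  The key 'df_example' and the column names
    # are illustrative assumptions only; it reuses the scratchpath fixture
    # and module-level imports (np, DataFrame, date_range, HDFStore, Term).
    def _example_table_query(self):
        df = DataFrame(np.random.randn(20, 2), columns=['A', 'B'],
                       index=date_range('1/1/2000', periods=20))
        store = HDFStore(self.scratchpath, 'w')
        try:
            # append writes in the queryable table format
            store.append('df_example', df)
            # optional: index the axis for faster where-based selects
            store.create_table_index('df_example')
            result = store.select('df_example',
                                  [Term('index', '>=', df.index[10])])
            assert len(result) == 10
        finally:
            store.close()
            os.remove(self.scratchpath)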