def create_data(): """ create the pickle/msgpack data """ data = { 'A': [0., 1., 2., 3., np.nan], 'B': [0, 1, 0, 1, 0], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': date_range('1/1/2009', periods=5), 'E': [0., 1, Timestamp('20100101'), 'foo', 2.] } index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float=Series(data['A']), int=Series(data['B']), mixed=Series(data['E']), ts=TimeSeries(np.arange(10).astype(np.int64), index=date_range('20130101',periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=['one', 'two'])), dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']), cat=Series(Categorical(['foo', 'bar', 'baz'])), per=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict(float=DataFrame(dict(A=series['float'], B=series['float'] + 1)), int=DataFrame(dict(A=series['int'], B=series['int'] + 1)), mixed=DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C', 'D']])), mi=DataFrame(dict(A=np.arange(5).astype(np.float64), B=np.arange(5).astype(np.int64)), index=MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'baz'], ['one', 'two', 'one', 'two', 'three']])), names=['first', 'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=['A', 'B', 'A']), cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))), cat_and_float=DataFrame(dict(A=Categorical(['foo', 'bar', 'baz']), B=np.arange(3).astype(np.int64))), mixed_dup=mixed_dup_df) mixed_dup_panel = Panel(dict(ItemA=frame['float'], ItemB=frame['int'])) mixed_dup_panel.items = ['ItemA', 'ItemA'] panel = dict(float=Panel(dict(ItemA=frame['float'], ItemB=frame['float'] + 1)), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=['A', 'B', 'A']), mixed_dup=mixed_dup_panel) return dict(series=series, frame=frame, panel=panel, index=index, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()))
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two'] ])), names=[u'first', u'second'])) series = dict( float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict( float=DataFrame({ u'A': series[u'float'], u'B': series[u'float'] + 1 }), int=DataFrame({ u'A': series[u'int'], u'B': series[u'int'] + 1 }), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame( { u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64) }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET') }, index=range(5)), dt_mixed2_tzs=DataFrame( { u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC') }, index=range(5))) with catch_warnings(record=True): mixed_dup_panel = Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'int'] }) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1 }), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < '0.19.2': timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = { 'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1) } return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']])), names=[u'first', u'second'])) series = dict(float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict(float=DataFrame({u'A': series[u'float'], u'B': series[u'float'] + 1}), int=DataFrame({u'A': series[u'int'], u'B': series[u'int'] + 1}), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame({u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET')}, index=range(5)) ) mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1}), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()))
def create_data(): """ create the pickle data """ from distutils.version import LooseVersion import numpy as np import pandas from pandas import (Series,TimeSeries,DataFrame,Panel, SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel, Index,MultiIndex,PeriodIndex, date_range,period_range,bdate_range,Timestamp,Categorical) nan = np.nan data = { 'A': [0., 1., 2., 3., np.nan], 'B': [0, 1, 0, 1, 0], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': date_range('1/1/2009', periods=5), 'E' : [0., 1, Timestamp('20100101'),'foo',2.], } index = dict(int = Index(np.arange(10)), date = date_range('20130101',periods=10), period = period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2 = MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float = Series(data['A']), int = Series(data['B']), mixed = Series(data['E']), ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)), mi = Series(np.arange(5).astype(np.float64),index=MultiIndex.from_tuples(tuple(zip(*[[1,1,2,2,2], [3,4,3,4,5]])), names=['one','two'])), dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']), cat=Series(Categorical(['foo', 'bar', 'baz']))) frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])), mi = DataFrame(dict(A = np.arange(5).astype(np.float64), B = np.arange(5).astype(np.int64)), index=MultiIndex.from_tuples(tuple(zip(*[['bar','bar','baz','baz','baz'], ['one','two','one','two','three']])), names=['first','second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=['A', 'B', 'A']), cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))), cat_and_float=DataFrame(dict(A=Categorical(['foo', 'bar', 'baz']), B=np.arange(3).astype(np.int64))), ) panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)), dup = Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=['A', 'B', 'A'])) if LooseVersion(pandas.__version__) >= '0.14.1': # Pre-0.14.1 versions generated non-unpicklable mixed-type frames and # panels if their columns/items were non-unique. mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") mixed_dup_panel = Panel(dict(ItemA=frame['float'], ItemB=frame['int'])) mixed_dup_panel.items = ['ItemA', 'ItemA'] frame['mixed_dup'] = mixed_dup_df panel['mixed_dup'] = mixed_dup_panel return dict( series = series, frame = frame, panel = panel, index = index, mi = mi, sp_series = dict(float = _create_sp_series(), ts = _create_sp_tsseries()), sp_frame = dict(float = _create_sp_frame()) )
def create_data(): """ create the pickle/msgpack data """ data = { 'A': [0., 1., 2., 3., np.nan], 'B': [0, 1, 0, 1, 0], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': date_range('1/1/2009', periods=5), 'E': [0., 1, Timestamp('20100101'), 'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101')) if LooseVersion(pandas.__version__) >= '0.17.0': scalars['period'] = Period('2012', 'M') index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples(tuple( zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float=Series(data['A']), int=Series(data['B']), mixed=Series(data['E']), ts=TimeSeries(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=['one', 'two'])), dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']), cat=Series(Categorical(['foo', 'bar', 'baz']))) if LooseVersion(pandas.__version__) >= '0.17.0': series['period'] = Series([Period('2000Q1')] * 5) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict( float=DataFrame(dict(A=series['float'], B=series['float'] + 1)), int=DataFrame(dict(A=series['int'], B=series['int'] + 1)), mixed=DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C', 'D']])), mi=DataFrame(dict(A=np.arange(5).astype(np.float64), B=np.arange(5).astype(np.int64)), index=MultiIndex.from_tuples(tuple( zip(*[['bar', 'bar', 'baz', 'baz', 'baz'], ['one', 'two', 'one', 'two', 'three']])), names=['first', 'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=['A', 'B', 'A']), cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))), cat_and_float=DataFrame( dict(A=Categorical(['foo', 'bar', 'baz']), B=np.arange(3).astype(np.int64))), mixed_dup=mixed_dup_df) mixed_dup_panel = Panel(dict(ItemA=frame['float'], ItemB=frame['int'])) mixed_dup_panel.items = ['ItemA', 'ItemA'] panel = dict(float=Panel( dict(ItemA=frame['float'], ItemB=frame['float'] + 1)), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=['A', 'B', 'A']), mixed_dup=mixed_dup_panel) return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()))
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two'] ])), names=[u'first', u'second'])) series = dict( float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict( float=DataFrame({ u'A': series[u'float'], u'B': series[u'float'] + 1 }), int=DataFrame({ u'A': series[u'int'], u'B': series[u'int'] + 1 }), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame( { u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64) }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET') }, index=range(5))) mixed_dup_panel = Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'int'] }) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1 }), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()))
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) if _loose_version >= LooseVersion('0.18'): from pandas import RangeIndex index['range'] = RangeIndex(10) if _loose_version >= LooseVersion('0.21'): from pandas import interval_range index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']])), names=[u'first', u'second'])) series = dict(float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict(float=DataFrame({u'A': series[u'float'], u'B': series[u'float'] + 1}), int=DataFrame({u'A': series[u'int'], u'B': series[u'int'] + 1}), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame({u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET')}, index=range(5)), dt_mixed2_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) with catch_warnings(record=True): filterwarnings("ignore", "\\nPanel", FutureWarning) mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1}), dup=Panel( np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < LooseVersion('0.19.2'): timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = {'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1)} return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
frame = dict(float=DataFrame(dict(A=series['float'], B=series['float'] + 1)), int=DataFrame(dict(A=series['int'], B=series['int'] + 1)), mixed=DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C', 'D']])), mi=DataFrame(dict(A=np.arange(5).astype(np.float64), B=np.arange(5).astype(np.int64)), index=MultiIndex.from_tuples(tuple(zip(*[['bar', 'bar', 'baz', 'baz', 'baz'], ['one', 'two', 'one', 'two', 'three']])), names=['first', 'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=['A', 'B', 'A']), cat_onecol=DataFrame(dict(A=Categorical(['foo', 'bar']))), cat_and_float=DataFrame(dict(A=Categorical(['foo', 'bar', 'baz']), B=np.arange(3).astype(np.int64))), mixed_dup=mixed_dup_df) mixed_dup_panel = Panel(dict(ItemA=frame['float'], ItemB=frame['int'])) mixed_dup_panel.items = ['ItemA', 'ItemA'] panel = dict(float=Panel(dict(ItemA=frame['float'], ItemB=frame['float'] + 1)), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=['A', 'B', 'A']), mixed_dup=mixed_dup_panel) return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()))
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10)) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']])), names=[u'first', u'second'])) series = dict(float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict(float=DataFrame({u'A': series[u'float'], u'B': series[u'float'] + 1}), int=DataFrame({u'A': series[u'int'], u'B': series[u'int'] + 1}), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame({u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET')}, index=range(5)), dt_mixed2_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) with catch_warnings(record=True): mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1}), dup=Panel( np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < '0.19.2': timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = {'DateOffset': DateOffset(years=1), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Hour': Hour(1), 'Minute': Minute(1)} return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)