def test_resample_5minute(self):
        """Resampling minute periods to 5 minutes must give the same series
        as resampling the timestamp-converted data."""
        minutes = period_range('1/1/2000', '1/5/2000', freq='T')
        values = np.random.randn(len(minutes))
        period_series = TimeSeries(values, index=minutes)

        from_periods = period_series.resample('5min')
        from_stamps = period_series.to_timestamp().resample('5min')
        assert_series_equal(from_periods, from_stamps)
示例#2
0
def save2csv(r, csvpath=None, fileNamePrefix=''):
    """
    Parse a Flukso response and save it to csv.

    Parameters
    ----------
    r : object
        Response whose ``json()`` yields records; item 0 of each record is
        passed to ``dt.fromtimestamp`` and item 1 is stored as the value.
    csvpath : str, optional
        Directory to write into; the current working directory when None.
    fileNamePrefix : str, optional
        Leading part of the file name, followed by
        ``_FROM_<first index>_TO_<last index>.csv``.

    Raises
    ------
    Exception
        Parsing errors are announced on stdout and then re-raised unchanged.
    """

    # Create the time series
    try:
        d = {}
        for tup in r.json():
            d[dt.fromtimestamp(tup[0])] = tup[1]
        Ts = TimeSeries(data=d)
        # Filtering with Ts[Ts != 'nan'] gives an error.  Should be checked,
        # but for now the nan's are kept.

    except Exception:
        # A parenthesised single-argument print runs on Python 2 and 3 alike.
        print("-------> Problem with Flukso data parsing <-------")
        raise

    # save to file
    if csvpath is None:
        csvpath = os.getcwd()
    stamp_format = "%Y-%m-%d_%H-%M-%S"
    s = strftime(stamp_format, Ts.index[0].timetuple())
    e = strftime(stamp_format, Ts.index[-1].timetuple())
    Ts.to_csv(
        os.path.join(csvpath,
                     fileNamePrefix + '_FROM_' + s + '_TO_' + e + '.csv'))
示例#3
0
    def test_resample_5minute(self):
        """A minute-frequency period series resampled to '5min' must equal
        the resample of the same data converted to timestamps."""
        index = period_range('1/1/2000', '1/5/2000', freq='T')
        series = TimeSeries(np.random.randn(len(index)), index=index)

        actual = series.resample('5min')
        reference = series.to_timestamp().resample('5min')
        assert_series_equal(actual, reference)
示例#4
0
def save2csv(r, csvpath=None, fileNamePrefix=''):
    """
    Parse a Flukso response and save it to csv.

    Parameters
    ----------
    r : object
        Response whose ``json()`` yields records; item 0 of each record is
        passed to ``dt.fromtimestamp`` and item 1 is stored as the value.
    csvpath : str, optional
        Directory to write into; the current working directory when None.
    fileNamePrefix : str, optional
        Leading part of the file name, followed by
        ``_FROM_<first index>_TO_<last index>.csv``.

    Raises
    ------
    Exception
        Parsing errors are announced on stdout and then re-raised unchanged.
    """

    # Create the time series
    try:
        d = {}
        for tup in r.json():
            d[dt.fromtimestamp(tup[0])] = tup[1]
        Ts = TimeSeries(data=d)
        # Filtering with Ts[Ts != 'nan'] gives an error.  Should be checked,
        # but for now the nan's are kept.

    except Exception:
        # A parenthesised single-argument print runs on Python 2 and 3 alike.
        print("-------> Problem with Flukso data parsing <-------")
        raise

    # save to file
    if csvpath is None:
        csvpath = os.getcwd()
    stamp_format = "%Y-%m-%d_%H-%M-%S"
    s = strftime(stamp_format, Ts.index[0].timetuple())
    e = strftime(stamp_format, Ts.index[-1].timetuple())
    Ts.to_csv(os.path.join(csvpath, fileNamePrefix + '_FROM_' + s +
                                    '_TO_' + e + '.csv'))
示例#5
0
 def test_pad_nan(self):
     """Pad-filling must carry values forward and keep a leading NaN."""
     x = TimeSeries([np.nan, 1., np.nan, 3., np.nan],
                    ['z', 'a', 'b', 'c', 'd'], dtype=float)
     x = x.fillna(method='pad')
     expected = TimeSeries([np.nan, 1.0, 1.0, 3.0, 3.0],
                           ['z', 'a', 'b', 'c', 'd'], dtype=float)
     assert_series_equal(x[1:], expected[1:])
     # Check each head value on its own: a second positional argument to
     # assert_/assertTrue is only the failure message and is never tested.
     self.assertTrue(np.isnan(x[0]))
     self.assertTrue(np.isnan(expected[0]))
示例#6
0
    def test_median(self):
        """Series.median must agree with np.median on the fixture series,
        on a copy with every other value set to NaN, and on integer data."""
        full = self.ts
        self.assertAlmostEqual(np.median(full), full.median())

        gappy = self.ts.copy()
        gappy[::2] = np.NaN

        reference = np.median(gappy.valid())
        self.assertAlmostEqual(reference, gappy.median())

        # test with integers, test failure
        ones = TimeSeries(np.ones(10, dtype=int), index=range(10))
        self.assertAlmostEqual(np.median(ones), ones.median())
示例#7
0
文件: mypandas.py 项目: mrayson/soda
    def __init__(self, data, dtime, **kwargs):
        """
        Time series w/ specific IO methods

        Parameters
        ----------
        data : values handed to ``TimeSeries.__init__``.
        dtime : index handed to ``TimeSeries.__init__``.
        **kwargs : copied verbatim into the instance ``__dict__`` before the
            base-class initialiser runs.
        """
        self.__dict__.update(kwargs)

        TimeSeries.__init__(self, data, index=dtime)

        # Time coordinates
        self.nt = self.index.shape
        # NOTE(review): baseyear is not assigned here; presumably it arrives
        # through kwargs or is a class attribute -- confirm.
        self.tsec = othertime.SecondsSince(
            self.index, basetime=pd.datetime(self.baseyear, 1, 1))
示例#8
0
    def __init__(self, data, dtime, **kwargs):
        """
        Build the series from *data* indexed by *dtime*, store any extra
        keyword arguments as instance attributes, and derive the time
        coordinates ``nt`` and ``tsec``.
        """
        # Extra keyword arguments become plain instance attributes.
        self.__dict__.update(kwargs)

        TimeSeries.__init__(self, data, index=dtime)

        # Time coordinates
        self.nt = self.index.shape
        reference_time = pd.datetime(self.baseyear, 1, 1)
        self.tsec = othertime.SecondsSince(self.index,
                                           basetime=reference_time)
示例#9
0
文件: dividend.py 项目: glei/datafeed
    def _split(self, frame):
        """Divide ``frame['adjclose']`` by the split ratio.

        The ratio ``share_afterward`` is anchored on the day before the
        ex-date and 1.0 on today; both are back-filled onto ``frame.index``.
        Nothing happens when ``share_afterward`` equals 1.
        """
        if self.share_afterward == 1:
            return

        day_before_ex = self.ex_date - datetime.timedelta(days=1)
        ratios = TimeSeries([self.share_afterward, 1.0],
                            index=[day_before_ex, datetime.date.today()])
        aligned = ratios.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] / aligned
示例#10
0
文件: dividend.py 项目: glei/datafeed
    def _divide(self, frame):
        """Subtract the cash dividend from ``frame['adjclose']``.

        ``cash_afterward`` is anchored on the day before the ex-date and 0.0
        on today; both are back-filled onto ``frame.index``.  Nothing happens
        when ``cash_afterward`` equals 0.
        """
        if self.cash_afterward == 0:
            return

        day_before_ex = self.ex_date - datetime.timedelta(days=1)
        amounts = TimeSeries([self.cash_afterward, 0.0],
                             index=[day_before_ex, datetime.date.today()])
        aligned = amounts.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] - aligned
示例#11
0
    def _split(self, frame):
        """Apply the share split to the ``adjclose`` column of *frame*.

        A two-point series (split ratio on the day before the ex-date, 1.0
        on today) is back-filled over ``frame.index`` and used as divisor.
        Returns early when ``share_afterward`` is 1.
        """
        if self.share_afterward == 1:
            return

        anchor_day = self.ex_date - datetime.timedelta(days=1)
        anchors = [anchor_day, datetime.date.today()]
        factors = TimeSeries([self.share_afterward, 1.0], index=anchors)
        divisor = factors.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] / divisor
示例#12
0
    def _divide(self, frame):
        """Apply the cash dividend to the ``adjclose`` column of *frame*.

        A two-point series (cash amount on the day before the ex-date, 0.0
        on today) is back-filled over ``frame.index`` and subtracted.
        Returns early when ``cash_afterward`` is 0.
        """
        if self.cash_afterward == 0:
            return

        anchor_day = self.ex_date - datetime.timedelta(days=1)
        anchors = [anchor_day, datetime.date.today()]
        payouts = TimeSeries([self.cash_afterward, 0.0], index=anchors)
        deduction = payouts.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] - deduction
示例#13
0
    def test_resample_fill_missing(self):
        """Annual resampling of a period series with gaps must equal the
        timestamp-based resample converted back to annual periods, both
        without a fill method and with forward fill."""
        years = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
        series = TimeSeries(np.random.randn(4), index=years)
        as_stamps = series.to_timestamp()

        # First pass uses the defaults, second pass forward-fills.
        for options in ({}, {'fill_method': 'ffill'}):
            filled = series.resample('A', **options)
            expected = as_stamps.resample('A', **options).to_period('A')
            assert_series_equal(filled, expected)
示例#14
0
    def test_resample_fill_missing(self):
        """Resampling sparse annual periods must match resampling the
        equivalent timestamps and converting the result to periods."""
        sparse_years = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
        by_period = TimeSeries(np.random.randn(4), index=sparse_years)
        by_stamp = by_period.to_timestamp()

        def compare(**resample_kwargs):
            # Same resample arguments on both representations.
            filled = by_period.resample('A', **resample_kwargs)
            expected = by_stamp.resample('A',
                                         **resample_kwargs).to_period('A')
            assert_series_equal(filled, expected)

        compare()
        compare(fill_method='ffill')
示例#15
0
	def setUp(self):
		""" Install mocks on ``fore`` and build a ten-point series fixture. """
		# Swap the forecast and graphics attributes of ``fore`` for mocks.
		for attribute in ('forecast', 'graphics'):
			setattr(fore, attribute, Mock())
		self.config_file = Mock()
		ten_periods = PeriodIndex(start=ctime(10000), periods=10)
		self.tseries = TimeSeries(data=range(10), index=ten_periods)
示例#16
0
def test():
    """DataFrame editor test

    Opens the editor on a mixed-type frame, on its first row, on a large
    sorted random frame and on a small TimeSeries, printing each result.
    Returns the result of the last edit.
    """
    from numpy import nan

    def edit_and_report(obj):
        # Run the editor on obj and echo what it handed back.
        out = test_edit(obj)
        print("out:", out)
        return out

    rows = [
        [True, "bool"],
        [1+1j, "complex"],
        ['test', "string"],
        [1.11, "float"],
        [1, "int"],
        [np.random.rand(3, 3), "Unkown type"],
        ["Large value", 100],
        ["áéí", "unicode"],
    ]
    labels = ['a', 'b', nan, nan, nan, 'c', "Test global max", 'd']
    df1 = DataFrame(rows, index=labels, columns=[nan, 'Type'])
    edit_and_report(df1)
    edit_and_report(df1.iloc[0])

    df1 = DataFrame(np.random.rand(100001, 10))
    # Sorting large DataFrame takes time
    df1.sort(columns=[0, 1], inplace=True)
    edit_and_report(df1)

    return edit_and_report(TimeSeries(np.arange(10)))
示例#17
0
文件: api.py 项目: akshatknsl/synaps
    def get_metric_statistics(self,
                              project_id,
                              end_time,
                              metric_name,
                              namespace,
                              period,
                              start_time,
                              statistics,
                              unit=None,
                              dimensions=None):
        """
        Return the list of statistics for the metric that matches the given
        conditions.

        Returns a ``(datapoints, unit)`` pair.  ``datapoints`` is the result
        of filtering out empty rows from ``(index, row)`` pairs built over a
        minute-spaced range from ``start_time`` to ``end_time`` (both
        truncated to the minute).  ``unit`` is the caller's value, or the
        value looked up for the metric when the caller passed nothing or
        the string "None".
        """
        def to_datapoint(df, idx):
            # Row for idx with missing values dropped; implicitly returns
            # None for an all-missing row so filter() below discards it.
            datapoint = df.ix[idx].dropna()
            if len(datapoint):
                return idx, datapoint

        end_idx = end_time.replace(second=0, microsecond=0)
        start_idx = start_time.replace(second=0, microsecond=0)
        # Analysis range starts period/60 minutes before the requested start.
        start_ana_idx = start_idx - datetools.Minute() * (period / 60)
        daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
        daterange_ana = DateRange(start_ana_idx,
                                  end_idx,
                                  offset=datetools.Minute())

        # load default unit for metric from database
        if unit == "None" or not unit:
            metric_key = self.cass.get_metric_key(project_id=project_id,
                                                  namespace=namespace,
                                                  metric_name=metric_name,
                                                  dimensions=dimensions)

            if metric_key:
                unit = self.cass.get_metric_unit(metric_key)
            else:
                unit = "None"

        # load statistics data from database
        # (period is still in seconds here; it is converted only afterwards)
        stats = self.cass.get_metric_statistics(project_id=project_id,
                                                namespace=namespace,
                                                metric_name=metric_name,
                                                start_time=start_ana_idx,
                                                end_time=end_time,
                                                period=period,
                                                statistics=statistics,
                                                dimensions=dimensions)

        # NOTE(review): the result is used as a slice step below, so this
        # presumably relies on integer division of an int period -- confirm.
        period = period / 60  # convert sec to min
        stat = DataFrame(index=daterange)

        for statistic, series in zip(statistics, stats):
            func = self.ROLLING_FUNC_MAP[statistic]
            ts = TimeSeries(series, index=daterange_ana)
            rolled_ts = func(ts, period, min_periods=0)
            # Keep every period-th rolled value as the column for this
            # statistic.
            stat[statistic] = rolled_ts.ix[::period]
            LOG.debug("stat %s\n%s" % (statistic, stat[statistic]))

        # Drop the None entries produced for rows without any data.
        ret = filter(None, (to_datapoint(stat, i) for i in stat.index))
        return ret, unit
示例#18
0
 def attach_ynames(self, result):
     """Wrap *result* in a pandas container labelled with ``self.ynames``.

     Results that squeeze to fewer than two dimensions come back as a
     TimeSeries named ``ynames``; anything else becomes a DataFrame with
     ``ynames`` as its columns.
     """
     flattened = result.squeeze()
     if flattened.ndim >= 2:
         return DataFrame(result, columns=self.ynames)
     # May be zero-dim, for example in the case of forecast one step in tsa
     return TimeSeries(flattened, name=self.ynames)
示例#19
0
def test_ar_select_order():
    """AIC-based order selection on a seeded sample must pick lag 2."""
    # 2118
    np.random.seed(12345)
    sample = sm.tsa.arma_generate_sample([1, -.75, .3], [1], 100)
    monthly = DatetimeIndex(start='1/1/1990', periods=100, freq='M')
    model = AR(TimeSeries(sample, index=monthly))
    chosen_lag = model.select_order(maxlag=12, ic='aic')
    assert_(chosen_lag == 2)
示例#20
0
def create_data():
    """ Build the nested dict of pandas objects used as pickle data.

    The result has the keys series, frame, panel, index, mi, sp_series and
    sp_frame, each mapping a short label to a pandas object.
    """

    import numpy as np
    import pandas
    from pandas import (Series, TimeSeries, DataFrame, Panel, SparseSeries,
                        SparseTimeSeries, SparseDataFrame, SparsePanel, Index,
                        MultiIndex, PeriodIndex, date_range, bdate_range,
                        Timestamp)
    nan = np.nan

    def multi_index(level_values, names):
        # Pair the per-level lists element-wise into index tuples.
        return MultiIndex.from_tuples(tuple(zip(*level_values)), names=names)

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
    }

    mi = {
        'reg2': multi_index(
            [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
             ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']],
            names=['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=multi_index([[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]],
                                       names=['one', 'two'])),
    }

    float_series = series['float']
    int_series = series['int']
    frame = {
        'float': DataFrame({'A': float_series, 'B': float_series + 1}),
        'int': DataFrame({'A': int_series, 'B': int_series + 1}),
        'mixed': DataFrame(dict((k, data[k]) for k in ('A', 'B', 'C', 'D'))),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=multi_index(
                            [['bar', 'bar', 'baz', 'baz', 'baz'],
                             ['one', 'two', 'one', 'two', 'three']],
                            names=['first', 'second'])),
    }

    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
    }

    sparse_series = {'float': _create_sp_series(),
                     'ts': _create_sp_tsseries()}
    sparse_frame = {'float': _create_sp_frame()}

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'mi': mi,
        'sp_series': sparse_series,
        'sp_frame': sparse_frame,
    }
示例#21
0
def test_period_index():
    """A model built on a monthly PeriodIndex must report frequency 'M',
    whether the data arrive as a Series or as a TimeSeries."""
    # test 1285
    from pandas import PeriodIndex, TimeSeries
    monthly = PeriodIndex(start="1/1/1990", periods=20, freq="M")
    values = np.arange(1, 21.)

    for container in (Series, TimeSeries):
        model = TimeSeriesModel(container(values, index=monthly))
        npt.assert_(model.data.freq == "M")
示例#22
0
    def _divide(self, frame):
        """Subtract the cash dividend from the adjclose column.

        WARNING
        =======
        frame must be in chronological order, otherwise the backfill is
        wrong.
        """
        if self.cash_afterward == 0:
            return

        day_before_ex = self.ex_date - datetime.timedelta(days=1)
        anchors = [self.d2t(day_before_ex),
                   self.d2t(datetime.date.today())]
        amounts = TimeSeries([self.cash_afterward, 0.0], index=anchors)
        aligned = amounts.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] - aligned
示例#23
0
    def _divide(self, frame):
        """Deduct the cash payout from ``frame['adjclose']``.

        WARNING
        =======
        The backfill is only correct when frame is chronologically ordered.
        """
        if self.cash_afterward == 0:
            return

        anchor_day = self.ex_date - datetime.timedelta(days=1)
        stamps = [self.d2t(day)
                  for day in (anchor_day, datetime.date.today())]
        payouts = TimeSeries([self.cash_afterward, 0.0], index=stamps)
        deduction = payouts.reindex(frame.index, method='backfill')

        frame['adjclose'] = frame['adjclose'] - deduction
示例#24
0
		tradeSummary = addDailyPNLChange(tradeSummary, 'DynamicDollarPNL', 'RealizedDollarPNL', 'DailyPNLChange')
		tradeSummary.DailyPNLChange[tradeSummary.Action == 'none'] = 0
		tradeSummaryList.append(tradeSummary)
		#cd "C:\Gary Yang\Dropbox\seasonal_Report\Test"
		#tradeSummary.to_csv('test.csv')
	################################################################################################

# Work from the historical-data directory (hard-coded, machine-specific path).
os.chdir("C:\\Users\\GYANG\\Google Drive\\Historical Data\\Sensonal_Fu_D")

#""" Generate a total Summary from all trades depends on # of Trades vs. PNL """
#totalSummary = mergeResultList(tradeSummaryList)
#totalSummary.to_csv('totalSummary.csv')
""" Generate time-series based data """
# Start string is "<backTestYearStart - 1>0101"; presumably parsed as
# YYYYMMDD by stringDate_toDatetime -- confirm.
t_start = stringDate_toDatetime(str(backTestYearStart-1) + "0101")
t_end = datetime.now()
# Series whose values are the dates from t_start up to now.
ts = TimeSeries(pd.date_range(t_start, t_end))
''' For plotting '''
t_code = getDateCode(ts) 
t_int = getDateInt(ts)
# Column name passed to set_timeFrameIndex for every collected trade summary.
timeFrame_index = 'DateTime'
tradeSummaryList = set_timeFrameIndex(tradeSummaryList, timeFrame_index)

#''' get a certain tradeSummary to fit in timeFrame '''
#x = tradeEnd - tradeStart
#tf_tradeSummary = tradeSummary_fitTimeFrame(x, tradeSummaryList, ts, 'TradeID', 'Year', 'Symbol', 'StartTrading', 'EndTrading', 'Date', \
#	'Close', 'IsPeriod', 'Action', 'PosSize', 'PosDir', 'PosPrc', 'DynamicPNL', 'RealizedPNL', 'DynamicDollarPNL', 'RealizedDollarPNL', 'DailyPNLChange')	
#tf_tradeSummary.to_csv(str(x)+'.csv')

""" Generate Lists that fill the timeFrame """
	#startTradingList = []
	#endTradingList = []
示例#25
0
 def test_cant_fill_missing_dups(self):
     """Annual resampling of a series with duplicate periods must raise."""
     duplicated_years = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
     values = np.random.randn(5)
     series = TimeSeries(values, index=duplicated_years)
     self.assertRaises(Exception, series.resample, 'A')
示例#26
0
def _simple_pts(start, end, freq='D'):
    """Return a TimeSeries of random normal values indexed by the periods
    from *start* to *end* at frequency *freq*."""
    periods = period_range(start, end, freq=freq)
    values = np.random.randn(len(periods))
    return TimeSeries(values, index=periods)
示例#27
0
def create_data():
    """ Build the nested dict of pandas objects used as pickle data.

    The result has the keys series, frame, panel, index, mi, sp_series and
    sp_frame.  Mixed-type objects with duplicate labels are only added on
    pandas 0.14.1 and later.
    """

    from distutils.version import LooseVersion
    import numpy as np
    import pandas
    from pandas import (Series,TimeSeries,DataFrame,Panel,
                        SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
                        Index,MultiIndex,PeriodIndex,
                        date_range,period_range,bdate_range,Timestamp,Categorical)
    nan = np.nan

    def multi_index(level_values, names):
        # Pair the per-level lists element-wise into index tuples.
        return MultiIndex.from_tuples(tuple(zip(*level_values)), names=names)

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
    }

    mi = {
        'reg2': multi_index(
            [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
             ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']],
            names=['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=multi_index([[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]],
                                       names=['one', 'two'])),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=['A', 'B', 'C', 'D', 'A']),
        'cat': Series(Categorical(['foo', 'bar', 'baz'])),
    }

    float_series = series['float']
    int_series = series['int']
    frame = {
        'float': DataFrame({'A': float_series, 'B': float_series + 1}),
        'int': DataFrame({'A': int_series, 'B': int_series + 1}),
        'mixed': DataFrame(dict((k, data[k]) for k in ('A', 'B', 'C', 'D'))),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=multi_index(
                            [['bar', 'bar', 'baz', 'baz', 'baz'],
                             ['one', 'two', 'one', 'two', 'three']],
                            names=['first', 'second'])),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=['A', 'B', 'A']),
        'cat_onecol': DataFrame({'A': Categorical(['foo', 'bar'])}),
        'cat_and_float': DataFrame({'A': Categorical(['foo', 'bar', 'baz']),
                                    'B': np.arange(3).astype(np.int64)}),
    }

    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
        'dup': Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                     items=['A', 'B', 'A']),
    }

    if LooseVersion(pandas.__version__) >= '0.14.1':
        # Pre-0.14.1 versions generated non-unpicklable mixed-type frames and
        # panels if their columns/items were non-unique.
        mixed_dup_df = DataFrame(data)
        mixed_dup_df.columns = list("ABCDA")

        mixed_dup_panel = Panel({'ItemA': frame['float'],
                                 'ItemB': frame['int']})
        mixed_dup_panel.items = ['ItemA', 'ItemA']

        frame['mixed_dup'] = mixed_dup_df
        panel['mixed_dup'] = mixed_dup_panel

    sparse_series = {'float': _create_sp_series(),
                     'ts': _create_sp_tsseries()}
    sparse_frame = {'float': _create_sp_frame()}

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'mi': mi,
        'sp_series': sparse_series,
        'sp_frame': sparse_frame,
    }
示例#28
0
 def attach_dates(self, result):
     """Return *result* as a TimeSeries indexed by ``self.predict_dates``."""
     dates = self.predict_dates
     return TimeSeries(result, index=dates)
示例#29
0
    def test_median(self):
        """Run the shared stat-op check for median, then compare against
        np.median on an all-ones integer series."""
        self._check_stat_op('median', np.median)

        # test with integers, test failure
        ones = TimeSeries(np.ones(10, dtype=int), index=range(10))
        reference = np.median(ones)
        self.assertAlmostEqual(reference, ones.median())
示例#30
0
def create_data():
    """ Build the nested dict of pandas objects used as pickle/msgpack data.

    The result has the keys series, frame, panel, index, scalars, mi,
    sp_series and sp_frame.  Period-based entries are only added on pandas
    0.17.0 and later.
    """

    def multi_index(level_values, names):
        # Pair the per-level lists element-wise into index tuples.
        return MultiIndex.from_tuples(tuple(zip(*level_values)), names=names)

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = {'timestamp': Timestamp('20130101')}
    if LooseVersion(pandas.__version__) >= '0.17.0':
        scalars['period'] = Period('2012', 'M')

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
    }

    mi = {
        'reg2': multi_index(
            [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
             ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']],
            names=['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=multi_index([[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]],
                                       names=['one', 'two'])),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=['A', 'B', 'C', 'D', 'A']),
        'cat': Series(Categorical(['foo', 'bar', 'baz'])),
    }
    if LooseVersion(pandas.__version__) >= '0.17.0':
        series['period'] = Series([Period('2000Q1')] * 5)

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    float_series = series['float']
    int_series = series['int']
    frame = {
        'float': DataFrame({'A': float_series, 'B': float_series + 1}),
        'int': DataFrame({'A': int_series, 'B': int_series + 1}),
        'mixed': DataFrame(dict((k, data[k]) for k in ('A', 'B', 'C', 'D'))),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=multi_index(
                            [['bar', 'bar', 'baz', 'baz', 'baz'],
                             ['one', 'two', 'one', 'two', 'three']],
                            names=['first', 'second'])),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=['A', 'B', 'A']),
        'cat_onecol': DataFrame({'A': Categorical(['foo', 'bar'])}),
        'cat_and_float': DataFrame({'A': Categorical(['foo', 'bar', 'baz']),
                                    'B': np.arange(3).astype(np.int64)}),
        'mixed_dup': mixed_dup_df,
    }

    mixed_dup_panel = Panel({'ItemA': frame['float'], 'ItemB': frame['int']})
    mixed_dup_panel.items = ['ItemA', 'ItemA']
    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
        'dup': Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                     items=['A', 'B', 'A']),
        'mixed_dup': mixed_dup_panel,
    }

    sparse_series = {'float': _create_sp_series(),
                     'ts': _create_sp_tsseries()}
    sparse_frame = {'float': _create_sp_frame()}

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'scalars': scalars,
        'mi': mi,
        'sp_series': sparse_series,
        'sp_frame': sparse_frame,
    }