Exemplo n.º 1
0
    def test_create_cache_train_once(self):
        """Check that a ``train_once=True`` context keeps using its first prep data.

        A ``Normalize`` feature over ``F(10) + F('a')`` is created once on
        ``self.data``; the first result column must have mean ~0 and std ~1.
        The context data is then changed three ways (rows added, original
        rows dropped, train/prep indexes replaced).  After each change the
        value produced for the new row ``a == 100`` must still equal
        ``(100 - mean) / std`` computed from the *original* ``self.data['a']``.
        """
        # Local import: ``DataFrame.append`` was removed in pandas 2.0, so the
        # rows are added with ``concat`` instead; importing here keeps this
        # method self-contained.
        from pandas import concat

        def first_column(frame):
            # The feature result is a frame; only its first column is checked.
            return frame[frame.columns[0]]

        def expected_for_new_row():
            # Normalisation of the value 100 against the original column 'a'.
            original_a = self.data['a']
            return (100 - original_a.mean()) / original_a.std()

        self.ctx = context.DataContext(store.MemoryStore('test', verbose=True),
                self.data, train_once=True)
        f = base.Normalize(base.F(10) + base.F('a'))
        ctx = self.ctx

        r = first_column(f.create(ctx))
        self.assertAlmostEqual(r.mean(), 0)
        self.assertAlmostEqual(r.std(), 1)

        # now add some new data, labelled well past the existing rows
        idx = len(self.data) + 1000
        new_labels = [idx, idx + 1]
        new_rows = DataFrame([100, 200], columns=['a'], index=Index(new_labels))
        ctx.data = concat([ctx.data, new_rows])
        r = first_column(f.create(ctx))
        self.assertAlmostEqual(r[idx], expected_for_new_row())

        # drop all the other data ... should still use old prep data
        # (``.loc`` is the label-based replacement for the removed ``.ix``)
        ctx.data = ctx.data.loc[new_labels]
        r = first_column(f.create(ctx))
        self.assertAlmostEqual(r[idx], expected_for_new_row())

        # new train_index should NOT bust old cache
        ctx.data = ctx.data.loc[new_labels]
        ctx.train_index = ctx.data.index
        ctx.prep_index = ctx.data.index
        r = first_column(f.create(ctx))
        self.assertAlmostEqual(r[idx], expected_for_new_row())
Exemplo n.º 2
0
 def setUp(self):
     """Prepare the per-test fixture.

     Stores the result of ``make_data(10)`` on ``self.data`` and wraps it in
     a ``DataContext`` backed by a verbose ``MemoryStore`` named ``'test'``.
     """
     self.data = make_data(10)
     memory_store = store.MemoryStore('test', verbose=True)
     self.ctx = context.DataContext(memory_store, self.data)
Exemplo n.º 3
0
 def setUp(self):
     """Prepare the per-test fixture with an extra ``'groups'`` column.

     ``self.data`` comes from ``make_data(10)``; ``'groups'`` is derived from
     the ``'ints'`` column by testing each value against ``> 5``.  The data
     is then wrapped in a ``DataContext`` backed by a verbose ``MemoryStore``.
     """
     def exceeds_five(value):
         # Element-wise predicate applied to every entry of 'ints'.
         return value > 5

     self.data = make_data(10)
     ints_column = self.data['ints']
     self.data['groups'] = ints_column.apply(exceeds_five)
     memory_store = store.MemoryStore(verbose=True)
     self.ctx = context.DataContext(memory_store, self.data)