Пример #1
0
    def NOtest_vec_distances(self):
        """Connect a VEC loader straight to an ``Every`` sink and run it.

        The name does not start with ``test``, so this is presumably kept
        out of test discovery on purpose. No assertion is made; the method
        only builds the pipeline, starts the scheduler and reads the table.
        """
        global times
        sched = self.scheduler()
        loader = VECLoader(get_dataset('warlogs'), scheduler=sched)
        # A PairwiseDistances(metric='cosine') stage used to sit between the
        # loader and the sink (fed from the loader's ``df`` and ``array``
        # outputs, with the sink reading its ``dist`` output). It is
        # disabled, so the sink is fed by the loader directly.
        sink = Every(proc=self.terse, constant_time=True, scheduler=sched)
        sink.input.df = loader.output.table
        # Reset the module-level counter before the scheduler runs.
        times = 0
        sched.start()
        # The table is fetched but its content is not checked here.
        _ = loader.table()
Пример #2
0
 def NOtest_vec_distances(self):
     """Compare the PairwiseDistances module output with a direct computation.

     Builds the pipeline ``VECLoader -> PairwiseDistances -> Every``, runs
     the scheduler, then checks that:

     * the computed distance matrix has one row per row of the loaded
       data frame, and
     * it is numerically close to ``pairwise_distances`` evaluated on the
       loader's array with the same metric.

     The name does not start with ``test``, so this is presumably kept
     out of test discovery on purpose.
     """
     global times
     s = Scheduler()
     vec = VECLoader(get_dataset('warlogs'), scheduler=s)
     dis = PairwiseDistances(metric='cosine', scheduler=s)
     dis.input.df = vec.output.df
     dis.input.array = vec.output.array
     cnt = Every(proc=print_len, constant_time=True, scheduler=s)
     cnt.input.df = dis.output.dist
     # Reset the module-level counter before the scheduler runs.
     times = 0
     s.start()
     df = vec.df()
     computed = dis.dist()
     # assertEqual, not the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(computed.shape[0], len(df))
     # Reference result computed with the same metric the module used.
     truth = pairwise_distances(vec.toarray(), metric=dis._metric)
     self.assertTrue(np.allclose(truth, computed))
Пример #3
0
 def NOtest_vec_distances(self) -> None:
     """Connect a VEC loader straight to an ``Every`` sink and run it.

     The name does not start with ``test``, so this is presumably kept
     out of test discovery on purpose. No assertion is made; the method
     only builds the pipeline, starts the scheduler and reads the result.
     """
     global times
     sched = self.scheduler()
     loader = VECLoader(get_dataset("warlogs"), scheduler=sched)
     # A PairwiseDistances(metric='cosine') stage used to sit between the
     # loader and the sink (fed from the loader's ``df`` and ``array``
     # outputs, with the sink reading its ``dist`` output). It is
     # disabled, so the sink's first input is fed by the loader directly.
     sink = Every(proc=self.terse, constant_time=True, scheduler=sched)
     sink.input[0] = loader.output.result
     # Reset the module-level counter before the scheduler runs.
     times = 0
     sched.start()
     # The result is read but its content is not checked here.
     _ = loader.result
Пример #4
0
    def test_read_vec(self) -> None:
        """Step a VECLoader over the 'warlogs' dataset until it is a zombie.

        The loader is run once with run number 0 and then repeatedly with
        increasing run numbers while ``is_zombie()`` is false. The only
        assertion is that a table exists after the first run; trace
        statistics and table lengths are fetched on every step but their
        values are not checked.
        """
        module = VECLoader(get_dataset("warlogs"), name="test_read_vec")
        # NOTE: the check that no table exists before the first run is
        # currently disabled.
        module.run(0)
        _ = module.trace_stats(max_runs=1)
        df = module.table
        # assertIsNotNone reports the offending value on failure, unlike
        # assertFalse(df is None).
        self.assertIsNotNone(df)
        _ = len(df)
        # NOTE: the per-run check that the newly loaded rows match the rows
        # stamped with module.last_update() is currently disabled, both
        # here and inside the loop below.
        cnt = 1

        while not module.is_zombie():
            module.run(cnt)
            cnt += 1
            _ = module.trace_stats(max_runs=1)
            df = module.table
            _ = len(df)
        _ = module.trace_stats(max_runs=1)
        _ = len(module.table)
Пример #5
0
 def test_read_vec(self):
     """Step a VECLoader over 'warlogs' and check the rows added per run.

     Checks performed:

     * no data frame exists before the first run and one exists after it;
     * after every run, the number of rows whose ``UPDATE_COLUMN`` equals
       ``module.last_update()`` matches the number of rows added by that
       run;
     * once the module is a zombie, the number of distinct
       ``UPDATE_COLUMN`` groups equals the number of ``run`` calls made.
     """
     module = VECLoader(get_dataset('warlogs'),
                        id='test_read_vec')
     self.assertIsNone(module.df())
     module.run(0)
     # Called as in the original flow; the returned stats are not needed
     # until the loop below.
     module.trace_stats(max_runs=1)
     df = module.df()
     self.assertIsNotNone(df)
     prev_rows = len(df)
     self.assertEqual(
         prev_rows,
         len(df[df[module.UPDATE_COLUMN] == module.last_update()]))
     cnt = 1

     while not module.is_zombie():
         module.run(cnt)
         cnt += 1
         s = module.trace_stats(max_runs=1)
         df = module.df()
         ln = len(df)
         # Single-argument print(...) prints the same text on Python 2 and
         # Python 3; .iloc[-1] replaces Series.irow(-1), which was removed
         # from pandas.
         print("Run time: %gs, loaded %d rows" % (s['duration'].iloc[-1], ln))
         # Rows added by this run must all carry the latest update stamp.
         self.assertEqual(
             ln - prev_rows,
             len(df[df[module.UPDATE_COLUMN] == module.last_update()]))
         prev_rows = ln
     s = module.trace_stats(max_runs=1)
     print("Done. Run time: %gs, loaded %d rows" % (s['duration'].iloc[-1], len(module.df())))
     # One group per distinct update stamp, i.e. one per run() call.
     df2 = module.df().groupby([Module.UPDATE_COLUMN])
     self.assertEqual(cnt, len(df2))