Пример #1
0
    def test_filtering(self) -> None:
        """Compare ``Table.eval`` filtering with ``pandas.DataFrame.eval``.

        A 20-row table with a random integer column ``a`` and a random float
        column ``b`` is mirrored into a DataFrame; every expression is then
        evaluated on both sides and the outcomes must agree.
        """
        nrows = 20
        table = Table("table_filtering",
                      dshape="{a: int, b: float32}",
                      create=True)
        table.resize(nrows)
        table["a"] = np.random.randint(100, size=nrows)
        table["b"] = np.random.rand(nrows) * 100
        frame = pd.DataFrame(table.to_dict())

        def check_rows(expr: str, result_kind: Any) -> None:
            # The filtered table/view must hold the rows pandas selects.
            selected = table.eval(expr, result_object=result_kind)
            expected = frame[frame.eval(expr)]
            self.assertTrue(
                np.array_equal(selected["a"].loc[:], expected["a"]))
            self.assertTrue(
                np.allclose(selected["b"].loc[:], expected["b"]))

        def check_mask(expr: str) -> None:
            # The raw numexpr result must equal the pandas evaluation result.
            raw = table.eval(expr, result_object="raw_numexpr")
            mask = frame.eval(expr)
            self.assertTrue(np.array_equal(raw, mask.values))

        expressions = ("(a>10) & (a <80)", "(b>10) & (b <80)", "a>=b")
        for expr in expressions:
            check_mask(expr)
        for expr in expressions:
            check_rows(expr, "table")
        check_rows(expressions[0], "view")
Пример #2
0
    def test_filtering(self):
        """Check that Table.eval agrees with pandas.DataFrame.eval.

        Raw numexpr results are compared with the values computed by pandas;
        'table' and 'view' results are compared with the rows pandas selects.
        """
        size = 20
        tbl = Table('table_filtering',
                    dshape="{a: int, b: float32}",
                    create=True)
        tbl.resize(size)
        ints = np.random.randint(100, size=size)
        tbl['a'] = ints
        floats = np.random.rand(size) * 100
        tbl['b'] = floats
        reference = pd.DataFrame(tbl.to_dict())

        def check_rows(expr, kind):
            got = tbl.eval(expr, result_object=kind)
            wanted = reference[reference.eval(expr)]
            self.assertTrue(np.array_equal(got['a'], wanted['a']))
            self.assertTrue(np.allclose(got['b'], wanted['b']))

        def check_mask(expr):
            got = tbl.eval(expr, result_object='raw_numexpr')
            mask = reference.eval(expr)
            self.assertTrue(np.array_equal(got, mask.values))

        expressions = ('(a>10) & (a <80)', '(b>10) & (b <80)', 'a>=b')
        for expr in expressions:
            check_mask(expr)
        for expr in expressions:
            check_rows(expr, 'table')
        check_rows(expressions[0], 'view')
 def test_combine_first_nan(self):
     """Combine three constant tables, one of them holding only NaN values.

     The last row of the combined table must carry the x bounds of the first
     input and the y bounds of the third one.
     """
     sched = self.scheduler()

     def constant_from(name, columns):
         # One-row constant module wrapping a freshly created table.
         table = Table(name=name, data=pd.DataFrame(columns), create=True)
         return Constant(table, scheduler=sched)

     x_bounds = constant_from('tcf_xmin_xmax_nan', {'xmin': [1], 'xmax': [2]})
     nan_bounds = constant_from('tcf_ymin_ymax_nan',
                                {'ymin': [np.nan], 'ymax': [np.nan]})
     y_bounds = constant_from('tcf_ymin_ymax2_nan', {'ymin': [3], 'ymax': [4]})

     combiner = CombineFirst(scheduler=sched)
     # The same input slot is assigned once per source, in this order.
     for source in (x_bounds, nan_bounds, y_bounds):
         combiner.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = combiner.output.table
     sched.start()
     sched.join()

     last = combiner.table().last().to_dict()
     expected = (('xmin', 1), ('xmax', 2), ('ymin', 3), ('ymax', 4))
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #4
0
 def run_step(self, run_number, step_size, howlong):
     """Count the rows newly created in the 'table' input slot.

     The running total is kept in ``self._table``, a one-row table with a
     single ``counter`` column. ``howlong`` is not used by this method.

     Returns whatever ``self._return_run_step`` produces: blocked with zero
     steps when nothing new is available, otherwise the next state with the
     number of rows consumed.
     """
     dfslot = self.get_input_slot('table')
     dfslot.update(run_number)
     # Updated or deleted input rows invalidate the count: reset the slot,
     # empty the result table and refresh the slot before counting again.
     if dfslot.updated.any() or dfslot.deleted.any():
         dfslot.reset()
         if self._table is not None:
             self._table.resize(0)
         dfslot.update(run_number)
     indices = dfslot.created.next(step_size)  # returns a slice
     steps = indices_len(indices)
     if steps == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     # NOTE(review): input_df is not used below; kept because it is unclear
     # from here whether dfslot.data() has side effects.
     input_df = dfslot.data()
     data = pd.DataFrame(dict(counter=steps), index=[0])
     if self._table is None:
         # First step: create the result table from the first count.
         self._table = Table(
             self.generate_table_name('counter'),
             data=data,
             #                                scheduler=self.scheduler(),
             create=True)
     elif len(self._table) == 0:  # has been reset
         self._table.append(data)
     else:
         # Accumulate into the single existing row.
         self._table['counter'].loc[0] += steps
     return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Пример #5
0
 def test_last_row_simple(self):
     """Join two one-row constant tables and check the last joined row."""
     sched = self.scheduler()
     # Create both tables first, then the Constant modules wrapping them.
     x_table = Table(name=get_random_name("cst1"),
                     data={'xmin': [1], 'xmax': [2]})
     y_table = Table(name=get_random_name("cst2"),
                     data={'ymin': [3], 'ymax': [4]})
     x_source = Constant(x_table, scheduler=sched)
     y_source = Constant(y_table, scheduler=sched)

     joiner = Join(scheduler=sched)
     for source in (x_source, y_source):
         joiner.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = joiner.output.table
     sched.start()
     sched.join()

     last = joiner.table().last()
     expected = (('xmin', 1), ('xmax', 2), ('ymin', 3), ('ymax', 4))
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #6
0
 def test_merge_simple(self) -> None:
     """Merge two one-row constant tables on their index and check the row."""
     sched = self.scheduler()

     def constant_from(columns):
         # Anonymous table built from a one-row DataFrame.
         table = Table(name=None, data=pd.DataFrame(columns))
         return Constant(table, scheduler=sched)

     x_source = constant_from({"xmin": [1], "xmax": [2]})
     y_source = constant_from({"ymin": [3], "ymax": [4]})

     merger = Merge(left_index=True, right_index=True, scheduler=sched)
     # Slot 0 is assigned once per source, in this order.
     for source in (x_source, y_source):
         merger.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = merger.output.result
     aio.run(sched.start())
     _ = merger.trace_stats(max_runs=1)

     merged = merger.table
     last = merged.loc[merged.index[-1]]
     assert last is not None
     expected = (("xmin", 1), ("xmax", 2), ("ymin", 3), ("ymax", 4))
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #7
0
 def test_paging_helper_t(self) -> None:
     """Page through a table, and through a view of it, with PagingHelper."""
     nrows = 200
     table = Table("table_for_paging",
                   dshape="{a: int, b: float32}",
                   create=True)
     table.resize(nrows)
     _ = np.arange(nrows)
     table["a"] = np.random.randint(100, size=nrows)
     table["b"] = np.array(np.random.rand(nrows), np.float32)

     def page_of(source, start, stop):
         # A fresh helper is built for every request.
         return PagingHelper(source).get_page(start, stop)

     # Full table: the first element of the first/last page rows is checked.
     page = page_of(table, 0, 10)
     self.assertEqual(page[0][0], 0)
     self.assertEqual(page[-1][0], 9)

     # Same page after deleting id 5.
     del table.loc[5]
     page = page_of(table, 0, 10)
     self.assertEqual(page[0][0], 0)
     self.assertEqual(page[-1][0], 10)

     # View restricted to every second id from 10 to 74.
     sel = bitmap(range(10, 75, 2))
     print(sel)
     view = table.loc[sel, :]
     self.assertTrue(view is not None)
     assert view is not None
     page = page_of(view, 10, 20)
     self.assertEqual(page[0][0], 30)
     self.assertEqual(page[-1][0], 48)
     print(page)
Пример #8
0
 def test_join_simple(self):
     """Join two constant tables through Reduce/BinJoin and check the row."""
     sched = self.scheduler()

     def constant_from(name, columns):
         table = Table(name=name, data=pd.DataFrame(columns), create=True)
         return Constant(table, scheduler=sched)

     x_source = constant_from('test_join_simple_cst1',
                              {'xmin': [1], 'xmax': [2]})
     y_source = constant_from('test_join_simple_cst2',
                              {'ymin': [3], 'ymax': [4]})

     reducer = Reduce(BinJoin, "first", "second", "table", scheduler=sched)
     for source in (x_source, y_source):
         reducer.input.table = source.output.table
     # expand() yields the module whose output carries the joined table.
     joined = reducer.expand()
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = joined.output.table
     sched.start()
     sched.join()
     stats = joined.trace_stats(max_runs=1)
     print(stats)

     result = joined.table()
     last = result.loc[result.index[-1]]
     expected = (('xmin', 1), ('xmax', 2), ('ymin', 3), ('ymax', 4))
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #9
0
 def test_intersection(self) -> None:
     """Intersect two Bisect results and compare with a direct evaluation.

     The intersection of ``_1 > 0.3`` and ``_1 < 0.8`` must select the same
     index as evaluating the combined expression on the input table.
     """
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)

     def limit(value):
         # One-row constant table holding a bound for column "_1".
         table = Table(name=None, dshape="{_1: float64}", data={"_1": [value]})
         return Constant(table=table, scheduler=sched)

     lower = limit(0.3)
     upper = limit(0.8)
     hist_index = HistogramIndex(column="_1", scheduler=sched)
     hist_index.create_dependent_modules(random, "result")

     def bisect(op, bound):
         module = Bisect(column="_1",
                         op=op,
                         hist_index=hist_index,
                         scheduler=sched)
         module.input[0] = hist_index.output.result
         module.input.limit = bound.output.result
         return module

     above = bisect(">", lower)
     below = bisect("<", upper)
     inter = Intersection(scheduler=sched)
     for module in (above, below):
         inter.input[0] = module.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = inter.output.result
     aio.run(sched.start())

     assert hist_index.input_module is not None
     source = hist_index.input_module.output["result"].data()
     idx = source.eval("(_1>0.3)&(_1<0.8)", result_object="index")
     self.assertEqual(inter.table.index, bitmap(idx))
Пример #10
0
def _2_csv_2_const_scenario(module: Module,
                            s: Scheduler) -> Callable[[Scheduler, int], None]:
    """Wire two CSV loaders and two constant tables to ``module``.

    Inputs ``a`` and ``b`` of ``module`` receive the output of two loaders
    reading the "smallfile" dataset; inputs ``c`` and ``d`` receive constant
    modules wrapping tables declared with a single ``a: int`` column.

    Returns a callback taking ``(scheduler, run_number)`` that calls
    ``task_stop()`` on the scheduler once the run number exceeds 10.
    """
    loader_options = dict(index_col=False, header=None, scheduler=s)
    first_csv = CSVLoader(get_dataset("smallfile"), **loader_options)
    second_csv = CSVLoader(get_dataset("smallfile"), **loader_options)
    first_const = Constant(
        table=Table("const_c_2_csv_2_const_scenario",
                    dshape="{a: int}",
                    create=True),
        scheduler=s)
    second_const = Constant(
        table=Table("const_d_2_csv_2_const_scenario",
                    dshape="{a: int}",
                    create=True),
        scheduler=s)

    module.input.a = first_csv.output.result
    module.input.b = second_csv.output.result
    module.input.c = first_const.output.result
    module.input.d = second_const.output.result

    def _stop_after_ten_runs(s: Scheduler, r: int) -> None:
        if r <= 10:
            return
        s.task_stop()

    return _stop_after_ten_runs
Пример #11
0
    def create_table(self) -> None:
        """Exercise the Table constructor.

        Covers creation with an explicit dshape, construction again under the
        same name (with and without a dshape), and per-column fill values.
        """
        schema = "{a: int, b: float32, c: string, d: 10*int}"

        table = Table("table",
                      storagegroup=self.storagegroup,
                      dshape=schema,
                      create=True)
        self.assertTrue(table is not None)
        self.assertEqual(table.ncol, 4)
        # Access by name and by position must return the same column object.
        by_name = table["a"]
        by_position = table[0]
        self.assertTrue(by_name is by_position)

        # Same name and dshape, without the create flag.
        table = Table("table", storagegroup=self.storagegroup, dshape=schema)
        self.assertTrue(table is not None)

        # Same name, no dshape given: the dshape must match the declared one.
        table = Table("table", storagegroup=self.storagegroup)
        declared = ds.dshape("{a: int, b: float32, c: string, d: 10 * int}")
        self.assertEqual(table.dshape, declared)

        filled = Table("bar_table",
                       dshape="{a: int64, b: float64}",
                       fillvalues={"a": -1},
                       create=True)
        self.assertEqual(filled.dshape, ds.dshape("{a: int64, b: float64}"))
        self.assertEqual(filled[0].fillvalue, -1)
Пример #12
0
 def test_merge_simple(self):
     """Merge two one-row constant tables on their index and check the row."""
     sched = self.scheduler()

     def constant_from(columns):
         table = Table(name=None, data=pd.DataFrame(columns))
         return Constant(table, scheduler=sched)

     x_source = constant_from({'xmin': [1], 'xmax': [2]})
     y_source = constant_from({'ymin': [3], 'ymax': [4]})

     merger = Merge(left_index=True, right_index=True, scheduler=sched)
     for source in (x_source, y_source):
         merger.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = merger.output.table
     sched.start()
     sched.join()
     # The statistics are fetched but not inspected.
     stats = merger.trace_stats(max_runs=1)

     merged = merger.table()
     last = merged.loc[merged.index[-1]]
     expected = (('xmin', 1), ('xmax', 2), ('ymin', 3), ('ymax', 4))
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #13
0
 def test_hist_index_min_max(self):
     """Test min_out and max_out on HistogramIndex."""
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)

     def limit(value):
         table = Table(name=None, dshape='{_1: float64}', data={'_1': [value]})
         return Constant(table=table, scheduler=sched)

     lower = limit(0.3)
     upper = limit(0.8)
     range_qry = RangeQuery(column='_1', scheduler=sched)
     range_qry.create_dependent_modules(random,
                                        'table',
                                        min_value=lower,
                                        max_value=upper)
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = range_qry.output.table
     hist_index = range_qry.hist_index

     def watch(module_cls, prefix, output_name):
         # Attach a Min/Max module plus a printer to one hist_index output.
         module = module_cls(name=prefix + str(hash(hist_index)),
                             scheduler=sched)
         module.input.table = getattr(hist_index.output, output_name)
         sink = Print(proc=self.terse, scheduler=sched)
         sink.input.df = module.output.table
         return module

     min_module = watch(Min, 'min_', 'min_out')
     max_module = watch(Max, 'max_', 'max_out')
     sched.start()
     sched.join()

     # Each module's last row must match the extremum of the random table.
     for module, reducer in ((min_module, 'min'), (max_module, 'max')):
         expected = getattr(random.table(), reducer)()['_1']
         actual = module.table().last().to_dict()['_1']
         self.assertAlmostEqual(expected, actual)
Пример #14
0
    def run_step(self, run_number, step_size, howlong):
        """Update the running per-column maximum with newly created rows.

        Each step appends one row to ``self._table`` holding the maximum of
        the new chunk combined with the previous last row. ``howlong`` is not
        used by this method.

        Returns whatever ``self._return_run_step`` produces: blocked with
        zero steps when nothing new is available, otherwise the next state
        with the number of rows consumed.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        # Updated or deleted input rows invalidate the result: reset the
        # slot, empty the result table and refresh the slot.
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            if self._table is not None:
                self._table.resize(0)
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        # Maximum of the new chunk only.
        # NOTE(review): op is presumably a mapping from column name to a
        # one-element array (keepdims=True) -- confirm.
        op = self.filter_columns(input_df, fix_loc(indices)).max(keepdims=True)
        if self._table is None:
            self._table = Table(
                self.generate_table_name('max'),
                data=op,
                #                                scheduler=self.scheduler(),
                create=True)
        elif len(self._table) == 0:  # has been reset
            self._table.append(op)
        else:
            # Fold the previous maximum into op in place before appending.
            last = self._table.last()
            for colname in last:
                current_max = op[colname]
                current_max[0] = np.maximum(current_max, last[colname])
            self._table.append(op)

        # TODO: manage the history in a more efficient way
        #if len(self._table) > self.params.history:
        #    self._table = self._table.loc[self._df.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Пример #15
0
    def test_intersection(self):
        """Intersect two Bisect results and compare with a direct evaluation.

        The selection of the intersection of ``_1 > 0.3`` and ``_1 < 0.8``
        must equal the index obtained by evaluating the combined expression
        on the input table.
        """
        sched = self.scheduler()
        random = RandomTable(2, rows=100000, scheduler=sched)

        def limit(value):
            table = Table(name=None,
                          dshape='{_1: float64}',
                          data={'_1': [value]})
            return Constant(table=table, scheduler=sched)

        lower = limit(0.3)
        upper = limit(0.8)
        hist_index = HistogramIndex(column='_1', scheduler=sched)
        hist_index.create_dependent_modules(random, 'table')

        def bisect(op, bound):
            module = Bisect(column='_1',
                            op=op,
                            hist_index=hist_index,
                            scheduler=sched)
            module.input.table = hist_index.output.table
            module.input.limit = bound.output.table
            return module

        above = bisect('>', lower)
        below = bisect('<', upper)
        inter = Intersection(scheduler=sched)
        for module in (above, below):
            inter.input.table = module.output.table
        printer = Print(proc=self.terse, scheduler=sched)
        printer.input.df = inter.output.table
        sched.start()
        sched.join()

        source = hist_index.input_module.output['table'].data()
        idx = source.eval('(_1>0.3)&(_1<0.8)', result_object='index')
        self.assertEqual(inter.table().selection, bitmap(idx))
Пример #16
0
 def test_mmap5(self):
     """Store two short strings 100 times each and count distinct entries.

     The view of the "atext" storage is expected to hold 2 distinct values.
     """
     #pylint: disable=protected-access
     self._rmtree()
     table = Table('table_mmap_5', dshape='{anint: int, atext: string}')
     for number in range(100):
         for text in ("abc", "xyz"):
             table.add({'anint': number, 'atext': text})
     stored = table._column("atext").storagegroup["atext"].view
     self.assertEqual(len(set(stored)), 2)
Пример #17
0
 def test_mmap6(self):
     """Store the same LONG_SIZE-character string 100 times.

     The view of the "atext" storage is expected to hold 100 distinct values.
     """
     #pylint: disable=protected-access
     long_text = "a" * LONG_SIZE
     self._rmtree()
     table = Table('table_mmap_6', dshape='{anint: int, atext: string}')
     for number in range(100):
         table.add({'anint': number, 'atext': long_text})
     stored = table._column("atext").storagegroup["atext"].view
     self.assertEqual(len(set(stored)), 100)
Пример #18
0
 def test_join_simple(self) -> None:
     """Join three one-row constant tables with Reduce.expand over BinJoin."""
     sched = self.scheduler()

     def constant_from(name, columns):
         table = Table(name=name, data=pd.DataFrame(columns), create=True)
         return Constant(table, scheduler=sched)

     sources = [
         constant_from("test_join_simple_cst1", {"xmin": [1], "xmax": [2]}),
         constant_from("test_join_simple_cst2", {"ymin": [3], "ymax": [4]}),
         constant_from("test_join_simple_cst3", {"zmin": [5], "zmax": [6]}),
     ]
     joined = Reduce.expand(
         BinJoin,
         "first",
         "second",
         "result",
         [source.output.result for source in sources],
         scheduler=sched,
     )
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = joined.output.result
     aio.run(sched.start())
     stats = joined.trace_stats(max_runs=1)
     print(stats)

     result = joined.table
     last = result.loc[result.index[-1]]
     assert last is not None
     expected = (
         ("xmin", 1),
         ("xmax", 2),
         ("ymin", 3),
         ("ymax", 4),
         ("zmin", 5),
         ("zmax", 6),
     )
     self.assertTrue(all(last[key] == value for key, value in expected))
Пример #19
0
class Var(TableModule):
    """
    Compute the variance of the columns of an input table.

    The variance is maintained incrementally: one ``OnlineVariance``
    accumulator is kept per column in ``self._data`` and fed with every new
    chunk of rows. Each step appends the current variances as one row of
    ``self._table``, which is trimmed to the ``history`` parameter.
    """
    # 'history': number of result rows kept in the output table.
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required 'table' input slot and initialise the state.

        ``columns`` is stored in ``self._columns``; remaining keyword
        arguments are forwarded to the parent constructor.
        """
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Var, self).__init__(dataframe_slot='table', **kwds)
        self._columns = columns
        self._data = {}  # column name -> OnlineVariance accumulator
        self.default_step_size = 1000

    def is_ready(self):
        """Return True when the input slot has created rows pending."""
        if self.get_input_slot('table').created.any():
            return True
        return super(Var, self).is_ready()

    def op(self, chunk):
        """Feed ``chunk`` to the accumulators and return the variances.

        Returns a dict mapping each column of ``chunk`` to the ``variance``
        attribute of its accumulator after the chunk has been added.
        """
        ret = {}
        for c in chunk.columns:
            data = self._data.get(c)
            if data is None:
                data = OnlineVariance()
                self._data[c] = data
            data.add(chunk[c])
            ret[c] = data.variance
        return ret

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` new rows and append the variances.

        Returns whatever ``self._return_run_step`` produces: blocked with
        zero steps when nothing new is available, otherwise the next state
        with the number of rows consumed. ``howlong`` is not used.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            self._table = None
            # The result is rebuilt from scratch, so the accumulators must be
            # dropped too; otherwise rows seen before the reset would still
            # contribute to the variance.
            # NOTE(review): assumes the slot reports its rows as created
            # again after reset()/update() -- confirm.
            self._data = {}
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.op(self.filter_columns(input_df, fix_loc(indices)))
        if self._table is None:
            self._table = Table(self.generate_table_name('var'),
                                dshape=input_df.dshape,
                                create=True)
        # One result row per step, identified by the run number.
        self._table.append(op, indices=[run_number])

        # Keep only the most recent `history` rows.
        if len(self._table) > self.params.history:
            self._table = self._table.loc[self._table.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Пример #20
0
    def test_to_array(self) -> None:
        """Check Table.to_array for the whole table, columns and row keys."""
        nrows = 10
        t = Table("table_to_array",
                  dshape="{a: int, b: float32, c: real}",
                  create=True)
        t.resize(nrows)
        t["a"] = np.random.randint(100, size=nrows)
        t["b"] = np.random.rand(nrows)
        t["c"] = np.random.rand(nrows)
        all_columns = [t["a"], t["b"], t["c"]]

        def check(arr: Any, nrow: Any, ncol: Any, columns: Any,
                  key: Any) -> None:
            # Result must be float64 with the expected shape, and each array
            # column must match the table column restricted to `key`.
            self.assertEqual(arr.dtype, np.float64)
            self.assertEqual(arr.shape[0], nrow)
            self.assertEqual(arr.shape[1], ncol)
            for pos, column in enumerate(columns):
                self.assertTrue(np.allclose(column[key], arr[:, pos]))

        # Whole table
        check(t.to_array(), t.nrow, t.ncol, all_columns, slice(None))

        # Columns
        arr = t.to_array(columns=["a", "b"])
        check(arr, t.nrow, 2, all_columns[:2], slice(None))

        # Keys given as a slice of ids
        id_slice = slice(2, 7)
        arr = t.to_array(id_slice)
        index_slice = t.id_to_index(id_slice).to_slice_maybe()
        check(arr, index_slice.stop - index_slice.start, 3, all_columns,
              index_slice)

        # Keys with fancy indexing
        id_list = [2, 4, 6, 8]
        arr = t.to_array(id_list)
        indices = t.id_to_index(id_list)
        check(arr, len(indices), 3, all_columns, indices)
Пример #21
0
def _create_table(tname: str, columns: Parameters) -> Table:
    """Create a one-row table from ``(name, dtype, value)`` triples.

    The dshape is assembled from the column names and dtypes, the table is
    stored in the default internal group for ``tname``, and the values are
    added as its single row.
    """
    fields = []
    row = {}
    for (name, dtype, val) in columns:
        fields.append("%s: %s" % (name, dshape_from_dtype(dtype)))
        row[name] = val
    dshape = "{" + ",".join(fields) + "}"
    assert Group.default_internal
    storage = Group.default_internal(tname)
    table = Table(tname, dshape=dshape, storagegroup=storage)
    table.add(row)
    return table
Пример #22
0
 def test_range_query_min_max(self):
     """Test min and max on RangeQuery output."""
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)
     lower, upper = 0.3, 0.8
     t_lower = Table(name=None, dshape='{_1: float64}', data={'_1': [lower]})
     t_upper = Table(name=None, dshape='{_1: float64}', data={'_1': [upper]})
     range_qry = self._query_min_max_impl(random, t_lower, t_upper, sched)
     sched.start()
     sched.join()
     # Fetch both outputs before asserting on either of them.
     observed = [range_qry.output.min.data(), range_qry.output.max.data()]
     for data, bound in zip(observed, (lower, upper)):
         self.assertAlmostEqual(data['_1'].loc[0], bound)
Пример #23
0
 def run_step(self, run_number, step_size, howlong):
     """Create the result table once, from the last row of the 'like' input.

     While ``self._table`` is unset and the 'like' slot provides data, a table
     with the same dshape is created and the last row of that data is
     appended at index 0. The step always reports the blocked state with one
     step run.
     """
     if self._table is None:
         slot = self.get_input_slot('like')
         like = None if slot is None else slot.data()
         if like is not None:
             # Table creation and initial fill happen under the slot lock.
             with slot.lock:
                 self._table = Table(self.generate_table_name('like'),
                                     dshape=like.dshape,
                                     create=True)
                 last_row = like.last().to_dict(ordered=True)
                 self._table.append(last_row, indices=[0])
     return self._return_run_step(self.state_blocked, steps_run=1)
Пример #24
0
 def test_last(self) -> None:
     """Check Table.last with no argument, one column and a list of columns."""
     table = Table("table_last", dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     table["a"] = np.random.randint(100, size=10)
     table["b"] = np.random.rand(10)

     whole_row = list(notNone(table.last()).values())
     tail_values = [table._column(pos)[-1] for pos in (0, 1)]
     self.assertEqual(whole_row, tail_values)

     self.assertEqual(table.last("a"), table._column(0)[-1])

     both = table.last(["a", "b"])
     self.assertEqual(list(both), whole_row)
Пример #25
0
 def test_last(self):
     """Check Table.last with no argument, one column and a list of columns."""
     tbl = Table('table_last', dshape="{a: int, b: float32}", create=True)
     tbl.resize(10)
     tbl['a'] = np.random.randint(100, size=10)
     tbl['b'] = np.random.rand(10)

     # Values of the whole last row versus the tail of each column.
     row_values = list(tbl.last().values())
     column_tails = [tbl._column(pos)[-1] for pos in (0, 1)]
     self.assertEqual(row_values, column_tails)

     self.assertEqual(tbl.last('a'), tbl._column(0)[-1])

     pair = tbl.last(['a', 'b'])
     self.assertEqual(list(pair), row_values)
Пример #26
0
def _create_table(tname, columns):
    """Create a one-row table from (name, dtype, value) triples.

    The dshape is assembled from the column names and dtypes, the table is
    stored in the default internal group for `tname`, and the values are
    added as its single row.
    """
    specs = []
    values = {}
    for (name, dtype, val) in columns:
        specs.append('%s: %s' % (name, dshape_from_dtype(dtype)))
        values[name] = val
    dshape = '{' + ','.join(specs) + '}'
    storage = Group.default_internal(tname)
    table = Table(tname, dshape=dshape, storagegroup=storage)
    table.add(values)
    return table
Пример #27
0
class Max(TableModule):
    """Maintain the running per-column maximum of an input table.

    Each step appends one row to ``self._table`` holding the maximum of the
    newly created rows combined with the previous last row.
    """
    # Module parameter 'history' (default 3); it is only referenced by the
    # commented-out trimming code at the end of run_step.
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required 'table' input slot and store ``columns``."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Max, self).__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000

    def is_ready(self):
        """Return True when the input slot has created rows pending."""
        if self.get_input_slot('table').created.any():
            return True
        return super(Max, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` new rows and append the new maximum.

        Returns whatever ``self._return_run_step`` produces: blocked with
        zero steps when nothing new is available, otherwise the next state
        with the number of rows consumed. ``howlong`` is not used.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        # Updated or deleted input rows invalidate the result: reset the
        # slot, empty the result table and refresh the slot.
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            if self._table is not None:
                self._table.resize(0)
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        # Maximum of the new chunk only.
        # NOTE(review): op is presumably a mapping from column name to a
        # one-element array (keepdims=True) -- confirm.
        op = self.filter_columns(input_df, fix_loc(indices)).max(keepdims=True)
        if self._table is None:
            self._table = Table(
                self.generate_table_name('max'),
                data=op,
                #                                scheduler=self.scheduler(),
                create=True)
        elif len(self._table) == 0:  # has been reset
            self._table.append(op)
        else:
            # Fold the previous maximum into op in place before appending.
            last = self._table.last()
            for colname in last:
                current_max = op[colname]
                current_max[0] = np.maximum(current_max, last[colname])
            self._table.append(op)

        # TODO: manage the history in a more efficient way
        #if len(self._table) > self.params.history:
        #    self._table = self._table.loc[self._df.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Пример #28
0
 def test_mmap3(self):
     """Build a table from a DataFrame, compare the columns, then append.

     The table must have as many rows as the DataFrame, every column must
     hold the same values, and appending the DataFrame again must double the
     row count.
     """
     #pylint: disable=protected-access
     #self.scheduler._run_number = 1
     self._rmtree()
     try:
         df = pd.DataFrame({'a': [1, 2, 3],
                            'b': [0.1, 0.2, 0.3],
                            'c': ['a', 'b', 'cd']})
         t = Table('table_2', data=df)
         self.assertEqual(len(t), len(df))
         for colname in df:
             coldf = df[colname]
             colt = t[colname]
             self.assertEqual(len(coldf), len(colt))
             self.assertTrue(np.all(coldf.values == colt.values))
         t.append(df)
         self.assertEqual(len(t), 2 * len(df))
     finally:
         # Run the cleanup even when an assertion above fails, so a failing
         # run does not leave its storage behind.
         self._rmtree()
Пример #29
0
 def _delete_table(self, t: Table) -> None:
     """Delete row id 2 from ``t`` and check the table stays consistent.

     After the deletion, looking the id up must raise ``KeyError``, the
     length must be one less than the size of column "a", and iterating the
     rows must yield exactly ``len(t)`` rows, each containing "a".
     """
     self.assertEqual(t.index_to_id(2), 2)
     column_a = t["a"]
     self.assertEqual(column_a[2], column_a.fillvalue)
     del t.loc[2]
     with self.assertRaises(KeyError):
         print(t.loc[2])
     self.assertEqual(len(t), column_a.size - 1)

     seen = 0
     for seen, row in enumerate(t.iterrows(), start=1):
         assert row is not None
         self.assertTrue("a" in row)
     self.assertEqual(len(t), seen)
Пример #30
0
 def test_merge1(self):
     """Merge a left and a right table on their indices and print them.

     ``df_left1`` and ``df_right1`` are defined outside this method. The right
     table is created with the index values of its DataFrame as indices.
     """
     table_left = Table(name='table_left', data=df_left1, create=True)
     print(repr(table_left))
     table_right = Table(name='table_right',
                         data=df_right1,
                         create=True,
                         indices=df_right1.index.values)
     print(repr(table_right))
     #table_right2 = Table(name='table_right2', data=df_right2, create=True)
     # Index-on-index merge of the two tables into 'table_merge'.
     table_merge = merge(table_left,
                         table_right,
                         name='table_merge',
                         left_index=True,
                         right_index=True)
     print(repr(table_merge))