def test_hist_index_min_max(self) -> None:
    """Test min_out and max_out on HistogramIndex

    Builds a ``RangeQuery`` on column ``_1`` (bounds 0.3 and 0.8) over a
    100000-row ``RandomTable``, plugs a ``Min`` and a ``Max`` module on the
    ``min_out`` / ``max_out`` outputs of the query's histogram index, runs
    the scheduler, and checks that the ``_1`` values they hold are almost
    equal to the min and max computed directly on the random table.
    """
    sched = self.scheduler()
    # NOTE(review): indentation was lost in the reviewed source; the ``with``
    # block is assumed to wrap only the dataflow construction and to close
    # before the scheduler is started -- confirm.
    with sched:
        source = RandomTable(2, rows=100000, scheduler=sched)
        lower_bound = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
        upper_bound = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
        query = RangeQuery(column="_1", scheduler=sched)
        query.create_dependent_modules(
            source,
            "result",
            min_value=lower_bound,
            max_value=upper_bound,
        )
        query_printer = Print(proc=self.terse, scheduler=sched)
        query_printer.input[0] = query.output.result

        hist_index = query.hist_index
        assert hist_index is not None
        # The index hash is only used to build the module names.
        index_tag = str(hash(hist_index))

        min_ = Min(name="min_" + index_tag, scheduler=sched)
        min_.input[0] = hist_index.output.min_out
        min_printer = Print(proc=self.terse, scheduler=sched)
        min_printer.input[0] = min_.output.result

        max_ = Max(name="max_" + index_tag, scheduler=sched)
        max_.input[0] = hist_index.output.max_out
        max_printer = Print(proc=self.terse, scheduler=sched)
        max_printer.input[0] = max_.output.result
    aio.run(sched.start())

    # Reference values come straight from the source table.
    expected_min = cast(float, source.table.min()["_1"])
    observed_min = cast(float, min_.psdict["_1"])
    self.assertAlmostEqual(expected_min, observed_min)

    expected_max = cast(float, source.table.max()["_1"])
    observed_max = cast(float, max_.psdict["_1"])
    self.assertAlmostEqual(expected_max, observed_max)
def test_hist_index_min_max(self):
    """Test min_out and max_out on HistogramIndex

    Builds a ``RangeQuery`` on column ``_1`` (bounds 0.3 and 0.8, each given
    as a one-row ``Table``) over a 100000-row ``RandomTable``, plugs a
    ``Min`` and a ``Max`` module on the ``min_out`` / ``max_out`` outputs of
    the query's histogram index, runs the scheduler to completion, and
    checks that the last row of each module's table is almost equal to the
    min / max computed directly on the random table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100000, scheduler=sched)

    lower_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    lower_bound = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    upper_bound = Constant(table=upper_table, scheduler=sched)

    query = RangeQuery(column='_1', scheduler=sched)
    query.create_dependent_modules(
        source,
        'table',
        min_value=lower_bound,
        max_value=upper_bound,
    )
    query_printer = Print(proc=self.terse, scheduler=sched)
    query_printer.input.df = query.output.table

    hist_index = query.hist_index
    # The index hash is only used to build the module names.
    index_tag = str(hash(hist_index))

    min_ = Min(name='min_' + index_tag, scheduler=sched)
    min_.input.table = hist_index.output.min_out
    min_printer = Print(proc=self.terse, scheduler=sched)
    min_printer.input.df = min_.output.table

    max_ = Max(name='max_' + index_tag, scheduler=sched)
    max_.input.table = hist_index.output.max_out
    max_printer = Print(proc=self.terse, scheduler=sched)
    max_printer.input.df = max_.output.table

    sched.start()
    sched.join()

    # Reference values come straight from the source table.
    expected_min = source.table().min()['_1']
    observed_min = min_.table().last().to_dict()['_1']
    self.assertAlmostEqual(expected_min, observed_min)

    expected_max = source.table().max()['_1']
    observed_max = max_.table().last().to_dict()['_1']
    self.assertAlmostEqual(expected_max, observed_max)
def _impl_stirred_tst_percentiles_rq(self, accuracy: float, **kw: Any) -> None:
    """Check ``Percentiles`` on a range query fed through a ``Stirrer``.

    A 10000-row ``RandomTable`` is routed through a ``Stirrer`` (configured
    with ``update_column="_2"``, ``fixed_step_size=1000`` and the extra
    keyword arguments ``kw``), then filtered by a ``RangeQuery`` on column
    ``_1`` with bounds 0.3 and 0.8. A ``Percentiles`` module built with the
    given ``accuracy`` receives the query result, the requested percentiles
    (25, 50, 75) and the query's histogram index. After the scheduler has
    run, the last row of the percentiles table is compared with
    ``np.percentile`` applied to the ``_1`` values of the query result,
    using an absolute tolerance of 0.01.

    Args:
        accuracy: forwarded to ``Percentiles(accuracy=...)``.
        **kw: forwarded unchanged to the ``Stirrer`` constructor.
    """
    sched = self.scheduler()
    # NOTE(review): indentation was lost in the reviewed source; the ``with``
    # block is assumed to wrap only the dataflow construction and to close
    # before the scheduler is started -- confirm.
    with sched:
        source = RandomTable(2, rows=10000, scheduler=sched)
        stirrer = Stirrer(
            update_column="_2",
            fixed_step_size=1000,
            scheduler=sched,
            **kw,
        )
        stirrer.input[0] = source.output.result

        lower_bound = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
        upper_bound = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
        query = RangeQuery(column="_1", scheduler=sched)
        query.create_dependent_modules(
            stirrer,
            "result",
            min_value=lower_bound,
            max_value=upper_bound,
        )
        hist_index = query.hist_index
        assert hist_index

        requested = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
        which_percentiles = Constant(table=requested, scheduler=sched)
        percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
        percentiles.input[0] = query.output.result
        percentiles.input.percentiles = which_percentiles.output.result
        percentiles.input.hist = hist_index.output.result

        printer = Print(proc=self.terse, scheduler=sched)
        printer.input[0] = percentiles.output.result
    aio.run(sched.start())

    pdict = notNone(percentiles.table.last()).to_dict()
    # Reference percentiles computed directly on the selected values.
    values = query.table["_1"].values
    p25 = np.percentile(values, 25.0)  # type: ignore
    p50 = np.percentile(values, 50.0)  # type: ignore
    p75 = np.percentile(values, 75.0)  # type: ignore
    print(
        "TSV=> accuracy: ",
        accuracy,
        " 25:",
        p25,
        pdict["_25"],
        " 50:",
        p50,
        pdict["_50"],
        " 75:",
        p75,
        pdict["_75"],
    )
    # Checked in the same order as before: 25th, 50th, then 75th.
    for key, reference in (("_25", p25), ("_50", p50), ("_75", p75)):
        self.assertAlmostEqual(reference, pdict[key], delta=0.01)
def _query_min_max_impl(self, random, t_min, t_max, s):
    """Build a ``RangeQuery`` on column ``_1`` and return it.

    ``t_min`` and ``t_max`` are each wrapped in a ``Constant`` module and
    passed as ``min_value`` / ``max_value`` when creating the query's
    dependent modules on top of ``random``. A ``Print`` module is attached
    to each of the query's ``table``, ``min`` and ``max`` outputs. The
    scheduler ``s`` is used for every module and is not started here.
    """
    lower_bound = Constant(table=t_min, scheduler=s)
    upper_bound = Constant(table=t_max, scheduler=s)

    query = RangeQuery(column='_1', scheduler=s)
    query.create_dependent_modules(
        random,
        'table',
        min_value=lower_bound,
        max_value=upper_bound,
    )

    # One Print module per query output.
    table_printer = Print(proc=self.terse, scheduler=s)
    table_printer.input.df = query.output.table
    min_printer = Print(proc=self.terse, scheduler=s)
    min_printer.input.df = query.output.min
    max_printer = Print(proc=self.terse, scheduler=s)
    max_printer.input.df = query.output.max

    return query
def _query_min_max_impl(
    self, random: RandomTable, t_min: PsDict, t_max: PsDict, s: Scheduler
) -> RangeQuery:
    """Build a ``RangeQuery`` on column ``_1`` and return it.

    ``t_min`` and ``t_max`` are each wrapped in a ``Constant`` module and
    passed as ``min_value`` / ``max_value`` when creating the query's
    dependent modules on top of ``random``. A ``Print`` module is attached
    to each of the query's ``result``, ``min`` and ``max`` outputs. The
    scheduler ``s`` is used for every module and is not started here.

    Returns:
        The configured ``RangeQuery`` module.
    """
    lower_bound = Constant(table=t_min, scheduler=s)
    upper_bound = Constant(table=t_max, scheduler=s)

    query = RangeQuery(column="_1", scheduler=s)
    query.create_dependent_modules(
        random,
        "result",
        min_value=lower_bound,
        max_value=upper_bound,
    )

    # One Print module per query output.
    result_printer = Print(proc=self.terse, scheduler=s)
    result_printer.input[0] = query.output.result
    min_printer = Print(proc=self.terse, scheduler=s)
    min_printer.input[0] = query.output.min
    max_printer = Print(proc=self.terse, scheduler=s)
    max_printer.input[0] = query.output.max

    return query
def _impl_tst_percentiles_rq(self, accuracy):
    """Check ``Percentiles`` computed on the output of a ``RangeQuery``.

    A 10000-row ``RandomTable`` is filtered by a ``RangeQuery`` on column
    ``_1`` with bounds 0.3 and 0.8 (each given as a one-row ``Table``). A
    ``Percentiles`` module, built from the query's histogram index and the
    given ``accuracy``, receives the query table and the requested
    percentiles (25, 50, 75). After the scheduler has run to completion,
    the last row of the percentiles table is compared with
    ``np.percentile`` applied to the ``_1`` values of the query table,
    using an absolute tolerance of 0.01.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=10000, scheduler=sched)

    lower_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    lower_bound = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    upper_bound = Constant(table=upper_table, scheduler=sched)

    query = RangeQuery(column='_1', scheduler=sched)
    query.create_dependent_modules(
        source,
        'table',
        min_value=lower_bound,
        max_value=upper_bound,
    )
    hist_index = query.hist_index

    requested = Table(
        name=None,
        dshape='{_25: float64, _50: float64, _75: float64}',
        data={'_25': [25.0], '_50': [50.0], '_75': [75.0]},
    )
    which_percentiles = Constant(table=requested, scheduler=sched)
    percentiles = Percentiles(hist_index, accuracy=accuracy, scheduler=sched)
    percentiles.input.table = query.output.table
    percentiles.input.percentiles = which_percentiles.output.table

    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = percentiles.output.table

    sched.start()
    sched.join()

    pdict = percentiles.table().last().to_dict()
    # Reference percentiles computed directly on the selected values.
    values = query.table()['_1'].values
    p25 = np.percentile(values, 25.0)
    p50 = np.percentile(values, 50.0)
    p75 = np.percentile(values, 75.0)
    print(
        "TSV=> accuracy: ",
        accuracy,
        " 25:",
        p25,
        pdict['_25'],
        " 50:",
        p50,
        pdict['_50'],
        " 75:",
        p75,
        pdict['_75'],
    )
    # Checked in the same order as before: 25th, 50th, then 75th.
    for key, reference in (('_25', p25), ('_50', p50), ('_75', p75)):
        self.assertAlmostEqual(reference, pdict[key], delta=0.01)
def test_range_query(self):
    """Run tests of the RangeQuery module

    Filters a 1000-row ``RandomTable`` with a ``RangeQuery`` on column
    ``_1`` (bounds 0.3 and 0.8, each given as a one-row ``Table``), runs
    the scheduler to completion, and checks that the selection of the query
    table equals the bitmap of the index obtained by evaluating
    ``(_1>0.3)&(_1<0.8)`` on the data of the query's input module.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=1000, scheduler=sched)

    lower_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    lower_bound = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    upper_bound = Constant(table=upper_table, scheduler=sched)

    query = RangeQuery(column='_1', scheduler=sched)
    query.create_dependent_modules(
        source,
        'table',
        min_value=lower_bound,
        max_value=upper_bound,
    )
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = query.output.table

    sched.start()
    sched.join()

    # Recompute the expected selection on the query's input data.
    input_data = query.input_module.output['table'].data()
    expected_index = input_data.eval('(_1>0.3)&(_1<0.8)', result_object='index')
    self.assertEqual(query.table().selection, bitmap(expected_index))
def _range_query_impl(self, lo, up) -> None:
    """Run tests of the RangeQuery module

    Filters a 1000-row ``RandomTable`` with a ``RangeQuery`` on column
    ``_1`` bounded by ``lo`` and ``up``, runs the scheduler, and checks that
    the index of the query table equals the bitmap of the index obtained by
    evaluating ``(_1>lo)&(_1<up)`` on the data of the query's input module.

    Args:
        lo: lower bound, used as the ``_1`` value of the ``min_value`` input.
        up: upper bound, used as the ``_1`` value of the ``max_value`` input.
    """
    sched = self.scheduler()
    # NOTE(review): indentation was lost in the reviewed source; the ``with``
    # block is assumed to wrap only the dataflow construction and to close
    # before the scheduler is started -- confirm.
    with sched:
        source = RandomTable(2, rows=1000, scheduler=sched)
        lower_bound = Constant(table=PsDict({"_1": lo}), scheduler=sched)
        upper_bound = Constant(table=PsDict({"_1": up}), scheduler=sched)
        query = RangeQuery(column="_1", scheduler=sched)
        query.create_dependent_modules(
            source,
            "result",
            min_value=lower_bound,
            max_value=upper_bound,
        )
        printer = Print(proc=self.terse, scheduler=sched)
        printer.input[0] = query.output.result
    aio.run(sched.start())

    assert query.input_module is not None
    # Recompute the expected selection on the query's input data.
    input_data = query.input_module.output["result"].data()
    expected_index = input_data.eval(
        f"(_1>{lo})&(_1<{up})", result_object="index"
    )
    self.assertEqual(query.table.index, bitmap(expected_index))