def build_heatmap(self, inp: Slot, domain: Any, plan: int) -> Optional[JSon]:
    """Build a JSON heatmap description from the last row of *inp*'s table.

    Returns None when the slot carries no data, the table is empty, or the
    bounds stored in the last row contain NaNs (i.e. not yet computed).
    In ipydata mode the pixel array is written into ``self.hist_tensor``
    at plane *plan* instead of being shipped inline in the JSON.
    """
    inp_table = inp.data()
    if inp_table is None:
        return None
    assert isinstance(inp_table, BaseTable)
    if len(inp_table) == 0:
        return None
    # Most recent histogram row: bounds plus the binned pixel array.
    row = notNone(inp_table.last()).to_dict()
    json_: JSon = {}
    if not (np.isnan(row["xmin"]) or np.isnan(row["xmax"])
            or np.isnan(row["ymin"]) or np.isnan(row["ymax"])):
        data = row["array"]
        json_["bounds"] = (row["xmin"], row["ymin"], row["xmax"], row["ymax"])
        if self._ipydata:
            # ipydata: send only the plane index; pixels go to the shared
            # tensor so the front end can fetch them separately.
            assert isinstance(plan, int)
            json_["binnedPixels"] = plan
            self.hist_tensor[:, :, plan] = row["array"]  # type: ignore
        else:
            # Copy so the JSON payload does not alias the table's storage.
            data = np.copy(row["array"])  # type: ignore
            json_["binnedPixels"] = data
        json_["range"] = [np.min(data), np.max(data)]  # type: ignore
        json_["count"] = np.sum(data)
        json_["value"] = domain
        return json_
    return None
def t_histogram1d_impl(self, **kw: Any) -> None:
    """Run a stirred Histogram1D pipeline and check it against numpy."""
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
    )
    stirrer = Stirrer(
        update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
    )
    stirrer.input[0] = loader.output.result
    mn = Min(scheduler=sched)
    mn.input[0] = stirrer.output.result
    mx = Max(scheduler=sched)
    mx.input[0] = stirrer.output.result
    hist = Histogram1D("_2", scheduler=sched)  # columns are called 1..30
    hist.input[0] = stirrer.output.result
    hist.input.min = mn.output.result
    hist.input.max = mx.output.result
    sink = Every(proc=self.terse, scheduler=sched)
    sink.input[0] = hist.output.result
    aio.run(sched.start())
    _ = hist.trace_stats()
    last = notNone(hist.table.last()).to_dict()
    computed = last["array"]
    bounds = (last["min"], last["max"])
    col = stirrer.table.loc[:, ["_2"]]
    assert col is not None
    values = col.to_array().reshape(-1)
    expected, _ = np.histogram(  # type: ignore
        values, bins=hist.params.bins, density=False, range=bounds
    )
    self.assertEqual(np.sum(computed), np.sum(expected))
    self.assertListEqual(computed.tolist(), expected.tolist())
def test_histogram1d1(self) -> None:
    """Histogram1D over a CSV source agrees with numpy's histogram."""
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
    )
    mn = Min(scheduler=sched)
    mn.input[0] = loader.output.result
    mx = Max(scheduler=sched)
    mx.input[0] = loader.output.result
    hist = Histogram1D("_2", scheduler=sched)  # columns are called 1..30
    hist.input[0] = loader.output.result
    hist.input.min = mn.output.result
    hist.input.max = mx.output.result
    sink = Every(proc=self.terse, scheduler=sched)
    sink.input[0] = hist.output.result
    aio.run(sched.start())
    _ = hist.trace_stats()
    last = notNone(hist.table.last()).to_dict()
    computed = last["array"]
    bounds = (last["min"], last["max"])
    frame = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
    )
    values = frame.to_numpy().reshape(-1)
    expected, _ = np.histogram(  # type: ignore
        values, bins=hist.params.bins, density=False, range=bounds
    )
    self.assertListEqual(computed.tolist(), expected.tolist())
def test_idxmax2(self) -> None:
    """IdxMax over a stirred (mutating) table agrees with Max."""
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1", delete_rows=5, fixed_step_size=100, scheduler=s
    )
    stirrer.input[0] = random.output.result
    idxmax = IdxMax(scheduler=s)
    idxmax.input[0] = stirrer.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = idxmax.output.result
    pr2 = Print(proc=self.terse, scheduler=s)
    pr2.input[0] = max_.output.result
    aio.run(s.start())
    max1 = max_.psdict
    # Renamed from `max` to avoid shadowing the builtin.
    max_table = idxmax.max()
    assert max_table is not None
    max2 = notNone(max_table.last()).to_dict()
    self.compare(max1, max2)
def test_paste(self) -> None:
    """Paste joins two single-column Min results into one combined row."""
    sched = self.scheduler()
    source = RandomTable(10, rows=10000, scheduler=sched)
    min_first = Min(
        name="min_1" + str(hash(source)), scheduler=sched, columns=["_1"]
    )
    min_first.input[0] = source.output.result
    to_table_1 = Dict2Table(scheduler=sched)
    to_table_1.input.dict_ = min_first.output.result
    min_second = Min(
        name="min_2" + str(hash(source)), scheduler=sched, columns=["_2"]
    )
    min_second.input[0] = source.output.result
    to_table_2 = Dict2Table(scheduler=sched)
    to_table_2.input.dict_ = min_second.output.result
    paste = Paste(scheduler=sched)
    paste.input.first = to_table_1.output.result
    paste.input.second = to_table_2.output.result
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input[0] = paste.output.result
    aio.run(sched.start())
    expected = source.table.min()
    actual = notNone(paste.table.last()).to_dict()
    self.assertAlmostEqual(expected["_1"], actual["_1"])
    self.assertAlmostEqual(expected["_2"], actual["_2"])
def t_histogram2d_impl(self, **kw: Any) -> None:
    """Stirred Histogram2D pipeline matches fast-histogram on the final table."""
    sched = self.scheduler()
    source = RandomTable(3, rows=100000, scheduler=sched)
    stirrer = Stirrer(
        update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
    )
    stirrer.input[0] = source.output.result
    mn = Min(scheduler=sched)
    mn.input[0] = stirrer.output.result
    mx = Max(scheduler=sched)
    mx.input[0] = stirrer.output.result
    hist2d = Histogram2D(
        0, 1, xbins=100, ybins=100, scheduler=sched
    )  # columns are called 1..30
    hist2d.input[0] = stirrer.output.result
    hist2d.input.min = mn.output.result
    hist2d.input.max = mx.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
    heatmap.input.array = hist2d.output.result
    sink = Every(proc=self.terse, scheduler=sched)
    sink.input[0] = heatmap.output.result
    aio.run(sched.start())
    last = notNone(hist2d.table.last()).to_dict()
    computed = last["array"]
    bounds = [[last["ymin"], last["ymax"]], [last["xmin"], last["xmax"]]]
    pair = stirrer.table.loc[:, ["_1", "_2"]]
    assert pair is not None
    values = pair.to_array()
    bins = [hist2d.params.ybins, hist2d.params.xbins]
    expected = fh.histogram2d(values[:, 1], values[:, 0], bins=bins, range=bounds)
    expected = np.flip(expected, axis=0)  # type: ignore
    self.assertEqual(np.sum(computed), np.sum(expected))
    self.assertListEqual(
        computed.reshape(-1).tolist(), expected.reshape(-1).tolist()
    )
def test_histogram2d1(self) -> None:
    """Histogram2D over a CSV source matches fast-histogram on the raw file."""
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    min_ = Min(scheduler=s)
    min_.input[0] = csv.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = csv.output.result
    histogram2d = Histogram2D(
        1, 2, xbins=100, ybins=100, scheduler=s
    )  # columns are called 1..30
    histogram2d.input[0] = csv.output.result
    histogram2d.input.min = min_.output.result
    histogram2d.input.max = max_.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=s)
    heatmap.input.array = histogram2d.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = heatmap.output.result
    # Start `s` directly, consistent with the sibling tests
    # (csv.scheduler() is the very same object).
    aio.run(s.start())
    last = notNone(histogram2d.table.last()).to_dict()
    h1 = last["array"]
    bounds = [[last["ymin"], last["ymax"]], [last["xmin"], last["xmax"]]]
    df = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
    )
    v = df.to_numpy()
    bins = [histogram2d.params.ybins, histogram2d.params.xbins]
    h2 = fh.histogram2d(v[:, 1], v[:, 0], bins=bins, range=bounds)
    h2 = np.flip(h2, axis=0)  # type: ignore
    self.assertTrue(np.allclose(h1, h2))
def _impl_stirred_tst_percentiles_rq(self, accuracy: float, **kw: Any) -> None:
    """Percentiles over a RangeQuery of a stirred table match numpy."""
    sched = self.scheduler()
    with sched:
        source = RandomTable(2, rows=10000, scheduler=sched)
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = source.output.result
        lo = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
        hi = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
        range_qry = RangeQuery(column="_1", scheduler=sched)
        range_qry.create_dependent_modules(
            stirrer, "result", min_value=lo, max_value=hi
        )
        hist_index = range_qry.hist_index
        assert hist_index
        wanted = Constant(
            table=PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0}), scheduler=sched
        )
        percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
        percentiles.input[0] = range_qry.output.result
        percentiles.input.percentiles = wanted.output.result
        percentiles.input.hist = hist_index.output.result
        sink = Print(proc=self.terse, scheduler=sched)
        sink.input[0] = percentiles.output.result
    aio.run(sched.start())
    pdict = notNone(percentiles.table.last()).to_dict()
    values = range_qry.table["_1"].values
    p25 = np.percentile(values, 25.0)  # type: ignore
    p50 = np.percentile(values, 50.0)  # type: ignore
    p75 = np.percentile(values, 75.0)  # type: ignore
    print(
        "TSV=> accuracy: ", accuracy,
        " 25:", p25, pdict["_25"],
        " 50:", p50, pdict["_50"],
        " 75:", p75, pdict["_75"],
    )
    self.assertAlmostEqual(p25, pdict["_25"], delta=0.01)
    self.assertAlmostEqual(p50, pdict["_50"], delta=0.01)
    self.assertAlmostEqual(p75, pdict["_75"], delta=0.01)
def test_last(self) -> None:
    """Table.last returns the final row, whole or by column subset."""
    tbl = Table("table_last", dshape="{a: int, b: float32}", create=True)
    tbl.resize(10)
    tbl["a"] = np.random.randint(100, size=10)
    tbl["b"] = np.random.rand(10)
    expected = [tbl._column(0)[-1], tbl._column(1)[-1]]
    whole_row = list(notNone(tbl.last()).values())
    self.assertEqual(whole_row, expected)
    # Single-column access returns the scalar directly.
    self.assertEqual(tbl.last("a"), tbl._column(0)[-1])
    # Multi-column access returns the values in column order.
    self.assertEqual(list(tbl.last(["a", "b"])), whole_row)
def test_last_row(self) -> None:
    """LastRow emits the final row of its input table."""
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
    )
    last_row = LastRow(scheduler=sched)
    last_row.input[0] = loader.output.result
    sink = Every(proc=self.terse, constant_time=True, scheduler=sched)
    sink.input[0] = last_row.output.result
    aio.run(sched.start())
    full = loader.table
    result = last_row.table
    assert result is not None
    self.assertEqual(result.at[0, "_1"], notNone(full.last())["_1"])
def _impl_stirred_tst_percentiles(self, accuracy: float, **kw: Any) -> None:
    """Percentiles over a stirred table match numpy.percentile."""
    sched = self.scheduler()
    with sched:
        source = RandomTable(2, rows=10000, scheduler=sched)
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = source.output.result
        hist_index = HistogramIndex(column="_1", scheduler=sched)
        hist_index.input[0] = stirrer.output.result
        wanted = Constant(
            table=PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0}), scheduler=sched
        )
        percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
        percentiles.input[0] = stirrer.output.result
        percentiles.input.percentiles = wanted.output.result
        percentiles.input.hist = hist_index.output.result
        sink = Print(proc=self.terse, scheduler=sched)
        sink.input[0] = percentiles.output.result
    aio.run(sched.start())
    pdict = notNone(percentiles.table.last()).to_dict()
    values = stirrer.table.to_array(columns=["_1"]).reshape(-1)
    p25 = np.percentile(values, 25.0)  # type: ignore
    p50 = np.percentile(values, 50.0)  # type: ignore
    p75 = np.percentile(values, 75.0)  # type: ignore
    print(
        "Table=> accuracy: ", accuracy,
        " 25:", p25, pdict["_25"],
        " 50:", p50, pdict["_50"],
        " 75:", p75, pdict["_75"],
    )
    self.assertAlmostEqual(p25, pdict["_25"], delta=0.01)
    self.assertAlmostEqual(p50, pdict["_50"], delta=0.01)
    self.assertAlmostEqual(p75, pdict["_75"], delta=0.01)
def test_var_h(self) -> None:
    """VarH matches the sample variance (ddof=1) over the whole table."""
    sched = self.scheduler()
    source = RandomTable(1, rows=1000, scheduler=sched)
    var = VarH(scheduler=sched)
    var.input[0] = source.output.result
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input[0] = var.output.result
    aio.run(sched.start())
    tbl = source.table
    assert tbl is not None
    expected = np.array([float(e) for e in tbl.var(ddof=1).values()])
    actual = np.array([
        float(e) for e in notNone(var.table.last()).to_dict(ordered=True).values()
    ])
    print("res1:", expected)
    print("res2:", actual)
    self.assertTrue(np.allclose(expected, actual))
def get_image(self, run_number: Optional[int] = None) -> Optional[str]:
    """Return the image filename recorded for *run_number*.

    Falls back to the most recent image when *run_number* is None,
    newer than the last recorded run, or not present in the table.
    Returns None when no image has been produced yet.
    """
    table = self.table
    if table is None or len(table) == 0:
        return None
    # len(table) > 0, so last() cannot be None after notNone.
    last = notNone(table.last())
    if run_number is None or run_number >= last["time"]:
        # Removed a dead store (`run_number = last["time"]` was never read).
        return last["filename"]
    # Exact lookup by run number; fall back to the latest image if absent.
    idx = np.where(table["time"] == run_number)[0]
    if len(idx) == 0:
        return last["filename"]
    return table["filename"][idx[0]]
def test_idxmax(self) -> None:
    """IdxMax agrees with Max on a random table."""
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmax = IdxMax(scheduler=s)
    idxmax.input[0] = random.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = idxmax.output.result
    pr2 = Print(proc=self.terse, scheduler=s)
    pr2.input[0] = max_.output.result
    aio.run(s.start())
    max1 = max_.psdict
    # Renamed from `max` to avoid shadowing the builtin.
    max_table = idxmax.max()
    assert max_table is not None
    max2 = notNone(max_table.last()).to_dict()
    self.compare(max1, max2)
def test_idxmin(self) -> None:
    """IdxMin agrees with Min on a random table."""
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmin = IdxMin(scheduler=s)
    idxmin.input[0] = random.output.result
    min_ = Min(scheduler=s)
    min_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = idxmin.output.result
    pr2 = Print(proc=self.terse, scheduler=s)
    pr2.input[0] = min_.output.result
    aio.run(s.start())
    min1 = min_.psdict
    # Renamed from `min` to avoid shadowing the builtin.
    min_table = idxmin.min()
    assert min_table is not None
    min2 = notNone(min_table.last()).to_dict()
    self.compare(min1, min2)
def heatmap_to_json(self, json: JSon, short: bool) -> JSon:
    """Fill *json* with column names, bounds and the latest image filename."""
    slot = self.get_input_slot("array")
    assert isinstance(slot.output_module, Histogram2D)
    histo: Histogram2D = slot.output_module
    json["columns"] = [histo.x_column, histo.y_column]
    histo_df = slot.data()
    if histo_df is not None and len(histo_df) != 0:
        row = histo_df.last()
        bounds_ok = not (
            np.isnan(row["xmin"]) or np.isnan(row["xmax"])
            or np.isnan(row["ymin"]) or np.isnan(row["ymax"])
        )
        if bounds_ok:
            json["bounds"] = {
                "xmin": row["xmin"],
                "ymin": row["ymin"],
                "xmax": row["xmax"],
                "ymax": row["ymax"],
            }
    table = self.table
    if table is not None and self._last_update != 0:
        json["image"] = notNone(table.last())["filename"]
    return json
def test_stats(self) -> None:
    """Stats tracks the running min/max of column _1."""
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
    )
    stats = Stats("_1", name="test_stats", scheduler=sched)
    wait = Wait(name="wait", delay=3, scheduler=sched)
    wait.input.inp = loader.output.result
    stats.input._params = wait.output.out
    stats.input[0] = loader.output.result
    sink = Print(proc=self.terse, name="print", scheduler=sched)
    sink.input[0] = stats.output.result
    aio.run(sched.start())
    data = loader.table
    last = notNone(stats.table.last())
    self.assertTrue(np.isclose(data["_1"].min(), last["__1_min"]))
    self.assertTrue(np.isclose(data["_1"].max(), last["__1_max"]))
def test_combine_first_nan(self) -> None:
    """CombineFirst prefers non-NaN values across its inputs."""
    sched = self.scheduler(True)
    first = Constant(
        Table(
            name="tcf_xmin_xmax_nan",
            data=pd.DataFrame({"xmin": [1], "xmax": [2]}),
            create=True,
        ),
        scheduler=sched,
    )
    all_nan = Constant(
        Table(
            name="tcf_ymin_ymax_nan",
            data=pd.DataFrame({"ymin": [np.nan], "ymax": [np.nan]}),
            create=True,
        ),
        scheduler=sched,
    )
    filler = Constant(
        Table(
            name="tcf_ymin_ymax2_nan",
            data=pd.DataFrame({"ymin": [3], "ymax": [4]}),
            create=True,
        ),
        scheduler=sched,
    )
    combine = CombineFirst(scheduler=sched)
    combine.input[0] = first.output.result
    combine.input[0] = all_nan.output.result
    combine.input[0] = filler.output.result
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input[0] = combine.output.result
    aio.run(sched.start())
    last = notNone(combine.table.last()).to_dict()
    # The NaN ymin/ymax from the second input must be filled by the third.
    self.assertTrue(
        last["xmin"] == 1
        and last["xmax"] == 2
        and last["ymin"] == 3
        and last["ymax"] == 4
    )
def test_last_row_simple(self) -> None:
    """Join merges two single-row constant tables into one row."""
    sched = self.scheduler()
    left = Table(name=get_random_name("cst1"), data={"xmin": [1], "xmax": [2]})
    right = Table(name=get_random_name("cst2"), data={"ymin": [3], "ymax": [4]})
    cst_left = Constant(left, scheduler=sched)
    cst_right = Constant(right, scheduler=sched)
    join = Join(scheduler=sched)
    join.input[0] = cst_left.output.result
    join.input[0] = cst_right.output.result
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input[0] = join.output.result
    aio.run(sched.start())
    last = notNone(join.table.last())
    self.assertTrue(
        last["xmin"] == 1
        and last["xmax"] == 2
        and last["ymin"] == 3
        and last["ymax"] == 4
    )