Exemplo n.º 1
0
    def t_histogram1d_impl(self, **kw: Any) -> None:
        """Compare ``Histogram1D`` with ``np.histogram`` on stirred CSV data.

        The "bigfile" dataset is loaded with ``CSVLoader`` and routed through
        a ``Stirrer`` configured with ``update_column="_2"``.  The stirrer's
        output feeds ``Min``, ``Max`` and a ``Histogram1D`` on column ``"_2"``.
        Once the scheduler has run, the last histogram row is checked against
        a NumPy histogram of the stirrer's ``"_2"`` column computed with the
        same number of bins and the same (min, max) range.

        Args:
            **kw: Extra keyword arguments forwarded to ``Stirrer``.
        """
        sched = self.scheduler()

        # Build the dataflow: loader -> stirrer -> (min, max, histogram).
        loader = CSVLoader(
            get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
        )
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = loader.output.result
        lower = Min(scheduler=sched)
        lower.input[0] = stirrer.output.result
        upper = Max(scheduler=sched)
        upper.input[0] = stirrer.output.result
        histogram = Histogram1D("_2", scheduler=sched)
        histogram.input[0] = stirrer.output.result
        histogram.input.min = lower.output.result
        histogram.input.max = upper.output.result

        printer = Every(proc=self.terse, scheduler=sched)
        printer.input[0] = histogram.output.result

        aio.run(sched.start())
        histogram.trace_stats()

        # Reference histogram over the stirrer's final "_2" values.
        final_row = notNone(histogram.table.last()).to_dict()
        computed = final_row["array"]
        value_range = (final_row["min"], final_row["max"])
        column = stirrer.table.loc[:, ["_2"]]
        assert column is not None
        values = column.to_array().reshape(-1)
        reference, _ = np.histogram(  # type: ignore
            values, bins=histogram.params.bins, density=False, range=value_range
        )

        self.assertEqual(np.sum(computed), np.sum(reference))
        self.assertListEqual(computed.tolist(), reference.tolist())
Exemplo n.º 2
0
    def _create_columns(self, columns: List[str], df: Any) -> None:
        """Create one ``Histogram1D`` module per numeric column.

        For every name in ``columns`` whose dtype in ``df`` is numeric, a
        ``Histogram1D`` is built with this module's ``bins`` and ``delta``
        parameters and connected to the modules feeding this module's
        ``table``, ``min`` and ``max`` inputs.  The histogram's ``_trace``
        output is then connected to this module's ``table`` input and the
        histogram is stored in ``self._histogram`` under the column name.
        Non-numeric columns are skipped.

        Args:
            columns: Names of the columns to consider.
            df: Object indexable by column name whose items expose ``dtype``.

        Raises:
            AssertionError: If any of the ``table``, ``min`` or ``max`` input
                modules is missing.
        """
        bins: int = cast(int, self.params.bins)
        delta: float = cast(float, self.params.delta)  # crude
        inp = self.get_input_module("table")
        minmod = self.get_input_module("min")
        maxmod = self.get_input_module("max")

        assert inp and minmod and maxmod
        for column in columns:
            logger.debug("Creating histogram1d %s", column)
            dtype = df[column].dtype
            # Test against NumPy's abstract ``np.number``.  ``issubdtype``
            # coerces a non-NumPy class such as ``numbers.Number`` to the
            # object dtype, which makes numeric columns fail the check.
            if not np.issubdtype(dtype, np.number):
                # only create histograms for number columns
                continue
            histo = Histogram1D(
                group=self.name,
                column=column,
                bins=bins,
                delta=delta,
                # NOTE(review): ``self.scheduler`` is passed without being
                # called; confirm it is a property and not a method here.
                scheduler=self.scheduler,
            )
            histo.input.table = inp.output.result
            histo.input.min = minmod.output.result
            histo.input.max = maxmod.output.result
            self.input.table = histo.output._trace  # will become table.1 ...
            self._histogram[column] = histo
Exemplo n.º 3
0
 def test_histogram1d1(self) -> None:
     """Check ``Histogram1D`` on column ``"_2"`` against ``np.histogram``.

     The "bigfile" dataset is loaded with ``CSVLoader`` and fed to ``Min``,
     ``Max`` and ``Histogram1D``.  After the scheduler has run, the last
     histogram row must equal a NumPy histogram of the same CSV column
     (read with pandas, ``usecols=[2]``) computed with the same number of
     bins and the same (min, max) range.
     """
     sched = self.scheduler()

     # Build the dataflow: loader -> (min, max, histogram) -> printer.
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histogram = Histogram1D("_2", scheduler=sched)
     histogram.input[0] = loader.output.result
     histogram.input.min = lower.output.result
     histogram.input.max = upper.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = histogram.output.result

     aio.run(sched.start())
     histogram.trace_stats()

     # Reference histogram computed directly from the CSV file.
     final_row = notNone(histogram.table.last()).to_dict()
     computed = final_row["array"]
     value_range = (final_row["min"], final_row["max"])
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
     )
     values = frame.to_numpy().reshape(-1)
     reference, _ = np.histogram(  # type: ignore
         values, bins=histogram.params.bins, density=False, range=value_range
     )
     self.assertListEqual(computed.tolist(), reference.tolist())
Exemplo n.º 4
0
 def test_histogram1d(self) -> None:
     """Run a ``Histogram1D`` dataflow on "bigfile" to completion.

     ``CSVLoader`` feeds ``Min``, ``Max`` and a ``Histogram1D`` on column
     ``"_2"``; the histogram output is passed to an ``Every`` module using
     ``self.terse``.  The scheduler is run and the histogram's trace
     statistics are requested; no assertion is made on the result.
     """
     sched = self.scheduler()

     # Build the dataflow: loader -> (min, max, histogram) -> printer.
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histogram = Histogram1D("_2", scheduler=sched)
     histogram.input[0] = loader.output.result
     histogram.input.min = lower.output.result
     histogram.input.max = upper.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = histogram.output.result

     aio.run(sched.start())
     histogram.trace_stats()
Exemplo n.º 5
0
def refresh_info_hist_1d(hout: WidgetType, h1d_mod: Histogram1D, name: str,
                         tab: "TreeTab") -> None:
    """Send the latest histogram row of ``h1d_mod`` to the widget ``hout``.

    Returns without doing anything when the tab ``name`` is not visible and
    ``h1d_mod.updated_once`` is truthy, or when ``h1d_mod.table`` is falsy.
    Otherwise the last table row is converted to a data frame with columns
    ``xvals``, ``nbins`` and ``level``, which is passed to
    ``hout.children[1].update("data", ...)``; ``h1d_mod.updated_once`` is
    then set to ``True``.

    Args:
        hout: Widget whose second child receives the update.
        h1d_mod: Histogram module providing the table to read.
        name: Tab name used for the visibility check.
        tab: Object answering ``is_visible(name)``.
    """
    hidden = not tab.is_visible(name)
    if hidden and h1d_mod.updated_once:  # type: ignore
        return

    table = h1d_mod.table
    if not table:
        return

    last_row = table.last()
    assert last_row
    snapshot = last_row.to_dict()
    counts = snapshot["array"]
    lower = snapshot["min"]
    upper = snapshot["max"]
    n_values = len(counts)

    # One evenly spaced x value per histogram entry, from min to max.
    frame = pd.DataFrame({
        "xvals": np.linspace(lower, upper, n_values),
        "nbins": range(n_values),
        "level": counts,
    })
    hout.children[1].update("data", remove="true", insert=frame)
    h1d_mod.updated_once = True  # type: ignore
Exemplo n.º 6
0
    def test_histogram1d(self):
        """Run a ``Histogram1D`` dataflow on "bigfile" until loading ends.

        ``CSVLoader`` feeds ``Min``, ``Max`` and a ``Histogram1D`` on column
        ``'_2'``; an ``Every`` module using ``self.terse`` is attached to the
        loader's table.  The scheduler is started with a tick callback that
        stops it once the loader reports termination, then joined, and the
        histogram's trace statistics are requested.  No assertion is made.
        """
        sched = self.scheduler()

        # Build the dataflow: loader -> (min, max, histogram), loader -> printer.
        loader = CSVLoader(
            get_dataset('bigfile'), index_col=False, header=None, scheduler=sched
        )
        lower = Min(scheduler=sched)
        lower.input.table = loader.output.table
        upper = Max(scheduler=sched)
        upper.input.table = loader.output.table
        histogram = Histogram1D('_2', scheduler=sched)
        histogram.input.table = loader.output.table
        histogram.input.min = lower.output.table
        histogram.input.max = upper.output.table

        printer = Every(proc=self.terse, scheduler=sched)
        printer.input.df = loader.output.table

        def stop_when_loaded(s, r):
            # Stop the scheduler as soon as the loader has terminated.
            return loader.is_terminated() and s.stop()

        sched.start(tick_proc=stop_when_loaded)
        sched.join()
        histogram.trace_stats()
Exemplo n.º 7
0
 def _create_columns(self, columns, df):
     """Create one ``Histogram1D`` module per numeric column.

     For every name in ``columns`` whose dtype in ``df`` is numeric, a
     ``Histogram1D`` is built with this module's ``bins`` and ``delta``
     parameters and connected to the modules feeding this module's
     ``table``, ``min`` and ``max`` inputs.  The histogram's ``_trace``
     output is then connected to this module's ``table`` input and the
     histogram is stored in ``self._histogram`` under the column name.
     Non-numeric columns are skipped.

     Args:
         columns: Names of the columns to consider.
         df: Object indexable by column name whose items expose ``dtype``.
     """
     bins = self.params.bins
     delta = self.params.delta  # crude
     inp = self.get_input_module('table')
     minmod = self.get_input_module('min')
     maxmod = self.get_input_module('max')
     for column in columns:
         logger.debug('Creating histogram1d %s', column)
         dtype = df[column].dtype
         # Test against NumPy's abstract ``np.number``.  ``issubdtype``
         # coerces a non-NumPy class such as ``numbers.Number`` to the
         # object dtype, which makes numeric columns fail the check.
         if not np.issubdtype(dtype, np.number):
             # only create histograms for number columns
             continue
         histo = Histogram1D(group=self.name,
                             column=column,
                             bins=bins,
                             delta=delta,
                             scheduler=self.scheduler())
         histo.input.table = inp.output.table
         histo.input.min = minmod.output.table
         histo.input.max = maxmod.output.table
         self.input.table = histo.output._trace  # will become table.1 ...
         self._histogram[column] = histo