def __init__(self, scheduler=None, **kwds):
    """Initialise a Paste module with two required Table inputs.

    Registers the ``first`` and ``second`` input slots, initialises the
    base class, keeps the keyword arguments selected by
    ``_filter_kwds(kwds, join)`` and creates the module's ``Dialog``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=Table, required=True)
        for slot_name in ('first', 'second')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)
    super(Paste, self).__init__(scheduler=scheduler, **kwds)
    self.join_kwds = self._filter_kwds(kwds, join)
    self._dialog = Dialog(self)
def __init__(self, **kwds):
    """Initialise an IdxMin module.

    Registers the required ``table`` input slot and the optional ``min``
    output slot before delegating to the base class.
    """
    self._add_slots(
        kwds,
        'input_descriptors',
        [SlotDescriptor('table', type=Table, required=True)],
    )
    self._add_slots(
        kwds,
        'output_descriptors',
        [SlotDescriptor('min', type=Table, required=False)],
    )
    super(IdxMin, self).__init__(**kwds)
    # No minimum has been computed yet.
    self._min = None
    self.default_step_size = 10000
def __init__(self, **kwds):
    """Initialise a Wait module.

    Registers the required ``inp`` input slot and the optional ``out``
    output slot, then checks the ``delay`` / ``reads`` parameters.

    Raises:
        ProgressiveError: if ``delay`` is NaN and ``reads`` is -1, i.e.
            neither parameter was given a usable value.
    """
    self._add_slots(kwds, 'input_descriptors',
                    [SlotDescriptor('inp', required=True)])
    self._add_slots(kwds, 'output_descriptors',
                    [SlotDescriptor('out', required=False)])
    super(Wait, self).__init__(**kwds)
    if np.isnan(self.params.delay) and self.params.reads == -1:
        # Exceptions do not apply %-formatting to their arguments, so the
        # message has to be interpolated before it is raised.
        raise ProgressiveError(
            'Module %s needs either a delay or '
            'a number of reads, not both' % self.pretty_typename())
def __init__(self, columns=None, **kwds):
    """Initialise a Histograms module.

    Registers required ``min``/``max`` input slots and optional
    ``min``/``max`` output slots (all typed ``BaseTable``), then stores the
    requested columns and starts with an empty histogram mapping.
    """
    bound_names = ('min', 'max')
    self._add_slots(
        kwds,
        'input_descriptors',
        [SlotDescriptor(name, type=BaseTable, required=True)
         for name in bound_names],
    )
    self._add_slots(
        kwds,
        'output_descriptors',
        [SlotDescriptor(name, type=BaseTable, required=False)
         for name in bound_names],
    )
    super(Histograms, self).__init__(**kwds)
    self.default_step_size = 1
    self._columns = columns
    self._histogram = {}
class Wait(Module):
    """Module that only becomes ready once its input has progressed enough.

    Readiness is driven by two parameters:

    * ``delay`` -- compared against the number of entries returned by the
      upstream module's ``tracer.trace_stats()``;
    * ``reads`` -- compared against the length of the data on ``inp``.
    """

    parameters = [("delay", np.dtype(float), np.nan), ("reads", np.dtype(int), -1)]
    inputs = [SlotDescriptor("inp", required=True)]
    outputs = [SlotDescriptor("out", required=False)]

    def __init__(self, **kwds: Any) -> None:
        """Create the module and validate its parameters.

        Raises:
            ProgressiveError: if ``delay`` is NaN and ``reads`` is -1, i.e.
                neither parameter was given a usable value.
        """
        super(Wait, self).__init__(**kwds)
        if np.isnan(self.params.delay) and self.params.reads == -1:
            # Exceptions do not apply %-formatting to their arguments, so
            # the message has to be interpolated before it is raised.
            raise ProgressiveError(
                "Module %s needs either a delay or "
                "a number of reads, not both" % self.pretty_typename()
            )

    def is_ready(self) -> bool:
        """Return True when the base class is ready and the wait condition holds."""
        if not super(Wait, self).is_ready():
            return False
        if self.is_zombie():
            return True  # give it a chance to run before it dies
        delay = self.params.delay
        reads = self.params.reads
        if np.isnan(delay) and reads < 0:
            return False
        inslot = self.get_input_slot("inp")
        assert inslot is not None
        trace = inslot.output_module.tracer.trace_stats()
        if len(trace) == 0:
            return False
        # ``delay`` takes precedence over ``reads`` when both are set.
        if not np.isnan(delay):
            return bool(len(trace) >= delay)
        if reads >= 0:
            return bool(len(inslot.data()) >= reads)
        return False

    def get_data(self, name: str) -> Any:
        """Return the data of the ``inp`` slot for ``name == "inp"``, else None."""
        if name == "inp":
            slot = self.get_input_slot("inp")
            if slot is not None:
                return slot.data()
        return None

    def predict_step_size(self, duration: float) -> int:
        """Always run a single step, whatever the allotted duration."""
        return 1

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Clear the buffers of the input slot and report one step, blocked."""
        slot = self.get_input_slot("inp")
        if slot is not None:
            slot.clear_buffers()
        return self._return_run_step(self.state_blocked, steps_run=1)
def __init__(self, columns=None, **kwds):
    """Initialise a Var module reading from a required ``table`` slot."""
    table_input = SlotDescriptor('table', type=Table, required=True)
    self._add_slots(kwds, 'input_descriptors', [table_input])
    super(Var, self).__init__(dataframe_slot='table', **kwds)
    self._columns = columns
    # Per-module working data, empty until steps are run.
    self._data = {}
    self.default_step_size = 1000
def __init__(self, nary='table', **kwds):
    """Initialise an NAry module.

    Registers the required ``table`` input slot and records ``nary`` both
    as an attribute and as the single entry of ``self.inputs``.
    """
    table_input = SlotDescriptor('table', type=BaseTable, required=True)
    self._add_slots(kwds, 'input_descriptors', [table_input])
    super(NAry, self).__init__(**kwds)
    self.nary = nary
    self.inputs = [nary]
def __init__(self, columns=None, **kwds):
    """Initialise a Max module backed by a ``CxxMax`` helper object."""
    table_input = SlotDescriptor('table', type=Table, required=True)
    self._add_slots(kwds, 'input_descriptors', [table_input])
    super(Max, self).__init__(**kwds)
    self._columns = columns
    self.default_step_size = 10000
    # The helper receives this module instance at construction time.
    self.cxx_module = CxxMax(self)
class Sink(Module):
    "Base class for modules supporting a variable number of input slots."

    inputs = [SlotDescriptor("inp", type=None, required=True, multiple=True)]

    def __init__(self, slot_name: str = "inp", **kwds: Any) -> None:
        """Create the sink; ``slot_name`` is the default multiple-slot name."""
        super(Sink, self).__init__(**kwds)
        self.slot_name = slot_name

    def predict_step_size(self, duration: float) -> int:
        """Always predict a single step."""
        return 1

    def get_input_slot_multiple(self, name: Optional[str] = None) -> List[str]:
        """Delegate to the base class, defaulting ``name`` to ``self.slot_name``."""
        effective_name = self.slot_name if name is None else name
        return super(Sink, self).get_input_slot_multiple(effective_name)

    def prepare_run(self, run_number: int) -> None:
        "Switch from zombie to terminated, or update slots."
        if self.state != Module.state_zombie:
            return
        self.state = Module.state_terminated

    def is_ready(self) -> bool:
        """Mark the module terminated and report that it is never ready."""
        self.state = Module.state_terminated
        return False

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:  # pragma no cover
        """Not implemented in this class."""
        raise NotImplementedError("run_step not defined")
class LastRow(TableModule):
    """Keep, in the result table, the last row of the ``table`` input."""

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, reset_index: Optional[bool] = True, **kwds: Any) -> None:
        """Create the module.

        Args:
            reset_index: when true the row is stored without passing the
                input row's index; otherwise ``last.index`` is passed along.
            kwds: forwarded to the base class.
        """
        super(LastRow, self).__init__(**kwds)
        self._reset_index = reset_index

    def predict_step_size(self, duration: float) -> int:
        """Always predict a single step."""
        return 1

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Refresh the stored row from the current input and block."""
        slot = self.get_input_slot("table")
        assert slot is not None
        slot.clear_buffers()
        source = slot.data()
        if source is None:
            return self._return_run_step(self.state_blocked, steps_run=1)

        last = source.last()
        if self.result is not None:
            # A result table already exists: replace its stored row.
            if self._reset_index:
                self.table.loc[0] = last
            else:
                del self.table.loc[0]
                self.table.add(last, last.index)
        else:
            # First call with data: create the table, then add the row.
            self.result = Table(
                self.generate_table_name("LastRow"), dshape=source.dshape
            )
            if self._reset_index:
                self.table.add(last)
            else:
                self.table.add(last, last.index)
        return self._return_run_step(self.state_blocked, steps_run=1)
class MyStirrer(TableModule):
    """Copy the ``table`` input into the result and perturb some rows once.

    The rows to perturb are chosen relative to the ``_sensitive_ids``
    attribute of another module, looked up by name in the scheduler.
    NOTE(review): looks like a test helper; confirm against callers.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, watched: str, proc_sensitive: bool = True, mode: str = "delete", value: float = 9999.0, **kwds: Any) -> None:
        """Create the module.

        Args:
            watched: name used to look the watched module up in the scheduler.
            proc_sensitive: if true, act on the watched module's sensitive
                ids; otherwise act on rows that are not among them.
            mode: ``"delete"`` deletes the chosen rows; any other value
                writes ``value`` into column 0 of those rows.
            value: the value written when ``mode`` is not ``"delete"``.
            kwds: forwarded to the base class.
        """
        super().__init__(**kwds)
        self.watched = watched
        self.proc_sensitive = proc_sensitive
        self.mode = mode
        self.default_step_size = 100
        self.value = value
        # Set to True once the perturbation has been applied.
        self.done = False

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Append newly created input rows, then perturb the result if not done."""
        input_slot = self.get_input_slot("table")
        # input_slot.update(run_number)
        steps = 0
        if not input_slot.created.any():
            # Nothing new on the input: nothing to do in this step.
            return self._return_run_step(self.state_blocked, steps_run=0)
        created = input_slot.created.next(step_size)
        steps = indices_len(created)
        input_table = input_slot.data()
        if self.result is None:
            # The result table is created lazily with the input's dshape.
            self.result = Table(
                self.generate_table_name("stirrer"),
                dshape=input_table.dshape,
            )
        v = input_table.loc[fix_loc(created), :]
        self.table.append(v)
        if not self.done:
            module = self.scheduler()[self.watched]
            sensitive_ids = bitmap(getattr(module, "_sensitive_ids").values())
            if sensitive_ids:
                if self.proc_sensitive:
                    if self.mode == "delete":
                        # print('delete sensitive', sensitive_ids)
                        del self.table.loc[sensitive_ids]
                    else:
                        # print('update sensitive', sensitive_ids)
                        self.table.loc[sensitive_ids, 0] = self.value
                    self.done = True
                else:  # non sensitive
                    # Only act once the result holds more than 10 rows, and
                    # only on ids among the first 10 index positions.
                    if len(self.result) > 10:
                        for i in range(10):
                            id_ = self.table.index[i]
                            if id_ not in sensitive_ids:
                                if self.mode == "delete":
                                    del self.table.loc[id_]
                                else:
                                    self.table.loc[id_, 0] = self.value
                                self.done = True
        return self._return_run_step(self.next_state(input_slot), steps_run=steps)
def __init__(self, column, **kwds):
    """Initialise a Histogram1D module for ``column``.

    Registers the required ``table``, ``min`` and ``max`` input slots and
    creates the output table from ``Histogram1D.schema``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=Table, required=True)
        for slot_name in ('table', 'min', 'max')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)
    super(Histogram1D, self).__init__(dataframe_slot='table', **kwds)
    self.column = column
    self.total_read = 0
    self.default_step_size = 1000
    # Histogram state; nothing has been computed yet.
    self._histo = None
    self._edges = None
    self._bounds = None
    table_name = self.generate_table_name('Histogram1D')
    self._table = Table(
        table_name,
        dshape=Histogram1D.schema,
        chunks={'array': (16384, 128)},
        create=True,
    )
class MergeDict(TableModule):
    """
    Binary module merging two dicts into a third one.

    When both inputs hold the same key, the value from ``second`` wins.

    Slots:
        first : module producing the first dict to merge
        second : module producing the second dict to merge
    Args:
        kwds : arguments forwarded to the base class
    """

    inputs = [
        SlotDescriptor("first", type=PsDict, required=True),
        SlotDescriptor("second", type=PsDict, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)
        self._dialog = Dialog(self)

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Merge the current content of both inputs into the result dict.

        Returns a blocked state with zero steps while either input has no
        data yet; otherwise consumes all pending changes and reports one step.
        """
        first_slot = self.get_input_slot("first")
        # first_slot.update(run_number)
        second_slot = self.get_input_slot("second")
        assert first_slot is not None and second_slot is not None
        first_dict = first_slot.data()
        second_dict = second_slot.data()
        if first_dict is None or second_dict is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # second_slot.update(run_number)
        # Consume every pending change; the merge below always works on the
        # full dicts, so the change details themselves are not needed.
        first_slot.created.next()
        second_slot.created.next()
        first_slot.updated.next()
        second_slot.updated.next()
        first_slot.deleted.next()
        second_slot.deleted.next()
        if self.result is None:
            # Merge into a plain dict first: ``PsDict(**a, **b)`` raises
            # TypeError on shared or non-string keys, whereas the update
            # path below lets ``second`` override ``first``.
            self.result = PsDict({**first_dict, **second_dict})
        else:
            self.psdict.update(first_dict)
            self.psdict.update(second_dict)
        return self._return_run_step(self.next_state(first_slot), steps_run=1)
def __init__(self, scheduler=None, **kwds):
    """Initialise a DummyMod module and cache its parameters.

    Each listed parameter is copied from ``self.params`` to a private
    attribute of the same name prefixed with an underscore.
    """
    self._add_slots(
        kwds,
        'input_descriptors',
        [SlotDescriptor('table', type=Table, required=True)],
    )
    super(DummyMod, self).__init__(scheduler=scheduler, **kwds)
    cached_params = (
        'update_column',
        'update_rows',
        'delete_rows',
        'delete_threshold',
        'update_threshold',
        'mode',
    )
    for param_name in cached_params:
        setattr(self, '_' + param_name, getattr(self.params, param_name))
class StirrerView(TableModule):
    """Expose the ``table`` input as a selected view and drop rows from it.

    The ``delete_rows`` parameter selects what is removed at each step:

    * ``None`` -- nothing is removed;
    * an ``int`` -- that many randomly sampled ids (at most all of them);
    * ``"half"`` -- a random half of the ids;
    * ``"all"`` -- every id present before the step;
    * anything else -- taken as the collection of ids to remove.
    """

    parameters = [
        ("update_column", np.dtype(object), ""),
        ("delete_rows", np.dtype(object), None),
        ("delete_threshold", np.dtype(object), None),
        ("fixed_step_size", np.dtype(np.int_), 0),
        ("mode", np.dtype(object), "random"),
    ]
    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)
        self._update_column: str = self.params.update_column
        # Keep the raw parameter value. Reducing it to a bool made every
        # non-None setting match ``isinstance(..., int)`` in run_step, so
        # exactly one row was deleted and the other modes were unreachable.
        self._delete_rows: Any = self.params.delete_rows
        self._delete_threshold: Optional[int] = self.params.delete_threshold
        self._mode: str = self.params.mode

    def test_delete_threshold(self, val: bitmap) -> bool:
        """Return True if deletion is allowed for the ids in ``val``.

        Without a threshold deletion is always allowed; otherwise ``val``
        must hold more ids than the threshold.
        """
        if self._delete_threshold is None:
            return True
        return len(val) > self._delete_threshold

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Add newly created input rows to the view, then apply deletions."""
        # NOTE: the ``and False`` keeps the fixed step size switched off.
        if self.params.fixed_step_size and False:
            step_size = self.params.fixed_step_size
        input_slot = self.get_input_slot("table")
        assert input_slot is not None
        steps = 0
        if not input_slot.created.any():
            return self._return_run_step(self.state_blocked, steps_run=0)
        created = input_slot.created.next(length=step_size, as_slice=False)
        # created = fix_loc(created)
        steps = indices_len(created)
        input_table = input_slot.data()
        if self.result is None:
            self.result = TableSelectedView(input_table, bitmap([]))
        # Deletion candidates are the ids present before this step's rows.
        before_ = bitmap(self.table.index)
        self.selected.selection |= created
        delete_rows = self._delete_rows
        delete: Any = []
        if delete_rows is not None and self.test_delete_threshold(before_):
            if isinstance(delete_rows, int):
                delete = random.sample(
                    tuple(before_), min(delete_rows, len(before_))
                )
            elif delete_rows == "half":
                delete = random.sample(tuple(before_), len(before_) // 2)
            elif delete_rows == "all":
                delete = before_
            else:
                delete = delete_rows
            self.selected.selection -= bitmap(delete)
        return self._return_run_step(self.next_state(input_slot), steps_run=steps)
class FooABC(TableModule):
    """Module with four required Table inputs, ``a`` to ``d``.

    Each step appends one ``(run_number, step_size)`` row to the result.
    """

    inputs = [
        SlotDescriptor("a", type=Table, required=True),
        SlotDescriptor("b", type=Table, required=True),
        SlotDescriptor("c", type=Table, required=True),
        SlotDescriptor("d", type=Table, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(output_required=False, **kwds)

    def run_step_impl(self, ctx: _CtxImpl, run_number: int, step_size: int) -> ReturnRunStep:
        """Consume the created buffers of all four slots and log the call."""
        if self.result is None:
            table_name = self.generate_table_name("Foo")
            self.result = Table(table_name, dshape="{a: int, b: int}", create=True)
        for slot_name in ("a", "b", "c", "d"):
            getattr(ctx, slot_name).created.next()
        row = {"a": [run_number], "b": [step_size]}
        self.table.append(row)
        return self._return_run_step(self.state_blocked, steps_run=0)
def __init__(self, colormap=None, **kwds):
    """Initialise a Heatmap module.

    Registers the ``array`` input slot, initialises the base class with
    ``table_slot='heatmap'`` and creates the output table from
    ``Heatmap.schema``.

    Args:
        colormap: stored as ``self.colormap``.
        kwds: forwarded to the base class.
    """
    self._add_slots(kwds, 'input_descriptors',
                    [SlotDescriptor('array', type=Table)])
    super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
    self.colormap = colormap
    self.default_step_size = 1
    name = self.generate_table_name('Heatmap')
    self._table = Table(name, dshape=Heatmap.schema, create=True)
def __init__(self, x_column, y_column, with_output=True, **kwds):
    """Initialise a Histogram2D module over ``x_column`` and ``y_column``.

    Registers the required ``table``, ``min`` and ``max`` input slots and
    creates the output table from ``Histogram2D.schema``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=Table, required=True)
        for slot_name in ('table', 'min', 'max')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)
    super(Histogram2D, self).__init__(dataframe_slot='table', **kwds)
    self.x_column = x_column
    self.y_column = y_column
    self.default_step_size = 10000
    self.total_read = 0
    # Histogram state; nothing has been computed yet.
    self._histo = None
    self._xedges = None
    self._yedges = None
    self._bounds = None
    self._with_output = with_output
    self._heatmap_cache = None
    table_name = self.generate_table_name('Histogram2D')
    self._table = Table(
        table_name,
        dshape=Histogram2D.schema,
        chunks={'array': (1, 64, 64)},
        create=True,
    )
class Hadamard(TableModule):
    """Column-wise element product of the rows created on ``x1`` and ``x2``."""

    inputs = [
        SlotDescriptor("x1", type=Table, required=True),
        SlotDescriptor("x2", type=Table, required=True),
    ]

    def reset(self) -> None:
        """Empty the result table, if one exists."""
        if self.result is None:
            return
        self.table.resize(0)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Multiply the next batch of created rows from both inputs."""
        x1 = self.get_input_slot("x1")
        x2 = self.get_input_slot("x2")
        if x1.updated.any() or x1.deleted.any() or x2.updated.any() or x2.deleted.any():
            # Any update or deletion restarts the computation from scratch.
            x1.reset()
            x2.reset()
            if self.result is not None:
                self.table.resize(0)
            x1.update(run_number)
            x2.update(run_number)
        # Both inputs advance by the same number of rows.
        step_size = min(x1.created.length(), x2.created.length(), step_size)
        left_indices = x1.created.next(step_size)
        right_indices = x2.created.next(step_size)
        left = x1.data().loc[fix_loc(left_indices)]
        right = x2.data().loc[fix_loc(right_indices)]
        assert left.columns == right.columns
        product = {
            col: np.multiply(left[col].value, right[col].value)
            for col in left.columns
        }
        if self.result is not None:
            self.table.append(product)
        else:
            self.result = Table(name="simple_hadamard", data=product, create=True)
        return self._return_run_step(self.next_state(x1), steps_run=step_size)
def __init__(self, column, scheduler=None, **kwds):
    """Initialise a HistogramIndex module on ``column``.

    Registers the required ``table`` input slot and the optional
    ``min_out`` / ``max_out`` output slots.
    """
    # Only ``table`` is an input; ``min``/``max`` inputs are not registered.
    table_input = SlotDescriptor('table', type=Table, required=True)
    self._add_slots(kwds, 'input_descriptors', [table_input])
    bound_outputs = [
        SlotDescriptor(slot_name, type=Table, required=False)
        for slot_name in ('min_out', 'max_out')
    ]
    self._add_slots(kwds, 'output_descriptors', bound_outputs)
    super(HistogramIndex, self).__init__(scheduler=scheduler, **kwds)
    self.column = column
    # will be created when the init_threshold is reached
    self._impl = None
    # will be filled when the table is read
    # so realistic initial values for min and max were available
    self.selection = bitmap()
    self.input_module = None
    self.input_slot = None
    self._input_table = None
    self._min_table = None
    self._max_table = None
class FilterMod(TableModule):
    """Select the rows of ``table`` for which the ``expr`` parameter holds.

    The result is a ``TableSelectedView`` over the input table.
    """

    parameters = [
        ("expr", np.dtype(object), "unknown"),
        ("user_dict", np.dtype(object), None),
    ]
    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)

    def reset(self) -> None:
        """Empty the current selection, if a result exists."""
        if self.result is None:
            return
        self.selected.selection = bitmap([])

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Apply pending deletions and evaluate ``expr`` on created rows."""
        input_slot = self.get_input_slot("table")
        assert input_slot is not None
        source = input_slot.data()
        if source is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        if self.result is None:
            self.result = TableSelectedView(source, bitmap([]))
        steps = 0
        if input_slot.updated.any():
            # Updated rows restart the filter from scratch.
            input_slot.reset()
            input_slot.update(run_number)
            self.reset()
        if input_slot.deleted.any():
            removed = input_slot.deleted.next(length=step_size, as_slice=False)
            self.selected.selection -= removed
            steps += indices_len(removed)
        if input_slot.created.any():
            added = input_slot.created.next(length=step_size, as_slice=False)
            locs = fix_loc(added)
            steps += indices_len(added)
            matching = source.eval(
                expr=self.params.expr,
                locs=np.array(locs),
                as_slice=False,
                result_object="index",
            )
            self.selected.selection |= bitmap(matching)
        if steps:
            return self._return_run_step(self.next_state(input_slot), steps)
        return self._return_run_step(self.state_blocked, steps_run=0)
class MyResetter(TableModule):
    """Publish a ``{"reset": ...}`` dict driven by the size of ``table``.

    The flag starts as True and is set to False once the input data is
    truthy and holds at least ``threshold`` entries.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, threshold: int, **kwds: Any) -> None:
        super().__init__(**kwds)
        self._threshold = threshold
        self.result = PsDict({"reset": True})

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Clear the input buffers and update the ``reset`` flag."""
        input_slot = self.get_input_slot("table")
        input_slot.clear_buffers()
        current = input_slot.data()
        threshold_reached = current and len(current) >= self._threshold
        if threshold_reached:
            self.psdict["reset"] = False
        return self._return_run_step(self.next_state(input_slot), steps_run=step_size)
def __init__(self, column, min_column=None, max_column=None,
             reset_index=False, **kwds):
    """Initialise a Stats module on ``column``.

    The output column names default to ``_<column>_min`` and
    ``_<column>_max``. The output table has two float64 columns with
    those names.
    """
    table_input = SlotDescriptor('table', type=Table, required=True)
    self._add_slots(kwds, 'input_descriptors', [table_input])
    super(Stats, self).__init__(table_slot='stats', **kwds)
    self._column = column
    self.default_step_size = 10000
    self._min_column = (
        '_' + str(column) + '_min' if min_column is None else min_column
    )
    self._max_column = (
        '_' + str(column) + '_max' if max_column is None else max_column
    )
    self._reset_index = reset_index
    # Datashape string describing the two output columns.
    self.schema = ''.join([
        '{', self._min_column, ': float64, ', self._max_column, ': float64}',
    ])
    self._table = Table(get_random_name('stats_'), dshape=self.schema)
class Sample(TableModule):
    """Module whose steps are delegated to a ``CxxSample`` helper object."""

    parameters = [("history", np.dtype(int), 3)]
    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, columns: Optional[List[str]] = None, **kwds: Any) -> None:
        super().__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000
        # The helper receives this module instance at construction time.
        self.cxx_module = CxxSample(self)

    def is_ready(self) -> bool:
        """Be ready whenever the input has created rows, else ask the base class."""
        has_new_rows = self.get_input_slot("table").created.any()
        return True if has_new_rows else super().is_ready()

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Forward the step to the helper object and return its result."""
        outcome = self.cxx_module.run(run_number, step_size, howlong)
        return outcome  # type: ignore
class NAry(TableModule):
    "Base class for modules supporting a variable number of input slots."

    inputs = [SlotDescriptor("table", type=BaseTable, required=True, multiple=True)]

    def __init__(self, nary: str = "table", **kwds: Any) -> None:
        """Create the module; ``nary`` is the default multiple-slot name."""
        super(NAry, self).__init__(**kwds)
        self.nary = nary

    def predict_step_size(self, duration: float) -> int:
        """Always predict a single step."""
        return 1

    def get_input_slot_multiple(self, name: Optional[str] = None) -> List[str]:
        """Delegate to the base class, defaulting ``name`` to ``self.nary``."""
        effective_name = self.nary if name is None else name
        return super(NAry, self).get_input_slot_multiple(effective_name)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:  # pragma no cover
        """Not implemented in this class."""
        raise NotImplementedError("run_step not defined")
class Max(TableModule):
    """
    Simplified Max, adapted for documentation

    Maintains, in a ``PsDict`` result, the per-key maximum of the rows
    created on the ``table`` input.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self) -> bool:
        """Be ready whenever the input has created rows, else ask the base class."""
        if self.get_input_slot("table").created.any():
            return True
        return super().is_ready()

    def reset(self) -> None:
        """Restart the running maxima by filling the result with ``-inf``."""
        if self.result is not None:
            self.psdict.fill(-np.inf)

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Fold the next batch of created rows into the running maxima."""
        slot = self.get_input_slot("table")
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            # Refill with -inf instead of clearing the dict: the merge loop
            # below iterates over the existing keys, so an emptied dict
            # would never be repopulated after a reset.
            self.reset()
            slot.update(run_number)
        indices = slot.created.next(step_size)
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        data = slot.data()
        op = data.loc[fix_loc(indices)].max(keepdims=False)
        if self.result is None:
            self.result = PsDict(op)
        else:
            for k, v in self.psdict.items():
                self.result[k] = np.maximum(op[k], v)
        return self._return_run_step(self.next_state(slot), steps_run=steps)
class Dict2Table(TableModule):
    """
    dict to table convertor

    Slots:
        dict_ : module producing the dict to expose as a table
    Args:
        kwds : arguments forwarded to the base class
    """

    inputs = [SlotDescriptor("dict_", type=PsDict, required=True)]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Write the current dict into the result table when it has changed."""
        dict_slot = self.get_input_slot("dict_")
        assert dict_slot is not None
        source = dict_slot.data()
        if source is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        if (
            not dict_slot.created.any()
            and not dict_slot.updated.any()
            and not dict_slot.deleted.any()
        ):
            # Nothing changed since the previous step.
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Consume all pending changes; the whole dict is rewritten below.
        dict_slot.created.next()
        dict_slot.updated.next()
        dict_slot.deleted.next()
        if self.result is None:
            self.result = Table(name=None, dshape=source.dshape)
        if len(self.result) != 0:
            self.table.loc[0] = source.array
        else:  # or history:
            self.table.append(source.as_row)
        return self._return_run_step(self.next_state(dict_slot), steps_run=1)
class MaxDec(TableModule):
    """
    Simplified Max with decorated run_step(), adapted for documentation
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        super().__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self) -> bool:
        """Be ready whenever the input has created rows, else ask the base class."""
        has_new_rows = self.get_input_slot("table").created.any()
        return True if has_new_rows else super().is_ready()

    def reset(self) -> None:
        """Restart the running maxima by filling the result with ``-inf``."""
        if self.result is None:
            return
        self.psdict.fill(-np.inf)

    @process_slot("table", reset_cb="reset")
    @run_if_any
    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Fold the next batch of created rows into the running maxima."""
        assert self.context
        with self.context as ctx:
            batch = ctx.table.created.next(step_size)  # returns a slice
            steps = indices_len(batch)
            source = ctx.table.data()
            batch_max = source.loc[fix_loc(batch)].max(keepdims=False)
            if self.result is not None:
                for key, current in self.psdict.items():
                    self.result[key] = np.maximum(batch_max[key], current)
            else:
                self.result = PsDict(batch_max)
            return self._return_run_step(self.next_state(ctx.table), steps_run=steps)
class BinJoin(TableModule):
    """
    Binary join module to join two tables and return a third one.

    Slots:
        first : Table module producing the first table to join
        second : Table module producing the second table to join
    Args:
        kwds : argument to pass to the join function
    """

    inputs = [
        SlotDescriptor("first", type=Table, required=True),
        SlotDescriptor("second", type=Table, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        super(BinJoin, self).__init__(**kwds)
        self.join_kwds = filter_kwds(kwds, join)
        self._dialog = Dialog(self)

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Feed the pending created/updated rows of both inputs to the join."""
        first_slot = self.get_input_slot("first")
        second_slot = self.get_input_slot("second")
        steps = 0
        if first_slot.deleted.any() or second_slot.deleted.any():
            # A deletion on either side restarts the join from scratch.
            first_slot.reset()
            second_slot.reset()
            if self.result is not None:
                self.table.resize(0)
                join_reset(self._dialog)
            first_slot.update(run_number)
            second_slot.update(run_number)
        # Collect changes per kind, keyed "table" (first) and "other"
        # (second), visiting first before second for each kind.
        created = {}
        updated = {}
        sides = (("table", first_slot), ("other", second_slot))
        for bucket, kind in ((created, "created"), (updated, "updated")):
            for key, slot in sides:
                pending = getattr(slot, kind)
                if pending.any():
                    indices = pending.next(length=step_size)
                    steps += indices_len(indices)
                    bucket[key] = indices
        first_table = first_slot.data()
        second_table = second_slot.data()
        if self._dialog.is_started:
            join_cont(
                first_table,
                second_table,
                dialog=self._dialog,
                created=created,
                updated=updated,
            )
        else:
            join_start(
                first_table,
                second_table,
                dialog=self._dialog,
                created=created,
                updated=updated,
                **self.join_kwds
            )
        return self._return_run_step(self.next_state(first_slot), steps_run=steps)
class RangeQuery(TableModule):
    """Select the rows of ``table`` whose ``column`` lies within given bounds.

    The bounds are read from the ``lower`` and ``upper`` input slots and
    checked against the values on the ``min`` and ``max`` input slots. The
    selection itself is computed by a ``RangeQueryImpl`` using the module
    connected to the ``hist`` slot. The bounds actually applied are
    published on the ``min`` and ``max`` output slots.
    """

    parameters = [
        ("column", np.dtype(object), "unknown"),
        ("watched_key_lower", np.dtype(object), ""),
        ("watched_key_upper", np.dtype(object), ""),
        # ('hist_index', object, None) # to improve ...
    ]
    inputs = [
        SlotDescriptor("table", type=Table, required=True),
        SlotDescriptor("lower", type=Table, required=False),
        SlotDescriptor("upper", type=Table, required=False),
        SlotDescriptor("min", type=PsDict, required=False),
        SlotDescriptor("max", type=PsDict, required=False),
        SlotDescriptor("hist", type=Table, required=True),
    ]
    outputs = [
        SlotDescriptor("min", type=Table, required=False),
        SlotDescriptor("max", type=Table, required=False),
    ]

    def __init__(
        self,
        # hist_index: Optional[HistogramIndex] = None,
        approximate: bool = False,
        **kwds: Any
    ) -> None:
        """Create the module.

        Args:
            approximate: forwarded to ``RangeQueryImpl`` and stored on the
                instance.
            kwds: forwarded to the base class.
        """
        super(RangeQuery, self).__init__(**kwds)
        self._impl: RangeQueryImpl = RangeQueryImpl(self.params.column, approximate)
        # self._hist_index: Optional[HistogramIndex] = hist_index
        self._approximate = approximate
        self.default_step_size = 1000
        # Set by create_dependent_modules(); None means "not called yet".
        self.input_module: Optional[Module] = None
        # Dicts served on the "min"/"max" output slots (see get_data()).
        self._min_table: Optional[PsDict] = None
        self._max_table: Optional[PsDict] = None
        self.hist_index: Optional[HistogramIndex] = None

    # @property
    # def hist_index(self) -> Optional[HistogramIndex]:
    #     return self._hist_index
    # @hist_index.setter
    # def hist_index(self, hi: HistogramIndex) -> None:
    #     self._hist_index = hi
    #     self._impl = RangeQueryImpl(self._column, hi, approximate=self._approximate)

    @property
    def column(self) -> str:
        """The ``column`` parameter, as a string."""
        return str(self.params.column)

    @property
    def watched_key_lower(self) -> str:
        """Key read from the lower-bound data; defaults to ``column``."""
        return self.params.watched_key_lower or self.column

    @property
    def watched_key_upper(self) -> str:
        """Key read from the upper-bound data; defaults to ``column``."""
        return self.params.watched_key_upper or self.column

    def create_dependent_modules(self, input_module: Module, input_slot: str, min_: Optional[Module] = None, max_: Optional[Module] = None, min_value: Optional[Module] = None, max_value: Optional[Module] = None, hist_index: Optional[HistogramIndex] = None, **kwds: Any) -> RangeQuery:
        """Create any helper module not supplied and wire all of them to this one.

        A ``HistogramIndex``, a ``Min``, a ``Max`` and two ``Variable``
        modules are created for the arguments left as None. The call does
        nothing if ``input_module`` was already set by an earlier call.

        Args:
            input_module: module providing the data to query.
            input_slot: name of the output slot of ``input_module`` to read.
            min_, max_: modules connected to the ``min`` / ``max`` inputs.
            min_value, max_value: modules connected to the ``lower`` /
                ``upper`` inputs.
            hist_index: module connected to the ``hist`` input.
            kwds: not used by this method.

        Returns:
            This module.
        """
        if self.input_module is not None:  # test if already called
            return self
        scheduler = self.scheduler()
        params = self.params
        self.input_module = input_module
        self.input_slot = input_slot
        with scheduler:
            if hist_index is None:
                hist_index = HistogramIndex(column=params.column, group=self.name, scheduler=scheduler)
            hist_index.input.table = input_module.output[input_slot]
            if min_ is None:
                min_ = Min(group=self.name, columns=[self.column], scheduler=scheduler)
                min_.input.table = hist_index.output.min_out
            if max_ is None:
                max_ = Max(group=self.name, columns=[self.column], scheduler=scheduler)
                max_.input.table = hist_index.output.max_out
            if min_value is None:
                min_value = Variable(group=self.name, scheduler=scheduler)
                min_value.input.like = min_.output.result
            if max_value is None:
                max_value = Variable(group=self.name, scheduler=scheduler)
                max_value.input.like = max_.output.result
            range_query = self
            range_query.hist_index = hist_index
            range_query.input.hist = hist_index.output.result
            range_query.input.table = input_module.output[input_slot]
            if min_value:
                range_query.input.lower = min_value.output.result
            if max_value:
                range_query.input.upper = max_value.output.result
            range_query.input.min = min_.output.result
            range_query.input.max = max_.output.result
        # Keep references to the helper modules on the instance.
        self.min = min_
        self.max = max_
        self.min_value = min_value
        self.max_value = max_value
        return range_query

    def _create_min_max(self) -> None:
        """Create the output dicts if missing, as ``+inf`` (min) and ``-inf`` (max)."""
        if self._min_table is None:
            self._min_table = PsDict({self.column: np.inf})
        if self._max_table is None:
            self._max_table = PsDict({self.column: -np.inf})

    def _set_minmax_out(self, attr_: str, val: float) -> None:
        """Store ``val`` under ``column`` in the dict attribute named ``attr_``.

        The dict is created when the attribute is None, updated otherwise.
        """
        d = {self.column: val}
        if getattr(self, attr_) is None:
            setattr(self, attr_, PsDict(d))
        else:
            getattr(self, attr_).update(d)

    def _set_min_out(self, val: float) -> None:
        """Publish ``val`` as the lower bound in ``_min_table``."""
        return self._set_minmax_out("_min_table", val)

    def _set_max_out(self, val: float) -> None:
        """Publish ``val`` as the upper bound in ``_max_table``."""
        return self._set_minmax_out("_max_table", val)

    def get_data(self, name: str) -> Any:
        """Serve ``_min_table`` / ``_max_table`` for ``"min"`` / ``"max"``.

        Any other name is delegated to the base class.
        """
        if name == "min":
            return self._min_table
        if name == "max":
            return self._max_table
        return super(RangeQuery, self).get_data(name)

    def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
        """Update the bounds and the selection from the pending input changes.

        Returns a blocked state with zero steps while the bound or min/max
        data is missing or empty, or when the input has nothing buffered
        and the bounds did not change.
        """
        input_slot = self.get_input_slot("table")
        self._create_min_max()
        #
        # lower/upper
        #
        lower_slot = self.get_input_slot("lower")
        # lower_slot.update(run_number)
        upper_slot = self.get_input_slot("upper")
        limit_changed = False
        # Consume pending changes on the bound slots; created or updated
        # values mark the limits as changed, deletions do not.
        if lower_slot.deleted.any():
            lower_slot.deleted.next()
        if lower_slot.updated.any():
            lower_slot.updated.next()
            limit_changed = True
        if lower_slot.created.any():
            lower_slot.created.next()
            limit_changed = True
        # Skip the upper slot when it is the same object as the lower one,
        # since its changes were consumed just above.
        if not (lower_slot is upper_slot):
            # upper_slot.update(run_number)
            if upper_slot.deleted.any():
                upper_slot.deleted.next()
            if upper_slot.updated.any():
                upper_slot.updated.next()
                limit_changed = True
            if upper_slot.created.any():
                upper_slot.created.next()
                limit_changed = True
        #
        # min/max
        #
        min_slot = self.get_input_slot("min")
        min_slot.clear_buffers()
        # min_slot.update(run_number)
        # min_slot.created.next()
        # min_slot.updated.next()
        # min_slot.deleted.next()
        max_slot = self.get_input_slot("max")
        max_slot.clear_buffers()
        # max_slot.update(run_number)
        # max_slot.created.next()
        # max_slot.updated.next()
        # max_slot.deleted.next()
        if (lower_slot.data() is None or upper_slot.data() is None or len(lower_slot.data()) == 0 or len(upper_slot.data()) == 0):
            return self._return_run_step(self.state_blocked, steps_run=0)
        lower_value = lower_slot.data().get(self.watched_key_lower)
        upper_value = upper_slot.data().get(self.watched_key_upper)
        if (lower_slot.data() is None or upper_slot.data() is None or min_slot.data() is None or max_slot.data() is None or len(min_slot.data()) == 0 or len(max_slot.data()) == 0):
            return self._return_run_step(self.state_blocked, steps_run=0)
        minv = min_slot.data().get(self.watched_key_lower)
        maxv = max_slot.data().get(self.watched_key_upper)
        # "*" means "use the current extreme". A missing, NaN or
        # out-of-range bound also falls back to the extreme, and that case
        # marks the limits as changed.
        if lower_value == "*":
            lower_value = minv
        elif (lower_value is None or np.isnan(lower_value) or lower_value < minv or lower_value >= maxv):
            lower_value = minv
            limit_changed = True
        if upper_value == "*":
            upper_value = maxv
        elif (upper_value is None or np.isnan(upper_value) or upper_value > maxv or upper_value <= minv or upper_value <= lower_value):
            upper_value = maxv
            limit_changed = True
        self._set_min_out(lower_value)
        self._set_max_out(upper_value)
        # input_slot.update(run_number)
        if not input_slot.has_buffered() and not limit_changed:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # ...
        steps = 0
        # Each change set stays None when the slot has nothing of that kind.
        deleted: Optional[bitmap] = None
        if input_slot.deleted.any():
            deleted = input_slot.deleted.next(length=step_size, as_slice=False)
            steps += indices_len(deleted)
        created: Optional[bitmap] = None
        if input_slot.created.any():
            created = input_slot.created.next(length=step_size, as_slice=False)
            steps += indices_len(created)
        updated: Optional[bitmap] = None
        if input_slot.updated.any():
            updated = input_slot.updated.next(length=step_size, as_slice=False)
            steps += indices_len(updated)
        input_table = input_slot.data()
        if self.result is None:
            self.result = TableSelectedView(input_table, bitmap([]))
        assert self._impl
        hist_slot = self.get_input_slot("hist")
        hist_slot.clear_buffers()
        # The first call starts the implementation; later calls resume it.
        if not self._impl.is_started:
            self._impl.start(
                input_table,
                cast(HistogramIndex, hist_slot.output_module),
                lower_value,
                upper_value,
                limit_changed,
                created=created,
                updated=updated,
                deleted=deleted,
            )
        else:
            self._impl.resume(
                cast(HistogramIndex, hist_slot.output_module),
                lower_value,
                upper_value,
                limit_changed,
                created=created,
                updated=updated,
                deleted=deleted,
            )
        assert self._impl.result
        self.selected.selection = self._impl.result._values
        return self._return_run_step(self.next_state(input_slot), steps)