def test_ps_dict_new_ids(self) -> None:
    prev = PsDict(a=1, b=2, c=3)
    now = copy.copy(prev)
    now["x"] = 10
    now["y"] = 20
    new_ids = now.created_indices(prev)
    self.assertEqual(bitmap(new_ids), bitmap([3, 4]))
def test_to_dict2(self):
    # index=[1,2,3,8,11],
    df = pd.DataFrame(
        data={
            'a': [1, 2, 3, 4, 5, 6, 7, 8],
            'b': [10, 20, 30, 40, 50, 60, 70, 80],
            'c': ['a', 'b', 'cd', 'ef', 'fg', 'gh', 'hi', 'ij']
        })
    t_ = Table(name=None, data=df)
    df = df.drop(df.index[[3, 4]])
    sel = bitmap(t_.index) - bitmap([3, 4])
    # del t.loc[[3,4]]
    t = TableSelectedView(t_, sel)
    # del t.loc[3]
    # print(df.to_dict(orient='records'))
    self.assertEqual(df.to_dict(orient='rows'), df.to_dict(orient='records'))
    # print(t.to_dict(orient='records'))
    # orient : {'dict', 'list', 'split', 'rows', 'record', 'index'}
    self.assertEqual(df.to_dict(orient='dict'), t.to_dict(orient='dict'))
    self.assertEqual(df.to_dict(orient='list'), t.to_dict(orient='list'))
    self.assertEqual(df.to_dict(orient='split'), t.to_dict(orient='split'))
    self.assertEqual(df.to_dict(orient='rows'), t.to_dict(orient='rows'))
    self.assertEqual(df.to_dict(orient='index'), t.to_dict(orient='index'))
def test_to_dict2(self) -> None:
    # index=[1,2,3,8,11],
    df = pd.DataFrame(
        data={
            "a": [1, 2, 3, 4, 5, 6, 7, 8],
            "b": [10, 20, 30, 40, 50, 60, 70, 80],
            "c": ["a", "b", "cd", "ef", "fg", "gh", "hi", "ij"],
        })
    t_ = Table(name=None, data=df)
    df = df.drop(df.index[[3, 4]])
    sel = bitmap(t_.index) - bitmap([3, 4])
    # del t.loc[[3,4]]
    t = t_.loc[sel, :]  # TableSelectedView(t_, sel)
    assert t is not None
    # del t.loc[3]
    # print(df.to_dict(orient='records'))
    # print(t.to_dict(orient='records'))
    # orient : {'dict', 'list', 'split', 'rows', 'record', 'index'}
    self.assertEqual(df.to_dict(orient="dict"), t.to_dict(orient="dict"))
    self.assertEqual(df.to_dict(orient="list"), t.to_dict(orient="list"))
    self.assertEqual(df.to_dict(orient="split"), t.to_dict(orient="split"))
    self.assertEqual(df.to_dict(orient="records"), t.to_dict(orient="records"))
    self.assertEqual(df.to_dict(orient="index"), t.to_dict(orient="index"))
def run_step(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
    if self.params.fixed_step_size and False:
        step_size = self.params.fixed_step_size
    input_slot = self.get_input_slot("table")
    assert input_slot is not None
    steps = 0
    if not input_slot.created.any():
        return self._return_run_step(self.state_blocked, steps_run=0)
    created = input_slot.created.next(length=step_size, as_slice=False)
    # created = fix_loc(created)
    steps = indices_len(created)
    input_table = input_slot.data()
    if self.result is None:
        self.result = TableSelectedView(input_table, bitmap([]))
    before_ = bitmap(self.table.index)
    self.selected.selection |= created
    # print(len(self.table.index))
    delete = []
    if self._delete_rows and self.test_delete_threshold(before_):
        if isinstance(self._delete_rows, int):
            delete = random.sample(tuple(before_),
                                   min(self._delete_rows, len(before_)))
        elif self._delete_rows == "half":
            delete = random.sample(tuple(before_), len(before_) // 2)
        elif self._delete_rows == "all":
            delete = before_
        else:
            delete = self._delete_rows
    self.selected.selection -= bitmap(delete)
    return self._return_run_step(self.next_state(input_slot), steps_run=steps)
def run_step_progress(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
    _b = bitmap.asbitmap
    # to_delete: List[bitmap]
    to_create: List[bitmap] = []
    steps = 0
    tables = []
    ph_table = None
    # assert len(self.inputs) > 0
    reset_ = False
    for name in self.get_input_slot_multiple():
        slot = self.get_input_slot(name)
        t = slot.data()
        assert isinstance(t, BaseTable)
        if ph_table is None:
            ph_table = _get_physical_table(t)
        else:
            assert ph_table is _get_physical_table(t)
        tables.append(t)
        # slot.update(run_number)
        if reset_ or slot.updated.any() or slot.deleted.any():
            slot.reset()
            reset_ = True
            steps += 1
        # if slot.deleted.any():
        #     deleted = slot.deleted.next(step_size)
        #     steps += 1
        #     to_delete.append(_b(deleted))
        # if slot.updated.any():  # actually don't care
        #     _ = slot.updated.next(step_size)
        #     # to_delete |= _b(updated)
        #     # to_create |= _b(updated)
        #     # steps += 1  # indices_len(updated) + 1
        if slot.created.any():
            created = slot.created.next(step_size)
            bm = _b(created)  # - to_delete
            to_create.append(bm)
            steps += indices_len(created)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)
    # to_delete = bitmap.union(*to_delete)
    to_create_4sure = bitmap()
    if len(to_create) == len(tables):
        to_create_4sure = bitmap.intersection(*to_create)
    to_create_maybe = bitmap.union(*to_create)
    if not self.result:
        self.result = TableSelectedView(ph_table, bitmap([]))
    if reset_:
        self.selected.selection = bitmap([])
    self.selected.selection = self.selected.index | to_create_4sure
    to_create_maybe -= to_create_4sure
    eff_create = to_create_maybe
    for t in tables:
        eff_create &= t.index
    self.selected.selection = self.selected.index | eff_create
    return self._return_run_step(self.state_blocked, steps)
def test_ps_dict_updated_ids(self) -> None:
    prev = PsDict(a=1, b=2, c=3, d=4, e=5)
    now = copy.copy(prev)
    updated_ids = now.updated_indices(prev)
    self.assertEqual(bitmap(updated_ids), bitmap())
    now["b"] += 1
    now["d"] *= 2
    updated_ids = now.updated_indices(prev)
    self.assertEqual(bitmap(updated_ids), bitmap([1, 3]))
def run_step(self, run_number, step_size, howlong):
    if self.params.fixed_step_size and False:
        step_size = self.params.fixed_step_size
    input_slot = self.get_input_slot('table')
    input_slot.update(run_number)
    steps = 0
    if not input_slot.created.any():
        return self._return_run_step(self.state_blocked, steps_run=0)
    created = input_slot.created.next(step_size)
    steps = indices_len(created)
    with input_slot.lock:
        input_table = input_slot.data()
    p = self.params
    if self._table is None:
        self._table = Table(
            self.generate_table_name('dummy'),
            dshape=input_table.dshape,
        )
    raw_ids = self._table.index.values
    before_ = bitmap(raw_ids[raw_ids >= 0])
    v = input_table.loc[fix_loc(created), :]
    # print("creations: ", created)
    self._table.append(v)  # indices=bitmap(created))
    delete = []
    if self._delete_rows and self.test_delete_threshold(before_):
        if isinstance(self._delete_rows, int):
            delete = random.sample(tuple(before_),
                                   min(self._delete_rows, len(before_)))
        elif self._delete_rows == 'half':
            delete = random.sample(tuple(before_), len(before_) // 2)
        elif self._delete_rows == 'all':
            delete = before_
        else:
            delete = self._delete_rows
    # print("deletions: ", delete)
    if self.params.del_twice:
        mid = len(delete) // 2
        del self._table.loc[delete[:mid]]
        del self._table.loc[delete[mid:]]
    else:
        del self._table.loc[delete]
    if self._update_rows and len(before_):
        before_ -= bitmap(delete)
        if isinstance(self._update_rows, int):
            updated = random.sample(tuple(before_),
                                    min(self._update_rows, len(before_)))
        else:
            updated = self._update_rows
        v = np.random.rand(len(updated))
        if updated:
            self._table.loc[fix_loc(updated), [self._update_column]] = [v]
    return self._return_run_step(self.next_state(input_slot), steps_run=steps)
def compute_updates(self, start, now, mid=None, cleanup=True):
    if self._changes:
        self._flush_cache()
        updates = self._changes.compute_updates(start, now, mid, cleanup=cleanup)
        if updates is None:
            try:  # EAFP
                updates = IndexUpdate(created=bitmap(self.dataset[:]))
            except OverflowError:
                # because rows could be created then removed in the same step
                ids = self.dataset[:]
                updates = IndexUpdate(created=bitmap(ids[ids >= 0]))
        return updates
    return None
def test_bitmapchangemanager(self) -> None:
    mid1 = "m1"
    bm = bitmap([1, 2, 3])
    slot = FakeSlot(bm)
    cm = BitmapChangeManager(slot)
    self.assertEqual(cm.last_update(), 0)
    self.assertEqual(cm.created.length(), 0)
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.deleted.length(), 0)
    cm.update(1, bm, mid1)
    self.assertEqual(cm.last_update(), 1)
    self.assertEqual(cm.created.length(), 3)
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.deleted.length(), 0)
    bm = bitmap([2, 3, 4])
    cm.update(2, bm, mid1)
    self.assertEqual(cm.last_update(), 2)
    # 1 should be removed because deleted at ts=2
    self.assertEqual(cm.created.next(), slice(2, 5))
    self.assertEqual(cm.updated.length(), 0)
    # 1 has been created then deleted before it got consumed
    self.assertEqual(cm.deleted.length(), 0)
    bm = bitmap([3, 4, 5])
    cm.update(3, bm, mid1)
    self.assertEqual(cm.last_update(), 3)
    self.assertEqual(cm.created.next(), slice(5, 6))
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.deleted.length(), 1)  # 2 is deleted but buffered
    bm = bitmap([2, 3, 4])
    cm.update(4, bm, mid1)
    self.assertEqual(cm.last_update(), 4)
    # 2 has been created before it was consumed so it becomes updated
    self.assertEqual(cm.created.length(), 0)
    self.assertEqual(cm.created.length(), len(cm.created))
    self.assertEqual(cm.updated.length(), 0)  # updates are ignored by default
    # 2 should be removed because added at ts=4
    self.assertEqual(cm.deleted.next(), slice(5, 6))
    cm.created.clear()
    self.assertEqual(cm.created.length(), 0)
    cm.created.set_buffered(False)
    self.assertIsNone(cm.created.next())
def divide_bin(self, i):
    "Change the bounds of the index if needed"
    # import pdb;pdb.set_trace()
    ids = np.array(self.bitmaps[i], np.int64)
    if self._sampling_size * 1.2 < len(ids):
        samples = np.random.choice(ids, self._sampling_size, replace=False)
    else:
        samples = ids
    s_vals = self.column.loc[samples]
    v = np.median(s_vals)
    assert self.bins[i - 1] < v < self.bins[i] if i > 0 else v < self.bins[i]
    values = self.column.loc[ids]
    lower_bin = bitmap(ids[values < v])
    upper_bin = self.bitmaps[i] - lower_bin
    lower_len = len(lower_bin)
    upper_len = len(upper_bin)
    t = len(ids) * self._perm_deviation
    if abs(lower_len - upper_len) > t:
        print("DIFF: ", lower_len, upper_len,
              float(abs(lower_len - upper_len)) / len(ids))
    # old = self.bins
    self.bins = np.insert(self.bins, i, v)
    try:
        assert (self.bins[i - 1] < self.bins[i] < self.bins[i + 1]
                if i > 0 else self.bins[i] < self.bins[i + 1])
    except:
        import pdb
        pdb.set_trace()
    self.bitmaps.insert(i, lower_bin)
    self.bitmaps[i + 1] = upper_bin
    print('*', end='')
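# Illustrative, self-contained sketch of the bin-splitting step performed by
# divide_bin() above: estimate the median of an overfull bin from a random
# sample, then split the bin's row ids into a lower and an upper part around
# that pivot.  Plain NumPy arrays and Python sets stand in for the column and
# bitmap types used by the real index; split_bin and its parameters are
# hypothetical names, not part of the module.
import numpy as np

def split_bin(values, ids, sampling_size=100):
    "Return (pivot, lower_ids, upper_ids) for one overfull bin."
    if sampling_size * 1.2 < len(ids):
        samples = np.random.choice(ids, sampling_size, replace=False)
    else:
        samples = ids
    pivot = np.median(values[samples])
    mask = values[ids] < pivot
    return pivot, set(ids[mask]), set(ids[~mask])

# Example: split 1000 uniform values into two roughly equal halves.
# vals = np.random.rand(1000); row_ids = np.arange(1000)
# pivot, lower, upper = split_bin(vals, row_ids)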
def range_query_aslist(
    self, lower: float, upper: float, approximate: bool = APPROX
) -> List[bitmap]:
    """
    Return the list of bitmaps with values in range [`lower`, `upper`[
    """
    if lower > upper:
        lower, upper = upper, lower
    pos_lo, pos_up = np.digitize([lower, upper], self.bins)  # type: ignore
    detail = bitmap()
    res = self.bitmaps[pos_lo + 1 : pos_up]
    if not approximate:
        ids = np.array(self.bitmaps[pos_lo], np.int64)
        values = self.column.loc[ids]
        if pos_lo == pos_up:
            selected = ids[(lower <= values) & (values < upper)]
            detail.update(selected)
        else:
            selected = ids[lower <= values]
            detail.update(selected)
            ids = np.array(self.bitmaps[pos_up], np.int64)
            values = self.column.loc[ids]
            selected = ids[values < upper]
            detail.update(selected)
        res.append(detail)
    return res
def run_step_seq(self, run_number: int, step_size: int, howlong: float) -> ReturnRunStep:
    steps = 0
    tables = []
    ph_table = None
    # assert len(self.inputs) > 0
    for name in self.get_input_slot_multiple():
        if not name.startswith("table"):
            continue
        slot = self.get_input_slot(name)
        t = slot.data()
        assert isinstance(t, BaseTable)
        if ph_table is None:
            ph_table = _get_physical_table(t)
        else:
            assert ph_table is _get_physical_table(t)
        tables.append(t)
        # slot.update(run_number)
        if slot.deleted.any():
            slot.deleted.next()
            steps += 1
        if slot.updated.any():
            slot.updated.next()
            steps += 1
        if slot.created.any():
            slot.created.next()
            steps += 1
    if steps == 0:
        return self._return_run_step(self.state_blocked, 0)
    if not self.result:
        self.result = TableSelectedView(ph_table, bitmap([]))
    self.selected.selection = bitmap.intersection(*[t.index for t in tables])
    return self._return_run_step(self.state_blocked, steps)
def range_query(
    self, lower: float, upper: float, all_ids: bitmap, approximate: bool = APPROX
) -> bitmap:
    """
    Return the bitmap of all rows with values in range [`lower`, `upper`[
    """
    if lower > upper:
        lower, upper = upper, lower
    assert self.bins is not None
    pos_lo, pos_up = np.digitize([lower, upper], self.bins)
    if pos_up - pos_lo > len(self.bins) // 2:
        exclusion = self.bitmaps[: pos_lo + 1] + self.bitmaps[pos_up:]
        union = all_ids - bitmap.union(*exclusion)
    else:
        union = bitmap.union(*self.bitmaps[pos_lo + 1 : pos_up])
    if not approximate:
        detail = bitmap()
        ids = np.array(self.bitmaps[pos_lo], np.int64)
        values = self.column.loc[ids]
        if pos_lo == pos_up:
            selected = ids[(lower <= values) & (values < upper)]
            detail.update(selected)
        else:
            selected = ids[lower <= values]
            detail.update(selected)
            ids = np.array(self.bitmaps[pos_up], np.int64)
            values = self.column.loc[ids]
            selected = ids[values < upper]
            detail.update(selected)
        union.update(detail)
    return union
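# Illustrative, self-contained sketch of the histogram-index range query used
# above: row ids are pre-bucketed with np.digitize(column, bins), whole
# buckets strictly inside [lower, upper[ are taken as-is, and only the one or
# two boundary buckets are filtered value by value.  Python sets stand in for
# bitmaps; sketch_range_query and its parameters are hypothetical names, not
# the module's API.
import numpy as np

def sketch_range_query(column, bins, buckets, lower, upper):
    "buckets[k] holds the row ids whose value falls into bin k of `bins`."
    if lower > upper:
        lower, upper = upper, lower
    pos_lo, pos_up = np.digitize([lower, upper], bins)
    result = set().union(*buckets[pos_lo + 1:pos_up])
    # Only the buckets containing `lower` and `upper` need an exact check.
    boundary = buckets[pos_lo] | (buckets[pos_up] if pos_up != pos_lo else set())
    result |= {i for i in boundary if lower <= column[i] < upper}
    return result

# Example setup and equivalence check against a brute-force scan:
# column = np.random.rand(1000)
# bins = np.linspace(0.0, 1.0, 11)
# buckets = [set(np.flatnonzero(np.digitize(column, bins) == k))
#            for k in range(len(bins) + 1)]
# assert (sketch_range_query(column, bins, buckets, 0.3, 0.8)
#         == set(np.flatnonzero((column >= 0.3) & (column < 0.8))))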
def restricted_query(
    self,
    operator_: Callable[[Any, Any], int],
    limit: Any,
    only_locs: Any,
    approximate: bool = APPROX,
) -> bitmap:  # blocking...
    """
    Returns the subset of only_locs matching the query.
    """
    only_locs = bitmap.asbitmap(only_locs)
    assert self.bins is not None
    pos = np.digitize(limit, self.bins)
    detail = bitmap()
    if not approximate:
        ids = np.array(self.bitmaps[pos] & only_locs, np.int64)
        values = self.column.loc[ids]
        selected = ids[operator_(values, limit)]
        detail.update(selected)
    if operator_ in (operator.lt, operator.le):
        for bm in self.bitmaps[:pos]:
            detail.update(bm & only_locs)
    else:
        for bm in self.bitmaps[pos + 1 :]:
            detail.update(bm & only_locs)
    return detail
def query(
    self,
    operator_: Callable[[Any, Any], int],
    limit: Any,
    approximate: bool = APPROX,
) -> bitmap:  # blocking...
    """
    Return the bitmap of rows matching the query.
    For example, returning all values less than 10 (< 10) would be
    `query(operator.__lt__, 10)`
    """
    assert self.bins is not None
    pos = np.digitize(limit, self.bins)
    detail = bitmap()
    if not approximate:
        ids = np.array(self.bitmaps[pos], np.int64)
        values = self.column.loc[ids]
        selected = ids[operator_(values, limit)]
        detail.update(selected)
    if operator_ in (operator.lt, operator.le):
        for bm in self.bitmaps[:pos]:
            detail.update(bm)
    else:
        for bm in self.bitmaps[pos + 1 :]:
            detail.update(bm)
    return detail
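# Illustrative standalone sketch of the one-sided query above (for example
# query(operator.__lt__, 10)): every bucket that lies entirely on the correct
# side of the limit is taken wholesale, and only the bucket that straddles the
# limit is filtered value by value.  Sets replace bitmaps, the column/bins/
# buckets setup from the previous sketch is assumed, and sketch_query is a
# hypothetical name.
import operator
import numpy as np

def sketch_query(column, bins, buckets, op, limit):
    pos = int(np.digitize(limit, bins))
    # Exact check only for the bucket containing the limit.
    result = {i for i in buckets[pos] if op(column[i], limit)}
    whole = buckets[:pos] if op in (operator.lt, operator.le) else buckets[pos + 1:]
    return result.union(*whole)

# With the column/bins/buckets built in the previous sketch:
# assert sketch_query(column, bins, buckets, operator.lt, 0.5) == set(np.flatnonzero(column < 0.5))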
def __contains__(self, loc):
    v = Loc.dispatch(loc)
    end = self.size
    ids = self._ids_dict
    if v == Loc.INT:
        if self._is_identity:
            return 0 <= loc < end
        else:
            return loc in ids
    if v == Loc.SLICE:
        if self._is_identity:
            return loc.start >= 0 and (loc.stop is None or loc.stop == end)
        else:
            loc = range(*loc.indices(end))
            v = Loc.ITERABLE
    elif v == Loc.BITMAP:
        if self._is_identity:
            inside = bitmap(range(0, end))
            return loc.difference_cardinality(inside) == 0
        else:
            v = Loc.ITERABLE
    if Loc.isiterable(v):
        if self._is_identity:
            for l in loc:
                if l < 0 or l >= end:
                    return False
        else:
            for l in loc:
                if l not in ids:
                    return False
        return True
    else:
        raise ValueError('Unsupported data for "in": %s' % loc)
def test_bisect2(self) -> None:
    s = self.scheduler()
    random = RandomTable(2, rows=100_000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=100,
        # update_rows=5,
        # fixed_step_size=100,
        scheduler=s,
    )
    stirrer.input[0] = random.output.result
    t = Table(name=None, dshape="{value: string}", data={"value": [0.5]})
    min_value = Constant(table=t, scheduler=s)
    hist_index = HistogramIndex(column="_1", scheduler=s)
    hist_index.create_dependent_modules(stirrer, "result")
    bisect_ = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=s)
    bisect_.input[0] = hist_index.output.result
    # bisect_.input[0] = random.output.result
    bisect_.input.limit = min_value.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = bisect_.output.result
    aio.run(s.start())
    idx = stirrer.table.eval("_1>0.5", result_object="index")
    self.assertEqual(bisect_.table.index, bitmap(idx))
def resize(self, newsize: int, index: Optional[Union[bitmap, List[int]]] = None) -> None:
    # NB: newsize means how many active rows the table must contain
    if index is not None:
        index = bitmap.asbitmap(index)
        newsize_ = index.max() + 1 if index else 0
        if newsize < newsize_:
            logger.warning(f"Wrong newsize={newsize}, fixed to {newsize_}")
            newsize = newsize_
    assert newsize is not None
    delta = newsize - len(self.index)
    # if delta < 0:
    #     return
    newsize = self.last_id + delta + 1
    crt_index = bitmap(self._index)
    self._resize_rows(newsize, index)
    del_index = crt_index - self._index
    if del_index:
        self.add_deleted(del_index)
    if delta < 0:
        return
    self._storagegroup.attrs[metadata.ATTR_NROWS] = newsize
    assert newsize is not None
    for column in self._columns:
        col = cast(Column, column)
        col._resize(newsize)
def restricted_range_query(self, lower, upper, only_locs, approximate=APPROX):
    """
    Return the bitmap of only_locs rows in range [`lower`, `upper`[
    """
    if lower > upper:
        lower, upper = upper, lower
    only_locs = bitmap.asbitmap(only_locs)
    pos = np.digitize([lower, upper], self.bins)
    detail = bitmap()
    if not approximate:
        ids = np.array(self.bitmaps[pos[0]] & only_locs, np.int64)
        values = self.column.loc[ids]
        if pos[0] == pos[1]:
            selected = ids[(lower <= values) & (values < upper)]
        else:
            selected = ids[lower <= values]
            detail.update(selected)
            ids = np.array(self.bitmaps[pos[1]] & only_locs, np.int64)
            values = self.column.loc[ids]
            selected = ids[values < upper]
        detail.update(selected)
    for bm in self.bitmaps[pos[0] + 1:pos[1]]:
        detail.update(bm & only_locs)
    return detail
def compute_updates(self, start: int, now: int, mid: str,
                    cleanup: bool = True) -> Optional[IndexUpdate]:
    """Compute the updates (delta) that happened to this table since the last call.

    Parameters
    ----------
    start: integer
        Start is interpreted as a virtual time for `last time`
    now: integer
        Now is interpreted as a virtual time for `now`
    mid: hashable object
        An identifier for the object that will ask for updates,
        usually the name of a slot.

    Returns
    -------
    updates: None or an IndexUpdate structure which describes the list of
        rows created, updated, and deleted.
    """
    if self._changes:
        self._flush_cache()
        updates = self._changes.compute_updates(start, now, mid, cleanup=cleanup)
        if updates is None:
            updates = IndexUpdate(created=bitmap(self.index))
        return updates
    return None
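# Illustrative sketch of the delta an IndexUpdate describes: given the set of
# row ids present at `start`, the set present at `now`, and the ids whose
# values changed in between, the update splits into created, updated and
# deleted rows.  Plain sets stand in for bitmaps and SimpleUpdate is a
# hypothetical stand-in for IndexUpdate, not the real class.
from dataclasses import dataclass, field

@dataclass
class SimpleUpdate:
    created: set = field(default_factory=set)
    updated: set = field(default_factory=set)
    deleted: set = field(default_factory=set)

def sketch_compute_updates(ids_start, ids_now, touched):
    created = ids_now - ids_start
    deleted = ids_start - ids_now
    # A row counts as updated only if it existed before and still exists now.
    updated = touched & ids_start & ids_now
    return SimpleUpdate(created=created, updated=updated, deleted=deleted)

# sketch_compute_updates({1, 2, 3}, {2, 3, 4}, {2})
# -> created={4}, updated={2}, deleted={1}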
def restricted_range_query(
    self, lower: float, upper: float, only_locs: Any, approximate: bool = APPROX
) -> bitmap:
    """
    Return the bitmap of only_locs rows in range [`lower`, `upper`[
    """
    if lower > upper:
        lower, upper = upper, lower
    only_locs = bitmap.asbitmap(only_locs)
    pos_lo, pos_up = np.digitize([lower, upper], self.bins)  # type: ignore
    union = bitmap.union(
        *[(bm & only_locs) for bm in self.bitmaps[pos_lo + 1 : pos_up]]
    )
    if not approximate:
        detail = bitmap()
        ids = np.array(self.bitmaps[pos_lo] & only_locs, np.int64)
        values = self.column.loc[ids]
        if pos_lo == pos_up:
            selected = ids[(lower <= values) & (values < upper)]
            detail.update(selected)
        else:
            selected = ids[lower <= values]
            detail.update(selected)
            ids = np.array(self.bitmaps[pos_up] & only_locs, np.int64)
            values = self.column.loc[ids]
            selected = ids[values < upper]
            detail.update(selected)
        union.update(detail)
    return union
def test_intersection(self) -> None:
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    t_min = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    hist_index = HistogramIndex(column="_1", scheduler=s)
    hist_index.create_dependent_modules(random, "result")
    bisect_min = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=s)
    bisect_min.input[0] = hist_index.output.result
    # bisect_.input[0] = random.output.result
    bisect_min.input.limit = min_value.output.result
    bisect_max = Bisect(column="_1", op="<", hist_index=hist_index, scheduler=s)
    bisect_max.input[0] = hist_index.output.result
    # bisect_.input[0] = random.output.result
    bisect_max.input.limit = max_value.output.result
    inter = Intersection(scheduler=s)
    inter.input[0] = bisect_min.output.result
    inter.input[0] = bisect_max.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = inter.output.result
    aio.run(s.start())
    assert hist_index.input_module is not None
    idx = hist_index.input_module.output["result"].data().eval(
        "(_1>0.3)&(_1<0.8)", result_object="index")
    self.assertEqual(inter.table.index, bitmap(idx))
def test_paging_helper_t(self) -> None:
    t = Table("table_for_paging", dshape="{a: int, b: float32}", create=True)
    t.resize(200)
    _ = np.arange(200)
    ivalues = np.random.randint(100, size=200)
    t["a"] = ivalues
    fvalues = np.array(np.random.rand(200), np.float32)
    t["b"] = fvalues
    # import pdb; pdb.set_trace()
    ph_t = PagingHelper(t)
    page = ph_t.get_page(0, 10)
    self.assertEqual(page[0][0], 0)
    self.assertEqual(page[-1][0], 9)
    del t.loc[5]
    ph_t = PagingHelper(t)
    page = ph_t.get_page(0, 10)
    self.assertEqual(page[0][0], 0)
    self.assertEqual(page[-1][0], 10)
    sel = bitmap(range(10, 75, 2))
    print(sel)
    view = t.loc[sel, :]
    self.assertTrue(view is not None)
    assert view is not None
    ph_t = PagingHelper(view)
    page = ph_t.get_page(10, 20)
    self.assertEqual(page[0][0], 30)
    self.assertEqual(page[-1][0], 48)
    print(page)
def test_intersection(self):
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    t_min = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    hist_index = HistogramIndex(column='_1', scheduler=s)
    hist_index.create_dependent_modules(random, 'table')
    bisect_min = Bisect(column='_1', op='>', hist_index=hist_index, scheduler=s)
    bisect_min.input.table = hist_index.output.table
    # bisect_.input.table = random.output.table
    bisect_min.input.limit = min_value.output.table
    bisect_max = Bisect(column='_1', op='<', hist_index=hist_index, scheduler=s)
    bisect_max.input.table = hist_index.output.table
    # bisect_.input.table = random.output.table
    bisect_max.input.limit = max_value.output.table
    inter = Intersection(scheduler=s)
    inter.input.table = bisect_min.output.table
    inter.input.table = bisect_max.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = inter.output.table
    s.start()
    s.join()
    idx = hist_index.input_module.output['table']\
        .data().eval('(_1>0.3)&(_1<0.8)', result_object='index')
    self.assertEqual(inter.table().selection, bitmap(idx))
def resize(self, newsize, indices=None):
    """
    Change the size of the IDColumn.
    When the column grows, return the new identifiers allocated.
    """
    # pylint: disable=arguments-differ
    oldsize = self.size
    if oldsize == newsize:
        assert (indices is None or len(indices) == 0)
        return None
    elif oldsize > newsize:
        todelete = self[newsize:]
        try:  # EAFP
            newsize_bm = bitmap(todelete)
            newsize = self._delete_ids(newsize_bm)
        except OverflowError:
            newsize_ = todelete[todelete >= 0]
            newsize = self._delete_ids(newsize_)
        if newsize is not None:
            super(IdColumn, self).resize(newsize)
        self._flush_cache()
        return None
    else:  # oldsize < newsize
        incr = newsize - oldsize
        assert indices is None or len(indices) == incr
        self._flush_cache()
        if self._is_identity:
            newindices = np.arange(oldsize, newsize)
            # if the new indices are not the same
            # as expected, allocate the hashtable-based storage.
            if (indices is not None
                    and not np.array_equal(indices, newindices)):
                self._really_create_dataset()  # indices=indices)
                return self.resize(newsize, indices)
            # indices is None or == newindices, super.resize works
            super(IdColumn, self).resize(newsize)
            indices = newindices
            self.add_created(indices)
            self._last_id += incr
            self.dataset.attrs[IdColumn.ATTR_LAST_ID] = self._last_id
            return indices
        # not _is_identity, code using full dataset/hash table
        if indices is None:
            last_id = self._last_id + incr
            indices = np.arange(self._last_id, last_id, dtype=np.int64)
        else:
            indices = np.asarray(indices, dtype=np.int64)
            if (self._ids_dict is not None
                    and self._ids_dict.contains_any(indices)):
                raise ValueError('Indices would contain duplicates')
            last_id = max(self._last_id, int(np.max(indices) + 1))
        # TODO reuse free list
        super(IdColumn, self).resize(newsize)
        self.dataset[oldsize:] = indices
        self._update_ids_dict(oldsize, oldsize + incr, indices)
        indices[:] = np.arange(oldsize, oldsize + incr)
        self._last_id = last_id
        self.dataset.attrs[IdColumn.ATTR_LAST_ID] = self._last_id
        return indices
def _normalize_locs(self, locs):
    if locs is None:
        if bool(self._freelist):
            locs = iter(self)
        else:
            locs = iter(self.dataset)
    elif isinstance(locs, integer_types):
        locs = [locs]
    return bitmap(locs)
def test_filter3(self) -> None:
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(update_column="_1", update_rows=5,
                      fixed_step_size=100, scheduler=s)
    stirrer.input[0] = random.output.result
    filter_ = FilterMod(expr="_1 > 0.5", scheduler=s)
    filter_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = filter_.output.result
    aio.run(s.start())
    tbl = filter_.get_input_slot("table").data()
    idx = tbl.eval("_1>0.5", result_object="index")
    self.assertEqual(filter_.table.index, bitmap(idx))
    df = pd.DataFrame(tbl.to_dict(), index=tbl.index.to_array())
    dfe = df.eval("_1>0.5")
    self.assertEqual(filter_.table.index, bitmap(df.index[dfe]))
def _combine_updates(self, update: IndexUpdate, start: int) -> IndexUpdate:
    # TODO reuse cached results if it matches
    new_u = IndexUpdate(
        created=bitmap(update.created),
        deleted=bitmap(update.deleted),
        updated=bitmap(update.updated),
    )
    last_u = None
    # Since bookmarks can share their update slots,
    # search for a bookmark with a different value
    for i in range(start, len(self._bookmarks)):
        upd = self._bookmarks[i].update
        if upd is last_u:
            continue
        new_u.combine(upd)
        last_u = upd
    # TODO cache results to reuse it if possible
    return new_u
def _slice_to_bitmap(self, sl: slice, fix_loc: bool = True,
                     existing_only: bool = True) -> bitmap:
    stop = sl.stop or self.last_xid
    nsl = norm_slice(sl, fix_loc, stop=stop)
    ret = bitmap(nsl)
    if existing_only:
        ret &= self.index
    return ret
def test_filter(self) -> None:
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    filter_ = FilterMod(expr="_1 > 0.5", scheduler=s)
    filter_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = filter_.output.result
    aio.run(s.start())
    idx = filter_.get_input_slot("table").data().eval(
        "_1>0.5", result_object="index")
    self.assertEqual(filter_.table.index, bitmap(idx))