def get_annotation_sv_ids(self, annotation_id, time_stamp=None):
    """ Looks up the supervoxel ids stored for a single annotation

    The row is read through a TimestampRangeFilter whose range ends at
    `time_stamp`.

    :param annotation_id: uint64
    :param time_stamp: None or datetime
        falls back to the current UTC time when None
    :return: array of np.uint64s
        an empty list if no row was read and None if the stored value
        is empty
    """
    query_time = time_stamp
    if query_time is None:
        query_time = datetime.datetime.utcnow()

    # Bigtable precision: drop everything below one millisecond
    sub_millisecond = query_time.microsecond % 1000
    query_time = query_time - datetime.timedelta(
        microseconds=sub_millisecond)

    range_filter = TimestampRangeFilter(TimestampRange(end=query_time))
    row = self.table.read_row(serialize_node_id(annotation_id),
                              filter_=range_filter)

    if row is None:
        return []

    # Only the first cell returned for the "sv_ids" column is used
    sv_id_cells = row.cells[self.data_family_id][serialize_key("sv_ids")]
    sv_ids_bin = sv_id_cells[0].value

    if len(sv_ids_bin) > 0:
        return np.frombuffer(sv_ids_bin, dtype=np.uint64)

    return None
def get_annotation_data(self, annotation_id, time_stamp=None):
    """ Reads the data of a single annotation object

    The row is read through a TimestampRangeFilter whose range ends at
    `time_stamp`.

    :param annotation_id: uint64
    :param time_stamp: None or datetime
        falls back to the current UTC time when None
    :return: blob
        None if no row was read or if the stored value is empty
    """
    if time_stamp is None:
        time_stamp = datetime.datetime.utcnow()

    # Adjust time_stamp to bigtable precision
    time_stamp -= datetime.timedelta(
        microseconds=time_stamp.microsecond % 1000)

    time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

    row = self.table.read_row(serialize_node_id(annotation_id),
                              filter_=time_filter)

    # read_row yields None when nothing matches (unknown id or no cell
    # inside the time range); without this guard `row.cells` raised an
    # AttributeError.
    if row is None:
        return None

    bin_data = row.cells[self.data_family_id][serialize_key("data")][0].value

    if len(bin_data) == 0:
        return None

    return bin_data
def __init__(self, table, timestamp=None, batch_size=None,
             transaction=False, wal=_WAL_SENTINEL):
    """Set up the batch state.

    :param table: stored as-is on the batch.
    :param timestamp: (Optional) Timestamp in milliseconds. When given,
                      it is stored as a datetime and a delete range
                      ending one millisecond later is prepared.
    :param batch_size: (Optional) Must be positive and cannot be
                       combined with ``transaction``.
    :param transaction: Stored as-is on the batch.
    :param wal: Passing anything other than the sentinel default only
                triggers a warning.

    :raises: :class:`TypeError` if ``batch_size`` is set together with
             ``transaction``.
    :raises: :class:`ValueError` if ``batch_size`` is not positive.
    """
    if wal is not _WAL_SENTINEL:
        _WARN(_WAL_WARNING)

    has_batch_size = batch_size is not None
    if has_batch_size and transaction:
        raise TypeError('When batch_size is set, a Batch cannot be '
                        'transactional')
    if has_batch_size and batch_size <= 0:
        raise ValueError('batch_size must be positive')

    self._table = table
    self._batch_size = batch_size

    if timestamp is None:
        self._timestamp = None
        self._delete_range = None
    else:
        # The incoming value is in milliseconds; the helper expects
        # microseconds.
        self._timestamp = _datetime_from_microseconds(1000 * timestamp)
        # HappyBase users expect HBase deletes to go **up to** and
        # **including** the timestamp, while Cloud Bigtable Time Ranges
        # **exclude** the final timestamp. Assuming millisecond
        # granularity, ending the range at the very next timestamp
        # bridges the two.
        range_end = self._timestamp + _ONE_MILLISECOND
        self._delete_range = TimestampRange(end=range_end)

    self._transaction = transaction

    # Bookkeeping for pending mutations.
    self._row_map = {}
    self._mutation_count = 0
def test_to_pb(self):
    """A filter built from a default TimestampRange serializes to a
    row filter protobuf holding a default timestamp range protobuf."""
    from google.cloud.bigtable.row_filters import TimestampRange

    empty_range = TimestampRange()
    filter_under_test = self._makeOne(empty_range)

    actual_pb = filter_under_test.to_pb()

    self.assertEqual(
        actual_pb,
        _RowFilterPB(timestamp_range_filter=_TimestampRangePB()))
def _get(self, uuid: UUID, time: int) -> Optional[bytes]:
    """Read the stored value for ``uuid`` as of ``time``.

    The row is read with a timestamp range ending at
    ``millis_dt(time + 1)`` and limited to one cell per column.

    :param uuid: identifies the row; its ``bytes_le`` form is the row key.
    :param time: point in time passed to ``millis_dt`` (presumably
        milliseconds -- confirm against ``millis_dt``).
    :return: the cell value of ``FAMILY``/``COLUMN``, or ``None`` when
        the read returns no row.
    """
    row_filter = RowFilterChain(filters=[
        # NOTE(review): the +1 looks intended to make ``time`` itself
        # part of the range -- confirm the range end is exclusive.
        TimestampRangeFilter(range_=TimestampRange(end=millis_dt(time + 1))),
        CellsColumnLimitFilter(num_cells=1),
    ])
    row = self._table.read_row(row_key=uuid.bytes_le, filter_=row_filter)

    # read_row returns None when nothing matches; calling cell_value on
    # it raised AttributeError instead of yielding the Optional result
    # promised by the signature.
    if row is None:
        return None

    return row.cell_value(column_family_id=FAMILY, column=COLUMN)
def test_delete_cells_with_time_range(self):
    """Run the delete-cells helper with a TimestampRange that only has
    a start."""
    import datetime
    from google.cloud._helpers import _EPOCH
    from google.cloud.bigtable.row_filters import TimestampRange

    # A multiple of 1000 microseconds, i.e. already at millisecond
    # granularity.
    offset_micros = 30871000
    range_start = _EPOCH + datetime.timedelta(microseconds=offset_micros)

    self._delete_cells_helper(time_range=TimestampRange(start=range_start))
def _lock_single_annotation(self, annotation_id, operation_id):
    """ Tries to acquire the lock of a single annotation

    The lock cell is written through a conditional row: it is only set
    if no lock column younger than LOCK_EXPIRED_TIME_DELTA exists.

    :param annotation_id: uint64
    :param operation_id: str
        an id that is unique to the process asking for the lock
    :return: bool
        True if the lock was acquired
    """
    lock_value = serialize_key(operation_id)
    lock_column = serialize_key("lock")

    # Locks older than this cutoff count as expired. The sub-millisecond
    # part is removed to comply with the resolution of BigTables
    # TimeRange.
    expiry_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA
    expiry_cutoff = expiry_cutoff - datetime.timedelta(
        microseconds=expiry_cutoff.microsecond % 1000)

    # Predicate: a lock column exists AND it is still valid
    valid_lock_filter = RowFilterChain([
        TimestampRangeFilter(TimestampRange(start=expiry_cutoff)),
        ColumnRangeFilter(
            column_family_id=self.data_family_id,
            start_column=lock_column,
            end_column=lock_column,
            inclusive_start=True,
            inclusive_end=True),
    ])

    conditional_row = self.table.row(serialize_node_id(annotation_id),
                                     filter_=valid_lock_filter)

    # state=False: the cell is only written if the predicate finds
    # nothing, i.e. there is no valid lock at the moment
    conditional_row.set_cell(self.data_family_id, lock_column, lock_value,
                             state=False)

    # commit() reports whether the predicate matched; no match means
    # our lock cell was written
    predicate_matched = conditional_row.commit()

    return not predicate_matched
def _unlock_annotation(self, annotation_id, operation_id):
    """ Unlocks an annotation

    This is mainly used for cases where multiple annotations need to be
    locked and locking was not successful for all of them

    The lock cell is only deleted if it is still valid (younger than
    LOCK_EXPIRED_TIME_DELTA) and still holds the given operation_id.

    :param annotation_id: uint64
    :param operation_id: str
        an id that is unique to the process asking to lock the annotation
    :return: bool
        result of the conditional commit; True if the filter matched,
        i.e. the lock was held by operation_id and has been deleted
    """
    # Function-scope import: keeps this fix self-contained, the module
    # is already a dependency of this file.
    from google.cloud.bigtable.row_filters import ValueRangeFilter

    operation_id_b = serialize_key(operation_id)

    lock_key = serialize_key("lock")

    # Build a column filter which tests if a lock was set (== lock column
    # exists) and if it is still valid (timestamp younger than
    # LOCK_EXPIRED_TIME_DELTA) and if the given operation_id is still
    # the active lock holder

    time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

    # Comply to resolution of BigTables TimeRange
    time_cutoff -= datetime.timedelta(
        microseconds=time_cutoff.microsecond % 1000)

    time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

    column_key_filter = ColumnQualifierRegexFilter(lock_key)

    # The lock holder is stored as the cell *value*. The previous
    # ColumnQualifierRegexFilter compared operation_id against the
    # column qualifier, so the chain could not match a lock cell and
    # locks were not released. An exact value range also avoids
    # interpreting operation_id as a regular expression.
    value_filter = ValueRangeFilter(
        start_value=operation_id_b,
        end_value=operation_id_b,
        inclusive_start=True,
        inclusive_end=True)

    # Chain these filters together
    chained_filter = RowFilterChain(
        [time_filter, column_key_filter, value_filter])

    # Get conditional row using the chained filter
    root_row = self.table.row(serialize_node_id(annotation_id),
                              filter_=chained_filter)

    # Delete row if conditions are met (state == True)
    root_row.delete_cell(self.data_family_id, lock_key, state=True)

    return root_row.commit()
def get_time_range_filter(start_time: Optional[datetime.datetime] = None,
                          end_time: Optional[datetime.datetime] = None,
                          end_inclusive: bool = True) -> RowFilter:
    """ Builds a TimestampRangeFilter from optional start and end times

    Each given bound is first passed through
    `get_google_compatible_time_stamp` to comply with the resolution of
    BigTables TimeRange: the start is never rounded up, the end is
    rounded up exactly when `end_inclusive` is set. A bound left as None
    is handed to TimestampRange unchanged.

    :param start_time: None or datetime
    :param end_time: None or datetime
    :param end_inclusive: bool
    :return: TimestampRangeFilter
    """
    range_start = (
        None if start_time is None
        else get_google_compatible_time_stamp(start_time, round_up=False)
    )
    range_end = (
        None if end_time is None
        else get_google_compatible_time_stamp(end_time,
                                              round_up=end_inclusive)
    )

    time_range = TimestampRange(start=range_start, end=range_end)
    return TimestampRangeFilter(time_range)
def get_annotation_ids_from_sv(self, sv_id, time_stamp=None):
    """ Collects the annotation ids currently mapped to a supervoxel

    Every cell of the "mapped_anno_ids" column contributes ids. An id
    that shows up an even number of times is treated as removed.

    To also read the data of the acquired annotations use
    `get_annotations_from_sv`

    :param sv_id: uint64
    :param time_stamp: None or datetime
        falls back to the current UTC time when None
    :return: array of annotation ids
        an empty list if no row was read
    """
    if time_stamp is None:
        query_time = datetime.datetime.utcnow()
    else:
        query_time = time_stamp

    # Bigtable precision: drop everything below one millisecond
    sub_millisecond = query_time.microsecond % 1000
    query_time = query_time - datetime.timedelta(
        microseconds=sub_millisecond)

    range_filter = TimestampRangeFilter(TimestampRange(end=query_time))

    # Read the mapping entries within the time range
    row = self.table.read_row(serialize_node_id(sv_id),
                              filter_=range_filter)

    if row is None:
        return []

    mapping_cells = row.cells[self.mapping_family_id][serialize_key(
        "mapped_anno_ids")]

    # Flatten the ids of all cells into a single list
    logged_ids = [
        anno_id
        for cell in mapping_cells
        for anno_id in np.frombuffer(cell.value, dtype=np.uint64)
    ]

    # Resolve changes over time: only ids with an odd number of entries
    # are still mapped
    unique_ids, occurrences = np.unique(logged_ids, return_counts=True)
    still_mapped = occurrences % 2 == 1

    return unique_ids[still_mapped]
def _convert_to_time_range(timestamp=None): """Create a timestamp range from an HBase / HappyBase timestamp. HBase uses timestamp as an argument to specify an exclusive end deadline. Cloud Bigtable also uses exclusive end times, so the behavior matches. :type timestamp: int :param timestamp: (Optional) Timestamp (in milliseconds since the epoch). Intended to be used as the end of an HBase time range, which is exclusive. :rtype: :class:`~google.cloud.bigtable.row.TimestampRange`, :data:`NoneType <types.NoneType>` :returns: The timestamp range corresponding to the passed in ``timestamp``. """ if timestamp is None: return None next_timestamp = _datetime_from_microseconds(1000 * timestamp) return TimestampRange(end=next_timestamp)
def test_constructor_explicit(self):
    """Constructing a batch with explicit arguments stores them and
    derives the timestamp and delete range from the millisecond
    timestamp."""
    from google.cloud._helpers import _datetime_from_microseconds
    from google.cloud.bigtable.row_filters import TimestampRange

    table = object()
    timestamp = 144185290431
    batch_size = 42
    transaction = False  # Must be False when batch_size is non-null

    # The delete range is expected to end one millisecond after the
    # timestamp.
    expected_timestamp = _datetime_from_microseconds(1000 * timestamp)
    expected_delete_range = TimestampRange(
        end=_datetime_from_microseconds(1000 * (timestamp + 1)))

    batch = self._make_one(
        table, timestamp=timestamp,
        batch_size=batch_size, transaction=transaction)

    self.assertEqual(batch._table, table)
    self.assertEqual(batch._batch_size, batch_size)
    self.assertEqual(batch._timestamp, expected_timestamp)
    self.assertEqual(batch._delete_range, expected_delete_range)
    self.assertEqual(batch._transaction, transaction)
    self.assertEqual(batch._row_map, {})
    self.assertEqual(batch._mutation_count, 0)
# Print every 'events:event' cell written to the Bigtable table within
# the last five days.
import datetime
import json
import os
import time
from pprint import pprint

from google.cloud import bigtable
from google.cloud.bigtable.row_filters import TimestampRange, TimestampRangeFilter

project_id = os.environ.get("PROJECT_ID", "simula-cov19")
instance_id = os.environ.get("BT_INSTANCE_ID", "test-bigt")
table_id = 'test'

now = datetime.datetime.now(datetime.timezone.utc)

# get events reported via api within the last 5 days (not event timestamp)
start_time = now - datetime.timedelta(days=5)
end_time = now
ts_range = TimestampRange(start=start_time, end=end_time)
row_filter = TimestampRangeFilter(ts_range)

# connect to Bigtable
client = bigtable.Client(project=project_id)
instance = client.instance(instance_id)
table = instance.table(table_id)

# make our query
for row in table.read_rows(filter_=row_filter):
    print(row.row_key)
    # A row may match the filter without holding an 'events:event'
    # column; fall back to an empty list instead of iterating None.
    for cell in row.to_dict().get(b'events:event', []):
        data = json.loads(cell.value.decode('utf8'))
        pprint(data, indent=2)