def get_first_row(self, start_key, column_families=None, end_key=None):
    """Return the first row of the range starting at ``start_key``.

    :param start_key: Inclusive start of the row range. When ``end_key``
        is ``None`` it is also used as a required prefix of the returned
        row key, so it must be a ``str`` in that case (it is compared
        against the UTF-8 decoded row key).
    :param column_families: (Optional) Iterable of column family names.
        Each one is wrapped in a ``FamilyNameRegexFilter``; an empty
        iterable adds no family restriction.
    :param end_key: (Optional) End of the row range, passed through to
        ``RowSet.add_row_range_from_keys``.

    :returns: ``(row_key, row_dict)`` for the first row read, where
        ``row_key`` is the decoded key and ``row_dict`` is the result of
        ``self.partial_row_to_dict``. ``None`` when the read yields no
        rows, or when ``end_key`` is ``None`` and the first row's key
        does not start with ``start_key``.
    """
    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        c_filters = [FamilyNameRegexFilter(c) for c in column_families]
        if len(c_filters) == 1:
            filters.append(c_filters[0])
        elif len(c_filters) > 1:
            filters.append(RowFilterUnion(c_filters))

    if len(filters) > 1:
        filter_ = RowFilterChain(filters=filters)
    else:
        filter_ = filters[0]

    row_set = RowSet()
    row_set.add_row_range_from_keys(start_key=start_key,
                                    start_inclusive=True,
                                    end_key=end_key)

    # NOTE(review): only the first row is ever consumed; if the reader
    # supports a row limit, passing it would avoid streaming extra rows.
    generator = self._low_level.read_rows(filter_=filter_, row_set=row_set)

    # The loop body always exits on its first iteration; the ``for`` is
    # only used to cope with an empty result without extra plumbing.
    for rowdata in generator:
        rk = rowdata.row_key.decode("utf-8")
        if end_key is None and not rk.startswith(start_key):
            # Open-ended scan ran past the requested prefix: no match.
            return None
        return (rk, self.partial_row_to_dict(rowdata))

    # Nothing was read at all.
    return None
def _get(self, uuid: UUID, time: int) -> Optional[bytes]:
    """Return the stored value for ``uuid`` as of ``time``, if any.

    The row is addressed by ``uuid.bytes_le`` and read with a filter chain
    that first restricts cells to a timestamp range ending at
    ``millis_dt(time + 1)`` and then keeps one cell per column.

    :param uuid: Identifier whose little-endian bytes form the row key.
    :param time: Upper time bound. ``time + 1`` is used as the range end,
        presumably so that a cell stamped exactly at ``time`` is included
        by an exclusive end bound — confirm against ``millis_dt`` and
        ``TimestampRange``.

    :returns: The cell value from ``FAMILY``/``COLUMN``, or ``None`` when
        the read returns no row.
    :raises KeyError: presumably when a row is returned but has no cell in
        ``FAMILY``/``COLUMN`` (behaviour of ``cell_value``; unchanged).
    """
    newest_up_to_time = RowFilterChain(filters=[
        TimestampRangeFilter(range_=TimestampRange(end=millis_dt(time + 1))),
        CellsColumnLimitFilter(num_cells=1),
    ])
    row = self._table.read_row(row_key=uuid.bytes_le,
                               filter_=newest_up_to_time)
    if row is None:
        # A missing row is reported as ``None``; calling ``cell_value`` on
        # it would raise AttributeError instead of honouring the
        # ``Optional[bytes]`` contract.
        return None
    return row.cell_value(column_family_id=FAMILY, column=COLUMN)
def row_generator(self, row_keys=None, start_key=None, end_key=None,
                  column_families=None, check_prefix=None):
    """Yield ``(row_key, row_dict)`` pairs for a set of keys or a key range.

    Because this is a generator, the argument checks below run (and raise)
    on the first iteration, not when the function is called.

    :param row_keys: (Optional) Sequence of explicit row keys to read.
        Takes precedence over the range parameters when non-empty. An
        empty sequence without ``start_key`` yields nothing.
    :param start_key: (Optional) Inclusive start of the row range. Must be
        combined with ``end_key`` or ``check_prefix``.
    :param end_key: (Optional) Inclusive end of the row range.
    :param column_families: (Optional) Iterable of column family names.
        Each one is wrapped in a ``FamilyNameRegexFilter``; an empty
        iterable adds no family restriction.
    :param check_prefix: (Optional) ``str`` prefix; iteration stops at the
        first row whose decoded key does not start with it.

    :returns: Generator of ``(row_key, row_dict)`` where ``row_dict`` comes
        from ``self.partial_row_to_ordered_dict``.
    :raises ValueError: if neither ``row_keys`` nor ``start_key`` is given,
        or if ``start_key`` is given without ``end_key``/``check_prefix``.
    """
    if row_keys is None and start_key is None:
        raise ValueError("use row_keys or start_key parameter")
    if start_key is not None and (end_key is None and check_prefix is None):
        raise ValueError(
            "use start_key together with end_key or check_prefix")

    if not row_keys and start_key is None:
        # An empty key list used to fall through to the range branch with
        # start_key=None and end_key=None, i.e. an unbounded scan. The
        # checks above show unbounded reads are not intended, so there is
        # simply nothing to read.
        return

    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        c_filters = [FamilyNameRegexFilter(c) for c in column_families]
        if len(c_filters) == 1:
            filters.append(c_filters[0])
        elif len(c_filters) > 1:
            filters.append(RowFilterUnion(c_filters))

    if len(filters) > 1:
        filter_ = RowFilterChain(filters=filters)
    else:
        filter_ = filters[0]

    row_set = RowSet()
    if row_keys:
        for r in row_keys:
            row_set.add_row_key(r)
    else:
        row_set.add_row_range_from_keys(start_key=start_key,
                                        end_key=end_key,
                                        start_inclusive=True,
                                        end_inclusive=True)

    generator = self._low_level.read_rows(filter_=filter_, row_set=row_set)

    for i, rowdata in enumerate(generator):
        if rowdata is None:
            # NOTE(review): pairing by position assumes the reader emits
            # one item per requested key, in request order — confirm.
            if row_keys:
                yield (row_keys[i], {})
            continue
        rk = rowdata.row_key.decode("utf-8")
        if check_prefix and not rk.startswith(check_prefix):
            # Scan has run past the requested prefix.
            break
        yield (rk, self.partial_row_to_ordered_dict(rowdata))
def _filter_chain_helper(column=None, versions=None, timestamp=None,
                         filters=None):
    """Create filter chain to limit a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter. Only the first ``:`` separates the
                   family from the qualifier, so the qualifier itself may
                   contain colons. Bytes are decoded as UTF-8.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to build on. The
                    list passed in is copied, not modified.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain, or if ``column`` contains no ``:``.
    """
    # Copy so that the caller's list is never extended behind its back.
    filters = [] if filters is None else list(filters)

    if column is not None:
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        # maxsplit=1: a qualifier such as ``a:b`` must not break unpacking.
        column_family_id, column_qualifier = column.split(':', 1)
        filters.append(FamilyNameRegexFilter(column_family_id))
        filters.append(ColumnQualifierRegexFilter(column_qualifier))

    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))

    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    if not filters:
        raise ValueError('Must have at least one filter.')
    if len(filters) == 1:
        return filters[0]
    return RowFilterChain(filters=filters)
def read_row(self, row_id, column_families=None):
    """Read a single row and return it as a dict.

    :param row_id: Row key as ``str``; it is UTF-8 encoded before the read.
    :param column_families: (Optional) Iterable of column family names.
        Each one is wrapped in a ``FamilyNameRegexFilter``; an empty
        iterable adds no family restriction.

    :returns: The result of ``self.partial_row_to_dict`` for the row read.
    :raises KeyError: if the low-level read returns ``None``.
    """
    one_cell_per_column = CellsColumnLimitFilter(1)

    family_filters = []
    if column_families is not None:
        family_filters = [
            FamilyNameRegexFilter(name) for name in column_families
        ]

    if not family_filters:
        # No family restriction: the cell-limit filter is used on its own.
        filter_ = one_cell_per_column
    else:
        if len(family_filters) == 1:
            family_part = family_filters[0]
        else:
            family_part = RowFilterUnion(family_filters)
        filter_ = RowFilterChain(filters=[one_cell_per_column, family_part])

    encoded_key = row_id.encode("utf-8")
    row = self._low_level.read_row(encoded_key, filter_=filter_)
    if row is None:
        raise KeyError("row {} not found".format(row_id))
    return self.partial_row_to_dict(row)