def build_row_filter(row_key_regex=None, column_families=None, columns=None):
    """
    Build a row filter using a combination of row keys, column families, or
    columns to retrieve.

    Args:
        row_key_regex (:obj:`str`, optional): Regular expression for matching
            row keys. Defaults to None.
        column_families (:obj:`list` or :obj:`tuple` of :obj:`str`, optional):
            A sequence of column families to retrieve. Defaults to None.
        columns (:obj:`list` or :obj:`tuple` of :obj:`str`, optional): A
            sequence of column names or regular expressions for matching
            columns. Defaults to None.

    Returns:
        RowFilter: The built row filter from passed in parameters. A single
            resulting filter is returned as-is; several are combined in a
            ``RowFilterChain``. If no parameters (or only empty ones) are
            given, None is returned.

    Raises:
        TypeError: If ``row_key_regex`` is not a string, or if
            ``column_families`` / ``columns`` is not a sequence.
    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. The fallback keeps Python 2 working, which this
    # module still targets (it relies on ``six``).
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2
        from collections import Sequence

    if (row_key_regex is not None and
            not isinstance(row_key_regex, six.string_types)):
        raise TypeError('row_key_regex must be a str or unicode type.')
    # NOTE: a plain string is itself a Sequence, so it passes the two checks
    # below and would be iterated character by character. Callers should
    # pass a list or tuple.
    if (column_families is not None and
            not isinstance(column_families, Sequence)):
        raise TypeError('column_families must be an iterable.')
    if columns is not None and not isinstance(columns, Sequence):
        raise TypeError('columns must be an iterable.')

    filters = []

    # Build a filter for row keys.
    if row_key_regex:
        filters.append(RowKeyRegexFilter(row_key_regex))

    # Build filters for column families; several families are OR-ed together
    # with a union, a single one is used directly.
    if column_families:
        cf_filters = [ColumnRangeFilter(cf) for cf in column_families]
        if len(cf_filters) > 1:
            filters.append(RowFilterUnion(cf_filters))
        else:
            filters.append(cf_filters[0])

    # Build filters for columns, following the same single-vs-union rule.
    if columns:
        col_filters = [ColumnQualifierRegexFilter(col) for col in columns]
        if len(col_filters) > 1:
            filters.append(RowFilterUnion(col_filters))
        else:
            filters.append(col_filters[0])

    if not filters:
        return None
    if len(filters) == 1:
        return filters[0]
    return RowFilterChain(filters=filters)
def _columns_filter_helper(columns):
    """Build a single filter matching every column in ``columns``.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The lone filter when only one column is given, otherwise a
              union of the per-column filters.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    def _filter_for(family_id, qualifier):
        # A family-only entry needs just the family filter; a family:qualifier
        # entry chains the family filter with a qualifier filter.
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            return family_filter
        qualifier_filter = ColumnQualifierRegexFilter(qualifier)
        return RowFilterChain(filters=[family_filter, qualifier_filter])

    column_filters = [
        _filter_for(family_id, qualifier)
        for family_id, qualifier in _get_column_pairs(columns)
    ]

    if not column_filters:
        raise ValueError('Must have at least one filter.')
    if len(column_filters) == 1:
        return column_filters[0]
    return RowFilterUnion(filters=column_filters)
Exemplo n.º 3
0
    def get_first_row(self, start_key, column_families=None, end_key=None):
        """Return the first row read from the range beginning at ``start_key``.

        Args:
            start_key: Inclusive start of the row range to read.
            column_families: Optional iterable of column family names; when
                given, each is turned into a ``FamilyNameRegexFilter``.
            end_key: Optional end of the row range. When it is None, the
                first row is only accepted if its key starts with
                ``start_key``.

        Returns:
            A ``(row_key, row_dict)`` tuple for the first row, or None when
            no row is read or the prefix check rejects the first row.
        """
        filters = [CellsColumnLimitFilter(1)]
        if column_families is not None:
            family_filters = [
                FamilyNameRegexFilter(family) for family in column_families
            ]
            # Several families are combined with a union; one is used as-is.
            if len(family_filters) > 1:
                filters.append(RowFilterUnion(family_filters))
            elif family_filters:
                filters.append(family_filters[0])
        if len(filters) == 1:
            filter_ = filters[0]
        else:
            filter_ = RowFilterChain(filters=filters)

        row_set = RowSet()
        row_set.add_row_range_from_keys(start_key=start_key,
                                        start_inclusive=True,
                                        end_key=end_key)

        rows = self._low_level.read_rows(filter_=filter_, row_set=row_set)

        # Only the first row is ever inspected: it is either returned or
        # rejected by the prefix check.
        for rowdata in rows:
            key = rowdata.row_key.decode("utf-8")
            if end_key is None and not key.startswith(start_key):
                return None
            return (key, self.partial_row_to_dict(rowdata))
        return None
Exemplo n.º 4
0
    def row_generator(self,
                      row_keys=None,
                      start_key=None,
                      end_key=None,
                      column_families=None,
                      check_prefix=None):
        """Yield ``(row_key, row_dict)`` pairs for explicit keys or a key range.

        Args:
            row_keys: Optional sequence of row keys to read. When truthy it
                takes precedence over the range arguments.
            start_key: Inclusive start of the row range (used when
                ``row_keys`` is empty or None).
            end_key: Inclusive end of the row range.
            column_families: Optional iterable of column family names; each
                is turned into a ``FamilyNameRegexFilter``.
            check_prefix: Optional prefix; iteration stops at the first row
                whose key does not start with it.

        Yields:
            ``(row_key, ordered_dict)`` for each row read. If the reader
            produces None for a row and ``row_keys`` was given, yields
            ``(row_keys[index], {})`` for that position instead.

        Raises:
            ValueError: If neither ``row_keys`` nor ``start_key`` is given, or
                if ``start_key`` is given without ``end_key`` or
                ``check_prefix``. Because this is a generator, the error
                surfaces on the first iteration.
        """
        if row_keys is None and start_key is None:
            raise ValueError("use row_keys or start_key parameter")
        range_has_no_stop = end_key is None and check_prefix is None
        if start_key is not None and range_has_no_stop:
            raise ValueError(
                "use start_key together with end_key or check_prefix")

        filters = [CellsColumnLimitFilter(1)]
        if column_families is not None:
            family_filters = [
                FamilyNameRegexFilter(family) for family in column_families
            ]
            # Several families are combined with a union; one is used as-is.
            if len(family_filters) > 1:
                filters.append(RowFilterUnion(family_filters))
            elif family_filters:
                filters.append(family_filters[0])
        if len(filters) == 1:
            filter_ = filters[0]
        else:
            filter_ = RowFilterChain(filters=filters)

        row_set = RowSet()
        if row_keys:
            for key in row_keys:
                row_set.add_row_key(key)
        else:
            row_set.add_row_range_from_keys(start_key=start_key,
                                            end_key=end_key,
                                            start_inclusive=True,
                                            end_inclusive=True)

        rows = self._low_level.read_rows(filter_=filter_, row_set=row_set)

        for index, rowdata in enumerate(rows):
            if rowdata is None:
                # Report a missing row under the requested key at this index.
                if row_keys:
                    yield (row_keys[index], {})
                continue
            decoded_key = rowdata.row_key.decode("utf-8")
            if check_prefix and not decoded_key.startswith(check_prefix):
                break
            yield (decoded_key, self.partial_row_to_ordered_dict(rowdata))
Exemplo n.º 5
0
def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator):
    """Read a row through two label-applying filters and check the labels.

    Writes a row, reads it back through a union of two chains (each chain
    selects one column and applies a label), and verifies the returned cells
    keep their value and timestamp while gaining the expected label.

    ``skip_on_emulator`` is not used in the body; presumably the fixture
    skips this test when running against the emulator.
    """
    from google.cloud.bigtable.row_filters import (
        ApplyLabelFilter,
        ColumnQualifierRegexFilter,
        RowFilterChain,
        RowFilterUnion,
    )

    def labeled_column(column_name, label):
        # Select a single column, then tag whatever it matched with ``label``.
        label_filter = ApplyLabelFilter(label)
        column_filter = ColumnQualifierRegexFilter(column_name)
        return RowFilterChain(filters=[column_filter, label_filter])

    written_row = data_table.direct_row(ROW_KEY)
    rows_to_delete.append(written_row)

    cell1, _, cell3, _ = _write_to_row(written_row, None, written_row, None)
    written_row.commit()

    red_label = "label-red"
    blue_label = "label-blue"

    # Bring the two labeled columns together.
    union_filter = RowFilterUnion(filters=[
        labeled_column(COL_NAME1, red_label),
        labeled_column(COL_NAME2, blue_label),
    ])
    read_back = data_table.read_row(ROW_KEY, filter_=union_filter)
    assert read_back.row_key == ROW_KEY

    families = read_back.cells
    family_cells = families.pop(COLUMN_FAMILY_ID1)
    # COLUMN_FAMILY_ID1 must have been the only family returned.
    assert len(families) == 0

    (read_cell1, ) = family_cells.pop(COL_NAME1)
    (read_cell3, ) = family_cells.pop(COL_NAME2)
    # COL_NAME1 and COL_NAME2 must have been the only columns returned.
    assert len(family_cells) == 0

    # Each returned cell matches what was written and gained its label.
    expectations = (
        (cell1, read_cell1, red_label),
        (cell3, read_cell3, blue_label),
    )
    for written, returned, label in expectations:
        assert returned.value == written.value
        assert returned.timestamp == written.timestamp
        assert written.labels == []
        assert returned.labels == [label]
Exemplo n.º 6
0
    def test_read_with_label_applied(self):
        """Read a row through two label-applying filters and check the labels.

        Writes a row, reads it back through a union of two chains (each chain
        selects one column and applies a label), and verifies the returned
        cells keep their value and timestamp while gaining the expected label.
        """
        self._maybe_emulator_skip("Labels not supported by Bigtable emulator")

        def labeled_column(column_name, label):
            # Select a single column, then tag whatever it matched.
            label_filter = ApplyLabelFilter(label)
            column_filter = ColumnQualifierRegexFilter(column_name)
            return RowFilterChain(filters=[column_filter, label_filter])

        written_row = self._table.row(ROW_KEY)
        self.rows_to_delete.append(written_row)

        cell1, _, cell3, _ = self._write_to_row(written_row, None, written_row)
        written_row.commit()

        red_label = u"label-red"
        blue_label = u"label-blue"

        # Bring the two labeled columns together.
        union_filter = RowFilterUnion(filters=[
            labeled_column(COL_NAME1, red_label),
            labeled_column(COL_NAME2, blue_label),
        ])
        read_back = self._table.read_row(ROW_KEY, filter_=union_filter)
        self.assertEqual(read_back.row_key, ROW_KEY)

        families = read_back.cells
        family_cells = families.pop(COLUMN_FAMILY_ID1)
        # COLUMN_FAMILY_ID1 must have been the only family returned.
        self.assertEqual(len(families), 0)

        read_cell1, = family_cells.pop(COL_NAME1)
        read_cell3, = family_cells.pop(COL_NAME2)
        # COL_NAME1 and COL_NAME2 must have been the only columns returned.
        self.assertEqual(len(family_cells), 0)

        # Each returned cell matches what was written and gained its label.
        expectations = (
            (cell1, read_cell1, red_label),
            (cell3, read_cell3, blue_label),
        )
        for written, returned, label in expectations:
            self.assertEqual(returned.value, written.value)
            self.assertEqual(returned.timestamp, written.timestamp)
            self.assertEqual(written.labels, [])
            self.assertEqual(returned.labels, [label])
Exemplo n.º 7
0
def get_column_filter(
    columns: Union[Iterable[column_keys._Column], column_keys._Column] = None
) -> RowFilter:
    """Generate a RowFilter that accepts the specified columns.

    Each column becomes a ``ColumnRangeFilter`` on the column's ``family_id``
    whose start and end are both the column's ``key``.

    Args:
        columns: A single column, or any iterable of columns (lists, tuples,
            sets and generators are all accepted; the iterable is consumed
            once).

    Returns:
        The lone ``ColumnRangeFilter`` when exactly one column is given,
        otherwise a ``RowFilterUnion`` over the per-column filters.

    Raises:
        TypeError: If ``columns`` is None.
    """
    if columns is None:
        raise TypeError(
            "columns must be a column or an iterable of columns, not None")

    def _exact_column_filter(column):
        # Range with identical start and end, scoped to the column's family.
        return ColumnRangeFilter(column.family_id,
                                 start_column=column.key,
                                 end_column=column.key)

    if isinstance(columns, column_keys._Column):
        return _exact_column_filter(columns)

    # Materialize once so iterables without len()/indexing (generators, sets)
    # work as the annotation promises.
    column_filters = [_exact_column_filter(col) for col in columns]
    if len(column_filters) == 1:
        return column_filters[0]
    return RowFilterUnion(column_filters)
Exemplo n.º 8
0
    def read_row(self, row_id, column_families=None):
        """Read a single row and return it as a dict.

        Args:
            row_id: Row key as a string; it is UTF-8 encoded before the read.
            column_families: Optional iterable of column family names; each
                is turned into a ``FamilyNameRegexFilter``.

        Returns:
            The result of ``self.partial_row_to_dict`` for the row read.

        Raises:
            KeyError: If the underlying read returns None for ``row_id``.
        """
        filters = [CellsColumnLimitFilter(1)]
        if column_families is not None:
            family_filters = [
                FamilyNameRegexFilter(family) for family in column_families
            ]
            # Several families are combined with a union; one is used as-is.
            if len(family_filters) > 1:
                filters.append(RowFilterUnion(family_filters))
            elif family_filters:
                filters.append(family_filters[0])
        if len(filters) == 1:
            filter_ = filters[0]
        else:
            filter_ = RowFilterChain(filters=filters)

        encoded_key = row_id.encode("utf-8")
        row = self._low_level.read_row(encoded_key, filter_=filter_)
        if row is None:
            raise KeyError("row {} not found".format(row_id))
        return self.partial_row_to_dict(row)
def _row_keys_filter_helper(row_keys):
    """Creates a union filter for a list of rows.

    :type row_keys: list
    :param row_keys: Iterable containing row keys (as strings).

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The union filter created containing all of the row keys.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    filters = []
    for row_key in row_keys:
        filters.append(RowKeyRegexFilter(row_key))

    num_filters = len(filters)
    if num_filters == 0:
        raise ValueError('Must have at least one filter.')
    elif num_filters == 1:
        return filters[0]
    else:
        return RowFilterUnion(filters=filters)