Exemplo n.º 1
0
    def get_filtered_search_results(self, search_type, search_query,
                                    search_filter, **kwargs):
        '''
        Faceted search: scan the rows stored under each filter value, then
        narrow the combined rows down with the search query.

        search_type = one of ['locations', 'servers', 'clients']
        search_query = input query from api
        search_filter = {
            type: facet type, compared against search_type,
            value: [id1, id2]
        }
        kwargs = extra keyword arguments forwarded to every table scan

        Returns an empty list when the filter has no type or when its type
        equals search_type. Otherwise returns whatever
        prepare_filtered_search_results builds from the rows kept by
        filter_results.
        '''
        facet_type = search_filter['type']
        if not facet_type or facet_type == search_type:
            return []

        table_name = self.get_table_name(search_type, search_filter)
        table_config = get_table_config(self.table_configs, None, table_name)

        scanned_rows = []
        # Facet values are visited in ascending order.
        for facet_value in sorted(search_filter['value']):
            # The facet value must always be the first key field, so it
            # (plus the key delimiter) is used as the scan prefix.
            scan_prefix = du.get_key_field(facet_value, 0, table_config)
            scan_prefix += du.BIGTABLE_KEY_DELIM
            # Restrict the scan to the `meta` column family - for speed.
            meta_only = FamilyNameRegexFilter('meta')
            scanned_rows.extend(bt.scan_table(table_config,
                                              self.get_pool(),
                                              prefix=scan_prefix,
                                              filter=meta_only,
                                              **kwargs))

        matching_rows = self.filter_results(search_type, search_query,
                                            scanned_rows)
        return self.prepare_filtered_search_results(matching_rows)
def _columns_filter_helper(columns):
    """Build one row filter that matches any of the requested columns.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The lone per-column filter when exactly one was produced,
              otherwise a union of all per-column filters.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    def _filter_for(family_id, qualifier):
        # A missing qualifier selects the whole family; otherwise the family
        # and qualifier filters are chained together.
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            return family_filter
        qualifier_filter = ColumnQualifierRegexFilter(qualifier)
        return RowFilterChain(filters=[family_filter, qualifier_filter])

    per_column = [_filter_for(family_id, qualifier)
                  for family_id, qualifier in _get_column_pairs(columns)]

    count = len(per_column)
    if count == 0:
        raise ValueError('Must have at least one filter.')
    if count == 1:
        # No need to wrap a single filter in a union.
        return per_column[0]
    return RowFilterUnion(filters=per_column)
Exemplo n.º 3
0
    def get_first_row(self, start_key, column_families=None, end_key=None):
        """Return the first row read from the range starting at ``start_key``.

        :param start_key: Inclusive start of the scanned key range. When
                          ``end_key`` is None it also acts as the prefix the
                          returned row key must start with.
        :param column_families: (Optional) Iterable of column family names
                                used to build family filters. None (or an
                                empty iterable) adds no family filter.
        :param end_key: (Optional) End of the scanned key range; passed
                        through to ``add_row_range_from_keys`` as-is.
        :returns: A ``(row_key, row_dict)`` tuple where ``row_key`` is the
                  UTF-8 decoded key and ``row_dict`` comes from
                  ``partial_row_to_dict``. None when no row is read, or when
                  ``end_key`` is None and the first row's key does not start
                  with ``start_key``.
        """
        cell_limit = CellsColumnLimitFilter(1)
        family_filters = []
        if column_families is not None:
            family_filters = [FamilyNameRegexFilter(family)
                              for family in column_families]

        # Chain the cell limit with the family restriction, if there is one;
        # several families are first combined into a union.
        if not family_filters:
            filter_ = cell_limit
        elif len(family_filters) == 1:
            filter_ = RowFilterChain(filters=[cell_limit, family_filters[0]])
        else:
            filter_ = RowFilterChain(
                filters=[cell_limit, RowFilterUnion(family_filters)])

        row_set = RowSet()
        row_set.add_row_range_from_keys(start_key=start_key,
                                        start_inclusive=True,
                                        end_key=end_key)

        rows = self._low_level.read_rows(filter_=filter_, row_set=row_set)

        # Only the first row is ever inspected: every path returns.
        for rowdata in rows:
            rk = rowdata.row_key.decode("utf-8")
            if end_key is None and not rk.startswith(start_key):
                # Open-ended scan ran past the requested prefix.
                return None
            return (rk, self.partial_row_to_dict(rowdata))
        return None
    def test_single_column(self):
        from google.cloud.bigtable.row_filters import FamilyNameRegexFilter

        # A lone column family name is expected to come back as a plain
        # family filter equal to one built directly from that name.
        family = "cf1"
        actual = self._call_fut([family])
        self.assertEqual(actual, FamilyNameRegexFilter(family))
Exemplo n.º 5
0
    def row_generator(self,
                      row_keys=None,
                      start_key=None,
                      end_key=None,
                      column_families=None,
                      check_prefix=None):
        """Yield ``(row_key, row_dict)`` pairs for explicit keys or a key range.

        :param row_keys: (Optional) Sequence of row keys to read. When truthy
                         it takes precedence over the key range.
        :param start_key: (Optional) Inclusive start of the key range. Must be
                          combined with ``end_key`` or ``check_prefix``.
        :param end_key: (Optional) Inclusive end of the key range.
        :param column_families: (Optional) Iterable of column family names
                                used to build family filters.
        :param check_prefix: (Optional) When truthy, iteration stops at the
                             first row whose key does not start with it.
        :raises ValueError: if neither ``row_keys`` nor ``start_key`` is
                            given, or if ``start_key`` is given without
                            ``end_key`` or ``check_prefix``.

        Row keys are UTF-8 decoded and row contents come from
        ``partial_row_to_ordered_dict``. A None item from the reader is
        reported as ``(row_keys[i], {})`` when ``row_keys`` was supplied and
        skipped otherwise.
        """
        if row_keys is None and start_key is None:
            raise ValueError("use row_keys or start_key parameter")
        if (start_key is not None and end_key is None
                and check_prefix is None):
            raise ValueError(
                "use start_key together with end_key or check_prefix")

        cell_limit = CellsColumnLimitFilter(1)
        family_filters = []
        if column_families is not None:
            family_filters = [FamilyNameRegexFilter(family)
                              for family in column_families]

        # Chain the cell limit with the family restriction, if there is one;
        # several families are first combined into a union.
        if not family_filters:
            filter_ = cell_limit
        elif len(family_filters) == 1:
            filter_ = RowFilterChain(filters=[cell_limit, family_filters[0]])
        else:
            filter_ = RowFilterChain(
                filters=[cell_limit, RowFilterUnion(family_filters)])

        row_set = RowSet()
        if row_keys:
            for key in row_keys:
                row_set.add_row_key(key)
        else:
            # NOTE(review): an empty (non-None) row_keys with no start_key
            # also lands here with start_key=None - confirm that is intended.
            row_set.add_row_range_from_keys(start_key=start_key,
                                            end_key=end_key,
                                            start_inclusive=True,
                                            end_inclusive=True)

        rows = self._low_level.read_rows(filter_=filter_, row_set=row_set)

        for position, rowdata in enumerate(rows):
            if rowdata is None:
                # presumably the reader yields one item per requested key, in
                # request order, with None for a miss - TODO confirm
                if row_keys:
                    yield (row_keys[position], {})
                continue
            rk = rowdata.row_key.decode("utf-8")
            if check_prefix and not rk.startswith(check_prefix):
                break
            yield (rk, self.partial_row_to_ordered_dict(rowdata))
def _filter_chain_helper(column=None,
                         versions=None,
                         timestamp=None,
                         filters=None):
    """Assemble a filter chain that narrows a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter. Byte strings are decoded as UTF-8.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.
                    The list passed in is extended in place.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    # Reuse the caller's list when one is supplied so it grows in place.
    chain = [] if filters is None else filters

    if column is not None:
        column_text = column
        if isinstance(column_text, six.binary_type):
            column_text = column_text.decode('utf-8')
        column_family_id, column_qualifier = column_text.split(':')
        family_filter = FamilyNameRegexFilter(column_family_id)
        qualifier_filter = ColumnQualifierRegexFilter(column_qualifier)
        chain.extend([family_filter, qualifier_filter])

    if versions is not None:
        chain.append(CellsColumnLimitFilter(versions))

    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        chain.append(TimestampRangeFilter(time_range))

    count = len(chain)
    if count == 0:
        raise ValueError('Must have at least one filter.')
    if count == 1:
        # A single filter needs no chain wrapper.
        return chain[0]
    return RowFilterChain(filters=chain)
Exemplo n.º 7
0
    def read_row(self, row_id, column_families=None):
        """Read a single row and return it as a dict.

        :param row_id: Row key as text; it is UTF-8 encoded before the read.
        :param column_families: (Optional) Iterable of column family names
                                used to build family filters. None (or an
                                empty iterable) adds no family filter.
        :returns: The result of ``partial_row_to_dict`` for the row read.
        :raises KeyError: if the underlying read returns None.
        """
        cell_limit = CellsColumnLimitFilter(1)
        family_filters = []
        if column_families is not None:
            family_filters = [FamilyNameRegexFilter(family)
                              for family in column_families]

        # Chain the cell limit with the family restriction, if there is one;
        # several families are first combined into a union.
        if not family_filters:
            filter_ = cell_limit
        elif len(family_filters) == 1:
            filter_ = RowFilterChain(filters=[cell_limit, family_filters[0]])
        else:
            filter_ = RowFilterChain(
                filters=[cell_limit, RowFilterUnion(family_filters)])

        row = self._low_level.read_row(row_id.encode("utf-8"),
                                       filter_=filter_)
        if row is not None:
            return self.partial_row_to_dict(row)
        raise KeyError("row {} not found".format(row_id))
Exemplo n.º 8
0
def get_bt_results(key_fields, table_config, pool):
    '''
    Scan a table for the first row whose key starts with the joined key
    fields and return it.

    key_fields = key fields for the search; joined with the key delimiter
        to form the scan prefix
    table_config = the table to scan
    pool = a big table connection pool

    Only the `meta` column family is requested and the scan is limited to
    one result. Returns that result, or an empty dict when the scan comes
    back empty.
    '''
    scan_prefix = du.BIGTABLE_KEY_DELIM.join(key_fields)
    matches = bt.scan_table(table_config,
                            pool,
                            prefix=scan_prefix,
                            limit=1,
                            filter=FamilyNameRegexFilter('meta'))

    return matches[0] if matches else {}
Exemplo n.º 9
0
def get_list_table_results(key_fields, pool, include_data, table_config,
                           metric_name):
    '''
    Helper to query table and create results for list based results

    key_fields = array of key fields; joined with the key delimiter to
        form the scan prefix.
    pool = connection pool.
    include_data = boolean indicating if data attributes should be
        included in results. When false, the scan is restricted to the
        `meta` column family.
    table_config = configuration file for table to query from.
    metric_name = not used by this helper; kept so existing callers keep
        working.

    Returns the scanned rows as a list sorted by sort_by_count, in
    descending order.
    '''
    # Use a separate name for the joined prefix instead of rebinding the
    # key_fields parameter from a list to a string.
    scan_params = {"prefix": du.BIGTABLE_KEY_DELIM.join(key_fields)}
    if not include_data:
        scan_params["filter"] = FamilyNameRegexFilter('meta')

    results = scan_table(table_config, pool, **scan_params)
    return sorted(results, key=sort_by_count, reverse=True)