Пример #1
0
 def run_query(self, table_name, column_names, limit, offset, startrow,
               endrow, getOnlyKeys, start_inclusive, end_inclusive):
     """Scan one row interval and collect the row keys or values found.

     Args:
       table_name: Table to scan.
       column_names: Columns passed to the scan specification.
       limit: Number of entries wanted once `offset` cells are skipped.
       offset: Number of leading cells to skip.
       startrow: Row key the interval starts at.
       endrow: Row key the interval ends at.
       getOnlyKeys: If true, collect each cell's row key instead of its
         value.
       start_inclusive: Whether `startrow` itself is part of the interval.
       end_inclusive: Whether `endrow` itself is part of the interval.
     Returns:
       A list whose first element is ERROR_HT (with an error message
       appended when the scan raised), followed by the row key or value
       of every cell after the first `offset` cells. Entries collected
       before a failure are kept.
     """
     starttime = time.time()
     elist = [ERROR_HT]
     client = None
     try:
         try:
             client = self.__initConnection()
             row_intervals = [
                 ttypes.RowInterval(startrow, start_inclusive, endrow,
                                    end_inclusive)
             ]
             cell_intervals = None
             include_deletes = 0
             # Ask for limit + offset so that `limit` entries remain once
             # the first `offset` cells have been skipped below.
             scan_spec = ttypes.ScanSpec(row_intervals, cell_intervals,
                                         include_deletes, 1, limit + offset,
                                         0, None, column_names)
             cells = client.get_cells(self.ns, table_name, scan_spec)
             for position, cell in enumerate(cells):
                 if position < offset:
                     continue
                 if getOnlyKeys:
                     elist.append(cell.row_key)
                 else:
                     elist.append(cell.value)
         except Exception:
             # Only ordinary errors are reported through the result list;
             # KeyboardInterrupt/SystemExit are allowed to propagate.
             elist[0] += "Exception thrown while running a scanner"
         endtime = time.time()
         if PROFILING:
             self.logger.debug("HT RUN_QUERY: %s" % str(endtime - starttime))
     finally:
         # Always hand the connection back, even if logging or the scan
         # aborted with a non-standard exception.
         self.__closeConnection(client)
     return elist
Пример #2
0
    def batch_get_entity(self, table_name, row_keys, column_names):
        """Fetch the requested columns of several rows with a single scan.

        Args:
          table_name: A str, the table to access.
          row_keys: A list of keys to access.
          column_names: A list of columns to access.
        Raises:
          TypeError: Bad argument types.
        Returns:
          A dictionary of {key: {column_name: value, ...}} with one entry
          per requested key; keys for which no cell came back map to an
          empty dictionary. An empty `row_keys` list yields {}.
        """
        if not isinstance(table_name, str):
            raise TypeError("Expected str")
        if not isinstance(column_names, list):
            raise TypeError("Expected list")
        if not isinstance(row_keys, list):
            raise TypeError("Expected list")

        # Nothing was asked for, so skip the round trip entirely.
        # NOTE(review): a ScanSpec without any row interval presumably does
        # not restrict the scan to specific rows -- confirm against the
        # Hypertable Thrift docs.
        if not row_keys:
            return {}

        encoded_keys = [self.__encode(row) for row in row_keys]

        # One single-row interval (inclusive on both ends) per key.
        row_intervals = [
            ttypes.RowInterval(key, True, key, True) for key in encoded_keys
        ]
        cell_intervals = None
        include_deletes = False
        scan_spec = ttypes.ScanSpec(row_intervals, cell_intervals,
                                    include_deletes, 1, 0, 0, None,
                                    column_names)

        ret = {}
        for cell in self.conn.get_cells(self.ns, table_name, scan_spec):
            # Group the cells of one row under its decoded key.
            col_dict = ret.setdefault(self.__decode(cell.key.row), {})
            col_dict[cell.key.column_family] = cell.value

        # If nothing was returned for a key, put in an empty value.
        for key in encoded_keys:
            ret.setdefault(self.__decode(key), {})

        return ret
Пример #3
0
    def range_query(self,
                    table_name,
                    column_names,
                    start_key,
                    end_key,
                    limit,
                    offset=0,
                    start_inclusive=True,
                    end_inclusive=True,
                    keys_only=False):
        """Get a dense range of rows ordered by key.

        Args:
          table_name: Name of table to access.
          column_names: Columns which get returned within the key range.
          start_key: String at which the query starts.
          end_key: String at which the query ends.
          limit: Maximum number of results to return.
          offset: Number of leading results to cut off; it is applied
            after `limit`, i.e. the result is results[:limit][offset:].
          start_inclusive: Whether results may include the start_key.
          end_inclusive: Whether results may include the end_key.
          keys_only: If true, return only keys and not values.
        Raises:
          TypeError: Bad argument types.
        Returns:
          A list ordered as the scan returned it: decoded row keys when
          keys_only is set, otherwise one {key: {column: value, ...}}
          dictionary per row.
        """
        if not isinstance(table_name, str):
            raise TypeError("Expected str")
        if not isinstance(column_names, list):
            raise TypeError("Expected list")
        if not isinstance(start_key, str):
            raise TypeError("Expected str")
        if not isinstance(end_key, str):
            raise TypeError("Expected str")
        if not isinstance(limit, int) and not isinstance(limit, long):
            raise TypeError("Expected int or long")
        if not isinstance(offset, int):
            raise TypeError("Expected int")

        start_key = self.__encode(start_key)
        end_key = self.__encode(end_key)

        # Request up to two extra rows in case an excluded start/end key
        # comes back and has to be removed, so the limit is still upheld.
        row_count = limit
        if not start_inclusive:
            row_count += 1
        if not end_inclusive:
            row_count += 1

        row_intervals = [
            ttypes.RowInterval(start_key, start_inclusive, end_key,
                               end_inclusive)
        ]
        cell_intervals = None
        include_deletes = False

        scan_spec = ttypes.ScanSpec(
            row_intervals,
            cell_intervals,
            include_deletes,
            1,  # max revisions
            row_count,
            0,
            None,
            column_names)
        res = self.conn.get_cells(self.ns, table_name, scan_spec)

        results = []
        # Raw (still encoded) row key of every entry in `results`, kept in
        # parallel so the boundary checks below can compare exact keys.
        raw_rows = []
        col_dict = None
        for cell in res:
            raw_row = cell.key.row
            if not raw_rows or raw_row != raw_rows[-1]:
                # First cell of a new row: start a new list element.
                raw_rows.append(raw_row)
                if keys_only:
                    results.append(self.__decode(raw_row))
                else:
                    col_dict = {}
                    results.append({self.__decode(raw_row): col_dict})
            if not keys_only:
                # Cells of the same row arrive consecutively, so col_dict
                # is always the dictionary of the current row.
                col_dict[cell.key.column_family] = cell.value

        # Drop an excluded boundary row only on an exact key match. A plain
        # `in` test would be a substring check for keys_only results and
        # would compare an encoded key against decoded ones otherwise.
        if not start_inclusive and raw_rows and raw_rows[0] == start_key:
            raw_rows = raw_rows[1:]
            results = results[1:]

        if not end_inclusive and raw_rows and raw_rows[-1] == end_key:
            raw_rows = raw_rows[:-1]
            results = results[:-1]

        if len(results) > limit:
            results = results[:limit]

        # Slicing never raises, so an offset past the end simply yields an
        # empty list instead of handing back the unskipped results.
        if offset != 0:
            results = results[offset:]

        return results