async def get_data_from_main_db(db_session, user, movie_ids):
    """Fetch movie rows plus the user's upvotes for *movie_ids*.

    Both queries are issued asynchronously up front so they run concurrently;
    each row dict is annotated with a 'user_voted' flag.

    Returns:
        dict with 'has_more_pages' (always True here) and 'result_list'.
    """
    response = {'has_more_pages': True, 'result_list': []}

    # Fire both queries before blocking on either result.
    movies_stmt = SimpleStatement('SELECT * FROM movie_model WHERE id IN %s')
    movies_future = db_session.execute_async(
        movies_stmt, parameters=[ValueSequence(movie_ids)])

    upvotes_stmt = SimpleStatement(
        'SELECT movie_id FROM upvotes_model WHERE user_id=%s and movie_id IN %s')
    upvotes_future = db_session.execute_async(
        upvotes_stmt, [user, ValueSequence(movie_ids)])

    # NOTE(review): .result() blocks the event loop despite `async def` —
    # presumably acceptable for this codebase; confirm.
    upvoted_ids = [row['movie_id'] for row in upvotes_future.result()]

    response['result_list'] = [
        {
            'title': movie['title'],
            'plot': movie['plot'],
            'rating': movie['rating'],
            'genres': movie['genres'],
            'poster': movie['poster'],
            'votes': movie['votes'],
            'id': movie['id'],
            'user_voted': if_user_voted(upvoted_ids, movie['id']),
        }
        for movie in movies_future.result()
    ]
    return response
def batch_get_entity(self, table_name, row_keys, column_names):
    """Retrieve the rows for a batch of keys in one query.

    Args:
        table_name: The table to access.
        row_keys: A list of keys to access.
        column_names: A list of columns to access.

    Returns:
        A dict of the form {key: {column_name: value, ...}, ...}.

    Raises:
        TypeError: If an argument is not of the expected type.
        AppScaleDBConnectionError: If Cassandra reports a transient failure.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected a str")
    if not isinstance(column_names, list):
        raise TypeError("Expected a list")
    if not isinstance(row_keys, list):
        raise TypeError("Expected a list")

    keys_as_bytes = [bytearray(key) for key in row_keys]
    cql = 'SELECT * FROM "{table}" '\
          'WHERE {key} IN %s and {column} IN %s'.format(
              table=table_name,
              key=ThriftColumn.KEY,
              column=ThriftColumn.COLUMN_NAME,
          )
    select = SimpleStatement(cql, retry_policy=self.retry_policy)
    bound = (ValueSequence(keys_as_bytes), ValueSequence(column_names))
    try:
        rows = self.session.execute(select, parameters=bound)
        # Seed with every requested key so absent rows still appear as {}.
        grouped = dict((key, {}) for key in row_keys)
        for key, column, value in rows:
            grouped.setdefault(key, {})[column] = value
        return grouped
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        message = 'Exception during batch_get_entity'
        logging.exception(message)
        raise AppScaleDBConnectionError(message)
def test_value_sequence(self):
    """A ValueSequence of strings renders as a quoted CQL value tuple."""
    user_ids = ('alice', 'bob', 'charles')
    rendered = str(ValueSequence(user_ids))
    self.assertEqual(rendered, "( 'alice' , 'bob' , 'charles' )")
def get_entity(self, table_name, row_key, column_names):
    """Fetch the given columns of a single row.

    Args:
        table_name: Table to fetch from.
        row_key: Row key; it is joined with the table name to form the
            stored key.
        column_names: Columns to return, in order.

    Returns:
        A list in the AppScale error-list convention: [ERROR_DEFAULT]
        followed by the requested column values on success, or
        [ERROR_DEFAULT + reason] on failure.
    """
    # Fix: the accumulator was previously named `list`, shadowing the
    # builtin; renamed to `result` (the redundant `error` alias is gone too).
    result = [ERROR_DEFAULT]
    row_key = bytearray('/'.join([table_name, row_key]))
    statement = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(statement, retry_policy=self.retry_policy)
    parameters = {'key': row_key,
                  'columns': ValueSequence(column_names)}
    try:
        results = self.session.execute(query, parameters)
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        result[0] += 'Unable to fetch entity'
        return result

    results_dict = {}
    for (_, column, value) in results:
        results_dict[column] = value

    if not results_dict:
        result[0] += 'Not found'
        return result

    for column in column_names:
        # NOTE(review): raises KeyError if a requested column is missing
        # from the row — presumably rows always carry all columns; confirm.
        result.append(results_dict[column])
    return result
def _where_clause(query_filters): def stringify_keys(value): if isinstance(value, Key): return value.urlsafe() return value query_string = '' condition_values = {} if query_filters: query_string += ' WHERE' for index in xrange(len(query_filters)): if index > 0: query_string += ' AND' qry_filter = query_filters[index] query_string += ' %s %s' % (qry_filter.name, qry_filter.opsymbol) query_string += ' %(' + qry_filter.name + ')s' if isinstance(query_filters[index], FilterInNode): condition_values.update({ qry_filter.name: ValueSequence([ stringify_keys(qry.value) for qry in qry_filter.value ]) }) else: condition_values.update( {qry_filter.name: stringify_keys(qry_filter.value)}) return query_string, condition_values
def get_entity(self, table_name, row_key, column_names):
    """Fetch the given columns of a single row.

    Args:
        table_name: Table to fetch from.
        row_key: Row key; joined with the table name to form the stored key.
        column_names: Columns to return, in order.

    Returns:
        A list in the AppScale error-list convention: [ERROR_DEFAULT]
        followed by the requested column values, or
        [ERROR_DEFAULT + 'Not found'] when the row does not exist.

    Raises:
        AppScaleDBConnectionError: If Cassandra reports a transient failure.
    """
    # Fix: the accumulator was previously named `list`, shadowing the
    # builtin; renamed to `result` (the redundant `error` alias is gone too).
    result = [ERROR_DEFAULT]
    row_key = bytearray('/'.join([table_name, row_key]))
    statement = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(statement, retry_policy=self.retry_policy)
    parameters = {'key': row_key,
                  'columns': ValueSequence(column_names)}
    try:
        results = self.session.execute(query, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        raise AppScaleDBConnectionError('Unable to fetch entity')

    results_dict = {}
    for (_, column, value) in results:
        results_dict[column] = value

    if not results_dict:
        result[0] += 'Not found'
        return result

    for column in column_names:
        # NOTE(review): raises KeyError if a requested column is missing
        # from the row — presumably rows always carry all columns; confirm.
        result.append(results_dict[column])
    return result
def batch_delete(self, table_name, row_keys, column_names=()):
    """Delete every row whose key appears in row_keys (tornado coroutine).

    Args:
        table_name: Table to delete rows from.
        row_keys: A list of keys identifying the rows to remove.
        column_names: Unused; kept for interface compatibility.

    Raises:
        TypeError: If an argument is not of the expected type.
        AppScaleDBConnectionError: If Cassandra reports a transient failure.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected a str")
    if not isinstance(row_keys, list):
        raise TypeError("Expected a list")

    encoded_keys = [bytearray(key) for key in row_keys]
    delete_cql = 'DELETE FROM "{table}" WHERE {key} IN %s'.format(
        table=table_name, key=ThriftColumn.KEY)
    statement = SimpleStatement(delete_cql, retry_policy=BASIC_RETRIES)
    try:
        yield self.tornado_cassandra.execute(
            statement, parameters=(ValueSequence(encoded_keys),))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        message = 'Exception during batch_delete'
        logger.exception(message)
        raise AppScaleDBConnectionError(message)
def batch_delete(self, table_name, row_keys, column_names=()):
    """Delete every row whose key appears in row_keys.

    Args:
        table_name: Table to delete rows from.
        row_keys: A list of keys identifying the rows to remove.
        column_names: Unused; kept for interface compatibility.

    Raises:
        TypeError: If an argument is not of the expected type.
        AppScaleDBConnectionError: If Cassandra reports a transient failure.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected a str")
    if not isinstance(row_keys, list):
        raise TypeError("Expected a list")

    encoded_keys = [bytearray(key) for key in row_keys]
    delete_cql = 'DELETE FROM "{table}" WHERE {key} IN %s'.format(
        table=table_name, key=ThriftColumn.KEY)
    statement = SimpleStatement(delete_cql, retry_policy=self.retry_policy)
    try:
        self.session.execute(statement,
                             parameters=(ValueSequence(encoded_keys),))
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        message = 'Exception during batch_delete'
        logging.exception(message)
        raise AppScaleDBConnectionError(message)
def list_address_links(self, currency, address, neighbor):
    """Return the transactions linking *address* to *neighbor*.

    Looks up the outgoing-relation tx list between the two addresses, then
    fetches both sides' address_transactions rows for those hashes and
    merges them into per-transaction link dicts.

    Raises:
        RuntimeError: If either address cannot be resolved to an id.
    """
    session = self.get_session(currency, 'transformed')
    address_id, address_id_group = \
        self.get_address_id_id_group(currency, address)
    neighbor_id, neighbor_id_group = \
        self.get_address_id_id_group(currency, neighbor)
    if address_id is None or neighbor_id is None:
        raise RuntimeError("Links between {} and {} not found".format(
            address, neighbor))

    relations_query = "SELECT tx_list FROM address_outgoing_relations WHERE " \
                      "src_address_id_group = %s AND src_address_id = %s AND " \
                      "dst_address_id = %s"
    relation_rows = session.execute(
        relations_query, [address_id_group, address_id, neighbor_id])
    if not relation_rows.current_rows:
        return []

    tx_hashes = list(relation_rows.current_rows[0].tx_list)
    tx_query = "SELECT * FROM address_transactions WHERE " \
               "address_id_group = %s AND address_id = %s AND " \
               "tx_hash IN %s"
    source_rows = session.execute(
        tx_query, [address_id_group, address_id, ValueSequence(tx_hashes)])
    target_rows = session.execute(
        tx_query, [neighbor_id_group, neighbor_id, ValueSequence(tx_hashes)])
    if not source_rows.current_rows or not target_rows.current_rows:
        return []

    # First pass: the source side supplies height/timestamp/input_value.
    links = {
        row.tx_hash.hex(): {'tx_hash': row.tx_hash.hex(),
                            'height': row.height,
                            'timestamp': row.timestamp,
                            'input_value': row.value}
        for row in source_rows.current_rows
    }
    # Second pass: the neighbor side contributes the output value.
    for row in target_rows.current_rows:
        links[row.tx_hash.hex()]['output_value'] = row.value
    return links.values()
def execute_async(self, query, params=()):
    """Execute a statement asynchronously.

    Args:
        query: CQL string (wrapped in a SimpleStatement at the session's
            consistency level) or an already-built statement object.
        params: Sequence of bind parameters; list/tuple elements are
            wrapped in ValueSequence so they render as IN (...) lists.

    Returns:
        The driver's ResponseFuture.
    """
    # Fix: the default was previously a mutable list ([]); an immutable
    # tuple avoids the shared-mutable-default pitfall.
    if type(query) is str or type(query) is unicode:
        query = SimpleStatement(query, consistency_level=self.consistency)
    realParams = []
    for p in params:
        if type(p) in (list, tuple):
            p = ValueSequence(p)
        realParams.append(p)
    return self.cur.execute_async(query, realParams)
def list_addresses_links(currency, address, neighbor):
    """Return Link dicts for the transactions between address and neighbor.

    Resolves both addresses, reads the shared tx list, fetches each side's
    address_transactions rows, and converts the merged entries into Link
    dicts priced with the exchange rate at each transaction's height.
    Returns [] when any lookup comes up empty.
    """
    session = get_session(currency, 'transformed')
    address_id, address_id_group = get_address_id_id_group(currency, address)
    neighbor_id, neighbor_id_group = get_address_id_id_group(
        currency, neighbor)
    # Guard clauses replace the original nested-if pyramid.
    if not (address_id and neighbor_id):
        return []

    query = "SELECT tx_list FROM address_outgoing_relations WHERE " \
            "src_address_id_group = %s AND src_address_id = %s AND " \
            "dst_address_id = %s"
    results = session.execute(query,
                              [address_id_group, address_id, neighbor_id])
    if not results.current_rows:
        return []

    txs = list(results.current_rows[0].tx_list)
    query = "SELECT * FROM address_transactions WHERE " \
            "address_id_group = %s AND address_id = %s AND " \
            "tx_hash IN %s"
    results1 = session.execute(
        query, [address_id_group, address_id, ValueSequence(txs)])
    results2 = session.execute(
        query, [neighbor_id_group, neighbor_id, ValueSequence(txs)])
    if not (results1.current_rows and results2.current_rows):
        return []

    links = {}
    for row in results1.current_rows:
        hsh = row.tx_hash.hex()
        links[hsh] = {'tx_hash': hsh,
                      'height': row.height,
                      'timestamp': row.timestamp,
                      'input_value': row.value}
    for row in results2.current_rows:
        links[row.tx_hash.hex()]['output_value'] = row.value

    heights = [entry['height'] for entry in links.values()]
    rates = list_rates(currency, heights)
    return [Link.from_dict(entry, rates[entry['height']]).to_dict()
            for entry in links.values()]
def get_movies_from_db2(db_session, user, paging_state):
    """Fetch the top-10 movies (ids taken from redis) with upvote flags.

    Returns a dict with 'has_more_pages' (always True here) and
    'result_list', one annotated dict per movie row.
    """
    top_movies = get_top_movies_from_redis(10, paging_state)
    top_ids = [uuid.UUID(movie[0].decode("utf-8")) for movie in top_movies]

    movies_stmt = SimpleStatement('SELECT * FROM movie_model WHERE id IN %s')
    movie_rows = db_session.execute(movies_stmt,
                                    parameters=[ValueSequence(top_ids)])

    upvotes_stmt = SimpleStatement(
        'SELECT movie_id FROM upvotes_model WHERE user_id=%s and movie_id IN %s')
    upvote_rows = db_session.execute(upvotes_stmt,
                                     [user, ValueSequence(top_ids)])
    upvoted_ids = [row['movie_id'] for row in upvote_rows]

    return {
        'has_more_pages': True,
        'result_list': [{
            'title': row['title'],
            'plot': row['plot'],
            'rating': row['rating'],
            'genres': row['genres'],
            'poster': row['poster'],
            'votes': row['votes'],
            'id': row['id'],
            'user_voted': if_user_voted(upvoted_ids, row['id']),
        } for row in movie_rows],
    }
def recommend_room(email):
    """Return rooms whose category matches any of the user's interests.

    Args:
        email: The user's email address (untrusted input).

    Returns:
        The driver ResultSet of matching room rows (as dicts).
    """
    cluster = Cluster(['127.0.0.1'])
    try:
        session = cluster.connect('plannet')
        session.row_factory = dict_factory
        # Fix: the query was previously built with str.format(email), which
        # allowed CQL injection; bind the value instead.
        row = session.execute('SELECT interests FROM users WHERE email=%s',
                              [email]).one()
        rows = session.execute('SELECT * FROM rooms WHERE category in %s',
                               [ValueSequence(row['interests'])])
    finally:
        # Always release cluster resources, even when a query fails.
        cluster.shutdown()
    return rows
def execute(self, query, params=tuple()):
    """Execute a statement synchronously with timeout retries and an
    optional consistency-level fallback.

    List/tuple parameters are wrapped in ValueSequence so they render as
    IN (...) lists. On OperationTimedOut the statement is retried up to
    twice (with a 5s gevent sleep between attempts). If the first pass
    ultimately fails and self.backoffConsistency is set, the query is
    retried once more at the CL_Ingest consistency level.
    """
    res = None
    thisCur = self.cur
    realParams = []
    for p in params:
        if type(p) in (list, tuple):
            # Collections become CQL IN-lists via ValueSequence.
            p = ValueSequence(p)
        realParams.append(p)

    def queryWith(stmt):
        # Retry loop: only timeouts are retried; anything else re-raises.
        nRetry = 0
        isSuccess = False
        while (not isSuccess) and (nRetry < 2):
            res = None
            try:
                res = thisCur.execute(stmt, realParams)
                isSuccess = True
            except Exception as e:
                # Detect timeouts by inspecting the formatted traceback.
                desc = traceback.format_exc()
                if 'OperationTimedOut' not in desc:
                    raise e
                else:
                    gevent.sleep(5)
                    syslog.syslog(syslog.LOG_USER, 'TIMEDOUT RETRYING')
                    nRetry += 1
        # NOTE(review): if both attempts time out, res is None here —
        # callers presumably treat None as "no result"; confirm.
        return res

    if type(query) is str or type(query) is unicode:
        q = SimpleStatement(query, consistency_level=self.consistency)
    else:
        q = query
    try:
        res = queryWith(q)
    except:
        if self.backoffConsistency:
            # Fallback: rebuild the statement at the ingest consistency
            # level and try the whole retry loop once more.
            if (type(query) is str or type(query) is unicode):
                q = SimpleStatement(query, consistency_level=self.CL_Ingest)
            else:
                q = query
            res = queryWith(q)
        else:
            raise
    return res
def execute_async(self, query, params=(), failureCallback=None):
    """Execute a statement asynchronously with success/error logging.

    Args:
        query: CQL string (wrapped in a SimpleStatement at the session's
            consistency level) or an already-built statement object.
        params: Sequence of bind parameters; list/tuple elements are
            wrapped in ValueSequence so they render as IN (...) lists.
        failureCallback: Optional callable forwarded to the error handler.

    Returns:
        The driver's ResponseFuture, with logging callbacks attached.
    """
    # Fix: the default was previously a mutable list ([]); an immutable
    # tuple avoids the shared-mutable-default pitfall.
    if type(query) is str or type(query) is unicode:
        query = SimpleStatement(query, consistency_level=self.consistency)
    realParams = []
    for p in params:
        if type(p) in (list, tuple):
            p = ValueSequence(p)
        realParams.append(p)
    # Track the number of in-flight queries.
    self.nActive += 1
    future = self.cur.execute_async(query, realParams)
    future.add_callbacks(callback=self._logSuccess,
                         errback=self._logError,
                         errback_args=(failureCallback, query, realParams))
    return future
def fix_anomaly_status(self, table, meeting_name, timestamps):
    """Recompute the boolean 'anomaly' flag for the given meeting rows.

    A row is anomalous when either its anomaly_reason is a non-empty list
    literal or its ml_anomaly_reason is truthy; the flag is written back
    concurrently for every timestamp.
    """
    reason_rows = self.session.execute(
        f'SELECT anomaly_reason, ml_anomaly_reason FROM {table} '
        f'WHERE meeting_name = %s AND datetime IN %s;',
        (meeting_name, ValueSequence(timestamps))).all()

    flags = [row['anomaly_reason'] != '[]' or bool(row['ml_anomaly_reason'])
             for row in reason_rows]
    rows_to_update = list(zip(flags, repeat(meeting_name), timestamps))

    update_stmt = self.session.prepare(
        f"UPDATE {table} "
        f"SET anomaly=? "
        f"WHERE meeting_name=? AND datetime=?;")
    execute_concurrent_with_args(self.session, update_stmt, rows_to_update)
def load_calls_data(self, meeting_name, call_starts):
    """Load and concatenate per-call dataframes for the given call starts.

    Raises:
        MissingDataError: If call_starts is empty or no call produced data.

    Returns:
        A (ci_dataframe, roster_dataframe) pair, each the concatenation of
        the per-call frames.
    """
    if not call_starts:
        raise MissingDataError

    call_rows = self.session.execute(
        f'SELECT start_datetime as start, last_update as end '
        f'FROM calls '
        f'WHERE meeting_name=%s AND start_datetime IN %s;',
        (meeting_name, ValueSequence(call_starts))).all()

    frames = [self.load_data(meeting_name, call['start'], call['end'])
              for call in call_rows]
    if not frames:
        raise MissingDataError

    ci_frames, roster_frames = zip(*frames)
    return pd.concat(ci_frames), pd.concat(roster_frames)
def execute(self, query, params=tuple()):
    """Execute a statement synchronously, logging success or failure.

    List/tuple parameters are wrapped in ValueSequence so they render as
    IN (...) lists; string queries are wrapped in a SimpleStatement at the
    session's consistency level. Errors are logged and re-raised.
    """
    res = None
    bound = []
    for value in params:
        if type(value) in (list, tuple):
            value = ValueSequence(value)
        bound.append(value)

    if type(query) is str or type(query) is unicode:
        stmt = SimpleStatement(query, consistency_level=self.consistency)
    else:
        stmt = query

    try:
        res = self.cur.execute(stmt, bound)
    except Exception as e:
        self._logError(e, None, stmt, bound)
        raise
    else:
        self._logSuccess(res)
    return res
def msgs_select_all_in(self, message_ids: set) -> ResultSet:
    """Fetch every message whose id is in *message_ids*."""
    ids = list(message_ids)
    return self._execute(StatementKeys.msgs_select_all_in, ValueSequence(ids))
def test_sequence_param(self):
    """bind_params renders a ValueSequence as a CQL value tuple."""
    params = (ValueSequence((1, "a", 2.0)),)
    bound = bind_params("%s", params, Encoder())
    self.assertEqual(bound, "(1, 'a', 2.0)")
def get_transactions_by_hash(hash):
    """Fetch transaction rows for the given iterable of hashes.

    Args:
        hash: Iterable of transaction hashes to look up.

    Returns:
        The driver ResultSet of matching rows.
    """
    # Fix: ValueSequence only expands into an IN (...) list with %s-style
    # simple statements; binding it to a prepared '?' placeholder is not
    # supported by the driver (and re-preparing per call was wasteful).
    return session.execute('SELECT * FROM transactions where hash in %s',
                           (ValueSequence(hash),))
def get_transactions_by_height(ids):
    """Fetch transaction rows for the given iterable of block heights.

    Args:
        ids: Iterable of block heights to look up.

    Returns:
        The driver ResultSet of matching rows.
    """
    # Fix: ValueSequence only expands into an IN (...) list with %s-style
    # simple statements; binding it to a prepared '?' placeholder is not
    # supported by the driver (and re-preparing per call was wasteful).
    return session.execute(
        'SELECT * FROM transactions where block_height in %s',
        (ValueSequence(ids),))
def get_blocks_by_hash(hash):
    """Fetch block rows for the given iterable of block hashes.

    Args:
        hash: Iterable of block hashes to look up.

    Returns:
        The driver ResultSet of matching rows.
    """
    # Fix: ValueSequence only expands into an IN (...) list with %s-style
    # simple statements; binding it to a prepared '?' placeholder is not
    # supported by the driver (and re-preparing per call was wasteful).
    return session.execute('SELECT * FROM blocks where hash in %s',
                           (ValueSequence(hash),))
def get_blocks_by_height(ids):
    """Fetch block rows for the given iterable of block heights.

    Args:
        ids: Iterable of block heights to look up.

    Returns:
        The driver ResultSet of matching rows.
    """
    # Fix: ValueSequence only expands into an IN (...) list with %s-style
    # simple statements; binding it to a prepared '?' placeholder is not
    # supported by the driver (and re-preparing per call was wasteful).
    return session.execute('SELECT * FROM blocks where height in %s',
                           (ValueSequence(ids),))
def batch_get_entity(self, table_name, row_keys, column_names):
    """ Takes in batches of keys and retrieves their corresponding rows.

    Args:
      table_name: The table to access
      row_keys: A list of keys to access
      column_names: A list of columns to access
    Returns:
      A dictionary of rows and columns/values of those rows. The format
      looks like such: {key:{column_name:value,...}}
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_get could not be performed due
        to an error with Cassandra.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected a str")
    if not isinstance(column_names, list):
        raise TypeError("Expected a list")
    if not isinstance(row_keys, list):
        raise TypeError("Expected a list")
    row_keys_bytes = [bytearray(row_key) for row_key in row_keys]
    statement = 'SELECT * FROM "{table}" '\
                'WHERE {key} IN %s and {column} IN %s'.format(
                    table=table_name,
                    key=ThriftColumn.KEY,
                    column=ThriftColumn.COLUMN_NAME,
                )
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
    results = []
    # Split the rows up into chunks to reduce the likelihood of timeouts.
    chunk_indexes = [
        (n, n + ENTITY_FETCH_THRESHOLD)
        for n in xrange(0, len(row_keys_bytes), ENTITY_FETCH_THRESHOLD)
    ]
    # TODO: This can be made more efficient by maintaining a constant number
    # of concurrent requests rather than waiting for each batch to complete.
    for start, end in chunk_indexes:
        parameters = (ValueSequence(row_keys_bytes[start:end]),
                      ValueSequence(column_names))
        try:
            # Tornado coroutine: yield suspends until the chunk completes.
            batch_results = yield self.tornado_cassandra.execute(
                query, parameters=parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during batch_get_entity'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
        results.extend(list(batch_results))
    # Seed with every requested key so absent rows still appear as {}.
    results_dict = {row_key: {} for row_key in row_keys}
    for (key, column, value) in results:
        if key not in results_dict:
            results_dict[key] = {}
        results_dict[key][column] = value
    # Pre-Python-3.3 coroutine return convention.
    raise gen.Return(results_dict)
def build_select_stmt(self, quals, columns, allow_filtering, verbose=False):
    """Build a CQL SELECT for the given FDW quals and columns.

    Returns a (statement, binding_values, filteredColumns) tuple, or None
    when a partition/clustering qual has a NULL value (no row can match).
    Placeholders are '?' for prepared statements and '%s' otherwise.
    """
    stmt_str = StringIO()
    usedQuals = {}
    filteredColumns = []
    rowid = None
    binding_values = []
    # The synthetic rowid column is never selected directly.
    for col in columns:
        if col != self.ROWIDCOLUMN:
            filteredColumns.append(col)
    if (self.query):
        # An explicit user-supplied query overrides the generated SELECT.
        stmt_str.write(self.query)
    else:
        # Row-id columns must always be fetched so rows can be identified.
        for col in self.rowIdColumns:
            if col not in filteredColumns:
                filteredColumns.append(col)
        stmt_str.write(u"SELECT {0} FROM {1}.{2}".format(
            ",".join(map(lambda c: '"{0}"'.format(c), filteredColumns)),
            self.keyspace, self.columnfamily))
    isWhere = None
    eqRestricted = None
    rangeUsed = False
    if self.prepare_select_stmt:
        formatting_str = '?'
    else:
        formatting_str = '%s'
    # Tag each qual with the cost/position index of its column (10000 for
    # columns that are not queryable at all).
    for qual in quals:
        if qual.field_name == self.ROWIDCOLUMN:
            rowid = qual.value
        if qual.field_name in self.queryableColumns:
            qual.componentIdx = self.querableColumnsIdx[qual.field_name]
        else:
            qual.componentIdx = 10000
    if rowid is not None:
        # A rowid qual pins every row-id column; decode the JSON-encoded
        # id list and bind each component.
        ids = json.loads(rowid)
        for i in range(0, len(self.rowIdColumns)):
            columnName = self.rowIdColumns[i]
            binding_values.append(
                types_mapper.map_object_to_type(
                    ids[i], self.columnsTypes[columnName]))
        stmt_str.write(u" WHERE {0}".format(u" AND ".join(
            map(lambda str: str + u" = " + formatting_str,
                self.rowIdColumns))))
    else:
        # Process quals in key-component order so clustering-key prefix
        # rules can be enforced.
        sortedQuals = sorted(quals, key=lambda qual: qual.componentIdx)
        last_clustering_key_idx = 0
        for qual in sortedQuals:
            # Partition key and clustering column can't be null
            if qual.componentIdx < self.IDX_QUERY_COST and qual.value is None:
                return None
            if ISDEBUG or verbose:
                logger.log(
                    u"qual field {0}; qual index {1}; qual type {2}; qual operator: {4}; qual value {3}"
                    .format(qual.field_name, qual.componentIdx,
                            type(qual.operator), qual.value, qual.operator))
            if qual.operator == "=":
                if (qual.field_name in self.queryableColumns
                        and self.queryableColumns[qual.field_name] !=
                        self.REGULAR_QUERY_COST):
                    if self.queryableColumns[
                            qual.field_name] == self.CLUSTERING_KEY_QUERY_COST:
                        # A clustering key can only be restricted when the
                        # preceding clustering components are restricted too.
                        if last_clustering_key_idx == 0 and qual.componentIdx != self.CLUSTERING_KEY_QUERY_COST:
                            eqRestricted = True
                        elif qual.componentIdx - 1 != last_clustering_key_idx and last_clustering_key_idx != 0:
                            eqRestricted = True
                    if (qual.field_name not in usedQuals and not eqRestricted):
                        usedQuals[qual.field_name] = qual.value
                        if self.queryableColumns[
                                qual.field_name] == self.CLUSTERING_KEY_QUERY_COST:
                            last_clustering_key_idx = qual.componentIdx
                        formatted = u" {0} = {1} ".format(
                            qual.field_name, formatting_str)
                        binding_values.append(
                            types_mapper.map_object_to_type(
                                qual.value, self.columnsTypes[qual.field_name]))
                        if isWhere:
                            stmt_str.write(u" AND ")
                            stmt_str.write(formatted)
                        else:
                            stmt_str.write(u" WHERE ")
                            stmt_str.write(formatted)
                            isWhere = 1
                    elif allow_filtering:
                        # Duplicate/restricted qual: usable only with
                        # ALLOW FILTERING.
                        formatted = u" {0} = {1} ".format(
                            qual.field_name, formatting_str)
                        binding_values.append(
                            types_mapper.map_object_to_type(
                                qual.value, self.columnsTypes[qual.field_name]))
                        if isWhere:
                            stmt_str.write(u" AND ")
                            stmt_str.write(formatted)
                        else:
                            stmt_str.write(u" WHERE ")
                            stmt_str.write(formatted)
                            isWhere = 1
                elif allow_filtering:
                    # Non-key column equality: usable only with
                    # ALLOW FILTERING.
                    formatted = u" {0} = {1} ".format(
                        qual.field_name, formatting_str)
                    binding_values.append(
                        types_mapper.map_object_to_type(
                            qual.value, self.columnsTypes[qual.field_name]))
                    if isWhere:
                        stmt_str.write(u" AND ")
                        stmt_str.write(formatted)
                    else:
                        stmt_str.write(u" WHERE ")
                        stmt_str.write(formatted)
                        isWhere = 1
            # IN operator
            elif qual.operator == (u"=", True):
                if (qual.field_name in self.queryableColumns):
                    if (self.queryableColumns[qual.field_name] ==
                            self.CLUSTERING_KEY_QUERY_COST
                            or self.queryableColumns[qual.field_name] ==
                            self.PARTITION_KEY_QUERY_COST):
                        if (qual.field_name not in usedQuals
                                and not eqRestricted and not rangeUsed):
                            usedQuals[qual.field_name] = qual.value
                            formatted = u"{0} IN {1}".format(
                                qual.field_name, formatting_str)
                            binding_value = []
                            for el in qual.value:
                                binding_value.append(
                                    types_mapper.map_object_to_type(
                                        el,
                                        self.columnsTypes[qual.field_name]))
                            # Prepared statements take the list directly;
                            # simple statements need a ValueSequence.
                            if self.prepare_select_stmt:
                                binding_values.append(binding_value)
                            else:
                                binding_values.append(
                                    ValueSequence(binding_value))
                            if isWhere:
                                stmt_str.write(u" AND ")
                                stmt_str.write(formatted)
                            else:
                                stmt_str.write(u" WHERE ")
                                stmt_str.write(formatted)
                                isWhere = 1
            elif (qual.operator == "~" or qual.operator == "~~"
                  ) and qual.field_name in self.indexes and self.indexes[
                      qual.field_name] == "org.apache.cassandra.index.sasi.SASIIndex":
                # LIKE is only supported through a SASI index; '~' means
                # "contains", so wrap the value in wildcards.
                if qual.operator == "~":
                    val = "%{0}%".format(qual.value)
                else:
                    val = qual.value
                # NOTE(review): this always prefixes " AND" even when no
                # WHERE has been written yet — looks suspicious; confirm.
                stmt_str.write(u" AND {0} LIKE {1}".format(
                    qual.field_name, formatting_str))
                binding_values.append(
                    types_mapper.map_object_to_type(
                        val, self.columnsTypes[qual.field_name]))
            else:
                if (qual.operator == ">" or qual.operator == "<"
                        or qual.operator == ">=" or qual.operator == "<="):
                    if (qual.field_name in self.queryableColumns
                            and (self.queryableColumns[qual.field_name] ==
                                 self.CLUSTERING_KEY_QUERY_COST
                                 # only SASI indexes support <,>,>=,<=
                                 or (qual.field_name in self.indexes
                                     and self.indexes[qual.field_name] ==
                                     "org.apache.cassandra.index.sasi.SASIIndex"))
                            or (allow_filtering
                                and self.queryableColumns[qual.field_name] !=
                                self.PARTITION_KEY_QUERY_COST)):
                        rangeUsed = True
                        if isWhere:
                            stmt_str.write(u" AND {0} {1} {2}".format(
                                qual.field_name, qual.operator,
                                formatting_str))
                            binding_values.append(
                                types_mapper.map_object_to_type(
                                    qual.value,
                                    self.columnsTypes[qual.field_name]))
                        else:
                            stmt_str.write(u" WHERE {0} {1} {2}".format(
                                qual.field_name, qual.operator,
                                formatting_str))
                            isWhere = 1
                            binding_values.append(
                                types_mapper.map_object_to_type(
                                    qual.value,
                                    self.columnsTypes[qual.field_name]))
    if (self.limit):
        stmt_str.write(u" LIMIT {0}".format(self.limit))
    if allow_filtering:
        stmt_str.write(u" ALLOW FILTERING ")
    statement = stmt_str.getvalue()
    stmt_str.close()
    if ISDEBUG:
        logger.log(u"CQL query: {0}".format(statement), INFO)
    return (statement, binding_values, filteredColumns)
def range_query(self,
                table_name,
                column_names,
                start_key,
                end_key,
                limit,
                offset=0,
                start_inclusive=True,
                end_inclusive=True,
                keys_only=False):
    """ Gets a dense range ordered by keys. Returns an ordered list of
    a dictionary of [key:{column1:value1, column2:value2},...]
    or a list of keys if keys only.

    Args:
      table_name: Name of table to access
      column_names: Columns which get returned within the key range
      start_key: String for which the query starts at
      end_key: String for which the query ends at
      limit: Maximum number of results to return
      offset: Cuts off these many from the results [offset:]
      start_inclusive: Boolean if results should include the start_key
      end_inclusive: Boolean if results should include the end_key
      keys_only: Boolean if to only keys and not values
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the range_query could not be performed
        due to an error with Cassandra.
    Returns:
      An ordered list of dictionaries of key=>columns/values
    """
    if not isinstance(table_name, str):
        raise TypeError('table_name must be a string')
    if not isinstance(column_names, list):
        raise TypeError('column_names must be a list')
    if not isinstance(start_key, str):
        raise TypeError('start_key must be a string')
    if not isinstance(end_key, str):
        raise TypeError('end_key must be a string')
    if not isinstance(limit, (int, long)) and limit is not None:
        raise TypeError('limit must be int, long, or NoneType')
    if not isinstance(offset, (int, long)):
        raise TypeError('offset must be int or long')
    if start_inclusive:
        gt_compare = '>='
    else:
        gt_compare = '>'
    if end_inclusive:
        lt_compare = '<='
    else:
        lt_compare = '<'
    query_limit = ''
    if limit is not None:
        # Each logical entity spans one Cassandra row per requested column,
        # so the CQL LIMIT is scaled accordingly.
        query_limit = 'LIMIT {}'.format(len(column_names) * limit)
    statement = ('SELECT * FROM "{table}" WHERE '
                 'token({key}) {gt_compare} %s AND '
                 'token({key}) {lt_compare} %s AND '
                 '{column} IN %s '
                 '{limit} '
                 'ALLOW FILTERING').format(table=table_name,
                                           key=ThriftColumn.KEY,
                                           gt_compare=gt_compare,
                                           lt_compare=lt_compare,
                                           column=ThriftColumn.COLUMN_NAME,
                                           limit=query_limit)
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
    parameters = (bytearray(start_key), bytearray(end_key),
                  ValueSequence(column_names))
    try:
        results = yield self.tornado_cassandra.execute(
            query, parameters=parameters)
        results_list = []
        current_item = {}
        current_key = None
        # Rows arrive ordered by key; group consecutive rows that share a
        # key into one {key: {column: value}} entry.
        for (key, column, value) in results:
            if keys_only:
                results_list.append(key)
                continue
            if key != current_key:
                if current_item:
                    results_list.append({current_key: current_item})
                current_item = {}
                current_key = key
            current_item[column] = value
        # Flush the final group.
        if current_item:
            results_list.append({current_key: current_item})
        # Pre-Python-3.3 coroutine return convention.
        raise gen.Return(results_list[offset:])
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        message = 'Exception during range_query'
        logger.exception(message)
        raise AppScaleDBConnectionError(message)
def range_query(self,
                table_name,
                column_names,
                start_key,
                end_key,
                limit,
                offset=0,
                start_inclusive=True,
                end_inclusive=True,
                keys_only=False):
    """ Gets a dense range ordered by keys. Returns an ordered list of
    a dictionary of [key:{column1:value1, column2:value2},...]
    or a list of keys if keys only.

    Args:
      table_name: Name of table to access
      column_names: Columns which get returned within the key range
      start_key: String for which the query starts at
      end_key: String for which the query ends at
      limit: Maximum number of results to return
      offset: Cuts off these many from the results [offset:]
      start_inclusive: Boolean if results should include the start_key
      end_inclusive: Boolean if results should include the end_key
      keys_only: Boolean if to only keys and not values
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the range_query could not be performed
        due to an error with Cassandra.
    Returns:
      An ordered list of dictionaries of key=>columns/values
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected a str")
    if not isinstance(column_names, list):
        raise TypeError("Expected a list")
    if not isinstance(start_key, str):
        raise TypeError("Expected a str")
    if not isinstance(end_key, str):
        raise TypeError("Expected a str")
    if not isinstance(limit, int) and not isinstance(limit, long):
        raise TypeError("Expected an int or long")
    if not isinstance(offset, int) and not isinstance(offset, long):
        raise TypeError("Expected an int or long")
    if start_inclusive:
        gt_compare = '>='
    else:
        gt_compare = '>'
    if end_inclusive:
        lt_compare = '<='
    else:
        lt_compare = '<'
    # Each logical entity spans one Cassandra row per requested column, so
    # the CQL LIMIT is scaled accordingly.
    statement = 'SELECT * FROM "{table}" WHERE '\
                'token({key}) {gt_compare} %s AND '\
                'token({key}) {lt_compare} %s AND '\
                '{column} IN %s '\
                'LIMIT {limit} '\
                'ALLOW FILTERING'.format(
                    table=table_name,
                    key=ThriftColumn.KEY,
                    gt_compare=gt_compare,
                    lt_compare=lt_compare,
                    column=ThriftColumn.COLUMN_NAME,
                    limit=len(column_names) * limit
                )
    query = SimpleStatement(statement, retry_policy=self.retry_policy)
    parameters = (bytearray(start_key), bytearray(end_key),
                  ValueSequence(column_names))
    try:
        results = self.session.execute(query, parameters=parameters)
        results_list = []
        current_item = {}
        current_key = None
        # Rows arrive ordered by key; group consecutive rows that share a
        # key into one {key: {column: value}} entry.
        for (key, column, value) in results:
            if keys_only:
                results_list.append(key)
                continue
            if key != current_key:
                if current_item:
                    results_list.append({current_key: current_item})
                current_item = {}
                current_key = key
            current_item[column] = value
        # Flush the final group.
        if current_item:
            results_list.append({current_key: current_item})
        return results_list[offset:]
    except (cassandra.Unavailable, cassandra.Timeout,
            cassandra.CoordinationFailure, cassandra.OperationTimedOut):
        message = 'Exception during range_query'
        logging.exception(message)
        raise AppScaleDBConnectionError(message)