@classmethod
def from_key(cls, key):
  project_id = decode_str(key.app())
  namespace = None
  if key.has_name_space():
    namespace = decode_str(key.name_space())

  path = Path.flatten(key.path())
  return cls(project_id, namespace, path)

@gen.coroutine
def _get_perfect_index(self, tr, query):
  project_id = decode_str(query.app())
  namespace = decode_str(query.name_space())
  filter_props = group_filters(query)
  order_info = get_order_info(query)

  prop_names = [filter_prop.name for filter_prop in filter_props]
  prop_names.extend([prop_name for prop_name, _ in order_info
                     if prop_name not in prop_names])
  prop_names.extend([decode_str(prop_name)
                     for prop_name in query.property_name_list()
                     if prop_name not in prop_names])

  if not query.has_kind():
    if not all(prop_name == KEY_PROP for prop_name in prop_names):
      raise BadRequest(
        u'kind must be specified when filtering or ordering '
        u'properties other than __key__')

    kindless_index = yield self._kindless_index(tr, project_id, namespace)
    raise gen.Return(kindless_index)

  kind = decode_str(query.kind())
  if all(prop_name == KEY_PROP for prop_name in prop_names):
    kind_index = yield self._kind_index(tr, project_id, namespace, kind)
    raise gen.Return(kind_index)

  if sum(prop_name != KEY_PROP for prop_name in prop_names) == 1:
    prop_name = next(prop_name for prop_name in prop_names
                     if prop_name != KEY_PROP)
    ordered_prop = prop_name in [order_name for order_name, _ in order_info]
    if not query.has_ancestor() or not ordered_prop:
      single_prop_index = yield self._single_prop_index(
        tr, project_id, namespace, kind, prop_name)
      raise gen.Return(single_prop_index)

  queryable = [
    index.to_pb() for index
    in (yield self._composite_index_manager.get_definitions(tr, project_id))
    if index.ready]
  index_pb = FindIndexToUse(query, queryable)
  if index_pb is not None:
    composite_index = yield self._composite_index(
      tr, project_id, index_pb.id(), namespace)
    raise gen.Return(composite_index)

@gen.coroutine
def _get_index_keys(self, tr, entity, commit_versionstamp=None):
  has_index = commit_versionstamp is None
  project_id = decode_str(entity.key().app())
  namespace = decode_str(entity.key().name_space())
  path = Path.flatten(entity.key().path())
  kind = path[-2]
  stats = IndexStatsSummary()

  kindless_index = yield self._kindless_index(tr, project_id, namespace)
  kind_index = yield self._kind_index(tr, project_id, namespace, kind)
  composite_indexes = yield self._get_indexes(tr, project_id, namespace,
                                              kind)

  kindless_key = kindless_index.encode_key(path, commit_versionstamp)
  kind_key = kind_index.encode_key(path, commit_versionstamp)
  stats.add_kindless_key(kindless_key, has_index)
  stats.add_kind_key(kind_key, has_index)
  all_keys = [kindless_key, kind_key]
  entity_prop_names = []
  for prop in entity.property_list():
    prop_name = decode_str(prop.name())
    entity_prop_names.append(prop_name)
    index = yield self._single_prop_index(tr, project_id, namespace, kind,
                                          prop_name)
    prop_key = index.encode_key(prop.value(), path, commit_versionstamp)
    stats.add_prop_key(prop, prop_key, has_index)
    all_keys.append(prop_key)

  scatter_val = get_scatter_val(path)
  if scatter_val is not None:
    index = yield self._single_prop_index(tr, project_id, namespace, kind,
                                          SCATTER_PROP)
    all_keys.append(index.encode_key(scatter_val, path, commit_versionstamp))

  for index in composite_indexes:
    # If the entity does not have the relevant props for the index, skip it.
    if not all(index_prop_name in entity_prop_names
               for index_prop_name in index.prop_names):
      continue

    composite_keys = index.encode_keys(entity.property_list(), path,
                                       commit_versionstamp)
    stats.add_composite_keys(index.id, composite_keys, has_index)
    all_keys.extend(composite_keys)

  raise gen.Return((all_keys, stats))

def get_order_info(query):
  filter_props = group_filters(query)

  # Orders on equality filters can be ignored.
  equality_props = [prop.name for prop in filter_props if prop.equality]
  relevant_orders = [order for order in query.order_list()
                     if order.property() not in equality_props]

  order_info = []
  for filter_prop in filter_props:
    if filter_prop.equality:
      continue

    direction = next(
      (order.direction() for order in relevant_orders
       if order.property() == filter_prop.name), Query_Order.ASCENDING)
    order_info.append((filter_prop.name, direction))

  filter_prop_names = [prop.name for prop in filter_props]
  order_info.extend(
    [(decode_str(order.property()), order.direction())
     for order in relevant_orders
     if order.property() not in filter_prop_names])

  return tuple(order_info)

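# Why orders on equality-filtered properties can be ignored: every entity
# matching the filter shares the same value for that property, so sorting by
# it is a no-op. A minimal, self-contained illustration with plain dicts
# (hypothetical data, not datastore_pb types):
rows = [{'a': 1, 'b': 3}, {'a': 1, 'b': 1}, {'a': 1, 'b': 2}]
# With the equality filter a == 1 applied, ordering by ('a', 'b') and by
# ('b',) alone produce the same result:
assert (sorted(rows, key=lambda r: (r['a'], r['b'])) ==
        sorted(rows, key=lambda r: r['b']))
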
@gen.coroutine
def rollback_transaction(self, project_id, txid):
  project_id = decode_str(project_id)
  logger.debug(u'Rolling back {}:{}'.format(project_id, txid))

  tr = self._db.create_transaction()
  yield self._tx_manager.delete(tr, project_id, txid)
  yield self._tornado_fdb.commit(tr)

@gen.coroutine
def log_query(self, tr, project_id, query):
  txid = query.transaction().handle()
  namespace = decode_str(query.name_space())
  if not query.has_ancestor():
    raise BadRequest(u'Queries in a transaction must specify an ancestor')

  tx_dir = yield self._tx_metadata(tr, project_id)
  tr[tx_dir.encode_query_key(txid, namespace, query.ancestor().path())] = b''

def apply_props(prop_list, subtract=False):
  # Nested helper: entity_stats, namespace, and kind are defined in the
  # enclosing scope.
  for prop_pb in prop_list:
    prop_type = stats_prop_type(prop_pb)
    prop_name = decode_str(prop_pb.name())
    fields = entity_stats[namespace][kind][prop_type][prop_name]
    delta = CountBytes(1, len(prop_pb.Encode()))
    if subtract:
      fields -= delta
    else:
      fields += delta

@gen.coroutine
def dynamic_delete(self, project_id, delete_request, retries=5):
  logger.debug(u'delete_request:\n{}'.format(delete_request))
  project_id = decode_str(project_id)
  tr = self._db.create_transaction()

  if delete_request.has_transaction():
    yield self._tx_manager.log_deletes(tr, project_id, delete_request)
    deletes = [(VersionEntry.from_key(key), None, None)
               for key in delete_request.key_list()]
  else:
    # Eliminate multiple deletes to the same key.
    deletes_by_key = {key.Encode(): key
                      for key in delete_request.key_list()}
    deletes = yield [self._delete(tr, key)
                     for key in six.itervalues(deletes_by_key)]

  old_entries = [old_entry for old_entry, _, _ in deletes
                 if old_entry.present]
  versionstamp_future = None
  if old_entries:
    versionstamp_future = tr.get_versionstamp()

  try:
    yield self._tornado_fdb.commit(tr, convert_exceptions=False)
  except fdb.FDBError as fdb_error:
    if fdb_error.code == FDBErrorCodes.NOT_COMMITTED:
      pass
    elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN:
      logger.error('Unable to determine commit result. Retrying.')
    else:
      raise InternalError(fdb_error.description)

    retries -= 1
    if retries < 0:
      raise InternalError(fdb_error.description)

    yield self.dynamic_delete(project_id, delete_request, retries)
    return

  if old_entries:
    self._gc.clear_later(old_entries, versionstamp_future.wait().value)

  mutations = [(old_entry, None, stats)
               for old_entry, _, stats in deletes if stats is not None]
  IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                  mutations)

  # TODO: Once the Cassandra backend is removed, populate a delete response.
  for old_entry, new_version, _ in deletes:
    logger.debug(u'new_version: {}'.format(new_version))

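# A summary of the retry policy above (error codes per FoundationDB's
# documented errors; the FDBErrorCodes constants are assumed to mirror them):
#   not_committed (1020): a conflicting transaction committed first;
#     retry silently.
#   commit_unknown_result (1021): the commit may or may not have been
#     applied; log it and retry.
#   any other code: surface as an InternalError.
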
@gen.coroutine
def _index_details(self, tr, project_id, index_id):
  project_indexes = yield self._composite_index_manager.get_definitions(
    tr, project_id)
  index_def = next((ds_index for ds_index in project_indexes
                    if ds_index.id == index_id), None)
  if index_def is None:
    raise InternalError(u'Unable to retrieve index details')

  order_info = tuple(
    (decode_str(prop.name), prop.to_pb().direction())
    for prop in index_def.properties)
  raise gen.Return((index_def.kind, index_def.ancestor, order_info))

def _enforce_max_groups(mutations):
  """ Raises an exception if too many groups were modified. """
  mutated_groups = set()
  for mutation in mutations:
    if isinstance(mutation, entity_pb.Reference):
      key = mutation
    else:
      key = mutation.key()

    namespace = decode_str(key.name_space())
    flat_group = (namespace,) + Path.flatten(key.path())[:2]
    mutated_groups.add(flat_group)

    if len(mutated_groups) > 25:
      raise BadRequest(u'Too many entity groups modified in transaction')

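# Illustrative sketch of the group arithmetic above, assuming a
# Path.flatten-style tuple that alternates kind and id/name: the first two
# elements of a flattened path identify the entity group's root.
example_flat_path = ('Parent', 5, 'Child', u'x')  # hypothetical key path
assert (u'ns',) + example_flat_path[:2] == (u'ns', 'Parent', 5)
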
@gen.coroutine
def dynamic_get(self, project_id, get_request, get_response):
  logger.debug(u'get_request:\n{}'.format(get_request))
  project_id = decode_str(project_id)
  tr = self._db.create_transaction()

  read_versionstamp = None
  if get_request.has_transaction():
    yield self._tx_manager.log_lookups(tr, project_id, get_request)

    # Ensure the GC hasn't cleaned up an entity written after the tx start.
    safe_read_stamps = yield [self._gc.safe_read_versionstamp(tr, key)
                              for key in get_request.key_list()]
    safe_read_stamps = [vs for vs in safe_read_stamps if vs is not None]
    read_versionstamp = TransactionID.decode(
      get_request.transaction().handle())[1]
    if any(safe_versionstamp > read_versionstamp
           for safe_versionstamp in safe_read_stamps):
      raise BadRequest(u'The specified transaction has expired')

  futures = []
  for key in get_request.key_list():
    futures.append(self._data_manager.get_latest(tr, key, read_versionstamp,
                                                 snapshot=True))

  version_entries = yield futures

  # If this read is in a transaction, logging the RPC is a mutation.
  yield self._tornado_fdb.commit(tr)

  for entry in version_entries:
    response_entity = get_response.add_entity()
    response_entity.set_version(entry.version)
    if entry.has_entity:
      response_entity.mutable_entity().MergeFrom(entry.decoded)
    else:
      response_entity.mutable_key().MergeFrom(entry.key)

  logger.debug(u'fetched paths: {}'.format(
    [entry.path for entry in version_entries if entry.has_entity]))

@gen.coroutine
def apply_txn_changes(self, project_id, txid, retries=5):
  logger.debug(u'Applying {}:{}'.format(project_id, txid))
  project_id = decode_str(project_id)
  tr = self._db.create_transaction()
  read_versionstamp = TransactionID.decode(txid)[1]
  lookups, queried_groups, mutations = yield self._tx_manager.get_metadata(
    tr, project_id, txid)

  try:
    writes = yield self._apply_mutations(
      tr, project_id, queried_groups, mutations, lookups, read_versionstamp)
  finally:
    yield self._tx_manager.delete(tr, project_id, txid)

  versionstamp_future = None
  old_entries = [old_entry for old_entry, _, _ in writes
                 if old_entry.present]
  if old_entries:
    versionstamp_future = tr.get_versionstamp()

  try:
    yield self._tornado_fdb.commit(tr, convert_exceptions=False)
  except fdb.FDBError as fdb_error:
    if fdb_error.code != FDBErrorCodes.NOT_COMMITTED:
      raise InternalError(fdb_error.description)

    retries -= 1
    if retries < 0:
      raise InternalError(fdb_error.description)

    yield self.apply_txn_changes(project_id, txid, retries)
    return

  if old_entries:
    self._gc.clear_later(old_entries, versionstamp_future.wait().value)

  mutations = [(old_entry, FDBDatastore._filter_version(new_entry),
                index_stats)
               for old_entry, new_entry, index_stats in writes
               if index_stats is not None]
  IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                  mutations)

  logger.debug(u'Finished applying {}:{}'.format(project_id, txid))

def group_filters(query):
  filter_props = []
  for query_filter in query.filter_list():
    if query_filter.property_size() != 1:
      raise BadRequest(u'Each filter must have exactly one property')

    prop = query_filter.property(0)
    prop_name = decode_str(prop.name())
    filter_info = (query_filter.op(), prop.value())
    if filter_props and filter_props[-1].name == prop_name:
      filter_props[-1].filters.append(filter_info)
    else:
      filter_props.append(FilterProperty(prop_name, [filter_info]))

  # Since the filter list can come in any order, put inequality filters last.
  inequality_index = None
  for index, filter_prop in enumerate(filter_props):
    if not filter_prop.equality:
      inequality_index = index
      break

  if inequality_index is not None:
    inequality_prop = filter_props.pop(inequality_index)
    filter_props.append(inequality_prop)

  # Put key filters last.
  key_index = None
  for index, filter_prop in enumerate(filter_props):
    if filter_prop.name == KEY_PROP:
      key_index = index
      break

  if key_index is not None:
    key_prop = filter_props.pop(key_index)
    filter_props.append(key_prop)

  for filter_prop in filter_props[:-1]:
    if filter_prop.name == KEY_PROP:
      raise BadRequest(
        u'Only the last filter property can be on {}'.format(KEY_PROP))

    if not filter_prop.equality:
      raise BadRequest(u'All but the last property must be equality filters')

  return tuple(filter_props)

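# Minimal, self-contained sketch of the grouping step above. MiniFilter is a
# hypothetical stand-in for FilterProperty, and EQUAL's opcode value (5 in
# datastore_pb) is used for illustration only: consecutive filters on the
# same property collapse into a single group.
from collections import namedtuple

MiniFilter = namedtuple('MiniFilter', ['name', 'filters'])
EQUAL = 5

def mini_group(raw_filters):
  """ raw_filters: iterable of (prop_name, op, value) in request order. """
  grouped = []
  for name, op, value in raw_filters:
    if grouped and grouped[-1].name == name:
      grouped[-1].filters.append((op, value))
    else:
      grouped.append(MiniFilter(name, [(op, value)]))
  return grouped

grouped = mini_group([('a', EQUAL, 1), ('a', EQUAL, 2), ('b', EQUAL, 3)])
assert [g.name for g in grouped] == ['a', 'b']
assert grouped[0].filters == [(EQUAL, 1), (EQUAL, 2)]
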
@gen.coroutine
def _dynamic_run_query(self, query, query_result):
  logger.debug(u'query: {}'.format(query))
  project_id = decode_str(query.app())
  tr = self._db.create_transaction()
  read_versionstamp = None
  if query.has_transaction():
    yield self._tx_manager.log_query(tr, project_id, query)

    # Ensure the GC hasn't cleaned up an entity written after the tx start.
    safe_versionstamp = yield self._gc.safe_read_versionstamp(
      tr, query.ancestor())
    read_versionstamp = TransactionID.decode(
      query.transaction().handle())[1]
    if (safe_versionstamp is not None and
        safe_versionstamp > read_versionstamp):
      raise BadRequest(u'The specified transaction has expired')

  fetch_data = self._index_manager.include_data(query)
  rpc_limit, check_more_results = self._index_manager.rpc_limit(query)

  iterator = yield self._index_manager.get_iterator(
    tr, query, read_versionstamp)
  for prop_name in query.property_name_list():
    prop_name = decode_str(prop_name)
    if prop_name not in iterator.prop_names:
      raise BadRequest(
        u'Projections on {} are not supported'.format(prop_name))

  data_futures = [] if fetch_data else None
  unique_keys = set()
  results = []
  entries_fetched = 0
  skipped_results = 0
  cursor = None
  while True:
    remainder = rpc_limit - entries_fetched
    iter_offset = max(query.offset() - entries_fetched, 0)
    entries, more_iterator_results = yield iterator.next_page()
    entries_fetched += len(entries)
    if not entries and more_iterator_results:
      continue

    if not entries and not more_iterator_results:
      break

    skipped_results += min(len(entries), iter_offset)
    suitable_entries = entries[iter_offset:remainder]
    if entries[:remainder]:
      cursor = entries[:remainder][-1]

    if not fetch_data and not query.keys_only():
      results.extend([entry.prop_result() for entry in suitable_entries])
      continue

    for entry in suitable_entries:
      if entry.path in unique_keys:
        continue

      unique_keys.add(entry.path)
      if fetch_data:
        data_futures.append(
          self._data_manager.get_entry(tr, entry, snapshot=True))
      else:
        results.append(entry.key_result())

    if not more_iterator_results:
      break

  if fetch_data:
    entity_results = yield data_futures
    results = [entity.encoded for entity in entity_results]
  else:
    results = [result.Encode() for result in results]

  yield self._tornado_fdb.commit(tr)

  query_result.result_list().extend(results)
  # TODO: Figure out how ndb multi queries use compiled cursors.
  if query.compile():
    ordered_props = tuple(
      prop_name for prop_name, _ in get_order_info(query)
      if prop_name != KEY_PROP)
    mutable_cursor = query_result.mutable_compiled_cursor()
    if cursor is not None:
      mutable_cursor.MergeFrom(cursor.cursor_result(ordered_props))

  more_results = check_more_results and entries_fetched > rpc_limit
  query_result.set_more_results(more_results)

  if skipped_results:
    query_result.set_skipped_results(skipped_results)

  if query.keys_only():
    query_result.set_keys_only(True)

  logger.debug(u'{} results'.format(len(query_result.result_list())))

def _encode_keys(self, keys):
  return b''.join(
    [Text.encode(decode_str(key.name_space())) + Path.pack(key.path())
     for key in keys])

@gen.coroutine
def setup_transaction(self, project_id, is_xg):
  project_id = decode_str(project_id)
  txid = yield self._tx_manager.create(project_id)
  logger.debug(u'Started new transaction: {}:{}'.format(project_id, txid))
  raise gen.Return(txid)

@gen.coroutine
def update_composite_index(self, project_id, index):
  project_id = decode_str(project_id)
  yield self._index_manager.update_composite_index(project_id, index)

@gen.coroutine
def dynamic_put(self, project_id, put_request, put_response, retries=5):
  # logger.debug(u'put_request:\n{}'.format(put_request))
  project_id = decode_str(project_id)
  # TODO: Enforce max key length (100 elements).
  # Enforce max element size (1500 bytes).
  # Enforce max kind size (1500 bytes).
  # Enforce key name regex (reserved names match __.*__).
  if put_request.auto_id_policy() != put_request.CURRENT:
    raise BadRequest(u'Sequential allocator is not implemented')

  tr = self._db.create_transaction()

  if put_request.has_transaction():
    yield self._tx_manager.log_puts(tr, project_id, put_request)
    writes = {self._collapsible_id(entity):
              (VersionEntry.from_key(entity.key()),
               VersionEntry.from_key(entity.key()), None)
              for entity in put_request.entity_list()}
  else:
    # Eliminate multiple puts to the same key.
    puts_by_key = {self._collapsible_id(entity): entity
                   for entity in put_request.entity_list()}
    writes = yield {key: self._upsert(tr, entity)
                    for key, entity in six.iteritems(puts_by_key)}

  old_entries = [old_entry for old_entry, _, _ in six.itervalues(writes)
                 if old_entry.present]
  versionstamp_future = None
  if old_entries:
    versionstamp_future = tr.get_versionstamp()

  try:
    yield self._tornado_fdb.commit(tr, convert_exceptions=False)
  except fdb.FDBError as fdb_error:
    if fdb_error.code == FDBErrorCodes.NOT_COMMITTED:
      pass
    elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN:
      logger.error('Unable to determine commit result. Retrying.')
    else:
      raise InternalError(fdb_error.description)

    retries -= 1
    if retries < 0:
      raise InternalError(fdb_error.description)

    yield self.dynamic_put(project_id, put_request, put_response, retries)
    return

  if old_entries:
    self._gc.clear_later(old_entries, versionstamp_future.wait().value)

  mutations = [(old_entry, new_entry, index_stats)
               for old_entry, new_entry, index_stats
               in six.itervalues(writes) if index_stats is not None]
  IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                  mutations)

  for entity in put_request.entity_list():
    write_entry = writes[self._collapsible_id(entity)][1]
    put_response.add_key().CopyFrom(write_entry.key)
    if write_entry.version != ABSENT_VERSION:
      put_response.add_version(write_entry.version)

@gen.coroutine
def get_iterator(self, tr, query, read_versionstamp=None):
  project_id = decode_str(query.app())
  namespace = decode_str(query.name_space())
  filter_props = group_filters(query)
  ancestor_path = tuple()
  if query.has_ancestor():
    ancestor_path = Path.flatten(query.ancestor().path())

  start_cursor = None
  if query.has_compiled_cursor():
    start_cursor = ListCursor(query)._GetLastResult()

  end_cursor = None
  if query.has_end_compiled_cursor():
    end_compiled = query.end_compiled_cursor()
    end_cursor = ListCursor(query)._DecodeCompiledCursor(end_compiled)[0]

  rpc_limit, check_more_results = self.rpc_limit(query)
  fetch_limit = rpc_limit
  if check_more_results:
    fetch_limit += 1

  if query.has_kind() and query.kind() == u'__namespace__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(NamespaceIterator(tr, self._tornado_fdb, project_dir))
  elif query.has_kind() and query.kind() == u'__kind__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(
      KindIterator(tr, self._tornado_fdb, project_dir, namespace))
  elif query.has_kind() and query.kind() == u'__property__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(
      PropertyIterator(tr, self._tornado_fdb, project_dir, namespace))

  index = yield self._get_perfect_index(tr, query)
  reverse = get_scan_direction(query, index) == Query_Order.DESCENDING

  if index is None:
    if not all(prop.equality for prop in filter_props):
      raise BadRequest(u'Query not supported')

    indexes = []
    equality_props = [filter_prop for filter_prop in filter_props
                      if filter_prop.name == KEY_PROP]
    if len(equality_props) > 1:
      raise BadRequest(u'Only one equality key filter is supported')

    equality_prop = next(iter(equality_props), None)
    other_props = [filter_prop for filter_prop in filter_props
                   if filter_prop.name != KEY_PROP]
    for filter_prop in other_props:
      index = yield self._single_prop_index(
        tr, project_id, namespace, decode_str(query.kind()),
        filter_prop.name)
      for op, value in filter_prop.filters:
        tmp_filter_prop = FilterProperty(filter_prop.name, [(op, value)])
        if equality_prop is not None:
          tmp_filter_props = (tmp_filter_prop, equality_prop)
        else:
          tmp_filter_props = (tmp_filter_prop,)

        index_slice = index.get_slice(tmp_filter_props, ancestor_path,
                                      start_cursor, end_cursor)
        indexes.append([index, index_slice, filter_prop.name, value])

    raise gen.Return(
      MergeJoinIterator(tr, self._tornado_fdb, filter_props, indexes,
                        fetch_limit, read_versionstamp, ancestor_path,
                        snapshot=True))

  equality_prop = next(
    (filter_prop for filter_prop in filter_props if filter_prop.equality),
    None)
  if equality_prop is not None and len(equality_prop.filters) > 1:
    indexes = []
    for op, value in equality_prop.filters:
      tmp_filter_props = []
      for filter_prop in filter_props:
        if filter_prop.name == equality_prop.name:
          tmp_filter_props.append(
            FilterProperty(filter_prop.name, [(op, value)]))
        else:
          tmp_filter_props.append(filter_prop)

      desired_slice = index.get_slice(
        tmp_filter_props, ancestor_path, start_cursor, end_cursor, reverse)
      indexes.append([index, desired_slice, equality_prop.name, value])

    raise gen.Return(
      MergeJoinIterator(tr, self._tornado_fdb, filter_props, indexes,
                        fetch_limit, read_versionstamp, ancestor_path,
                        snapshot=True))

  desired_slice = index.get_slice(filter_props, ancestor_path, start_cursor,
                                  end_cursor, reverse)

  iterator = IndexIterator(tr, self._tornado_fdb, index, desired_slice,
                           fetch_limit, reverse, read_versionstamp,
                           snapshot=True)

  raise gen.Return(iterator)

@gen.coroutine
def _data_ns_from_key(self, tr, key):
  project_id = decode_str(key.app())
  namespace = decode_str(key.name_space())
  data_ns = yield self._data_ns(tr, project_id, namespace)
  raise gen.Return(data_ns)

@gen.coroutine
def _safe_read_dir_from_key(self, tr, key):
  project_id = decode_str(key.app())
  namespace = decode_str(key.name_space())
  safe_read_dir = yield self._safe_read_dir(tr, project_id, namespace)
  raise gen.Return(safe_read_dir)

def add_prop_key(self, prop_pb, key, has_index):
  prop_type = stats_prop_type(prop_pb)
  prop_name = decode_str(prop_pb.name())
  self.single_prop[prop_type][prop_name] += CountBytes(
    1, self._stored_length(key, has_index))