def __init__(self, tr, tornado_fdb, index, key_slice, fetch_limit, reverse,
             read_versionstamp=None, snapshot=False):
  """ Creates an iterator over a slice of an index.

  Args:
    tr: An FDB transaction.
    tornado_fdb: A TornadoFDB object used to perform the reads.
    index: The index being iterated over; kept for decoding Key-Values.
    key_slice: A slice specifying the start and stop keys.
    fetch_limit: An integer limiting the number of results fetched.
    reverse: If True, iterate over the range in reverse order.
    read_versionstamp: Presumably the versionstamp that results must be
      visible at; entries outside it are skipped elsewhere — confirm against
      the class's _usable method.
    snapshot: If True, the reads will not cause a transaction conflict.
  """
  # NOTE(review): this fragment appears to duplicate IndexIterator.__init__
  # below — confirm whether it is an extraction artifact.
  self.index = index
  self._result_iterator = ResultIterator(
    tr, tornado_fdb, key_slice, fetch_limit, reverse, snapshot=snapshot)
  self._read_versionstamp = read_versionstamp
  # Set once the underlying iterator reports no further results.
  self._done = False
class IndexIterator(object):
  """ Iterates over pages of index entry results.

  Key-Values that are not applicable at the given read_versionstamp are
  skipped; applicable ones are decoded into IndexEntry objects.
  """
  def __init__(self, tr, tornado_fdb, index, key_slice, fetch_limit, reverse,
               read_versionstamp=None, snapshot=False):
    """ Creates a new IndexIterator.

    Args:
      tr: An FDB transaction.
      tornado_fdb: A TornadoFDB object.
      index: The index being iterated over.
      key_slice: A slice specifying the start and stop keys.
      fetch_limit: An integer limiting the number of fetched results.
      reverse: If True, iterate in reverse order.
      read_versionstamp: Entries must be visible at this versionstamp.
      snapshot: If True, reads will not cause a transaction conflict.
    """
    self.index = index
    self._read_versionstamp = read_versionstamp
    self._done = False
    self._result_iterator = ResultIterator(
      tr, tornado_fdb, key_slice, fetch_limit, reverse, snapshot=snapshot)

  @property
  def prop_names(self):
    """ The property names of the underlying index. """
    return self.index.prop_names

  @property
  def start_key(self):
    """ The key at which the iteration starts. """
    return self._result_iterator.slice.start.key

  @gen.coroutine
  def next_page(self):
    """ Fetches the next page of applicable index entries.

    Returns:
      A tuple of (list of IndexEntry objects, whether more results remain).
    """
    if self._done:
      raise gen.Return(([], False))

    raw_results, more_results = yield self._result_iterator.next_page()
    entries = []
    for kv in raw_results:
      decoded = self.index.decode(kv)
      if self._usable(decoded):
        entries.append(decoded)
      else:
        # A skipped Key-Value still consumed part of the fetch limit, so let
        # the underlying iterator fetch an extra result in its place.
        self._result_iterator.increase_limit()
        more_results = not self._result_iterator.done_with_range

    if not more_results:
      self._done = True

    raise gen.Return((entries, more_results))

  def _usable(self, entry):
    """ Returns True if the entry applies at the read versionstamp. """
    if not self._read_versionstamp:
      # Without a read versionstamp, only entries that were never deleted
      # are usable.
      return entry.deleted_versionstamp is None

    if entry.deleted_versionstamp:
      # The entry must have been committed before, and deleted after, the
      # read versionstamp.
      return (entry.commit_versionstamp < self._read_versionstamp <
              entry.deleted_versionstamp)

    return entry.commit_versionstamp < self._read_versionstamp
def _last_version(self, tr, data_ns, desired_slice, include_data=True,
                  snapshot=False):
  """ Gets the most recent entity data for a given slice.

  Args:
    tr: An FDB transaction.
    data_ns: A DataNamespace.
    desired_slice: A slice specifying the start and stop keys.
    include_data: A boolean indicating that all chunks should be fetched.
    snapshot: If True, the read will not cause a transaction conflict.
  Returns:
    A VersionEntry or None.
  """
  # Only the last Key-Value in the range is needed to identify the version.
  kvs, count, more = yield self._tornado_fdb.get_range(
    tr, desired_slice, limit=1, reverse=True, snapshot=snapshot)
  if not kvs:
    return

  final_chunk = kvs[0]
  version_entry = data_ns.decode([final_chunk])

  # Skip fetching the rest of the chunks when the caller doesn't want the
  # data, there is no data present, or the single chunk is already complete.
  needs_more_chunks = (include_data and version_entry.present and
                       not version_entry.complete)
  if not needs_more_chunks:
    raise gen.Return(version_entry)

  # Retrieve the chunks that precede the one already fetched.
  version_slice = data_ns.get_slice(version_entry.path,
                                    version_entry.commit_versionstamp)
  preceding = slice(version_slice.start, first_gt_or_equal(final_chunk.key))
  earlier_chunks = yield ResultIterator(tr, self._tornado_fdb,
                                        preceding).list()
  raise gen.Return(data_ns.decode(earlier_chunks + [final_chunk]))
def get_version_from_path(self, tr, project_id, namespace, path,
                          commit_versionstamp, snapshot=False):
  """ Gets the entity data for a specific version.

  Args:
    tr: An FDB transaction.
    project_id: A string specifying the project ID.
    namespace: A string specifying the namespace.
    path: A tuple or protobuf path object.
    commit_versionstamp: A 10-byte string specifying the FDB commit
      versionstamp.
    snapshot: If True, the read will not cause a transaction conflict.
  Returns:
    A VersionEntry or None.
  """
  data_ns = yield self._data_ns(tr, project_id, namespace)
  version_slice = data_ns.get_slice(path, commit_versionstamp)
  iterator = ResultIterator(tr, self._tornado_fdb, version_slice,
                            snapshot=snapshot)
  kvs = yield iterator.list()
  raise gen.Return(data_ns.decode(kvs))
def _groom_range(self, tr, index, byte_num, safe_versionstamp, tx_deadline):
  """ Hard-deletes entity versions referenced by a section of an index.

  Args:
    tr: An FDB transaction.
    index: The index whose entries should be groomed.
    byte_num: Selects the section of the index to iterate over — confirm
      exact semantics against index.get_slice.
    safe_versionstamp: Bounds the slice to versions that are safe to remove.
    tx_deadline: A monotonic timestamp; no further pages are fetched once it
      has passed.
  Returns:
    An integer specifying how many index entries were processed.
  """
  pages = ResultIterator(tr, self._tornado_fdb,
                         index.get_slice(byte_num, safe_versionstamp))
  removed = 0
  while True:
    page, more = yield pages.next_page()
    for kv in page:
      index_entry = index.decode(kv)
      version_entry = yield self._data_manager.get_version_from_path(
        tr, index_entry.project_id, index_entry.namespace, index_entry.path,
        index_entry.original_versionstamp)
      yield self._hard_delete(tr, version_entry,
                              index_entry.deleted_versionstamp)
      removed += 1

    # The deadline is only checked between pages so that a fetched page is
    # always fully processed.
    if not more or monotonic.monotonic() > tx_deadline:
      break

  raise gen.Return(removed)
def get_metadata(self, tr, project_id, txid):
  """ Fetches the metadata recorded for a transaction.

  Args:
    tr: An FDB transaction.
    project_id: A string specifying the project ID.
    txid: A transaction ID (decodable by TransactionID.decode).
  Returns:
    The decoded transaction metadata.
  Raises:
    BadRequest: If the transaction does not exist.
  """
  tx_dir = yield self._tx_metadata(tr, project_id)
  results = yield ResultIterator(tr, self._tornado_fdb,
                                 tx_dir.get_txid_slice(txid)).list()

  # The first Key-Value in the range must be the transaction's start key;
  # otherwise the transaction is unknown.
  scatter_val, tx_start_versionstamp = TransactionID.decode(txid)
  expected_start_key = tx_dir.encode_start_key(scatter_val,
                                               tx_start_versionstamp)
  if not results or results[0].key != expected_start_key:
    raise BadRequest(u'Transaction not found')

  # Everything after the start key holds the metadata itself.
  raise gen.Return(tx_dir.decode_metadata(txid, results[1:]))
def _periodic_summary(self):
  """ Periodically summarizes per-project stats and upserts them as entities.

  Runs forever: each pass walks every project, skips projects whose stats
  have not changed since the last summarized versionstamp, and writes the
  summarized entities in batches, committing and starting a fresh
  transaction whenever the FDB transaction deadline approaches.
  """
  while True:
    try:
      # NOTE(review): the lock is acquired but never visibly released here —
      # confirm whether release happens elsewhere or only one pass is ever
      # intended to run.
      yield self._summary_lock.acquire()
      tr = self._db.create_transaction()
      # Leave a one-second safety margin under the FDB transaction limit.
      deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1
      # Versionstamps seen this pass; merged into self._last_summarized only
      # after a successful commit.
      last_summarized = {}

      # TODO: This can be made async.
      project_ids = self._directory_cache.root_dir.list(tr)

      summarized_projects = []
      for project_id in project_ids:
        stats_dir = yield self._project_stats_dir(tr, project_id)
        last_vs_key = stats_dir.encode_last_versionstamp()[0]
        last_versionstamp = yield self._tornado_fdb.get(
          tr, last_vs_key, snapshot=True)
        # Skip projects with no stats or with unchanged stats since the
        # previous summary.
        if (not last_versionstamp.present() or
            last_versionstamp.value == self._last_summarized.get(project_id)):
          continue

        last_summarized[project_id] = last_versionstamp.value
        results = yield ResultIterator(
          tr, self._tornado_fdb, stats_dir.directory.range(),
          snapshot=True).list()
        project_stats, last_timestamp = stats_dir.decode(results)
        entities = fill_entities(project_id, project_stats, last_timestamp)
        # Upsert the summary entities in batches of BATCH_SIZE.
        for pos in range(0, len(entities), self.BATCH_SIZE):
          yield [self._ds_access._upsert(tr, entity)
                 for entity in entities[pos:pos + self.BATCH_SIZE]]
          # Commit and restart the transaction if the deadline is near so a
          # long pass does not exceed the FDB transaction duration limit.
          if monotonic.monotonic() > deadline:
            yield self._tornado_fdb.commit(tr)
            tr = self._db.create_transaction()
            deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1

        summarized_projects.append(project_id)

      yield self._tornado_fdb.commit(tr)
      # Only record progress after the final commit succeeds.
      self._last_summarized.update(last_summarized)
      if summarized_projects:
        logger.debug(u'Finished summarizing stats for '
                     u'{}'.format(summarized_projects))

      yield gen.sleep(self.SUMMARY_INTERVAL)
    except Exception:
      logger.exception(u'Unexpected error while summarizing stats')
      # Randomized backoff (up to 20s) before retrying after a failure.
      yield gen.sleep(random.random() * 20)