def _iter(self, key, lo, hi, prefix, reverse, max, include):
    """Build the iterator chain that yields entries for this index.

    A BasicIterator over the index's key prefix is wrapped by the shared
    from_args() driver, which applies the key/range/prefix filters, the
    iteration direction, and the entry limit. The physical-key limit is
    always ``None`` for this variant.
    """
    txn = self.store._txn_context.get()
    base = iterators.BasicIterator(txn, self.prefix)
    return iterators.from_args(
        base, key, lo, hi, prefix, reverse, max, include, None)
def _iter(self, key, lo, hi, prefix, reverse, max_, include, max_phys):
    """Build the iterator chain that yields entries for this collection.

    The underlying physical iterator is supplied by the collection's
    storage strategy; from_args() layers the key/range/prefix filters,
    the iteration direction, the logical entry limit ``max_`` and the
    physical-key limit ``max_phys`` on top of it.
    """
    ctx = self.store._txn_context
    phys = self.strategy.iter(ctx.get())
    return iterators.from_args(
        phys, key, lo, hi, prefix, reverse, max_, include, max_phys)
def batch(self, lo=None, hi=None, prefix=None, max_recs=None,
          max_bytes=None, max_keylen=None, preserve=True,
          max_phys=None, grouper=None):
    """Search the key range *lo..hi* for individual records, combining
    them into batches.

    Returns `(found, made, last_key)` indicating the number of records
    combined, the number of batches produced, and the last key visited
    before `max_phys` was exceeded.

    Batch size is controlled via `max_recs` and `max_bytes`; at least
    one must not be ``None``. Larger sizes may cause pathological
    behaviour in the storage engine (for example, space inefficiency).
    Since batches are fully decompressed before any member may be
    accessed via :py:meth:`get() <Collection.get>` or
    :py:meth:`iteritems() <Collection.iteritems>`, larger sizes may slow
    decompression, waste IO bandwidth, and temporarily use more RAM.

        `lo`:
            Lowest search key.

        `hi`:
            Highest search key.

        `prefix`:
            Restrict the search to keys with this prefix.

        `max_recs`:
            Maximum number of records contained by any single batch.
            When this count is reached, the current batch is saved and a
            new one is created.

        `max_bytes`:
            Maximum size in bytes of the batch record's value after
            compression, or ``None`` for no maximum size. When not
            ``None``, values are recompressed after each member is
            appended, in order to test if `max_bytes` has been reached.
            This is inefficient, but provides the best guarantee of
            final record size. Single records are skipped if they exceed
            this size when compressed individually.

        `max_keylen`:
            Reserved; not referenced by this method's visible body.

        `preserve`:
            If ``True``, then existing batch records in the database are
            left untouched. When one is found within `lo..hi`, the
            currently building batch is finished and the found batch is
            skipped over. If ``False``, found batches are exploded and
            their members contribute to the currently building batch.

        `max_phys`:
            Maximum number of physical keys to visit in any particular
            call. A collection may be incrementally batched by
            repeatedly invoking :py:meth:`Collection.batch` with
            `max_phys` set, and `lo` set to `last_key` of the previous
            run, until `found` returns ``0``. This allows batching to
            complete over several transactions without blocking other
            users.

        `grouper`:
            Specifies a grouping function used to decide when to avoid
            compressing unrelated records. The function is passed a
            record's value. A new batch is triggered each time the
            function's return value changes.
    """
    # NOTE(review): the docstring documents a `(found, made, last_key)`
    # return value, but this body contains no return statement — confirm
    # against callers.
    assert max_bytes or max_recs, 'max_bytes and/or max_recs is required.'
    txn = self.store._txn_context.get()
    it = self.ITERATOR_CLASS(txn, self.prefix, self.compressor)
    # Fresh object() compares unequal to any grouper() return value, so
    # the first record never matches the previous group spuriously.
    groupval = object()
    items = []  # (key, data) pairs accumulated for the batch being built
    for r in iterators.from_args(it, None, lo, hi, prefix,
                                 False, None, True, max_phys):
        if preserve and len(r.keys) > 1:
            # Existing batch record found: flush the batch under
            # construction and skip over the found batch untouched.
            # NOTE(review): this call passes self.compressor while the
            # other _write_batch calls below do not — confirm
            # _write_batch accepts an optional compressor argument.
            self._write_batch(txn, items, self.compressor)
        else:
            # Single record (or preserve=False): absorb it into the
            # batch being built, deleting the original physical key.
            txn.delete(keylib.packs(r.key, self.prefix))
            items.append((r.key, r.data))
            if max_bytes:
                # Recompress after every append to probe the size limit;
                # inefficient but gives the best final-size guarantee.
                _, encoded = self._prepare_batch(items)
                if len(encoded) > max_bytes:
                    # Over the limit: flush without the newest record,
                    # then start the next batch with it.
                    items.pop()
                    self._write_batch(txn, items)
                    items.append((r.key, r.data))
            done = max_recs and len(items) == max_recs
            if (not done) and grouper:
                # A change in the grouper's return value closes the
                # current batch.
                val = grouper(self.encoder.unpack(r.key, r.data))
                done = val != groupval
                groupval = val
            if done:
                # Presumably _write_batch persists the batch and clears
                # `items` for reuse — TODO confirm.
                self._write_batch(txn, items)
    # Flush any partial batch remaining at the end of the range.
    self._write_batch(txn, items)