def _prepare_doc(self, doc):
    """Replace attachment metadata in ``doc`` with loaded content.

    Does nothing unless ``self.load_attachments`` is true. Otherwise
    every entry of ``doc["_attachments"]`` is replaced by a dict holding
    the entry's ``content_type`` and the content returned by
    ``BlobHelper.fetch_attachment`` for that attachment name.

    :param doc: Document dict; modified in place.
    :raises KeyError: If attachments are loaded and ``doc`` has no
    ``"_attachments"`` key, or an entry has no ``"content_type"``.
    """
    if not self.load_attachments:
        return

    helper = BlobHelper(doc, self.couchdb)
    loaded = {}
    for name, meta in doc["_attachments"].items():
        loaded[name] = {
            "content_type": meta["content_type"],
            "content": helper.fetch_attachment(name),
        }
    # Assign only after every attachment was fetched so a failure part
    # way through leaves the document untouched.
    doc["_attachments"] = loaded
def __init__(self, doc, database, exclude_attachments=False):
    """Wrap a raw document dict, pulling its attachments out of it.

    ``doc`` is modified in place: a non-empty ``"_attachments"`` value
    and any ``"external_blobs"`` value are replaced with empty dicts
    (only when the document has attachment metadata at all), and
    ``"_rev"`` is removed.

    :param doc: Document dict. Stored as ``self.doc``.
    :param database: Stored as ``self.database`` and passed to
    ``BlobHelper`` when attachment content is fetched.
    :param exclude_attachments: When true, no attachment content is
    fetched and ``self._attachments`` / ``self.attachments`` stay empty.
    """
    self._attachments = {}
    self.attachments = {}
    self.database = database

    # NOTE: when the document has a non-empty "_attachments" dict this is
    # that very dict object, so the update below merges "external_blobs"
    # into it in place, before BlobHelper is handed the document.
    _attachments = doc.get("_attachments", None) or {}
    _attachments.update(doc.get("external_blobs", None) or {})

    if _attachments:
        if not exclude_attachments:
            self._attachments = _attachments
            obj = BlobHelper(doc, database, None)
            self.attachments = {
                name: obj.fetch_attachment(name) for name in _attachments
            }
        # Clear the metadata on the document itself; what was collected
        # above now lives on this object instead.
        if doc.get("_attachments"):
            doc["_attachments"] = {}
        if "external_blobs" in doc:
            doc["external_blobs"] = {}

    self.doc = doc
    # A document that has no revision must not crash construction, so
    # drop "_rev" only if it is present.
    self.doc.pop('_rev', None)
def migrate(slug, doc_type_map, doc_migrator_class, filename=None, reset=False,
            max_retry=2):
    """Migrate blobs

    Iterates over all documents of the given types. Every document that
    has a truthy value under the migrator's ``blobs_key`` is first
    written to ``filename`` (one JSON object per line) and then passed
    to the migrator's ``migrate`` method. Progress and a final summary
    are printed.

    :param slug: Migration name.
    :param doc_type_map: Dict of `doc_type_name: model_class` pairs.
    :param doc_migrator_class: A `BaseDocMigrator` subclass used to
    migrate documents.
    :param filename: File path for intermediate storage of migration
    data. When omitted, a file in a fresh temporary directory is used
    and removed again once the loop has completed.
    :param reset: Reset existing migration state (if any), causing all
    documents to be reconsidered for migration, if this is true. This
    does not reset the django migration flag, which is set when the
    migration completes successfully.
    :param max_retry: Number of times to retry migrating a document
    before giving up.
    :returns: A tuple `(<num migrated>, <num skipped>)`
    """
    couchdb = next(iter(doc_type_map.values())).get_db()
    assert all(m.get_db() is couchdb for m in doc_type_map.values()), \
        "documents must live in same couch db: %s" % repr(doc_type_map)

    dirpath = None
    if filename is None:
        dirpath = mkdtemp()
        filename = os.path.join(dirpath, "export.txt")

    def encode_content(data):
        # Attachment content is base64 encoded so it can be embedded in
        # the JSON backup line.
        if isinstance(data, unicode):
            data = data.encode("utf-8")
        return b64encode(data)

    total = sum(get_doc_count_by_type(couchdb, doc_type)
                for doc_type in doc_type_map)
    print("Migrating {} documents: {}...".format(
        total,
        ", ".join(sorted(doc_type_map))
    ))
    migrated = 0
    skipped = 0
    visited = 0
    start = datetime.now()
    doc_migrator = doc_migrator_class()
    load_attachments = doc_migrator.load_attachments
    blobs_key = doc_migrator.blobs_key
    iter_key = slug + "-blob-migration"
    docs_by_type = ResumableDocsByTypeIterator(couchdb, doc_type_map, iter_key)
    if reset:
        docs_by_type.discard_state()

    # NOTE(review): if the loop raises, the export file (and temporary
    # directory, if one was created) is left on disk -- presumably so
    # the backup survives a failed run; confirm before changing.
    with open(filename, 'wb') as f:
        for doc in docs_by_type:
            visited += 1
            if doc.get(blobs_key):
                if load_attachments:
                    obj = BlobHelper(doc, couchdb)
                    doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": obj.fetch_attachment(name),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                    # make copy with encoded attachments for JSON dump
                    backup_doc = dict(doc)
                    backup_doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": encode_content(meta["content"]),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                else:
                    backup_doc = doc

                # Write and flush the backup before the migrator touches
                # the document.
                f.write('{}\n'.format(json.dumps(backup_doc)))
                f.flush()

                ok = doc_migrator.migrate(doc, couchdb)
                if ok:
                    migrated += 1
                else:
                    try:
                        docs_by_type.retry(doc, max_retry)
                    except TooManyRetries:
                        print("Skip: {doc_type} {_id}".format(**doc))
                        skipped += 1

                # Report progress only after a document was actually
                # processed, so runs of documents without blobs do not
                # repeat the same line.
                if (migrated + skipped) % 100 == 0:
                    elapsed = datetime.now() - start
                    # Average time per visited document times the number
                    # of documents still to visit (never negative, in
                    # case the up-front count was too low).
                    remaining = elapsed / visited * max(total - visited, 0)
                    print("Migrated {}/{} of {} documents in {} ({} remaining)"
                          .format(migrated, visited, total, elapsed, remaining))

    doc_migrator.after_migration()

    if dirpath is not None:
        os.remove(filename)
        os.rmdir(dirpath)

    print("Migrated {}/{} of {} documents ({} previously migrated, {} had no attachments)."
        .format(
            migrated,
            visited,
            total,
            total - visited,
            visited - (migrated + skipped)
        ))
    if skipped:
        print(MIGRATIONS_SKIPPED_WARNING.format(skipped))
    else:
        # Nothing was skipped: record the migration state for this slug.
        BlobMigrationState.objects.get_or_create(slug=slug)[0].save()
    return migrated, skipped