def handle(self, **options):
    """Migrate couch docs of this command's type to SQL, then verify.

    ``verify_only`` skips migration and only diffs; ``skip_verify`` migrates
    without diffing. Requesting both at once is contradictory and rejected.
    """
    only_verify = options.get("verify_only", False)
    no_verify = options.get("skip_verify", False)
    if only_verify and no_verify:
        raise CommandError("verify_only and skip_verify are mutually exclusive")

    self.doc_count = get_doc_count_by_type(self.couch_db(), self.couch_doc_type())
    self.diff_count = 0
    self.doc_index = 0
    logger.info("Found {} {} docs and {} {} models".format(
        self.doc_count,
        self.couch_doc_type(),
        self.sql_class().objects.count(),
        self.sql_class().__name__,
    ))
    for doc in get_all_docs_with_doc_types(self.couch_db(), [self.couch_doc_type()]):
        self.doc_index += 1
        if not only_verify:
            self._migrate_doc(doc)
        if not no_verify:
            # during a real migration (not verify-only) a diff is fatal
            self._verify_doc(doc, exit=not only_verify)
    logger.info(f"Processed {self.doc_index} documents")
    if not no_verify:
        logger.info(f"Found {self.diff_count} differences")
def _migrate_linked_apps(apps, schema_editor):
    """Data migration: turn linked-app couch fields into DomainLink records.

    For each (possibly deleted) LinkedApplication doc, pops the link-related
    fields off the doc, creates a DomainLink, and saves the stripped doc back.
    Link errors are collected and reported via soft_assert rather than raised,
    so one bad app does not abort the whole migration.
    """
    app_db = LinkedApplication.get_db()
    linked_apps = get_all_docs_with_doc_types(
        app_db, ['LinkedApplication', 'LinkedApplication-Deleted']
    )
    errors = []
    for app_doc in linked_apps:
        remote_details = None
        # pop() mutates the doc in place; the stripped doc is only saved
        # back (below) if link_domains succeeds
        remote_url = app_doc.pop('remote_url_base', None)
        if remote_url:
            auth = app_doc.pop('remote_auth', {})
            remote_details = RemoteLinkDetails(
                remote_url,
                auth.get('username'),
                auth.get('api_key'),
            )
        master_domain = app_doc.pop('master_domain', None)
        if not master_domain and not remote_url:
            # local link with no explicit master_domain: derive it from the
            # master app itself
            master_domain = get_app(None, app_doc['master']).domain
        try:
            DomainLink.link_domains(app_doc['domain'], master_domain, remote_details)
        except DomainLinkError as e:
            errors.append(str(e))
        else:
            app_db.save_doc(app_doc)
    # notify rather than fail: errors are emailed via soft assert
    _assert = soft_assert('{}@dimagi.com'.format('skelly'), exponential_backoff=False)
    _assert(not errors, 'Errors migrating linked apps to linked domain', {
        'errors': errors
    })
def iter_attachments(self):
    """Yield (doc, filename, info) for every attachment on every app doc.

    Iterates all docs of the migration's doc types, showing a progress bar
    sized by ``self.docs_count``; docs without attachments are skipped.
    """
    app_docs = get_all_docs_with_doc_types(
        Application.get_db(), apps_migration.doc_types)
    for doc in with_progress_bar(app_docs, length=self.docs_count):
        if '_attachments' not in doc:
            continue
        for name, meta in doc['_attachments'].items():
            yield doc, name, meta
def get_all_hq_group_export_configs():
    """Lazily yield every HQGroupExportConfiguration, wrapped as a model."""
    # local import avoids a circular dependency with the reports app
    from corehq.apps.reports.models import HQGroupExportConfiguration
    raw_docs = get_all_docs_with_doc_types(
        HQGroupExportConfiguration.get_db(),
        ('HQGroupExportConfiguration',),
    )
    return imap(HQGroupExportConfiguration.wrap, raw_docs)
def bulk_migrate(source_db, target_db, doc_types):
    """Copy every doc of the given types (with attachments) between couch DBs."""
    with IterDB(target_db, new_edits=False) as writer:
        for doc in get_all_docs_with_doc_types(source_db, doc_types):
            # Cloudant does not support attachments=true on views or
            # _all_docs (single-doc gets only), so each doc must be
            # re-queried individually to pull in its attachments.
            writer.save(_insert_attachments(source_db, doc))
def bulk_migrate(source_db, target_db, doc_types):
    """Copy docs of the given types between couch DBs in small write batches."""
    # small chunksize keeps each bulk write modest (docs carry attachments)
    with IterDB(target_db, new_edits=False, chunksize=25) as sink:
        for raw_doc in get_all_docs_with_doc_types(source_db, doc_types):
            # Cloudant only honors attachments=true on single-doc gets,
            # never on views/_all_docs, so re-fetch the full doc with
            # attachments before saving.
            full_doc = _insert_attachments(source_db, raw_doc)
            sink.save(full_doc)
def bulk_migrate(source_db, target_db, doc_types, filename):
    """Dump docs of the given types to a JSON-lines file, then replay them
    into the target DB. The intermediate file is left in place for auditing."""
    with open(filename, "w") as dump:
        for doc in get_all_docs_with_doc_types(source_db, doc_types):
            dump.write("{}\n".format(json.dumps(doc)))
    # second pass: stream the dump back out into the target database
    with open(filename, "r") as dump:
        with IterDB(target_db, new_edits=False) as writer:
            for record in dump:
                writer.save(json.loads(record))
def _populate_master_domain(apps, schema_editor):
    """Backfill 'master_domain' on LinkedApplication docs that lack one.

    Prefers the doc's own (popped) 'remote_domain'; otherwise looks up the
    master app's domain, tolerating masters that no longer exist.
    """
    app_db = Application.get_db()
    for app in get_all_docs_with_doc_types(app_db, ['LinkedApplication']):
        if app.get('master_domain', None):
            continue  # already populated
        master_domain = app.pop('remote_domain', None)
        if not master_domain:
            try:
                master_domain = app_db.get(app['master'])['domain']
            except ResourceNotFound:
                # master app deleted; leave master_domain as None
                pass
        app['master_domain'] = master_domain
        app_db.save_doc(app)
def bulk_migrate(source_db, target_db, doc_types, filename):
    """Export docs (with attachments) to a JSON-lines file, then load them
    into the target DB. The dump file is kept for inspection/retry."""
    with open(filename, 'w') as export_file:
        for doc in get_all_docs_with_doc_types(source_db, doc_types):
            # Cloudant doesn't support attachments=true on views or
            # _all_docs — only single-doc gets — so every attachment has
            # to be fetched individually and spliced into the doc.
            insert_attachment(source_db, doc)
            export_file.write('{}\n'.format(json.dumps(doc)))
    with open(filename, 'r') as export_file:
        with IterDB(target_db, new_edits=False) as sink:
            for record in export_file:
                sink.save(json.loads(record))
def migrate(slug, doc_types, migrate_func, filename=None):
    """Migrate blobs

    :param doc_types: List of couch model classes to be migrated.
    :param filename: File path for intermediate storage of migration data.
    :param migrate_func: A function `func(filename, type_map, total)`
    returning a tuple `(<num migrated>, <num skipped>)`. If `<num skipped>`
    is non-zero the migration will be considered failed (a migration state
    record will not be saved). `<num migrated>` need not match the original
    `total` passed in. This could happen, for example, if a document is
    deleted during the migration (and should not cause migration failure).
    :returns: A tuple `(<num migrated>, <num skipped>)`
    """
    # every doc type must share a single couch database
    couchdb = doc_types[0].get_db()
    assert all(t.get_db() is couchdb for t in doc_types[1:]), repr(doc_types)
    type_map = {cls.__name__: cls for cls in doc_types}
    dirpath = None
    if filename is None:
        # no caller-supplied path: stage the export in a temp dir that is
        # removed after a successful run (see cleanup below)
        dirpath = mkdtemp()
        filename = os.path.join(dirpath, "export.txt")
    print("Loading documents: {}...".format(", ".join(type_map)))
    total = 0
    with open(filename, 'w') as f:
        for doc in get_all_docs_with_doc_types(couchdb, list(type_map)):
            # export only docs that actually carry blobs;
            # migrate_func.blobs_key names the doc field holding them
            if doc.get(migrate_func.blobs_key):
                f.write('{}\n'.format(json.dumps(doc)))
                total += 1
    migrated, skips = migrate_func(filename, type_map, total)
    if dirpath is not None:
        # clean up our own temp staging file/dir; caller-supplied
        # filenames are left in place
        os.remove(filename)
        os.rmdir(dirpath)
    print("Migrated {} documents.".format(migrated - skips))
    if skips:
        print(MIGRATIONS_SKIPPED_WARNING.format(skips))
    else:
        # record completion only when nothing was skipped, so a partial
        # migration can be re-run
        BlobMigrationState.objects.get_or_create(slug=slug)[0].save()
    return migrated - skips, skips
def handle(self, **options):
    """Sync every couch doc of this command's type into its SQL model,
    logging progress per document."""
    total = get_doc_count_by_type(self.couch_db(), self.couch_doc_type())
    logger.info("Found {} {} docs and {} {} models".format(
        total,
        self.couch_doc_type(),
        self.sql_class().objects.count(),
        self.sql_class().__name__,
    ))
    docs = get_all_docs_with_doc_types(self.couch_db(), [self.couch_doc_type()])
    for position, doc in enumerate(docs, start=1):
        logger.info("Looking at {} doc #{} of {} with id {}".format(
            self.couch_doc_type(), position, total, doc["_id"]))
        # each doc is synced in its own transaction
        with transaction.atomic():
            model, created = self.update_or_create_sql_object(doc)
            logger.info("{} model for doc with id {}".format(
                "Creating" if created else "Updated", doc["_id"]))
def handle(self, dry_run=False, **options):
    """Sync couch docs of this command's type into SQL models.

    With ``dry_run=True`` nothing is persisted: newly created models are
    deleted again and updated models are never saved.
    """
    log_prefix = "[DRY RUN] " if dry_run else ""
    logger.info("{}Found {} {} docs and {} {} models".format(
        log_prefix,
        get_doc_count_by_type(self.couch_db(), self.couch_doc_type()),
        self.couch_doc_type(),
        self.sql_class().objects.count(),
        self.sql_class().__name__,
    ))
    for doc in get_all_docs_with_doc_types(self.couch_db(), [self.couch_doc_type()]):
        logger.info("{}Looking at doc with key {}".format(log_prefix, self.doc_key(doc)))
        with transaction.atomic():
            model, created = self.update_or_create_sql_object(doc)
            if not dry_run:
                logger.info("{}{} model for doc with key {}".format(
                    log_prefix,
                    "Created" if created else "Updated",
                    self.doc_key(doc)))
                model.save()
            elif created:
                # dry run: undo the row that update_or_create_sql_object
                # appears to persist on creation — NOTE(review): this relies
                # on update_or_create_sql_object saving created objects;
                # confirm against its implementation
                model.delete()
def handle(self, doc_type, **options):
    """Report, per requested attribute, how often it is blank across all
    docs of ``doc_type`` and the maximum observed string length.

    :param doc_type: couch doc type to scan.
    Options: ``attrs`` — attribute names to inspect; ``db`` — couch db key.
    """
    attrs = options.get('attrs', [])
    db = couch_config.get_db(options.get('db', None))
    # defaultdict(int) is the idiomatic zero-initialized counter
    # (equivalent to, and clearer than, defaultdict(lambda: 0))
    blank_counts = defaultdict(int)
    max_lengths = defaultdict(int)
    print("Found {} {} docs\n".format(get_doc_count_by_type(db, doc_type), doc_type))
    docs = get_all_docs_with_doc_types(db, [doc_type])
    for doc in docs:
        for attr in attrs:
            if doc.get(attr):
                max_lengths[attr] = max(len(doc[attr]), max_lengths[attr])
            else:
                # missing, None, or empty value all count as blank
                blank_counts[attr] += 1
    for attr in attrs:
        print("{} is {} blank and has max length of {}".format(
            attr, 'sometimes' if blank_counts[attr] else 'never', max_lengths[attr]))
def get_all_forms_in_all_domains():
    """Load every XFormInstance doc and wrap each into a model instance."""
    db = XFormInstance.get_db()
    raw_docs = get_all_docs_with_doc_types(db, ['XFormInstance'])
    return list(map(XFormInstance.wrap, raw_docs))
def _get_all_couch_docs_for_model(self):
    """Return an iterable over every couch doc of this command's doc type."""
    db = self.couch_db()
    wanted_types = [self.couch_doc_type()]
    return get_all_docs_with_doc_types(db, wanted_types)
def get_all_forms_in_all_domains():
    """Fetch all XFormInstance docs and return them wrapped as models."""
    docs = get_all_docs_with_doc_types(XFormInstance.get_db(), ['XFormInstance'])
    return list(map(XFormInstance.wrap, docs))
def delete_all_groups():
    """Bulk-delete every Group doc, including soft-deleted ('Group-Deleted') ones."""
    db = Group.get_db()
    doomed = list(get_all_docs_with_doc_types(db, ['Group', 'Group-Deleted']))
    db.delete_docs(doomed)
def get_all_hq_group_export_configs():
    """Return a lazy iterator of wrapped HQGroupExportConfiguration models."""
    # imported locally to avoid a circular import with the reports app
    from corehq.apps.reports.models import HQGroupExportConfiguration
    db = HQGroupExportConfiguration.get_db()
    docs = get_all_docs_with_doc_types(db, ('HQGroupExportConfiguration',))
    return imap(HQGroupExportConfiguration.wrap, docs)
def tearDown(self):
    """Clean up the SQL and couch CommtrackConfig fixtures created by tests."""
    SQLCommtrackConfig.objects.all().delete()
    raw_docs = get_all_docs_with_doc_types(self.db, ['CommtrackConfig'])
    for raw_doc in raw_docs:
        CommtrackConfig.wrap(raw_doc).delete()
    super().tearDown()
def delete_all_groups():
    """Bulk-delete every active Group doc from couch."""
    groups_db = Group.get_db()
    doomed = list(get_all_docs_with_doc_types(groups_db, ['Group']))
    groups_db.delete_docs(doomed)
def tearDown(self):
    """Remove SQL and couch CustomDataFieldsDefinition fixtures after each test."""
    SQLCustomDataFieldsDefinition.objects.all().delete()
    for raw_doc in get_all_docs_with_doc_types(self.db, ['CustomDataFieldsDefinition']):
        CustomDataFieldsDefinition.wrap(raw_doc).delete()
    super().tearDown()
def tearDown(self):
    """Remove SQL and couch MobileAuthKeyRecord fixtures after each test."""
    SQLMobileAuthKeyRecord.objects.all().delete()
    leftover_docs = get_all_docs_with_doc_types(self.db, ['MobileAuthKeyRecord'])
    for raw_doc in leftover_docs:
        MobileAuthKeyRecord.wrap(raw_doc).delete()
    super().tearDown()