def run(self):
    """Copy every ``(doc, count)`` pair pulled from ``self.queue``.

    The loop stops when ``None`` is taken off the queue.  Documents whose
    ``doc_type`` is listed in ``self.exclude_types`` are only reported as
    skipped.  Unless ``self.simulate`` is set, each remaining document is
    wrapped in a ``DocumentTransform`` and saved to ``self.targetdb``; both
    steps are attempted up to five times.

    Any exception raised while handling a document is caught here: the
    document id is appended to ``self.err_log`` and the error is printed, then
    the loop moves on to the next document.
    """
    for doc, count in iter(self.queue.get, None):
        try:
            if self.exclude_types and doc["doc_type"] in self.exclude_types:
                print(" SKIPPED (excluded type: %s). Synced %s/%s docs (%s: %s)" %
                      (doc["doc_type"], count, self.total, doc["doc_type"], doc["_id"]))
            else:
                if not self.simulate:
                    # Build the transform, retrying on request errors.
                    for i in reversed(range(5)):
                        try:
                            dt = DocumentTransform(doc, self.sourcedb, self.exclude_attachments)
                            break
                        except RequestError as request_error:
                            if i == 0:
                                # Last attempt: report, then let the outer handler log it.
                                _soft_assert(
                                    False,
                                    'Copy domain failed after 5 tries with {}'.format(request_error)
                                )
                                raise
                    # Save to the target, retrying on the known transient errors.
                    for i in reversed(range(5)):
                        try:
                            save(dt, self.targetdb)
                            break
                        except (ResourceConflict, ParserError, TypeError) as save_error:
                            if i == 0:
                                _soft_assert(
                                    False,
                                    'Copy domain failed after 5 tries with {}'.format(save_error)
                                )
                                raise
                # Printed in simulate mode as well.
                print(" Synced %s/%s docs (%s: %s)" % (count, self.total, doc["doc_type"], doc["_id"]))
        except Exception as e:
            self.err_log.write('%s\n' % doc["_id"])
            print(" Document %s failed! Error is: %s %s" % (doc["_id"], e.__class__.__name__, e))
def copy_domain(self, sourcedb, domain):
    """Copy the domain document for ``domain`` from ``sourcedb`` to ``self.targetdb``.

    The document is looked up through the ``domain/domains`` view keyed by
    ``domain``.  If the first result carries a ``doc``, it is wrapped as a
    ``Domain``, passed through ``DocumentTransform`` and saved; otherwise a
    "not found" message is printed and nothing is saved.
    """
    print("Copying domain doc")
    result = sourcedb.view(
        "domain/domains",
        key=domain,
        reduce=False,
        include_docs=True,
    ).first()
    if result and "doc" in result:
        domain_doc = Domain.wrap(result["doc"])
        dt = DocumentTransform(domain_doc, sourcedb)
        save(dt, self.targetdb)
    else:
        print("Domain doc not found for domain %s." % domain)
def handle(self, *args, **options): raise CommandError( 'copy_group_data is currently broken. ' 'Ask Danny or Ethan to fix it along the lines of ' 'https://github.com/dimagi/commcare-hq/pull/9180/files#diff-9d976dc051a36a028c6604581dfbce5dR95' ) if len(args) != 2: raise CommandError('Usage is copy_group_data %s' % self.args) sourcedb = Database(args[0]) group_id = args[1] exclude_user_owned = options["exclude_user_owned"] print 'getting group' group = Group.wrap(sourcedb.get(group_id)) group.save(force_update=True) print 'getting domain' domain = Domain.wrap( sourcedb.view('domain/domains', key=group.domain, include_docs=True, reduce=False, limit=1).one()['doc']) dt = DocumentTransform(domain._obj, sourcedb) save(dt, Domain.get_db()) owners = [group_id] if not exclude_user_owned: owners.extend(group.users) print 'getting case ids' with OverrideDB(CommCareCase, sourcedb): case_ids = get_case_ids_in_domain_by_owner(domain.name, owner_id__in=owners) xform_ids = set() print 'copying %s cases' % len(case_ids) for i, subset in enumerate(chunked(case_ids, CHUNK_SIZE)): print i * CHUNK_SIZE cases = [ CommCareCase.wrap(case['doc']) for case in sourcedb.all_docs( keys=list(subset), include_docs=True, ) ] for case in cases:
def handle(self, *args, **options):
    """Copy a single document from a source database into ``get_db()``.

    Positional arguments: the source database (handed to ``Database``), the
    id of the document to copy, and optionally a domain.  When a domain is
    given, the document's ``domain`` key is overwritten with it before the
    document is transformed and saved.

    Raises ``CommandError`` unless two or three positional arguments are given.
    """
    arg_count = len(args)
    if not 2 <= arg_count <= 3:
        raise CommandError('Usage is copy_doc %s' % self.args)

    sourcedb = Database(args[0])
    app_id = args[1]
    domain = None
    if arg_count == 3:
        domain = args[2]

    app_json = sourcedb.get(app_id)
    if domain:
        app_json['domain'] = domain

    save(DocumentTransform(app_json, sourcedb), get_db())
def run(self):
    """Copy every ``(doc, count)`` pair pulled from ``self.queue``.

    The loop stops when ``None`` is taken off the queue.  Documents whose
    ``doc_type`` is listed in ``self.exclude_types`` are only reported as
    skipped.  Unless ``self.simulate`` is set, each remaining document is
    wrapped in a ``DocumentTransform`` and saved to ``self.targetdb``.

    Any exception raised while handling a document is caught and printed so
    that one bad document does not stop the loop.
    """
    for doc, count in iter(self.queue.get, None):
        try:
            if self.exclude_types and doc["doc_type"] in self.exclude_types:
                print(" SKIPPED (excluded type: %s). Synced %s/%s docs (%s: %s)" %
                      (doc["doc_type"], count, self.total, doc["doc_type"], doc["_id"]))
            else:
                if not self.simulate:
                    dt = DocumentTransform(doc, self.sourcedb)
                    save(dt, self.targetdb)
                # Printed in simulate mode as well.
                print(" Synced %s/%s docs (%s: %s)" % (count, self.total, doc["doc_type"], doc["_id"]))
        except Exception as e:
            print(" Document %s failed! Error is: %s" % (doc["_id"], e))
def copy_domain(self, sourcedb, domain):
    """Copy the domain document for ``domain`` from ``sourcedb`` to ``self.targetdb``.

    The document is looked up through the ``domain/domains`` view keyed by
    ``domain``.  If the first result carries a ``doc``, it is wrapped as a
    ``Domain``, passed through ``DocumentTransform`` and saved; otherwise a
    "not found" message is printed and nothing is saved.
    """
    print("Copying domain doc")
    result = sourcedb.view(
        "domain/domains",
        key=domain,
        reduce=False,
        include_docs=True,
    ).first()
    if result and 'doc' in result:
        domain_doc = Domain.wrap(result['doc'])
        dt = DocumentTransform(domain_doc, sourcedb)
        save(dt, self.targetdb)
    else:
        print("Domain doc not found for domain %s." % domain)
def copy_domain(self, source_couch, domain):
    """Copy the domain document for ``domain`` into the target couch.

    The source database is obtained from ``source_couch.get_db_for_class(Domain)``
    and queried through the ``domain/domains`` view keyed by ``domain``.  If the
    first result carries a ``doc``, the wrapped document's ``_obj`` is passed
    through ``DocumentTransform`` and saved to the database that
    ``self.targetdb.get_db_for_doc_type`` returns for its ``doc_type``.
    Otherwise a "not found" message is printed and nothing is saved.
    """
    print("Copying domain doc")
    sourcedb = source_couch.get_db_for_class(Domain)
    result = sourcedb.view(
        "domain/domains",
        key=domain,
        reduce=False,
        include_docs=True,
    ).first()
    if result and 'doc' in result:
        domain_doc = Domain.wrap(result['doc'])
        dt = DocumentTransform(domain_doc._obj, sourcedb)
        save(dt, self.targetdb.get_db_for_doc_type(domain_doc['doc_type']))
    else:
        print("Domain doc not found for domain %s." % domain)
def copy_doc(doc, count, sourcedb, target_couch, exclude_types, total, simulate, exclude_attachments):
    """Copy one document to ``target_couch`` and print a progress line.

    A document whose ``doc_type`` is in ``exclude_types`` is only reported as
    skipped.  Otherwise, unless ``simulate`` is truthy, the document is wrapped
    in a ``DocumentTransform`` and saved, with up to five attempts when the
    save raises ``ResourceConflict``, ``ParserError`` or ``TypeError``; the
    error from the fifth failed attempt propagates.
    """
    skip = bool(exclude_types) and doc["doc_type"] in exclude_types
    if skip:
        print(" SKIPPED (excluded type: %s). Synced %s/%s docs (%s: %s)" % (
            doc["doc_type"], count, total, doc["doc_type"], doc["_id"]))
        return

    if not simulate:
        transform = DocumentTransform(doc, sourcedb, exclude_attachments)
        for remaining in range(4, -1, -1):
            try:
                save(transform, target_couch)
            except (ResourceConflict, ParserError, TypeError):
                if remaining == 0:
                    raise
            else:
                break

    # Reported in simulate mode as well.
    print(" Synced %s/%s docs (%s: %s)" % (count, total, doc["doc_type"], doc["_id"]))
def copy_doc(doc, count, sourcedb, target_couch, exclude_types, total, simulate, exclude_attachments):
    """Copy one document to ``target_couch`` and print a progress line.

    A document whose ``doc_type`` is in ``exclude_types`` is only reported as
    skipped.  Otherwise, unless ``simulate`` is truthy, the document is wrapped
    in a ``DocumentTransform`` and saved, with up to five attempts when the
    save raises ``ResourceConflict`` or ``TypeError``; the error from the
    fifth failed attempt propagates.
    """
    skip = bool(exclude_types) and doc["doc_type"] in exclude_types
    if skip:
        print(" SKIPPED (excluded type: %s). Synced %s/%s docs (%s: %s)" % (
            doc["doc_type"], count, total, doc["doc_type"], doc["_id"]))
        return

    if not simulate:
        transform = DocumentTransform(doc, sourcedb, exclude_attachments)
        for remaining in range(4, -1, -1):
            try:
                save(transform, target_couch)
            except (ResourceConflict, TypeError):
                if remaining == 0:
                    raise
            else:
                break

    # Reported in simulate mode as well.
    print(" Synced %s/%s docs (%s: %s)" % (count, total, doc["doc_type"], doc["_id"]))
def copy_domain(self, source_couch, domain):
    """Copy the domain document for ``domain`` into the target couch.

    The source database is obtained from ``source_couch.get_db_for_class(Domain)``
    and queried through the ``domain/domains`` view keyed by ``domain``.  If the
    first result carries a ``doc``, the wrapped document's ``_obj`` is passed
    through ``DocumentTransform`` and saved to the database that
    ``self.targetdb.get_db_for_doc_type`` returns for its ``doc_type``.
    Otherwise a "not found" message is printed and nothing is saved.
    """
    print("Copying domain doc")
    sourcedb = source_couch.get_db_for_class(Domain)
    result = sourcedb.view(
        "domain/domains",
        key=domain,
        reduce=False,
        include_docs=True
    ).first()
    if result and 'doc' in result:
        domain_doc = Domain.wrap(result['doc'])
        dt = DocumentTransform(domain_doc._obj, sourcedb)
        save(dt, self.targetdb.get_db_for_doc_type(domain_doc['doc_type']))
    else:
        print("Domain doc not found for domain %s." % domain)
def handle(self, sourcedb, doc_ids_or_file, domain, **options):
    """Copy the listed documents from ``sourcedb`` into ``get_db()``.

    ``sourcedb`` is handed to ``Database``.  ``doc_ids_or_file`` is either the
    path of an existing file holding one document id per line, or a
    comma-separated string of ids.  Surrounding whitespace is stripped from
    every id and empty entries are dropped, so blank lines or a trailing comma
    do not turn into a lookup of an empty id.

    When ``domain`` is truthy, each document's ``domain`` key is overwritten
    with it before the document is transformed and saved.
    """
    sourcedb = Database(sourcedb)

    if os.path.isfile(doc_ids_or_file):
        with open(doc_ids_or_file) as f:
            raw_ids = f.read().splitlines()
    else:
        raw_ids = doc_ids_or_file.split(',')

    # Normalise ids: "a, b" and blank lines are easy to produce by hand.
    stripped_ids = (raw_id.strip() for raw_id in raw_ids)
    doc_ids = [doc_id for doc_id in stripped_ids if doc_id]

    print("Starting copy of {} docs".format(len(doc_ids)))
    for doc_id in doc_ids:
        print('Copying doc: {}'.format(doc_id))
        doc_json = sourcedb.get(doc_id)
        if domain:
            doc_json['domain'] = domain
        dt = DocumentTransform(doc_json, sourcedb)
        save(dt, get_db())
def handle(self, *args, **options): if len(args) != 2: raise CommandError('Usage is copy_group_data %s' % self.args) sourcedb = Database(args[0]) group_id = args[1] exclude_user_owned = options["exclude_user_owned"] print 'getting group' group = Group.wrap(sourcedb.get(group_id)) group.save(force_update=True) print 'getting domain' domain = Domain.wrap( sourcedb.view('domain/domains', key=group.domain, include_docs=True, reduce=False, limit=1).one()['doc'] ) dt = DocumentTransform(domain._obj, sourcedb) save(dt, Domain.get_db()) owners = [group_id] if not exclude_user_owned: owners.extend(group.users) print 'getting case ids' with OverrideDB(CommCareCase, sourcedb): case_ids = get_case_ids_in_domain_by_owner( domain.name, owner_id__in=owners) xform_ids = set() print 'copying %s cases' % len(case_ids) for i, subset in enumerate(chunked(case_ids, CHUNK_SIZE)): print i * CHUNK_SIZE cases = [CommCareCase.wrap(case['doc']) for case in sourcedb.all_docs( keys=list(subset), include_docs=True, )] for case in cases:
def copy_docs(self, sourcedb, domain, startkey, endkey, simulate, type=None, since=None):
    """Copy the documents selected by a ``domain/docs`` key range into ``get_db()``.

    Document ids are read from the ``domain/docs`` view between ``startkey``
    and ``endkey``; the documents themselves are fetched with ``iter_docs``.
    Unless ``simulate`` is truthy, each one is passed through
    ``DocumentTransform`` and saved.  ``domain``, ``type`` and ``since`` are
    only used in the summary message printed before copying starts.

    A failure on one document is printed and does not stop the loop.
    """
    doc_ids = [
        result["id"]
        for result in sourcedb.view("domain/docs", startkey=startkey, endkey=endkey, reduce=False)
    ]
    total = len(doc_ids)
    targetdb = get_db()

    msg = "Found %s matching documents in domain: %s" % (total, domain)
    if type:
        msg += " of type: %s" % (type)
    if since:
        msg += " since: %s" % (since)
    print(msg)

    # count is 1-based so the progress line reads "1/total" for the first doc.
    for count, doc in enumerate(iter_docs(sourcedb, doc_ids), 1):
        try:
            if not simulate:
                dt = DocumentTransform(doc, sourcedb)
                save(dt, targetdb)
            print(" Synced %s/%s docs (%s: %s)" % (count, total, doc["doc_type"], doc["_id"]))
        except Exception as e:
            print(" Document %s failed! Error is: %s" % (doc["_id"], e))
def handle(self, *args, **options):
    """Copy the listed documents from a source database into ``get_db()``.

    Positional arguments: the source database (handed to ``Database``), then
    either the path of an existing file holding one document id per line or a
    comma-separated string of ids, and optionally a domain.  Surrounding
    whitespace is stripped from every id and empty entries are dropped, so
    blank lines or a trailing comma do not turn into a lookup of an empty id.

    When a domain is given, each document's ``domain`` key is overwritten with
    it before the document is transformed and saved.

    Raises ``CommandError`` unless two or three positional arguments are given.
    """
    if len(args) < 2 or len(args) > 3:
        raise CommandError('Usage is copy_doc %s' % self.args)

    sourcedb = Database(args[0])
    doc_ids_or_file = args[1]
    domain = args[2] if len(args) == 3 else None

    if os.path.isfile(doc_ids_or_file):
        with open(doc_ids_or_file) as f:
            raw_ids = f.read().splitlines()
    else:
        raw_ids = doc_ids_or_file.split(',')

    # Normalise ids: "a, b" and blank lines are easy to produce by hand.
    stripped_ids = (raw_id.strip() for raw_id in raw_ids)
    doc_ids = [doc_id for doc_id in stripped_ids if doc_id]

    print("Starting copy of {} docs".format(len(doc_ids)))
    for doc_id in doc_ids:
        print('Copying doc: {}'.format(doc_id))
        doc_json = sourcedb.get(doc_id)
        if domain:
            doc_json['domain'] = domain
        dt = DocumentTransform(doc_json, sourcedb)
        save(dt, get_db())
def handle(self, *args, **options):
    """Copy the listed documents from a source database into ``get_db()``.

    Positional arguments: the source database (handed to ``Database``), then
    either the path of an existing file holding one document id per line or a
    comma-separated string of ids, and optionally a domain.  Surrounding
    whitespace is stripped from every id and empty entries are dropped, so
    blank lines or a trailing comma do not turn into a lookup of an empty id.

    When a domain is given, each document's ``domain`` key is overwritten with
    it before the document is transformed and saved.

    Raises ``CommandError`` unless two or three positional arguments are given.
    """
    if len(args) < 2 or len(args) > 3:
        raise CommandError('Usage is copy_doc %s' % self.args)

    sourcedb = Database(args[0])
    doc_ids_or_file = args[1]
    domain = args[2] if len(args) == 3 else None

    if os.path.isfile(doc_ids_or_file):
        with open(doc_ids_or_file) as f:
            raw_ids = f.read().splitlines()
    else:
        raw_ids = doc_ids_or_file.split(',')

    # Normalise ids: "a, b" and blank lines are easy to produce by hand.
    stripped_ids = (raw_id.strip() for raw_id in raw_ids)
    doc_ids = [doc_id for doc_id in stripped_ids if doc_id]

    print("Starting copy of {} docs".format(len(doc_ids)))
    for doc_id in doc_ids:
        print('Copying doc: {}'.format(doc_id))
        doc_json = sourcedb.get(doc_id)
        if domain:
            doc_json['domain'] = domain
        dt = DocumentTransform(doc_json, sourcedb)
        save(dt, get_db())
def copy_doc(doc, count, sourcedb, targetdb, exclude_types, total, simulate, exclude_attachments):
    """Copy one document to ``targetdb`` and print a progress line.

    A document whose ``doc_type`` is in ``exclude_types`` is only reported as
    skipped.  Otherwise, unless ``simulate`` is truthy, the document is wrapped
    in a ``DocumentTransform`` (up to five attempts on ``RequestError``) and
    saved (up to five attempts on ``ResourceConflict``, ``ParserError`` or
    ``TypeError``).  When the fifth attempt of either step fails,
    ``_soft_assert`` is called with the error and the exception propagates.
    """
    if exclude_types and doc["doc_type"] in exclude_types:
        print(" SKIPPED (excluded type: %s). Synced %s/%s docs (%s: %s)" %
              (doc["doc_type"], count, total, doc["doc_type"], doc["_id"]))
    else:
        if not simulate:
            # Build the transform, retrying on request errors.
            for i in reversed(range(5)):
                try:
                    dt = DocumentTransform(doc, sourcedb, exclude_attachments)
                    break
                except RequestError as r:
                    if i == 0:
                        _soft_assert(False, 'Copy domain failed after 5 tries with {}'.format(r))
                        raise
            # Save to the target, retrying on the known transient errors.
            for i in reversed(range(5)):
                try:
                    save(dt, targetdb)
                    break
                except (ResourceConflict, ParserError, TypeError) as e:
                    if i == 0:
                        _soft_assert(False, 'Copy domain failed after 5 tries with {}'.format(e))
                        raise
        # Printed in simulate mode as well.
        print(" Synced %s/%s docs (%s: %s)" % (count, total, doc["doc_type"], doc["_id"]))
def handle(self, *args, **options):
    """Copy every document of a domain from a source database into ``get_db()``.

    Positional arguments: the source database (handed to ``Database``) and the
    domain name, which is stripped of surrounding whitespace.  Rows are read
    from the ``domain/docs`` view for keys between ``[domain]`` and
    ``[domain, {}]``; each row's document is fetched, passed through
    ``DocumentTransform`` and saved.

    A failure on one document is printed and does not stop the loop.

    Raises ``CommandError`` unless exactly two positional arguments are given.
    """
    if len(args) != 2:
        raise CommandError('Usage is copy_domain %s' % self.args)

    sourcedb = Database(args[0])
    domain = args[1].strip()

    all_docs = sourcedb.view("domain/docs", startkey=[domain], endkey=[domain, {}], reduce=False)
    total = len(all_docs)
    targetdb = get_db()
    print("found %s matching documents in domain: %s" % (total, domain))

    # count is 1-based so the progress line reads "1/total" for the first doc.
    for count, row in enumerate(all_docs, 1):
        try:
            dt = DocumentTransform(sourcedb.get(row["id"]), sourcedb)
            save(dt, targetdb)
            print("Synced %s/%s docs (%s: %s)" % (count, total, row["key"][1], row["id"]))
        except Exception as e:
            print("Document %s failed! Error is: %s" % (row["id"], e))
def handle(self, *args, **options):
    """Copy a group, its domain, cases, forms, users, roles and sync logs.

    NOTE: the command is currently disabled. The first statement raises
    ``CommandError`` unconditionally, so everything below it is unreachable
    until that raise is removed.

    Intended flow, as written below:
      1. wrap and save the group identified by ``args[1]`` from the source
         database ``args[0]``;
      2. copy the group's domain document;
      3. copy cases owned by the group (and by its users unless
         ``options["exclude_user_owned"]`` is set), collecting their form ids;
      4. copy those forms, collecting the submitting user ids;
      5. save each user individually and copy the roles they hold in the domain;
      6. if ``options['include_sync_logs']`` is set, copy the users' sync logs.
    """
    # Deliberate kill switch; kept as-is.
    raise CommandError(
        'copy_group_data is currently broken. '
        'Ask Danny or Ethan to fix it along the lines of '
        'https://github.com/dimagi/commcare-hq/pull/9180/files#diff-9d976dc051a36a028c6604581dfbce5dR95'
    )
    if len(args) != 2:
        raise CommandError('Usage is copy_group_data %s' % self.args)

    sourcedb = Database(args[0])
    group_id = args[1]
    exclude_user_owned = options["exclude_user_owned"]

    print('getting group')
    group = Group.wrap(sourcedb.get(group_id))
    group.save(force_update=True)

    print('getting domain')
    domain = Domain.wrap(
        sourcedb.view('domain/domains', key=group.domain, include_docs=True,
                      reduce=False, limit=1).one()['doc']
    )
    dt = DocumentTransform(domain._obj, sourcedb)
    save(dt, Domain.get_db())

    owners = [group_id]
    if not exclude_user_owned:
        owners.extend(group.users)

    print('getting case ids')
    with OverrideDB(CommCareCase, sourcedb):
        case_ids = get_case_ids_in_domain_by_owner(
            domain.name, owner_id__in=owners)

    xform_ids = set()

    print('copying %s cases' % len(case_ids))
    for i, subset in enumerate(chunked(case_ids, CHUNK_SIZE)):
        print(i * CHUNK_SIZE)
        cases = [CommCareCase.wrap(case['doc']) for case in sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
        )]

        for case in cases:
            xform_ids.update(case.xform_ids)

        self.lenient_bulk_save(CommCareCase, cases)

    if not exclude_user_owned:
        # also grab submissions that may not have included any case data
        for user_id in group.users:
            xform_ids.update(res['id'] for res in sourcedb.view(
                'all_forms/view',
                startkey=['submission user', domain.name, user_id],
                endkey=['submission user', domain.name, user_id, {}],
                reduce=False
            ))

    print('copying %s xforms' % len(xform_ids))
    user_ids = set(group.users)

    def form_wrapper(row):
        # Drop attachment/blob metadata before wrapping the form document.
        doc = row['doc']
        doc.pop('_attachments', None)
        doc.pop('external_blobs', None)
        return XFormInstance.wrap(doc)

    for i, subset in enumerate(chunked(xform_ids, CHUNK_SIZE)):
        print(i * CHUNK_SIZE)
        xforms = sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
            wrapper=form_wrapper,
        ).all()
        self.lenient_bulk_save(XFormInstance, xforms)

        for xform in xforms:
            user_id = xform.metadata.userID
            user_ids.add(user_id)

    print('copying %s users' % len(user_ids))

    def wrap_user(row):
        # Returns None (after logging) for rows that cannot be wrapped.
        try:
            doc = row['doc']
        except KeyError:
            logging.exception('trouble with user result %r' % row)
            return None

        try:
            return CouchUser.wrap_correctly(doc)
        except Exception:
            logging.exception('trouble with user %s' % doc['_id'])
            return None

    users = sourcedb.all_docs(
        keys=list(user_ids),
        include_docs=True,
        wrapper=wrap_user,
    ).all()

    role_ids = set([])
    for user in filter(lambda u: u is not None, users):
        # if we use bulk save, django user doesn't get sync'd
        domain_membership = user.get_domain_membership(domain.name)
        if domain_membership and domain_membership.role_id:
            # Read the role from the membership that was just checked, not
            # from a separate ``user.domain_membership`` attribute.
            role_ids.add(domain_membership.role_id)
        user.save(force_update=True)

    print('copying %s roles' % len(role_ids))
    for i, subset in enumerate(chunked(role_ids, CHUNK_SIZE)):
        roles = [UserRole.wrap(role['doc']) for role in sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
        )]
        self.lenient_bulk_save(UserRole, roles)

    if options['include_sync_logs']:
        print('copying sync logs')
        for user_id in user_ids:
            log_ids = [res['id'] for res in sourcedb.view(
                "phone/sync_logs_by_user",
                startkey=[user_id, {}],
                endkey=[user_id],
                descending=True,
                reduce=False,
                include_docs=True
            )]
            print('user: %s, logs: %s' % (user_id, len(log_ids)))
            for i, subset in enumerate(chunked(log_ids, CHUNK_SIZE)):
                print(i * CHUNK_SIZE)
                logs = [SyncLog.wrap(log['doc']) for log in sourcedb.all_docs(
                    keys=list(subset),
                    include_docs=True,
                )]
                self.lenient_bulk_save(SyncLog, logs)
def handle(self, *args, **options):
    """Copy a group, its domain, cases, forms, users, roles and sync logs.

    Positional arguments: the source database (handed to ``Database``) and the
    id of the group to copy.

    Flow:
      1. wrap and save the group;
      2. copy the group's domain document;
      3. copy cases owned by the group (and by its users unless
         ``options["exclude_user_owned"]`` is set), collecting their form ids;
      4. copy those forms, collecting the submitting user ids;
      5. save each user individually and copy the roles they hold in the domain;
      6. if ``options['include_sync_logs']`` is set, copy the users' sync logs.

    Raises ``CommandError`` unless exactly two positional arguments are given.
    """
    if len(args) != 2:
        raise CommandError('Usage is copy_group_data %s' % self.args)

    sourcedb = Database(args[0])
    group_id = args[1]
    exclude_user_owned = options["exclude_user_owned"]

    print('getting group')
    group = Group.wrap(sourcedb.get(group_id))
    group.save(force_update=True)

    print('getting domain')
    domain = Domain.wrap(
        sourcedb.view('domain/domains', key=group.domain, include_docs=True,
                      reduce=False, limit=1).one()['doc']
    )
    dt = DocumentTransform(domain._obj, sourcedb)
    save(dt, Domain.get_db())

    owners = [group_id]
    if not exclude_user_owned:
        owners.extend(group.users)

    def keys_for_owner(domain, owner_id):
        # Two view keys per owner, differing only in the trailing boolean.
        return [
            [domain, owner_id, False],
            [domain, owner_id, True],
        ]

    def get_case_ids(owners):
        keys = list(itertools.chain.from_iterable(
            keys_for_owner(domain.name, owner_id) for owner_id in owners
        ))
        results = sourcedb.view(
            'hqcase/by_owner',
            keys=keys,
            reduce=False,
            include_docs=False,
        )
        return [res['id'] for res in results]

    print('getting case ids')
    case_ids = get_case_ids(owners)
    xform_ids = set()

    print('copying %s cases' % len(case_ids))
    for i, subset in enumerate(chunked(case_ids, CHUNK_SIZE)):
        print(i * CHUNK_SIZE)
        cases = [CommCareCase.wrap(case['doc']) for case in sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
        )]

        for case in cases:
            xform_ids.update(case.xform_ids)

        self.lenient_bulk_save(CommCareCase, cases)

    if not exclude_user_owned:
        # also grab submissions that may not have included any case data
        for user_id in group.users:
            xform_ids.update(res['id'] for res in sourcedb.view(
                'reports_forms/all_forms',
                startkey=['submission user', domain.name, user_id],
                endkey=['submission user', domain.name, user_id, {}],
                reduce=False
            ))

    print('copying %s xforms' % len(xform_ids))
    user_ids = set(group.users)

    def form_wrapper(row):
        # Drop attachment metadata before wrapping the form document.
        doc = row['doc']
        doc.pop('_attachments', None)
        return XFormInstance.wrap(doc)

    for i, subset in enumerate(chunked(xform_ids, CHUNK_SIZE)):
        print(i * CHUNK_SIZE)
        xforms = sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
            wrapper=form_wrapper,
        ).all()
        self.lenient_bulk_save(XFormInstance, xforms)

        for xform in xforms:
            user_id = xform.metadata.userID
            user_ids.add(user_id)

    print('copying %s users' % len(user_ids))

    def wrap_user(row):
        # Returns None (after logging) for rows that cannot be wrapped.
        try:
            doc = row['doc']
        except KeyError:
            logging.exception('trouble with user result %r' % row)
            return None

        try:
            return CouchUser.wrap_correctly(doc)
        except Exception:
            logging.exception('trouble with user %s' % doc['_id'])
            return None

    users = sourcedb.all_docs(
        keys=list(user_ids),
        include_docs=True,
        wrapper=wrap_user,
    ).all()

    role_ids = set()
    for user in filter(lambda u: u is not None, users):
        # if we use bulk save, django user doesn't get sync'd
        # NOTE(review): presumably get_domain_membership returns None for a
        # user with no membership in this domain; guard so one such user
        # (e.g. a form submitter from elsewhere) does not abort the copy.
        domain_membership = user.get_domain_membership(domain.name)
        if domain_membership and domain_membership.role_id:
            role_ids.add(domain_membership.role_id)
        user.save(force_update=True)

    print('copying %s roles' % len(role_ids))
    for i, subset in enumerate(chunked(role_ids, CHUNK_SIZE)):
        roles = [UserRole.wrap(role['doc']) for role in sourcedb.all_docs(
            keys=list(subset),
            include_docs=True,
        )]
        self.lenient_bulk_save(UserRole, roles)

    if options['include_sync_logs']:
        print('copying sync logs')
        for user_id in user_ids:
            log_ids = [res['id'] for res in sourcedb.view(
                "phone/sync_logs_by_user",
                startkey=[user_id, {}],
                endkey=[user_id],
                descending=True,
                reduce=False,
                include_docs=True
            )]
            print('user: %s, logs: %s' % (user_id, len(log_ids)))
            for i, subset in enumerate(chunked(log_ids, CHUNK_SIZE)):
                print(i * CHUNK_SIZE)
                logs = [SyncLog.wrap(log['doc']) for log in sourcedb.all_docs(
                    keys=list(subset),
                    include_docs=True,
                )]
                self.lenient_bulk_save(SyncLog, logs)