def setUp(self):
    """Clear export test forms, then set up the domain and an admin web user.

    Every doc returned by ``get_docs`` for the export test key is wrapped
    as an ``XFormInstance`` and deleted. ``create_domain(DOMAIN)`` is then
    called and a ``WebUser`` is created, given an admin membership on
    ``DOMAIN``, saved, and kept on ``self.couch_user``.
    """
    export_key = [DOMAIN, "http://www.commcarehq.org/export/test"]
    for stale_doc in get_docs(export_key):
        XFormInstance.wrap(stale_doc).delete()

    # Called for its side effect only; the return value is not used.
    create_domain(DOMAIN)

    self.couch_user = web_user = WebUser.create(None, "test", "foobar")
    web_user.add_domain_membership(DOMAIN, is_admin=True)
    web_user.save()
def _get_new_form_json(xml, xform_id):
    """Convert submission XML into form JSON, adjusting datetimes and meta.

    :param xml: the form XML passed to ``convert_xform_to_json``.
    :param xform_id: used as ``_id`` of the temporary wrapper document.
    :returns: the converted form JSON dict.
    """
    converted = convert_xform_to_json(xml)
    with force_phone_timezones_should_be_processed():
        adjust_datetimes(converted)

    # The wrapper is thrown away: scrubbing is effectively in-place on
    # ``converted`` because of how jsonobject works.
    throwaway_form = XFormInstance.wrap({'form': converted, '_id': xform_id})
    scrub_meta(throwaway_form)
    return converted
def deidentify_form(doctransform):
    """Overwrite configured form properties with generated values.

    For a form whose xmlns is a key of ``FORM_CONFIG``, each configured
    ``/``-separated property path is walked in parallel through the form
    JSON and the ``form.xml`` attachment; the final property is replaced
    by ``str(generatorfunc())`` in both.

    :param doctransform: object exposing ``doc`` (an XFormInstance dict)
        and ``attachments`` (a mapping that may contain ``"form.xml"``).
    :returns: the mutated ``doctransform``, or ``None`` when the form's
        xmlns has no entry in ``FORM_CONFIG``.
    """
    assert(doctransform.doc["doc_type"] == "XFormInstance")
    form = XFormInstance.wrap(doctransform.doc)
    xml = doctransform.attachments.get("form.xml", "")

    if form.xmlns not in FORM_CONFIG:
        # if we didn't know how to deidentify it, we don't want
        # to return anything, to prevent potentially identified
        # data from sneaking in
        return None

    rootxml = etree.XML(xml)
    for proppath, generatorfunc in FORM_CONFIG[form.xmlns].items():
        parts = proppath.split("/")
        node = form.form
        xmlnode = rootxml
        for i, p in enumerate(parts):
            if p not in node:
                # no index to the property, so assume we don't
                # need to touch it
                break
            xml_index = "{%(ns)s}%(val)s" % {"ns": form.xmlns, "val": p}
            if i == len(parts) - 1:
                # override prop on the last step
                val = str(generatorfunc())
                node[p] = val
                xmlnode.find(xml_index).text = val
            else:
                # or drill in
                node = node[p]
                # NOTE: currently will not work with repeated nodes
                xmlnode = xmlnode.find(xml_index)

    doctransform.doc = form._doc
    doctransform.attachments["form.xml"] = etree.tostring(rootxml)
    return doctransform
def _fix_replacement_form_problem_in_couch(doc):
    """Repair a replacement form that swap_duplicate_xforms created.

    The replacement form was incorrectly created with "problem" text,
    which causes it to be counted as an error form and throws off the
    diff counts at the end of this migration. This clears the problem,
    records the deprecated form id and a history entry, and saves the form.

    NOTE the replacement form's _id does not match instanceID in its
    form.xml. That issue is not resolved here.

    See:
    - corehq/apps/cleanup/management/commands/swap_duplicate_xforms.py
    - couchforms/_design/views/all_submissions_by_domain/map.js

    :param doc: XFormInstance JSON whose ``problem`` starts with
        ``PROBLEM_TEMPLATE_START``.
    :returns: JSON of the saved, repaired form.
    """
    problem = doc["problem"]
    assert problem.startswith(PROBLEM_TEMPLATE_START), doc
    assert doc["doc_type"] == "XFormInstance", doc

    # The deprecated form id sits between the template prefix and " on ".
    after_prefix = problem[len(PROBLEM_TEMPLATE_START):]
    deprecated_id = after_prefix.split(" on ", 1)[0]

    form = XFormInstance.wrap(doc)
    form.deprecated_form_id = deprecated_id
    resolution = XFormOperation(
        user="******",
        date=datetime.utcnow(),
        operation="Resolved bad duplicate form during couch-to-sql "
        "migration. Original problem: %s" % problem,
    )
    form.history.append(resolution)
    form.problem = None

    old_form = XFormInstance.get(deprecated_id)
    if not form.initial_processing_complete:
        if old_form.initial_processing_complete:
            form.initial_processing_complete = True
    form.save()
    return form.to_json()
def swap_doc_types(self, log_file, bad_xform_id, duplicate_xform_id, domain, dry_run):
    """Swap the doc types of a bad XFormInstance and its duplicate.

    The bad form becomes an ``XFormDuplicate`` and the duplicate becomes
    an ``XFormInstance``; both get a ``problem`` message recording the
    swap. Nothing is changed if the bad form's problem already matches
    ``PROBLEM_TEMPLATE_START``. The swap is always logged, but documents
    are only saved when ``dry_run`` is false.
    """
    bad_xform = XFormInstance.get(bad_xform_id)

    # confirm that the doc hasn't already been fixed:
    existing_problem = getattr(bad_xform, 'problem', None) or ""
    if existing_problem and re.match(PROBLEM_TEMPLATE_START, existing_problem):
        self.log_already_fixed(log_file, bad_xform_id, domain)
        return

    duplicate_xform = XFormInstance.get(duplicate_xform_id)
    timestamp = datetime.now().isoformat()

    # Convert the XFormInstance to an XFormDuplicate
    bad_xform.doc_type = XFormDuplicate.__name__
    bad_xform.problem = BAD_FORM_PROBLEM_TEMPLATE.format(duplicate_xform_id, timestamp)
    bad_xform = XFormDuplicate.wrap(bad_xform.to_json())

    # Convert the XFormDuplicate to an XFormInstance
    duplicate_xform.problem = FIXED_FORM_PROBLEM_TEMPLATE.format(
        id_=bad_xform_id, datetime_=timestamp
    )
    duplicate_xform.doc_type = XFormInstance.__name__
    duplicate_xform = XFormInstance.wrap(duplicate_xform.to_json())

    self.log_swap(log_file, bad_xform_id, domain, duplicate_xform_id, dry_run)

    if dry_run:
        return
    duplicate_xform.save()
    bad_xform.save()
def _get_submissions_for_patient_by_date(patient, visit_dates, schema='http://dev.commcarehq.org/pact/dots_form'):
    """Return a patient's submissions grouped by visit date.

    Queries the ``pactcarehq/all_submits_by_patient_date`` view with one
    key of the form ``[pact_id, year, month, day, schema]`` per visit date.

    :param patient: object exposing ``couchdoc.pact_id``.
    :param visit_dates: iterable of date-like objects with ``year``,
        ``month`` and ``day`` attributes.
    :param schema: xmlns placed last in every view key.
    :returns: dict mapping each visit date that produced a view row to a
        list of ``XFormInstance`` objects wrapped from that row's value.
    """
    keys = []
    date_by_key = {}
    for visit_date in visit_dates:
        key = [patient.couchdoc.pact_id, visit_date.year, visit_date.month,
               visit_date.day, schema]
        keys.append(key)
        # A tuple of stringified parts keeps the parts separate. Joining
        # them into one string made e.g. (2011, 1, 11) and (2011, 11, 1)
        # collide, so rows could be filed under the wrong visit date.
        date_by_key[tuple(str(part) for part in key)] = visit_date

    submit_reduction = XFormInstance.view('pactcarehq/all_submits_by_patient_date', keys=keys)

    ret = {}
    for row in submit_reduction:
        lookup = tuple(str(part) for part in row['key'])
        ret[date_by_key[lookup]] = [XFormInstance.wrap(x) for x in row['value']]
    return ret
def rows(self):
    """Build transposed rows: one row per column slug, one cell per user.

    For each user id from the report, the most recent form in the
    ``reports_forms/all_forms`` view between the report's start and end
    dates is fetched and each column slug is evaluated with ``xpath``.
    Users without a submission get ``NO_VALUE`` in every cell.
    """
    domain = self.report.filter_values['domain']
    startdate = self.report.filter_values['startdate']
    enddate = self.report.filter_values['enddate']
    key_base = 'submission xmlns user'

    # todo this will do one couch view hit per relevant user. could be optimized to sql or something if desired
    user_ids = self.report.get_user_ids()
    rows = []
    for user in user_ids:
        # descending=True, so the start key carries the end date.
        last_submission = XFormInstance.get_db().view(
            'reports_forms/all_forms',
            startkey=[key_base, domain, self.xmlns, user, enddate],
            endkey=[key_base, domain, self.xmlns, user, startdate],
            limit=1,
            reduce=False,
            include_docs=True,
            descending=True,
        ).one()
        if not last_submission:
            user_row = [NO_VALUE] * len(self.column_slugs)
        else:
            wrapped = XFormInstance.wrap(last_submission['doc'])
            user_row = [wrapped.xpath(path) for path in self.column_slugs]
        rows.append((user, user_row))

    # format
    formatted_rows = list(self.report.format.format_output(rows))

    # transpose
    transposed = []
    for index, slug in enumerate(self.column_slugs):
        transposed.append([_(slug)] + [formatted[index] for formatted in formatted_rows])
    return transposed
def handle(self, *args, **options):
    """Archive every form whose id is in the first column of a CSV file.

    Positional args: ``filepath`` (CSV of form ids) and ``archiving_user``
    (passed to ``archive`` as ``user_id``). Problems are printed rather
    than raised; a failure on one form does not stop the others.
    """
    if len(args) < 2:
        print("please specify a filepath and an archiving_user")
        return
    filepath = args[0]
    archiving_user = args[1]

    try:
        id_file = open(filepath)
    except Exception as e:
        print("there was an issue opening the file: %s" % e)
        return

    try:
        # The context manager closes the handle, which was previously leaked.
        with id_file:
            form_ids = [row[0] for row in csv.reader(id_file)]
    except Exception as e:
        print("there was an issue reading the file %s" % e)
        return

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        try:
            xform = XFormInstance.wrap(xform_doc)
            xform.archive(user_id=archiving_user)
            print("Archived form %s in domain %s" % (
                xform._id, xform.domain
            ))
        except Exception as e:
            print("Issue archiving XFORM %s for domain %s: %s" % (
                xform_doc['_id'], xform_doc['domain'], e
            ))
def handle(self, *args, **options):
    """Archive every form whose id is in the first column of a CSV file.

    Positional args: ``filepath`` (CSV of form ids) and ``archiving_user``
    (passed to ``archive`` as ``user``). Problems are printed rather than
    raised; a failure on one form does not stop the others.
    """
    if len(args) < 2:
        print("please specify a filepath and an archiving_user")
        return
    filepath = args[0]
    archiving_user = args[1]

    try:
        id_file = open(filepath)
    except Exception as e:
        print("there was an issue opening the file: %s" % e)
        return

    try:
        # The context manager closes the handle, which was previously leaked.
        with id_file:
            form_ids = [row[0] for row in csv.reader(id_file)]
    except Exception as e:
        print("there was an issue reading the file %s" % e)
        return

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        try:
            xform = XFormInstance.wrap(xform_doc)
            xform.archive(user=archiving_user)
            print("Archived form %s in domain %s" % (xform._id, xform.domain))
        except Exception as e:
            print("Issue archiving XFORM %s for domain %s: %s" % (
                xform_doc['_id'], xform_doc['domain'], e))
def archive_forms(domain, user, uploaded_data):
    """Archive the forms listed in ``uploaded_data`` on behalf of ``user``.

    :param domain: forms belonging to any other domain are reported as errors.
    :param user: object whose ``username`` is recorded as the archiver.
    :param uploaded_data: iterable of mappings with a ``"form_id"`` entry.
    :returns: ``{"errors": [...], "success": [...]}`` of translated messages.
    """
    errors = []
    successes = []
    form_ids = [row.get("form_id") for row in uploaded_data]
    # Ids still in this set after the loop were never returned by iter_docs.
    missing_forms = set(form_ids)

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(xform_doc)
        missing_forms.discard(xform["_id"])

        if xform["domain"] != domain:
            wrong_domain = _(u"XFORM {form_id} does not belong to domain {domain}")
            errors.append(wrong_domain.format(form_id=xform["_id"], domain=xform["domain"]))
            continue

        xform_string = _(u"XFORM {form_id} for domain {domain} by user '{username}'").format(
            form_id=xform["_id"], domain=xform["domain"], username=user.username
        )
        try:
            xform.archive(user=user.username)
            successes.append(_(u"Successfully archived {form}").format(form=xform_string))
        except Exception as e:
            errors.append(_(u"Could not archive {form}: {error}").format(form=xform_string, error=e))

    for missing_form_id in missing_forms:
        errors.append(_(u"Could not find XForm {form_id}").format(form_id=missing_form_id))

    return {"errors": errors, "success": successes}
def test_normal_form_with_problem_and_case_updates(self):
    """A swapped duplicate/replacement form pair is resolved by the migration.

    Builds a replacement form ("new-form") carrying the fixed-form problem
    text and turns the original ("test-form") into an XFormDuplicate, then
    checks that after migration the case references the replacement and
    the replacement has ``deprecated_form_id`` set and no problem.
    """
    old_id = "test-form"
    new_id = "new-form"
    when = "a day long ago"

    bad_form = submit_form_locally(TEST_FORM, self.domain_name).xform
    assert bad_form._id == old_id, bad_form

    # Clone the submitted form under a new id with the "fixed" problem text.
    form = XFormInstance.wrap(bad_form.to_json())
    form._id = new_id
    form._rev = None
    form.problem = FIXED_FORM_PROBLEM_TEMPLATE.format(id_=old_id, datetime_=when)
    assert len(form.external_blobs) == 1, form.external_blobs
    form.external_blobs.pop("form.xml")
    with bad_form.fetch_attachment("form.xml", stream=True) as xml:
        form.put_attachment(xml, "form.xml", content_type="text/xml")
    form.save()

    # Mark the original as the duplicate.
    bad_form.doc_type = "XFormDuplicate"
    bad_form.problem = BAD_FORM_PROBLEM_TEMPLATE.format(new_id, when)
    bad_form.save()

    case = self._get_case("test-case")
    self.assertEqual(case.xform_ids, [old_id])

    self._do_migration_and_assert_flags(self.domain_name)

    case = self._get_case("test-case")
    self.assertEqual(case.xform_ids, [new_id])
    self._compare_diffs([])

    form = FormAccessors(self.domain_name).get_form('new-form')
    self.assertEqual(form.deprecated_form_id, old_id)
    self.assertIsNone(form.problem)
def rows(self):
    """Build transposed rows: one row per column slug, one cell per user.

    For each user id from the report, the most recent form in the
    ``all_forms/view`` view between the report's start and end dates is
    fetched and each column slug is evaluated with ``get_data``. Users
    without a submission get ``NO_VALUE`` in every cell.
    """
    domain = self.report.filter_values['domain']
    startdate = self.report.filter_values['startdate']
    enddate = self.report.filter_values['enddate']
    key_base = 'submission xmlns user'

    # todo this will do one couch view hit per relevant user. could be optimized to sql or something if desired
    user_ids = self.report.get_user_ids()
    rows = []
    for user in user_ids:
        # descending=True, so the start key carries the end date.
        last_submission = XFormInstance.get_db().view(
            'all_forms/view',
            startkey=[key_base, domain, self.xmlns, user, enddate],
            endkey=[key_base, domain, self.xmlns, user, startdate],
            limit=1,
            reduce=False,
            include_docs=True,
            descending=True,
        ).one()
        if not last_submission:
            user_row = [NO_VALUE] * len(self.column_slugs)
        else:
            wrapped = XFormInstance.wrap(last_submission['doc'])
            user_row = [wrapped.get_data(path) for path in self.column_slugs]
        rows.append((user, user_row))

    # format
    formatted_rows = list(self.report.format.format_output(rows))

    # transpose
    transposed = []
    for index, slug in enumerate(self.column_slugs):
        transposed.append([_(slug)] + [formatted[index] for formatted in formatted_rows])
    return transposed
def _save_form_and_case(test):
    """Save the bug form and bug case fixtures, registering their cleanup.

    :param test: test case object; ``addCleanup`` is used to delete both
        documents afterwards.
    :returns: ``(form, case)`` tuple of the saved documents.
    """
    saved = []
    fixtures = (
        (XFormInstance, 'bug_form.json'),
        (CommCareCase, 'bug_case.json'),
    )
    for doc_class, fixture_name in fixtures:
        doc = doc_class.wrap(_get_doc_data(fixture_name))
        doc.save()
        test.addCleanup(doc.delete)
        saved.append(doc)
    form, case = saved
    return form, case
def _process_main_forms(self):
    """Migrate all main XFormInstance docs (including cases and ledgers).

    Rebuilds the locking queue from a previous run, iterates the
    resumable change feed and hands each form to the queue/pool, then
    drains the queue and waits for the pool to finish. Forms with a
    ``problem`` are repaired when the text starts with
    ``PROBLEM_TEMPLATE_START``; otherwise their id is recorded in
    ``errors_with_normal_doc_type`` and they are skipped.
    """
    last_received_on = datetime.min
    # form_id needs to be on self to release appropriately
    self.queues = PartiallyLockingQueue("form_id", max_size=10000, run_timestamp=self.run_timestamp)
    pool = Pool(15)
    self._rebuild_queues(pool)

    # process main forms (including cases and ledgers)
    changes = self._get_resumable_iterator(['XFormInstance'], 'main_forms')

    # NOTE(review): self.queues used to be re-created here without
    # run_timestamp, replacing the queue _rebuild_queues had just been
    # given. That looked like a merge leftover, so the one queue is kept.
    # Confirm against PartiallyLockingQueue / _rebuild_queues.
    for change in self._with_progress(['XFormInstance'], changes):
        log.debug('Processing doc: {}({})'.format('XFormInstance', change.id))
        form = change.get_document()
        if form.get('problem'):
            if six.text_type(
                    form['problem']).startswith(PROBLEM_TEMPLATE_START):
                form = _fix_replacement_form_problem_in_couch(form)
            else:
                self.errors_with_normal_doc_type.append(change.id)
                continue
        try:
            wrapped_form = XFormInstance.wrap(form)
            form_received = wrapped_form.received_on
            # Forms are expected to arrive in received_on order.
            assert last_received_on <= form_received
            last_received_on = form_received
            self._try_to_process_form(wrapped_form, pool)
            self._try_to_process_queues(pool)
        except Exception:
            log.exception("Error migrating form %s", change.id)

    # finish up the queues once all changes have been iterated through
    update_interval = timedelta(seconds=10)
    next_check = datetime.now()
    while self.queues.has_next():
        wrapped_form = self.queues.get_next()
        if wrapped_form:
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        else:
            gevent.sleep(0.01)  # swap greenlets

        remaining_items = self.queues.remaining_items + len(pool)
        now = datetime.now()
        if now > next_check:
            log.info('Waiting on {} docs'.format(remaining_items))
            next_check += update_interval

    while not pool.join(timeout=10):
        log.info('Waiting on {} docs'.format(len(pool)))

    self._log_main_forms_processed_count()
def forms_by_case(self):
    """Group wrapped OPM forms by the case id found in their case block.

    :returns: ``defaultdict(list)`` mapping ``@case_id`` to the
        ``XFormInstance`` objects whose xmlns is in ``OPM_XMLNSs``.
    """
    assert self.cases is not None, "SharedDataProvider was not instantiated with cases"
    all_form_ids = chain(*(case.xform_ids for case in self.cases))
    grouped = defaultdict(list)
    for form_doc in iter_docs(XFormInstance.get_db(), all_form_ids):
        if form_doc["xmlns"] not in OPM_XMLNSs:
            continue
        case_id = form_doc["form"]["case"]["@case_id"]
        grouped[case_id].append(XFormInstance.wrap(form_doc))
    return grouped
def test_error_with_normal_doc_type_migration(self):
    """A form re-saved with doc_type XFormInstance migrates as one XFormError.

    Submits a form whose case block has an empty ``case_id``, re-saves its
    JSON with ``doc_type`` forced to ``XFormInstance``, runs the migration,
    and expects exactly one ``XFormError`` form id and no diffs.
    """
    bad_form_xml = """<data xmlns="example.com/foo"> <meta> <instanceID>im-a-bad-form</instanceID> </meta> <case case_id="" xmlns="http://commcarehq.org/case/transaction/v2"> <update><foo>bar</foo></update> </case> </data>"""
    submit_form_locally(bad_form_xml, self.domain_name)

    form = FormAccessors(self.domain_name).get_form('im-a-bad-form')
    as_json = form.to_json()
    as_json['doc_type'] = 'XFormInstance'
    XFormInstance.wrap(as_json).save()

    self._do_migration_and_assert_flags(self.domain_name)

    error_form_ids = self._get_form_ids('XFormError')
    self.assertEqual(1, len(error_form_ids))
    self._compare_diffs([])
def forms_by_case(self):
    """Group wrapped OPM forms by the case id found in their case block.

    :returns: ``defaultdict(list)`` mapping ``@case_id`` to the
        ``XFormInstance`` objects whose xmlns is in ``OPM_XMLNSs``.
    """
    assert self.cases is not None, \
        "SharedDataProvider was not instantiated with cases"
    all_form_ids = chain(*(case.xform_ids for case in self.cases))
    grouped = defaultdict(list)
    for form_doc in iter_docs(XFormInstance.get_db(), all_form_ids):
        if form_doc['xmlns'] not in OPM_XMLNSs:
            continue
        case_id = form_doc['form']['case']['@case_id']
        grouped[case_id].append(XFormInstance.wrap(form_doc))
    return grouped
def _process_main_forms(domain):
    """Migrate each main form of ``domain`` in change-feed order.

    Every change document is wrapped as an ``XFormInstance``, checked to
    be no older than the previous form's ``received_on``, reported on
    stdout and passed to ``_migrate_form_and_associated_models``.
    """
    previous_received_on = datetime.min
    # process main forms (including cases and ledgers)
    for change in _get_main_form_iterator(domain).iter_all_changes():
        form = change.get_document()
        wrapped_form = XFormInstance.wrap(form)
        form_received = iso_string_to_datetime(form['received_on'])
        assert previous_received_on <= form_received
        previous_received_on = form_received
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('processing form {}: {}'.format(form['_id'], form_received))
        _migrate_form_and_associated_models(domain, wrapped_form)
def _process_main_forms(self):
    """Migrate all main forms concurrently using a locking queue and a pool.

    Forms with a ``problem`` are recorded in ``errors_with_normal_doc_type``
    and skipped. Forms that touch cases go through ``self.queues`` (keyed
    by case ids); forms without cases are spawned directly. After the
    change feed is exhausted the queue is drained and the pool joined.
    """
    last_received_on = datetime.min
    # process main forms (including cases and ledgers)
    changes = _get_main_form_iterator(self.domain).iter_all_changes()

    # form_id needs to be on self to release appropriately
    self.queues = PartiallyLockingQueue("form_id", max_size=10000)
    pool = Pool(15)

    for change in self._with_progress(['XFormInstance'], changes):
        self.log_debug('Processing doc: {}({})'.format(
            'XFormInstance', change.id))
        form = change.get_document()
        if form.get('problem', None):
            self.errors_with_normal_doc_type.append(change.id)
            continue

        wrapped_form = XFormInstance.wrap(form)
        form_received = wrapped_form.received_on
        assert last_received_on <= form_received
        last_received_on = form_received

        case_ids = get_case_ids_from_form(wrapped_form)
        if not case_ids:
            # no case involved, just go ahead and process it
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        elif self.queues.try_obj(case_ids, wrapped_form):
            # the form involves a case and we can process it right away
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        elif self.queues.full:
            sleep(0.01)  # swap greenlets

        # regularly check if we can empty the queues
        queued_form = self.queues.get_next()
        while queued_form:
            pool.spawn(self._migrate_form_and_associated_models_async, queued_form)
            queued_form = self.queues.get_next()

    # finish up the queues once all changes have been iterated through
    while self.queues.has_next():
        wrapped_form = self.queues.get_next()
        if not wrapped_form:
            sleep(0.01)  # swap greenlets
        else:
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)

        remaining_items = self.queues.remaining_items + len(pool)
        if remaining_items % 10 == 0:
            self.log_info('Waiting on {} docs'.format(remaining_items))

    while not pool.join(timeout=10):
        self.log_info('Waiting on {} docs'.format(len(pool)))
def _compare_forms(self, actual_json, expected_json, msg):
    """Assert ``actual_json`` equals ``expected_json`` after normalisation.

    Fields that legitimately differ per run are copied from the actual
    form into ``expected_json`` (which is mutated), and the expectation is
    round-tripped through ``XFormInstance`` before comparing.
    """
    overrides = {
        'domain': self.domain,
        'initial_processing_complete': True,
    }
    for copied_field in ('received_on', '_rev', '#export_tag', 'auth_context'):
        overrides[copied_field] = actual_json[copied_field]
    expected_json.update(overrides)

    normalised = XFormInstance.wrap(expected_json).to_json()
    self.assertDictEqual(actual_json, normalised, msg)
def _get_form_or_404(id):
    """Return the ``XFormInstance`` with the given id or raise ``Http404``.

    ``Http404`` is raised both when the document does not exist and when
    its ``doc_type`` is not ``XFormInstance``.
    """
    # maybe this should be a more general utility a-la-django's get_object_or_404
    try:
        raw_doc = XFormInstance.get_db().get(id)
    except ResourceNotFound:
        raise Http404()

    if raw_doc.get('doc_type') != 'XFormInstance':
        raise Http404()
    return XFormInstance.wrap(raw_doc)
def archive_or_restore_forms(domain, user, form_ids, archive_or_restore, task=None, from_excel=False):
    """Archive or unarchive forms and collect translated result messages.

    :param domain: forms belonging to any other domain are reported as errors.
    :param user: object whose ``username`` is recorded on each operation.
    :param form_ids: sized collection of form ids to process.
    :param archive_or_restore: object providing ``is_archive_mode()`` and
        ``success_text``.
    :param task: optional task handle; progress is reported through
        ``DownloadBase.set_progress`` when given.
    :param from_excel: when true, return the bare response dict.
    :returns: ``{'errors': [...], 'success': [...]}`` when ``from_excel``
        is true, otherwise ``{"messages": response}`` where ``response``
        also carries ``success_count_msg``.
    """
    response = {
        'errors': [],
        'success': [],
    }

    # Ids still in this set after the loop were never returned by iter_docs.
    missing_forms = set(form_ids)
    success_count = 0

    if task:
        DownloadBase.set_progress(task, 0, len(form_ids))

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(xform_doc)
        missing_forms.discard(xform['_id'])

        if xform['domain'] != domain:
            response['errors'].append(_(u"XFORM {form_id} does not belong to domain {domain}").format(
                form_id=xform['_id'], domain=xform['domain']))
            continue

        xform_string = _(u"XFORM {form_id} for domain {domain} by user '{username}'").format(
            form_id=xform['_id'],
            domain=xform['domain'],
            username=user.username)

        try:
            if archive_or_restore.is_archive_mode():
                xform.archive(user_id=user.username)
                message = _(u"Successfully archived {form}").format(form=xform_string)
            else:
                xform.unarchive(user_id=user.username)
                message = _(u"Successfully unarchived {form}").format(form=xform_string)
            response['success'].append(message)
            success_count = success_count + 1
        except Exception as e:
            response['errors'].append(_(u"Could not archive {form}: {error}").format(
                form=xform_string, error=e))

        if task:
            DownloadBase.set_progress(task, success_count, len(form_ids))

    for missing_form_id in missing_forms:
        response['errors'].append(
            _(u"Could not find XForm {form_id}").format(form_id=missing_form_id))

    if from_excel:
        return response

    # Translate the template first, then interpolate. Formatting before
    # the lookup meant the interpolated string could never match a
    # catalog entry.
    response["success_count_msg"] = _("{success_msg} {count} form(s)").format(
        success_msg=archive_or_restore.success_text,
        count=success_count)
    return {"messages": response}
def _get_error_submissions_without_xmlns():
    """Find non-XFormInstance submissions whose xmlns is ``'undefined'``.

    :returns: ``(total, submissions)`` where ``total`` is the ES result
        total and ``submissions`` is a generator of wrapped
        ``XFormInstance`` objects for the hits.
    """
    query = FormES().xmlns('undefined')
    query = query.remove_default_filter("is_xform_instance")
    query = query.filter(NOT(doc_type('xforminstance')))
    query = query.source(['_id'])
    result = query.run()

    total_error_submissions = result.total
    hit_ids = (hit['_id'] for hit in result.hits)
    error_docs = iter_docs(XFormInstance.get_db(), hit_ids)
    error_submissions = (XFormInstance.wrap(doc) for doc in error_docs)
    return total_error_submissions, error_submissions
def process_xform(self, doc):
    """Process XFormInstance document asynchronously

    If the document cannot be wrapped, the error is logged, empty form
    diffs are saved for it and nothing else happens.
    """
    form_id = doc["_id"]
    log.debug('Processing doc: XFormInstance(%s)', form_id)
    try:
        wrapped_form = XFormInstance.wrap(doc)
    except Exception:
        log.exception("Error migrating form %s", form_id)
        self.statedb.save_form_diffs(doc, {})
        return
    self._try_to_process_form(wrapped_form)
    self._try_to_empty_queues()
def _compare_forms(self, actual_json, expected_json, msg):
    """Assert ``actual_json`` equals ``expected_json`` after normalisation.

    Fields that legitimately differ per run (including external blob ids)
    are copied from the actual form into ``expected_json`` (which is
    mutated), and the expectation is round-tripped through
    ``XFormInstance`` before comparing.
    """
    expected_json.update({
        'domain': self.domain,
        'received_on': actual_json['received_on'],
        '_rev': actual_json['_rev'],
        'initial_processing_complete': True,
        '#export_tag': actual_json['#export_tag'],
        'auth_context': actual_json['auth_context'],
    })
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for name, meta in actual_json.get("external_blobs", {}).items():
        expected_json["external_blobs"][name]["id"] = meta["id"]
    expected_json = XFormInstance.wrap(expected_json).to_json()
    self.assertDictEqual(actual_json, expected_json, msg)
def _compare_forms(self, actual_json, expected_json, msg):
    """Assert ``actual_json`` equals ``expected_json`` after normalisation.

    Fields that legitimately differ per run are copied from the actual
    form into ``expected_json`` (which is mutated), and the expectation is
    round-tripped through ``XFormInstance`` before comparing.
    """
    overrides = {
        "domain": self.domain,
        "initial_processing_complete": True,
    }
    for copied_field in ("received_on", "_rev", "#export_tag", "auth_context"):
        overrides[copied_field] = actual_json[copied_field]
    expected_json.update(overrides)

    normalised = XFormInstance.wrap(expected_json).to_json()
    self.assertDictEqual(actual_json, normalised, msg)
def _get_error_submissions_without_xmlns():
    """Find non-XFormInstance submissions whose xmlns is ``'undefined'``.

    :returns: ``(total, submissions)`` where ``total`` is the ES result
        total and ``submissions`` is a generator of wrapped
        ``XFormInstance`` objects for the hits.
    """
    query = FormES().xmlns('undefined')
    query = query.remove_default_filter("is_xform_instance")
    query = query.filter(NOT(doc_type('xforminstance')))
    query = query.source(['_id'])
    result = query.run()

    total_error_submissions = result.total
    hit_ids = (hit['_id'] for hit in result.hits)
    error_docs = iter_docs(XFormInstance.get_db(), hit_ids)
    error_submissions = (XFormInstance.wrap(doc) for doc in error_docs)
    return total_error_submissions, error_submissions
def _process_main_forms(self):
    """Migrate all main XFormInstance docs (including cases and ledgers).

    Rebuilds the locking queue from a previous run, iterates the
    resumable change feed and hands each form to the queue/pool, then
    drains the queue and waits for the pool to finish. Forms with a
    ``problem`` are recorded in ``errors_with_normal_doc_type`` and
    skipped.
    """
    last_received_on = datetime.min
    # form_id needs to be on self to release appropriately
    self.queues = PartiallyLockingQueue("form_id", max_size=10000, run_timestamp=self.run_timestamp)
    pool = Pool(15)
    self._rebuild_queues(pool)

    # process main forms (including cases and ledgers)
    changes = self._get_resumable_iterator(['XFormInstance'], 'main_forms')

    # NOTE(review): self.queues used to be re-created here without
    # run_timestamp, replacing the queue _rebuild_queues had just been
    # given. That looked like a merge leftover, so the one queue is kept.
    # Confirm against PartiallyLockingQueue / _rebuild_queues.
    for change in self._with_progress(['XFormInstance'], changes):
        log.debug('Processing doc: {}({})'.format('XFormInstance', change.id))
        form = change.get_document()
        if form.get('problem', None):
            self.errors_with_normal_doc_type.append(change.id)
            continue
        try:
            wrapped_form = XFormInstance.wrap(form)
            form_received = wrapped_form.received_on
            # Forms are expected to arrive in received_on order.
            assert last_received_on <= form_received
            last_received_on = form_received
            self._try_to_process_form(wrapped_form, pool)
            self._try_to_process_queues(pool)
        except Exception:
            log.exception("Error migrating form %s", change.id)

    # finish up the queues once all changes have been iterated through
    update_interval = timedelta(seconds=10)
    next_check = datetime.now()
    while self.queues.has_next():
        wrapped_form = self.queues.get_next()
        if wrapped_form:
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        else:
            gevent.sleep(0.01)  # swap greenlets

        remaining_items = self.queues.remaining_items + len(pool)
        now = datetime.now()
        if now > next_check:
            log.info('Waiting on {} docs'.format(remaining_items))
            next_check += update_interval

    while not pool.join(timeout=10):
        log.info('Waiting on {} docs'.format(len(pool)))

    self._log_main_forms_processed_count()
def form_multimedia_export(request, domain, app_id):
    """Return a zip of all non-XML attachments for matching form submissions.

    Required GET params: ``xmlns``, ``startdate``, ``enddate``; optional
    ``name`` for the zip file name (defaults to the first form's
    ``@name``). A missing required param yields ``HttpResponseBadRequest``.
    """
    try:
        xmlns = request.GET["xmlns"]
        startdate = request.GET["startdate"]
        enddate = request.GET["enddate"]
        zip_name = request.GET.get("name", None)
    except KeyError:
        return HttpResponseBadRequest()

    def filename(form, question_id, extension):
        form_body = form['form']
        return "%s-%s-%s-%s.%s" % (form_body['@name'],
                                   unidecode(question_id),
                                   form_body['meta']['username'],
                                   form['_id'], extension)

    view_key = [domain, app_id, xmlns]
    stream_file = cStringIO.StringIO()
    zf = zipfile.ZipFile(stream_file, mode='w', compression=zipfile.ZIP_STORED)
    size = 22  # overhead for a zipfile
    unknown_number = 0

    attachment_rows = XFormInstance.get_db().view(
        "attachments/attachments",
        start_key=view_key + [startdate],
        end_key=view_key + [enddate, {}],
        reduce=False)
    form_ids = {row['id'] for row in attachment_rows}

    for form in iter_docs(XFormInstance.get_db(), form_ids):
        wrapped = XFormInstance.wrap(form)
        if not zip_name:
            zip_name = unidecode(form['form']['@name'])
        for attachment_name in form['_attachments'].keys():
            if form['_attachments'][attachment_name]['content_type'] == 'text/xml':
                continue
            extension = unicode(os.path.splitext(attachment_name)[1])
            try:
                question_id = unicode('-'.join(find_question_id(form['form'], attachment_name)))
            except TypeError:
                # No question id could be joined; use a numbered placeholder.
                question_id = unicode('unknown' + str(unknown_number))
                unknown_number += 1
            fname = filename(form, question_id, extension)
            zi = zipfile.ZipInfo(fname, parse(form['received_on']).timetuple())
            zf.writestr(zi, wrapped.fetch_attachment(attachment_name, stream=True).read())
            # includes overhead for file in zipfile
            size += wrapped['_attachments'][attachment_name]['length'] + 88 + 2 * len(fname)

    zf.close()

    response = HttpResponse(stream_file.getvalue(), mimetype="application/zip")
    response['Content-Length'] = size
    response['Content-Disposition'] = 'attachment; filename=%s.zip' % zip_name
    return response
def _process_main_forms(self):
    """Migrate all main forms of ``self.domain`` one at a time.

    Forms with a ``problem`` are recorded in ``errors_with_normal_doc_type``
    and skipped; the rest are wrapped, checked to be in ``received_on``
    order and migrated with their associated models.
    """
    previous_received_on = datetime.min
    # process main forms (including cases and ledgers)
    changes = _get_main_form_iterator(self.domain).iter_all_changes()
    for change in self._with_progress(['XFormInstance'], changes):
        self.log_debug('Processing doc: {}({})'.format('XFormInstance', change.id))
        form = change.get_document()
        has_problem = form.get('problem', None)
        if has_problem:
            self.errors_with_normal_doc_type.append(change.id)
            continue

        wrapped_form = XFormInstance.wrap(form)
        form_received = wrapped_form.received_on
        assert previous_received_on <= form_received
        previous_received_on = form_received
        self._migrate_form_and_associated_models(wrapped_form)
class BiharCase(CommCareCase):
    """CommCareCase that can be dumped to and restored from JSON with its forms."""

    doc_type = 'CommCareCase'

    def dump_json(self):
        """Return a dict holding the case JSON and the JSON of its forms."""
        form_dumps = [form.to_json() for form in self.get_forms()]
        return {
            'case': self.to_json(),
            'forms': form_dumps,
        }

    @classmethod
    def from_dump(cls, json_dump):
        """Rebuild a case from ``dump_json`` output, pre-populating its forms.

        The wrapped forms are stored on ``_forms`` and, keyed by ``_id``,
        on ``_forms_cache``.
        """
        case = BiharCase.wrap(json_dump['case'])
        case._forms = [XFormInstance.wrap(form_json) for form_json in json_dump['forms']]
        case._forms_cache = {form._id: form for form in case._forms}
        return case
def handle(self, *args, **options):
    """Recompute form indicators for forms in a fixed 2013 date window.

    For every indicator domain, all ``FormIndicatorDefinition`` objects
    across its namespaces are collected and applied in bulk to each
    ``XFormInstance`` between 2013-08-01 and 2013-10-15.
    """
    xform_db = XFormInstance.get_db()
    window_start = datetime.date(2013, 8, 1)
    window_end = datetime.date(2013, 10, 15)

    for domain in get_indicator_domains():
        indicators = []
        for namespace in get_namespaces(domain):
            indicators.extend(FormIndicatorDefinition.get_all(namespace, domain))

        form_ids = get_form_ids_by_type(domain, 'XFormInstance',
                                        start=window_start,
                                        end=window_end)

        for doc in iter_docs(xform_db, form_ids):
            xform = XFormInstance.wrap(doc)
            xform.update_indicators_in_bulk(indicators, logger=logging)
def _process_main_forms(self):
    """Migrate all main XFormInstance docs (including cases and ledgers).

    Rebuilds the locking queue from a previous run, iterates the
    resumable change feed and hands each form to the queue/pool, then
    drains the queue and waits for the pool to finish. Forms with a
    ``problem`` are recorded in ``errors_with_normal_doc_type`` and
    skipped.
    """
    last_received_on = datetime.min
    # form_id needs to be on self to release appropriately
    self.queues = PartiallyLockingQueue("form_id", max_size=10000, run_timestamp=self.run_timestamp)
    pool = Pool(15)
    self._rebuild_queues(pool)

    # process main forms (including cases and ledgers)
    changes = _get_main_form_iterator(self.domain).iter_all_changes(
        resumable_key=self._get_resumable_iterator_key('main_forms'))

    # NOTE(review): self.queues used to be re-created here without
    # run_timestamp, replacing the queue _rebuild_queues had just been
    # given. That looked like a merge leftover, so the one queue is kept.
    # Confirm against PartiallyLockingQueue / _rebuild_queues.
    for change in self._with_progress(['XFormInstance'], changes):
        self.log_debug('Processing doc: {}({})'.format(
            'XFormInstance', change.id))
        form = change.get_document()
        if form.get('problem', None):
            self.errors_with_normal_doc_type.append(change.id)
            continue
        wrapped_form = XFormInstance.wrap(form)
        form_received = wrapped_form.received_on
        # Forms are expected to arrive in received_on order.
        assert last_received_on <= form_received
        last_received_on = form_received
        self._try_to_process_form(wrapped_form, pool)
        self._try_to_process_queues(pool)

    # finish up the queues once all changes have been iterated through
    while self.queues.has_next():
        wrapped_form = self.queues.get_next()
        if wrapped_form:
            pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        else:
            gevent.sleep(0.01)  # swap greenlets

        remaining_items = self.queues.remaining_items + len(pool)
        if remaining_items % 10 == 0:
            self.log_info('Waiting on {} docs'.format(remaining_items))

    while not pool.join(timeout=10):
        self.log_info('Waiting on {} docs'.format(len(pool)))

    self._log_main_forms_processed_count()
def get_submissions_without_xmlns():
    """Return the count and an iterator of submissions with xmlns ``"undefined"``.

    Combines rows of the ``couchforms/by_xmlns`` view with the error
    submissions found by ``_get_error_submissions_without_xmlns``.

    :returns: ``(total, submissions)`` where ``submissions`` chains
        wrapped ``XFormInstance`` objects from both sources.
    """
    view_rows = XFormInstance.get_db().view(
        'couchforms/by_xmlns',
        key="undefined",
        include_docs=False,
        reduce=False,
    ).all()
    total_submissions = len(view_rows)

    row_ids = (row['id'] for row in view_rows)
    submission_docs = iter_docs(XFormInstance.get_db(), row_ids)
    wrapped_submissions = (XFormInstance.wrap(doc) for doc in submission_docs)

    total_error_submissions, wrapped_error_submissions = _get_error_submissions_without_xmlns()
    combined_total = total_submissions + total_error_submissions
    return (combined_total,
            chain(wrapped_submissions, wrapped_error_submissions))
def process_xform(self, doc):
    """Process XFormInstance document asynchronously

    A doc with a ``problem`` is repaired first when the text starts with
    ``PROBLEM_TEMPLATE_START``; any other problem form is recorded in the
    state db and skipped. A doc that cannot be wrapped is logged and
    skipped.
    """
    form_id = doc["_id"]
    log.debug('Processing doc: XFormInstance(%s)', form_id)
    if doc.get('problem'):
        if six.text_type(
                doc['problem']).startswith(PROBLEM_TEMPLATE_START):
            doc = _fix_replacement_form_problem_in_couch(doc)
        else:
            self.statedb.add_problem_form(form_id)
            return
    try:
        wrapped_form = XFormInstance.wrap(doc)
    except Exception:
        # Stop here: falling through would hit the unbound wrapped_form
        # and raise UnboundLocalError on top of the logged error.
        log.exception("Error migrating form %s", form_id)
    else:
        self._try_to_process_form(wrapped_form)
        self._try_to_empty_queues()
def prepare_planning_db(domain):
    """Populate the planning DB for ``domain`` from its XFormInstance docs.

    For each form whose submission XML can be fetched, the form JSON is
    regenerated from the XML and stored together with its diff against
    the couch doc. Case actions (including stock case actions) are stored
    grouped by case id, and stock report helpers are stored as JSON.
    Forms whose XML lookup raises ``ResourceNotFound`` are skipped.

    :returns: the result of ``prepare_case_json(planning_db)``.
    """
    db_filepath = get_planning_db_filepath(domain)
    planning_db = PlanningDB.init(db_filepath)
    xform_ids = get_form_ids_by_type(domain, 'XFormInstance')
    xform_db = XFormInstance.get_db()

    for xform in iter_docs(xform_db, xform_ids):
        xform_id = xform['_id']
        try:
            xml = _get_submission_xml(xform, xform_db)
        except ResourceNotFound:
            continue

        new_form_json = _get_new_form_json(xml, xform_id)
        case_updates = get_case_updates(new_form_json)

        # Wrap a copy so the original couch doc stays intact for the diff.
        xform_copy = deepcopy(xform)
        xform_copy['form'] = new_form_json
        xformdoc = XFormInstance.wrap(xform_copy)
        xformdoc_json = xformdoc.to_json()

        planning_db.add_form(xform_id, xformdoc_json)
        planning_db.add_diffs('form', xform_id,
                              json_diff(xform, xformdoc_json))

        case_actions = [
            (case_update.id, action.xform_id, action.to_json())
            for case_update in case_updates
            for action in case_update.get_case_actions(xformdoc)
        ]
        stock_report_helpers, stock_case_actions = get_stock_actions(xformdoc)
        case_actions.extend(stock_case_actions)

        actions_by_case = collections.defaultdict(list)
        for case_id, action_form_id, case_action in case_actions:
            actions_by_case[case_id].append((action_form_id, case_action))

        for case_id, grouped_actions in actions_by_case.items():
            planning_db.ensure_case(case_id)
            planning_db.add_case_actions(case_id, grouped_actions)

        planning_db.add_stock_report_helpers([
            helper.to_json() for helper in stock_report_helpers
        ])

    return prepare_case_json(planning_db)
def get_submissions_without_xmlns():
    """Return the count and an iterator of submissions with xmlns ``"undefined"``.

    Combines rows of the ``couchforms/by_xmlns`` view with the error
    submissions found by ``_get_error_submissions_without_xmlns``.

    :returns: ``(total, submissions)`` where ``submissions`` chains
        wrapped ``XFormInstance`` objects from both sources.
    """
    view_rows = XFormInstance.get_db().view(
        'couchforms/by_xmlns',
        key="undefined",
        include_docs=False,
        reduce=False,
    ).all()
    total_submissions = len(view_rows)

    row_ids = (row['id'] for row in view_rows)
    submission_docs = iter_docs(XFormInstance.get_db(), row_ids)
    wrapped_submissions = (XFormInstance.wrap(doc) for doc in submission_docs)

    total_error_submissions, wrapped_error_submissions = _get_error_submissions_without_xmlns()
    combined_total = total_submissions + total_error_submissions
    return (combined_total,
            chain(wrapped_submissions, wrapped_error_submissions))
def new_form_from_old(cls, existing_form, xml, value_responses_map, user_id):
    """Create an edited copy of ``existing_form`` and deprecate the original.

    :param existing_form: form to copy (round-tripped through its JSON).
    :param xml: etree element serialised as the new ``form.xml`` attachment.
    :param value_responses_map: maps question paths to new response values
        written into the copy's ``form_data``.
    :param user_id: not used by this implementation.
    :returns: ``(existing_form, new_form)`` as returned by
        ``apply_deprecation``.
    """
    from corehq.form_processor.parsers.form import apply_deprecation

    new_form = XFormInstance.wrap(existing_form.to_json())

    for question, response in six.iteritems(value_responses_map):
        container = new_form.form_data
        path = XFormQuestionValueIterator(question)
        # Walk down to the container holding the question's final node.
        for (qid, repeat_index) in path:
            container = container[qid]
            if repeat_index is not None:
                container = container[repeat_index]
        container[path.last()] = response

    new_xml = etree.tostring(xml)
    new_form._deferred_blobs = None     # will be re-populated by apply_deprecation
    new_form.external_blobs.clear()     # will be re-populated by apply_deprecation
    new_form.deferred_put_attachment(new_xml, "form.xml", content_type="text/xml")

    existing_form, new_form = apply_deprecation(existing_form, new_form)
    return (existing_form, new_form)
def new_form_from_old(cls, existing_form, xml, value_responses_map, user_id):
    """Create an edited copy of ``existing_form`` and deprecate the original.

    :param existing_form: form to copy (round-tripped through its JSON).
    :param xml: not used by this implementation.
    :param value_responses_map: maps question paths to new response values
        written into the copy's ``form_data``.
    :param user_id: not used by this implementation.
    :returns: ``(existing_form, new_form)`` as returned by
        ``apply_deprecation``.
    """
    from corehq.form_processor.parsers.form import apply_deprecation

    new_form = XFormInstance.wrap(existing_form.to_json())

    for question, response in six.iteritems(value_responses_map):
        container = new_form.form_data
        path = XFormQuestionValueIterator(question)
        # Walk down to the container holding the question's final node.
        for (qid, repeat_index) in path:
            container = container[qid]
            if repeat_index is not None:
                container = container[repeat_index]
        container[path.last()] = response

    new_form._deferred_blobs = None     # will be re-populated by apply_deprecation
    new_form.external_blobs.clear()     # will be re-populated by apply_deprecation

    existing_form, new_form = apply_deprecation(existing_form, new_form)
    return (existing_form, new_form)
def _process_main_forms(self):
    """Migrate every main ``XFormInstance`` change for ``self.domain``.

    Documents carrying a truthy ``problem`` value are not migrated; their
    change ids are collected in ``self.errors_with_normal_doc_type``.
    Asserts that forms arrive in non-decreasing ``received_on`` order.

    Raises:
        Whatever ``_migrate_form_and_associated_models`` raises, after the
        failing change id has been logged.
    """
    doc_type = 'XFormInstance'
    newest_received_on = datetime.min

    # process main forms (including cases and ledgers)
    all_changes = _get_main_form_iterator(self.domain).iter_all_changes()
    for change in self._with_progress([doc_type], all_changes):
        self.log_debug('Processing doc: {}({})'.format(doc_type, change.id))
        raw_form = change.get_document()
        if raw_form.get('problem', None):
            self.errors_with_normal_doc_type.append(change.id)
            continue

        xform = XFormInstance.wrap(raw_form)
        received = xform.received_on
        assert newest_received_on <= received
        newest_received_on = received

        try:
            self._migrate_form_and_associated_models(xform)
        except:
            # Log which form failed, then let the original error propagate.
            self.log_error("Unable to migrate form: {}".format(change.id))
            raise
def handle(self, *args, **options):
    """Update form indicators in bulk for every indicator domain.

    For each domain from ``get_indicator_domains()`` this gathers all
    ``FormIndicatorDefinition`` objects across the domain's namespaces, then
    applies them to each ``XFormInstance`` returned by the
    ``couchforms/all_submissions_by_domain`` view between the keys ending in
    "2013-08-01" and "2013-10-15".
    """
    xform_db = XFormInstance.get_db()
    for domain in get_indicator_domains():
        indicators = [
            definition
            for namespace in get_namespaces(domain)
            for definition in FormIndicatorDefinition.get_all(namespace, domain)
        ]

        key_prefix = [domain, "by_type", "XFormInstance"]
        rows = xform_db.view(
            'couchforms/all_submissions_by_domain',
            startkey=key_prefix + ["2013-08-01"],
            endkey=key_prefix + ["2013-10-15"],
            reduce=False,
            include_docs=False,
        ).all()

        submission_ids = [row['id'] for row in rows]
        for raw_doc in iter_docs(xform_db, submission_ids):
            xform = XFormInstance.wrap(raw_doc)
            xform.update_indicators_in_bulk(indicators, logger=logging)
def archive_forms(domain, user, uploaded_data):
    """Archive the forms listed in ``uploaded_data`` on behalf of ``user``.

    Args:
        domain: domain the forms must belong to; forms from any other domain
            are reported as errors and left untouched.
        user: object whose ``username`` is recorded as the archiving user.
        uploaded_data: iterable of rows exposing ``.get('form_id')``.

    Returns:
        dict with two lists of translated messages: ``'errors'`` (wrong
        domain, archive failures, ids that were not found) and ``'success'``.
    """
    errors = []
    successes = []

    form_ids = [row.get('form_id') for row in uploaded_data]
    # Every id starts out as "missing" and is struck off once its doc is seen.
    not_found = set(form_ids)

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(xform_doc)
        not_found.discard(xform['_id'])

        if xform['domain'] != domain:
            wrong_domain = _(u"XFORM {form_id} does not belong to domain {domain}")
            errors.append(
                wrong_domain.format(form_id=xform['_id'], domain=xform['domain']))
            continue

        description = _(u"XFORM {form_id} for domain {domain} by user '{username}'")
        xform_string = description.format(
            form_id=xform['_id'],
            domain=xform['domain'],
            username=user.username,
        )

        try:
            xform.archive(user=user.username)
            successes.append(
                _(u"Successfully archived {form}").format(form=xform_string))
        except Exception as e:
            # Best effort: report the failure and carry on with the next form.
            errors.append(
                _(u"Could not archive {form}: {error}").format(
                    form=xform_string, error=e))

    errors.extend(
        _(u"Could not find XForm {form_id}").format(form_id=missing_form_id)
        for missing_form_id in not_found
    )

    return {
        'errors': errors,
        'success': successes,
    }
def handle(self, *args, **options):
    """Update form indicators in bulk for every indicator domain.

    Per domain: collect the ``FormIndicatorDefinition`` objects of all its
    namespaces, look up submission ids in the
    ``couchforms/all_submissions_by_domain`` view (keys ending in
    "2013-08-01" through "2013-10-15"), and run
    ``update_indicators_in_bulk`` on each wrapped form.
    """
    xform_db = XFormInstance.get_db()

    for domain in get_indicator_domains():
        indicators = []
        for namespace in get_namespaces(domain):
            indicators += FormIndicatorDefinition.get_all(namespace, domain)

        view_key = [domain, "by_type", "XFormInstance"]
        view_result = xform_db.view(
            'couchforms/all_submissions_by_domain',
            startkey=view_key + ["2013-08-01"],
            endkey=view_key + ["2013-10-15"],
            reduce=False,
            include_docs=False,
        )
        form_ids = [row['id'] for row in view_result.all()]

        for raw_doc in iter_docs(xform_db, form_ids):
            XFormInstance.wrap(raw_doc).update_indicators_in_bulk(
                indicators, logger=logging)
def _process_skipped_forms(self):
    """Migrate forms that a previous migration skipped.

    Note: case diffs are not performed here.

    Documents that fail to wrap are logged and passed over. Progress is
    logged every 100 migrated forms.
    """
    migrated = 0
    with self.counter('skipped_forms', 'XFormInstance.id') as add_form:
        for doc in self._iter_skipped_forms():
            try:
                form = XFormInstance.wrap(doc)
            except Exception:
                log.exception("Error wrapping form %s", doc)
                continue

            self._migrate_form_and_associated_models(form)
            add_form()
            migrated += 1
            if not migrated % 100:
                log.info("migrated %s previously skipped forms", migrated)

    # The counter entry is dropped unless the stopper reports a clean break.
    if not self.stopper.clean_break:
        self.counter.pop("XFormInstance.id")
    log.info("finished migrating %s previously skipped forms", migrated)
def handle(self, filepath, archiving_user, **options):
    """Archive every form whose id is in the first column of a CSV file.

    Args:
        filepath: path of the CSV file; the first cell of each row is taken
            as a form id.
        archiving_user: passed to ``XFormInstance.archive`` as ``user_id``.
        **options: accepted but not used here.

    Problems opening or reading the file are printed and end the command
    without archiving anything. A failure on one form is printed and the
    loop moves on to the next form.
    """
    try:
        csv_file = open(filepath)
    except Exception as e:
        print("there was an issue opening the file: %s" % e)
        return

    # Close the handle as soon as the ids have been read (or reading failed)
    # instead of leaving it open for the rest of the command.
    with csv_file:
        try:
            form_ids = [row[0] for row in csv.reader(csv_file)]
        except Exception as e:
            print("there was an issue reading the file %s" % e)
            return

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        try:
            xform = XFormInstance.wrap(xform_doc)
            xform.archive(user_id=archiving_user)
            print("Archived form %s in domain %s" % (xform._id, xform.domain))
        except Exception as e:
            # Use .get() so a doc lacking a key cannot raise from inside the
            # handler and abort the remaining forms.
            print("Issue archiving XFORM %s for domain %s: %s" % (
                xform_doc.get('_id'), xform_doc.get('domain'), e))
def _process_missing_forms(self):
    """Migrate forms that a previous migration missed.

    Each document from ``_iter_missing_forms`` is wrapped and migrated, then
    marked as no longer missing in ``self.statedb``. Documents that fail to
    wrap are logged and passed over. Progress is logged every 100 forms.
    """
    migrated = 0
    with self.counter('missing_forms', 'XFormInstance.id') as add_form:
        for doc_type, doc in _iter_missing_forms(self.statedb, self.stopper):
            try:
                form = XFormInstance.wrap(doc)
            except Exception:
                log.exception("Error wrapping form %s", doc)
                continue

            is_processed = doc_type not in UNPROCESSED_DOC_TYPES
            self._migrate_form(
                form,
                get_case_ids(form),
                form_is_processed=is_processed,
            )
            self.statedb.doc_not_missing(doc_type, form.form_id)
            add_form()
            migrated += 1
            if not migrated % 100:
                log.info("migrated %s previously missed forms", migrated)
    log.info("finished migrating %s previously missed forms", migrated)
def deidentify_form(doctransform):
    """Overwrite configured properties of a form with generated values.

    ``FORM_CONFIG[xmlns]`` maps "/"-separated property paths to zero-argument
    generator functions. For each path found in the form, the final property
    is replaced (in both the form document and its ``form.xml`` attachment)
    by ``str(generatorfunc())``.

    Returns:
        ``doctransform`` with updated ``doc`` and ``form.xml`` attachment, or
        ``None`` when the form's xmlns has no entry in ``FORM_CONFIG``.
    """
    assert doctransform.doc["doc_type"] == "XFormInstance"
    form = XFormInstance.wrap(doctransform.doc)
    xml = doctransform.attachments.get("form.xml", "")

    if form.xmlns not in FORM_CONFIG:
        # if we didn't know how to deidentify it, we don't want
        # to return anything, to prevent potentially identified
        # data from sneaking in
        return None

    rootxml = etree.XML(xml)
    for proppath, generatorfunc in FORM_CONFIG[form.xmlns].items():
        parts = proppath.split("/")
        final_index = len(parts) - 1
        node = form.form
        xmlnode = rootxml
        for position, part in enumerate(parts):
            if part not in node:
                # no index to the property, so assume we don't
                # need to touch it
                break

            xml_index = "{%(ns)s}%(val)s" % {"ns": form.xmlns, "val": part}
            if position != final_index:
                # drill in
                node = node[part]
                # NOTE: currently will not work with repeated nodes
                xmlnode = xmlnode.find(xml_index)
                continue

            # override prop on the last step
            replacement = str(generatorfunc())
            node[part] = replacement
            xmlnode.find(xml_index).text = replacement

    doctransform.doc = form._doc
    doctransform.attachments["form.xml"] = etree.tostring(rootxml)
    return doctransform
def _process_skipped_forms(self, cached=False):
    """Migrate forms that a previous migration skipped.

    Note: case diffs are not performed here.

    Args:
        cached: forwarded to ``_iter_skipped_forms``; when truthy, each
            migrated form is also marked as not missing in ``self.statedb``.

    Documents that fail to wrap are logged and passed over. Progress is
    logged every 100 migrated forms.
    """
    migrated = 0
    with self.counter('skipped_forms', 'XFormInstance.id') as add_form:
        skipped_docs = _iter_skipped_forms(self.statedb, self.stopper, cached)
        for doc_type, doc in skipped_docs:
            try:
                form = XFormInstance.wrap(doc)
            except Exception:
                log.exception("Error wrapping form %s", doc)
                continue

            self._migrate_form_and_associated_models(form)
            if cached:
                self.statedb.doc_not_missing(doc_type, form.form_id)
            add_form()
            migrated += 1
            if not migrated % 100:
                log.info("migrated %s previously skipped forms", migrated)
    log.info("finished migrating %s previously skipped forms", migrated)
def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Zip the non-XML attachments of matching forms and expose the download.

    Forms are selected through the ``attachments/attachments`` view using the
    key ``[domain, app_id, xmlns]`` bounded by ``startdate`` and ``enddate``.

    Args:
        domain, app_id, xmlns: components of the view key.
        startdate, enddate: bounds appended to the view key.
        export_id: when truthy, only attachments whose question id matches a
            column of that ``FormExportSchema`` are included.
        zip_name: download file name (without ".zip"). Forced to
            "Unrelated Form" when ``app_id`` is falsy; when still falsy it is
            taken from the ``@name`` of the first form iterated.
        download_id: passed through to the expose_* download helpers.
    """

    def find_question_id(form, value):
        """Return the list of keys leading to ``value`` in a nested dict, or None."""
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        """Build the archive member name, prefixed by case names when present."""
        fname = u"%s-%s-%s-%s%s"
        if form_info["cases"]:
            fname = u"-".join(form_info["cases"]) + u"-" + fname
        return fname % (form_info["name"], unidecode(question_id), form_info["user"], form_info["id"], extension)

    case_ids = set()

    # The case_ids default deliberately binds the set above, so every call
    # accumulates into the same collection.
    def extract_form_info(form, properties=None, case_ids=case_ids):
        """Summarize a form doc and list the attachments to include."""
        unknown_number = 0
        meta = form["form"].get("meta", dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c["@case_id"] for c in case_blocks}
        case_ids |= cases

        form_info = {
            "form": form,
            "attachments": list(),
            "name": form["form"].get("@name", "unknown form"),
            "user": meta.get("username", "unknown_user"),
            "cases": cases,
            "id": form["_id"],
        }
        for k, v in form["_attachments"].iteritems():
            if v["content_type"] == "text/xml":
                continue
            try:
                question_id = unicode(u"-".join(find_question_id(form["form"], k)))
            except TypeError:
                # find_question_id returned None: no question references
                # this attachment, so give it a numbered placeholder id.
                question_id = unicode(u"unknown" + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info["attachments"].append(
                    {
                        "size": v["length"],
                        "name": k,
                        "question_id": question_id,
                        "extension": extension,
                        "timestamp": parse(form["received_on"]).timetuple(),
                    }
                )

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {
        f["id"]
        for f in XFormInstance.get_db().view(
            "attachments/attachments", start_key=key + [startdate], end_key=key + [enddate, {}], reduce=False
        )
    }

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace(".", "-") for c in table.columns}

    if not app_id:
        zip_name = "Unrelated Form"
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form["form"].get("@name", "unknown form"))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case["name"]:
            case_id_to_name[case["_id"]] = case["name"]

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        params = "_".join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = "{}-{}".format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        # mkstemp hands back an already-open OS-level descriptor; close it so
        # it does not leak. The file is reopened by path below.
        temp_fd, fpath = tempfile.mkstemp()
        os.close(temp_fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, "wb") as zfile:
            with zipfile.ZipFile(zfile, "w") as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info["form"])
                    form_info["cases"] = {case_id_to_name[case_id] for case_id in form_info["cases"]}
                    for a in form_info["attachments"]:
                        fname = filename(form_info, a["question_id"], a["extension"])
                        zi = zipfile.ZipInfo(fname, a["timestamp"])
                        z.writestr(zi, f.fetch_attachment(a["name"], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype="application/zip",
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(fpath, use_transfer=use_transfer, **common_kwargs)
    else:
        # The zip is binary data, so read it back in binary mode.
        # NOTE(review): the handle is handed to FileWrapper and not closed
        # here -- confirm expose_cached_download consumes it.
        expose_cached_download(
            FileWrapper(open(fpath, "rb")),
            expiry=(1 * 60 * 60),
            file_extension=file_extention_from_filename(fpath),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
def _clear_docs(self):
    """Delete every form document returned by the test export configuration."""
    xform_db = XFormInstance.get_db()
    export_tag = [DOMAIN, "http://www.commcarehq.org/export/test"]
    export_config = ExportConfiguration(xform_db, export_tag)
    for raw_form in export_config.get_docs():
        wrapped_form = XFormInstance.wrap(raw_form)
        wrapped_form.delete()
def form_wrapper(row):
    """Wrap the ``doc`` of a view row as an ``XFormInstance``.

    The ``_attachments`` entry, if present, is removed from the document
    (in place) before wrapping.
    """
    form_doc = row["doc"]
    if "_attachments" in form_doc:
        del form_doc["_attachments"]
    return XFormInstance.wrap(form_doc)
def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Zip the non-XML attachments of matching forms and expose the download.

    Forms are selected through the ``attachments/attachments`` view using the
    key ``[domain, app_id, xmlns]`` bounded by ``startdate`` and ``enddate``.

    Args:
        domain, app_id, xmlns: components of the view key.
        startdate, enddate: bounds appended to the view key.
        export_id: when truthy, only attachments whose question id matches a
            column of that ``FormExportSchema`` are included.
        zip_name: download file name (without ".zip"). Forced to
            'Unrelated Form' when ``app_id`` is falsy; when still falsy it is
            taken from the ``@name`` of the first form iterated.
        download_id: passed through to the expose_* download helpers.
    """

    def find_question_id(form, value):
        """Return the list of keys leading to ``value`` in a nested dict, or None."""
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        """Build the archive member name, prefixed by case names when present."""
        fname = u"%s-%s-%s-%s%s"
        if form_info['cases']:
            fname = u'-'.join(form_info['cases']) + u'-' + fname
        return fname % (form_info['name'],
                        unidecode(question_id),
                        form_info['user'],
                        form_info['id'], extension)

    case_ids = set()

    # The case_ids default deliberately binds the set above, so every call
    # accumulates into the same collection.
    def extract_form_info(form, properties=None, case_ids=case_ids):
        """Summarize a form doc and list the attachments to include."""
        unknown_number = 0
        meta = form['form'].get('meta', dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c['@case_id'] for c in case_blocks}
        case_ids |= cases

        form_info = {
            'form': form,
            'attachments': list(),
            'name': form['form'].get('@name', 'unknown form'),
            'user': meta.get('username', 'unknown_user'),
            'cases': cases,
            'id': form['_id']
        }
        for k, v in form['_attachments'].iteritems():
            if v['content_type'] == 'text/xml':
                continue
            try:
                question_id = unicode(u'-'.join(find_question_id(form['form'], k)))
            except TypeError:
                # find_question_id returned None: no question references
                # this attachment, so give it a numbered placeholder id.
                question_id = unicode(u'unknown' + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info['attachments'].append({
                    'size': v['length'],
                    'name': k,
                    'question_id': question_id,
                    'extension': extension,
                    'timestamp': parse(form['received_on']).timetuple(),
                })

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {f['id'] for f in XFormInstance.get_db().view("attachments/attachments",
                                                             start_key=key + [startdate],
                                                             end_key=key + [enddate, {}],
                                                             reduce=False)}

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace('.', '-') for c in table.columns}

    if not app_id:
        zip_name = 'Unrelated Form'
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form['form'].get('@name', 'unknown form'))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case['name']:
            case_id_to_name[case['_id']] = case['name']

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        params = '_'.join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = '{}-{}'.format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        # mkstemp hands back an already-open OS-level descriptor; close it so
        # it does not leak. The file is reopened by path below.
        temp_fd, fpath = tempfile.mkstemp()
        os.close(temp_fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, 'wb') as zfile:
            with zipfile.ZipFile(zfile, 'w') as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info['form'])
                    form_info['cases'] = {case_id_to_name[case_id] for case_id in form_info['cases']}
                    for a in form_info['attachments']:
                        fname = filename(form_info, a['question_id'], a['extension'])
                        zi = zipfile.ZipInfo(fname, a['timestamp'])
                        z.writestr(zi, f.fetch_attachment(a['name'], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype='application/zip',
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(
            fpath,
            use_transfer=use_transfer,
            **common_kwargs
        )
    else:
        # The zip is binary data, so read it back in binary mode.
        # NOTE(review): the handle is handed to FileWrapper and not closed
        # here -- confirm expose_cached_download consumes it.
        expose_cached_download(
            FileWrapper(open(fpath, 'rb')),
            expiry=(1 * 60 * 60),
            file_extension=file_extention_from_filename(fpath),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
def form_wrapper(row):
    """Wrap the ``doc`` of a view row as an ``XFormInstance``.

    The ``_attachments`` and ``external_blobs`` entries, if present, are
    removed from the document (in place) before wrapping.
    """
    form_doc = row['doc']
    for dropped_key in ('_attachments', 'external_blobs'):
        form_doc.pop(dropped_key, None)
    return XFormInstance.wrap(form_doc)