def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Build a zip of the multimedia attachments of matching forms and expose it for download.

    Form ids are read from the "attachments/attachments" view using the key
    ``[domain, app_id, xmlns]`` bounded by ``startdate`` and ``enddate``.
    Every non-XML attachment of those forms is written to a zip file, which
    is then handed to ``expose_file_download`` (shared-drive transfer
    enabled) or ``expose_cached_download`` (otherwise). Progress is reported
    through ``DownloadBase.set_progress``.

    :param domain: first component of the view key.
    :param xmlns: third component of the view key.
    :param startdate: lower bound appended to the view start key.
    :param enddate: upper bound appended to the view end key.
    :param app_id: second component of the view key; when falsy the zip is
        named "Unrelated Form".
    :param export_id: optional id of a ``FormExportSchema``; when given, only
        attachments whose question id matches one of the schema's columns
        are included.
    :param zip_name: download file name (without ".zip"); when falsy it is
        taken from the first form's ``@name``.
    :param download_id: forwarded to the ``expose_*`` helper.
    """

    def find_question_id(form, value):
        # Depth-first search of the nested form dict for ``value``; returns
        # the list of keys leading to it, or None when it is not found.
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        # <case names>-<form name>-<question id>-<user>-<form id><extension>
        fname = u"%s-%s-%s-%s%s"
        if form_info["cases"]:
            fname = u"-".join(form_info["cases"]) + u"-" + fname
        return fname % (
            form_info["name"],
            unidecode(question_id),
            form_info["user"],
            form_info["id"],
            extension,
        )

    # Every case id touched by any of the forms; filled by extract_form_info.
    case_ids = set()

    def extract_form_info(form, properties=None):
        unknown_number = 0
        meta = form["form"].get("meta", dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c["@case_id"] for c in case_blocks}
        # In-place update of the enclosing set (no rebinding, so no
        # default-argument trick is needed to reach it).
        case_ids.update(cases)
        form_info = {
            "form": form,
            "attachments": list(),
            "name": form["form"].get("@name", "unknown form"),
            "user": meta.get("username", "unknown_user"),
            "cases": cases,
            "id": form["_id"],
        }
        for k, v in form["_attachments"].iteritems():
            if v["content_type"] == "text/xml":
                continue
            try:
                question_id = unicode(u"-".join(find_question_id(form["form"], k)))
            except TypeError:
                # find_question_id returned None: the attachment is not
                # referenced by any question in the form.
                question_id = unicode(u"unknown" + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info["attachments"].append(
                    {
                        "size": v["length"],
                        "name": k,
                        "question_id": question_id,
                        "extension": extension,
                        "timestamp": parse(form["received_on"]).timetuple(),
                    }
                )

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {
        f["id"]
        for f in XFormInstance.get_db().view(
            "attachments/attachments",
            start_key=key + [startdate],
            end_key=key + [enddate, {}],
            reduce=False,
        )
    }

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace(".", "-") for c in table.columns}

    if not app_id:
        zip_name = "Unrelated Form"
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form["form"].get("@name", "unknown form"))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names; a case without a name falls back to its id
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case["name"]:
            case_id_to_name[case["_id"]] = case["name"]

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        # The file name is derived from the request parameters so that an
        # identical request can reuse the file already on the shared drive.
        params = "_".join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = "{}-{}".format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        # mkstemp returns an already-open OS-level descriptor; close it right
        # away so it is not leaked (the file is reopened by path below).
        fd, fpath = tempfile.mkstemp()
        os.close(fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, "wb") as zfile:
            with zipfile.ZipFile(zfile, "w") as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info["form"])
                    form_info["cases"] = {case_id_to_name[case_id] for case_id in form_info["cases"]}
                    for a in form_info["attachments"]:
                        fname = filename(form_info, a["question_id"], a["extension"])
                        zi = zipfile.ZipInfo(fname, a["timestamp"])
                        z.writestr(zi, f.fetch_attachment(a["name"], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype="application/zip",
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(fpath, use_transfer=use_transfer, **common_kwargs)
    else:
        expose_cached_download(
            # A zip archive is binary data, so read it back in binary mode.
            FileWrapper(open(fpath, "rb")),
            expiry=(1 * 60 * 60),
            file_extension=file_extention_from_filename(fpath),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Zip the multimedia attachments of matching forms and expose the archive for download.

    The "attachments/attachments" view is queried with the key
    ``[domain, app_id, xmlns]`` between ``startdate`` and ``enddate`` to find
    the forms. Each non-XML attachment is stored in a zip file that is then
    passed to ``expose_file_download`` when shared-drive transfer is enabled,
    or to ``expose_cached_download`` otherwise. ``DownloadBase.set_progress``
    is called as forms are processed.

    :param domain: first component of the view key.
    :param xmlns: third component of the view key.
    :param startdate: lower bound appended to the view start key.
    :param enddate: upper bound appended to the view end key.
    :param app_id: second component of the view key; when falsy the zip is
        named 'Unrelated Form'.
    :param export_id: optional ``FormExportSchema`` id used to restrict the
        attachments to questions present in the schema's columns.
    :param zip_name: download file name (without ".zip"); when falsy the
        first form's ``@name`` is used.
    :param download_id: forwarded to the ``expose_*`` helper.
    """

    def find_question_id(form, value):
        # Recursively look for ``value`` in the nested form dict and return
        # the path of keys to it, or None if it does not occur.
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        # <case names>-<form name>-<question id>-<user>-<form id><extension>
        fname = u"%s-%s-%s-%s%s"
        if form_info['cases']:
            fname = u'-'.join(form_info['cases']) + u'-' + fname
        return fname % (form_info['name'],
                        unidecode(question_id),
                        form_info['user'],
                        form_info['id'],
                        extension)

    # Collects the case ids of all processed forms (see extract_form_info).
    case_ids = set()

    def extract_form_info(form, properties=None):
        unknown_number = 0
        meta = form['form'].get('meta', dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c['@case_id'] for c in case_blocks}
        # Mutate the enclosing set in place instead of smuggling it in
        # through a mutable default argument.
        case_ids.update(cases)
        form_info = {
            'form': form,
            'attachments': list(),
            'name': form['form'].get('@name', 'unknown form'),
            'user': meta.get('username', 'unknown_user'),
            'cases': cases,
            'id': form['_id']
        }
        for k, v in form['_attachments'].iteritems():
            if v['content_type'] == 'text/xml':
                continue
            try:
                question_id = unicode(u'-'.join(find_question_id(form['form'], k)))
            except TypeError:
                # No question references this attachment (lookup gave None).
                question_id = unicode(u'unknown' + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info['attachments'].append({
                    'size': v['length'],
                    'name': k,
                    'question_id': question_id,
                    'extension': extension,
                    'timestamp': parse(form['received_on']).timetuple(),
                })

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {f['id'] for f in XFormInstance.get_db().view("attachments/attachments",
                                                             start_key=key + [startdate],
                                                             end_key=key + [enddate, {}],
                                                             reduce=False)}

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace('.', '-') for c in table.columns}

    if not app_id:
        zip_name = 'Unrelated Form'
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form['form'].get('@name', 'unknown form'))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names; fall back to the case id when a case has no name
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case['name']:
            case_id_to_name[case['_id']] = case['name']

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        # Name the file after the request parameters so a repeated request
        # can pick up the archive that is already on the shared drive.
        params = '_'.join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = '{}-{}'.format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        # mkstemp hands back an open descriptor as well as the path; close
        # it immediately so the descriptor is not leaked.
        fd, fpath = tempfile.mkstemp()
        os.close(fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, 'wb') as zfile:
            with zipfile.ZipFile(zfile, 'w') as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info['form'])
                    form_info['cases'] = {case_id_to_name[case_id] for case_id in form_info['cases']}
                    for a in form_info['attachments']:
                        fname = filename(form_info, a['question_id'], a['extension'])
                        zi = zipfile.ZipInfo(fname, a['timestamp'])
                        z.writestr(zi, f.fetch_attachment(a['name'], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype='application/zip',
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(
            fpath,
            use_transfer=use_transfer,
            **common_kwargs
        )
    else:
        expose_cached_download(
            # The archive is binary, so it must be read back in binary mode.
            FileWrapper(open(fpath, 'rb')),
            expiry=(1 * 60 * 60),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
def form_multimedia_export(request, domain):
    """Return a zip of the multimedia attachments of matching forms as an HTTP response.

    Required GET parameters are ``xmlns``, ``startdate`` and ``enddate``; a
    missing one yields ``HttpResponseBadRequest``. Optional GET parameters
    are ``app_id``, ``export_id`` (a ``FormExportSchema`` id restricting the
    attachments to questions in the schema's columns) and ``name`` (the
    download file name without ".zip").

    Forms are looked up in the "attachments/attachments" view with the key
    ``[domain, app_id, xmlns]`` between ``startdate`` and ``enddate``. The
    archive is built in memory.
    """
    try:
        xmlns = request.GET["xmlns"]
        startdate = request.GET["startdate"]
        enddate = request.GET["enddate"]
    except KeyError:
        return HttpResponseBadRequest()
    app_id = request.GET.get("app_id", None)
    export_id = request.GET.get("export_id", None)
    zip_name = request.GET.get("name", None)

    def filename(form, question_id, extension):
        # <form name>-<question id>-<user>-<form id><extension>
        # A form without a meta block falls back to 'unknown user' instead
        # of raising KeyError.
        meta = form['form'].get('meta', {})
        return "%s-%s-%s-%s%s" % (form['form'].get('@name', 'unknown form'),
                                  unidecode(question_id),
                                  meta.get('username', 'unknown user'),
                                  form['_id'],
                                  extension)

    if not app_id:
        zip_name = 'Unrelated Form'
    key = [domain, app_id, xmlns]
    form_ids = {f['id'] for f in XFormInstance.get_db().view("attachments/attachments",
                                                             start_key=key + [startdate],
                                                             end_key=key + [enddate, {}],
                                                             reduce=False)}

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace('.', '-') for c in table.columns}

    # Counter for attachments no question refers to; shared across all forms
    # so the generated "unknownN" ids stay unique within the archive.
    unknown_number = 0
    stream_file = cStringIO.StringIO()
    # The context manager finalises the archive even if a form fails midway.
    with zipfile.ZipFile(stream_file, mode='w', compression=zipfile.ZIP_STORED) as zf:
        for form in iter_docs(XFormInstance.get_db(), form_ids):
            f = XFormInstance.wrap(form)
            if not zip_name:
                zip_name = unidecode(form['form'].get('@name', 'unknown form'))
            for attachment_name, attachment in form['_attachments'].items():
                if attachment['content_type'] == 'text/xml':
                    continue
                extension = unicode(os.path.splitext(attachment_name)[1])
                try:
                    question_id = unicode('-'.join(find_question_id(form['form'], attachment_name)))
                except TypeError:
                    # find_question_id gave None: no question references
                    # this attachment.
                    question_id = unicode('unknown' + str(unknown_number))
                    unknown_number += 1

                if not properties or question_id in properties:
                    fname = filename(form, question_id, extension)
                    zi = zipfile.ZipInfo(fname, parse(form['received_on']).timetuple())
                    zf.writestr(zi, f.fetch_attachment(attachment_name, stream=True).read())

    payload = stream_file.getvalue()
    response = HttpResponse(payload, mimetype="application/zip")
    # Use the real archive size: a hand-computed estimate is wrong whenever
    # an entry name encodes to more bytes than it has characters.
    response['Content-Length'] = len(payload)
    # Quote the file name; it may contain spaces (e.g. 'Unrelated Form').
    response['Content-Disposition'] = 'attachment; filename="%s.zip"' % zip_name
    return response