def import_products(domain, download, task):
    messages = []
    products = []
    data = download.get_content().split('\n')
    processed = 0
    total_rows = len(data) - 1
    reader = csv.DictReader(data)
    for row in reader:
        try:
            p = Product.from_csv(row)
            if p:
                if p.domain:
                    if p.domain != domain:
                        messages.append(
                            _("Product {product_name} belongs to another domain and was not updated").format(
                                product_name=p.name
                            )
                        )
                        continue
                else:
                    p.domain = domain
                products.append(p)
            if task:
                processed += 1
                DownloadBase.set_progress(task, processed, total_rows)
        except Exception as e:
            messages.append(str(e))

def prepare_form_multimedia(request, domain):
    """Gets the download_id for the multimedia zip and sends it to the
    exportDownloadService in download_export.ng.js to begin polling for the
    zip file download.
    """
    form_or_case = request.POST.get('form_or_case')
    sms_export = json.loads(request.POST.get('sms_export'))
    permissions = ExportsPermissionsManager(form_or_case, domain, request.couch_user)
    permissions.access_download_export_or_404()

    view_helper = DownloadExportViewHelper.get(request, domain, form_or_case, sms_export)
    filter_form_data = json.loads(request.POST.get('form_data'))
    export_specs = json.loads(request.POST.get('exports'))
    try:
        filter_form = view_helper.get_filter_form(filter_form_data)
    except ExportFormValidationException:
        return json_response({
            'error': _("Please check that you've submitted all required filters."),
        })

    download = DownloadBase()
    export_object = view_helper.get_export(export_specs[0]['export_id'])
    task_kwargs = filter_form.get_multimedia_task_kwargs(export_object, download.download_id,
                                                         filter_form_data)
    from corehq.apps.reports.tasks import build_form_multimedia_zip
    download.set_task(build_form_multimedia_zip.delay(**task_kwargs))

    return json_response({
        'success': True,
        'download_id': download.download_id,
    })

def fixture_upload_async(domain, download_id, replace):
    task = fixture_upload_async
    DownloadBase.set_progress(task, 0, 100)
    download_ref = DownloadBase.get(download_id)
    result = upload_fixture_file(domain, download_ref.get_filename(), replace, task)
    DownloadBase.set_progress(task, 100, 100)
    return {"messages": result}

def prepare_form_multimedia(self, in_data):
    """Gets the download_id for the multimedia zip and sends it to the
    exportDownloadService in download_export.ng.js to begin polling for the
    zip file download.
    """
    try:
        filter_form_data, export_specs = self._get_form_data_and_specs(in_data)
        filter_form = FilterFormExportDownloadForm(
            self.domain_object, self.timezone, filter_form_data
        )
        if not filter_form.is_valid():
            raise ExportFormValidationException(
                _("Please check that you've submitted all required filters.")
            )
        download = DownloadBase()
        export_object = self.get_export_schema(self.domain, export_specs[0]['export_id'])
        task_kwargs = filter_form.get_multimedia_task_kwargs(
            export_object, download.download_id
        )
        from corehq.apps.reports.tasks import build_form_multimedia_zip
        download.set_task(build_form_multimedia_zip.delay(**task_kwargs))
    except Exception as e:
        return format_angular_error(e)
    return format_angular_success({
        'download_id': download.download_id,
    })

def add_progress(self, count=1):
    self.progress += count
    if self.task:
        DownloadBase.set_progress(self.task, self.progress, self.total_rows)
    if datetime.now() > self.last_update + timedelta(seconds=5):
        self.log("processed %s / %s", self.progress, self.total_rows)
        self.last_update = datetime.now()

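# A minimal sketch (not from the source) of the importer-style class that add_progress()
# above assumes. It only illustrates the attributes the helper touches (task, progress,
# total_rows, last_update, log); the class name and log wiring here are hypothetical.
import logging
from datetime import datetime, timedelta

from soil import DownloadBase  # assumed import path for DownloadBase


class ProgressReportingImporter(object):

    def __init__(self, task, total_rows):
        self.task = task              # celery task, or None when run synchronously
        self.total_rows = total_rows  # denominator reported to DownloadBase.set_progress
        self.progress = 0
        self.start = self.last_update = datetime.now()

    def log(self, message, *args):
        logging.getLogger(__name__).info(message, *args)

    # add_progress(count) and mark_complete() as defined elsewhere in this corpus
    # would be methods on a class shaped like this.
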
def _increment_progress(self):
    if self._location_count is None:
        self._location_count = SQLLocation.active_objects.filter(domain=self.domain).count()
        self._progress_update_chunksize = max(10, self._location_count // 100)
    self._locations_exported += 1
    if self._locations_exported % self._progress_update_chunksize == 0:
        DownloadBase.set_progress(self.async_task, self._locations_exported, self._location_count)

def toggle_demo_mode(request, domain, user_id):
    user = CommCareUser.get_by_user_id(user_id, domain)
    demo_mode = request.POST.get('demo_mode', 'no')
    demo_mode = True if demo_mode == 'yes' else False

    edit_user_url = reverse(EditCommCareUserView.urlname, args=[domain, user_id])
    # handle bad POST param
    if user.is_demo_user == demo_mode:
        warning = _("User is already in Demo mode!") if user.is_demo_user else _("User is not in Demo mode!")
        messages.warning(request, warning)
        return HttpResponseRedirect(edit_user_url)

    if demo_mode:
        download = DownloadBase()
        res = turn_on_demo_mode_task.delay(user.get_id, domain)
        download.set_task(res)
        return HttpResponseRedirect(
            reverse(
                DemoRestoreStatusView.urlname,
                args=[domain, download.download_id, user_id]
            )
        )
    else:
        from corehq.apps.app_manager.views.utils import unset_practice_mode_configured_apps, \
            get_practice_mode_configured_apps
        # if the user is being used as practice user on any apps, check/ask for confirmation
        apps = get_practice_mode_configured_apps(domain)
        confirm_turn_off = True if (request.POST.get('confirm_turn_off', 'no')) == 'yes' else False
        if apps and not confirm_turn_off:
            return HttpResponseRedirect(reverse(ConfirmTurnOffDemoModeView.urlname, args=[domain, user_id]))
        turn_off_demo_mode(user)
        unset_practice_mode_configured_apps(domain, user.get_id)
        messages.success(request, _("Successfully turned off demo mode!"))
        return HttpResponseRedirect(edit_user_url)

def write_export_instance(writer, export_instance, documents, progress_tracker=None):
    """
    Write rows to the given open _Writer.
    Rows will be written to each table in the export instance for each of
    the given documents.
    :param writer: An open _Writer
    :param export_instance: An ExportInstance
    :param documents: An iterable yielding documents
    :param progress_tracker: A task for soil to track progress against
    :return: None
    """
    if progress_tracker:
        DownloadBase.set_progress(progress_tracker, 0, documents.count)

    start = _time_in_milliseconds()
    total_bytes = 0
    total_rows = 0
    compute_total = 0
    write_total = 0

    for row_number, doc in enumerate(documents):
        total_bytes += sys.getsizeof(doc)
        for table in export_instance.selected_tables:
            compute_start = _time_in_milliseconds()
            try:
                rows = table.get_rows(
                    doc,
                    row_number,
                    split_columns=export_instance.split_multiselects,
                    transform_dates=export_instance.transform_dates,
                )
            except Exception as e:
                notify_exception(None, "Error exporting doc", details={
                    'domain': export_instance.domain,
                    'export_instance_id': export_instance.get_id,
                    'export_table': table.label,
                    'doc_id': doc.get('_id'),
                })
                e.sentry_capture = False
                raise
            compute_total += _time_in_milliseconds() - compute_start

            write_start = _time_in_milliseconds()
            for row in rows:
                # It might be bad to write one row at a time when you can do more (from a performance perspective)
                # Regardless, we should handle the batching of rows in the _Writer class, not here.
                writer.write(table, row)
            write_total += _time_in_milliseconds() - write_start

            total_rows += len(rows)

        if progress_tracker:
            DownloadBase.set_progress(progress_tracker, row_number + 1, documents.count)

    end = _time_in_milliseconds()
    tags = ['format:{}'.format(writer.format)]
    _record_datadog_export_write_rows(write_total, total_bytes, total_rows, tags)
    _record_datadog_export_compute_rows(compute_total, total_bytes, total_rows, tags)
    _record_datadog_export_duration(end - start, total_bytes, total_rows, tags)
    _record_export_duration(end - start, export_instance)

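# Hedged usage sketch (not from the source): how a celery task might drive
# write_export_instance() above so its progress shows up on a soil download.
# get_export_documents and get_open_export_writer are hypothetical stand-ins for
# whatever produces the document iterable (which must expose .count) and an
# already-open _Writer, as the docstring requires.
from soil import DownloadBase  # assumed import path


def populate_export_download_task(export_instance, filters, download_id):
    documents = get_export_documents(export_instance, filters)   # hypothetical helper
    writer = get_open_export_writer(export_instance)              # hypothetical helper
    write_export_instance(writer, export_instance, documents,
                          progress_tracker=populate_export_download_task)
    # mark the download as fully processed once all rows are written
    DownloadBase.set_progress(populate_export_download_task, documents.count, documents.count)
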
def location_importer_job_poll(request, domain, download_id,
                               template="locations/manage/partials/status.html"):
    download_data = DownloadBase.get(download_id)
    is_ready = False

    if download_data is None:
        download_data = DownloadBase(download_id=download_id)

    try:
        if download_data.task.failed():
            return HttpResponseServerError()
    except (TypeError, NotImplementedError):
        # no result backend / improperly configured
        pass

    alive = True
    if heartbeat_enabled():
        alive = is_alive()

    context = RequestContext(request)

    if download_data.task.state == 'SUCCESS':
        is_ready = True
        context['result'] = download_data.task.result.get('messages')

    context['is_ready'] = is_ready
    context['is_alive'] = alive
    context['progress'] = download_data.get_progress()
    context['download_id'] = download_id
    return render_to_response(template, context_instance=context)

def toggle_demo_mode(request, domain, user_id):
    user = CommCareUser.get_by_user_id(user_id, domain)
    demo_mode = request.POST.get('demo_mode', 'no')
    demo_mode = True if demo_mode == 'yes' else False

    edit_user_url = reverse(EditCommCareUserView.urlname, args=[domain, user_id])
    # handle bad POST param
    if user.is_demo_user == demo_mode:
        warning = _("User is already in Demo mode!") if user.is_demo_user else _("User is not in Demo mode!")
        messages.warning(request, warning)
        return HttpResponseRedirect(edit_user_url)

    if demo_mode:
        download = DownloadBase()
        res = turn_on_demo_mode_task.delay(user, domain)
        download.set_task(res)
        return HttpResponseRedirect(
            reverse(
                DemoRestoreStatusView.urlname,
                args=[domain, download.download_id, user_id]
            )
        )
    else:
        turn_off_demo_mode(user)
        messages.success(request, _("Successfully turned off demo mode!"))
        return HttpResponseRedirect(edit_user_url)

def get_export_files(self, format='', previous_export_id=None, filter=None,
                     use_cache=True, max_column_size=2000, separator='|',
                     process=None, **kwargs):
    # the APIs of how these methods are broken down suck, but at least
    # it's DRY
    from couchexport.export import get_writer, get_export_components, get_headers, get_formatted_rows
    from django.core.cache import cache
    import hashlib

    export_tag = self.index

    CACHE_TIME = 1 * 60 * 60  # cache for 1 hour, in seconds

    def _build_cache_key(tag, prev_export_id, format, max_column_size):
        def _human_readable_key(tag, prev_export_id, format, max_column_size):
            return "couchexport_:%s:%s:%s:%s" % (tag, prev_export_id, format, max_column_size)
        return hashlib.md5(_human_readable_key(tag, prev_export_id, format, max_column_size)).hexdigest()

    # check cache, only supported for filterless queries, currently
    cache_key = _build_cache_key(export_tag, previous_export_id, format, max_column_size)
    if use_cache and filter is None:
        cached_data = cache.get(cache_key)
        if cached_data:
            (tmp, checkpoint) = cached_data
            return ExportFiles(tmp, checkpoint)

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmp:
        schema_index = export_tag
        config, updated_schema, export_schema_checkpoint = get_export_components(
            schema_index, previous_export_id, filter)
        if config:
            writer = get_writer(format)

            # get cleaned up headers
            formatted_headers = self.remap_tables(get_headers(updated_schema, separator=separator))
            writer.open(formatted_headers, tmp, max_column_size=max_column_size)

            total_docs = len(config.potentially_relevant_ids)
            if process:
                DownloadBase.set_progress(process, 0, total_docs)
            for i, doc in config.enum_docs():
                if self.transform:
                    doc = self.transform(doc)

                writer.write(self.remap_tables(get_formatted_rows(
                    doc, updated_schema, include_headers=False, separator=separator)))
                if process:
                    DownloadBase.set_progress(process, i + 1, total_docs)
            writer.close()

        checkpoint = export_schema_checkpoint

    if checkpoint:
        if use_cache:
            cache.set(cache_key, (path, checkpoint), CACHE_TIME)
        return ExportFiles(path, checkpoint)

    return None

def prime_restore(user_ids, version=V1, cache_timeout=None, overwrite_cache=False):
    from corehq.apps.ota.views import get_restore_response
    total = len(user_ids)
    DownloadBase.set_progress(prime_restore, 0, total)

    ret = {'messages': []}
    for i, user_id in enumerate(user_ids):
        try:
            couch_user = CommCareUser.get(user_id)
        except ResourceNotFound:
            ret['messages'].append('User not found: {}'.format(user_id))
            continue

        try:
            get_restore_response(
                couch_user.domain,
                couch_user,
                since=None,
                version=version,
                force_cache=True,
                cache_timeout=cache_timeout,
                overwrite_cache=overwrite_cache
            )
        except Exception as e:
            ret['messages'].append('Error processing user: {}'.format(str(e)))

        DownloadBase.set_progress(prime_restore, i + 1, total)

    return ret

def export(schema_index, file, format=Format.XLS_2007,
           previous_export_id=None, filter=None,
           max_column_size=2000, separator='|', export_object=None, process=None):
    """
    Exports data from couch documents matching a given tag to a file.
    Returns true if it finds data, otherwise nothing
    """
    config, updated_schema, export_schema_checkpoint = get_export_components(
        schema_index, previous_export_id, filter)

    # transform docs onto output and save
    if config:
        writer = get_writer(format)

        # open the doc and the headers
        formatted_headers = get_headers(updated_schema, separator=separator)
        writer.open(formatted_headers, file, max_column_size=max_column_size)

        total_docs = len(config.potentially_relevant_ids)
        if process:
            DownloadBase.set_progress(process, 0, total_docs)
        for i, doc in config.enum_docs():
            if export_object and export_object.transform:
                doc = export_object.transform(doc)
            writer.write(format_tables(create_intermediate_tables(doc, updated_schema),
                                       include_headers=False, separator=separator))
            if process:
                DownloadBase.set_progress(process, i + 1, total_docs)
        writer.close()

    return export_schema_checkpoint

def _write_export_instance(writer, export_instance, documents, progress_tracker=None):
    """
    Write rows to the given open _Writer.
    Rows will be written to each table in the export instance for each of
    the given documents.
    :param writer: An open _Writer
    :param export_instance: An ExportInstance
    :param documents: A ScanResult, or if progress_tracker is None, any iterable yielding documents
    :param progress_tracker: A task for soil to track progress against
    :return: None
    """
    if progress_tracker:
        DownloadBase.set_progress(progress_tracker, 0, documents.count)

    for row_number, doc in enumerate(documents):
        for table in export_instance.selected_tables:
            rows = table.get_rows(
                doc,
                row_number,
                split_columns=export_instance.split_multiselects,
                transform_dates=export_instance.transform_dates,
            )
            for row in rows:
                # It might be bad to write one row at a time when you can do more (from a performance perspective)
                # Regardless, we should handle the batching of rows in the _Writer class, not here.
                writer.write(table, row)
        if progress_tracker:
            DownloadBase.set_progress(progress_tracker, row_number + 1, documents.count)

def import_locations(domain, worksheet, update_existing=False, task=None):
    fields = worksheet.headers
    data = list(worksheet)

    hierarchy_fields = []
    loc_types = defined_location_types(domain)
    for field in fields:
        if field in loc_types:
            hierarchy_fields.append(field)
        else:
            break
    property_fields = fields[len(hierarchy_fields):]

    if not hierarchy_fields:
        yield 'missing location hierarchy-related fields in left columns. aborting import'
        return

    loc_cache = LocationCache(domain)
    for index, loc in enumerate(data):
        if task:
            DownloadBase.set_progress(task, index, len(data))
        for m in import_location(domain, loc, hierarchy_fields, property_fields, update_existing, loc_cache):
            yield m

def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id,
                              zip_name, download_id, export_is_legacy):
    form_ids = _get_form_ids(domain, app_id, xmlns, startdate, enddate, export_is_legacy)
    properties = _get_export_properties(export_id, export_is_legacy)

    if not app_id:
        zip_name = 'Unrelated Form'
    forms_info = list()
    for form in FormAccessors(domain).iter_forms(form_ids):
        if not zip_name:
            zip_name = unidecode(form.name or 'unknown form')
        forms_info.append(_extract_form_attachment_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        fpath = _get_download_file_path(xmlns, startdate, enddate, export_id, app_id, num_forms)
    else:
        _, fpath = tempfile.mkstemp()

    _write_attachments_to_file(fpath, use_transfer, num_forms, forms_info)
    _expose_download(fpath, use_transfer, zip_name, download_id, num_forms)

def export_all_form_metadata_async(req, domain):
    datespan = req.datespan if req.GET.get("startdate") and req.GET.get("enddate") else None
    group_id = req.GET.get("group")
    ufilter = UserTypeFilter.get_user_filter(req)[0]
    users = util.get_all_users_by_domain(
        domain=domain,
        group=group_id,
        user_filter=ufilter,
        simplified=True,
        include_inactive=True
    )
    user_ids = filter(None, [u["user_id"] for u in users])
    format = req.GET.get("format", Format.XLS_2007)
    filename = "%s_forms" % domain

    download = DownloadBase()
    download.set_task(create_metadata_export.delay(
        download.download_id,
        domain,
        format=format,
        filename=filename,
        datespan=datespan,
        user_ids=user_ids,
    ))
    return download.get_start_response()

def bulk_upload_async(domain, user_specs, group_specs):
    from corehq.apps.users.bulkupload import create_or_update_users_and_groups
    task = bulk_upload_async
    DownloadBase.set_progress(task, 0, 100)
    results = create_or_update_users_and_groups(domain, user_specs, group_specs, task=task)
    DownloadBase.set_progress(task, 100, 100)
    return {"messages": results}

def turn_on_demo_mode_task(couch_user, domain):
    from corehq.apps.ota.utils import turn_on_demo_mode
    DownloadBase.set_progress(turn_on_demo_mode_task, 0, 100)
    results = turn_on_demo_mode(couch_user, domain)
    DownloadBase.set_progress(turn_on_demo_mode_task, 100, 100)
    return {"messages": results}

def fixture_upload_async(domain, download_id, replace):
    task = fixture_upload_async
    DownloadBase.set_progress(task, 0, 100)
    download_ref = DownloadBase.get(download_id)
    result = safe_fixture_upload(domain, download_ref, replace, task)
    DownloadBase.set_progress(task, 100, 100)
    return {
        'messages': result,
    }

def __init__(self, task, file_ref_id):
    self.task = task
    self.progress = 0
    if self.task:
        DownloadBase.set_progress(self.task, 0, 100)

    download_ref = DownloadBase.get(file_ref_id)
    self.workbook = WorkbookJSONReader(download_ref.get_filename())

def import_products_async(domain, file_ref_id):
    task = import_products_async
    DownloadBase.set_progress(task, 0, 100)
    download_ref = DownloadBase.get(file_ref_id)
    results = import_products(domain, download_ref, task)
    DownloadBase.set_progress(task, 100, 100)
    return {
        'messages': results
    }

def prepare_export(self, export_tags, export_filter):
    self.generate_bulk_files(export_tags, export_filter)
    download = DownloadBase()
    couchexport.tasks.bulk_export_async.delay(
        self,
        download.download_id,
        domain=self.domain
    )
    return download.get_start_response()

def export_data_async(self, format=None, **kwargs):
    format = format or self.default_format
    download = DownloadBase()
    download.set_task(couchexport.tasks.export_async.delay(
        self,
        download.download_id,
        format=format,
        **kwargs
    ))
    return download.get_start_response()

def get_download_task(self, export_tags, export_filter):
    self.generate_bulk_files(export_tags, export_filter)
    download = DownloadBase()
    download.set_task(couchexport.tasks.bulk_export_async.delay(
        self,
        download.download_id,
        domain=self.domain
    ))
    return download

def __init__(self, task, file_ref_id):
    self.task = task
    self.progress = 0
    if self.task:
        DownloadBase.set_progress(self.task, 0, 100)

    download_ref = DownloadBase.get(file_ref_id)
    if download_ref is None:
        raise UnknownFileRefException("Could not find file with ref %s. It may have expired" % file_ref_id)
    self.workbook = WorkbookJSONReader(download_ref.get_filename())

def export_all_form_metadata_async(req, domain):
    format = req.GET.get("format", Format.XLS_2007)
    filename = "%s_forms" % domain

    download = DownloadBase()
    download.set_task(create_metadata_export.delay(
        download.download_id,
        domain,
        format=format,
        filename=filename,
    ))
    return download.get_start_response()

def download_item_lists(request, domain):
    """Asynchronously serve excel download for edit_lookup_tables
    """
    download = DownloadBase()
    download.set_task(fixture_download_async.delay(
        prepare_fixture_download,
        table_ids=request.GET.getlist("table_id"),
        domain=domain,
        download_id=download.download_id,
    ))
    return download.get_start_response()

def import_locations_async(domain, file_ref_id):
    importer = MultiExcelImporter(import_locations_async, file_ref_id)
    task = import_locations_async
    DownloadBase.set_progress(task, 0, 100)
    results = new_locations_import(domain, importer)
    DownloadBase.set_progress(task, 100, 100)
    importer.mark_complete()
    return {
        'messages': results
    }

def get_export_download(export_instances, filters, filename=None):
    from corehq.apps.export.tasks import populate_export_download_task

    download = DownloadBase()
    download.set_task(populate_export_download_task.delay(
        export_instances,
        filters,
        download.download_id,
        filename=filename
    ))
    return download

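# Hedged usage sketch (not from the source): a view handing the download_id from
# get_export_download() above back to the client, which then polls soil for progress.
# The view name, json_response helper, and get_export_instances/get_filters are
# assumptions used only to show the shape of the call.
def start_export(request, domain):
    export_instances = get_export_instances(request)   # hypothetical helper
    filters = get_filters(request)                      # hypothetical helper
    download = get_export_download(export_instances, filters,
                                   filename="{}_export".format(domain))
    return json_response({
        'success': True,
        'download_id': download.download_id,   # client polls soil with this id
    })
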
def _update_progress(table_count, item_count, items_in_table):
    if task:
        processed = table_count * 10 + (10 * item_count / items_in_table)
        DownloadBase.set_progress(task, processed, 10 * total_tables)

def create_files_for_ccz(build, build_profile_id, include_multimedia_files=True,
                         include_index_files=True, download_id=None, compress_zip=False,
                         filename="commcare.zip", download_targeted_version=False,
                         task=None, expose_link=False):
    """
    :param task: celery task whose progress needs to be set when being run asynchronously by celery
    :param expose_link: expose downloadable link for the file created
    :return: path to the ccz file
    """
    compression = zipfile.ZIP_DEFLATED if compress_zip else zipfile.ZIP_STORED
    current_progress = 10  # early on indicate something is happening
    file_progress = 50.0  # arbitrarily say building files takes half the total time

    DownloadBase.set_progress(task, current_progress, 100)

    fpath = _get_file_path(build, include_multimedia_files, include_index_files,
                           build_profile_id, download_targeted_version)

    # Don't rebuild the file if it is already there
    if not (os.path.isfile(fpath) and settings.SHARED_DRIVE_CONF.transfer_enabled):
        files, errors, file_count = _build_ccz_files(
            build, build_profile_id, include_multimedia_files, include_index_files,
            download_id, compress_zip, filename, download_targeted_version)
        file_cache = _zip_files_for_ccz(fpath, files, current_progress, file_progress,
                                        file_count, compression, task)
        if toggles.LOCALE_ID_INTEGRITY.enabled(build.domain):
            locale_errors = find_missing_locale_ids_in_ccz(file_cache)
            if locale_errors:
                errors.extend(locale_errors)
                notify_exception(
                    None,
                    message="CCZ missing locale ids from default/app_strings.txt",
                    details={'domain': build.domain, 'app_id': build.id, 'errors': locale_errors})
        if include_index_files and include_multimedia_files:
            multimedia_errors = check_ccz_multimedia_integrity(build.domain, fpath)
            errors.extend(multimedia_errors)
            if multimedia_errors:
                notify_exception(
                    None,
                    message="CCZ missing multimedia files",
                    details={'domain': build.domain, 'app_id': build.id, 'errors': multimedia_errors})
        if errors:
            os.remove(fpath)
            raise Exception('\t' + '\t'.join(errors))
    else:
        DownloadBase.set_progress(task, current_progress + file_progress, 100)
    if expose_link:
        _expose_download_link(fpath, filename, compress_zip, download_id)
    DownloadBase.set_progress(task, 100, 100)
    return fpath

def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        err = False
        if caseblocks:
            try:
                form = submit_case_blocks(
                    [cb.as_string() for cb in caseblocks],
                    domain,
                    username,
                    user_id,
                )[0]
                if form.is_error:
                    errors.add(error=ImportErrors.ImportErrorMessage, row_number=form.problem)
            except Exception:
                err = True
                errors.add(error=ImportErrors.ImportErrorMessage, row_number=caseblocks[0]._id)
        return err

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row, spreadsheet.workbook.datemode)
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException as e:
            errors.add(ImportErrors.InvalidDate, i + 1, e.column)
            continue
        except importer_util.InvalidIntegerException as e:
            errors.add(ImportErrors.InvalidInteger, i + 1, e.column)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id, domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue

def _update_progress(progress):
    DownloadBase.set_progress(bulk_export_async, progress, total)

def delete_cases(self, request, domain):
    explosion_id = request.POST.get('explosion_id')
    download = DownloadBase()
    res = delete_exploded_case_task.delay(self.domain, explosion_id)
    download.set_task(res)
    return redirect('hq_soil_download', self.domain, download.download_id)

def fixture_download_async(prepare_download, *args, **kw):
    task = fixture_download_async
    DownloadBase.set_progress(task, 0, 100)
    prepare_download(task=task, *args, **kw)
    DownloadBase.set_progress(task, 100, 100)

def mark_complete(self):
    if self.task:
        DownloadBase.set_progress(self.task, 100, 100)

def _update_progress(value, start=0):
    DownloadBase.set_progress(task, start + value, total)

    })
    sync_result = device.restore()

    cases = {}
    new_case_ids = {}
    for case_id, case in six.iteritems(sync_result.cases):
        if case.case_type != USERCASE_TYPE:
            cases[case_id] = case
            new_case_ids[case_id] = [str(uuid.uuid4()) for _ in range(factor - 1)]

    total_cases = len(cases) * (factor - 1)
    total_ledgers = 0
    if task:
        DownloadBase.set_progress(explode_case_task, 0, total_cases)

    queue = []
    progress = 0

    def queue_case(new_case, queue, progress):
        queue.append(new_case)
        if len(queue) >= 500:   # submit 500 cases at a time
            submit_case_blocks(queue, domain, user_id=user_id, device_id="explode_cases")
            progress += len(queue)
            if task:
                DownloadBase.set_progress(explode_case_task, progress, total_cases)

def async_fixture_download(table_ids, domain, download_id):
    task = async_fixture_download
    DownloadBase.set_progress(task, 0, 100)
    prepare_fixture_download(table_ids, domain, task, download_id)
    DownloadBase.set_progress(task, 100, 100)

def fixture_download_async(prepare_download, *args, **kw):
    # deprecated task. no longer called. to be removed after all tasks consumed
    task = fixture_download_async
    DownloadBase.set_progress(task, 0, 100)
    prepare_download(task=task, *args, **kw)
    DownloadBase.set_progress(task, 100, 100)

def download_locations_async(domain, download_id, include_consumption=False):
    DownloadBase.set_progress(download_locations_async, 0, 100)
    dump_locations(domain, download_id, include_consumption=include_consumption)
    DownloadBase.set_progress(download_locations_async, 100, 100)

def _update_progress(event_count, item_count, items_in_table):
    if task and now() - last_update[0] > upate_period:
        last_update[0] = now()
        processed = event_count * 10 + (10 * item_count / items_in_table)
        processed = min(processed, total_events)  # limit at 100%
        DownloadBase.set_progress(task, processed, total_events)

def add_progress(self, count=1):
    self.progress += count
    if self.task:
        DownloadBase.set_progress(self.task, self.progress, self.total_rows)

def export_response(self):
    download = DownloadBase()
    res = export_ucr_async.delay(self.report_export, download.download_id, self.request.couch_user)
    download.set_task(res)
    return redirect(DownloadUCRStatusView.urlname, self.domain, download.download_id,
                    self.report_config_id)

def operate_on_payloads(
    repeat_record_ids: List[str],
    domain: str,
    action,  # type: Literal['resend', 'cancel', 'requeue']  # 3.8+
    use_sql: bool,
    task: Optional = None,
    from_excel: bool = False,
):
    if not repeat_record_ids:
        return {'messages': {'errors': [_('No payloads specified')]}}

    response = {
        'errors': [],
        'success': [],
    }
    success_count = 0

    if task:
        DownloadBase.set_progress(task, 0, len(repeat_record_ids))

    for record_id in repeat_record_ids:
        if use_sql:
            record = _get_sql_repeat_record(domain, record_id)
        else:
            record = _get_couch_repeat_record(domain, record_id)
        if record:
            try:
                if action == 'resend':
                    record.fire(force_send=True)
                    message = _("Successfully resent repeat record (id={})").format(record_id)
                elif action == 'cancel':
                    if use_sql:
                        record.state = RECORD_CANCELLED_STATE
                    else:
                        record.cancel()
                    record.save()
                    message = _("Successfully cancelled repeat record (id={})").format(record_id)
                elif action == 'requeue':
                    record.requeue()
                    if not use_sql:
                        record.save()
                    message = _("Successfully requeued repeat record (id={})").format(record_id)
                else:
                    raise ValueError(f'Unknown action {action!r}')
                response['success'].append(message)
                success_count = success_count + 1
            except Exception as e:
                message = _("Could not perform action for repeat record (id={}): {}").format(record_id, e)
                response['errors'].append(message)

        if task:
            DownloadBase.set_progress(task, success_count, len(repeat_record_ids))

    if from_excel:
        return response

    if success_count:
        response["success_count_msg"] = _(
            "Successfully performed {action} action on {count} form(s)"
        ).format(action=action, count=success_count)
    else:
        response["success_count_msg"] = ''

    return {"messages": response}

def bulk_import_async(import_id, config, domain, excel_id):
    excel_ref = DownloadBase.get(excel_id)
    spreadsheet = importer_util.get_spreadsheet(excel_ref, config.named_columns)
    return do_import(spreadsheet, config, domain, task=bulk_import_async)

def download_locations_async(domain, download_id, include_consumption, headers_only):
    DownloadBase.set_progress(download_locations_async, 0, 100)
    dump_locations(domain, download_id, include_consumption=include_consumption,
                   headers_only=headers_only, task=download_locations_async)
    DownloadBase.set_progress(download_locations_async, 100, 100)

def build_application_zip(include_multimedia_files, include_index_files, app,
                          download_id, build_profile_id=None, compress_zip=False,
                          filename="commcare.zip", download_targeted_version=False):
    from corehq.apps.hqmedia.views import iter_app_files

    DownloadBase.set_progress(build_application_zip, 0, 100)

    errors = []
    compression = zipfile.ZIP_DEFLATED if compress_zip else zipfile.ZIP_STORED

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        fpath = os.path.join(
            settings.SHARED_DRIVE_CONF.transfer_dir,
            "{}{}{}{}{}".format(
                app._id,
                'mm' if include_multimedia_files else '',
                'ccz' if include_index_files else '',
                app.version,
                build_profile_id
            )
        )
        if download_targeted_version:
            fpath += '-targeted'
    else:
        _, fpath = tempfile.mkstemp()

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        files, errors = iter_app_files(
            app, include_multimedia_files, include_index_files, build_profile_id,
            download_targeted_version=download_targeted_version,
        )
        with open(fpath, 'wb') as tmp:
            with zipfile.ZipFile(tmp, "w") as z:
                for path, data in files:
                    # don't compress multimedia files
                    extension = os.path.splitext(path)[1]
                    file_compression = zipfile.ZIP_STORED if extension in MULTIMEDIA_EXTENSIONS else compression
                    z.writestr(path, data, file_compression)

    common_kwargs = dict(
        mimetype='application/zip' if compress_zip else 'application/x-zip-compressed',
        content_disposition='attachment; filename="{fname}"'.format(fname=filename),
        download_id=download_id,
        expiry=(1 * 60 * 60),
    )
    if use_transfer:
        expose_file_download(fpath, use_transfer=use_transfer, **common_kwargs)
    else:
        expose_cached_download(
            FileWrapper(open(fpath, 'rb')),
            file_extension=file_extention_from_filename(filename),
            **common_kwargs
        )

    DownloadBase.set_progress(build_application_zip, 100, 100)
    return {
        "errors": errors,
    }

def _get_saved_export_download_data(export_instance_id):
    download_id = 'rebuild_export_tracker.{}'.format(export_instance_id)
    download_data = DownloadBase.get(download_id)
    if download_data is None:
        download_data = DownloadBase(download_id=download_id)
    return download_data

def mark_complete(self):
    if self.task:
        DownloadBase.set_progress(self.task, 100, 100)
    self.log("processed %s / %s in %s",
             self.progress, self.total_rows, datetime.now() - self.start)

def parse_users(group_memoizer, domain, user_data_model, location_cache, user_filters, task, total_count):

    def _get_group_names(user):
        return sorted([
            group_memoizer.get(id).name for id in Group.by_user_id(user.user_id, wrap=False)
        ], key=alphanumeric_sort_key)

    def _get_devices(user):
        """
        Returns a comma-separated list of IMEI numbers of the user's devices,
        sorted with most-recently-used first
        """
        return ', '.join([
            device.device_id for device in sorted(user.devices, key=lambda d: d.last_used, reverse=True)
        ])

    def _make_user_dict(user, group_names, location_cache):
        model_data, uncategorized_data = (
            user_data_model.get_model_and_uncategorized(user.user_data)
        )
        role = user.get_role(domain)
        activity = user.reporting_metadata

        location_codes = []
        try:
            location_codes.append(location_cache.get(user.location_id))
        except SQLLocation.DoesNotExist:
            pass
        for location_id in user.assigned_location_ids:
            # skip if primary location_id, as it is already added to the start of list above
            if location_id != user.location_id:
                try:
                    location_codes.append(location_cache.get(location_id))
                except SQLLocation.DoesNotExist:
                    pass

        def _format_date(date):
            return date.strftime('%Y-%m-%d %H:%M:%S') if date else ''

        return {
            'data': model_data,
            'uncategorized_data': uncategorized_data,
            'group': group_names,
            'name': user.full_name,
            'password': "******",  # dummy display string for passwords
            'phone-number': user.phone_number,
            'email': user.email,
            'username': user.raw_username,
            'language': user.language,
            'user_id': user._id,
            'is_active': str(user.is_active),
            'User IMEIs (read only)': _get_devices(user),
            'location_code': location_codes,
            'role': role.name if role else '',
            'registered_on (read only)': _format_date(user.created_on),
            'last_submission (read only)': _format_date(activity.last_submission_for_user.submission_date),
            'last_sync (read only)': activity.last_sync_for_user.sync_date,
        }

    unrecognized_user_data_keys = set()
    user_groups_length = 0
    max_location_length = 0
    user_dicts = []
    for n, user in enumerate(get_commcare_users_by_filters(domain, user_filters)):
        group_names = _get_group_names(user)
        user_dict = _make_user_dict(user, group_names, location_cache)
        user_dicts.append(user_dict)
        unrecognized_user_data_keys.update(user_dict['uncategorized_data'])
        user_groups_length = max(user_groups_length, len(group_names))
        max_location_length = max(max_location_length, len(user_dict["location_code"]))
        DownloadBase.set_progress(task, n, total_count)

    user_headers = [
        'username', 'password', 'name', 'phone-number', 'email',
        'language', 'role', 'user_id', 'is_active', 'User IMEIs (read only)',
        'registered_on (read only)', 'last_submission (read only)', 'last_sync (read only)'
    ]

    user_data_fields = [f.slug for f in user_data_model.get_fields(include_system=False)]
    user_headers.extend(build_data_headers(user_data_fields))
    user_headers.extend(build_data_headers(unrecognized_user_data_keys,
                                           header_prefix='uncategorized_data'))
    user_headers.extend(json_to_headers({'group': list(range(1, user_groups_length + 1))}))
    if domain_has_privilege(domain, privileges.LOCATIONS):
        user_headers.extend(json_to_headers({'location_code': list(range(1, max_location_length + 1))}))

    def _user_rows():
        for user_dict in user_dicts:
            row = dict(flatten_json(user_dict))
            yield [row.get(header) or '' for header in user_headers]

    return user_headers, _user_rows()

def build_application_zip(include_multimedia_files, include_index_files, app,
                          download_id, build_profile_id=None, compress_zip=False,
                          filename="commcare.zip", download_targeted_version=False):
    from corehq.apps.hqmedia.views import iter_app_files

    DownloadBase.set_progress(build_application_zip, 0, 100)
    initial_progress = 10   # early on indicate something is happening
    file_progress = 50.0    # arbitrarily say building files takes half the total time

    errors = []
    compression = zipfile.ZIP_DEFLATED if compress_zip else zipfile.ZIP_STORED

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        fpath = os.path.join(
            settings.SHARED_DRIVE_CONF.transfer_dir,
            "{}{}{}{}{}".format(
                app._id,
                'mm' if include_multimedia_files else '',
                'ccz' if include_index_files else '',
                app.version,
                build_profile_id
            )
        )
        if download_targeted_version:
            fpath += '-targeted'
    else:
        dummy, fpath = tempfile.mkstemp()

    DownloadBase.set_progress(build_application_zip, initial_progress, 100)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        files, errors, file_count = iter_app_files(
            app, include_multimedia_files, include_index_files, build_profile_id,
            download_targeted_version=download_targeted_version,
        )

        if toggles.CAUTIOUS_MULTIMEDIA.enabled(app.domain):
            manifest = json.dumps({
                'include_multimedia_files': include_multimedia_files,
                'include_index_files': include_index_files,
                'download_id': download_id,
                'build_profile_id': build_profile_id,
                'compress_zip': compress_zip,
                'filename': filename,
                'download_targeted_version': download_targeted_version,
                'app': app.to_json(),
            }, indent=4)
            files = itertools.chain(files, [('manifest.json', manifest)])

        with open(fpath, 'wb') as tmp:
            with zipfile.ZipFile(tmp, "w") as z:
                progress = initial_progress
                for path, data in files:
                    # don't compress multimedia files
                    extension = os.path.splitext(path)[1]
                    file_compression = zipfile.ZIP_STORED if extension in MULTIMEDIA_EXTENSIONS else compression
                    z.writestr(path, data, file_compression)
                    progress += file_progress / file_count
                    DownloadBase.set_progress(build_application_zip, progress, 100)

        # Integrity check that all media files present in media_suite.xml were added to the zip
        if include_multimedia_files and include_index_files and toggles.CAUTIOUS_MULTIMEDIA.enabled(app.domain):
            with open(fpath, 'rb') as tmp:
                with zipfile.ZipFile(tmp, "r") as z:
                    media_suites = [f for f in z.namelist() if re.search(r'\bmedia_suite.xml\b', f)]
                    if len(media_suites) != 1:
                        message = _('Could not identify media_suite.xml in CCZ')
                        errors.append(message)
                        notify_exception(None, "[ICDS-291] {}".format(message))
                    else:
                        with z.open(media_suites[0]) as media_suite:
                            from corehq.apps.app_manager.xform import parse_xml
                            parsed = parse_xml(media_suite.read())
                            resources = {
                                node.text for node in
                                parsed.findall("media/resource/location[@authority='local']")
                            }
                            names = z.namelist()
                            missing = [r for r in resources if re.sub(r'^\.\/', '', r) not in names]
                            if missing:
                                soft_assert(notify_admins=True)(
                                    False, '[ICDS-291] Files missing from CCZ', [
                                        {
                                            'missing file count': len(missing),
                                            'app_id': app._id,
                                            'version': app.version,
                                            'build_profile_id': build_profile_id,
                                        },
                                        {'files': missing},
                                    ])
                                errors += [_('Media file missing from CCZ: {}').format(r) for r in missing]

        if errors:
            os.remove(fpath)
            update_task_state(build_application_zip, states.FAILURE, {'errors': errors})
            raise Ignore()  # We want the task to fail hard, so ignore any future updates to it
    else:
        DownloadBase.set_progress(build_application_zip, initial_progress + file_progress, 100)

    common_kwargs = {
        'mimetype': 'application/zip' if compress_zip else 'application/x-zip-compressed',
        'content_disposition': 'attachment; filename="{fname}"'.format(fname=filename),
        'download_id': download_id,
        'expiry': (1 * 60 * 60),
    }
    if use_transfer:
        expose_file_download(fpath, use_transfer=use_transfer, **common_kwargs)
    else:
        expose_cached_download(
            FileWrapper(open(fpath, 'rb')),
            file_extension=file_extention_from_filename(filename),
            **common_kwargs
        )
    DownloadBase.set_progress(build_application_zip, 100, 100)

def archive_or_restore_forms(domain, user_id, username, form_ids, archive_or_restore,
                             task=None, from_excel=False):
    response = {
        'errors': [],
        'success': [],
    }

    missing_forms = set(form_ids)
    success_count = 0

    if task:
        DownloadBase.set_progress(task, 0, len(form_ids))

    for xform in FormAccessors(domain).iter_forms(form_ids):
        missing_forms.discard(xform.form_id)

        if xform.domain != domain:
            response['errors'].append(
                _(u"XFORM {form_id} does not belong to domain {domain}").format(
                    form_id=xform.form_id, domain=domain))
            continue

        xform_string = _(u"XFORM {form_id} for domain {domain} by user '{username}'").format(
            form_id=xform.form_id,
            domain=xform.domain,
            username=username)

        try:
            if archive_or_restore.is_archive_mode():
                xform.archive(user_id=user_id)
                message = _(u"Successfully archived {form}").format(form=xform_string)
            else:
                xform.unarchive(user_id=user_id)
                message = _(u"Successfully unarchived {form}").format(form=xform_string)
            response['success'].append(message)
            success_count = success_count + 1
        except Exception as e:
            response['errors'].append(_(u"Could not archive {form}: {error}").format(
                form=xform_string, error=e))

        if task:
            DownloadBase.set_progress(task, success_count, len(form_ids))

    for missing_form_id in missing_forms:
        response['errors'].append(
            _(u"Could not find XForm {form_id}").format(form_id=missing_form_id))

    if from_excel:
        return response

    response["success_count_msg"] = _("{success_msg} {count} form(s)".format(
        success_msg=archive_or_restore.success_text,
        count=success_count))

    return {"messages": response}

        return attr

    num_cases = len(cases)

    def get_matching_owner(case):
        if by_user_id:
            if case.user_id in by_user_id:
                return case.user_id
            elif get_owner_id(case) in by_user_id:
                return get_owner_id(case)
        else:
            return get_owner_id(case)

    for i, case in enumerate(cases):
        if process:
            DownloadBase.set_progress(process, i, num_cases)
        if get_owner_id(case) in owner_ids:
            matching_owner = get_matching_owner(case)
            case_row = {'dynamic_properties': {}}
            for key in case_static_keys:
                if key == 'username':
                    try:
                        case_row[key] = by_user_id[matching_owner].raw_username
                    except (TypeError, KeyError):
                        case_row[key] = ''
                elif key == 'owner_name':
                    if users and case.owner_id in by_user_id:
                        case_row[key] = by_user_id[case.owner_id].full_name
                    elif case.owner_id in by_group_id:
                        case_row[key] = by_group_id[case.owner_id].name
                    else:

def excel_fields(request, domain):
    named_columns = request.POST['named_columns']
    case_type = request.POST['case_type']
    search_column = request.POST['search_column']
    search_field = request.POST['search_field']
    create_new_cases = request.POST.get('create_new_cases') == 'on'
    key_value_columns = request.POST.get('key_value_columns') == 'on'
    key_column = ''
    value_column = ''

    download_ref = DownloadBase.get(request.session.get(EXCEL_SESSION_ID))

    spreadsheet = importer_util.get_spreadsheet(download_ref, named_columns)
    if not spreadsheet:
        return _spreadsheet_expired(request, domain)
    columns = spreadsheet.get_header_columns()

    if key_value_columns:
        key_column = request.POST['key_column']
        value_column = request.POST['value_column']

        excel_fields = []
        key_column_index = columns.index(key_column)

        # if key/value columns were specified, get all the unique keys listed
        if key_column_index:
            excel_fields = spreadsheet.get_unique_column_values(key_column_index)

        # concatenate unique key fields with the rest of the columns
        excel_fields = columns + excel_fields
        # remove key/value column names from list
        excel_fields.remove(key_column)
        if value_column in excel_fields:
            excel_fields.remove(value_column)
    else:
        excel_fields = columns

    case_fields = importer_util.get_case_properties(domain, case_type)

    # hide search column and matching case fields from the update list
    try:
        excel_fields.remove(search_column)
    except:
        pass
    try:
        case_fields.remove(search_field)
    except:
        pass

    # we can't actually update this so don't show it
    try:
        case_fields.remove('type')
    except:
        pass

    return render(
        request,
        "importer/excel_fields.html", {
            'named_columns': named_columns,
            'case_type': case_type,
            'search_column': search_column,
            'search_field': search_field,
            'create_new_cases': create_new_cases,
            'key_column': key_column,
            'value_column': value_column,
            'columns': columns,
            'excel_fields': excel_fields,
            'case_fields': case_fields,
            'domain': domain,
            'report': {
                'name': 'Import: Match columns to fields'
            },
            'slug': base.ImportCases.slug
        })

def _set_progress(progress):
    if task is not None:
        DownloadBase.set_progress(task, progress, total)

def parse_mobile_users(domain, user_filters, task=None, total_count=None):
    from corehq.apps.users.views.mobile.custom_data_fields import UserFieldsView
    fields_definition = CustomDataFieldsDefinition.get_or_create(domain, UserFieldsView.field_type)

    location_cache = LocationIdToSiteCodeCache(domain)

    unrecognized_user_data_keys = set()
    user_groups_length = 0
    max_location_length = 0
    user_dicts = []
    domains_list = [domain]
    is_multi_domain_download = False
    if 'domains' in user_filters:
        domains_list = user_filters['domains']
        if domains_list != [domain]:
            is_multi_domain_download = True

    current_user_downloaded_count = 0
    for current_domain in domains_list:
        for n, user in enumerate(get_commcare_users_by_filters(current_domain, user_filters)):
            group_memoizer = load_memoizer(current_domain)
            group_names = sorted([
                group_memoizer.get(id).name for id in Group.by_user_id(user.user_id, wrap=False)
            ], key=alphanumeric_sort_key)

            user_dict = make_mobile_user_dict(user, group_names, location_cache, current_domain,
                                              fields_definition)
            user_dicts.append(user_dict)
            unrecognized_user_data_keys.update(user_dict['uncategorized_data'])
            user_groups_length = max(user_groups_length, len(group_names))
            max_location_length = max(max_location_length, len(user_dict["location_code"]))
            if task:
                DownloadBase.set_progress(task, n + current_user_downloaded_count, total_count)
        current_user_downloaded_count += n + 1

    user_headers = [
        'username', 'password', 'name', 'phone-number', 'email',
        'language', 'role', 'user_id', 'is_active', 'User IMEIs (read only)',
        'registered_on (read only)', 'last_submission (read only)', 'last_sync (read only)'
    ]
    if domain_has_privilege(domain, privileges.APP_USER_PROFILES):
        user_headers += ['user_profile']

    user_data_fields = [f.slug for f in fields_definition.get_fields(include_system=False)]
    user_headers.extend(build_data_headers(user_data_fields))
    user_headers.extend(build_data_headers(unrecognized_user_data_keys,
                                           header_prefix='uncategorized_data'))
    user_headers.extend(json_to_headers({'group': list(range(1, user_groups_length + 1))}))
    if domain_has_privilege(domain, privileges.LOCATIONS):
        user_headers.extend(json_to_headers({'location_code': list(range(1, max_location_length + 1))}))
    if is_multi_domain_download:
        user_headers += ['domain']

    return user_headers, get_user_rows(user_dicts, user_headers)
