def export_async(self, request, *args, **kwargs):
    """Start an asynchronous export for this dataview, or poll a running one.

    Query params read: ``job_uuid`` (poll an existing task instead of
    starting a new one), ``format``, ``include_hxl``, ``include_labels``,
    ``include_labels_only``, ``query`` and ``remove_group_name``.

    Always responds with HTTP 202, even when the polled job has finished.
    """
    params = request.query_params
    job_uuid = params.get('job_uuid')
    export_type = params.get('format')
    include_hxl = params.get('include_hxl', False)
    include_labels = params.get('include_labels', False)
    include_labels_only = params.get('include_labels_only', False)
    query = params.get("query")
    dataview = self.get_object()
    xform = dataview.xform

    # NOTE(review): these `is not None` guards are dead — params.get()
    # defaults to False, never None, so str_to_bool is always called.
    if include_labels is not None:
        include_labels = str_to_bool(include_labels)

    if include_labels_only is not None:
        include_labels_only = str_to_bool(include_labels_only)

    if include_hxl is not None:
        include_hxl = str_to_bool(include_hxl)

    # NOTE(review): unlike the flags above, remove_group_name is passed
    # downstream as the raw query-string value — confirm consumers coerce it.
    remove_group_name = params.get('remove_group_name', False)
    columns_with_hxl = get_columns_with_hxl(xform.survey.get('children'))

    # Only emit the HXL row when the dataview actually selects a tagged column.
    if columns_with_hxl and include_hxl:
        include_hxl = include_hxl_row(
            dataview.columns, list(columns_with_hxl)
        )

    options = {
        'remove_group_name': remove_group_name,
        'dataview_pk': dataview.pk,
        'include_hxl': include_hxl,
        'include_labels': include_labels,
        'include_labels_only': include_labels_only
    }
    if query:
        options.update({'query': query})

    if job_uuid:
        # Poll a previously submitted celery task.
        job = AsyncResult(job_uuid)
        if job.state == 'SUCCESS':
            export_id = job.result
            export = Export.objects.get(id=export_id)
            resp = export_async_export_response(request, export)
        else:
            resp = {
                'job_status': job.state
            }
    else:
        # Kick off a new export task.
        resp = process_async_export(request, xform, export_type,
                                    options=options)

    return Response(data=resp,
                    status=status.HTTP_202_ACCEPTED,
                    content_type="application/json")
def export_to(self, path, dataview=None):
    """Write the form's submission data to ``path`` as CSV.

    :param path: destination file path for the CSV output.
    :param dataview: optional dataview; when given, only columns selected
        by the dataview (prefix match, so group children follow their
        parent) are exported and its filtered query supplies the rows.

    FIX: replaced the Python-2-only ``dict.iteritems()`` with ``.items()``
    — behavior is identical (the pairs are consumed immediately by the
    surrounding list builder) and the code now also runs on Python 3.
    """
    self.ordered_columns = OrderedDict()
    self._build_ordered_columns(self.dd.survey, self.ordered_columns)

    if dataview:
        cursor = dataview.query_data(dataview, all_data=True)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        self._update_columns_from_data(cursor)

        # Keep only columns the dataview selects; `cols is None` means the
        # xpath itself is the single column.
        columns = list(chain.from_iterable(
            [[xpath] if cols is None else cols
             for xpath, cols in self.ordered_columns.items()
             if [c for c in dataview.columns if xpath.startswith(c)]]))

        # Re-query: the first cursor was consumed while discovering columns.
        cursor = dataview.query_data(dataview, all_data=True)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        data = self._format_for_dataframe(cursor)
    else:
        cursor = self._query_data(self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        self._update_columns_from_data(cursor)

        columns = list(chain.from_iterable(
            [[xpath] if cols is None else cols
             for xpath, cols in self.ordered_columns.items()]))

        # add extra columns
        columns += [col for col in self.ADDITIONAL_COLUMNS]

        for field in self.dd.get_survey_elements_of_type('osm'):
            columns += OsmData.get_tag_keys(self.xform,
                                            field.get_abbreviated_xpath(),
                                            include_prefix=True)

        # Re-query: the first cursor was consumed while discovering columns.
        cursor = self._query_data(self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        data = self._format_for_dataframe(cursor)

    columns_with_hxl = self.include_hxl and get_columns_with_hxl(
        self.dd.survey_elements)

    write_to_csv(path, data, columns,
                 columns_with_hxl=columns_with_hxl,
                 remove_group_name=self.remove_group_name,
                 dd=self.dd, group_delimiter=self.group_delimiter,
                 include_labels=self.include_labels,
                 include_labels_only=self.include_labels_only,
                 include_hxl=self.include_hxl,
                 win_excel_utf8=self.win_excel_utf8,
                 total_records=self.total_records)
def export_to(self, path, dataview=None):
    """Write the form's submission data to ``path`` as CSV.

    :param path: destination file path for the CSV output.
    :param dataview: optional dataview; when given, only columns selected
        by the dataview (prefix match, so group children follow their
        parent) are exported and ``self.filter_query`` narrows its rows.
    """
    self.ordered_columns = OrderedDict()
    self._build_ordered_columns(self.dd.survey, self.ordered_columns)

    if dataview:
        cursor = dataview.query_data(dataview, all_data=True,
                                     filter_query=self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        self._update_columns_from_data(cursor)

        # Keep only columns the dataview selects; `cols is None` means the
        # xpath itself is the single column.
        columns = list(chain.from_iterable(
            [[xpath] if cols is None else cols
             for (xpath, cols) in iteritems(self.ordered_columns)
             if [c for c in dataview.columns if xpath.startswith(c)]]
        ))

        # Re-query: the first cursor was consumed while discovering columns.
        cursor = dataview.query_data(dataview, all_data=True,
                                     filter_query=self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        data = self._format_for_dataframe(cursor)
    else:
        cursor = self._query_data(self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        self._update_columns_from_data(cursor)

        columns = list(chain.from_iterable(
            [[xpath] if cols is None else cols
             for (xpath, cols) in iteritems(self.ordered_columns)]))

        # add extra columns
        columns += [col for col in self.extra_columns]

        for field in self.dd.get_survey_elements_of_type('osm'):
            columns += OsmData.get_tag_keys(self.xform,
                                            field.get_abbreviated_xpath(),
                                            include_prefix=True)

        # Re-query: the first cursor was consumed while discovering columns.
        cursor = self._query_data(self.filter_query)
        if isinstance(cursor, QuerySet):
            cursor = cursor.iterator()
        data = self._format_for_dataframe(cursor)

    columns_with_hxl = self.include_hxl and get_columns_with_hxl(
        self.dd.survey_elements)

    write_to_csv(path, data, columns,
                 columns_with_hxl=columns_with_hxl,
                 remove_group_name=self.remove_group_name,
                 dd=self.dd, group_delimiter=self.group_delimiter,
                 include_labels=self.include_labels,
                 include_labels_only=self.include_labels_only,
                 include_hxl=self.include_hxl,
                 win_excel_utf8=self.win_excel_utf8,
                 total_records=self.total_records,
                 index_tags=self.index_tags)
def save(self, *args, **kwargs):
    """Parse the attached XLSForm and populate the model fields before saving.

    Recognized kwarg: ``skip_xls_read`` — when truthy, skip re-parsing the
    XLSForm (the kwarg is removed before delegating to the parent save).

    Raises XLSFormError when an updated form's id_string does not match
    the existing one.
    """
    skip_xls_read = kwargs.get('skip_xls_read')

    if self.xls and not skip_xls_read:
        # For an existing form, the instance tag name is the default name.
        default_name = None \
            if not self.pk else self.survey.xml_instance().tagName
        survey_dict = process_xlsform(self.xls, default_name)
        if has_external_choices(survey_dict):
            self.has_external_choices = True
        survey = create_survey_element_from_dict(survey_dict)
        survey = check_version_set(survey)
        if get_columns_with_hxl(survey.get('children')):
            self.has_hxl_support = True
        # if form is being replaced, don't check for id_string uniqueness
        if self.pk is None:
            new_id_string = self.get_unique_id_string(
                survey.get('id_string'))
            self._id_string_changed = \
                new_id_string != survey.get('id_string')
            survey['id_string'] = new_id_string
            # For flow results packages use the user defined id/uuid
            if self.xls.name.endswith('json'):
                self.uuid = FloipSurvey(self.xls).descriptor.get('id')
                if self.uuid:
                    check_xform_uuid(self.uuid)
        elif self.id_string != survey.get('id_string'):
            raise XLSFormError(
                _((u"Your updated form's id_string '%(new_id)s' must match "
                   "the existing forms' id_string '%(old_id)s'." % {
                       'new_id': survey.get('id_string'),
                       'old_id': self.id_string
                   })))
        elif default_name and default_name != survey.get('name'):
            survey['name'] = default_name
        else:
            survey['id_string'] = self.id_string
        self.json = survey.to_json()
        self.xml = survey.to_xml()
        self.version = survey.get('version')
        self.last_updated_at = timezone.now()
        self.title = survey.get('title')
        self._mark_start_time_boolean()
        set_uuid(self)
        self._set_uuid_in_xml()
        self._set_hash()

    # Strip our private kwarg so the parent save() doesn't reject it.
    if 'skip_xls_read' in kwargs:
        del kwargs['skip_xls_read']

    super(DataDictionary, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Parse the attached XLSForm and populate the model fields before saving.

    Recognized kwarg: ``skip_xls_read`` — when truthy, skip re-parsing the
    XLSForm (the kwarg is removed before delegating to the parent save).

    Raises XLSFormError when an updated form's id_string does not match
    the existing one.
    """
    # Remove our private kwarg up-front; the parent save() must not see it.
    skip_xls_read = kwargs.pop('skip_xls_read', None)

    if self.xls and not skip_xls_read:
        # For an existing form, the instance tag name is the default name.
        if self.pk:
            default_name = self.survey.xml_instance().tagName
        else:
            default_name = None

        survey_dict = process_xlsform(self.xls, default_name)
        if has_external_choices(survey_dict):
            self.has_external_choices = True

        survey = check_version_set(
            create_survey_element_from_dict(survey_dict))
        if get_columns_with_hxl(survey.get('children')):
            self.has_hxl_support = True

        # if form is being replaced, don't check for id_string uniqueness
        if self.pk is None:
            unique_id_string = self.get_unique_id_string(
                survey.get('id_string'))
            self._id_string_changed = (
                unique_id_string != survey.get('id_string'))
            survey['id_string'] = unique_id_string

            # For flow results packages use the user defined id/uuid
            if self.xls.name.endswith('json'):
                self.uuid = FloipSurvey(self.xls).descriptor.get('id')
                if self.uuid:
                    check_xform_uuid(self.uuid)
        elif self.id_string != survey.get('id_string'):
            raise XLSFormError(_(
                (u"Your updated form's id_string '%(new_id)s' must match "
                 "the existing forms' id_string '%(old_id)s'." % {
                     'new_id': survey.get('id_string'),
                     'old_id': self.id_string})))
        elif default_name and default_name != survey.get('name'):
            survey['name'] = default_name
        else:
            survey['id_string'] = self.id_string

        self.json = survey.to_json()
        self.xml = survey.to_xml()
        self.version = survey.get('version')
        self.last_updated_at = timezone.now()
        self.title = survey.get('title')
        self._mark_start_time_boolean()
        set_uuid(self)
        self._set_uuid_in_xml()
        self._set_hash()

    super(DataDictionary, self).save(*args, **kwargs)
def export_async(self, request, *args, **kwargs):
    """Start an asynchronous export for this dataview, or poll a running one.

    Query params read: ``job_uuid`` (poll an existing task instead of
    starting a new one), ``format``, ``include_hxl`` and
    ``remove_group_name``. Always responds with HTTP 202.
    """
    query_params = request.query_params
    task_uuid = query_params.get('job_uuid')
    export_type = query_params.get('format')
    include_hxl = query_params.get('include_hxl', False)
    remove_group_name = query_params.get('remove_group_name', False)

    dataview = self.get_object()
    xform = dataview.xform

    hxl_columns = get_columns_with_hxl(xform.survey.get('children'))
    # Only emit the HXL row when the dataview selects a tagged column.
    if hxl_columns and include_hxl:
        include_hxl = include_hxl_row(dataview.columns, hxl_columns.keys())

    options = {
        'remove_group_name': remove_group_name,
        'dataview_pk': dataview.pk,
        'include_hxl': include_hxl
    }

    if task_uuid:
        # Poll a previously submitted celery task.
        job = AsyncResult(task_uuid)
        if job.state == 'SUCCESS':
            export = Export.objects.get(id=job.result)
            resp = _export_async_export_response(
                request, xform, export, dataview_pk=dataview.pk)
        else:
            resp = {'job_status': job.state}
    else:
        # Kick off a new export task.
        resp = process_async_export(
            request, xform, export_type, options=options)

    return Response(data=resp,
                    status=status.HTTP_202_ACCEPTED,
                    content_type="application/json")
def custom_response_handler(request, xform, query, export_type,
                            token=None, meta=None, dataview=False,
                            filename=None):
    """
    Returns a HTTP response with export file for download.

    FIX: the external-export check previously read
    ``a in TYPES and (token is not None) or (meta is not None)`` — since
    ``and`` binds tighter than ``or``, any request with ``meta`` set was
    forced to ``Export.EXTERNAL_EXPORT`` regardless of the requested
    export type. The ``or`` is now explicitly parenthesized.
    """
    export_type = _get_export_type(export_type)
    if export_type in EXTERNAL_EXPORT_TYPES and (
            token is not None or meta is not None):
        export_type = Export.EXTERNAL_EXPORT

    options = parse_request_export_options(request.query_params)

    # False (no dataview) or the dataview's pk.
    dataview_pk = hasattr(dataview, 'pk') and dataview.pk
    options["dataview_pk"] = dataview_pk

    if dataview:
        columns_with_hxl = get_columns_with_hxl(xform.survey.get('children'))
        if columns_with_hxl:
            options['include_hxl'] = include_hxl_row(
                dataview.columns, list(columns_with_hxl))

    try:
        query = filter_queryset_xform_meta_perms_sql(xform, request.user,
                                                     query)
    except NoRecordsPermission:
        return Response(
            data=json.dumps({"details": _("You don't have permission")}),
            status=status.HTTP_403_FORBIDDEN,
            content_type="application/json")

    if query:
        options['query'] = query

    remove_group_name = options.get("remove_group_name")

    export_id = request.query_params.get("export_id")

    if export_id:
        export = get_object_or_404(Export, id=export_id, xform=xform)
    else:
        if export_type == Export.GOOGLE_SHEETS_EXPORT:
            return Response(
                data=json.dumps({
                    "details": _("Sheets export only supported in async mode")
                }),
                status=status.HTTP_403_FORBIDDEN,
                content_type="application/json")

        # check if we need to re-generate,
        # we always re-generate if a filter is specified
        def _new_export():
            return _generate_new_export(request, xform, query, export_type,
                                        dataview_pk=dataview_pk)

        if should_create_new_export(xform, export_type, options,
                                    request=request):
            export = _new_export()
        else:
            export = newest_export_for(xform, export_type, options)
            if not export.filename and not export.error_message:
                # Stale record with neither a file nor an error: regenerate.
                export = _new_export()

    log_export(request, xform, export_type)

    if export_type == Export.EXTERNAL_EXPORT:
        return external_export_response(export)

    if export.filename is None and export.error_message:
        raise exceptions.ParseError(export.error_message)

    # get extension from file_path, exporter could modify to
    # xlsx if it exceeds limits
    _path, ext = os.path.splitext(export.filename)
    ext = ext[1:]

    show_date = True
    if filename is None and export.status == Export.SUCCESSFUL:
        filename = _generate_filename(request, xform, remove_group_name,
                                      dataview_pk=dataview_pk)
    else:
        show_date = False
    response = response_with_mimetype_and_name(
        Export.EXPORT_MIMES[ext], filename, extension=ext,
        show_date=show_date, file_path=export.filepath)

    return response
def get_has_hxl_support(self, obj):
    """Return whether the dataview's columns include an HXL-tagged column."""
    survey_children = obj.xform.survey.get('children')
    hxl_columns = get_columns_with_hxl(survey_children)
    return include_hxl_row(obj.columns, hxl_columns.keys())
def save(self, *args, **kwargs):
    """Parse the attached XLSForm/CSV/FLOIP file and populate the model
    fields before saving.

    Recognized kwarg: ``skip_xls_read`` — when truthy, skip re-parsing the
    form file (the kwarg is removed before delegating to the parent save).

    Raises XLSFormError when an updated form's id_string does not match
    the existing one.

    NOTE(review): uses `unicode` and `StringIO` — Python 2 only.
    """
    skip_xls_read = kwargs.get('skip_xls_read')

    if self.xls and not skip_xls_read:
        # For an existing form, the instance tag name is the default name.
        default_name = None \
            if not self.pk else self.survey.xml_instance().tagName
        try:
            if self.xls.name.endswith('csv'):
                # csv file gets closed in pyxform, make a copy
                self.xls.seek(0)
                file_object = io.BytesIO()
                file_object.write(self.xls.read())
                file_object.seek(0)
                self.xls.seek(0)
            else:
                file_object = self.xls
            if self.xls.name.endswith('json'):
                survey_dict = FloipSurvey(self.xls).survey.to_json_dict()
            else:
                survey_dict = parse_file_to_json(
                    self.xls.name, file_object=file_object)
        except csv.Error as e:
            # Retry once with normalized newlines when the csv module
            # complains about universal-newline mode.
            newline_error = u'new-line character seen in unquoted field '\
                u'- do you need to open the file in universal-newline '\
                u'mode?'
            if newline_error == unicode(e):
                self.xls.seek(0)
                file_obj = StringIO(
                    u'\n'.join(self.xls.read().splitlines()))
                survey_dict = parse_file_to_json(
                    self.xls.name, default_name=default_name,
                    file_object=file_obj)
            else:
                raise e
        if has_external_choices(survey_dict):
            self.survey_dict = survey_dict
            self.has_external_choices = True
        survey = create_survey_element_from_dict(survey_dict)
        survey = self._check_version_set(survey)
        if get_columns_with_hxl(survey.get('children')):
            self.has_hxl_support = True
        # if form is being replaced, don't check for id_string uniqueness
        if self.pk is None:
            new_id_string = self.get_unique_id_string(
                survey.get('id_string'))
            self._id_string_changed = \
                new_id_string != survey.get('id_string')
            survey['id_string'] = new_id_string
        elif self.id_string != survey.get('id_string'):
            raise XLSFormError(
                _((u"Your updated form's id_string '%(new_id)s' must match "
                   "the existing forms' id_string '%(old_id)s'." % {
                       'new_id': survey.get('id_string'),
                       'old_id': self.id_string
                   })))
        elif default_name and default_name != survey.get('name'):
            survey['name'] = default_name
        else:
            survey['id_string'] = self.id_string
        self.json = survey.to_json()
        self.xml = survey.to_xml()
        self.version = survey.get('version')
        self.last_updated_at = timezone.now()
        self.title = survey.get('title')
        self._mark_start_time_boolean()
        self._set_hash()
        set_uuid(self)
        self._set_uuid_in_xml()

    # Strip our private kwarg so the parent save() doesn't reject it.
    if 'skip_xls_read' in kwargs:
        del kwargs['skip_xls_read']

    super(DataDictionary, self).save(*args, **kwargs)
def generate_export(export_type, xform, export_id=None, options=None,
                    retries=0):
    """
    Create appropriate export object given the export type.

    param: export_type
    param: xform
    params: export_id: ID of export object associated with the request
    param: options: additional parameters required for the lookup.
        binary_select_multiples: boolean flag
        end: end offset
        ext: export extension type
        dataview_pk: dataview pk
        group_delimiter: "/" or "."
        query: filter_query for custom queries
        remove_group_name: boolean flag
        split_select_multiples: boolean flag
        index_tag: ('[', ']') or ('_', '_')
    """
    username = xform.user.username
    id_string = xform.id_string
    end = options.get("end")
    extension = options.get("extension", export_type)
    filter_query = options.get("query")
    remove_group_name = options.get("remove_group_name", False)
    start = options.get("start")

    export_type_func_map = {
        Export.XLS_EXPORT: 'to_xls_export',
        Export.CSV_EXPORT: 'to_flat_csv_export',
        Export.CSV_ZIP_EXPORT: 'to_zipped_csv',
        Export.SAV_ZIP_EXPORT: 'to_zipped_sav',
        Export.GOOGLE_SHEETS_EXPORT: 'to_google_sheets',
    }

    # NOTE(review): this branch is unreachable — `xform.user.username`
    # above already raises if xform is None.
    if xform is None:
        xform = XForm.objects.get(user__username__iexact=username,
                                  id_string__iexact=id_string)

    dataview = None
    if options.get("dataview_pk"):
        dataview = DataView.objects.get(pk=options.get("dataview_pk"))
        records = dataview.query_data(dataview, all_data=True,
                                      filter_query=filter_query)
        total_records = dataview.query_data(dataview,
                                            count=True)[0].get('count')
    else:
        records = query_data(xform, query=filter_query, start=start, end=end)

        if filter_query:
            total_records = query_data(xform, query=filter_query,
                                       start=start, end=end,
                                       count=True)[0].get('count')
        else:
            total_records = xform.num_of_submissions

    if isinstance(records, QuerySet):
        records = records.iterator()

    export_builder = ExportBuilder()

    # SAV exports always truncate group titles.
    export_builder.TRUNCATE_GROUP_TITLE = True \
        if export_type == Export.SAV_ZIP_EXPORT else remove_group_name
    export_builder.GROUP_DELIMITER = options.get("group_delimiter",
                                                 DEFAULT_GROUP_DELIMITER)
    export_builder.SPLIT_SELECT_MULTIPLES = options.get(
        "split_select_multiples", True)
    export_builder.BINARY_SELECT_MULTIPLES = options.get(
        "binary_select_multiples", False)
    export_builder.INCLUDE_LABELS = options.get('include_labels', False)
    export_builder.INCLUDE_LABELS_ONLY = options.get('include_labels_only',
                                                     False)
    export_builder.INCLUDE_HXL = options.get('include_hxl', False)
    export_builder.INCLUDE_IMAGES \
        = options.get("include_images", settings.EXPORT_WITH_IMAGE_DEFAULT)
    export_builder.VALUE_SELECT_MULTIPLES = options.get(
        'value_select_multiples', False)
    export_builder.REPEAT_INDEX_TAGS = options.get("repeat_index_tags",
                                                   DEFAULT_INDEX_TAGS)

    # 'win_excel_utf8' is only relevant for CSV exports
    if 'win_excel_utf8' in options and export_type != Export.CSV_EXPORT:
        del options['win_excel_utf8']

    export_builder.set_survey(xform.survey, xform)

    temp_file = NamedTemporaryFile(suffix=("." + extension))

    columns_with_hxl = export_builder.INCLUDE_HXL and get_columns_with_hxl(
        xform.survey_elements)

    # get the export function by export type
    func = getattr(export_builder, export_type_func_map[export_type])
    try:
        func.__call__(temp_file.name, records, username, id_string,
                      filter_query, start=start, end=end, dataview=dataview,
                      xform=xform, options=options,
                      columns_with_hxl=columns_with_hxl,
                      total_records=total_records)
    except NoRecordsFoundError:
        pass
    except SPSSIOError as e:
        export = get_or_create_export(export_id, xform, export_type, options)
        export.error_message = str(e)
        export.internal_status = Export.FAILED
        export.save()
        report_exception("SAV Export Failure", e, sys.exc_info())
        return export

    # generate filename
    basename = "%s_%s" % (id_string,
                          datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f"))

    if remove_group_name:
        # add 'remove group name' flag to filename
        basename = "{}-{}".format(basename, GROUPNAME_REMOVED_FLAG)
    if dataview:
        basename = "{}-{}".format(basename, DATAVIEW_EXPORT)

    filename = basename + "." + extension

    # check filename is unique
    while not Export.is_filename_unique(xform, filename):
        filename = increment_index_in_filename(filename)

    file_path = os.path.join(username, 'exports', id_string, export_type,
                             filename)

    # TODO: if s3 storage, make private - how will we protect local storage??
    # seek to the beginning as required by storage classes
    temp_file.seek(0)
    export_filename = default_storage.save(file_path,
                                           File(temp_file, file_path))
    temp_file.close()

    dir_name, basename = os.path.split(export_filename)

    # get or create export object
    export = get_or_create_export(export_id, xform, export_type, options)

    export.filedir = dir_name
    export.filename = basename
    export.internal_status = Export.SUCCESSFUL
    # do not persist exports that have a filter
    # Get URL of the exported sheet.
    if export_type == Export.GOOGLE_SHEETS_EXPORT:
        export.export_url = export_builder.url

    # if we should create a new export is true, we should not save it
    if start is None and end is None:
        export.save()
    return export
def get_has_hxl_support(self, obj):
    """Return whether the dataview's columns include an HXL-tagged column."""
    survey_children = obj.xform.survey.get('children')
    hxl_columns = get_columns_with_hxl(survey_children)
    return include_hxl_row(obj.columns, list(hxl_columns))
def generate_export(export_type, xform, export_id=None, options=None):
    """
    Create appropriate export object given the export type.

    param: export_type
    param: xform
    params: export_id: ID of export object associated with the request
    param: options: additional parameters required for the lookup.
        binary_select_multiples: boolean flag
        end: end offset
        ext: export extension type
        dataview_pk: dataview pk
        group_delimiter: "/" or "."
        query: filter_query for custom queries
        remove_group_name: boolean flag
        split_select_multiples: boolean flag
        index_tag: ('[', ']') or ('_', '_')
        show_choice_labels: boolean flag
        language: language labels as in the XLSForm/XForm
    """
    username = xform.user.username
    id_string = xform.id_string
    end = options.get("end")
    extension = options.get("extension", export_type)
    filter_query = options.get("query")
    remove_group_name = options.get("remove_group_name", False)
    start = options.get("start")

    export_type_func_map = {
        Export.XLS_EXPORT: 'to_xls_export',
        Export.CSV_EXPORT: 'to_flat_csv_export',
        Export.CSV_ZIP_EXPORT: 'to_zipped_csv',
        Export.SAV_ZIP_EXPORT: 'to_zipped_sav',
        Export.GOOGLE_SHEETS_EXPORT: 'to_google_sheets',
    }

    # NOTE(review): this branch is unreachable — `xform.user.username`
    # above already raises if xform is None.
    if xform is None:
        xform = XForm.objects.get(
            user__username__iexact=username, id_string__iexact=id_string)

    dataview = None
    if options.get("dataview_pk"):
        dataview = DataView.objects.get(pk=options.get("dataview_pk"))
        records = dataview.query_data(dataview, all_data=True,
                                      filter_query=filter_query)
        total_records = dataview.query_data(dataview,
                                            count=True)[0].get('count')
    else:
        records = query_data(xform, query=filter_query, start=start, end=end)

        if filter_query:
            total_records = query_data(xform, query=filter_query,
                                       start=start, end=end,
                                       count=True)[0].get('count')
        else:
            total_records = xform.num_of_submissions

    if isinstance(records, QuerySet):
        records = records.iterator()

    export_builder = ExportBuilder()

    # SAV exports always truncate group titles.
    export_builder.TRUNCATE_GROUP_TITLE = True \
        if export_type == Export.SAV_ZIP_EXPORT else remove_group_name
    export_builder.GROUP_DELIMITER = options.get(
        "group_delimiter", DEFAULT_GROUP_DELIMITER
    )
    export_builder.SPLIT_SELECT_MULTIPLES = options.get(
        "split_select_multiples", True
    )
    export_builder.BINARY_SELECT_MULTIPLES = options.get(
        "binary_select_multiples", False
    )
    export_builder.INCLUDE_LABELS = options.get('include_labels', False)
    include_reviews = options.get('include_reviews', False)
    export_builder.INCLUDE_LABELS_ONLY = options.get(
        'include_labels_only', False
    )
    export_builder.INCLUDE_HXL = options.get('include_hxl', False)
    export_builder.INCLUDE_IMAGES \
        = options.get("include_images", settings.EXPORT_WITH_IMAGE_DEFAULT)
    export_builder.VALUE_SELECT_MULTIPLES = options.get(
        'value_select_multiples', False)
    export_builder.REPEAT_INDEX_TAGS = options.get(
        "repeat_index_tags", DEFAULT_INDEX_TAGS
    )
    export_builder.SHOW_CHOICE_LABELS = options.get('show_choice_labels',
                                                    False)
    export_builder.language = options.get('language')

    # 'win_excel_utf8' is only relevant for CSV exports
    if 'win_excel_utf8' in options and export_type != Export.CSV_EXPORT:
        del options['win_excel_utf8']

    export_builder.INCLUDE_REVIEWS = include_reviews
    export_builder.set_survey(xform.survey, xform,
                              include_reviews=include_reviews)

    temp_file = NamedTemporaryFile(suffix=("." + extension))

    columns_with_hxl = export_builder.INCLUDE_HXL and get_columns_with_hxl(
        xform.survey_elements)

    # get the export function by export type
    func = getattr(export_builder, export_type_func_map[export_type])
    try:
        func.__call__(
            temp_file.name, records, username, id_string, filter_query,
            start=start, end=end, dataview=dataview, xform=xform,
            options=options, columns_with_hxl=columns_with_hxl,
            total_records=total_records
        )
    except NoRecordsFoundError:
        pass
    except SPSSIOError as e:
        export = get_or_create_export(export_id, xform, export_type, options)
        export.error_message = str(e)
        export.internal_status = Export.FAILED
        export.save()
        report_exception("SAV Export Failure", e, sys.exc_info())
        return export

    # generate filename
    basename = "%s_%s" % (
        id_string, datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f"))

    if remove_group_name:
        # add 'remove group name' flag to filename
        basename = "{}-{}".format(basename, GROUPNAME_REMOVED_FLAG)
    if dataview:
        basename = "{}-{}".format(basename, DATAVIEW_EXPORT)

    filename = basename + "." + extension

    # check filename is unique
    while not Export.is_filename_unique(xform, filename):
        filename = increment_index_in_filename(filename)

    file_path = os.path.join(
        username, 'exports', id_string, export_type, filename)

    # seek to the beginning as required by storage classes
    temp_file.seek(0)
    export_filename = default_storage.save(file_path,
                                           File(temp_file, file_path))
    temp_file.close()

    dir_name, basename = os.path.split(export_filename)

    # get or create export object
    export = get_or_create_export(export_id, xform, export_type, options)

    export.filedir = dir_name
    export.filename = basename
    export.internal_status = Export.SUCCESSFUL
    # do not persist exports that have a filter
    # Get URL of the exported sheet.
    if export_type == Export.GOOGLE_SHEETS_EXPORT:
        export.export_url = export_builder.url

    # if we should create a new export is true, we should not save it
    if start is None and end is None:
        export.save()
    return export