def test_strip_xml_nodes_by_xpaths_similar_names(self):
    """
    XPaths whose names share a prefix must not be mixed up.

    Keeping `group1/subgroup1` is expected to leave `subgroup11` out of
    the result, whereas keeping `group1` is expected to retain both
    sub-groups along with `question_5`.
    """
    subgroup_only = ''.join((
        '<root>',
        ' <group1>',
        ' <subgroup1>',
        ' <question_1>Answer 1</question_1>',
        ' <question_2>Answer 2</question_2>',
        ' </subgroup1>',
        ' </group1>',
        '</root>',
    ))
    whole_group = ''.join((
        '<root>',
        ' <group1>',
        ' <subgroup1>',
        ' <question_1>Answer 1</question_1>',
        ' <question_2>Answer 2</question_2>',
        ' </subgroup1>',
        ' <subgroup11>',
        ' <question_3>Answer 3</question_3>',
        ' <question_4>Answer 4</question_4>',
        ' </subgroup11>',
        ' <question_5>Answer 5</question_5>',
        ' </group1>',
        '</root>',
    ))

    # Each case pairs the XPaths to keep with the XML expected back.
    cases = (
        (['group1/subgroup1'], subgroup_only),
        (['group1'], whole_group),
    )
    for xpaths, expected_xml in cases:
        self.__compare_xml(
            strip_nodes(self.__submission, xpaths, use_xpath=True),
            expected_xml,
        )
def test_strip_xml_nodes_by_xpaths_with_slashes(self):
    """
    Leading and/or trailing slashes in an XPath must not change the result.
    """
    only_question_5 = ''.join((
        '<root>',
        ' <group1>',
        ' <question_5>Answer 5</question_5>',
        ' </group1>',
        '</root>',
    ))

    xpath_variants = (
        'group1/question_5/',   # With trailing slash
        '/group1/question_5',   # With leading slash
        '/group1/question_5/',  # With both
    )
    for xpath in xpath_variants:
        self.__compare_xml(
            strip_nodes(self.__submission, [xpath], use_xpath=True),
            only_question_5,
        )
def test_strip_xml_nodes_and_rename_root_node(self):
    """
    `rename_root_node_to` must replace the root tag while every kept node
    stays in place.
    """
    original_xml = '<abcdef><a><b><c>abcdef</c></b></a></abcdef>'
    renamed_xml = '<root><a><b><c>abcdef</c></b></a></root>'

    stripped = strip_nodes(
        source=original_xml,
        nodes_to_keep=['a', 'b', 'c'],
        rename_root_node_to='root',
    )

    self.__compare_xml(stripped, renamed_xml)
def test_strip_xml_nodes_by_fields(self):
    """
    Nodes can be kept by bare question name (no XPath); the enclosing
    groups of each kept question are expected to remain in the output.
    """
    two_questions = ''.join((
        '<root>',
        ' <group1>',
        ' <subgroup1>',
        ' <question_1>Answer 1</question_1>',
        ' </subgroup1>',
        ' <question_5>Answer 5</question_5>',
        ' </group1>',
        '</root>',
    ))
    one_question = ''.join((
        '<root>',
        ' <group1>',
        ' <question_5>Answer 5</question_5>',
        ' </group1>',
        '</root>',
    ))

    # Questions located at different depths of the hierarchy
    self.__compare_xml(
        strip_nodes(self.__submission, ['question_1', 'question_5']),
        two_questions,
    )

    # A single question
    self.__compare_xml(
        strip_nodes(self.__submission, ['question_5']),
        one_question,
    )
def test_strip_xml_nodes_by_xpaths(self):
    """
    Nodes can be kept by their full XPath (group hierarchy included).
    """
    xpaths_to_keep = ['group1/subgroup1/question_1', 'group1/question_5']
    expected_xml = ''.join((
        '<root>',
        ' <group1>',
        ' <subgroup1>',
        ' <question_1>Answer 1</question_1>',
        ' </subgroup1>',
        ' <question_5>Answer 5</question_5>',
        ' </group1>',
        '</root>',
    ))

    stripped = strip_nodes(
        self.__submission,
        xpaths_to_keep,
        use_xpath=True,
    )

    self.__compare_xml(stripped, expected_xml)
def external(self, request, paired_data_uid, **kwargs):
    """
    Returns an XML which contains data submitted to paired asset
    Creates the endpoints
    - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external/
    - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external.xml/
    """
    paired_data = self.get_object()

    # The source can be missing when its data sharing has been deactivated
    # after it was paired with the current form. In that case, remove any
    # leftover file so that no zombie file stays on storage, then answer
    # with a 404.
    source_asset = paired_data.get_source()
    if not source_asset:
        try:
            stale_file = self.asset.asset_files.get(uid=paired_data_uid)
        except AssetFile.DoesNotExist:
            pass
        else:
            stale_file.delete()
        raise Http404

    # Both the source and the current asset must be deployed.
    if not (source_asset.has_deployment and self.asset.has_deployment):
        raise Http404

    # Retrieve data from the related asset file.
    # If data has already been fetched once, an `AssetFile` should exist.
    # Otherwise, we create one to store the generated XML.
    #
    # The content is considered expired by default. That covers:
    # - a brand new asset file (forces its creation below);
    # - an existing asset file without content, which means `paired_data`
    #   has changed since the last call of this endpoint, e.g. the project
    #   owner has changed the questions they want to include in the
    #   `xml-external` file.
    old_hash = None
    has_expired = True
    try:
        asset_file = self.asset.asset_files.get(uid=paired_data_uid)
    except AssetFile.DoesNotExist:
        asset_file = AssetFile(
            uid=paired_data_uid,
            asset=self.asset,
            file_type=AssetFile.PAIRED_DATA,
            user=self.asset.owner,
        )
    else:
        if asset_file.content:
            old_hash = asset_file.md5_hash
            age = timezone.now() - asset_file.date_modified
            has_expired = (
                age.total_seconds() > settings.PAIRED_DATA_EXPIRATION
            )

    # ToDo evaluate adding headers for caching and a HTTP 304 status code
    if not has_expired:
        return Response(asset_file.content.file.read().decode())

    # The content of `asset_file` has expired, let's regenerate the XML
    submissions = source_asset.deployment.get_submissions(
        self.asset.owner, format_type=SUBMISSION_FORMAT_TYPE_XML
    )

    # Use `rename_root_node_to='data'` to rename the root node of each
    # submission to `data` so that form authors do not have to rewrite
    # their `xml-external` formulas any time the asset UID changes,
    # e.g. when cloning a form or creating a project from a template.
    # Set `use_xpath=True` because `paired_data.fields` uses full group
    # hierarchies, not just question names.
    parsed_submissions = [
        strip_nodes(
            submission,
            paired_data.allowed_fields,
            use_xpath=True,
            rename_root_node_to='data',
        )
        for submission in submissions
    ]

    filename = paired_data.filename
    parsed_submissions_to_str = ''.join(parsed_submissions)
    root_tag_name = SubmissionXMLRenderer.root_tag_name
    xml_ = add_xml_declaration(
        f'<{root_tag_name}>{parsed_submissions_to_str}</{root_tag_name}>'
    )

    if not parsed_submissions:
        # We do not want to cache an empty file
        return Response(xml_)

    # We need to delete the current file (if it exists) when the filename
    # has changed. Otherwise, it would leave an orphan file on storage.
    if asset_file.pk and asset_file.content.name != filename:
        asset_file.content.delete()

    asset_file.content = ContentFile(xml_.encode(), name=filename)

    # `xml_` is already in memory, let's use its content to compute the
    # hash and store it within `asset_file` metadata
    new_hash = calculate_hash(xml_, prefix=True)
    asset_file.set_md5_hash(new_hash)
    asset_file.save()

    if old_hash != asset_file.md5_hash:
        # resync paired data to the deployment backend
        self.asset.deployment.sync_media_files(AssetFile.PAIRED_DATA)

    return Response(xml_)
def _parse_data(self, submission, fields):
    """
    Strip `submission` down to `fields` by delegating to `strip_nodes`,
    asking it for an XML declaration (`xml_declaration=True`).
    """
    stripped_xml = strip_nodes(submission, fields, xml_declaration=True)
    return stripped_xml