def test_raw_to_dict_throws_exception_when_invalid_xml(self): # Arrange raw_xml = "<root><test>Hello</test?</root>" # Act # Assert with self.assertRaises(exceptions.XMLError): raw_xml_to_dict(raw_xml)
def test_raw_to_dict_with_bad_type_raises_error(self): # Arrange raw_xml = "<root><test>Hello</test?</root>" # Act # Assert with self.assertRaises(exceptions.CoreError): raw_xml_to_dict(raw_xml, postprocessor=1)
def transform_dict_identifier_to_oai_identifier(data):
    """Build an OaiIdentify object from a dictionary.

    Args:
        data: Mapping holding the identify fields (camelCase keys such as
            "adminEmail", plus "oai_identifier" and a "raw" XML string).

    Returns:
        OaiIdentify instance.

    """
    # (OaiIdentify keyword, key in `data`), listed in the order the keys are read.
    field_to_key = (
        ("admin_email", "adminEmail"),
        ("base_url", "baseURL"),
        ("repository_name", "repositoryName"),
        ("deleted_record", "deletedRecord"),
        ("delimiter", "delimiter"),
        ("description", "description"),
        ("earliest_datestamp", "earliestDatestamp"),
        ("granularity", "granularity"),
        ("oai_identifier", "oai_identifier"),
        ("protocol_version", "protocolVersion"),
        ("repository_identifier", "repositoryIdentifier"),
        ("sample_identifier", "sampleIdentifier"),
        ("scheme", "scheme"),
    )
    identify_kwargs = {}
    for field_name, data_key in field_to_key:
        identify_kwargs[field_name] = data[data_key]

    # The raw XML is converted last, after every plain field has been read.
    identify_kwargs["raw"] = raw_xml_to_dict(data["raw"])
    return OaiIdentify(**identify_kwargs)
def test_raw_to_dict_valid(self): # Arrange raw_xml = '<root><test>Hello</test></root>' expected_dict = OrderedDict([(u'root', OrderedDict([(u'test', u'Hello')]))]) # Act xml_dict = raw_xml_to_dict(raw_xml) # Assert self.assertEquals(expected_dict, xml_dict)
def test_raw_to_dict_valid(self): # Arrange raw_xml = "<root><test>Hello</test></root>" expected_dict = OrderedDict([("root", OrderedDict([("test", "Hello")])) ]) # Act xml_dict = raw_xml_to_dict(raw_xml) # Assert self.assertEquals(expected_dict, xml_dict)
def test_raw_to_dict_with_numeric_post_processor(self): # Arrange raw_xml = "<root><test>Hello</test><test>1</test></root>" expected_dict = OrderedDict([("root", OrderedDict([("test", ["Hello", 1])]))]) # Act xml_dict = raw_xml_to_dict(raw_xml, postprocessor="NUMERIC") # Assert self.assertEquals(expected_dict, xml_dict)
def insert_oai_records(self):
    """Fill in the mock OAI records, save each one and collect the results.

    Every record produced by OaiPmhMock.mock_oai_record(version=1) gets its
    title, registry, metadata format and dict content set before being saved.

    Returns:
        List of the values returned by each record's save() call.

    """
    persisted = []
    for record in OaiPmhMock.mock_oai_record(version=1):
        record.title = record.identifier
        record.registry = self.registry
        record.harvester_metadata_format = self.oai_metadata_formats[0]
        record.dict_content = xml_utils.raw_xml_to_dict(
            record.xml_content, xml_utils.post_processor
        )
        saved_record = record.save()
        persisted.append(saved_record)
    return persisted
def get_role(curate_data_structure):
    """Extract the role from the form string of a curate data structure.

    Args:
        curate_data_structure: Object exposing a `form_string` attribute
            containing XML.

    Returns:
        Whatever role_extraction returns for the parsed form string.

    """
    # Parse the XML form string into a dictionary first...
    form_as_dict = xml_utils.raw_xml_to_dict(
        curate_data_structure.form_string, xml_utils.post_processor
    )
    # ...then delegate the actual role lookup.
    return role_extraction(form_as_dict)
def test_raw_to_dict_with_callable_post_processor(self): def test_processor(path, key, value): return key, "test" # Arrange raw_xml = "<root><test>Hello</test><test>1</test></root>" expected_dict = OrderedDict([("root", "test")]) # Act xml_dict = raw_xml_to_dict(raw_xml, postprocessor=test_processor) # Assert self.assertEquals(expected_dict, xml_dict)
def transform_dict_set_to_oai_harvester_set(data):
    """Build a list of OaiHarvesterSet objects from an iterable of dicts.

    Args:
        data: Iterable of mappings, each providing "setName", "setSpec"
            and a "raw" XML string.

    Returns:
        List of OaiHarvesterSet instances, in the same order as `data`.

    """
    harvester_sets = []
    for entry in data:
        harvester_set = OaiHarvesterSet(
            set_name=entry["setName"],
            set_spec=entry["setSpec"],
            raw=raw_xml_to_dict(entry["raw"]),
        )
        harvester_sets.append(harvester_set)
    return harvester_sets
def convert_to_dict(self):
    """Parse `self.xml_content` and store the result in `self.dict_content`.

    When SEARCHABLE_DATA_OCCURRENCES_LIMIT is not None, the parsed dictionary
    is first passed to xml_utils.remove_lists_from_xml_dict together with
    that limit.

    Returns:

    """
    parsed_content = xml_utils.raw_xml_to_dict(
        self.xml_content, xml_utils.post_processor
    )

    limit_is_configured = SEARCHABLE_DATA_OCCURRENCES_LIMIT is not None
    if limit_is_configured:
        # Drop the lists whose size exceeds the limit; the helper's return
        # value is unused, so it presumably edits the dict in place.
        xml_utils.remove_lists_from_xml_dict(
            parsed_content, SEARCHABLE_DATA_OCCURRENCES_LIMIT
        )

    # Keep the (possibly trimmed) dictionary on the instance.
    self.dict_content = parsed_content
def upsert(oai_identify):
    """Save an OaiIdentify, converting a string `raw` value to a dict first.

    A non-empty string in `raw` is parsed as XML; if parsing raises
    XMLError, `raw` is replaced by an empty dict.

    Args:
        oai_identify: OaiIdentify to create or update.

    Returns:
        The value returned by oai_identify.save().

    """
    raw_value = oai_identify.raw
    if raw_value and isinstance(raw_value, str):
        try:
            converted = raw_xml_to_dict(raw_value)
        except exceptions.XMLError:
            # Unparseable XML is stored as an empty dict.
            converted = {}
        oai_identify.raw = converted

    return oai_identify.save()
def transform_dict_metadata_format_to_oai_harvester_metadata_format(data):
    """Build a list of OaiHarvesterMetadataFormat objects from dicts.

    Args:
        data: Iterable of mappings, each providing "metadataPrefix",
            "metadataNamespace", "schema" and a "raw" XML string.

    Returns:
        List of OaiHarvesterMetadataFormat instances, in the order of `data`.

    """

    def _build_metadata_format(entry):
        # One harvester metadata format per input mapping.
        return OaiHarvesterMetadataFormat(
            metadata_prefix=entry["metadataPrefix"],
            metadata_namespace=entry["metadataNamespace"],
            schema=entry["schema"],
            raw=raw_xml_to_dict(entry["raw"]),
        )

    return [_build_metadata_format(entry) for entry in data]
def transform(self, xml_inputs, session_key):
    """Transform each XML input into JSON content.

    Args:
        xml_inputs: Iterable of mappings providing "title" and "xml_content".
        session_key: Not used by this implementation.

    Returns:
        List of TransformResult objects, one per input, each holding a
        single TransformResultContent with the JSON text.

    """
    results_transform = []
    # loops on all xml input
    for xml_item in xml_inputs:
        # generate the title document with the sha
        document_name_with_sha = AbstractExporter.get_title_document(
            xml_item["title"], xml_item["xml_content"]
        )
        transform_result = TransformResult()
        # set the document name to the collection
        transform_result.source_document_name = document_name_with_sha
        # for a JSON transformation there is a list of one element
        transform_result_content = TransformResultContent()
        transform_result_content.file_name = document_name_with_sha
        # Transform to JSON
        transformed_content = xml_utils.raw_xml_to_dict(
            xml_item["xml_content"],
            postprocessor=XML_POST_PROCESSOR,
            force_list=XML_FORCE_LIST,
        )

        # sets the content and extension
        try:
            transform_result_content.content_converted = json.dumps(
                transformed_content, indent=4, ensure_ascii=False
            )
        except Exception:
            # Best-effort fallback to ASCII-escaped output. `Exception` is
            # used instead of a bare `except` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            transform_result_content.content_converted = json.dumps(
                transformed_content, indent=4
            )

        transform_result_content.content_extension = self.extension
        # add the content to the list of content
        transform_result.transform_result_content.append(transform_result_content)
        # add the result to the list of result
        results_transform.append(transform_result)

    return results_transform
def transform_dict_identifier_to_oai_identifier(data):
    """Build an OaiIdentify object from a dictionary.

    Args:
        data: Mapping holding the identify fields (camelCase keys such as
            'adminEmail', plus 'oai_identifier' and a 'raw' XML string).

    Returns:
        OaiIdentify instance.

    """
    # Plain fields are copied across first, in the order they are declared.
    identify_fields = dict(
        admin_email=data["adminEmail"],
        base_url=data["baseURL"],
        repository_name=data["repositoryName"],
        deleted_record=data["deletedRecord"],
        delimiter=data["delimiter"],
        description=data["description"],
        earliest_datestamp=data["earliestDatestamp"],
        granularity=data["granularity"],
        oai_identifier=data["oai_identifier"],
        protocol_version=data["protocolVersion"],
        repository_identifier=data["repositoryIdentifier"],
        sample_identifier=data["sampleIdentifier"],
        scheme=data["scheme"],
    )
    # The raw XML string is converted to a dictionary last.
    identify_fields["raw"] = raw_xml_to_dict(data["raw"])
    return OaiIdentify(**identify_fields)