示例#1
0
def transform_xml(xml_asset_path, identifier):
    """Apply the history-tag transformation to the XML file at xml_asset_path.

    The file is read twice, once with parse.parse_article_xml and once with
    parser.parse_document. Both results go to transform_xml_history_tags,
    and whatever it returns is handed to write_xml_file along with the same
    path and identifier. Nothing is returned.
    """
    # per the original note: history tags are removed for certain article types
    element_root = parse.parse_article_xml(xml_asset_path)
    document_soup = parser.parse_document(xml_asset_path)
    write_xml_file(
        transform_xml_history_tags(element_root, document_soup, identifier),
        xml_asset_path,
        identifier,
    )
示例#2
0
 def test_parse_article_xml_entities(self):
     # write a document containing named entities, parse it, and check the
     # exact bytes ElementTree produces when the parsed root is serialised
     source_markup = "<article>&mdash;&lt;&gt;&amp;&quot;&beta;</article>"
     serialised = b'<article>&#8212;&lt;&gt;&amp;"&#946;</article>'
     xml_file_path = os.path.join(self.temp_dir, "test.xml")
     with open(xml_file_path, "w") as handle:
         handle.write(source_markup)
     parsed_root = parse.parse_article_xml(xml_file_path)
     self.assertIsNotNone(parsed_root)
     self.assertEqual(ElementTree.tostring(parsed_root), serialised)
示例#3
0
    def test_file_list(self):
        # unzip the sample archive into the temp dir, parse the XML asset
        # found in it, and compare parse.file_list output with the fixture
        asset_map = zip_lib.unzip_zip(
            "tests/test_data/30-01-2019-RA-eLife-45644.zip", self.temp_dir
        )
        # index 1 of the XML asset is what gets passed to the parser
        xml_path = parse.article_xml_asset(asset_map)[1]
        article_root = parse.parse_article_xml(xml_path)
        fixture_value = read_fixture("file_list_45644.py")

        self.assertEqual(parse.file_list(article_root), fixture_value)
示例#4
0
def transform_code_files(asset_file_name_map, output_dir, identifier):
    """Run the code-file transformation steps and return a new asset map.

    Steps, in order: locate and parse the article XML, compute the file
    transformations, pass them to code_file_zip, derive a new asset map
    with transform_asset_file_name_map, then call xml_rewrite_file_tags.
    The original one-line summary was: zip code files if they are not
    already a zip file.

    Returns the value produced by transform_asset_file_name_map.
    """
    # index 1 of the XML asset is used as the path of the article XML
    xml_asset_path = parse.article_xml_asset(asset_file_name_map)[1]
    article_root = parse.parse_article_xml(xml_asset_path)

    transformations = code_file_transformations(
        article_root, asset_file_name_map, output_dir, identifier
    )
    code_file_zip(transformations, output_dir, identifier)

    # the new map is built before the XML tags are rewritten
    updated_map = transform_asset_file_name_map(asset_file_name_map, transformations)

    xml_rewrite_file_tags(xml_asset_path, transformations, identifier)
    return updated_map
示例#5
0
def glencoe_xml(xml_file_path, video_data, pretty=True, indent=""):
    """Generate XML to be submitted to Glencoe.

    An article object comes from article_from_xml, and journal details are
    collected from the tree returned by parse.parse_article_xml. Only
    journal ids whose key appears in JOURNAL_ID_TYPES are kept.

    Returns the result of generate_xml called with the article, the journal
    data dict, video_data, pretty and indent.
    """
    # article_from_xml yields a pair; the second item is not used here
    article, _error_count = article_from_xml(xml_file_path)
    tree_root = parse.parse_article_xml(xml_file_path)
    # values are gathered in the same order as before: ids, title, publisher
    journal_data = {
        "journal_ids": {
            id_type: id_value
            for id_type, id_value in parse.xml_journal_id_values(tree_root).items()
            if id_type in JOURNAL_ID_TYPES
        },
        "journal_title": parse.xml_journal_title(tree_root),
        "publisher_name": parse.xml_publisher_name(tree_root),
    }
    return generate_xml(article, journal_data, video_data, pretty, indent)
示例#6
0
def xml_rewrite_file_tags(xml_asset_path, file_transformations, identifier):
    """Parse the XML at xml_asset_path, transform its file tags, and hand
    the result to write_xml_file with the same path and identifier.

    An info-level log line is emitted after parsing and before the
    transformation. Nothing is returned.
    """
    parsed_root = parse.parse_article_xml(xml_asset_path)
    LOGGER.info("%s rewriting xml tags", identifier)
    write_xml_file(
        transform_xml_file_tags(parsed_root, file_transformations),
        xml_asset_path,
        identifier,
    )
示例#7
0
 def test_parse_article_xml_failure(self):
     # content that is not XML is expected to raise ElementTree.ParseError
     broken_path = os.path.join(self.temp_dir, "test.xml")
     with open(broken_path, "w") as handle:
         handle.write("malformed xml")
     self.assertRaises(
         ElementTree.ParseError, parse.parse_article_xml, broken_path
     )
示例#8
0
 def test_parse_article_xml(self):
     # a minimal well-formed document should parse to a non-None root
     target_path = os.path.join(self.temp_dir, "test.xml")
     with open(target_path, "w") as handle:
         handle.write("<article/>")
     self.assertIsNotNone(parse.parse_article_xml(target_path))