Exemplo n.º 1
0
    def test_tostring_result_depends_on_the_param_remove_blank_text_of_load_xml(
            self):
        """Serialized output differs according to ``remove_blank_text``.

        The same input is loaded once with ``remove_blank_text=False`` and
        once with ``remove_blank_text=True``; the blank between the two
        ``italic`` elements is expected only in the first serialization.
        """
        xml_input = (
            "<root><source>"
            "<italic>texto 1</italic> <italic>texto 2</italic>"
            "</source></root>")

        # Load and serialize in the same order as the flags are listed.
        serialized = {}
        for remove_blank in (False, True):
            tree, _ = xml_utils.load_xml(
                xml_input, remove_blank_text=remove_blank)
            serialized[remove_blank] = xml_utils.tostring(tree)

        self.assertNotEqual(serialized[False], serialized[True])

        self.assertEqual(
            serialized[False],
            "<root><source>"
            "<italic>texto 1</italic> <italic>texto 2</italic>"
            "</source></root>")
        self.assertEqual(
            serialized[True],
            "<root><source>"
            "<italic>texto 1</italic><italic>texto 2</italic>"
            "</source></root>")
Exemplo n.º 2
0
 def test_merge_siblings_style_tags_content_does_not_merge_sup(self):
     """Sibling ``sup`` elements stay as they are when only bold/italic are given."""
     source_xml = "<source><sup>texto 1</sup> <sup>texto 2</sup> </source>"
     root = xml_utils.etree.fromstring("<root>" + source_xml + "</root>")
     source = root.find(".//source")
     xml_utils.merge_siblings_style_tags_content(source, ('bold', 'italic'))
     # The serialized node must be identical to what was parsed.
     self.assertEqual(xml_utils.tostring(source), source_xml)
Exemplo n.º 3
0
 def test_remove_styles_off_tagged_content_does_not_remove_italic(self):
     """An ``italic`` surrounded by plain text inside ``source`` is kept."""
     source_xml = (
         "<source>texto 1 <italic>texto italic</italic> texto 2</source>")
     root = xml_utils.etree.fromstring("<root>" + source_xml + "</root>")
     source = root.find(".//source")
     xml_utils.remove_styles_off_tagged_content(source, ('bold', 'italic'))
     # The serialized node must be identical to what was parsed.
     self.assertEqual(xml_utils.tostring(source), source_xml)
Exemplo n.º 4
0
 def test_merge_siblings_style_tags_content_does_not_merge_italic_if_there_are_elements_in_the_middle(
         self):
     """Two ``italic`` elements separated by a ``bold`` element are not merged."""
     source_xml = (
         "<source><italic>texto 1</italic> <bold>texto</bold> "
         "<italic>texto 2</italic></source>")
     root = xml_utils.etree.fromstring("<root>" + source_xml + "</root>")
     source = root.find(".//source")
     xml_utils.merge_siblings_style_tags_content(source, ('bold', 'italic'))
     # The serialized node must be identical to what was parsed.
     self.assertEqual(xml_utils.tostring(source), source_xml)
Exemplo n.º 5
0
 def test_remove_styles_off_tagged_content_removes_external_and_keeps_inner(
         self):
     """A ``bold`` wrapping all of ``source`` is removed; the nested ``bold`` stays."""
     inner = "texto 1 <bold>texto bold</bold> texto 2"
     root = xml_utils.etree.fromstring(
         "<root><source><bold>" + inner + "</bold></source></root>")
     source = root.find(".//source")
     xml_utils.remove_styles_off_tagged_content(source, ('bold', 'italic'))
     serialized = xml_utils.tostring(source)
     self.assertEqual(serialized, "<source>" + inner + "</source>")
Exemplo n.º 6
0
def load_articles(filenames):
    """Load each XML source and return its serialized root, keyed by name.

    Args:
        filenames: mapping of a name to whatever ``xml_utils.load_xml``
            accepts as its first argument.

    Returns:
        dict mapping each name whose XML could be loaded to the string
        returned by ``xml_utils.tostring`` for the tree's root. Entries that
        fail to load are reported on stdout and left out of the result.
    """
    loaded = {}
    for name, source in filenames.items():
        tree, errors = xml_utils.load_xml(source)
        if tree is None:
            # Loading failed: report it and move on to the next entry.
            print(' ERROR 1: {} - {}'.format(name, errors))
            continue
        loaded[name] = xml_utils.tostring(tree.getroot())
    return loaded
Exemplo n.º 7
0
def write_etree_to_file(tree: etree.ElementTree, path: str) -> None:
    """Write an lxml tree to the destination file.

    Serialization is delegated to ``xml_utils.tostring``, which (per the
    original author's note) keeps entities from being modified.

    Nothing is written when either ``tree`` or ``path`` is ``None``.
    """
    if tree is not None and path is not None:
        fs_utils.write_file(path, xml_utils.tostring(tree))
Exemplo n.º 8
0
 def test_load_xml_with_remove_blank_text_as_false_keep_blanks(self):
     """With ``remove_blank_text=False`` the round trip returns the input unchanged."""
     xml_input = (
         "<root><source>"
         "<italic>texto 1</italic> <italic>texto 2</italic>"
         "</source></root>")
     tree, _ = xml_utils.load_xml(xml_input, remove_blank_text=False)
     serialized = xml_utils.tostring(tree)
     self.assertEqual(xml_input, serialized)
Exemplo n.º 9
0
 def test_load_xml_with_remove_blank_text_as_true_remove_blanks(self):
     """With ``remove_blank_text=True`` the blank between the ``italic`` elements is dropped."""
     first = "<italic>texto 1</italic>"
     second = "<italic>texto 2</italic>"
     template = "<root><source>{}</source></root>"
     xml_input = template.format(first + " " + second)
     expected = template.format(first + second)
     tree, _ = xml_utils.load_xml(xml_input, remove_blank_text=True)
     serialized = xml_utils.tostring(tree)
     self.assertEqual(expected, serialized)
Exemplo n.º 10
0
 def insert_ext_link_elements_in_mixed_citation(self):
     """
     Insert ``ext-link`` elements into the text of ``mixed-citation``.

     If the ``mixed-citation`` text has links that are not marked up as
     ``ext-link``, wrap them using the ``ext-link`` elements found in
     ``element-citation``.

     Nothing is done when ``mixed-citation`` already contains an
     ``ext-link``, when there is no ``mixed-citation``, or when
     ``element-citation`` has no ``ext-link``. Only the first
     ``mixed-citation`` found in ``self.tree`` is handled; it is replaced
     in its parent by a re-parsed copy.
     """
     if self.tree.findall(".//mixed-citation//ext-link"):
         return
     mixed_citation = self.tree.find(".//mixed-citation")
     if mixed_citation is None:
         return
     links = self.tree.findall(".//element-citation//ext-link")
     if not links:
         return
     mixed_citation_text = xml_utils.tostring(mixed_citation)
     already_replaced = set()
     for link in links:
         link_text = link.text
         # An empty ext-link has ``text`` set to None, which ``str.replace``
         # rejects with TypeError; there is nothing to look for anyway.
         if not link_text:
             continue
         # Replacing the same text twice would wrap the markup inserted by
         # the first pass in a second, nested ext-link.
         if link_text in already_replaced:
             continue
         already_replaced.add(link_text)
         mixed_citation_text = mixed_citation_text.replace(
             link_text, xml_utils.tostring(link))
     new_mixed_citation = xml_utils.etree.fromstring(mixed_citation_text)
     parent = mixed_citation.getparent()
     parent.replace(mixed_citation, new_mixed_citation)
Exemplo n.º 11
0
 def test_strip_all_tags_except_removes_all_a_except_a_with_href(self):
     """``a`` elements without ``href`` are unwrapped; those with ``href`` are kept."""
     # The whitespace inside both literals is part of the expectation.
     xml_input = """<root><p>
         <a>Texto 1</a>
         <a href="x">Ciência</a>
         <a href="y">Arte</a>
         <a>Texto 2</a>
         </p></root>"""
     expected = """<p>
         Texto 1
         <a href="x">Ciência</a>
         <a href="y">Arte</a>
         Texto 2
         </p>"""
     root = xml_utils.etree.fromstring(xml_input)
     paragraph = root.find(".//p")
     xml_utils.strip_all_tags_except(paragraph, [".//a[@href]"])
     serialized = xml_utils.tostring(paragraph)
     self.assertEqual(expected, serialized)
Exemplo n.º 12
0
    def _get_filenames(self, tree):
        """Collect asset renames from xlink:href nodes, updating ``tree`` in place.

        For every node yielded by ``article.nodes_which_have_xlink_href``:

        * nodes with ``specific-use="scielo-web"`` are marked for removal
          and contribute no file entry;
        * if the href base name has an entry in
          ``self.scielo_pkg_files.tiff_name_and_basename_items`` that
          differs from the href, the href is replaced by that entry;
        * a trailing ``-en`` in the base name is stripped from the href.

        When any node was marked for removal, the marked tags are stripped
        and every ``alternatives`` element left with a single child is
        stripped as well (its child is kept).

        Args:
            tree: XML tree to inspect; it is modified in place.

        Returns:
            A tuple ``(files, changed)`` where ``files`` is a list of
            ``(href, new_href)`` pairs (both equal when no rename applies)
            and ``changed`` is True when any node was removed or rewritten.
        """
        xlink_href = "{http://www.w3.org/1999/xlink}href"
        files = []
        delete = False
        rename = False
        tiff_items = self.scielo_pkg_files.tiff_name_and_basename_items
        for node in article.nodes_which_have_xlink_href(tree):
            if node.get("specific-use") == "scielo-web":
                # Temporary tag name; stripped after the loop.
                node.tag = "REMOVE"
                delete = True
                continue

            href = node.attrib[xlink_href]

            # Replace the href value with the digital asset registered
            # among the TIFFs, when there is one.
            tiff = tiff_items.get(os.path.splitext(href)[0])
            if tiff and href != tiff:
                rename = True
                node.set(xlink_href, tiff)
                href = tiff

            # Strip the "-en" suffix from digital assets of the English
            # version.
            name, ext = os.path.splitext(href)
            if name.endswith("-en"):
                new = name[:-3] + ext
                files.append((href, new))
                rename = True
                node.set(xlink_href, new)
            else:
                files.append((href, href))

        if delete:
            xml_utils.etree.strip_tags(tree, "REMOVE")
            for node in tree.findall(".//alternatives"):
                # len(element) counts direct children; it replaces the
                # deprecated getchildren().
                if len(node) == 1:
                    logger.info("Remove alternatives: {}".format(
                        xml_utils.tostring(node)))
                    node.tag = "REMOVE"
            xml_utils.etree.strip_tags(tree, "REMOVE")
        return files, delete or rename
Exemplo n.º 13
0
 def __init__(self, tree):
     """Keep the given XML tree together with its serialized form.

     Args:
         tree: XML tree stored as ``self.tree``.
     """
     self.tree = tree
     # Serialized once, at construction time; later changes to the tree are
     # not reflected in ``self.content`` by this method.
     self.content = xml_utils.tostring(self.tree)