Exemplo n.º 1
0
 def setUp(self):
     """Prepare the XML fixture and the pipe instance used by the tests.

     The fixture is a ``<p>`` element that directly wraps two other ``<p>``
     elements; it is kept both as text (``self.text``) and as a parsed
     tree (``self.xml``).
     """
     self.text = """<root>
         <body>
             <p>
                 <p>paragrafo 1</p>
                 <p>paragrafo 2</p>
             </p>
         </body>
         </root>"""
     self.xml = etree.fromstring(self.text)

     # Build the pipeline first, then ask it for the specific pipe.
     pipeline = HTML2SPSPipeline()
     self.pipe = pipeline.RemovePWhichIsParentOfPPipe()
    def transform_body(self):
        """Replace every ``<body>`` element with its converted XML version.

        For each ``//body`` node found in ``self.xmltree``, the text of its
        first direct ``<p>`` child (or an empty string when there is none)
        is handed to ``HTML2SPSPipeline.deploy``. The ``<body>`` element of
        the converted tree then takes the place of the original node.

        Returns:
            ``self.xmltree``, after the replacements were made in place.

        Raises:
            TypeError: if the converted tree contains no ``<body>`` element.
        """
        for index, body in enumerate(self.xmltree.xpath("//body"), start=1):
            # Pass the argument lazily so the message is only formatted
            # when the INFO level is actually enabled.
            logger.info("Processando body numero: %s", index)

            txt_body = body.findtext("./p") or ""
            convert = HTML2SPSPipeline(pid=self.publisher_id, index_body=index)
            _, obj_html_body = convert.deploy(txt_body)

            # Overwrite the previous escaped HTML with the newly processed
            # XML. ``find`` returns None when there is no <body>; fail with
            # an explicit message instead of passing None to ``replace``.
            new_body = obj_html_body.find("body")
            if new_body is None:
                raise TypeError(
                    "XML: %s esta sem Body" % (self.publisher_id,)
                )

            body.getparent().replace(body, new_body)

        return self.xmltree
Exemplo n.º 3
0
    def transform_body(self, spy=False):
        """Replace every ``<body>`` element with its converted XML version.

        Each ``//body`` node of ``self.xmltree`` has the text of its first
        direct ``<p>`` child (empty string when absent) run through
        ``HTML2SPSPipeline.deploy``; the resulting ``<body>`` element is
        swapped in for the original node.

        Args:
            spy: forwarded unchanged to ``HTML2SPSPipeline``.

        Returns:
            ``self.xmltree``, after the in-place replacements.

        Raises:
            TypeError: if the converted tree has no ``<body>`` element.
        """
        for position, node in enumerate(self.xmltree.xpath("//body"), start=1):
            logger.debug("Processando body numero: %s" % position)

            escaped_html = node.findtext("./p") or ""
            pipeline = HTML2SPSPipeline(
                pid=self.scielo_pid_v2,
                ref_items=self._get_ref_items(node),
                body_index=position,
                spy=spy,
            )
            _raw, converted = pipeline.deploy(escaped_html)

            # Overwrite the previous escaped HTML with the newly processed
            # XML: use the result itself when it already is the <body>,
            # otherwise look the <body> up inside it.
            new_body = converted if converted.tag == "body" else converted.find("body")
            if new_body is None:
                raise TypeError("XML: %s esta sem Body" % (self.scielo_pid_v2))

            node.getparent().replace(node, new_body)

        return self.xmltree
Exemplo n.º 4
0
 def test_create_instance(self):
     """Check that ``SetupPipe`` output still contains the input markup."""
     expected_text = "<p>La nueva epoca de la revista<italic>Salud Publica de Mexico </italic></p>"
     pipeline = HTML2SPSPipeline()
     _raw, xml = pipeline.SetupPipe().transform(expected_text)

     # Serialize to text directly. ``str()`` on the bytes returned by
     # ``tostring`` would compare against the ``b'...'`` repr, which
     # escapes non-ASCII bytes, quotes and backslashes.
     serialized = etree.tostring(xml, encoding="unicode")
     self.assertIn(expected_text, serialized)
Exemplo n.º 5
0
 def setUp(self):
     """Load the sample XML file and create a pipeline instance.

     Stores the raw file text in ``self.xml_txt``, its parsed tree in
     ``self.etreeXML`` and a fresh ``HTML2SPSPipeline`` in
     ``self.pipeline``.
     """
     sample_path = os.path.join(SAMPLES_PATH, "example_convert_html.xml")
     with open(sample_path, "r") as sample_file:
         contents = sample_file.read()

     self.xml_txt = contents
     self.etreeXML = etree.fromstring(contents)
     self.pipeline = HTML2SPSPipeline()