示例#1
0
    def test_tostring_result_depends_on_the_param_remove_blank_text_of_load_xml(
            self):
        """Check that ``tostring`` output follows ``remove_blank_text``.

        The same input is loaded once with ``remove_blank_text=False`` and
        once with ``remove_blank_text=True``; the serialized results must
        differ, keeping the blank between the two ``<italic>`` elements in
        the first case and dropping it in the second.
        """
        source = (
            "<root><source><italic>texto 1</italic> <italic>texto 2</italic>"
            "</source></root>")

        # Same order as before: load + serialize with False, then with True.
        serialized = {}
        for flag in (False, True):
            tree, _ = xml_utils.load_xml(source, remove_blank_text=flag)
            serialized[flag] = xml_utils.tostring(tree)

        self.assertNotEqual(serialized[False], serialized[True])

        self.assertEqual(
            serialized[False],
            "<root><source><italic>texto 1</italic> <italic>texto 2</italic>"
            "</source></root>")
        self.assertEqual(
            serialized[True],
            "<root><source><italic>texto 1</italic><italic>texto 2</italic>"
            "</source></root>")
示例#2
0
 def test_load_xml_from_not_found_file(self):
     """A path to a missing ``.xml`` file yields no tree and an error text."""
     expected_message = (
         "Loading XML from 'notfoundfile.xml': "
         "Error reading file 'notfoundfile.xml': "
         "failed to load external entity \"notfoundfile.xml\"")

     tree, message = xml_utils.load_xml("notfoundfile.xml")

     self.assertIsNone(tree)
     self.assertEqual(expected_message, message)
示例#3
0
 def test_load_xml_from_not_a_file_and_not_xml(self):
     """A string that is neither XML nor a file path is rejected."""
     expected_message = (
         "Loading XML from 'notfile_notxml': "
         "Invalid value: it must be an XML content or XML file path")

     tree, message = xml_utils.load_xml("notfile_notxml")

     self.assertIsNone(tree)
     self.assertEqual(expected_message, message)
示例#4
0
 def test_load_xml_successfully_from_file(self):
     """Loading a well-formed XML file returns a tree and no error.

     A minimal ``file.xml`` fixture is written to the current working
     directory, loaded through ``xml_utils.load_xml`` and then removed.
     """
     path = "file.xml"
     with open(path, "w") as fp:
         fp.write("<root/>")
     try:
         xml, e = xml_utils.load_xml(path)
         self.assertIsNone(e)
         self.assertIsNotNone(xml)
     finally:
         # Always remove the fixture: without this, a failing assertion (or
         # an exception raised by load_xml) would leave file.xml behind.
         os.unlink(path)
示例#5
0
def load_articles(filenames):
    """Load several XML sources and serialize the ones that parse.

    Args:
        filenames: mapping of a name to the value handed to
            ``xml_utils.load_xml`` for that name.

    Returns:
        dict mapping each name whose XML was loaded to the result of
        ``xml_utils.tostring`` applied to the tree's root. Names whose
        load returned no tree are reported with ``print`` and omitted.
    """
    serialized = {}
    for label, source in filenames.items():
        tree, problems = xml_utils.load_xml(source)
        if tree is None:
            print(' ERROR 1: {} - {}'.format(label, problems))
            continue
        serialized[label] = xml_utils.tostring(tree.getroot())
    return serialized
示例#6
0
 def test_load_xml_with_remove_blank_text_as_false_keep_blanks(self):
     """With ``remove_blank_text=False`` the round trip keeps the input.

     The blank between the two ``<italic>`` elements must survive loading
     and serialization, so the output equals the input string.
     """
     xml_input = (
         "<root><source><italic>texto 1</italic> <italic>texto 2</italic>"
         "</source></root>")
     # Feed the very same string that the result is compared against, so
     # the input and the expectation cannot drift apart.
     xml, _ = xml_utils.load_xml(xml_input, remove_blank_text=False)
     result = xml_utils.tostring(xml)
     self.assertEqual(xml_input, result)
示例#7
0
 def test_load_xml_with_remove_blank_text_as_true_remove_blanks(self):
     """With ``remove_blank_text=True`` the blank between tags is dropped."""
     source = (
         "<root><source><italic>texto 1</italic> <italic>texto 2</italic>"
         "</source></root>")
     without_blank = (
         "<root><source><italic>texto 1</italic><italic>texto 2</italic>"
         "</source></root>")

     tree, _ = xml_utils.load_xml(source, remove_blank_text=True)

     self.assertEqual(without_blank, xml_utils.tostring(tree))
示例#8
0
 def load_xml(self):
     """Read ``self.file_path``, parse it and record the outcome.

     Sets ``self.tree`` and ``self.loading_error`` from
     ``xml_utils.load_xml``. When an error is reported, the error text is
     expanded with the file path and a line-numbered copy of the content,
     and that numbered content is written to ``self.file_path``.
     """
     text = xml_utils.insert_break_lines(fs_utils.read_file(self.file_path))
     self.tree, self.loading_error = xml_utils.load_xml(text)
     if not self.loading_error:
         return

     text = xml_utils.numbered_lines(text)
     if text.startswith("1: <?xml"):
         # Drop everything up to the end of the XML declaration.
         declaration_end = text.find("?>") + 2
         text = text[declaration_end:].strip()
     self.loading_error = "\n\n".join(
         (self.file_path, self.loading_error, text))
     # NOTE(review): this overwrites the source file with the numbered
     # content -- confirm that this is intended.
     fs_utils.write_file(self.file_path, text)
示例#9
0
 def validate_pubmed_xml(self):
     """Validate the PubMed XML file and report the outcome.

     Any ``<pubmed_filename>.err`` file left from an earlier run is
     removed first. The file is then loaded with ``validate=True``; if an
     error is reported it is written to the ``.err`` file.

     Returns:
         bool: True when no error was reported, False otherwise.
         (Previously the result flag was never updated, so False was
         returned even for a valid file.)
     """
     err_filepath = self.pubmed_filename + '.err'
     if os.path.isfile(err_filepath):
         os.unlink(err_filepath)
     _, error = xml_utils.load_xml(self.pubmed_filename, validate=True)
     if error:
         with open(err_filepath, "w") as fp:
             fp.write(error)
         print('Validation error: ' + err_filepath)
         return False
     print('Validates fine')
     return True
示例#10
0
def format_text_as_xml(text):
    """Return ``text`` pretty-printed when it can be loaded as XML.

    The text is wrapped in a temporary ``<root>`` element that declares
    every prefix in ``xml_utils.namespaces``, pretty-printed, and then
    unwrapped again. The input is returned unchanged when it cannot be
    loaded, when pretty-printing yields ``None``, or when the wrapper is
    not found in the pretty-printed output.
    """
    tree, _ = xml_utils.load_xml(text)
    if tree is None:
        return text

    declarations = ''.join(
        ' xmlns:' + ns_prefix + '=' + '"' + ns_uri + '"'
        for ns_prefix, ns_uri in xml_utils.namespaces.items())
    wrapped = '<root' + declarations + '>' + text + '</root>'

    formatted = xml_utils.pretty_print(wrapped)
    if formatted is None or '<root' not in formatted:
        return text

    # Strip whatever precedes the wrapper, then the wrapper's start tag,
    # then every closing wrapper tag.
    formatted = formatted[formatted.find('<root'):]
    return formatted[formatted.find('>') + 1:].replace('</root>', '')
示例#11
0
 def _sgmxml2xml(self):
     """
     Convert the sgmxml file to XML, rewriting it in place.

     Nothing is written when the source file cannot be loaded. When the
     root element has no ``sps`` attribute, it is set from the latest SPS
     version (minus its first four characters) before the XSL
     transformation is applied.
     """
     logger.info("Convert sgml to xml")
     tree, load_error = xml_utils.load_xml(self.FILES.src_pkgfiles.filename)
     if load_error:
         return

     root = tree.find(".")
     version = root.get("sps")
     if version is None:
         version = xml_versions.get_latest_sps_version()[4:]
         root.set("sps", version)

     stylesheet = xml_versions.xsl_getter(version)
     transformed = xml_utils.transform(tree, stylesheet)
     content = xml_utils.insert_namespaces_in_root("article", str(transformed))
     fs_utils.write_file(self.FILES.src_pkgfiles.filename, content)
示例#12
0
 def __init__(self,
              path,
              output_path,
              xml_names,
              sgmxml_name=None,
              optimised=False):
     """Set up the package folders and load the selected documents.

     Args:
         path: location handed to ``workarea.MultiDocsPackageFolder``.
         output_path: location handed to
             ``workarea.MultiDocsPackageOuputs``.
         xml_names: basenames of the documents to load; items of
             ``self.files`` whose basename is not listed are skipped.
             A falsy value (empty or ``None``) loads nothing.
         sgmxml_name: forwarded to ``self.wk.get_doc_outputs`` for each
             loaded document.
         optimised: stored as ``self.optimised``.
     """
     self.package_folder = workarea.MultiDocsPackageFolder(path)
     self.wk = workarea.MultiDocsPackageOuputs(output_path)
     self.xml_names = xml_names
     self.optimised = optimised
     self._articles = {}
     if xml_names:
         for name, item in self.files.items():
             if item.basename not in xml_names:
                 continue
             # NOTE(review): the load error is discarded, so Article may
             # receive ``None`` as its tree -- confirm Article copes.
             xml, _ = xml_utils.load_xml(item.filename)
             self._articles[name] = article.Article(xml, name)
             self.wk.get_doc_outputs(name, sgmxml_name)
     self.issue_data = PackageIssueData()
     self.issue_data.setup(self._articles)

     # The guard above accepts a falsy ``xml_names`` (e.g. None), so the
     # count must not call len() on it unconditionally.
     selected_total = len(xml_names) if xml_names else 0
     package_total = len(self.package_folder.pkgfiles_items)
     if selected_total < package_total:
         print("SPPackage have {} documents. "
               "{} was filtered to be processed.".format(
                   package_total, selected_total))
示例#13
0
 def test_load_xml_loads_xml_but_ignore_incomplete_entities(self):
     """An unterminated character reference is reported as an error."""
     expected_message = (
         "Loading XML from 'str': CharRef: invalid decimal value, "
         "line 1, column 14 (<string>, line 1)")

     _, message = xml_utils.load_xml("<root><a>&#91</a></root>")

     self.assertEqual(message, expected_message)
示例#14
0
 def test_load_return_errors_because_of_tag_which_does_not_close(self):
     """An element that is never closed is reported as an error."""
     expected_message = (
         "Loading XML from 'str': EndTag: '</' not found, "
         "line 1, column 7 (<string>, line 1)")

     _, message = xml_utils.load_xml("<root>")

     self.assertEqual(message, expected_message)
示例#15
0
 def test_load_xml_return_errors_because_of_incomplete_tag(self):
     """A start tag without its closing ``>`` is reported as an error."""
     expected_message = (
         "Loading XML from 'str': Couldn't find end of Start Tag root "
         "line 1, line 1, column 6 (<string>, line 1)")

     _, message = xml_utils.load_xml("<root")

     self.assertEqual(message, expected_message)
示例#16
0
 def test_load_xml_successfully_from_str(self):
     """Well-formed XML given as a string yields a tree and no error."""
     tree, error = xml_utils.load_xml("<root/>")

     self.assertIsNone(error)
     self.assertIsNotNone(tree)