def test_tostring_result_depends_on_the_param_remove_blank_text_of_load_xml(
        self):
    """Check that ``tostring`` output follows ``remove_blank_text``.

    The same input is loaded twice, once with ``remove_blank_text=False``
    and once with ``remove_blank_text=True``. The serialized results must
    differ: the first keeps the blank between the two ``italic`` elements,
    the second does not.
    """
    source = (
        "<root><source><italic>texto 1</italic> <italic>texto 2</italic>"
        "</source></root>")
    expected_with_blank = (
        "<root><source>"
        "<italic>texto 1</italic> <italic>texto 2</italic>"
        "</source></root>")
    expected_without_blank = (
        "<root><source>"
        "<italic>texto 1</italic><italic>texto 2</italic>"
        "</source></root>")

    tree_keeping_blank, _ = xml_utils.load_xml(
        source, remove_blank_text=False)
    serialized_keeping_blank = xml_utils.tostring(tree_keeping_blank)

    tree_dropping_blank, _ = xml_utils.load_xml(
        source, remove_blank_text=True)
    serialized_dropping_blank = xml_utils.tostring(tree_dropping_blank)

    self.assertNotEqual(serialized_keeping_blank, serialized_dropping_blank)
    self.assertEqual(serialized_keeping_blank, expected_with_blank)
    self.assertEqual(serialized_dropping_blank, expected_without_blank)
def test_load_xml_from_not_found_file(self):
    """Loading a path to a missing XML file yields no tree and an error."""
    expected_error = (
        "Loading XML from 'notfoundfile.xml': "
        "Error reading file 'notfoundfile.xml': "
        'failed to load external entity "notfoundfile.xml"')
    tree, errors = xml_utils.load_xml("notfoundfile.xml")
    self.assertIsNone(tree)
    self.assertEqual(expected_error, errors)
def test_load_xml_from_not_a_file_and_not_xml(self):
    """A value that is neither XML content nor a file path is rejected."""
    expected_error = (
        "Loading XML from 'notfile_notxml': "
        "Invalid value: it must be an XML content or XML file path")
    tree, errors = xml_utils.load_xml("notfile_notxml")
    self.assertIsNone(tree)
    self.assertEqual(expected_error, errors)
def test_load_xml_successfully_from_file(self):
    """Loading an existing, well-formed XML file returns a tree and no error.

    The fixture file is created in the current working directory and is
    always removed afterwards, including when an assertion fails, so a
    failing run does not leave ``file.xml`` behind.
    """
    file_path = "file.xml"
    # The ``with`` block is closed before loading so the content is flushed.
    with open(file_path, "w") as fp:
        fp.write("<root/>")
    try:
        xml, e = xml_utils.load_xml(file_path)
        self.assertIsNone(e)
        self.assertIsNotNone(xml)
    finally:
        os.unlink(file_path)
def load_articles(filenames):
    """Load each XML given in ``filenames`` and return its serialized root.

    ``filenames`` maps a name to a value accepted by ``xml_utils.load_xml``.
    The result maps each name whose XML was loaded to the output of
    ``xml_utils.tostring`` for the tree root. Names that fail to load are
    reported with ``print`` and left out of the result.
    """
    loaded = {}
    for name, source in filenames.items():
        xmltree, errors = xml_utils.load_xml(source)
        if xmltree is None:
            print(' ERROR 1: {} - {}'.format(name, errors))
            continue
        loaded[name] = xml_utils.tostring(xmltree.getroot())
    return loaded
def test_load_xml_with_remove_blank_text_as_false_keep_blanks(self):
    """With ``remove_blank_text=False`` the serialized XML equals the input."""
    xml_input = (
        "<root><source>"
        "<italic>texto 1</italic> <italic>texto 2</italic>"
        "</source></root>")
    tree, _ = xml_utils.load_xml(xml_input, remove_blank_text=False)
    serialized = xml_utils.tostring(tree)
    self.assertEqual(xml_input, serialized)
def test_load_xml_with_remove_blank_text_as_true_remove_blanks(self):
    """With ``remove_blank_text=True`` the blank between elements is dropped."""
    with_blank = (
        "<root><source>"
        "<italic>texto 1</italic> <italic>texto 2</italic>"
        "</source></root>")
    without_blank = (
        "<root><source>"
        "<italic>texto 1</italic><italic>texto 2</italic>"
        "</source></root>")
    tree, _ = xml_utils.load_xml(with_blank, remove_blank_text=True)
    serialized = xml_utils.tostring(tree)
    self.assertEqual(without_blank, serialized)
# Read the file at self.file_path, pass its content through
# xml_utils.insert_break_lines and parse it with xml_utils.load_xml; the
# parse results are stored in self.tree and self.loading_error.
#
# When a loading error is reported, content is replaced by
# xml_utils.numbered_lines(content); if that text starts with "1: <?xml",
# everything up to and including the first "?>" is cut off and the rest is
# stripped. self.loading_error then becomes the file path, the reported
# error and that content, separated by blank lines.
#
# The last statement writes content back to self.file_path with
# fs_utils.write_file.
# NOTE(review): the statements are collapsed onto a single line here, so it
# cannot be told whether the write-back runs only when there is a loading
# error or on every call - confirm before reformatting this method.
def load_xml(self): content = fs_utils.read_file(self.file_path) content = xml_utils.insert_break_lines(content) self.tree, self.loading_error = xml_utils.load_xml(content) if self.loading_error: content = xml_utils.numbered_lines(content) if content.startswith("1: <?xml"): content = content[content.find("?>") + 2:].strip() self.loading_error = (self.file_path + "\n\n" + self.loading_error + "\n\n" + content) fs_utils.write_file(self.file_path, content)
def validate_pubmed_xml(self):
    """Load ``self.pubmed_filename`` with validation and report the outcome.

    Any ``<pubmed_filename>.err`` file left by a previous run is removed
    first. The file is then loaded through ``xml_utils.load_xml`` with
    ``validate=True``. When an error is reported it is written to the
    ``.err`` file and its path is printed; otherwise a success message is
    printed.

    Returns:
        bool: ``True`` when no error was reported, ``False`` otherwise.
        The previous implementation returned ``False`` in both cases.
    """
    err_filepath = self.pubmed_filename + '.err'
    # Drop a stale report so an existing .err file always reflects this run.
    if os.path.isfile(err_filepath):
        os.unlink(err_filepath)

    _, error = xml_utils.load_xml(self.pubmed_filename, validate=True)
    if error:
        with open(err_filepath, "w") as fp:
            fp.write(error)
        print('Validation error: ' + err_filepath)
        return False

    print('Validates fine')
    return True
def format_text_as_xml(text):
    """Return ``text`` pretty printed when it can be loaded as XML.

    ``text`` is wrapped in a ``root`` element that declares every namespace
    in ``xml_utils.namespaces`` and is handed to ``xml_utils.pretty_print``.
    The wrapper is then removed from the pretty printed result. ``text`` is
    returned unchanged when it cannot be loaded or pretty printed.
    """
    xml, _ = xml_utils.load_xml(text)
    if xml is None:
        return text

    namespace_attrs = ''.join(
        ' xmlns:' + n_id + '=' + '"' + n_link + '"'
        for n_id, n_link in xml_utils.namespaces.items())
    wrapped = '<root' + namespace_attrs + '>' + text + '</root>'

    pretty = xml_utils.pretty_print(wrapped)
    # NOTE(review): assumes the wrapper is stripped, and the result adopted,
    # only when '<root' is present in the pretty printed text - confirm.
    if pretty is not None and '<root' in pretty:
        unwrapped = pretty[pretty.find('<root'):]
        unwrapped = unwrapped[unwrapped.find('>') + 1:]
        text = unwrapped.replace('</root>', '')
    return text
def _sgmxml2xml(self):
    """Convert the sgmlxml file to xml.

    The source package file is loaded; nothing else is done when loading
    reports an error. The ``sps`` attribute of the root element selects the
    XSL. When it is absent, the latest SPS version (without its first four
    characters) is used and stored in the root. The transformation result
    gets its namespaces inserted through
    ``xml_utils.insert_namespaces_in_root`` and overwrites the source file.
    """
    logger.info("Convert sgml to xml")
    xml_obj, xml_error = xml_utils.load_xml(
        self.FILES.src_pkgfiles.filename)
    if xml_error:
        return

    root = xml_obj.find(".")
    sps_version = root.get("sps")
    if sps_version is None:
        sps_version = xml_versions.get_latest_sps_version()[4:]
        root.set("sps", sps_version)

    transformed = xml_utils.transform(
        xml_obj, xml_versions.xsl_getter(sps_version))
    content = xml_utils.insert_namespaces_in_root(
        "article", str(transformed))
    fs_utils.write_file(self.FILES.src_pkgfiles.filename, content)
# Set up the package: wraps `path` in workarea.MultiDocsPackageFolder and
# `output_path` in workarea.MultiDocsPackageOuputs, and stores `xml_names`
# and `optimised` on the instance.
#
# When `xml_names` is truthy, every item of self.files whose basename is in
# `xml_names` is loaded with xml_utils.load_xml and registered in
# self._articles as article.Article(xml, name); self.wk.get_doc_outputs is
# called with that name and `sgmxml_name`. The loading error is not checked,
# so Article receives whatever tree load_xml returned.
#
# self.issue_data is a PackageIssueData set up from self._articles. A message
# is printed when `xml_names` has fewer entries than
# self.package_folder.pkgfiles_items.
# NOTE(review): the statements are collapsed onto a single line here, so it
# cannot be told whether the issue_data setup and the final len(xml_names)
# check are inside `if xml_names:`. If the check is outside, xml_names=None
# raises TypeError - confirm before reformatting.
def __init__(self, path, output_path, xml_names, sgmxml_name=None, optimised=False): self.package_folder = workarea.MultiDocsPackageFolder(path) self.wk = workarea.MultiDocsPackageOuputs(output_path) self.xml_names = xml_names self.optimised = optimised self._articles = {} if xml_names: for name, item in self.files.items(): if item.basename not in xml_names: continue xml, xml_error = xml_utils.load_xml(item.filename) self._articles[name] = article.Article(xml, name) self.wk.get_doc_outputs(name, sgmxml_name) self.issue_data = PackageIssueData() self.issue_data.setup(self._articles) if len(xml_names) < len(self.package_folder.pkgfiles_items): print("SPPackage have {} documents. " "{} was filtered to be processed.".format( len(self.package_folder.pkgfiles_items), len(xml_names)))
def test_load_xml_loads_xml_but_ignore_incomplete_entities(self):
    """An unterminated character reference is reported as a CharRef error.

    The input holds ``&#91`` without the closing ``;``. A literal ``[`` in
    its place is well-formed XML and could never trigger the asserted
    error; the unterminated reference also ends right before column 14,
    the position named in the expected message.
    """
    _, errors = xml_utils.load_xml("<root><a>&#91</a></root>")
    self.assertEqual(
        errors,
        "Loading XML from 'str': CharRef: invalid decimal value, "
        "line 1, column 14 (<string>, line 1)")
def test_load_return_errors_because_of_tag_which_does_not_close(self):
    """An element that is never closed is reported as a missing end tag."""
    expected_error = (
        "Loading XML from 'str': EndTag: '</' not found, "
        "line 1, column 7 (<string>, line 1)")
    _, errors = xml_utils.load_xml("<root>")
    self.assertEqual(errors, expected_error)
def test_load_xml_return_errors_because_of_incomplete_tag(self):
    """A start tag without its closing ``>`` is reported as an error."""
    expected_error = (
        "Loading XML from 'str': Couldn't find end of Start Tag root "
        "line 1, line 1, column 6 (<string>, line 1)")
    _, errors = xml_utils.load_xml("<root")
    self.assertEqual(errors, expected_error)
def test_load_xml_successfully_from_str(self):
    """Well-formed XML content given as a string loads without error."""
    tree, error = xml_utils.load_xml("<root/>")
    self.assertIsNone(error)
    self.assertIsNotNone(tree)