def test_should_remove_doi_prefix_from_doi(self):
     """Expect DOI_1 as the DOI text when the element held 'doi:' + DOI_1."""
     doi_element = get_jats_doi_element('doi:' + DOI_1)
     ref = get_jats_mixed_ref('some text', doi_element)
     fixed = fix_reference(clone_node(ref))
     # Join all matched DOI texts so extra matches would fail the check.
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_remove_duplicate_doi_with_tail(self):
     """Expect a single DOI_1 when the element held 'DOI_1; DOI_1'.

     The reference also carries 'tail text' after the DOI element.
     """
     doi_element = get_jats_doi_element(DOI_1 + '; ' + DOI_1)
     ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')
     fixed = fix_reference(clone_node(ref))
     # Join all matched DOI texts so a surviving duplicate fails the check.
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_remove_doi_suffix_from_doi_without_tail(self):
     """Expect DOI_1 as the DOI text when the element held DOI_1 + ' [doi]'.

     No tail text follows the DOI element in this reference.
     """
     doi_element = get_jats_doi_element(DOI_1 + ' [doi]')
     ref = get_jats_mixed_ref('doi: ', doi_element)
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_remove_doi_prefix_after_preceeding_element_with_tail_text(
         self):
     """Expect DOI_1 when 'doi:' + DOI_1 follows an element with tail text.

     The reference is built from an ``other`` element, the string
     'tail text', and then the DOI element.
     """
     preceding_element = E.other('other text')
     doi_element = get_jats_doi_element('doi:' + DOI_1)
     ref = get_jats_mixed_ref(preceding_element, 'tail text', doi_element)
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_convert_doi_with_outside_url_prefix_to_ext_link(self):
     """Expect ext-link text HTTPS_DOI_URL_PREFIX + DOI_1 after the fix.

     The URL prefix sits in the text before the DOI element, which itself
     holds only DOI_1; 'tail text' follows the element.
     """
     leading_text = 'some text ' + HTTPS_DOI_URL_PREFIX
     doi_element = get_jats_doi_element(DOI_1)
     ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')
     fixed = fix_reference(clone_node(ref))
     link_texts = get_text_content_list(fixed.xpath(JatsXpaths.EXT_LINK))
     assert '|'.join(link_texts) == HTTPS_DOI_URL_PREFIX + DOI_1
 def test_should_remove_doi_pub_id_element_if_not_containing_valid_doi(
         self):
     """Expect no DOI text after the fix when the element held 'not a doi'."""
     doi_element = get_jats_doi_element('not a doi')
     ref = get_jats_mixed_ref('doi: ', doi_element)
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     # An empty joined string means the DOI xpath yielded no text at all.
     assert '|'.join(doi_texts) == ''
 def test_should_remove_duplicate_doi_ignoring_punct_with_tail(self):
     """Expect only the first DOI when two differ by one punctuation mark.

     The element holds DOI_1 + '.ab-123' and DOI_1 + '.ab.123' separated
     by '; '; the test expects the first variant as the only DOI text.
     """
     first_variant = DOI_1 + '.ab-123'
     second_variant = DOI_1 + '.ab.123'
     doi_element = get_jats_doi_element(
         first_variant + '; ' + second_variant)
     ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == first_variant
 def test_should_not_include_doi_colon_in_pii(self):
     """Expect DOI_1 and PII_1 to come out as separate DOI and PII texts.

     The element holds PII_1 + ' [pii]; ' + DOI_1 + ' [doi]' and is
     preceded by the text 'doi:'.
     """
     combined_text = PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     ref = get_jats_mixed_ref('doi:', get_jats_doi_element(combined_text))
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     pii_texts = get_text_content_list(fixed.xpath(JatsXpaths.PII))
     assert '|'.join(doi_texts) == DOI_1
     assert '|'.join(pii_texts) == PII_1
 def test_should_separately_annotate_pii_with_preceding_element(self):
     """Expect separate DOI_1 and PII_1 texts when an element precedes them.

     The reference is built from an ``other`` element, the text 'doi: ',
     and an element holding PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'.
     """
     preceding_element = E.other('other text')
     combined_text = PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     ref = get_jats_mixed_ref(
         preceding_element, 'doi: ', get_jats_doi_element(combined_text))
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     pii_texts = get_text_content_list(fixed.xpath(JatsXpaths.PII))
     assert '|'.join(doi_texts) == DOI_1
     assert '|'.join(pii_texts) == PII_1
 def test_should_remove_doi_duplicate_pii_suffix_from_doi_with_tail(self):
     """Expect the full DOI alone when its last fragment recurs as a pii.

     The DOI is DOI_1 + '.doi-duplicate'; the element text appends two
     spaces, the fragment 'doi-duplicate' again, and ' [pii]'.
     """
     repeated_fragment = 'doi-duplicate'
     expected_doi = DOI_1 + '.' + repeated_fragment
     element_text = expected_doi + '  ' + repeated_fragment + ' [pii]'
     ref = get_jats_mixed_ref(
         'doi: ', get_jats_doi_element(element_text), 'tail text')
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == expected_doi
 def test_should_separately_annotate_invalid_pii_as_other_pub_id(self):
     """Expect INVALID_PII_1 under the other-pub-id xpath, DOI_1 as the DOI.

     The element holds INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'.
     """
     combined_text = INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     ref = get_jats_mixed_ref('doi: ', get_jats_doi_element(combined_text))
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     other_texts = get_text_content_list(
         fixed.xpath(JatsXpaths.OTHER_PUB_ID))
     assert '|'.join(doi_texts) == DOI_1
     assert '|'.join(other_texts) == INVALID_PII_1
 def test_should_convert_doi_with_outside_spaced_url_prefix_to_ext_link(
         self):
     """Expect an ext-link whose text keeps the spaced prefix.

     The text before the DOI element ends with HTTPS_SPACED_DOI_URL_PREFIX.
     The link text must equal that prefix plus DOI_1, while its href
     attribute must equal HTTPS_DOI_URL_PREFIX + DOI_1.
     """
     leading_text = 'some text ' + HTTPS_SPACED_DOI_URL_PREFIX
     ref = get_jats_mixed_ref(
         leading_text, get_jats_doi_element(DOI_1), 'tail text')
     fixed = fix_reference(clone_node(ref))
     link_nodes = fixed.xpath(JatsXpaths.EXT_LINK)
     joined_link_text = '|'.join(get_text_content_list(link_nodes))
     assert joined_link_text == HTTPS_SPACED_DOI_URL_PREFIX + DOI_1
     expected_attrib = {
         'ext-link-type': 'uri',
         XLINK_HREF: HTTPS_DOI_URL_PREFIX + DOI_1
     }
     assert link_nodes[0].attrib == expected_attrib
 def test_should_fix_jats_xml_using_source_path(self, input_dir: Path,
                                                output_dir: Path):
     """Run ``main`` with --source-path and check the written output file.

     A JATS document with one reference (DOI element text 'doi:' + DOI_1)
     is written to input_dir / 'file1.xml'. After ``main`` runs, the test
     expects output_dir / 'file1.xml' to exist and to yield DOI_1 as its
     DOI text.
     """
     ref = get_jats_mixed_ref(
         'doi: ', get_jats_doi_element('doi:' + DOI_1))
     source_file = input_dir / 'file1.xml'
     source_file.parent.mkdir()
     jats_bytes = etree.tostring(get_jats(references=[ref]))
     source_file.write_bytes(jats_bytes)
     expected_output_file = output_dir / 'file1.xml'
     cli_args = [
         '--source-path=%s' % source_file,
         '--output-path=%s' % output_dir,
     ]
     main(cli_args)
     assert expected_output_file.exists()
     fixed_root = parse_xml(str(expected_output_file))
     doi_texts = get_text_content_list(fixed_root.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_fix_jats_xml_using_source_file_list_in_sub_directory(
         self, input_dir: Path, output_dir: Path):
     """Run ``main`` with --source-file-list and check the nested output.

     The input document lives at input_dir / 'sub' / 'file1.xml'. The file
     list is input_dir / 'file-list.tsv' with the header line 'xml_url'
     and the entry 'sub/file1.xml'. After ``main`` runs, the test expects
     output_dir / 'sub' / 'file1.xml' to exist and to yield DOI_1 as its
     DOI text.
     """
     ref = get_jats_mixed_ref(
         'doi: ', get_jats_doi_element('doi:' + DOI_1))
     source_file = input_dir / 'sub' / 'file1.xml'
     source_file.parent.mkdir(parents=True)
     jats_bytes = etree.tostring(get_jats(references=[ref]))
     source_file.write_bytes(jats_bytes)
     expected_output_file = output_dir / 'sub' / 'file1.xml'
     file_list_path = input_dir / 'file-list.tsv'
     file_list_lines = ['xml_url', 'sub/file1.xml']
     file_list_path.write_text('\n'.join(file_list_lines))
     cli_args = [
         '--source-file-list=%s' % file_list_path,
         '--output-path=%s' % output_dir,
     ]
     main(cli_args)
     assert expected_output_file.exists()
     fixed_root = parse_xml(str(expected_output_file))
     doi_texts = get_text_content_list(fixed_root.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1
 def test_should_not_change_valid_doi(self):
     """Expect DOI_1 as the DOI text when the element already held DOI_1."""
     doi_element = get_jats_doi_element(DOI_1)
     ref = get_jats_mixed_ref('doi: ', doi_element)
     fixed = fix_reference(clone_node(ref))
     doi_texts = get_text_content_list(fixed.xpath(JatsXpaths.DOI))
     assert '|'.join(doi_texts) == DOI_1