def test_should_replace_affiliation_with_author_if_single_tokens(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Author and labelled affiliation lines should be annotated in full.

        The raw TEI header holds one author note and one affiliation note
        whose text begins with the label ``1``. The target XML declares two
        contributors and a single affiliation carrying that label. After
        running ``main`` with the ``simple`` matcher, the ``docAuthor`` and
        ``affiliation`` texts of the auto-annotated TEI must equal the
        original lines.
        """
        author_text = 'Mary Maison 1, John Smith 1'
        affiliation_text = '1 University of Science, Smithonia'

        # Raw TEI input: author line followed by the affiliation line.
        raw_header_node = get_header_tei_node([
            E.note(author_text),
            E.lb(),
            E.note(affiliation_text),
            E.lb(),
        ])
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(raw_header_node))

        # Target XML: two authors sharing one labelled affiliation.
        mary_node = E.contrib(
            E.name(E.surname('Maison'), E('given-names', 'Mary')))
        john_node = E.contrib(
            E.name(E.surname('Smith'), E('given-names', 'John')))
        aff_node = E.aff(
            E.label('1'),
            E.institution('University of Science'),
            E.country('Smithonia'))
        target_node = get_target_xml_node(
            author_nodes=[mary_node, john_node, aff_node])
        test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

        annotated_fields = ['title', 'author', 'author_aff', 'abstract']
        args_dict = {
            **test_helper.main_args_dict,
            'fields': ','.join(annotated_fields),
            'matcher': 'simple',
        }
        main(dict_to_args(args_dict), save_main_session=False)

        tei_auto_root = test_helper.get_tei_auto_root()
        actual_author_text = get_xpath_text(
            tei_auto_root, '//byline/docAuthor')
        assert actual_author_text == author_text
        actual_affiliation_text = get_xpath_text(
            tei_auto_root, '//byline/affiliation')
        assert actual_affiliation_text == affiliation_text
    def test_should_auto_annotate_title(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """A note whose text equals the target title is annotated as title.

        Writes a raw TEI header containing a single ``TEXT_1`` note and a
        target XML whose title is ``TEXT_1``, runs ``main`` with the helper's
        default arguments, and checks the ``titlePart`` text of the result.
        """
        raw_header_node = get_header_tei_node([E.note(TEXT_1)])
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(raw_header_node))

        target_node = get_target_xml_node(title=TEXT_1)
        test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

        main(list(test_helper.main_args), save_main_session=False)

        tei_auto_root = test_helper.get_tei_auto_root()
        actual_title = get_xpath_text(tei_auto_root, '//docTitle/titlePart')
        assert actual_title == TEXT_1
    def test_should_auto_annotate_affiliation_preceding_number_using_simple_matcher(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """The number line before an affiliation is part of the affiliation.

        The raw TEI header places the affiliation number ``1`` and the
        affiliation body in two separate notes, while the target XML
        affiliation has no label. After running ``main`` with the ``simple``
        matcher, the ``affiliation`` text must be both notes joined by a
        space; title, author and abstract are checked as well.
        """
        author_text = 'Mary Maison 1, John Smith 1'
        affiliation_text_1 = '1'
        affiliation_text_2 = 'University of Science, Smithonia'
        affiliation_text = f'{affiliation_text_1} {affiliation_text_2}'

        # Raw TEI input: title, authors, number, affiliation, abstract.
        raw_header_node = get_header_tei_node([
            E.note(TITLE_1),
            E.lb(),
            E.note(author_text),
            E.lb(),
            E.note(affiliation_text_1),
            E.lb(),
            E.note(affiliation_text_2),
            E.lb(),
            E.note(ABSTRACT_PREFIX_1, E.lb(), ABSTRACT_1),
        ])
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(raw_header_node))

        # Target XML: the affiliation is declared without a label element.
        mary_node = E.contrib(
            E.name(E.surname('Maison'), E('given-names', 'Mary')))
        john_node = E.contrib(
            E.name(E.surname('Smith'), E('given-names', 'John')))
        aff_node = E.aff(
            E.institution('University of Science'),
            E.country('Smithonia'))
        target_node = get_target_xml_node(
            title=TITLE_1,
            author_nodes=[mary_node, john_node, aff_node],
            abstract_node=E.abstract(E.p(ABSTRACT_1)))
        test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

        annotated_fields = ['title', 'author', 'author_aff', 'abstract']
        args_dict = {
            **test_helper.main_args_dict,
            'fields': ','.join(annotated_fields),
            'matcher': 'simple',
        }
        main(dict_to_args(args_dict), save_main_session=False)

        tei_auto_root = test_helper.get_tei_auto_root()
        actual_title = get_xpath_text(tei_auto_root, '//docTitle/titlePart')
        assert actual_title == TITLE_1
        actual_author_text = get_xpath_text(
            tei_auto_root, '//byline/docAuthor')
        assert actual_author_text == author_text
        actual_affiliation_text = get_xpath_text(
            tei_auto_root, '//byline/affiliation')
        assert actual_affiliation_text == affiliation_text
        actual_abstract_text = get_xpath_text(
            tei_auto_root, '//div[@type="abstract"]')
        assert actual_abstract_text == (ABSTRACT_PREFIX_1 + ABSTRACT_1)
    def test_should_extend_title_annotation_to_whole_line(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Check the title annotation of a line that has a ``Title: `` prefix.

        The raw TEI note is ``'Title: '`` followed by the title text, while
        the target XML holds only the title text. After running ``main`` with
        ``--matcher=simple``, the ``titlePart`` text returned by
        ``get_xpath_text`` must equal the bare title text.
        """
        title_text = 'Chocolate bars for mice'

        raw_header_node = get_header_tei_node(
            [E.note(f'Title: {title_text}')])
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(raw_header_node))

        target_node = get_target_xml_node(title=title_text)
        test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

        cli_args = [*test_helper.main_args, '--matcher=simple']
        main(cli_args, save_main_session=False)

        tei_auto_root = test_helper.get_tei_auto_root()
        actual_title = get_xpath_text(tei_auto_root, '//docTitle/titlePart')
        assert actual_title == title_text
    def test_should_skip_errors(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """A broken sibling document must not prevent the valid one.

        An additional ``document0`` pair is written whose target XML has the
        bytes ``b'error'`` appended after the serialized document. The
        helper's regular pair is written intact. With ``--skip-errors`` the
        regular document must still be annotated with its title.
        """
        # Extra document whose target XML carries trailing garbage.
        tei_raw_other_file_path = test_helper.tei_raw_path.joinpath(
            'document0.header.tei.xml')
        other_tei_bytes = etree.tostring(
            get_header_tei_node([E.note(TEXT_1)]))
        tei_raw_other_file_path.write_bytes(other_tei_bytes)

        xml_other_file_path = test_helper.xml_path.joinpath('document0.xml')
        broken_xml_bytes = (
            etree.tostring(get_target_xml_node(title=TEXT_1)) + b'error')
        xml_other_file_path.write_bytes(broken_xml_bytes)

        # Regular document pair, written without modification.
        tei_bytes = etree.tostring(get_header_tei_node([E.note(TEXT_1)]))
        test_helper.tei_raw_file_path.write_bytes(tei_bytes)
        xml_bytes = etree.tostring(get_target_xml_node(title=TEXT_1))
        test_helper.xml_file_path.write_bytes(xml_bytes)

        cli_args = [
            *test_helper.main_args,
            '--matcher=simple',
            '--skip-errors',
        ]
        main(cli_args, save_main_session=False)

        tei_auto_root = test_helper.get_tei_auto_root()
        actual_title = get_xpath_text(tei_auto_root, '//docTitle/titlePart')
        assert actual_title == TEXT_1
    def test_should_filter_out_xml_if_selected_fields_are_not_matching(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper,
            actual_abstract: str, expected_abstract: str, expected_match: bool,
            required_fields: str, relative_failed_output_path: str,
            temp_dir: Path):
        """Check whether the auto-annotated file is written or filtered out.

        Args:
            test_helper: Helper providing input/output paths and base args.
            actual_abstract: Abstract text placed in the raw TEI header.
            expected_abstract: Abstract text placed in the target XML; when
                falsy, no abstract node is passed to the target XML.
            expected_match: Whether the auto-annotated TEI file is expected
                to exist after the run.
            required_fields: Value passed as the ``required-fields`` option.
            relative_failed_output_path: Path relative to ``temp_dir`` used
                as ``failed-output-path``; when falsy, an empty string is
                passed instead.
            temp_dir: Base directory for the failed output path.

        NOTE(review): the non-helper arguments are presumably supplied by a
        parametrization that is not visible here -- confirm.
        """
        raw_header_node = get_header_tei_node([
            E.note(TITLE_1),
            E.lb(),
            E.note(ABSTRACT_PREFIX_1, E.lb(), actual_abstract),
        ])
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(raw_header_node))

        # Only declare an abstract in the target XML when one is expected.
        if expected_abstract:
            abstract_node = E.abstract(E.p(expected_abstract))
        else:
            abstract_node = None
        target_node = get_target_xml_node(
            title=TITLE_1, abstract_node=abstract_node)
        test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

        failed_output_path: str = ''
        if relative_failed_output_path:
            failed_output_path = str(temp_dir / relative_failed_output_path)

        annotated_fields = ['title', 'author', 'author_aff', 'abstract']
        matching_fields = ['abstract']
        args_dict = {
            **test_helper.main_args_dict,
            'fields': ','.join(annotated_fields),
            'require-matching-fields': ','.join(matching_fields),
            'required-fields': required_fields,
            'failed-output-path': failed_output_path,
            'matcher': 'simple',
        }
        main(dict_to_args(args_dict), save_main_session=False)

        if expected_match:
            assert test_helper.tei_auto_file_path.exists()
            return

        # Filtered out: nothing at the regular output location ...
        assert not test_helper.tei_auto_file_path.exists()
        if failed_output_path:
            # ... and the file appears under the failed output directory.
            failed_file_path = (
                Path(failed_output_path) /
                test_helper.tei_auto_file_path.name)
            assert failed_file_path.exists()