示例#1
0
def test_extent_before_translation() -> None:
    """Expect ``ValueError`` for a translation with extent 1 placed after lines 1 and 2."""

    def bu_line(number: int):
        # One-word text line containing the reading "bu".
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    # All construction stays inside the ``raises`` block, so a ValueError raised
    # while building any of the lines is accepted as well.
    with pytest.raises(ValueError):
        lines = [
            bu_line(1),
            bu_line(2),
            TranslationLine(tuple(), "en", Extent(LineNumber(1))),
        ]
        Text.of_iterable(lines)
def test_update_lemmatization() -> None:
    """Updating a one-word line with a lemmatization token yields the lemmatized line."""
    reading_name = "bu"
    lemma = (WordId("nu I"),)

    original_line = TextLine.of_iterable(
        LINE_NUMBER, [Word.of([Reading.of_name(reading_name)])]
    )
    new_lemmatization = (LemmatizationToken(reading_name, lemma),)
    lemmatized_line = TextLine.of_iterable(
        LINE_NUMBER,
        [Word.of([Reading.of_name(reading_name)], unique_lemma=lemma)],
    )

    updated_line = original_line.update_lemmatization(new_lemmatization)
    assert updated_line == lemmatized_line
示例#3
0
def test_extent_overlapping_languages() -> None:
    """Build a text with an "en" translation extending to line 2 and a "de" one after it.

    There is no assertion: the test passes as long as ``Text.of_iterable``
    does not raise.
    """
    first_line = TextLine.of_iterable(
        LineNumber(1), [Word.of([Reading.of_name("bu")])]
    )
    english_translation = TranslationLine(tuple(), "en", Extent(LineNumber(2)))
    second_line = TextLine.of_iterable(
        LineNumber(2), [Word.of([Reading.of_name("bu")])]
    )
    german_translation = TranslationLine(tuple(), "de")

    Text.of_iterable(
        [first_line, english_translation, second_line, german_translation]
    )
def text_with_labels():
    """Return a ``Text`` of two "bu" lines with column, surface and object @-lines between."""

    def bu_line(number):
        # One-word text line containing the reading "bu".
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    lines = [
        bu_line(1),
        ColumnAtLine(ColumnLabel.from_int(1)),
        SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")),
        ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig")),
        bu_line(2),
    ]
    return Text.of_iterable(lines)
示例#5
0
def test_exent_overlapping() -> None:
    """Expect ``ValueError`` when a translation follows a line covered by an earlier extent.

    Both translation lines use the default language.
    """

    def bu_line(number: int):
        # One-word text line containing the reading "bu".
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    # All construction stays inside the ``raises`` block, so a ValueError raised
    # while building any of the lines is accepted as well.
    with pytest.raises(ValueError):
        lines = [
            bu_line(1),
            TranslationLine(tuple(), extent=Extent(LineNumber(2))),
            bu_line(2),
            TranslationLine(tuple()),
        ]
        Text.of_iterable(lines)
def test_text_line_atf_erasure(erasure, expected: str) -> None:
    """Check the ATF of a line with ``erasure`` tokens between two "mu-mu" words.

    ``erasure`` is unpacked into the token list; ``expected`` is the text the
    ATF must contain between the two words.
    """
    parts = [Reading.of_name("mu"), Joiner.hyphen(), Reading.of_name("mu")]
    mu_mu = Word.of(parts)
    # The same word object is placed before and after the erasure tokens.
    tokens = [mu_mu, *erasure, mu_mu]

    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert line.atf == f"{line.line_number.atf} {mu_mu.value} {expected} {mu_mu.value}"
示例#7
0
def test_dump_line():
    """Dump a three-line text and compare it with the expected dictionary."""
    ha_am = Word.of(
        parts=[Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    )
    lines = (
        TextLine.of_iterable(LineNumber(1), [ha_am]),
        EmptyLine(),
        ControlLine("#", " comment"),
    )
    text = Text(lines, "1.0.0")

    dumped = TextSchema().dump(text)

    # Only one of the three lines is a text line; the expected count is 1.
    expected = {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }
    assert dumped == expected
def _manuscript_text_line(n):
    """Build the one-word text line numbered ``n`` used by ``ManuscriptLineFactory``."""
    word = Word.of(
        [
            Reading.of_name("ku"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("nu"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ]
    )
    return TextLine.of_iterable(LineNumber(n), (word,))


class ManuscriptLineFactory(factory.Factory):
    """Factory for ``ManuscriptLine`` objects with fixed labels and paratext."""

    class Meta:
        model = ManuscriptLine

    # Sequence counter is used directly as the manuscript id.
    manuscript_id = factory.Sequence(lambda n: n)
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # The line number follows the factory sequence counter.
    line = factory.Sequence(_manuscript_text_line)
    paratext = (
        NoteLine((StringPart("note"),)),
        RulingDollarLine(Ruling.SINGLE),
    )
    omitted_words = (1,)
示例#9
0
def test_update_lemmatization() -> None:
    """Replacing the first lemmatization token of ``TEXT`` sets the word's unique lemma."""
    new_lemma = (WordId("nu I"),)

    # Copy the token rows into lists so the first token can be swapped out.
    rows = [list(row) for row in TEXT.lemmatization.tokens]
    first_token = rows[0][0]
    rows[0][0] = LemmatizationToken(first_token.value, new_lemma)
    lemmatization = Lemmatization(rows)

    lemmatized_word = Word.of(
        unique_lemma=(WordId("nu I"),),
        parts=[Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")],
    )
    expected_lines = (
        TextLine(LineNumber(1), (lemmatized_word,)),
        RulingDollarLine(atf.Ruling.SINGLE),
    )
    expected = Text(expected_lines, TEXT.parser_version)

    assert TEXT.update_lemmatization(lemmatization) == expected
def test_update_lemmatization_wrong_lenght() -> None:
    """A one-token lemmatization applied to a two-word line raises ``LemmatizationError``."""
    two_words = [
        Word.of([Reading.of_name("bu")]),
        Word.of([Reading.of_name("bu")]),
    ]
    line = TextLine.of_iterable(LINE_NUMBER, two_words)
    single_token = (LemmatizationToken("bu", (WordId("nu I"),)),)

    with pytest.raises(LemmatizationError):
        line.update_lemmatization(single_token)
def test_statistics(database, fragment_repository):
    """Insert three fragments and check the repository's fragment and line counts.

    The inserted texts contain one, three and zero text lines respectively.
    """

    def word(name):
        return Word.of([Reading.of_name(name)])

    def dumped_fragment(text):
        # Build a fragment around ``text`` and serialize it for insertion.
        return SCHEMA.dump(FragmentFactory.build(text=text))

    documents = [
        dumped_fragment(
            Text(
                (
                    TextLine(LineNumber(1), (word("first"), word("line"))),
                    ControlLine("#", "ignore"),
                    EmptyLine(),
                )
            )
        ),
        dumped_fragment(
            Text(
                (
                    ControlLine("#", "ignore"),
                    TextLine(LineNumber(1), (word("second"),)),
                    TextLine(LineNumber(2), (word("third"),)),
                    ControlLine("#", "ignore"),
                    TextLine(LineNumber(3), (word("fourth"),)),
                )
            )
        ),
        dumped_fragment(Text()),
    ]
    database[COLLECTION].insert_many(documents)

    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4
示例#12
0
def test_updating_alignment(client, bibliography, sign_repository, signs,
                            text_repository):
    """POST an alignment for a stored chapter and check the POST and GET responses."""
    allow_signs(signs, sign_repository)
    chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    alignment = 0
    omitted_words = (1,)

    # Walk down to the first manuscript line of the first variant of line 0.
    first_line = chapter.lines[0]
    first_variant = first_line.variants[0]
    first_manuscript = first_variant.manuscripts[0]

    aligned_word = Word.of(
        [
            Reading.of_name("ku"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("nu"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ],
        alignment=alignment,
        variant=Word.of(
            [Logogram.of_name("KU")],
            language=Language.SUMERIAN,
        ),
    )

    # Rebuild the chapter from the inside out with the aligned manuscript line.
    updated_manuscript = attr.evolve(
        first_manuscript,
        line=TextLine.of_iterable(
            first_manuscript.line.line_number, (aligned_word,)
        ),
        omitted_words=omitted_words,
    )
    updated_variant = attr.evolve(first_variant, manuscripts=(updated_manuscript,))
    updated_line = attr.evolve(first_line, variants=(updated_variant,))
    updated_chapter = attr.evolve(chapter, lines=(updated_line,))

    expected_chapter = ApiChapterSchema().dump(updated_chapter)

    alignment_url = create_chapter_url(chapter, "/alignment")
    post_result = client.simulate_post(alignment_url, body=json.dumps(DTO))

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_chapter

    get_result = client.simulate_get(create_chapter_url(chapter))

    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == expected_chapter
def test_text_line_atf_gloss() -> None:
    """Two words wrapped in a document-oriented gloss render as ``{(mu bu)}``."""
    tokens = [
        DocumentOrientedGloss.open(),
        Word.of([Reading.of_name("mu")]),
        Word.of([Reading.of_name("bu")]),
        DocumentOrientedGloss.close(),
    ]
    gloss_line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert gloss_line.atf == f"{gloss_line.line_number.atf} {{(mu bu)}}"
示例#14
0
def test_query_lemmas_ignores_in_value(parts, expected, fragment_repository,
                                       lemma_repository):
    """Store a fragment whose only word is built from ``parts`` and query lemmas for "ana"."""
    lemmatized_word = Word.of(parts, unique_lemma=(WordId("ana I"),))
    text = Text.of_iterable(
        [TextLine.of_iterable(LineNumber(1), [lemmatized_word])]
    )
    fragment = FragmentFactory.build(text=text, signs="DIŠ")
    fragment_repository.create(fragment)

    found = lemma_repository.query_lemmas("ana", False)
    assert found == expected
def test_updating_alignment(corpus, text_repository, bibliography, changelog,
                            signs, sign_repository, user, when) -> None:
    """``corpus.update_alignment`` returns ``CHAPTER`` with the aligned manuscript line."""
    alignment_index = 0
    omitted_words = (1,)

    # Walk down to the first manuscript line of the first variant of line 0.
    first_line = CHAPTER.lines[0]
    first_variant = first_line.variants[0]
    first_manuscript = first_variant.manuscripts[0]

    aligned_word = Word.of(
        [
            Reading.of_name("ku"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("nu"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ],
        alignment=alignment_index,
    )

    # Rebuild the chapter from the inside out with the aligned manuscript line.
    updated_manuscript = attr.evolve(
        first_manuscript,
        line=TextLine.of_iterable(
            first_manuscript.line.line_number, (aligned_word,)
        ),
        omitted_words=omitted_words,
    )
    updated_variant = attr.evolve(first_variant, manuscripts=(updated_manuscript,))
    updated_line = attr.evolve(first_line, variants=(updated_variant,))
    updated_chapter = attr.evolve(CHAPTER, lines=(updated_line,))

    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    manuscript_alignment = ManuscriptLineAlignment(
        (AlignmentToken("ku-[nu-ši]", alignment_index),), omitted_words
    )
    # Three nested one-element tuples wrap the single manuscript alignment.
    alignment = Alignment((((manuscript_alignment,),),))

    result = corpus.update_alignment(CHAPTER.id_, alignment, user)
    assert result == updated_chapter
def test_updating_manuscripts(corpus, text_repository, bibliography, changelog,
                              signs, sign_repository, user, when) -> None:
    """``corpus.update_manuscripts`` returns ``CHAPTER`` with the edited manuscript."""
    uncertain_fragments = (MuseumNumber.of("K.1"),)

    new_colophon = Transliteration.of_iterable([
        TextLine.of_iterable(LineNumber(1, True),
                             (Word.of([Reading.of_name("ba")]),))
    ])
    new_unplaced_lines = Transliteration.of_iterable([
        TextLine.of_iterable(LineNumber(1, True),
                             (Word.of([Reading.of_name("ku")]),))
    ])
    edited_manuscript = attr.evolve(
        CHAPTER.manuscripts[0],
        colophon=new_colophon,
        unplaced_lines=new_unplaced_lines,
        notes="Updated manuscript.",
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        manuscripts=(edited_manuscript,),
        uncertain_fragments=uncertain_fragments,
        signs=("KU ABZ075 ABZ207a\\u002F207b\\u0020X\nBA\nKU",),
    )

    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    manuscripts = (updated_chapter.manuscripts[0],)
    result = corpus.update_manuscripts(
        CHAPTER.id_, manuscripts, uncertain_fragments, user
    )
    assert result == updated_chapter
def _manuscript_museum_number(n):
    """Return museum number ``M.<n>`` for odd ``n`` and ``None`` otherwise."""
    if pydash.is_odd(n):
        return MuseumNumber("M", str(n))
    return None


def _manuscript_accession(n):
    """Return accession ``A.<n>`` for even ``n`` and an empty string otherwise."""
    if pydash.is_even(n):
        return f"A.{n}"
    return ""


class ManuscriptFactory(factory.Factory):
    """Factory for ``Manuscript`` objects with fixed colophon and unplaced lines."""

    class Meta:
        model = Manuscript

    # Ids start at 1 because the sequence counter is incremented by one.
    id = factory.Sequence(lambda n: n + 1)
    siglum_disambiguator = factory.Faker("word")
    # Odd counters get a museum number, even counters get an accession.
    museum_number = factory.Sequence(_manuscript_museum_number)
    accession = factory.Sequence(_manuscript_accession)
    period_modifier = factory.fuzzy.FuzzyChoice(PeriodModifier)
    # The fuzzy choices below exclude one member of each enumeration.
    period = factory.fuzzy.FuzzyChoice(set(Period) - {Period.NONE})
    provenance = factory.fuzzy.FuzzyChoice(
        set(Provenance) - {Provenance.STANDARD_TEXT}
    )
    type = factory.fuzzy.FuzzyChoice(
        set(ManuscriptType) - {ManuscriptType.NONE}
    )
    notes = factory.Faker("sentence")
    colophon = Transliteration.of_iterable(
        [
            TextLine.of_iterable(
                LineNumber(1, True), (Word.of([Reading.of_name("ku")]),)
            )
        ]
    )
    unplaced_lines = Transliteration.of_iterable(
        [
            TextLine.of_iterable(
                LineNumber(1, True), (Word.of([Reading.of_name("nu")]),)
            )
        ]
    )
    references = factory.List(
        [factory.SubFactory(ReferenceFactory, with_document=True)],
        TupleFactory,
    )
def test_lemmatization() -> None:
    """``lemmatization`` yields one token per line token, with lemmas where set."""
    lemma = (WordId("nu I"),)
    tokens = [
        Word.of([Reading.of_name("bu")], unique_lemma=lemma),
        UnknownNumberOfSigns.of(),
        Word.of([Reading.of_name("nu")]),
    ]
    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    expected = (
        LemmatizationToken("bu", (WordId("nu I"),)),
        LemmatizationToken("..."),
        LemmatizationToken("nu", tuple()),
    )
    assert line.lemmatization == expected
def test_parse_normalized_akkadain_shift() -> None:
    """Parse a line that shifts to ``%n`` and then ``%sux`` and compare the tokens."""
    word = "ha"
    atf_line = f"1. {word} %n {word} %sux {word}"

    expected_tokens = (
        Word.of((Reading.of_name(word),), DEFAULT_LANGUAGE),
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of(word),)),
        LanguageShift.of("%sux"),
        Word.of((Reading.of_name(word),), Language.SUMERIAN),
    )
    expected = Text(
        (TextLine.of_iterable(LineNumber(1), expected_tokens),)
    )

    assert parse_atf_lark(atf_line).lines == expected.lines
def test_text_line_of_iterable(code: str, language: Language) -> None:
    """``TextLine.of_iterable`` applies language shifts and enclosure state to tokens.

    ``code`` is the first language shift and ``language`` the language
    expected on the word that follows it.
    """
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})

    tokens = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(), Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]

    # Tokens after the opening bracket are expected to carry the
    # broken-away enclosure type.
    enclosed_reading = Reading.of(
        (ValueToken(broken_away, ErasureState.NONE, "fourth"),)
    ).set_enclosure_type(broken_away)
    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        LoneDeterminative.of(
            [Determinative.of([Reading.of_name("third")])], Language.AKKADIAN
        ),
        Word.of([BrokenAway.open(), enclosed_reading], DEFAULT_LANGUAGE),
        UnknownNumberOfSigns(broken_away, ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(broken_away),
    )

    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens
    assert (
        line.key ==
        f"TextLine⁞{line.atf}⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )
    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"
def test_text_line_of_iterable_normalized() -> None:
    """A normalized-Akkadian shift followed by an Akkadian word keeps its tokens."""

    def normalized_tokens():
        # Fresh token objects for each call, so input and expectation are distinct.
        return (
            LanguageShift.normalized_akkadian(),
            AkkadianWord.of((ValueToken.of("kur"),)),
        )

    tokens = list(normalized_tokens())
    expected_tokens = normalized_tokens()

    line = TextLine.of_iterable(LINE_NUMBER, tokens)

    assert line.content == expected_tokens
    assert (
        line.key ==
        f"TextLine⁞{line.atf}⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )

    assert line.atf == "1. %n kur"
def test_parse_atf_language_shifts(code: str,
                                   expected_language: Language) -> None:
    """Parse a line with shift ``code`` followed by ``%sb`` and compare word languages."""
    word = "ha-am"
    # The same parts list is reused for all three expected words.
    parts = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    atf_line = f"1. {word} {code} {word} %sb {word}"

    expected_tokens = (
        Word.of(parts, DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        Word.of(parts, expected_language),
        LanguageShift.of("%sb"),
        Word.of(parts, Language.AKKADIAN),
    )
    expected = Text(
        (TextLine.of_iterable(LineNumber(1), expected_tokens),)
    )

    assert parse_atf_lark(atf_line).lines == expected.lines
def test_updating_lines_edit(corpus, text_repository, bibliography, changelog,
                             signs, sign_repository, user, when) -> None:
    """``corpus.update_lines`` with an edit of line 0 returns the updated chapter."""
    new_number = LineNumber(1, True)

    # Walk down to the first manuscript line of the first variant of line 0.
    first_line = CHAPTER.lines[0]
    first_variant = first_line.variants[0]
    first_manuscript = first_variant.manuscripts[0]

    edited_word = Word.of([
        Reading.of_name("nu"),
        Joiner.hyphen(),
        BrokenAway.open(),
        Reading.of_name("ku"),
        Joiner.hyphen(),
        Reading.of_name("ši"),
        BrokenAway.close(),
    ])

    # Rebuild the chapter from the inside out with the edited manuscript line.
    edited_manuscript = attr.evolve(
        first_manuscript,
        line=TextLine.of_iterable(LineNumber(1, True), (edited_word,)),
    )
    edited_variant = attr.evolve(first_variant, manuscripts=(edited_manuscript,))
    edited_line = attr.evolve(
        first_line,
        number=new_number,
        variants=(edited_variant,),
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(edited_line,),
        signs=("ABZ075 KU ABZ207a\\u002F207b\\u0020X\nKU\nABZ075",),
        parser_version=ATF_PARSER_VERSION,
    )

    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )

    # No new lines, no deletions, one edit keyed by line index 0.
    lines_update = LinesUpdate([], set(), {0: updated_chapter.lines[0]})
    result = corpus.update_lines(CHAPTER.id_, lines_update, user)
    assert result == updated_chapter
def test_parse_dividers() -> None:
    """Parse a line of eight dividers with various modifiers and flags."""
    atf_line = r'1. :? :#! :# ::? :.@v /@19* :"@20@c ;@v@19!'

    no_modifiers = tuple()
    no_flags = tuple()
    dividers = (
        Divider.of(":", no_modifiers, (atf.Flag.UNCERTAIN,)),
        Divider.of(":", no_modifiers, (atf.Flag.DAMAGE, atf.Flag.CORRECTION)),
        Divider.of(":", no_modifiers, (atf.Flag.DAMAGE,)),
        Divider.of("::", no_modifiers, (atf.Flag.UNCERTAIN,)),
        Divider.of(":.", ("@v",), no_flags),
        Divider.of("/", ("@19",), (atf.Flag.COLLATION,)),
        Divider.of(':"', ("@20", "@c"), no_flags),
        Divider.of(";", ("@v", "@19"), (atf.Flag.CORRECTION,)),
    )
    expected_lines = [TextLine.of_iterable(LineNumber(1), dividers)]

    assert parse_atf_lark(atf_line).lines == Text.of_iterable(
        expected_lines).lines
# A transliterated fragment whose text is replaced by a single, fully
# lemmatized text line and whose signs are set to match.
ANOTHER_LEMMATIZED_FRAGMENT = attr.evolve(
    TransliteratedFragmentFactory.build(),
    text=Text((
        TextLine(
            LineNumber(1),
            (
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                # "ana" appears twice, both times lemmatized as "ana II".
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana II"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana II"), )),
                Word.of(
                    [
                        Reading.of_name("u", 4),
                        Joiner.hyphen(),
                        Reading.of_name("šu"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                AkkadianWord.of([ValueToken.of("ana")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
    )),
    signs="MI DIŠ DIŠ UD ŠU",
)
示例#26
0
def test_updating_lemmatization(client, bibliography, sign_repository, signs,
                                text_repository):
    """POST a lemmatization for a stored chapter and check the POST and GET responses."""
    allow_signs(signs, sign_repository)
    chapter: Chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    # Walk down to the first variant of line 0 and its first manuscript line.
    first_line = chapter.lines[0]
    first_variant = first_line.variants[0]
    reconstruction = first_variant.reconstruction
    first_manuscript = first_variant.manuscripts[0]

    # Token 1 gets the lemma "aklu I", token 6 gets an empty lemma;
    # all other reconstruction tokens are carried over unchanged.
    lemmatized_reconstruction = (
        reconstruction[0],
        reconstruction[1].set_unique_lemma(
            LemmatizationToken(reconstruction[1].value, (WordId("aklu I"),))
        ),
        *reconstruction[2:6],
        reconstruction[6].set_unique_lemma(
            LemmatizationToken(reconstruction[6].value, tuple())
        ),
    )

    lemmatized_word = Word.of(
        [
            Reading.of_name("ku"),
            Joiner.hyphen(),
            BrokenAway.open(),
            Reading.of_name("nu"),
            Joiner.hyphen(),
            Reading.of_name("ši"),
            BrokenAway.close(),
        ],
        unique_lemma=[WordId("aklu I")],
    )
    lemmatized_manuscript = attr.evolve(
        first_manuscript,
        line=TextLine.of_iterable(
            first_manuscript.line.line_number, (lemmatized_word,)
        ),
    )

    # Rebuild the chapter from the inside out.
    updated_variant = attr.evolve(
        first_variant,
        reconstruction=lemmatized_reconstruction,
        manuscripts=(lemmatized_manuscript,),
    )
    updated_line = attr.evolve(first_line, variants=(updated_variant,))
    updated_chapter = attr.evolve(chapter, lines=(updated_line,))

    expected = create_chapter_dto(updated_chapter)

    lemmatization_url = create_chapter_url(chapter, "/lemmatization")
    post_result = client.simulate_post(lemmatization_url, body=json.dumps(DTO))

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected

    get_result = client.simulate_get(create_chapter_url(chapter))

    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == expected
示例#27
0
)
from ebl.transliteration.domain import atf
from ebl.transliteration.domain.dollar_line import RulingDollarLine
from ebl.transliteration.domain.labels import ColumnLabel, ObjectLabel, SurfaceLabel
from ebl.transliteration.domain.line import Line
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.text import LineLabel, Text
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import Joiner
from ebl.transliteration.domain.translation_line import Extent, TranslationLine
from ebl.transliteration.domain.word_tokens import Word

# Shared fixture: one text line with the word "ha-am" followed by a single ruling.
LINES: Sequence[Line] = (
    TextLine(
        LineNumber(1),
        (
            Word.of(
                [
                    Reading.of_name("ha"),
                    Joiner.hyphen(),
                    Reading.of_name("am"),
                ]
            ),
        ),
    ),
    RulingDollarLine(atf.Ruling.SINGLE),
)
# Parser version recorded on the shared text fixture.
PARSER_VERSION = "1.0.0"
TEXT: Text = Text(LINES, PARSER_VERSION)


def test_of_iterable() -> None:
    """``Text.of_iterable`` equals a ``Text`` built with ``atf.ATF_PARSER_VERSION``."""
    built = Text.of_iterable(LINES)
    expected = Text(LINES, atf.ATF_PARSER_VERSION)
    assert built == expected


def test_lines() -> None:
    """``TEXT.lines`` returns the lines the text was constructed with."""
    actual_lines = TEXT.lines
    assert actual_lines == LINES

from ebl.transliteration.domain import atf
from ebl.transliteration.domain.enclosure_tokens import BrokenAway, PerhapsBrokenAway
from ebl.transliteration.domain.line import ControlLine, EmptyLine, Line
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import Joiner, LanguageShift
from ebl.transliteration.domain.word_tokens import Word


@pytest.mark.parametrize(  # pyre-ignore[56]
    "old,new,expected",
    [
        (
            EmptyLine(),
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
        ),
        (
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            ControlLine("#", " comment"),
            ControlLine("#", " comment"),
        ),
        (
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            TextLine.of_iterable(LineNumber(2),
                                 [Word.of([Reading.of_name("bu")])]),
            TextLine.of_iterable(LineNumber(2),
from ebl.tests.factories.corpus import ManuscriptLineFactory
from ebl.transliteration.domain import atf
from ebl.transliteration.domain.dollar_line import StateDollarLine
from ebl.transliteration.domain.line import EmptyLine
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.note_line import NoteLine
from ebl.transliteration.domain.markup import StringPart


@pytest.mark.parametrize(
    "line,is_beginning",
    [
        # An empty line is not the beginning of a side.
        (EmptyLine(), False),
        # Of the two text lines, only the one numbered 1 counts as a beginning.
        (TextLine(LineNumber(2)), False),
        (TextLine(LineNumber(1)), True),
    ],
)
def test_is_beginning_of_side(line, is_beginning) -> None:
    """A manuscript line built around ``line`` reports the expected ``is_beginning_of_side``."""
    manuscript_line = ManuscriptLineFactory.build(line=line)
    actual = manuscript_line.is_beginning_of_side
    assert actual is is_beginning


@pytest.mark.parametrize(  # pyre-ignore[56]
    "paratext,is_end",
    [
        (tuple(), False),
        ((NoteLine((StringPart("note"), )), ), False),
        ((StateDollarLine(None, atf.Extent.SEVERAL, None, None,
                          None), ), False),
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    # Factory for fragments with a fixed text: five text lines (numbered
    # 1, 2, 3, 6 and 7), several of whose words carry unique lemmas, followed
    # by a run of dollar lines, at-lines, a note line and parallel lines.
    text = Text((
        # Line 1: no lemmas; unidentified sign, logogram with surrogate,
        # column/tabulation markers, broken-away words, a variant, a divider,
        # a commentary protocol and a damaged number.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: three lemmatized words between an opening and a closing
        # broken-away word.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        # NOTE(review): the reading names here embed "₄" and "["
                        # directly in the name string — confirm this is intended.
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3: lemmatized words whose readings contain broken-away
        # brackets in the middle of a value.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6: a damaged unclear sign and a lemmatized word containing an
        # in-word newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: a variant with a compound grapheme, then a shift to
        # normalized Akkadian and a lemmatized Akkadian word.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar lines: state, image, ruling, loose and seal.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines: seal, heading, column, surface, object, discourse,
        # division and composite.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line mixing plain text, emphasis and an Akkadian
        # transliteration part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))