Exemplo n.º 1
0
def test_note_line():
    """Build a NoteLine from text, emphasis and three language parts.

    Verifies the stored parts, the serialized ATF string and the
    lemmatization tokens of the resulting line.
    """
    languages = (Language.AKKADIAN, Language.SUMERIAN, Language.EMESAL)
    language_parts = tuple(
        LanguagePart.of_transliteration(language, TRANSLITERATION)
        for language in languages
    )
    line = NoteLine(
        (StringPart("this is a note "), EmphasisPart("italic text"))
        + language_parts
    )

    # Each language part is expected to carry the transliteration produced
    # by expected_transliteration for its own language.
    expected_parts = (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
    ) + tuple(
        LanguagePart(language, expected_transliteration(language))
        for language in languages
    )
    expected_atf = (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}"
    )
    expected_lemmatization = (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )

    assert line.parts == expected_parts
    assert line.atf == expected_atf
    assert line.lemmatization == expected_lemmatization
class ManuscriptLineFactory(factory.Factory):
    """Factory producing ``ManuscriptLine`` fixtures.

    ``manuscript_id`` and the line number inside ``line`` are driven by
    factory sequences; ``labels``, ``paratext`` and ``omitted_words`` are
    fixed values shared by every built instance.
    """

    class Meta:
        model = ManuscriptLine

    # The sequence counter itself is used as the manuscript id.
    manuscript_id = factory.Sequence(lambda n: n)
    # Fixed labels: an obverse surface label and a column label "iii" with
    # collation and correction statuses.
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # A text line whose number is the sequence counter and whose content is
    # a single word: ku-[nu-ši] (readings joined by hyphens, with a
    # BrokenAway open/close pair around the last two readings).
    line = factory.Sequence(
        lambda n: TextLine.of_iterable(
            LineNumber(n),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    # Paratext: one note line followed by a single ruling dollar line.
    paratext = (NoteLine((StringPart("note"),)), RulingDollarLine(Ruling.SINGLE))
    # NOTE(review): presumably indexes of omitted words - confirm against
    # ManuscriptLine.
    omitted_words = (1,)
class ChapterListingFactory(factory.Factory):
    """Factory producing ``ChapterListing`` fixtures.

    ``stage`` is a fuzzy choice over ``Stage`` and ``name`` a Faker
    sentence; ``translation`` and ``uncertain_fragments`` are fixed.
    """

    class Meta:
        model = ChapterListing

    stage = factory.fuzzy.FuzzyChoice(Stage)
    name = factory.Faker("sentence")
    # A single English translation line with the text "foo".
    translation = (TranslationLine((StringPart("foo"),), "en", None),)
    # Empty by default; callers can override it when building.
    uncertain_fragments = tuple()
Exemplo n.º 4
0
def test_line_display_of_line() -> None:
    """LineDisplay.of_line mirrors the line and the data of its first variant.

    Also checks that the display title equals make_title applied to the
    line's translation lines.
    """
    german_line = TranslationLine((StringPart("bar"),), "de", None)
    default_line = TranslationLine((StringPart("foo"),), DEFAULT_LANGUAGE, None)
    translation_lines = (german_line, default_line)
    line = LineFactory.build(translation=translation_lines)

    line_display = LineDisplay.of_line(line)

    # Variant-level fields are expected to come from the first variant.
    first_variant = line.variants[0]
    expected_display = LineDisplay(
        line.number,
        line.is_second_line_of_parallelism,
        line.is_beginning_of_section,
        first_variant.intertext,
        first_variant.reconstruction,
        translation_lines,
        first_variant.note,
        first_variant.parallel_lines,
    )
    assert line_display == expected_display
    assert line_display.title == make_title(translation_lines)
class LineVariantFactory(factory.Factory):
    """Factory producing ``LineVariant`` fixtures.

    The reconstruction is fixed; ``note`` and ``intertext`` are fuzzy
    choices; ``manuscripts`` and ``parallel_lines`` are built through
    ``factory.List`` with ``TupleFactory``.
    """

    class Meta:
        model = LineVariant

    class Params:
        # factory_boy parameters: used by the declarations below rather
        # than being fields declared on this factory directly.
        manuscript_id = factory.Sequence(lambda n: n)
        # The manuscript line sub-factory receives this factory's
        # manuscript_id parameter.
        manuscript = factory.SubFactory(
            ManuscriptLineFactory,
            manuscript_id=factory.SelfAttribute("..manuscript_id"),
        )

    # Fixed reconstruction: a normalized-Akkadian language shift, the word
    # "buāru", an uncertain metrical foot separator, an opening BrokenAway,
    # an unknown number of signs, a certain caesura and a damaged word that
    # closes the BrokenAway.
    reconstruction = (
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of("buāru"),)),
        MetricalFootSeparator.uncertain(),
        BrokenAway.open(),
        UnknownNumberOfSigns.of(),
        Caesura.certain(),
        AkkadianWord.of(
            (
                UnknownNumberOfSigns.of(),
                BrokenAway.close(),
                Joiner.hyphen(),
                ValueToken.of("buāru"),
            ),
            (Flag.DAMAGE,),
        ),
    )
    # Either no note or a note line with the text "a note".
    note = factory.fuzzy.FuzzyChoice([None, NoteLine((StringPart("a note"),))])
    # A one-element collection holding the `manuscript` parameter.
    # NOTE(review): TupleFactory presumably yields a tuple - confirm.
    manuscripts = factory.List([factory.SelfAttribute("..manuscript")], TupleFactory)
    # Either an empty intertext or a single string part "bar".
    intertext = factory.fuzzy.FuzzyChoice([tuple(), (StringPart("bar"),)])
    # One parallel line from each of the three parallel-line factories.
    parallel_lines = factory.List(
        [
            factory.SubFactory(ParallelCompositionFactory),
            factory.SubFactory(ParallelTextFactory),
            factory.SubFactory(ParallelFragmentFactory),
        ],
        TupleFactory,
    )
class LineFactory(factory.Factory):
    """Factory producing ``Line`` fixtures.

    ``number`` comes from a sequence, the two boolean flags from Faker, and
    ``variants`` holds the single variant built by ``LineVariantFactory``.
    """

    class Meta:
        model = Line

    class Params:
        # factory_boy parameters: used by the declarations below rather
        # than being fields declared on this factory directly.
        manuscript_id = factory.Sequence(lambda n: n)
        # The variant sub-factory receives this factory's manuscript_id
        # parameter.
        variant = factory.SubFactory(
            LineVariantFactory, manuscript_id=factory.SelfAttribute("..manuscript_id")
        )

    # The sequence counter is wrapped in a LineNumber.
    number = factory.Sequence(lambda n: LineNumber(n))
    # A one-element collection holding the `variant` parameter.
    variants = factory.List([factory.SelfAttribute("..variant")], TupleFactory)
    is_second_line_of_parallelism = factory.Faker("boolean")
    is_beginning_of_section = factory.Faker("boolean")
    # A single English translation line with the text "foo".
    translation = (TranslationLine((StringPart("foo"),), "en", None),)
# Module-level fixture constants. Their consumers lie outside this excerpt.
PERIOD_MODIFIER = PeriodModifier.LATE
PERIOD = Period.OLD_BABYLONIAN
PROVENANCE = Provenance.NINEVEH
TYPE = ManuscriptType.LIBRARY
NOTES = "some notes"
# One-line transliterations, each holding a single one-reading word.
COLOPHON = Transliteration.of_iterable(
    [TextLine(LineNumber(1, True), (Word.of([Reading.of_name("ku")]), ))])
UNPLACED_LINES = Transliteration.of_iterable(
    [TextLine(LineNumber(4, True), (Word.of([Reading.of_name("bu")]), ))])
# A single reference built by ReferenceFactory when the module is imported.
REFERENCES = (ReferenceFactory.build(), )
LINE_NUMBER = LineNumber(1)
LINE_RECONSTRUCTION = (AkkadianWord.of((ValueToken.of("buāru"), )), )
IS_SECOND_LINE_OF_PARALLELISM = True
IS_BEGINNING_OF_SECTION = True
LABELS = (SurfaceLabel.from_label(Surface.OBVERSE), )
# Paratext: one note line followed by a single ruling dollar line.
PARATEXT = (NoteLine((StringPart("note"), )), RulingDollarLine(Ruling.SINGLE))
OMITTED_WORDS = (1, )

NOTE = None
PARALLEL_LINES = (ParallelComposition(False, "a composition", LineNumber(7)), )
TRANSLATION = (TranslationLine((StringPart("foo"), ), "en", None), )
SIGNS = ("FOO BAR", )

# Text line 1 with a single word consisting of the reading "ku".
MANUSCRIPT_TEXT_1 = TextLine(LineNumber(1),
                             (Word.of([Reading.of([ValueToken.of("ku")])]), ))

LINE_VARIANT_1 = LineVariant(
    LINE_RECONSTRUCTION,
    NOTE,
    (ManuscriptLine(MANUSCRIPT_ID, LABELS, MANUSCRIPT_TEXT_1, PARATEXT,
                    OMITTED_WORDS), ),
 def ebl_atf_text_line__string_part(self, text: str) -> StringPart:
     """Wrap ``text`` in a ``StringPart``."""
     string_part = StringPart(text)
     return string_part
Exemplo n.º 9
0
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.translation_line import TranslationLine

# Module-level fixture values used to build CHAPTER and TEXT below.
GENRE = Genre.LITERATURE
CATEGORY = 1
INDEX = 2
NAME = "Palm & Vine"
HAS_DOI = True
VERSES = 100
APPROXIMATE = True
CLASSIFICATION = Classification.ANCIENT
STAGE = Stage.NEO_BABYLONIAN
INTRO = "**Intro**"
CHAPTER_NAME = "I"
# Two translation lines: a German one followed by an English one.
TRANSLATION = (
    TranslationLine([StringPart("not the title")], "de"),
    TranslationLine([StringPart("the title,")], "en"),
)
UNCERTAIN_FRAGMENTS = (UncertainFragment(MuseumNumber("X", "1"), True), )
CHAPTER = ChapterListing(STAGE, CHAPTER_NAME, TRANSLATION, UNCERTAIN_FRAGMENTS)

# The text under test, holding the single chapter listing defined above.
TEXT = Text(GENRE, CATEGORY, INDEX, NAME, HAS_DOI, VERSES, APPROXIMATE, INTRO,
            (CHAPTER, ))


def test_text_constructor_sets_correct_fields() -> None:
    """Check that TEXT exposes the id, genre, category, index and name it was built from."""
    # The id is expected to equal a TextId made of genre, category and index.
    assert TEXT.id == TextId(GENRE, CATEGORY, INDEX)
    assert TEXT.genre == GENRE
    assert TEXT.category == CATEGORY
    assert TEXT.index == INDEX
    assert TEXT.name == NAME
from ebl.transliteration.domain.text_line_transformer import TextLineTransformer


def parse_text(atf: str):
    """Parse ``atf`` from the ``ebl_atf_text_line__text`` start rule and transform the tree.

    Returns whatever ``TextLineTransformer().transform`` produces for the
    parsed tree.
    """
    parsed_tree = LINE_PARSER.parse(atf, start="ebl_atf_text_line__text")
    transformer = TextLineTransformer()
    return transformer.transform(parsed_tree)


def expected_language_part(language: Language, transliteration: str) -> LanguagePart:
    """Build the ``LanguagePart`` expected for ``transliteration`` in ``language``.

    The transliteration string is first parsed with ``parse_text``.
    """
    parsed_tokens = parse_text(transliteration)
    return LanguagePart.of_transliteration(language, parsed_tokens)


@pytest.mark.parametrize(  # pyre-ignore[56]
    "atf,expected",
    [
        ("this is a note ", (StringPart("this is a note "),)),
        ("@i{italic text}", (EmphasisPart("italic text"),)),
        ("@akk{{d}kur}", (expected_language_part(Language.AKKADIAN, "{d}kur"),)),
        ("@sux{kur}", (expected_language_part(Language.SUMERIAN, "kur"),)),
        ("@es{kur}", (expected_language_part(Language.EMESAL, "kur"),)),
        (
            "@bib{RN123@x 2-3a}",
            (BibliographyPart.of(BibliographyId("RN123"), "x 2-3a"),),
        ),
        ("@bib{RN1\\}@2}", (BibliographyPart.of(BibliographyId("RN1}"), "2"),)),
        ("@bib{RN1@1\\}2}", (BibliographyPart.of(BibliographyId("RN1"), "1}2"),)),
        ("@bib{RN12\\@3@3}", (BibliographyPart.of(BibliographyId("RN12@3"), "3"),)),
        ("@bib{RN@1\\}\\@2}", (BibliographyPart.of(BibliographyId("RN"), "1}@2"),)),
        (
            "this is a note @i{italic text}@akk{kur}@sux{kur}",
            (
    [
        (EmptyLine(), False),
        (TextLine(LineNumber(2)), False),
        (TextLine(LineNumber(1)), True),
    ],
)
def test_is_beginning_of_side(line, is_beginning) -> None:
    """A manuscript line built around ``line`` reports the expected ``is_beginning_of_side``."""
    manuscript_line = ManuscriptLineFactory.build(line=line)
    assert manuscript_line.is_beginning_of_side is is_beginning


@pytest.mark.parametrize(  # pyre-ignore[56]
    "paratext,is_end",
    [
        # No paratext at all.
        ((), False),
        # A note line only.
        ((NoteLine((StringPart("note"),)),), False),
        # A state dollar line with the SEVERAL extent only.
        ((StateDollarLine(None, atf.Extent.SEVERAL, None, None, None),), False),
        # A state dollar line with the END_OF extent.
        ((StateDollarLine(None, atf.Extent.END_OF, None, None, None),), True),
        # SEVERAL followed by END_OF.
        (
            (
                StateDollarLine(None, atf.Extent.SEVERAL, None, None, None),
                StateDollarLine(None, atf.Extent.END_OF, None, None, None),
            ),
            True,
        ),
    ],
)
def test_is_end_of_side(paratext, is_end) -> None:
    """A manuscript line with an empty line and ``paratext`` reports the expected ``is_end_of_side``."""
    manuscript_line = ManuscriptLineFactory.build(
        line=EmptyLine(),
        paratext=paratext,
    )
    assert manuscript_line.is_end_of_side is is_end
Exemplo n.º 12
0
     (
         "@prism!",
         [
             ObjectAtLine(
                 ObjectLabel([atf.Status.CORRECTION], atf.Object.PRISM))
         ],
     ),
     ("@prism", [ObjectAtLine(ObjectLabel([], atf.Object.PRISM))]),
     ("@object stone",
      [ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "stone"))]),
     ("@fragment 1",
      [ObjectAtLine(ObjectLabel([], atf.Object.FRAGMENT, "1"))]),
     ("@edge a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.EDGE, "a"))]),
     ("@face a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.FACE, "a"))]),
     ("@h1", [HeadingAtLine(1, tuple())]),
     ("@h1 foo", [HeadingAtLine(1, (StringPart("foo"), ))]),
     ("@column 1", [ColumnAtLine(ColumnLabel.from_int(1))]),
     (
         "@column 1!",
         [ColumnAtLine(ColumnLabel.from_int(1, (atf.Status.CORRECTION, )))],
     ),
     (
         "@column 1!*",
         [
             ColumnAtLine(
                 ColumnLabel.from_int(
                     1, (atf.Status.CORRECTION, atf.Status.COLLATION)))
         ],
     ),
     ("@date", [DiscourseAtLine(atf.Discourse.DATE)]),
 ],
import pytest

from ebl.transliteration.domain.lark_parser import parse_translation_line
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.translation_line import Extent, TranslationLine
from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.atf import Surface


@pytest.mark.parametrize(
    "atf,expected_line",
    [
        ("#tr: translation", TranslationLine((StringPart("translation"),), "en", None)),
        (
            "#tr.en: translation",
            TranslationLine((StringPart("translation"),), "en", None),
        ),
        (
            "#tr.ar.(2): translation",
            TranslationLine(
                (StringPart("translation"),), "ar", Extent(LineNumber(2), tuple())
            ),
        ),
        (
            "#tr.(2): translation",
            TranslationLine(
                (StringPart("translation"),), "en", Extent(LineNumber(2), tuple())
            ),
        ),
        (
Exemplo n.º 14
0
     Line(
         LineNumber(1),
         (LineVariant(RECONSTRUCTION, NOTE, tuple()), ),
         IS_SECOND_LINE_OF_PARALLELISM,
         IS_BEGINNING_OF_SECTION,
     ),
     Line(LineNumber(1),
          (LineVariant(RECONSTRUCTION, NOTE, tuple()), ), True, True),
     Line(LineNumber(1),
          (LineVariant(RECONSTRUCTION, NOTE, tuple()), ), True, True),
 ),
 (
     Line(
         LineNumber(1),
         (LineVariant(RECONSTRUCTION, NoteLine(
             (StringPart("a note"), )), tuple()), ),
         IS_SECOND_LINE_OF_PARALLELISM,
         IS_BEGINNING_OF_SECTION,
     ),
     Line(
         LineNumber(1),
         (LineVariant(RECONSTRUCTION,
                      NoteLine((StringPart("new note"), )), tuple()), ),
         IS_SECOND_LINE_OF_PARALLELISM,
         IS_BEGINNING_OF_SECTION,
     ),
     Line(
         LineNumber(1),
         (LineVariant(RECONSTRUCTION,
                      NoteLine((StringPart("new note"), )), tuple()), ),
         IS_SECOND_LINE_OF_PARALLELISM,
import pytest

from ebl.lemmatization.domain.lemmatization import LemmatizationToken
from ebl.transliteration.domain.atf import Atf, Surface
from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.translation_line import Extent, TranslationLine


@pytest.mark.parametrize(  # pyre-ignore[56]
    "parts,language,extent,prefix,translation",
    [
        ((StringPart("foo"), StringPart("bar")), "en", None, "#tr.en",
         "foobar"),
        ((StringPart("foo"), ), "de", Extent(
            LineNumber(1)), "#tr.de.(1)", "foo"),
        (
            (StringPart("foo"), ),
            "de",
            Extent(
                LineNumber(1),
                (SurfaceLabel(tuple(), Surface.OBVERSE), ColumnLabel(
                    tuple(), 2)),
            ),
            "#tr.de.(o ii 1)",
            "foo",
        ),
    ],
)
def test_parallel_fragment(parts, language, extent, prefix,
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration.

    ``text`` holds five text lines followed by dollar lines, at lines, a
    note line and three parallel lines. ``signs``, ``folios``, ``record``
    and ``line_to_vec`` are fixed values as well.
    """

    text = Text((
        # Text lines numbered 1, 2, 3, 6 and 7.
        # NOTE(review): the second LineNumber argument is presumably the
        # prime flag - confirm.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # A note line mixing plain text, emphasis and an Akkadian part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Five newline-separated rows of sign names.
    # NOTE(review): presumably one row per text line above - confirm.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # A tuple holding one encoding tuple: five TEXT_LINE entries followed
    # by SINGLE_RULING.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Variant of ``TransliteratedFragmentFactory`` with lemmatized words.

    Overrides ``text`` only: several words in text lines 2, 3, 6 and 7
    carry a ``unique_lemma``; the remaining lines repeat the parent's
    fixture.
    """

    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # NOTE(review): unlike the parent fixture, the first word of line 2
        # has no BrokenAway.close(), and the fourth word uses
        # Reading.of_name("u₄") / Reading.of_name("š[u") instead of
        # structured readings - confirm this difference is intentional.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # A note line mixing plain text, emphasis and an Akkadian part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.parallel_line import ParallelComposition
from ebl.transliteration.domain.translation_line import TranslationLine

# Module-level fixtures, built once when the module is imported.
REFERENCES = (ReferenceFactory.build(with_document=True), )
MANUSCRIPT = ManuscriptFactory.build(references=REFERENCES)
UNCERTAIN_FRAGMENTS = (MuseumNumber.of("K.1"), )
# Two manuscript lines that both point at MANUSCRIPT's id.
FIRST_MANUSCRIPT_LINE = ManuscriptLineFactory.build(
    manuscript_id=MANUSCRIPT.id)
SECOND_MANUSCRIPT_LINE = ManuscriptLineFactory.build(
    manuscript_id=MANUSCRIPT.id)

# A variant holding both manuscript lines, one parallel composition and a
# fixed intertext.
LINE_VARIANT = LineVariantFactory.build(
    manuscripts=(FIRST_MANUSCRIPT_LINE, SECOND_MANUSCRIPT_LINE),
    parallel_lines=(ParallelComposition(False, "name", LineNumber(2)), ),
    intertext=(StringPart("bar"), ),
)
TRANSLATION_LINE = TranslationLine((StringPart("foo"), ), "en", None)
LINE = LineFactory.build(variants=(LINE_VARIANT, ),
                         translation=(TRANSLATION_LINE, ))
# The chapter combining the manuscript, uncertain fragments and line above.
CHAPTER = ChapterFactory.build(manuscripts=(MANUSCRIPT, ),
                               uncertain_fragments=UNCERTAIN_FRAGMENTS,
                               lines=(LINE, ))


def strip_documents(chapter: Chapter) -> Chapter:
    return attr.evolve(
        chapter,
        manuscripts=tuple(
            attr.evolve(
                manuscript,
from ebl.transliteration.domain.at_line import (
    DiscourseAtLine,
    SurfaceAtLine,
    ColumnAtLine,
    ObjectAtLine,
    CompositeAtLine,
    HeadingAtLine,
)
from ebl.transliteration.domain.labels import SurfaceLabel, ColumnLabel, ObjectLabel
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.atf import Atf


@pytest.mark.parametrize(  # pyre-ignore[56]
    "parts,parts_text",
    [
        # Heading without any parts.
        ((), ""),
        # Heading with two string parts.
        ((StringPart("a"), StringPart("b c")), " ab c"),
    ],
)
def test_at_line_heading(parts, parts_text) -> None:
    """A level-1 heading exposes matching ATF, lemmatization and display value."""
    heading = HeadingAtLine(1, parts)
    expected_value = f"h1{parts_text}"

    assert heading.atf == Atf(f"@h1{parts_text}")
    assert heading.lemmatization == (LemmatizationToken(expected_value),)
    assert heading.display_value == expected_value


def test_at_line_column() -> None:
    """A collated column 1 at-line lemmatizes and displays as ``column 1*``."""
    column_label = ColumnLabel.from_int(1, (atf.Status.COLLATION,))
    column_line = ColumnAtLine(column_label)
    expected_value = "column 1*"

    assert column_line.lemmatization == (LemmatizationToken(expected_value),)
    assert column_line.display_value == expected_value

         "displayValue": "seal 1",
     },
 ),
 (
     HeadingAtLine(1),
     {
         "prefix": "@",
         "content": [OneOfTokenSchema().dump(ValueToken.of("h1"))],
         "type": "HeadingAtLine",
         "number": 1,
         "displayValue": "h1",
         "parts": [],
     },
 ),
 (
     HeadingAtLine(2, (StringPart("foo"),)),
     {
         "prefix": "@",
         "content": [OneOfTokenSchema().dump(ValueToken.of("h2 foo"))],
         "type": "HeadingAtLine",
         "number": 2,
         "displayValue": "h2 foo",
         "parts": [{"type": "StringPart", "text": "foo"}],
     },
 ),
 (
     StateDollarLine(
         atf.Qualification.AT_LEAST,
         atf.Extent.BEGINNING_OF,
         ScopeContainer(atf.Surface.OBVERSE),
         atf.State.BLANK,
# Characters used as leading, inner and trailing punctuation in the
# rstrip test cases.
PUNCTUATION = ";,:.-–—"
TEXT = "sed nec tortor varius, iaculis."
# An Akkadian language part consisting of a reading and a divider.
LANGUAGE_PART = LanguagePart(
    Language.AKKADIAN,
    [Reading.of_name("kur"), Divider.of(":")])
# A bibliography part whose reference's third argument ends with the
# punctuation characters.
BIBLIOGRAPHY_PART = BibliographyPart(
    Reference(BibliographyId("1"), ReferenceType.DISCUSSION,
              TEXT + PUNCTUATION))


@pytest.mark.parametrize(  # pyre-ignore[56]
    "part,expected",
    [
        # String parts lose only their trailing punctuation.
        (
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A"),
        ),
        # Emphasis parts lose only their trailing punctuation.
        (
            EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
            EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A"),
        ),
        # Language and bibliography parts are expected back unchanged.
        (LANGUAGE_PART, LANGUAGE_PART),
        (BIBLIOGRAPHY_PART, BIBLIOGRAPHY_PART),
    ],
)
def test_part_rstrip(part: MarkupPart, expected: MarkupPart) -> None:
    """``rstrip`` on a markup part yields the expected part."""
    stripped = part.rstrip()
    assert stripped == expected


@pytest.mark.parametrize(  # pyre-ignore[56]
Exemplo n.º 22
0
 (
     Text.of_iterable([
         TextLine.of_iterable(LineNumber(1),
                              [Word.of([Reading.of_name("bu")])])
     ]),
     Text.of_iterable([
         TextLine.of_iterable(LineNumber(1),
                              [Word.of([Reading.of_name("bu")])])
     ]),
     Text.of_iterable([
         TextLine.of_iterable(LineNumber(1),
                              [Word.of([Reading.of_name("bu")])])
     ]),
 ),
 (
     Text.of_iterable([NoteLine((StringPart("this is a note "), ))]),
     Text.of_iterable(
         [NoteLine((StringPart("this is another note "), ))]),
     Text.of_iterable(
         [NoteLine((StringPart("this is another note "), ))]),
 ),
 (
     Text.of_iterable([NoteLine((StringPart("this is a note "), ))]),
     Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]),
     Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]),
 ),
 (
     Text.of_iterable([
         NoteLine((LanguagePart.of_transliteration(
             Language.AKKADIAN, (ValueToken.of("bu"), )), ))
     ]),
def test_make_title() -> None:
    """``make_title`` over TRANSLATION yields a single string part "The Title"."""
    expected_title = (StringPart("The Title"),)
    title = make_title(TRANSLATION)
    assert title == expected_title
Exemplo n.º 24
0
 def make_part(self, data, **kwargs) -> StringPart:
     """Build a ``StringPart`` from ``data["text"]``; extra keyword arguments are unused."""
     text = data["text"]
     return StringPart(text)