def test_note_line():
    """Check parts, ATF and lemmatization of a NoteLine with mixed markup.

    The note consists of a plain string, an emphasis part and one language
    part each for Akkadian, Sumerian and Emesal, all built from the same
    TRANSLITERATION.
    """
    languages = (Language.AKKADIAN, Language.SUMERIAN, Language.EMESAL)

    note_line = NoteLine(
        (
            StringPart("this is a note "),
            EmphasisPart("italic text"),
            *(
                LanguagePart.of_transliteration(language, TRANSLITERATION)
                for language in languages
            ),
        )
    )

    # The language parts are compared against the per-language expected
    # transliteration rather than the raw input.
    expected_parts = (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        *(
            LanguagePart(language, expected_transliteration(language))
            for language in languages
        ),
    )
    assert note_line.parts == expected_parts

    expected_atf = (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}"
    )
    assert note_line.atf == expected_atf

    # One lemmatization token per markup part, without the "#note: " prefix.
    expected_lemmatization = (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )
    assert note_line.lemmatization == expected_lemmatization
def parse_text(atf: str):
    """Parse ``atf`` as the text of a text line and return the transformed tree.

    The string is parsed with LINE_PARSER using the
    ``ebl_atf_text_line__text`` start rule and the resulting tree is passed
    through a fresh TextLineTransformer.
    """
    tree = LINE_PARSER.parse(atf, start="ebl_atf_text_line__text")
    return TextLineTransformer().transform(tree)


def expected_language_part(language: Language, transliteration: str) -> LanguagePart:
    """Build the LanguagePart expected for ``transliteration`` in ``language``.

    The transliteration string is parsed with ``parse_text`` and the result is
    handed to ``LanguagePart.of_transliteration``.
    """
    return LanguagePart.of_transliteration(language, parse_text(transliteration))


@pytest.mark.parametrize(  # pyre-ignore[56]
    "atf,expected",
    [
        ("this is a note ", (StringPart("this is a note "),)),
        ("@i{italic text}", (EmphasisPart("italic text"),)),
        ("@akk{{d}kur}", (expected_language_part(Language.AKKADIAN, "{d}kur"),)),
        ("@sux{kur}", (expected_language_part(Language.SUMERIAN, "kur"),)),
        ("@es{kur}", (expected_language_part(Language.EMESAL, "kur"),)),
        (
            "@bib{RN123@x 2-3a}",
            (BibliographyPart.of(BibliographyId("RN123"), "x 2-3a"),),
        ),
        # In the following @bib cases a backslash-escaped "}" or "@" is
        # expected to end up as a literal character in the id or the pages.
        ("@bib{RN1\\}@2}", (BibliographyPart.of(BibliographyId("RN1}"), "2"),)),
        ("@bib{RN1@1\\}2}", (BibliographyPart.of(BibliographyId("RN1"), "1}2"),)),
        ("@bib{RN12\\@3@3}", (BibliographyPart.of(BibliographyId("RN12@3"), "3"),)),
        ("@bib{RN@1\\}\\@2}", (BibliographyPart.of(BibliographyId("RN"), "1}@2"),)),
        (
            "this is a note @i{italic text}@akk{kur}@sux{kur}",
            (
                StringPart("this is a note "),
Language.AKKADIAN, [Reading.of_name("kur"), Divider.of(":")])
# Bibliography fixture whose third Reference argument is TEXT followed by
# PUNCTUATION.
BIBLIOGRAPHY_PART = BibliographyPart(
    Reference(BibliographyId("1"), ReferenceType.DISCUSSION, TEXT + PUNCTUATION))


@pytest.mark.parametrize(  # pyre-ignore[56]
    "part,expected",
    [
        # String and emphasis parts: only the trailing PUNCTUATION is dropped,
        # leading and inner occurrences stay.
        (
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
            StringPart(f"{PUNCTUATION}A{PUNCTUATION}A"),
        ),
        (
            EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"),
            EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A"),
        ),
        # Language and bibliography parts are expected to be unchanged.
        (LANGUAGE_PART, LANGUAGE_PART),
        (BIBLIOGRAPHY_PART, BIBLIOGRAPHY_PART),
    ],
)
def test_part_rstrip(part: MarkupPart, expected: MarkupPart) -> None:
    """Check that ``part.rstrip()`` equals ``expected`` for each markup part."""
    assert part.rstrip() == expected


@pytest.mark.parametrize(  # pyre-ignore[56]
    "part,expected",
    [
        (StringPart(TEXT), StringPart(TEXT.title())),
        (EmphasisPart(TEXT), EmphasisPart(TEXT.title())),
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Fragment factory whose ``text`` carries ``unique_lemma`` annotations.

    Subclasses TransliteratedFragmentFactory and replaces its ``text`` with a
    variant in which several words on text lines 2, 3, 6 and 7 have a
    ``unique_lemma``. The lines that are not TextLines (dollar lines, at
    lines, note line and parallel lines) are the same as in the parent's
    ``text``.
    """

    # NOTE(review): text line 2 differs from TransliteratedFragmentFactory in
    # more than the lemmas: its first word has no BrokenAway.close(), and the
    # lemmatized "ūsu I" word uses Reading.of_name("u₄") and
    # Reading.of_name("š[u") where the parent uses Reading.of_name("u", 4) and
    # a Reading with an embedded BrokenAway.open(). Confirm this is intended.
    text = Text((
        # Text line 1: no lemmatized words.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Text line 2: three words carry a unique_lemma.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Text line 3: three words carry a unique_lemma.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Text line 6: two words carry a unique_lemma.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Text line 7: a variant word, a language shift and a lemmatized
        # AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line with a string, an emphasis and an Akkadian language part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration and matching metadata.

    Extends FragmentFactory with a ``text`` made of five text lines followed
    by dollar lines, at lines, a note line and parallel lines, plus the
    ``signs``, ``folios``, ``record`` and ``line_to_vec`` values defined
    below.
    """

    text = Text((
        # Text line 1.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Text line 2.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Text line 3.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Text line 6 (the line numbers jump from 3 to 6).
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Text line 7: a variant word, a language shift and an AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line with a string, an emphasis and an Akkadian language part.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Five newline-separated rows of sign names; presumably one row per text
    # line of ``text`` above (both have five) — confirm against the consumer.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # A one-element tuple holding a single encoding: five TEXT_LINE entries
    # followed by one SINGLE_RULING.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
def ebl_atf_text_line__emphasis_part(self, text: str) -> EmphasisPart:
    """Wrap ``text`` in an EmphasisPart and return it."""
    emphasis = EmphasisPart(text)
    return emphasis
]), Text.of_iterable([ TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]) ]), ), ( Text.of_iterable([NoteLine((StringPart("this is a note "), ))]), Text.of_iterable( [NoteLine((StringPart("this is another note "), ))]), Text.of_iterable( [NoteLine((StringPart("this is another note "), ))]), ), ( Text.of_iterable([NoteLine((StringPart("this is a note "), ))]), Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]), Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]), ), ( Text.of_iterable([ NoteLine((LanguagePart.of_transliteration( Language.AKKADIAN, (ValueToken.of("bu"), )), )) ]), Text.of_iterable([ NoteLine((LanguagePart.of_transliteration( Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )), )) ]), Text.of_iterable([ NoteLine((LanguagePart.of_transliteration( Language.AKKADIAN,
), ( SealDollarLine(1), { "type": "SealDollarLine", "prefix": "$", "content": [OneOfTokenSchema().dump(ValueToken.of(" seal 1"))], "number": 1, "displayValue": "seal 1", }, ), ( NoteLine( ( StringPart("a note "), EmphasisPart("italic"), LanguagePart.of_transliteration( Language.AKKADIAN, [Word.of([Reading.of_name("bu")])] ), BibliographyPart.of(BibliographyId("A"), "1-4"), ) ), { "type": "NoteLine", "prefix": "#note: ", "parts": [ {"type": "StringPart", "text": "a note "}, {"type": "EmphasisPart", "text": "italic"}, { "type": "LanguagePart", "language": Language.AKKADIAN.name,
def make_part(self, data, **kwargs) -> EmphasisPart:
    """Build an EmphasisPart from the ``"text"`` entry of ``data``.

    Additional keyword arguments are accepted but not used.
    """
    text = data["text"]
    return EmphasisPart(text)