def make_token(self, data, **kwargs): return ( Number.of( data["name_parts"], data["modifiers"], data["flags"], data["sign"], data["sub_index"], ) .set_enclosure_type(frozenset(data["enclosure_type"])) .set_erasure(data["erasure"]) )
def test_number( name_parts, modifiers, flags, sign, expected_value, expected_clean_value, expected_name, ) -> None: number = Number.of(name_parts, modifiers, flags, sign) expected_sub_index = 1 expected_parts = (*name_parts, sign) if sign else name_parts assert number.value == expected_value assert number.clean_value == expected_clean_value assert ( number.get_key() == f"Number⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩" ) assert number.name_parts == name_parts assert number.name == expected_name assert number.sub_index == expected_sub_index assert number.modifiers == tuple(modifiers) assert number.flags == tuple(flags) assert number.lemmatizable is False assert number.sign == sign serialized = { "type": "Number", "name": expected_name, "nameParts": OneOfTokenSchema().dump(name_parts, many=True), "modifiers": modifiers, "subIndex": expected_sub_index, "flags": [flag.value for flag in flags], "sign": sign and OneOfTokenSchema().dump(sign), } assert_token_serialization(number, serialized)
def ebl_atf_text_line__number(self, number, modifiers, flags, sign=None): return Number.of(tuple(number.children), modifiers, flags, sign)
class LemmatizedFragmentFactory(TransliteratedFragmentFactory): text = Text(( TextLine.of_iterable( LineNumber(1, True), ( Word.of([UnidentifiedSign.of()]), Word.of([ Logogram.of_name( "BA", surrogate=[ Reading.of_name("ku"), Joiner.hyphen(), Reading.of_name("u", 4), ], ) ]), Column.of(), Tabulation.of(), Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), Joiner.hyphen(), Reading.of_name("ku"), BrokenAway.close(), Joiner.hyphen(), Reading.of_name("nu"), Joiner.hyphen(), Reading.of_name("ši"), ]), Variant.of(Divider.of(":"), Reading.of_name("ku")), Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), ]), Column.of(2), Divider.of(":", ("@v", ), (Flag.DAMAGE, )), CommentaryProtocol.of("!qt"), Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]), ), ), TextLine.of_iterable( LineNumber(2, True), ( Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]), Word.of([Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"), )), Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana I"), )), Word.of( [ Reading.of_name("u₄"), Joiner.hyphen(), Reading.of_name("š[u"), ], unique_lemma=(WordId("ūsu I"), ), ), Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]), ), ), TextLine.of_iterable( LineNumber(3, True), ( Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]), Word.of( unique_lemma=(WordId("kīdu I"), ), parts=[ Reading.of(( ValueToken.of("k"), BrokenAway.close(), ValueToken.of("i"), )), Joiner.hyphen(), Reading.of_name("du"), ], ), Word.of(unique_lemma=(WordId("u I"), ), parts=[Reading.of_name("u")]), Word.of( unique_lemma=(WordId("bamātu I"), ), parts=[ Reading.of_name("ba"), Joiner.hyphen(), Reading.of_name("ma"), Joiner.hyphen(), Reading.of(( ValueToken.of("t"), BrokenAway.open(), ValueToken.of("i"), )), ], ), Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]), ), ), TextLine.of_iterable( LineNumber(6, True), ( Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), ]), Word.of([UnclearSign.of([Flag.DAMAGE])]), Word.of(unique_lemma=(WordId("mu I"), ), parts=[Reading.of_name("mu")]), Word.of( unique_lemma=(WordId("tamalāku I"), ), parts=[ Reading.of_name("ta"), Joiner.hyphen(), Reading.of_name("ma"), InWordNewline.of(), Joiner.hyphen(), Reading.of_name("tu", 2), ], ), ), ), TextLine.of_iterable( LineNumber(7, True), ( Word.of([ Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"])) ]), LanguageShift.normalized_akkadian(), AkkadianWord.of([ValueToken.of("kur")], unique_lemma=(WordId("normalized I"), )), ), ), StateDollarLine( atf.Qualification.AT_LEAST, 1, ScopeContainer(atf.Surface.OBVERSE, ""), atf.State.MISSING, None, ), ImageDollarLine("1", None, "numbered diagram of triangle"), RulingDollarLine(atf.Ruling.SINGLE), LooseDollarLine("this is a loose line"), SealDollarLine(1), SealAtLine(1), HeadingAtLine(1), ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)), SurfaceAtLine( SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")), ObjectAtLine( ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")), DiscourseAtLine(atf.Discourse.DATE), DivisionAtLine("paragraph", 5), CompositeAtLine(atf.Composite.DIV, "part", 1), NoteLine(( StringPart("a note "), EmphasisPart("italic"), LanguagePart.of_transliteration( Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )), )), ParallelComposition(False, "my name", LineNumber(1)), ParallelText( True, TextId(CorpusGenre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "", "my name"), LineNumber(1), False, ), ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False), ))
class TransliteratedFragmentFactory(FragmentFactory): text = Text(( TextLine.of_iterable( LineNumber(1, True), ( Word.of([UnidentifiedSign.of()]), Word.of([ Logogram.of_name( "BA", surrogate=[ Reading.of_name("ku"), Joiner.hyphen(), Reading.of_name("u", 4), ], ) ]), Column.of(), Tabulation.of(), Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), Joiner.hyphen(), Reading.of_name("ku"), BrokenAway.close(), Joiner.hyphen(), Reading.of_name("nu"), Joiner.hyphen(), Reading.of_name("ši"), ]), Variant.of(Divider.of(":"), Reading.of_name("ku")), Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), ]), Column.of(2), Divider.of(":", ("@v", ), (Flag.DAMAGE, )), CommentaryProtocol.of("!qt"), Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]), ), ), TextLine.of_iterable( LineNumber(2, True), ( Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), ]), Word.of([Logogram.of_name("GI", 6)]), Word.of([Reading.of_name("ana")]), Word.of([ Reading.of_name("u", 4), Joiner.hyphen(), Reading.of(( ValueToken.of("š"), BrokenAway.open(), ValueToken.of("u"), )), ]), Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]), ), ), TextLine.of_iterable( LineNumber(3, True), ( Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]), Word.of([ Reading.of(( ValueToken.of("k"), BrokenAway.close(), ValueToken.of("i"), )), Joiner.hyphen(), Reading.of_name("du"), ]), Word.of([Reading.of_name("u")]), Word.of([ Reading.of_name("ba"), Joiner.hyphen(), Reading.of_name("ma"), Joiner.hyphen(), Reading.of(( ValueToken.of("t"), BrokenAway.open(), ValueToken.of("i"), )), ]), Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]), ), ), TextLine.of_iterable( LineNumber(6, True), ( Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), ]), Word.of([UnclearSign.of([Flag.DAMAGE])]), Word.of([Reading.of_name("mu")]), Word.of([ Reading.of_name("ta"), Joiner.hyphen(), Reading.of_name("ma"), InWordNewline.of(), Joiner.hyphen(), Reading.of_name("tu", 2), ]), ), ), TextLine.of_iterable( LineNumber(7, True), ( Word.of([ Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"])) ]), LanguageShift.normalized_akkadian(), AkkadianWord.of([ValueToken.of("kur")]), ), ), StateDollarLine( atf.Qualification.AT_LEAST, 1, ScopeContainer(atf.Surface.OBVERSE, ""), atf.State.MISSING, None, ), ImageDollarLine("1", None, "numbered diagram of triangle"), RulingDollarLine(atf.Ruling.SINGLE), LooseDollarLine("this is a loose line"), SealDollarLine(1), SealAtLine(1), HeadingAtLine(1), ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)), SurfaceAtLine( SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")), ObjectAtLine( ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")), DiscourseAtLine(atf.Discourse.DATE), DivisionAtLine("paragraph", 5), CompositeAtLine(atf.Composite.DIV, "part", 1), NoteLine(( StringPart("a note "), EmphasisPart("italic"), LanguagePart.of_transliteration( Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )), )), ParallelComposition(False, "my name", LineNumber(1)), ParallelText( True, TextId(CorpusGenre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "", "my name"), LineNumber(1), False, ), ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False), )) signs = ( "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n" "MI DIŠ UD ŠU\n" "KI DU ABZ411 BA MA TI\n" "X MU TA MA UD\n" "ŠU/|BI×IS|") folios = Folios((Folio("WGL", "3"), Folio("XXX", "3"))) record = Record((RecordEntry("test", RecordType.TRANSLITERATION), )) line_to_vec = (( LineToVecEncoding.TEXT_LINE, LineToVecEncoding.TEXT_LINE, LineToVecEncoding.TEXT_LINE, LineToVecEncoding.TEXT_LINE, LineToVecEncoding.TEXT_LINE, LineToVecEncoding.SINGLE_RULING, ), )
TextLine.of_iterable( LineNumber(1), ( Word.of([Reading.of_name("šu")]), Word.of([Reading.of_name("gid", 2)]), ), ), TextLine.of_iterable( LineNumber(2), ( Word.of(parts=[ BrokenAway.open(), Logogram.of_name("U", 4), BrokenAway.close(), Joiner.dot(), Number.of_name("14"), Joiner.dot(), Logogram.of_name("KAM", 2), ]), Word.of(parts=[ Logogram.of_name("U", 4), Joiner.dot(), Number.of_name("15"), Joiner.dot(), Logogram.of_name("KAM", 2), ]), ), ), ], ), (
Word.of(( Variant.of( Reading.of(( ValueToken.of("ku"), BrokenAway.open(), ValueToken( frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE, "r", ), )), Number.of(( ValueToken.of("12"), BrokenAway.open(), ValueToken( frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE, "3", ), )), ), Joiner.hyphen().set_enclosure_type( frozenset({EnclosureType.BROKEN_AWAY})), UnclearSign.of().set_enclosure_type( frozenset({EnclosureType.BROKEN_AWAY})), )), Word.of(( UnknownNumberOfSigns.of().set_enclosure_type( frozenset({EnclosureType.BROKEN_AWAY})), BrokenAway.close().set_enclosure_type( frozenset({EnclosureType.BROKEN_AWAY})),
ErasureState, InWordNewline, LoneDeterminative, Word, ) @pytest.mark.parametrize( # pyre-ignore[56] "atf,expected", [ ("...", Word.of([UnknownNumberOfSigns.of()])), ("x", Word.of([UnclearSign.of()])), ("X", Word.of([UnidentifiedSign.of()])), ("x?", Word.of([UnclearSign.of([atf.Flag.UNCERTAIN])])), ("X#", Word.of([UnidentifiedSign.of([atf.Flag.DAMAGE])])), ("12", Word.of([Number.of_name("12")])), ( "1]2", Word.of([ Number.of((ValueToken.of("1"), BrokenAway.close(), ValueToken.of("2"))) ]), ), ( "1[2", Word.of([ Number.of((ValueToken.of("1"), BrokenAway.open(), ValueToken.of("2"))) ]), ), ("ʾ", Word.of([Reading.of_name("ʾ")])),