def test_set_language():
    """``set_language`` swaps only the language, leaving the parts intact."""
    determinative_parts = [Determinative.of([Reading.of_name("bu")])]
    target_language = Language.SUMERIAN

    akkadian_token = LoneDeterminative.of(determinative_parts, Language.AKKADIAN)
    sumerian_token = LoneDeterminative.of(determinative_parts, target_language)

    assert akkadian_token.set_language(target_language) == sumerian_token
def test_text_line_of_iterable(code: str, language: Language) -> None:
    """``TextLine.of_iterable`` propagates languages across shifts and applies
    enclosure types (here ``BROKEN_AWAY``) to the tokens inside them."""
    broken_away = frozenset({EnclosureType.BROKEN_AWAY})
    source_tokens = [
        Word.of([Reading.of_name("first")]),
        LanguageShift.of(code),
        Word.of([Reading.of_name("second")]),
        LanguageShift.of("%sb"),
        LoneDeterminative.of([Determinative.of([Reading.of_name("third")])]),
        Word.of([BrokenAway.open(), Reading.of_name("fourth")]),
        UnknownNumberOfSigns.of(),
        BrokenAway.close(),
    ]
    expected_tokens = (
        Word.of([Reading.of_name("first")], DEFAULT_LANGUAGE),
        LanguageShift.of(code),
        # The parametrized shift sets the language of the following word.
        Word.of([Reading.of_name("second")], language),
        LanguageShift.of("%sb"),
        LoneDeterminative.of(
            [Determinative.of([Reading.of_name("third")])], Language.AKKADIAN
        ),
        Word.of(
            [
                BrokenAway.open(),
                # "fourth" sits inside the open broken away, so both the
                # reading and its value token carry the enclosure type.
                Reading.of(
                    (ValueToken(broken_away, ErasureState.NONE, "fourth"),)
                ).set_enclosure_type(broken_away),
            ],
            DEFAULT_LANGUAGE,
        ),
        UnknownNumberOfSigns(broken_away, ErasureState.NONE),
        BrokenAway.close().set_enclosure_type(broken_away),
    )

    line = TextLine.of_iterable(LINE_NUMBER, source_tokens)

    assert line.line_number == LINE_NUMBER
    assert line.content == expected_tokens
    expected_key = (
        f"TextLine⁞{line.atf}"
        f"⟨{'⁚'.join(token.get_key() for token in expected_tokens)}⟩"
    )
    assert line.key == expected_key
    assert line.atf == f"1. first {code} second %sb {{third}} [fourth ...]"
def make_token(self, data, **kwargs):
    """Build a ``LoneDeterminative`` from marshmallow-deserialized *data*."""
    token = LoneDeterminative.of(
        data["parts"],
        data["language"],
        tuple(data["unique_lemma"]),
        data["erasure"],
        data["alignment"],
        data["variant"],
    )
    # The enclosure type is not a parameter of ``of``; apply it afterwards.
    return token.set_enclosure_type(frozenset(data["enclosure_type"]))
def test_of_value():
    """``of_value`` yields a token with the default language and no lemma,
    erasure or alignment."""
    determinative_parts = [Determinative.of([Reading.of_name("bu")])]

    token = LoneDeterminative.of_value(determinative_parts)

    assert token.value == "{bu}"
    assert token.lemmatizable is False
    assert token.language == DEFAULT_LANGUAGE
    assert token.normalized is False
    assert token.unique_lemma == tuple()
    assert token.erasure == ErasureState.NONE
    assert token.alignment is None
def test_lone_determinative(language):
    """Attribute access, serialization, equality and hashing of
    ``LoneDeterminative``."""
    parts = [Determinative.of([Reading.of_name("mu")])]
    token = LoneDeterminative.of(parts, language)
    same = LoneDeterminative.of(parts, language)
    different_language = LoneDeterminative.of(parts, Language.UNKNOWN)
    different_parts = LoneDeterminative.of(
        [Determinative.of([Reading.of_name("bu")])], language
    )

    assert token.value == "{mu}"
    assert token.lemmatizable is False
    assert token.language == language
    assert token.normalized is False
    assert token.unique_lemma == tuple()

    expected_serialization = {
        "type": "LoneDeterminative",
        "uniqueLemma": [],
        "normalized": False,
        "language": token.language.name,
        "lemmatizable": token.lemmatizable,
        "alignable": token.lemmatizable,
        "erasure": ErasureState.NONE.name,
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(token, expected_serialization)

    assert token == same
    assert hash(token) == hash(same)

    # Differing language or parts must break both equality and the hash.
    for unequal in (different_language, different_parts):
        assert token != unequal
        assert hash(token) != hash(unequal)

    assert token != ValueToken.of("{mu}")
( ("1. [...] {bu} [...]\n" "2. [...]{bu} [...]\n" "3. [...] {bu}[...]\n" "4. [...]{bu}[...]"), [ TextLine.of_iterable( LineNumber(1), ( Word.of(( BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), )), LoneDeterminative.of_value( [Determinative.of([Reading.of_name("bu")])], ErasureState.NONE, ), Word.of(( BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(), )), ), ), TextLine.of_iterable( LineNumber(2), ( Word.of([ BrokenAway.open(), UnknownNumberOfSigns.of(), BrokenAway.close(),
), ( ControlLine("#", " comment"), { "type": "ControlLine", "prefix": "#", "content": [OneOfTokenSchema().dump(ValueToken.of(" comment"))], }, ), ( TextLine.of_iterable( LineNumber(1), ( DocumentOrientedGloss.open(), Word.of([Reading.of_name("bu")]), LoneDeterminative.of([Determinative.of([Reading.of_name("d")])]), DocumentOrientedGloss.close(), ), ), { "type": "TextLine", "prefix": "1.", "lineNumber": OneOfLineNumberSchema().dump(LineNumber(1)), "content": OneOfTokenSchema().dump( [ DocumentOrientedGloss.open(), Word.of( [ Reading.of( ( ValueToken(
@pytest.mark.parametrize( "lines", [ [EmptyLine()], [ControlLine("#", " comment")], [ TextLine.of_iterable( LineNumber(1), [ Word.of( unique_lemma=(WordId("nu I"),), parts=[Reading.of_name("nu")] ), Word.of(alignment=1, parts=[Reading.of_name("nu")]), LanguageShift.of("%sux"), LoneDeterminative.of( [Determinative.of([Reading.of_name("nu")])], language=Language.SUMERIAN, ), Erasure.open(), Erasure.center(), Erasure.close(), ], ) ], ], ) def test_load_line(lines): parser_version = "2.3.1" serialized_lines = OneOfLineSchema().dump(lines, many=True) assert TextSchema().load( { "lines": serialized_lines,
]), ), ], ) def test_word(atf, expected) -> None: assert parse_word(atf) == expected @pytest.mark.parametrize( # pyre-ignore[56] "atf,expected", [ ( "<{10}>", LoneDeterminative.of([ AccidentalOmission.open(), Determinative.of([Number.of_name("10")]), AccidentalOmission.close(), ]), ), ( "{ud]u?}", LoneDeterminative.of([ Determinative.of([ Reading.of( ( ValueToken.of("ud"), BrokenAway.close(), ValueToken.of("u"), ), flags=[atf.Flag.UNCERTAIN], )