def test_metrical_foot_separator(
    separator: MetricalFootSeparator, is_uncertain: bool, value: str
) -> None:
    assert separator.value == value
    assert separator.is_uncertain == is_uncertain

    serialized = {"type": "MetricalFootSeparator", "isUncertain": is_uncertain}
    assert_token_serialization(separator, serialized)

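# The parametrize fixtures for this module are not shown here. A minimal
# sketch of the decorator test_metrical_foot_separator presumably expects;
# the constructor calls and ATF values below are assumptions for
# illustration, not the project's actual fixtures:
#
# @pytest.mark.parametrize(
#     "separator,is_uncertain,value",
#     [
#         (MetricalFootSeparator.of(), False, "|"),  # hypothetical constructor
#         (MetricalFootSeparator.of((atf.Flag.UNCERTAIN,)), True, "(|)"),  # assumed value
#     ],
# )
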
def test_greek_word(
    word: GreekWord,
    expected: str,
    language: Language,
    lemmatizable: bool,
    alignable: bool,
) -> None:
    assert word.value == expected
    assert word.clean_value == expected.translate(str.maketrans("", "", "[]()<>#?!"))
    assert word.language == language
    assert word.normalized is False
    assert word.lemmatizable is lemmatizable
    assert word.alignable is alignable

    serialized = {
        "type": "GreekWord",
        "parts": OneOfTokenSchema().dump(word.parts, many=True),
        "uniqueLemma": [],
        "alignment": None,
        "variant": None,
        "lemmatizable": lemmatizable,
        "alignable": alignable,
        "normalized": word.normalized,
        "language": language.name,
    }
    assert_token_serialization(word, serialized)

def test_language_shift(value, expected_language, normalized):
    shift = LanguageShift.of(value)
    equal = LanguageShift.of(value)
    other = ValueToken.of(r"%bar")

    assert shift.value == value
    assert shift.clean_value == value
    assert shift.get_key() == f"LanguageShift⁝{value}"
    assert shift.lemmatizable is False
    assert shift.normalized == normalized
    assert shift.language == expected_language

    serialized = {
        "type": "LanguageShift",
        "normalized": normalized,
        "language": shift.language.name,
    }
    assert_token_serialization(shift, serialized)

    assert shift == equal
    assert hash(shift) == hash(equal)

    assert shift != other
    assert hash(shift) != hash(other)

    assert shift != ValueToken.of(value)

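# A sketch of the cases test_language_shift presumably receives; the mapping
# of ATF shift codes to languages here is an assumption for illustration:
#
# @pytest.mark.parametrize(
#     "value,expected_language,normalized",
#     [
#         ("%sux", Language.SUMERIAN, False),  # assumed mapping
#         ("%akk", Language.AKKADIAN, False),  # assumed mapping
#         ("%n", Language.AKKADIAN, True),  # assumed: %n marks normalized text
#     ],
# )
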
def test_joiner(joiner, expected_value):
    assert joiner.value == expected_value
    assert joiner.clean_value == expected_value
    assert joiner.get_key() == f"Joiner⁝{expected_value}"
    assert joiner.lemmatizable is False

    serialized = {"type": "Joiner"}
    assert_token_serialization(joiner, serialized)

def test_column_with_number():
    column = Column.of(1)

    expected_value = "&1"
    assert column.value == expected_value
    assert column.clean_value == expected_value
    assert column.get_key() == f"Column⁝{expected_value}"
    assert column.lemmatizable is False

    serialized = {"type": "Column", "number": 1}
    assert_token_serialization(column, serialized)

def test_tabulation():
    value = "($___$)"
    tabulation = Tabulation.of()

    assert tabulation.value == value
    assert tabulation.clean_value == value
    assert tabulation.get_key() == f"Tabulation⁝{value}"
    assert tabulation.lemmatizable is False

    serialized = {"type": "Tabulation"}
    assert_token_serialization(tabulation, serialized)

def test_line_break():
    value = "|"
    line_break = LineBreak.of()

    assert line_break.value == value
    assert line_break.clean_value == value
    assert line_break.get_key() == f"LineBreak⁝{value}"
    assert line_break.lemmatizable is False

    serialized = {"type": "LineBreak"}
    assert_token_serialization(line_break, serialized)

def test_greek_letter() -> None:
    alphabet = "α"
    flag = atf.Flag.UNCERTAIN
    greek_letter = GreekLetter.of(alphabet, [flag])

    assert greek_letter.value == f"{alphabet}{flag.value}"
    assert greek_letter.clean_value == alphabet
    assert greek_letter.get_key() == f"GreekLetter⁝{greek_letter.value}"
    assert greek_letter.lemmatizable is False

    serialized = {"type": "GreekLetter", "letter": alphabet, "flags": [flag.value]}
    assert_token_serialization(greek_letter, serialized)

def test_compound_grapheme() -> None:
    compound = CompoundGrapheme.of(["BI", "IS"])

    expected_value = "|BI.IS|"
    assert compound.name == SignName(expected_value)
    assert compound.value == expected_value
    assert compound.clean_value == expected_value
    assert (
        compound.get_key()
        == f"CompoundGrapheme⁝{expected_value}⟨ValueToken⁝BI⁚ValueToken⁝IS⟩"
    )

    serialized = {"type": "CompoundGrapheme", "compound_parts": ["BI", "IS"]}
    assert_token_serialization(compound, serialized)

def test_unclear_sign() -> None:
    sign = UnclearSign.of()

    expected_value = "x"
    assert sign.value == expected_value
    assert sign.clean_value == expected_value
    assert sign.get_key() == f"UnclearSign⁝{expected_value}"
    assert sign.flags == tuple()
    assert sign.lemmatizable is False

    serialized = {"type": "UnclearSign", "flags": []}
    assert_token_serialization(sign, serialized)

def test_commentary_protocol(protocol_enum):
    value = protocol_enum.value
    protocol = CommentaryProtocol.of(value)

    assert protocol.value == value
    assert protocol.clean_value == value
    assert protocol.get_key() == f"CommentaryProtocol⁝{value}"
    assert protocol.lemmatizable is False
    assert protocol.protocol == protocol_enum

    serialized = {"type": "CommentaryProtocol"}
    assert_token_serialization(protocol, serialized)

def test_in_word_new_line():
    newline = InWordNewline(frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE)

    expected_value = ";"
    assert newline.value == expected_value
    assert newline.clean_value == expected_value
    assert newline.get_key() == f"InWordNewline⁝{expected_value}"
    assert newline.lemmatizable is False

    serialized = {"type": "InWordNewline"}
    assert_token_serialization(newline, serialized)

def test_unclear_sign_with_flags() -> None:
    flags = [atf.Flag.CORRECTION]
    sign = UnclearSign.of(flags)

    expected_value = "x!"
    assert sign.value == expected_value
    assert sign.clean_value == "x"
    assert sign.get_key() == f"UnclearSign⁝{expected_value}"
    assert sign.flags == tuple(flags)
    assert sign.lemmatizable is False

    serialized = {"type": "UnclearSign", "flags": ["!"]}
    assert_token_serialization(sign, serialized)

def test_unknown_number_of_signs():
    unknown_number_of_signs = UnknownNumberOfSigns(
        frozenset({EnclosureType.BROKEN_AWAY}), ErasureState.NONE
    )

    expected_value = "..."
    assert unknown_number_of_signs.value == expected_value
    assert unknown_number_of_signs.clean_value == expected_value
    assert unknown_number_of_signs.get_key() == f"UnknownNumberOfSigns⁝{expected_value}"
    assert unknown_number_of_signs.lemmatizable is False

    serialized = {"type": "UnknownNumberOfSigns"}
    assert_token_serialization(unknown_number_of_signs, serialized)

def test_unidentified_sign_with_flags() -> None:
    flags = [atf.Flag.DAMAGE]
    sign = UnidentifiedSign.of(flags)

    expected_value = "X#"
    assert sign.value == expected_value
    assert sign.clean_value == "X"
    assert sign.get_key() == f"UnidentifiedSign⁝{expected_value}"
    assert sign.flags == tuple(flags)
    assert sign.lemmatizable is False

    serialized = {"type": "UnidentifiedSign", "flags": ["#"]}
    assert_token_serialization(sign, serialized)

def test_enclosure(enclosure_class, type_, sides, side):
    value = sides[side]
    enclosure = enclosure_class.of(side)

    assert enclosure.value == value
    assert enclosure.clean_value == ""
    assert enclosure.get_key() == f"{type_}⁝{value}"
    assert enclosure.side == side
    assert enclosure.is_open == (side == Side.LEFT)
    assert enclosure.is_close == (side == Side.RIGHT)
    assert enclosure.lemmatizable is False

    serialized = {"type": type_, "side": side.name}
    assert_token_serialization(enclosure, serialized)

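# test_enclosure receives the enclosure class, its serialized type name, a
# Side-to-value mapping, and the side under test. A hedged sketch of one
# such case; the bracket values follow ATF conventions visible elsewhere in
# this module (clean_value stripping of "[]" etc.) but remain assumptions:
#
# @pytest.mark.parametrize("side", [Side.LEFT, Side.RIGHT])
# @pytest.mark.parametrize(
#     "enclosure_class,type_,sides",
#     [(BrokenAway, "BrokenAway", {Side.LEFT: "[", Side.RIGHT: "]"})],
# )
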
def test_egyptian_metrical_feet_separator():
    egyptian_metrical_feet_separator = EgyptianMetricalFeetSeparator.of(
        (atf.Flag.UNCERTAIN,)
    )

    expected_value = "•?"
    assert egyptian_metrical_feet_separator.value == expected_value
    assert egyptian_metrical_feet_separator.clean_value == "•"
    assert (
        egyptian_metrical_feet_separator.get_key()
        == f"EgyptianMetricalFeetSeparator⁝{expected_value}"
    )
    assert egyptian_metrical_feet_separator.lemmatizable is False

    serialized = {"type": "EgyptianMetricalFeetSeparator", "flags": ["?"]}
    assert_token_serialization(egyptian_metrical_feet_separator, serialized)

def test_determinative():
    parts = [Reading.of_name("kur"), Joiner.hyphen(), Reading.of_name("kur")]
    determinative = Determinative.of(parts)

    expected_value = f"{{{''.join(part.value for part in parts)}}}"
    expected_clean_value = f"{{{''.join(part.clean_value for part in parts)}}}"
    expected_parts = f"⟨{'⁚'.join(part.get_key() for part in parts)}⟩"
    assert determinative.value == expected_value
    assert determinative.clean_value == expected_clean_value
    assert determinative.get_key() == f"Determinative⁝{expected_value}{expected_parts}"
    assert determinative.parts == tuple(parts)
    assert determinative.lemmatizable is False

    serialized = {
        "type": "Determinative",
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(determinative, serialized)

def test_linguistic_gloss():
    parts = [Reading.of_name("kur"), Joiner.hyphen(), Reading.of_name("kur")]
    gloss = LinguisticGloss.of(parts)

    expected_value = f"{{{{{''.join(part.value for part in parts)}}}}}"
    expected_clean_value = f"{{{{{''.join(part.clean_value for part in parts)}}}}}"
    expected_parts = f"⟨{'⁚'.join(part.get_key() for part in parts)}⟩"
    assert gloss.value == expected_value
    assert gloss.clean_value == expected_clean_value
    assert gloss.get_key() == f"LinguisticGloss⁝{expected_value}{expected_parts}"
    assert gloss.parts == tuple(parts)
    assert gloss.lemmatizable is False

    serialized = {
        "type": "LinguisticGloss",
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(gloss, serialized)

def test_grapheme(name, modifiers, flags, expected_value, expected_clean_value) -> None:
    grapheme = Grapheme.of(name, modifiers, flags)

    assert grapheme.name == name
    assert grapheme.value == expected_value
    assert grapheme.clean_value == expected_clean_value
    assert grapheme.get_key() == f"Grapheme⁝{expected_value}"
    assert grapheme.modifiers == tuple(modifiers)
    assert grapheme.flags == tuple(flags)
    assert grapheme.lemmatizable is False

    serialized = {
        "type": "Grapheme",
        "name": name,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
    }
    assert_token_serialization(grapheme, serialized)

def test_value_token():
    value = "value"
    token = ValueToken.of(value)
    equal = ValueToken.of(value)
    other = ValueToken.of("anothervalue")

    assert token.value == value
    assert token.clean_value == value
    assert token.get_key() == f"ValueToken⁝{value}"
    assert token.lemmatizable is False

    serialized = {"type": "ValueToken"}
    assert_token_serialization(token, serialized)

    assert token == equal
    assert hash(token) == hash(equal)

    assert token != other
    assert hash(token) != hash(other)

def test_divider() -> None:
    value = ":"
    modifiers = ("@v",)
    flags = (atf.Flag.UNCERTAIN,)
    divider = Divider.of(value, modifiers, flags)

    expected_value = ":@v?"
    assert divider.value == expected_value
    assert divider.clean_value == ":@v"
    assert divider.get_key() == f"Divider⁝{expected_value}"
    assert divider.lemmatizable is False

    serialized = {
        "type": "Divider",
        "divider": value,
        "modifiers": list(modifiers),
        "flags": ["?"],
    }
    assert_token_serialization(divider, serialized)

def test_variant():
    reading = Reading.of([ValueToken.of("sa"), BrokenAway.open(), ValueToken.of("l")])
    divider = Divider.of(":")
    variant = Variant.of(reading, divider)

    expected_value = "sa[l/:"
    assert variant.value == expected_value
    assert variant.clean_value == "sal/:"
    assert variant.tokens == (reading, divider)
    assert variant.parts == variant.tokens
    assert (
        variant.get_key()
        == f"Variant⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in variant.tokens)}⟩"
    )
    assert variant.lemmatizable is False

    serialized = {
        "type": "Variant",
        "tokens": OneOfTokenSchema().dump([reading, divider], many=True),
    }
    assert_token_serialization(variant, serialized)

def test_akkadian_word(word: AkkadianWord, expected: str, lemmatizable: bool) -> None:
    assert word.value == expected
    assert word.clean_value == expected.translate(str.maketrans("", "", "[]()<>#?!"))
    assert word.lemmatizable is lemmatizable
    assert word.alignable is lemmatizable

    serialized = {
        "type": "AkkadianWord",
        "parts": OneOfTokenSchema().dump(word.parts, many=True),
        "modifiers": [modifier.value for modifier in word.modifiers],
        "uniqueLemma": [],
        "alignment": None,
        "variant": None,
        "lemmatizable": lemmatizable,
        "alignable": lemmatizable,
        "normalized": True,
        "language": "AKKADIAN",
    }
    assert_token_serialization(word, serialized)

def test_logogram(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    surrogate,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    logogram = Logogram.of(name_parts, sub_index, modifiers, flags, sign, surrogate)

    expected_parts = (*name_parts, sign) if sign else name_parts
    assert logogram.value == expected_value
    assert logogram.clean_value == expected_clean_value
    assert (
        logogram.get_key()
        == f"Logogram⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert logogram.name_parts == name_parts
    assert logogram.name == expected_name
    assert logogram.modifiers == tuple(modifiers)
    assert logogram.flags == tuple(flags)
    assert logogram.lemmatizable is False
    assert logogram.sign == sign
    assert logogram.surrogate == tuple(surrogate)

    serialized = {
        "type": "Logogram",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "subIndex": sub_index,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
        "surrogate": OneOfTokenSchema().dump(surrogate, many=True),
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(logogram, serialized)

def test_number(
    name_parts,
    modifiers,
    flags,
    sign,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    number = Number.of(name_parts, modifiers, flags, sign)

    expected_sub_index = 1
    expected_parts = (*name_parts, sign) if sign else name_parts
    assert number.value == expected_value
    assert number.clean_value == expected_clean_value
    assert (
        number.get_key()
        == f"Number⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert number.name_parts == name_parts
    assert number.name == expected_name
    assert number.sub_index == expected_sub_index
    assert number.modifiers == tuple(modifiers)
    assert number.flags == tuple(flags)
    assert number.lemmatizable is False
    assert number.sign == sign

    serialized = {
        "type": "Number",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "modifiers": modifiers,
        "subIndex": expected_sub_index,
        "flags": [flag.value for flag in flags],
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(number, serialized)

def test_reading(
    name_parts,
    sub_index,
    modifiers,
    flags,
    sign,
    expected_value,
    expected_clean_value,
    expected_name,
) -> None:
    reading = Reading.of(name_parts, sub_index, modifiers, flags, sign)

    expected_parts = (*name_parts, sign) if sign else name_parts
    assert reading.value == expected_value
    assert reading.clean_value == expected_clean_value
    assert (
        reading.get_key()
        == f"Reading⁝{expected_value}⟨{'⁚'.join(token.get_key() for token in expected_parts)}⟩"
    )
    assert reading.name_parts == name_parts
    assert reading.name == expected_name
    assert reading.modifiers == tuple(modifiers)
    assert reading.flags == tuple(flags)
    assert reading.lemmatizable is False
    assert reading.sign == sign

    serialized = {
        "type": "Reading",
        "name": expected_name,
        "nameParts": OneOfTokenSchema().dump(name_parts, many=True),
        "subIndex": sub_index,
        "modifiers": modifiers,
        "flags": [flag.value for flag in flags],
        "sign": sign and OneOfTokenSchema().dump(sign),
    }
    assert_token_serialization(reading, serialized)

def test_lone_determinative(language):
    value = "{mu}"
    parts = [Determinative.of([Reading.of_name("mu")])]
    lone_determinative = LoneDeterminative.of(parts, language)
    equal = LoneDeterminative.of(parts, language)
    other_language = LoneDeterminative.of(parts, Language.UNKNOWN)
    other_parts = LoneDeterminative.of(
        [Determinative.of([Reading.of_name("bu")])], language
    )

    assert lone_determinative.value == value
    assert lone_determinative.lemmatizable is False
    assert lone_determinative.language == language
    assert lone_determinative.normalized is False
    assert lone_determinative.unique_lemma == tuple()

    serialized = {
        "type": "LoneDeterminative",
        "uniqueLemma": [],
        "normalized": False,
        "language": lone_determinative.language.name,
        "lemmatizable": lone_determinative.lemmatizable,
        "alignable": lone_determinative.lemmatizable,
        "erasure": ErasureState.NONE.name,
        "parts": OneOfTokenSchema().dump(parts, many=True),
    }
    assert_token_serialization(lone_determinative, serialized)

    assert lone_determinative == equal
    assert hash(lone_determinative) == hash(equal)

    for not_equal in [other_language, other_parts]:
        assert lone_determinative != not_equal
        assert hash(lone_determinative) != hash(not_equal)

    assert lone_determinative != ValueToken.of(value)

def test_caesura(caesura: Caesura, is_uncertain: bool, value: str) -> None:
    assert caesura.value == value
    assert caesura.is_uncertain == is_uncertain

    serialized = {"type": "Caesura", "isUncertain": is_uncertain}
    assert_token_serialization(caesura, serialized)
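
# assert_token_serialization is imported from the shared test helpers and is
# not defined in this module. Judging from its call sites above, a minimal
# sketch of such a helper; any fields beyond those asserted in the tests
# (e.g. "value") are assumptions:
#
# def assert_token_serialization(token, expected_serialized) -> None:
#     dump = OneOfTokenSchema().dump(token)
#     assert dump == {**expected_serialized, "value": token.value}
#     assert OneOfTokenSchema().load(dump) == token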