def test_lemmatization() -> None:
    """TEXT exposes one token tuple per line: a lemmatizable word and a ruling."""
    expected = Lemmatization(
        (
            (LemmatizationToken("ha-am", ()),),
            (LemmatizationToken(" single ruling"),),
        )
    )

    assert TEXT.lemmatization == expected
def test_note_line():
    """NoteLine normalizes transliterated language parts and renders ATF/lemmas."""
    languages = (Language.AKKADIAN, Language.SUMERIAN, Language.EMESAL)
    line = NoteLine(
        (
            StringPart("this is a note "),
            EmphasisPart("italic text"),
            *(
                LanguagePart.of_transliteration(language, TRANSLITERATION)
                for language in languages
            ),
        )
    )

    # Each transliterated part is normalized into a LanguagePart for its language.
    assert line.parts == (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        *(
            LanguagePart(language, expected_transliteration(language))
            for language in languages
        ),
    )
    assert line.atf == (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}"
    )
    assert line.lemmatization == (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )
def test_lemmatization() -> None:
    """A text line yields one LemmatizationToken per token, lemma'd or not."""
    line = TextLine.of_iterable(
        LINE_NUMBER,
        [
            Word.of([Reading.of_name("bu")], unique_lemma=(WordId("nu I"),)),
            UnknownNumberOfSigns.of(),
            Word.of([Reading.of_name("nu")]),
        ],
    )

    assert line.lemmatization == (
        LemmatizationToken("bu", (WordId("nu I"),)),
        LemmatizationToken("..."),
        LemmatizationToken("nu", ()),
    )
def test_at_line_surface_no_status() -> None:
    """A surface @-line without status flags renders its plain label."""
    label = SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")
    at_line = SurfaceAtLine(label)

    assert at_line.lemmatization == (LemmatizationToken("surface Stone wig"),)
    assert at_line.surface_label == SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")
    assert at_line.display_value == "surface Stone wig"
def test_at_line_object() -> None:
    """An object @-line renders its label with the correction status mark."""
    at_line = ObjectAtLine(
        ObjectLabel([atf.Status.CORRECTION], atf.Object.OBJECT, "Stone wig")
    )

    assert at_line.lemmatization == (LemmatizationToken("object Stone wig!"),)
    assert at_line.label == ObjectLabel(
        [atf.Status.CORRECTION], atf.Object.OBJECT, "Stone wig"
    )
    assert at_line.display_value == "object Stone wig!"
def test_update_lemmatization(client, fragmentarium, user, database):
    """POSTing a lemmatization updates the fragment and logs a changelog entry."""
    fragment = TransliteratedFragmentFactory.build()
    fragmentarium.create(fragment)

    # Change a single token's lemma and serialize the whole lemmatization.
    tokens = [list(line) for line in fragment.text.lemmatization.tokens]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    lemmatization = Lemmatization(tokens)
    body = LemmatizationSchema().dumps(lemmatization)

    url = f"/fragments/{fragment.number}/lemmatization"
    post_result = client.simulate_post(url, body=body)

    expected_json = create_response_dto(
        fragment.update_lemmatization(lemmatization),
        user,
        fragment.number == MuseumNumber("K", "1"),
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json

    # A subsequent GET returns the updated fragment.
    get_result = client.simulate_get(f"/fragments/{fragment.number}")
    assert get_result.json == expected_json

    # The update was recorded in the changelog for this user.
    assert database["changelog"].find_one(
        {
            "resource_id": str(fragment.number),
            "resource_type": "fragments",
            "user_profile.name": user.profile["name"],
        }
    )
def test_set_unique_lemma_empty() -> None:
    """Setting an empty-lemma token clears the word's unique lemma."""
    word = AkkadianWord.of((ValueToken.of("bu"),), unique_lemma=(WordId("nu I"),))
    cleared = word.set_unique_lemma(LemmatizationToken("bu", ()))

    assert cleared == AkkadianWord.of((ValueToken.of("bu"),))
def test_update_lemmatization(
    fragment_updater, user, fragment_repository, parallel_line_injector, changelog, when
):
    """The updater persists a lemmatized fragment and returns the injected text."""
    fragment = TransliteratedFragmentFactory.build()
    number = fragment.number

    # Build a lemmatization that changes exactly one token's lemma.
    tokens = [list(line) for line in fragment.text.lemmatization.tokens]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    lemmatization = Lemmatization(tokens)
    lemmatized = fragment.update_lemmatization(lemmatization)

    # Stub repository lookup, changelog write, and repository update.
    (
        when(fragment_repository)
        .query_by_museum_number(number)
        .thenReturn(fragment)
    )
    injected = lemmatized.set_text(
        parallel_line_injector.inject_transliteration(lemmatized.text)
    )
    when(changelog).create(
        "fragments",
        user.profile,
        {"_id": str(number), **SCHEMA.dump(fragment)},
        {"_id": str(number), **SCHEMA.dump(lemmatized)},
    ).thenReturn()
    when(fragment_repository).update_lemmatization(lemmatized).thenReturn()

    result = fragment_updater.update_lemmatization(number, lemmatization, user)

    assert result == (injected, False)
def test_update_lemmatization() -> None:
    """update_lemmatization writes the token lemmas back onto the text's words."""
    tokens = [list(line) for line in TEXT.lemmatization.tokens]
    tokens[0][0] = LemmatizationToken(tokens[0][0].value, (WordId("nu I"),))
    new_lemmatization = Lemmatization(tokens)

    expected = Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        unique_lemma=(WordId("nu I"),),
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ],
                    ),
                ),
            ),
            RulingDollarLine(atf.Ruling.SINGLE),
        ),
        TEXT.parser_version,
    )

    assert TEXT.update_lemmatization(new_lemmatization) == expected
def test_at_line_composite_constant() -> None:
    """A COMPOSITE @-line has empty text, no number, and a bare display value."""
    at_line = CompositeAtLine(atf.Composite.COMPOSITE, "")

    assert at_line.lemmatization == (LemmatizationToken("composite"),)
    assert at_line.composite == atf.Composite.COMPOSITE
    assert at_line.text == ""
    assert at_line.number is None
    assert at_line.display_value == "composite"
def test_state_dollar_line_range() -> None:
    """A $-line with a numeric range renders "2-4 lines missing"."""
    scope = ScopeContainer(atf.Scope.LINES)
    line = StateDollarLine(None, (2, 4), scope, atf.State.MISSING, None)

    assert line.scope == scope
    assert line.lemmatization == (LemmatizationToken(" 2-4 lines missing"),)
    assert line.display_value == "2-4 lines missing"
    assert line.is_end_of is False
def test_at_line_composite_milestone() -> None:
    """A MILESTONE @-line renders as "m=locator <text> <number>"."""
    at_line = CompositeAtLine(atf.Composite.MILESTONE, "o", 1)

    assert at_line.lemmatization == (LemmatizationToken("m=locator o 1"),)
    assert at_line.composite == atf.Composite.MILESTONE
    assert at_line.text == "o"
    assert at_line.number == 1
    assert at_line.display_value == "m=locator o 1"
def test_at_line_composite() -> None:
    """A DIV @-line renders as "div <text> <number>"."""
    at_line = CompositeAtLine(atf.Composite.DIV, "paragraph", 1)

    assert at_line.lemmatization == (LemmatizationToken("div paragraph 1"),)
    assert at_line.composite == atf.Composite.DIV
    assert at_line.text == "paragraph"
    assert at_line.number == 1
    assert at_line.display_value == "div paragraph 1"
def test_state_dollar_line_end_of() -> None:
    """An END_OF extent marks the line as is_end_of and renders "end of ..."."""
    scope = ScopeContainer(atf.Surface.OBVERSE)
    line = StateDollarLine(None, atf.Extent.END_OF, scope, None, None)

    assert line.scope == scope
    assert line.lemmatization == (LemmatizationToken(" end of obverse"),)
    assert line.display_value == "end of obverse"
    assert line.is_end_of is True
def test_update_lemmatization_wrong_length() -> None:
    """Updating with fewer tokens than the line has raises LemmatizationError.

    Note: fixes the misspelled test name ("lenght" -> "length"); pytest
    discovers tests by prefix, so the rename is safe.
    """
    line = TextLine.of_iterable(
        LINE_NUMBER,
        [Word.of([Reading.of_name("bu")]), Word.of([Reading.of_name("bu")])],
    )
    # One token for a two-word line: the length mismatch must be rejected.
    lemmatization = (LemmatizationToken("bu", (WordId("nu I"),)),)

    with pytest.raises(LemmatizationError):
        line.update_lemmatization(lemmatization)
def test_update_lemmatization() -> None:
    """Applying a matching token sequence sets the word's unique lemma."""
    line = TextLine.of_iterable(LINE_NUMBER, [Word.of([Reading.of_name("bu")])])
    lemmatization = (LemmatizationToken("bu", (WordId("nu I"),)),)

    expected = TextLine.of_iterable(
        LINE_NUMBER,
        [Word.of([Reading.of_name("bu")], unique_lemma=(WordId("nu I"),))],
    )

    assert line.update_lemmatization(lemmatization) == expected
def test_loose_dollar_line() -> None:
    """A loose $-line wraps its free text in parentheses everywhere it renders."""
    text = "this is a loose line"
    line = LooseDollarLine(text)

    assert line.lemmatization == (LemmatizationToken(f" ({text})"),)
    assert line.text == text
    assert line.atf == f"$ ({text})"
    assert line.display_value == f"({text})"
    assert line.is_end_of is False
def test_control_line() -> None:
    """A control line keeps prefix/content verbatim and keys on its own hash."""
    line = ControlLine("#", "only")

    assert line.prefix == "#"
    assert line.content == "only"
    assert line.key == f"ControlLine⁞#only⁞{hash(line)}"
    assert line.lemmatization == (LemmatizationToken("only"),)
def test_parallel_fragment(parts, language, extent, prefix, translation) -> None:
    """A translation line exposes its parts and renders "#tr...: <translation>".

    NOTE(review): the name says "parallel_fragment" but the body exercises
    TranslationLine — looks like a copy-paste name; confirm before renaming.
    """
    line = TranslationLine(parts, language, extent)

    assert line.parts == parts
    assert line.language == language
    assert line.extent == extent
    assert line.translation == translation
    assert line.atf == Atf(f"{prefix}: {translation}")
    assert line.lemmatization == (LemmatizationToken(translation),)
def test_image_dollar_line() -> None:
    """An image $-line renders "(image <number><letter> = <text>)"."""
    line = ImageDollarLine("1", "a", "great")

    assert line.lemmatization == (LemmatizationToken(" (image 1a = great)"),)
    assert line.number == "1"
    assert line.letter == "a"
    assert line.text == "great"
    assert line.atf == "$ (image 1a = great)"
    assert line.display_value == "(image 1a = great)"
    assert line.is_end_of is False
def test_update_update_lemmatization_not_found(
    fragment_updater, user, fragment_repository, when
):
    """Lemmatizing a missing fragment propagates the repository's NotFoundError.

    NOTE(review): the doubled "update_update" in the name looks like a typo —
    confirm no sibling test already uses the single-"update" name before fixing.
    """
    number = "K.1"
    when(fragment_repository).query_by_museum_number(number).thenRaise(NotFoundError)

    with pytest.raises(NotFoundError):
        fragment_updater.update_lemmatization(
            number, Lemmatization(((LemmatizationToken("1.", ()),),)), user
        )
def test_ruling_dollar_line() -> None:
    """A ruling $-line without status renders "double ruling"."""
    line = RulingDollarLine(atf.Ruling.DOUBLE)

    assert line.lemmatization == (LemmatizationToken(" double ruling"),)
    assert line.number == atf.Ruling.DOUBLE
    assert line.status is None
    assert line.atf == "$ double ruling"
    assert line.display_value == "double ruling"
    assert line.is_end_of is False
def test_at_line_surface_instantiate_text_with_wrong_surface() -> None:
    """SurfaceLabel rejects free text on a surface type that takes none.

    The original test kept assertions after the raising constructor inside the
    ``pytest.raises`` block; they could never execute (dead code apparently
    copied from the happy-path surface test), so only the raising call remains.
    """
    with pytest.raises(ValueError):
        SurfaceAtLine(
            SurfaceLabel((atf.Status.CORRECTION,), atf.Surface.OBVERSE, "Stone wig")
        )
def test_at_line_surface() -> None:
    """A surface @-line with a correction status renders a trailing "!"."""
    at_line = SurfaceAtLine(
        SurfaceLabel((atf.Status.CORRECTION,), atf.Surface.SURFACE, "Stone wig")
    )

    assert at_line.lemmatization == (LemmatizationToken("surface Stone wig!"),)
    assert at_line.surface_label == SurfaceLabel(
        (atf.Status.CORRECTION,), atf.Surface.SURFACE, "Stone wig"
    )
    assert at_line.display_value == "surface Stone wig!"
def test_ruling_dollar_line_status() -> None:
    """A ruling $-line with a status renders the status mark after the ruling."""
    line = RulingDollarLine(atf.Ruling.DOUBLE, atf.DollarStatus.EMENDED_NOT_COLLATED)

    assert line.lemmatization == (LemmatizationToken(" double ruling !"),)
    assert line.number == atf.Ruling.DOUBLE
    assert line.status == atf.DollarStatus.EMENDED_NOT_COLLATED
    assert line.atf == "$ double ruling !"
    assert line.display_value == "double ruling !"
    assert line.is_end_of is False
def _merge_word(self, token: A) -> A:
    """Carry this word's lemma and alignment over onto a compatible token.

    Incompatible tokens are returned unchanged; compatible ones receive the
    unique lemma (if lemmatizable) and the alignment/variant (if alignable).
    """
    if not self._is_compatible(token):
        return token
    merged = token
    if token.lemmatizable:
        merged = merged.set_unique_lemma(
            LemmatizationToken(token.value, self.unique_lemma)
        )
    if token.alignable:
        merged = merged.set_alignment(self.alignment, self.variant)
    return merged
def test_update_lemmatization(fragment_repository):
    """A lemmatization update round-trips through the repository."""
    fragment = TransliteratedFragmentFactory.build()
    fragment_repository.create(fragment)

    # Change one token's lemma, persist, and read the fragment back.
    tokens = [list(line) for line in fragment.text.lemmatization.tokens]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    updated = fragment.update_lemmatization(Lemmatization(tokens))

    fragment_repository.update_lemmatization(updated)

    assert fragment_repository.query_by_museum_number(fragment.number) == updated
def test_parallel_composition(cf, display_value) -> None:
    """A parallel-composition line exposes its fields and "// ..." ATF form."""
    name = "my name"
    line_number = LineNumber(1)
    line = ParallelComposition(cf, name, line_number)

    assert line.has_cf is cf
    assert line.name == name
    assert line.line_number == line_number
    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value),)
def test_strict_dollar_line_with_none() -> None:
    """A $-line with extent but no state renders "several object what"."""
    scope = ScopeContainer(atf.Object.OBJECT, "what")
    line = StateDollarLine(None, atf.Extent.SEVERAL, scope, None, None)

    assert scope.content == atf.Object.OBJECT
    assert scope.text == "what"
    assert line.scope == scope
    assert line.lemmatization == (LemmatizationToken(" several object what"),)
    assert line.atf == "$ several object what"
    assert line.display_value == "several object what"
    assert line.is_end_of is False
def test_parallel_text(cf, chapter, display_value) -> None:
    """A parallel-text line exposes its fields and "// ..." ATF form."""
    text_id = TextId(Genre.LITERATURE, 1, 1)
    line_number = LineNumber(1)
    line = ParallelText(cf, text_id, chapter, line_number)

    assert line.has_cf is cf
    assert line.text == text_id
    assert line.chapter == chapter
    assert line.line_number == line_number
    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value),)