Example #1
def test_lemmatization() -> None:
    assert TEXT.lemmatization == Lemmatization(
        (
            (LemmatizationToken("ha-am", tuple()),),
            (LemmatizationToken(" single ruling"),),
        )
    )
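Judging from this and the examples below, a LemmatizationToken pairs a token value with an optional tuple of lemma IDs, and a Lemmatization groups tokens line by line. A minimal self-contained sketch of that shape, using simplified stand-ins rather than the real eBL classes:

from typing import NamedTuple, Optional, Tuple

class LemmatizationToken(NamedTuple):  # toy stand-in, not the eBL class
    value: str
    # None for non-lemmatizable content (e.g. " single ruling"); a possibly
    # empty tuple of word IDs such as ("nu I",) for lemmatizable words.
    unique_lemma: Optional[Tuple[str, ...]] = None

# One tuple of tokens per line of the text.
lemmatization = (
    (LemmatizationToken("ha-am", ()),),
    (LemmatizationToken(" single ruling"),),
)
assert lemmatization[0][0].unique_lemma == ()
assert lemmatization[1][0].unique_lemma is None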
Example #2
def test_note_line():
    parts = (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        LanguagePart.of_transliteration(Language.AKKADIAN, TRANSLITERATION),
        LanguagePart.of_transliteration(Language.SUMERIAN, TRANSLITERATION),
        LanguagePart.of_transliteration(Language.EMESAL, TRANSLITERATION),
    )
    line = NoteLine(parts)

    assert line.parts == (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        LanguagePart(Language.AKKADIAN,
                     expected_transliteration(Language.AKKADIAN)),
        LanguagePart(Language.SUMERIAN,
                     expected_transliteration(Language.SUMERIAN)),
        LanguagePart(Language.EMESAL,
                     expected_transliteration(Language.EMESAL)),
    )
    assert line.atf == (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}")
    assert line.lemmatization == (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )
def test_lemmatization() -> None:
    line = TextLine.of_iterable(
        LINE_NUMBER,
        [
            Word.of([Reading.of_name("bu")], unique_lemma=(WordId("nu I"), )),
            UnknownNumberOfSigns.of(),
            Word.of([Reading.of_name("nu")]),
        ],
    )

    assert line.lemmatization == (
        LemmatizationToken("bu", (WordId("nu I"), )),
        LemmatizationToken("..."),
        LemmatizationToken("nu", tuple()),
    )
def test_at_line_surface_no_status() -> None:
    at_line = SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"))

    assert at_line.lemmatization == (LemmatizationToken("surface Stone wig"), )
    assert at_line.surface_label == SurfaceLabel([], atf.Surface.SURFACE,
                                                 "Stone wig")
    assert at_line.display_value == "surface Stone wig"
def test_at_line_object() -> None:
    at_line = ObjectAtLine(
        ObjectLabel([atf.Status.CORRECTION], atf.Object.OBJECT, "Stone wig"))
    assert at_line.lemmatization == (LemmatizationToken("object Stone wig!"), )
    assert at_line.label == ObjectLabel([atf.Status.CORRECTION],
                                        atf.Object.OBJECT, "Stone wig")
    assert at_line.display_value == "object Stone wig!"
Example #6
def test_update_lemmatization(client, fragmentarium, user, database):
    transliterated_fragment = TransliteratedFragmentFactory.build()
    fragmentarium.create(transliterated_fragment)
    tokens = [
        list(line)
        for line in transliterated_fragment.text.lemmatization.tokens
    ]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I", ))
    lemmatization = Lemmatization(tokens)
    body = LemmatizationSchema().dumps(lemmatization)
    url = f"/fragments/{transliterated_fragment.number}/lemmatization"
    post_result = client.simulate_post(url, body=body)

    expected_json = create_response_dto(
        transliterated_fragment.update_lemmatization(lemmatization),
        user,
        transliterated_fragment.number == MuseumNumber("K", "1"),
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json

    get_result = client.simulate_get(
        f"/fragments/{transliterated_fragment.number}")
    assert get_result.json == expected_json

    assert database["changelog"].find_one({
        "resource_id": str(transliterated_fragment.number),
        "resource_type": "fragments",
        "user_profile.name": user.profile["name"],
    })
def test_set_unique_lemma_empty() -> None:
    word = AkkadianWord.of((ValueToken.of("bu"), ),
                           unique_lemma=(WordId("nu I"), ))
    lemma = LemmatizationToken("bu", tuple())
    expected = AkkadianWord.of((ValueToken.of("bu"), ))

    assert word.set_unique_lemma(lemma) == expected
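As the comparison shows, passing a LemmatizationToken whose lemma tuple is empty effectively clears the word's unique_lemma: the result equals a word constructed without any lemma.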
def test_update_lemmatization(
    fragment_updater, user, fragment_repository, parallel_line_injector, changelog, when
):
    transliterated_fragment = TransliteratedFragmentFactory.build()
    number = transliterated_fragment.number
    tokens = [list(line) for line in transliterated_fragment.text.lemmatization.tokens]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    lemmatization = Lemmatization(tokens)
    lemmatized_fragment = transliterated_fragment.update_lemmatization(lemmatization)
    (
        when(fragment_repository)
        .query_by_museum_number(number)
        .thenReturn(transliterated_fragment)
    )
    injected_fragment = lemmatized_fragment.set_text(
        parallel_line_injector.inject_transliteration(lemmatized_fragment.text)
    )
    when(changelog).create(
        "fragments",
        user.profile,
        {"_id": str(number), **SCHEMA.dump(transliterated_fragment)},
        {"_id": str(number), **SCHEMA.dump(lemmatized_fragment)},
    ).thenReturn()
    when(fragment_repository).update_lemmatization(lemmatized_fragment).thenReturn()

    result = fragment_updater.update_lemmatization(number, lemmatization, user)
    assert result == (injected_fragment, False)
Example #9
def test_update_lemmatization() -> None:
    tokens = [list(line) for line in TEXT.lemmatization.tokens]
    tokens[0][0] = LemmatizationToken(tokens[0][0].value, (WordId("nu I"),))
    lemmatization = Lemmatization(tokens)

    expected = Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        unique_lemma=(WordId("nu I"),),
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ],
                    ),
                ),
            ),
            RulingDollarLine(atf.Ruling.SINGLE),
        ),
        TEXT.parser_version,
    )

    assert TEXT.update_lemmatization(lemmatization) == expected
def test_at_line_composite_constant() -> None:
    at_line = CompositeAtLine(atf.Composite.COMPOSITE, "")

    assert at_line.lemmatization == (LemmatizationToken("composite"), )
    assert at_line.composite == atf.Composite.COMPOSITE
    assert at_line.text == ""
    assert at_line.number is None
    assert at_line.display_value == "composite"
def test_state_dollar_line_range() -> None:
    scope = ScopeContainer(atf.Scope.LINES)
    actual = StateDollarLine(None, (2, 4), scope, atf.State.MISSING, None)

    assert actual.scope == scope
    assert actual.lemmatization == (LemmatizationToken(" 2-4 lines missing"), )
    assert actual.display_value == "2-4 lines missing"
    assert actual.is_end_of is False
def test_at_line_composite_milestone() -> None:
    at_line = CompositeAtLine(atf.Composite.MILESTONE, "o", 1)

    assert at_line.lemmatization == (LemmatizationToken("m=locator o 1"), )
    assert at_line.composite == atf.Composite.MILESTONE
    assert at_line.text == "o"
    assert at_line.number == 1
    assert at_line.display_value == "m=locator o 1"
def test_at_line_composite() -> None:
    at_line = CompositeAtLine(atf.Composite.DIV, "paragraph", 1)

    assert at_line.lemmatization == (LemmatizationToken("div paragraph 1"), )
    assert at_line.composite == atf.Composite.DIV
    assert at_line.text == "paragraph"
    assert at_line.number == 1
    assert at_line.display_value == "div paragraph 1"
def test_state_dollar_line_end_of() -> None:
    scope = ScopeContainer(atf.Surface.OBVERSE)
    actual = StateDollarLine(None, atf.Extent.END_OF, scope, None, None)

    assert actual.scope == scope
    assert actual.lemmatization == (LemmatizationToken(" end of obverse"), )
    assert actual.display_value == "end of obverse"
    assert actual.is_end_of is True
def test_update_lemmatization_wrong_length() -> None:
    line = TextLine.of_iterable(
        LINE_NUMBER,
        [Word.of([Reading.of_name("bu")]),
         Word.of([Reading.of_name("bu")])],
    )
    lemmatization = (LemmatizationToken("bu", (WordId("nu I"), )), )
    with pytest.raises(LemmatizationError):
        line.update_lemmatization(lemmatization)
def test_update_lemmatization() -> None:
    line = TextLine.of_iterable(LINE_NUMBER,
                                [Word.of([Reading.of_name("bu")])])
    lemmatization = (LemmatizationToken("bu", (WordId("nu I"), )), )
    expected = TextLine.of_iterable(
        LINE_NUMBER,
        [Word.of([Reading.of_name("bu")], unique_lemma=(WordId("nu I"), ))])

    assert line.update_lemmatization(lemmatization) == expected
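Paired with test_update_lemmatization_wrong_length above, this suggests a contract: update_lemmatization expects exactly one LemmatizationToken per token of the line and raises LemmatizationError on a length mismatch. A toy sketch of that contract, with hypothetical stand-ins rather than the eBL implementation:

from typing import Sequence, Tuple

class LemmatizationError(Exception):  # stand-in for the eBL exception
    pass

def update_lemmatization(words: Sequence, tokens: Sequence) -> Tuple:
    # One lemmatization token per word; any other length is an error.
    if len(words) != len(tokens):
        raise LemmatizationError("wrong number of lemmatization tokens")
    return tuple(word.set_unique_lemma(token)
                 for word, token in zip(words, tokens))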
def test_loose_dollar_line() -> None:
    text = "this is a loose line"
    loose_line = LooseDollarLine(text)

    assert loose_line.lemmatization == (LemmatizationToken(f" ({text})"), )
    assert loose_line.text == text
    assert loose_line.atf == f"$ ({text})"
    assert loose_line.display_value == f"({text})"
    assert loose_line.is_end_of is False
Example #18
def test_control_line() -> None:
    prefix = "#"
    content = "only"
    line = ControlLine(prefix, content)

    assert line.prefix == prefix
    assert line.content == content
    assert line.key == f"ControlLine⁞#only⁞{hash(line)}"
    assert line.lemmatization == (LemmatizationToken(content),)
def test_translation_line(parts, language, extent, prefix,
                          translation) -> None:
    line = TranslationLine(parts, language, extent)

    assert line.parts == parts
    assert line.language == language
    assert line.extent == extent
    assert line.translation == translation
    assert line.atf == Atf(f"{prefix}: {translation}")
    assert line.lemmatization == (LemmatizationToken(translation), )
def test_image_dollar_line() -> None:
    image = ImageDollarLine("1", "a", "great")

    assert image.lemmatization == (LemmatizationToken(" (image 1a = great)"), )
    assert image.number == "1"
    assert image.letter == "a"
    assert image.text == "great"
    assert image.atf == "$ (image 1a = great)"
    assert image.display_value == "(image 1a = great)"
    assert image.is_end_of is False
def test_update_lemmatization_not_found(
    fragment_updater, user, fragment_repository, when
):
    number = "K.1"
    (when(fragment_repository).query_by_museum_number(number).thenRaise(NotFoundError))

    with pytest.raises(NotFoundError):
        fragment_updater.update_lemmatization(
            number, Lemmatization(((LemmatizationToken("1.", tuple()),),)), user
        )
def test_ruling_dollar_line() -> None:
    ruling_line = RulingDollarLine(atf.Ruling.DOUBLE)

    assert ruling_line.lemmatization == (
        LemmatizationToken(" double ruling"), )
    assert ruling_line.number == atf.Ruling.DOUBLE
    assert ruling_line.status is None
    assert ruling_line.atf == "$ double ruling"
    assert ruling_line.display_value == "double ruling"
    assert ruling_line.is_end_of is False
def test_at_line_surface_instantiate_text_with_wrong_surface() -> None:
    with pytest.raises(ValueError):
        # SurfaceLabel only accepts free text for atf.Surface.SURFACE (cf. the
        # tests above), so this construction raises and the assertions below
        # are never reached.
        at_line = SurfaceAtLine(
            SurfaceLabel((atf.Status.CORRECTION, ), atf.Surface.OBVERSE,
                         "Stone wig"))
        assert at_line.lemmatization == (
            LemmatizationToken("obverse Stone wig!"), )
        assert at_line.surface_label == SurfaceLabel(
            (atf.Status.CORRECTION, ), atf.Surface.OBVERSE, "Stone wig")
        assert at_line.display_value == "obverse Stone wig!"
def test_at_line_surface() -> None:
    at_line = SurfaceAtLine(
        SurfaceLabel((atf.Status.CORRECTION, ), atf.Surface.SURFACE,
                     "Stone wig"))

    assert at_line.lemmatization == (
        LemmatizationToken("surface Stone wig!"), )
    assert at_line.surface_label == SurfaceLabel(
        (atf.Status.CORRECTION, ), atf.Surface.SURFACE, "Stone wig")
    assert at_line.display_value == "surface Stone wig!"
def test_ruling_dollar_line_status() -> None:
    ruling_line = RulingDollarLine(atf.Ruling.DOUBLE,
                                   atf.DollarStatus.EMENDED_NOT_COLLATED)

    assert ruling_line.lemmatization == (
        LemmatizationToken(" double ruling !"), )
    assert ruling_line.number == atf.Ruling.DOUBLE
    assert ruling_line.status == atf.DollarStatus.EMENDED_NOT_COLLATED
    assert ruling_line.atf == "$ double ruling !"
    assert ruling_line.display_value == "double ruling !"
    assert ruling_line.is_end_of is False
    def _merge_word(self, token: A) -> A:
        is_compatible = self._is_compatible(token)
        result = token

        # Reapply the stored lemma only to compatible, lemmatizable tokens.
        if is_compatible and token.lemmatizable:
            result = result.set_unique_lemma(
                LemmatizationToken(token.value, self.unique_lemma))
        # Alignment is restored separately: a token may be alignable
        # without being lemmatizable.
        if is_compatible and token.alignable:
            result = result.set_alignment(self.alignment, self.variant)

        return result
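For context, a rough sketch of how such a merge step behaves, using hypothetical stand-ins whose attribute names mirror the snippet above (not the eBL implementation):

from typing import NamedTuple, Optional, Tuple

class LemmatizationToken(NamedTuple):  # toy stand-in
    value: str
    unique_lemma: Optional[Tuple[str, ...]] = None

class Word:  # toy stand-in with the attributes _merge_word relies on
    lemmatizable = True
    alignable = True

    def __init__(self, value: str, unique_lemma: Tuple[str, ...] = ()) -> None:
        self.value = value
        self.unique_lemma = unique_lemma

    def set_unique_lemma(self, token: LemmatizationToken) -> "Word":
        return Word(self.value, token.unique_lemma or ())

old = Word("bu", ("nu I",))
new = Word("bu")  # freshly parsed token without a lemma
# Merging carries the stored lemma over to the compatible new token.
merged = new.set_unique_lemma(LemmatizationToken(new.value, old.unique_lemma))
assert merged.unique_lemma == ("nu I",)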
def test_update_lemmatization(fragment_repository):
    transliterated_fragment = TransliteratedFragmentFactory.build()
    fragment_repository.create(transliterated_fragment)
    tokens = [list(line) for line in transliterated_fragment.text.lemmatization.tokens]
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    lemmatization = Lemmatization(tokens)
    updated_fragment = transliterated_fragment.update_lemmatization(lemmatization)

    fragment_repository.update_lemmatization(updated_fragment)
    result = fragment_repository.query_by_museum_number(transliterated_fragment.number)

    assert result == updated_fragment
def test_parallel_composition(cf, display_value) -> None:
    name = "my name"
    line_number = LineNumber(1)
    line = ParallelComposition(cf, name, line_number)

    assert line.has_cf is cf
    assert line.name == name
    assert line.line_number == line_number

    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value), )
def test_strict_dollar_line_with_none() -> None:
    scope = ScopeContainer(atf.Object.OBJECT, "what")
    actual = StateDollarLine(None, atf.Extent.SEVERAL, scope, None, None)

    assert scope.content == atf.Object.OBJECT
    assert scope.text == "what"

    assert actual.scope == scope
    assert actual.lemmatization == (
        LemmatizationToken(" several object what"), )
    assert actual.atf == "$ several object what"
    assert actual.display_value == "several object what"
    assert actual.is_end_of is False
def test_parallel_text(cf, chapter, display_value) -> None:
    text_id = TextId(Genre.LITERATURE, 1, 1)
    line_number = LineNumber(1)
    line = ParallelText(cf, text_id, chapter, line_number)

    assert line.has_cf is cf
    assert line.text == text_id
    assert line.chapter == chapter
    assert line.line_number == line_number

    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value), )