Пример #1
0
def test_fossey_schema():
    """Check that FosseySchema loads and dumps a Fossey entry symmetrically."""
    museum_number = MuseumNumber.of("K.4562")
    sign_path = "M15,21.7c-0.1-0.1-0.2-0.4-0.2-0.8c-0.1-1-0.1-1.2-0.5-1.3c-0.2"

    expected_model = Fossey(
        405,
        25728,
        "B",
        "Mai: MDP, VI, 11.I, 11",
        "Paulus AOAT 50, 981",
        "NABU 1997/1",
        "P123456",
        museum_number,
        "dcclt",
        "Das Zeichen ist eigentlich ZA₇",
        "Marduk-apla-iddina I, 1171-1159 BC",
        "me-luḫ-ḫa",
        sign_path,
    )
    # Serialized counterpart of the model above, keyed by the schema's
    # camelCase field names.
    expected_payload = {
        "page": 405,
        "number": 25728,
        "suffix": "B",
        "reference": "Mai: MDP, VI, 11.I, 11",
        "newEdition": "Paulus AOAT 50, 981",
        "secondaryLiterature": "NABU 1997/1",
        "cdliNumber": "P123456",
        "museumNumber": MuseumNumberSchema().dump(museum_number),
        "externalProject": "dcclt",
        "notes": "Das Zeichen ist eigentlich ZA₇",
        "date": "Marduk-apla-iddina I, 1171-1159 BC",
        "transliteration": "me-luḫ-ḫa",
        "sign": sign_path,
    }

    assert FosseySchema().load(expected_payload) == expected_model
    assert FosseySchema().dump(expected_model) == expected_payload
def test_fossey():
    """Check that each positional Fossey argument is exposed on its attribute."""
    sign_path = "M15,21.7c-0.1-0.1-0.2-0.4-0.2-0.8c-0.1-1-0.1-1.2-0.5-1.3c-0.2"
    fossey = Fossey(
        405,
        25728,
        "B",
        "Mai: MDP, VI, 11.I, 11",
        "Paulus AOAT 50, 981",
        "NABU 1997/1",
        "P123456",
        MuseumNumber("K", "4562"),
        "dcclt",
        "Das Zeichen ist eigentlich ZA₇",
        "Marduk-apla-iddina I, 1171-1159 BC",
        "me-luḫ-ḫa",
        sign_path,
    )

    # Attribute name -> value expected to be stored under it.
    expected_by_attribute = {
        "page": 405,
        "number": 25728,
        "suffix": "B",
        "reference": "Mai: MDP, VI, 11.I, 11",
        "new_edition": "Paulus AOAT 50, 981",
        "secondary_literature": "NABU 1997/1",
        "cdli_number": "P123456",
        # A separately constructed instance, so equality rather than
        # identity is what gets compared.
        "museum_number": MuseumNumber("K", "4562"),
        "external_project": "dcclt",
        "notes": "Das Zeichen ist eigentlich ZA₇",
        "date": "Marduk-apla-iddina I, 1171-1159 BC",
        "transliteration": "me-luḫ-ḫa",
        "sign": sign_path,
    }
    for attribute, expected in expected_by_attribute.items():
        assert getattr(fossey, attribute) == expected
def test_find_transliterated(database, fragment_repository):
    """Insert two transliterated fragments out of order, plus one other
    fragment, and expect only X.1 and X.2 back, in that order."""
    first, second = (
        TransliteratedFragmentFactory.build(number=MuseumNumber.of(f"X.{index}"))
        for index in (1, 2)
    )

    def as_document(fragment):
        # Serialized fragment with the museum number used as the Mongo _id.
        return {**SCHEMA.dump(fragment), "_id": str(fragment.number)}

    # The higher number is inserted first on purpose.
    documents = [
        as_document(second),
        SCHEMA.dump(FragmentFactory.build()),
        as_document(first),
    ]
    database[COLLECTION].insert_many(documents)

    expected_numbers = [first.number, second.number]
    assert fragment_repository.query_transliterated_numbers() == expected_numbers
def test_schema():
    """Dump a FragmentPagerInfo and check both neighbours are serialized.

    The previous and next museum numbers are deliberately different so that
    a schema which swaps or duplicates the two fields fails the assertion.
    """
    previous_number = MuseumNumber.of("X.1")
    # Named ``next_number`` rather than ``next`` to avoid shadowing the builtin.
    next_number = MuseumNumber.of("X.3")
    pager_info = FragmentPagerInfo(previous_number, next_number)
    assert FragmentPagerInfoSchema().dump(pager_info) == {
        "next": "X.3",
        "previous": "X.1",
    }
def test_order_equal() -> None:
    """Two museum numbers built from the same parts must compare equal."""
    parts = ("X", "B", "C")  # prefix, number, suffix
    left = MuseumNumber(*parts)
    right = MuseumNumber(*parts)
    assert_that(left, equal_to(right))
def test_get_fragment_pager(client, fragmentarium):
    """Request the pager for the middle of three fragments and expect its
    neighbours, an OK status and a private ten-minute cache header."""
    fragments = [
        FragmentFactory.build(number=MuseumNumber("X", digit))
        for digit in ("0", "1", "2")
    ]
    for fragment in fragments:
        fragmentarium.create(fragment)

    middle = fragments[1]
    response = client.simulate_get(f"/fragments/{middle.number}/pager")

    assert response.json == {"next": "X.2", "previous": "X.0"}
    assert response.status == falcon.HTTP_OK
    assert response.headers["Cache-Control"] == "private, max-age=600"
def test_fragment_matcher_route_error(client, fragmentarium, user):
    """Matching against the id ``X.-1`` must yield 422 Unprocessable Entity."""
    faulty_fragment_id = "X.-1"
    stored_fragments = (
        TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.0")),
        TransliteratedFragmentFactory.build(
            number=MuseumNumber.of("X.1"),
            line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
        ),
    )
    for stored in stored_fragments:
        fragmentarium.create(stored)

    response = client.simulate_get(f"/fragments/{faulty_fragment_id}/match")

    assert response.status == falcon.HTTP_UNPROCESSABLE_ENTITY
Пример #8
0
def test_joins_lowest():
    """``Joins.lowest`` is expected to be B.1 here: A.1 is flagged as not
    being in the fragmentarium and B.2 is higher."""
    single_group = (Join(MuseumNumber("B", "2"), is_in_fragmentarium=True),)
    mixed_group = (
        Join(MuseumNumber("B", "1"), is_in_fragmentarium=True),
        Join(MuseumNumber("A", "1"), is_in_fragmentarium=False),
    )
    joins = Joins((single_group, mixed_group))

    expected_lowest = MuseumNumber("B", "1")
    assert_that(joins.lowest, equal_to(expected_lowest))
def test_add_lowest_join_transliteration(user):
    """Updating the transliteration of X.2, which is joined with X.1, must
    raise NotLowestJoinError."""
    lower_join = Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)
    fragment = FragmentFactory.build(
        number=MuseumNumber.of("X.2"),
        joins=Joins([[lower_join]]),
    )
    parsed_text = parse_atf_lark(Atf("1. x x"))
    update = TransliterationUpdate(parsed_text, fragment.notes)

    with pytest.raises(NotLowestJoinError):
        fragment.update_lowest_join_transliteration(update, user)
def test_query_next_and_previous_fragment(museum_numbers, fragment_repository):
    """Store every number from the fixture, then check that each one's
    previous/next neighbours wrap around the ends of the list."""
    for raw_number in museum_numbers:
        fragment_repository.create(
            FragmentFactory.build(number=MuseumNumber.of(raw_number))
        )

    total = len(museum_numbers)
    for raw_number in museum_numbers:
        neighbours = fragment_repository.query_next_and_previous_fragment(
            MuseumNumber.of(raw_number)
        )
        position = museum_numbers.index(raw_number)
        # The modulo makes the first entry's predecessor the last entry
        # and the last entry's successor the first one.
        expected_previous = museum_numbers[(position - 1) % total]
        expected_next = museum_numbers[(position + 1) % total]
        assert neighbours.previous == MuseumNumber.of(expected_previous)
        assert neighbours.next == MuseumNumber.of(expected_next)
def test_get_fragment_pager_cache(cached_client, fragmentarium):
    """The pager response must be unchanged after a new fragment (X.2) is
    created between two identical requests."""
    lower = FragmentFactory.build(number=MuseumNumber("X", "0"))
    upper = FragmentFactory.build(number=MuseumNumber("X", "3"))
    fragmentarium.create(lower)
    fragmentarium.create(upper)

    pager_url = f"/fragments/{upper.number}/pager"
    before = cached_client.simulate_get(pager_url)
    # X.2 sits between the two existing fragments.
    fragmentarium.create(FragmentFactory.build(number=MuseumNumber("X", "2")))
    after = cached_client.simulate_get(pager_url)

    assert before.json == after.json
    assert before.status == after.status
Пример #12
0
def test_find(fragment_repository, when, fragment_matcher):
    """``_parse_candidate`` returns the line_to_vec of the fragment that the
    stubbed repository hands back for BM.11."""
    museum_number = MuseumNumber.of("BM.11")
    encodings = (LineToVecEncoding.from_list((1, 2, 1, 1)),)
    stored = FragmentFactory.build(number=museum_number, line_to_vec=encodings)

    stubbed_query = when(fragment_repository).query_by_museum_number(museum_number)
    stubbed_query.thenReturn(stored)

    assert fragment_matcher._parse_candidate("BM.11") == encodings
Пример #13
0
def test_signs_get(
    client,
    annotations_repository,
    photo_repository,
    when,
    fragment_repository,
    text_with_labels,
):
    """Store fragment K.2 with one ``signName`` annotation and check the
    first entry returned by ``/signs/signName/images``."""
    fragment = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("K.2"),
        text=text_with_labels,
    )
    fragment_repository.create(fragment)

    annotation = AnnotationFactory.build(
        data=AnnotationDataFactory.build(sign_name="signName", path=[2, 0, 0])
    )
    stored_annotations = AnnotationsFactory.build(
        fragment_number="K.2",
        annotations=[annotation],
    )
    annotations_repository.create_or_update(stored_annotations)

    result = client.simulate_get("/signs/signName/images")
    images = result.json

    assert len(images) > 0
    first_image = images[0]

    assert first_image["fragmentNumber"] == str(fragment.number)
    assert isinstance(first_image["image"], str)
    assert first_image["script"] == fragment.script
    assert first_image["label"] == "i Stone wig Stone wig 2"

    assert result.status == falcon.HTTP_OK
def test_museum_number() -> None:
    """A MuseumNumber exposes its three parts and renders them dot-separated."""
    subject = MuseumNumber(PREFIX, NUMBER, SUFFIX)

    actual_parts = (subject.prefix, subject.number, subject.suffix)
    assert actual_parts == (PREFIX, NUMBER, SUFFIX)
    assert str(subject) == f"{PREFIX}.{NUMBER}.{SUFFIX}"
Пример #15
0
def test_update_references(
    client, fragmentarium, bibliography, parallel_line_injector, user
):
    """POST a new reference list for a fragment and check that both the POST
    response and a following GET return the updated fragment DTO."""
    fragment = FragmentFactory.build()
    fragmentarium.create(fragment)
    reference = ReferenceFactory.build(with_document=True)
    bibliography.create(reference.document, ANY_USER)

    payload = json.dumps({"references": [ReferenceSchema().dump(reference)]})
    post_result = client.simulate_post(
        f"/fragments/{fragment.number}/references", body=payload
    )

    # Build the expected DTO only after the POST, as the original test did.
    with_references = fragment.set_references((reference,))
    injected_text = parallel_line_injector.inject_transliteration(fragment.text)
    expected_fragment = with_references.set_text(injected_text)
    is_k_1 = fragment.number == MuseumNumber("K", "1")
    expected_json = create_response_dto(expected_fragment, user, is_k_1)

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json

    get_result = client.simulate_get(f"/fragments/{fragment.number}")
    assert get_result.json == expected_json
def test_generate_annotations(annotations_repository, photo_repository,
                              changelog, when):
    """EblAiClient turns a single mocked boundary result into one annotation
    whose geometry starts at the origin."""
    fragment_number = MuseumNumber.of("X.0")

    detected_box = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    mocked_body = json.dumps({"boundaryResults": [detected_box]})

    # Stub the AI service endpoint that the client posts the image to.
    httpretty.register_uri(
        httpretty.POST,
        "http://mock-localhost:8001/generate",
        body=mocked_body,
        content_type="image/jpeg",
    )

    image_file = create_test_photo(fragment_number)
    ai_client = EblAiClient("http://mock-localhost:8001")

    generated = ai_client.generate_annotations(fragment_number, image_file)

    assert generated.fragment_number == fragment_number
    assert len(generated.annotations) == 1
    geometry = generated.annotations[0].geometry
    assert geometry.x == 0.0
    assert geometry.y == 0.0
    def rank_line_to_vec(self, candidate: str) -> LineToVecRanking:
        """Rank the transliterated fragments against ``candidate``.

        The candidate's line-to-vec encodings are obtained through
        ``_parse_candidate``. Every entry returned by the repository, except
        the candidate itself, is scored with both ``score`` and
        ``score_weighted`` and inserted into a ``LineToVecRanker``.

        Args:
            candidate: Museum number of the fragment to match, as a string.

        Returns:
            The ranker's ranking. No scores are inserted when the candidate
            has no line-to-vec encodings.
        """
        candidate_line_to_vecs = self._parse_candidate(candidate)
        line_to_vec_entries = (
            self._fragment_repository.query_transliterated_line_to_vec()
        )
        ranker = LineToVecRanker()
        if not candidate_line_to_vecs:
            return ranker.ranking

        # Parse the candidate number once; it was previously re-parsed inside
        # the filter predicate for every repository entry.
        candidate_number = MuseumNumber.of(candidate)
        other_entries = (
            entry
            for entry in line_to_vec_entries
            if entry.museum_number != candidate_number
        )
        for entry in other_entries:
            line_to_vec_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score(candidate_line_to_vecs, entry.line_to_vec),
            )
            line_to_vec_weighted_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score_weighted(candidate_line_to_vecs, entry.line_to_vec),
            )
            ranker.insert_score(line_to_vec_score, line_to_vec_weighted_score)

        return ranker.ranking
Пример #18
0
def test_generate_annotations(client, photo_repository):
    """GET the annotations route with ``generateAnnotations`` set and check
    that the mocked boundary result is returned for K.2."""
    fragment_number = MuseumNumber.of("K.2")

    detected_box = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    # Stub the AI service endpoint with a single boundary result.
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8001/generate",
        body=json.dumps({"boundaryResults": [detected_box]}),
        content_type="image/jpeg",
    )

    annotations_url = f"/fragments/{fragment_number}/annotations"
    result = client.simulate_get(
        annotations_url, params={"generateAnnotations": True}
    )

    assert result.status == falcon.HTTP_OK
    payload = result.json
    assert payload is not None
    assert payload["fragmentNumber"] == "K.2"
    geometry = payload["annotations"][0]["geometry"]
    assert geometry["x"] == 0.0
    assert geometry["y"] == 0.0
Пример #19
0
def test_update_lemmatization(client, fragmentarium, user, database):
    """POST a lemmatization with one changed token and check the POST
    response, a following GET, and the changelog entry."""
    transliterated_fragment = TransliteratedFragmentFactory.build()
    fragmentarium.create(transliterated_fragment)

    # Copy the token rows into lists so that one token can be replaced.
    rows = list(map(list, transliterated_fragment.text.lemmatization.tokens))
    replaced = rows[1][3]
    rows[1][3] = LemmatizationToken(replaced.value, ("aklu I",))
    lemmatization = Lemmatization(rows)

    post_result = client.simulate_post(
        f"/fragments/{transliterated_fragment.number}/lemmatization",
        body=LemmatizationSchema().dumps(lemmatization),
    )

    is_k_1 = transliterated_fragment.number == MuseumNumber("K", "1")
    expected_json = create_response_dto(
        transliterated_fragment.update_lemmatization(lemmatization),
        user,
        is_k_1,
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json

    get_result = client.simulate_get(
        f"/fragments/{transliterated_fragment.number}"
    )
    assert get_result.json == expected_json

    changelog_query = {
        "resource_id": str(transliterated_fragment.number),
        "resource_type": "fragments",
        "user_profile.name": user.profile["name"],
    }
    assert database["changelog"].find_one(changelog_query)
def _find_adjacent_museum_number_from_sequence(
    museum_number: MuseumNumber, cursor: Sequence[dict], is_endpoint=False
) -> Tuple[Optional[MuseumNumber], Optional[MuseumNumber]]:
    """Scan ``cursor`` for the neighbours of ``museum_number``.

    Each document's ``museumNumber`` sub-document is converted to a
    ``MuseumNumber`` and offered to ``_select_museum_between_two_values``,
    once with ``operator.lt`` for the previous candidate and once with
    ``operator.gt`` for the next candidate.

    When ``is_endpoint`` is true, the pair returned by
    ``_min_max_museum_numbers`` is also tracked across the scan. A missing
    previous value then falls back to the second element of that pair, and
    a missing next value to the first.

    Returns:
        A ``(previous, next)`` tuple; either element may be ``None``.
    """
    range_start = range_end = None
    predecessor = successor = None

    for document in cursor:
        raw_number = document["museumNumber"]
        # Constructed directly instead of through a schema load, for
        # performance reasons.
        candidate = MuseumNumber(
            prefix=raw_number["prefix"],
            number=raw_number["number"],
            suffix=raw_number["suffix"],
        )

        predecessor = _select_museum_between_two_values(
            museum_number, candidate, predecessor, operator.lt
        )
        successor = _select_museum_between_two_values(
            museum_number, candidate, successor, operator.gt
        )

        if is_endpoint:
            range_start, range_end = _min_max_museum_numbers(
                [range_start, range_end, candidate]
            )

    if not is_endpoint:
        return predecessor, successor
    # Fall back to the tracked pair when no neighbour was found.
    return predecessor or range_end, successor or range_start
def test_find_not_found(fragment_finder, fragment_repository, when):
    """A NotFoundError raised by the repository propagates out of ``find``."""
    missing_number = MuseumNumber("unknown", "id")
    stubbed_query = when(fragment_repository).query_by_museum_number(
        missing_number
    )
    stubbed_query.thenRaise(NotFoundError)

    with pytest.raises(NotFoundError):
        fragment_finder.find(missing_number)
def test_number_deserialization():
    """Loading a fragment whose ``museumNumber`` was overridden yields that
    museum number on the fragment."""
    number = MuseumNumber.of("Z.1.b")

    # Start from a dumped factory fragment and override only its number.
    serialized = dict(FragmentSchema().dump(LemmatizedFragmentFactory.build()))
    serialized["museumNumber"] = MuseumNumberSchema().dump(number)

    fragment = FragmentSchema().load(serialized)
    assert fragment.number == number
class AnnotationsFactory(factory.Factory):
    """Factory for ``Annotations`` with a sequential ``X.<n>`` fragment
    number and a list of two ``AnnotationFactory`` sub-objects."""

    class Meta:
        model = Annotations

    fragment_number = factory.Sequence(
        lambda index: MuseumNumber("X", f"{index}")
    )
    annotations = factory.List(
        [
            factory.SubFactory(AnnotationFactory),
            factory.SubFactory(AnnotationFactory),
        ]
    )
class JoinFactory(factory.Factory):
    """Factory for ``Join`` with a sequential ``X.<n>`` museum number and
    Faker-generated values for the remaining fields."""

    class Meta:
        model = Join

    museum_number = factory.Sequence(
        lambda index: MuseumNumber("X", f"{index}")
    )
    is_checked = factory.Faker("boolean")
    joined_by = factory.Faker("last_name")
    # The free-text fields below all use Faker sentences.
    date = factory.Faker("sentence")
    note = factory.Faker("sentence")
    legacy_data = factory.Faker("sentence")
class ParallelFragmentFactory(factory.Factory):
    """Factory for ``ParallelFragment`` with a sequential ``X.<n>`` museum
    number, Faker booleans, a fixed line number of 1 and ``exists`` unset."""

    class Meta:
        model = ParallelFragment

    has_cf = factory.Faker("boolean")
    museum_number = factory.Sequence(
        lambda index: MuseumNumber("X", f"{index}")
    )
    has_duplicates = factory.Faker("boolean")
    labels = factory.SubFactory(LabelsFactory)
    # Constant values shared by every built instance.
    line_number = LineNumber(1)
    exists = None
def test_fragment_matcher_route(client, fragmentarium, user):
    """Matching X.15 returns X.326 with score 3 and weighted score 5."""
    fragment_id = "X.15"

    queried = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.15"))
    matched = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("X.326"),
        line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
    )

    def single_result(value):
        # One-element ranking holding the matched fragment with this score.
        return [
            {"museumNumber": "X.326", "script": matched.script, "score": value}
        ]

    expected_score = {
        "score": single_result(3),
        "scoreWeighted": single_result(5),
    }

    for stored in (queried, matched):
        fragmentarium.create(stored)

    response = client.simulate_get(f"/fragments/{fragment_id}/match")
    assert response.status == falcon.HTTP_OK
    assert response.json == expected_score
def test_interesting(client, fragmentarium):
    """``/fragments?interesting`` returns the stored interesting fragment
    and sets no Cache-Control header."""
    stored = InterestingFragmentFactory.build(number=MuseumNumber("K", "1"))
    fragmentarium.create(stored)

    response = client.simulate_get("/fragments", params={"interesting": True})

    expected_body = [expected_fragment_info_dto(stored)]
    assert response.status == falcon.HTTP_OK
    assert response.json == expected_body
    assert "Cache-Control" not in response.headers
Пример #28
0
def test_empty_line_to_vec(fragment_matcher, when):
    """Ranking BM.11, built without an explicit line_to_vec, yields an empty
    ranking even though other entries carry encodings."""
    parameters = "BM.11"
    shared_encoding = (LineToVecEncoding.from_list([2, 1, 1]),)

    candidate_fragment = FragmentFactory.build(number=MuseumNumber.of("BM.11"))
    other_fragments = [
        FragmentFactory.build(
            number=MuseumNumber.of(raw_number), line_to_vec=shared_encoding
        )
        for raw_number in ("X.1", "X.2")
    ]

    repository = fragment_matcher._fragment_repository
    when(repository).query_by_museum_number(
        MuseumNumber.of(parameters)
    ).thenReturn(candidate_fragment)

    # The candidate's own entry has an empty encoding tuple.
    entries = [LineToVecEntry(candidate_fragment.number, "N/A", tuple())]
    entries.extend(
        LineToVecEntry(other.number, "N/A", shared_encoding)
        for other in other_fragments
    )
    when(repository).query_transliterated_line_to_vec().thenReturn(entries)

    expected_ranking = LineToVecRanking([], [])
    assert fragment_matcher.rank_line_to_vec(parameters) == expected_ranking
def test_update_transliteration_not_lowest_join(client,
                                                fragment_repository) -> None:
    """Posting a transliteration for X.2, which is joined with X.1, must be
    rejected with a 422 and an "Invalid transliteration" error body."""
    number = MuseumNumber("X", "2")
    fragment = FragmentFactory.build(number=number)
    join_groups = [[Join(number)], [Join(MuseumNumber("X", "1"))]]
    fragment_repository.create_join(join_groups)

    body = json.dumps({"transliteration": "1. kururu", "notes": ""})
    post_result = client.simulate_post(
        f"/fragments/{fragment.number}/transliteration", body=body
    )

    expected_error = {
        "title": "422 Unprocessable Entity",
        "description": "Invalid transliteration",
        "errors": [{"description": "Invalid value", "lineNumber": 1}],
    }
    assert post_result.status == falcon.HTTP_UNPROCESSABLE_ENTITY
    assert post_result.json == expected_error
def test_line_to_vec_ranking_schema():
    """LineToVecRankingSchema dumps both score lists with camelCase keys and
    museum numbers rendered as strings."""
    number_0 = MuseumNumber.of("X.0")
    number_1 = MuseumNumber.of("X.1")
    ranking = LineToVecRanking(
        score=[
            LineToVecScore(number_0, "N/A", 10),
            LineToVecScore(number_1, "N/A", 4),
        ],
        score_weighted=[
            LineToVecScore(number_0, "N/A", 15),
            LineToVecScore(number_1, "N/A", 7),
        ],
    )

    def dumped(museum_number, value):
        # Expected serialized form of a single LineToVecScore.
        return {"museumNumber": museum_number, "score": value, "script": "N/A"}

    expected = {
        "score": [dumped("X.0", 10), dumped("X.1", 4)],
        "scoreWeighted": [dumped("X.0", 15), dumped("X.1", 7)],
    }
    assert LineToVecRankingSchema().dump(ranking) == expected