def test_fossey_schema():
    """FosseySchema round-trips between a Fossey instance and its dict form."""
    museum_number = MuseumNumber.of("K.4562")
    sign_path = "M15,21.7c-0.1-0.1-0.2-0.4-0.2-0.8c-0.1-1-0.1-1.2-0.5-1.3c-0.2"
    data = {
        "page": 405,
        "number": 25728,
        "suffix": "B",
        "reference": "Mai: MDP, VI, 11.I, 11",
        "newEdition": "Paulus AOAT 50, 981",
        "secondaryLiterature": "NABU 1997/1",
        "cdliNumber": "P123456",
        "museumNumber": MuseumNumberSchema().dump(museum_number),
        "externalProject": "dcclt",
        "notes": "Das Zeichen ist eigentlich ZA₇",
        "date": "Marduk-apla-iddina I, 1171-1159 BC",
        "transliteration": "me-luḫ-ḫa",
        "sign": sign_path,
    }
    fossey = Fossey(
        405,
        25728,
        "B",
        "Mai: MDP, VI, 11.I, 11",
        "Paulus AOAT 50, 981",
        "NABU 1997/1",
        "P123456",
        museum_number,
        "dcclt",
        "Das Zeichen ist eigentlich ZA₇",
        "Marduk-apla-iddina I, 1171-1159 BC",
        "me-luḫ-ḫa",
        sign_path,
    )
    # load and dump must be exact inverses of each other.
    assert FosseySchema().load(data) == fossey
    assert FosseySchema().dump(fossey) == data
def test_fossey():
    """Fossey exposes each constructor argument through a named attribute."""
    sign_path = "M15,21.7c-0.1-0.1-0.2-0.4-0.2-0.8c-0.1-1-0.1-1.2-0.5-1.3c-0.2"
    fossey = Fossey(
        405,
        25728,
        "B",
        "Mai: MDP, VI, 11.I, 11",
        "Paulus AOAT 50, 981",
        "NABU 1997/1",
        "P123456",
        MuseumNumber("K", "4562"),
        "dcclt",
        "Das Zeichen ist eigentlich ZA₇",
        "Marduk-apla-iddina I, 1171-1159 BC",
        "me-luḫ-ḫa",
        sign_path,
    )
    # Attribute name -> expected value, in constructor-argument order.
    expected = {
        "page": 405,
        "number": 25728,
        "suffix": "B",
        "reference": "Mai: MDP, VI, 11.I, 11",
        "new_edition": "Paulus AOAT 50, 981",
        "secondary_literature": "NABU 1997/1",
        "cdli_number": "P123456",
        "museum_number": MuseumNumber("K", "4562"),
        "external_project": "dcclt",
        "notes": "Das Zeichen ist eigentlich ZA₇",
        "date": "Marduk-apla-iddina I, 1171-1159 BC",
        "transliteration": "me-luḫ-ḫa",
        "sign": sign_path,
    }
    for attribute, value in expected.items():
        assert getattr(fossey, attribute) == value
def test_find_transliterated(database, fragment_repository):
    """query_transliterated_numbers returns only transliterated fragments'
    numbers, in order, regardless of insertion order."""
    fragment_one = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.1"))
    fragment_two = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.2"))
    # Insert out of order, with a plain (untransliterated) fragment in between.
    documents = [
        {**SCHEMA.dump(fragment_two), "_id": str(fragment_two.number)},
        SCHEMA.dump(FragmentFactory.build()),
        {**SCHEMA.dump(fragment_one), "_id": str(fragment_one.number)},
    ]
    database[COLLECTION].insert_many(documents)

    expected = [fragment_one.number, fragment_two.number]
    assert fragment_repository.query_transliterated_numbers() == expected
def test_schema():
    """FragmentPagerInfoSchema dumps previous/next museum numbers as strings."""
    # Renamed from ``previous``/``next`` so the local does not shadow the
    # built-in ``next``.
    previous_number = MuseumNumber.of("X.1")
    next_number = MuseumNumber.of("X.1")
    fragment_info_pager = FragmentPagerInfo(previous_number, next_number)
    assert FragmentPagerInfoSchema().dump(fragment_info_pager) == {
        "next": "X.1",
        "previous": "X.1",
    }
def test_order_equal() -> None:
    """Two MuseumNumbers built from identical parts compare equal."""
    number_a = MuseumNumber("X", "B", "C")
    number_b = MuseumNumber("X", "B", "C")
    assert_that(number_a, equal_to(number_b))
def test_get_fragment_pager(client, fragmentarium):
    """The pager endpoint returns neighbouring fragment numbers and marks
    the response privately cacheable."""
    fragments = [
        FragmentFactory.build(number=MuseumNumber("X", str(index)))
        for index in range(3)
    ]
    for fragment in fragments:
        fragmentarium.create(fragment)

    result = client.simulate_get(f"/fragments/{fragments[1].number}/pager")

    assert result.json == {"next": "X.2", "previous": "X.0"}
    assert result.status == falcon.HTTP_OK
    assert result.headers["Cache-Control"] == "private, max-age=600"
def test_fragment_matcher_route_error(client, fragmentarium, user):
    """Matching against a malformed museum number responds with 422."""
    faulty_fragment_id = "X.-1"
    for fragment in [
        TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.0")),
        TransliteratedFragmentFactory.build(
            number=MuseumNumber.of("X.1"),
            line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
        ),
    ]:
        fragmentarium.create(fragment)

    get_result = client.simulate_get(f"/fragments/{faulty_fragment_id}/match")

    assert get_result.status == falcon.HTTP_UNPROCESSABLE_ENTITY
def test_joins_lowest():
    """Joins.lowest yields B.1 even though A.1 (not in the Fragmentarium)
    is part of the join groups."""
    join_b2 = Join(MuseumNumber("B", "2"), is_in_fragmentarium=True)
    join_b1 = Join(MuseumNumber("B", "1"), is_in_fragmentarium=True)
    join_a1 = Join(MuseumNumber("A", "1"), is_in_fragmentarium=False)
    joins = Joins(((join_b2,), (join_b1, join_a1)))

    assert_that(joins.lowest, equal_to(MuseumNumber("B", "1")))
def test_add_lowest_join_transliteration(user):
    """Updating the transliteration of a fragment that is not the lowest
    join raises NotLowestJoinError."""
    fragment = FragmentFactory.build(
        number=MuseumNumber.of("X.2"),
        joins=Joins([[Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)]]),
    )
    text = parse_atf_lark(Atf("1. x x"))
    update = TransliterationUpdate(text, fragment.notes)

    with pytest.raises(NotLowestJoinError):
        fragment.update_lowest_join_transliteration(update, user)
def test_query_next_and_previous_fragment(museum_numbers, fragment_repository):
    """query_next_and_previous_fragment wraps around at both ends of the
    ordered museum-number list.

    Fixes: ``fragmentNumber`` renamed to snake_case per PEP 8, and the
    repeated O(n) ``list.index`` lookups inside the loop replaced with
    ``enumerate``.
    """
    for fragment_number in museum_numbers:
        fragment_repository.create(
            FragmentFactory.build(number=MuseumNumber.of(fragment_number))
        )
    total = len(museum_numbers)
    for index, museum_number in enumerate(museum_numbers):
        results = fragment_repository.query_next_and_previous_fragment(
            MuseumNumber.of(museum_number)
        )
        # Modulo arithmetic makes the pager cyclic at both ends.
        assert results.previous == MuseumNumber.of(
            museum_numbers[(index - 1) % total]
        )
        assert results.next == MuseumNumber.of(museum_numbers[(index + 1) % total])
def test_get_fragment_pager_cache(cached_client, fragmentarium):
    """A cached pager response is reused even after a new neighbour appears."""
    fragmentarium.create(FragmentFactory.build(number=MuseumNumber("X", "0")))
    target = FragmentFactory.build(number=MuseumNumber("X", "3"))
    fragmentarium.create(target)

    url = f"/fragments/{target.number}/pager"
    first_result = cached_client.simulate_get(url)
    # The new fragment must not be visible through the cached response.
    fragmentarium.create(FragmentFactory.build(number=MuseumNumber("X", "2")))
    second_result = cached_client.simulate_get(url)

    assert first_result.json == second_result.json
    assert first_result.status == second_result.status
def test_find(fragment_repository, when, fragment_matcher):
    """_parse_candidate resolves a museum-number string to the fragment's
    line-to-vec encoding."""
    line_to_vec = (LineToVecEncoding.from_list((1, 2, 1, 1)),)
    number = MuseumNumber.of("BM.11")
    fragment = FragmentFactory.build(number=number, line_to_vec=line_to_vec)
    when(fragment_repository).query_by_museum_number(number).thenReturn(fragment)

    assert fragment_matcher._parse_candidate("BM.11") == line_to_vec
def test_signs_get(
    client,
    annotations_repository,
    photo_repository,
    when,
    fragment_repository,
    text_with_labels,
):
    """GET /signs/<name>/images returns annotated images with fragment metadata."""
    fragment = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("K.2"), text=text_with_labels
    )
    fragment_repository.create(fragment)
    annotation = AnnotationFactory.build(
        data=AnnotationDataFactory.build(sign_name="signName", path=[2, 0, 0])
    )
    annotations_repository.create_or_update(
        AnnotationsFactory.build(fragment_number="K.2", annotations=[annotation])
    )

    result = client.simulate_get("/signs/signName/images")

    assert result.status == falcon.HTTP_OK
    assert len(result.json) > 0
    first_entry = result.json[0]
    assert first_entry["fragmentNumber"] == str(fragment.number)
    assert isinstance(first_entry["image"], str)
    assert first_entry["script"] == fragment.script
    assert first_entry["label"] == "i Stone wig Stone wig 2"
def test_museum_number() -> None:
    """MuseumNumber stores its parts and renders as ``prefix.number.suffix``."""
    museum_number = MuseumNumber(PREFIX, NUMBER, SUFFIX)
    parts = {"prefix": PREFIX, "number": NUMBER, "suffix": SUFFIX}
    for part, value in parts.items():
        assert getattr(museum_number, part) == value
    assert str(museum_number) == f"{PREFIX}.{NUMBER}.{SUFFIX}"
def test_update_references(
    client, fragmentarium, bibliography, parallel_line_injector, user
):
    """POSTing references updates the fragment and persists the change."""
    fragment = FragmentFactory.build()
    fragmentarium.create(fragment)
    reference = ReferenceFactory.build(with_document=True)
    bibliography.create(reference.document, ANY_USER)

    url = f"/fragments/{fragment.number}/references"
    body = json.dumps({"references": [ReferenceSchema().dump(reference)]})
    post_result = client.simulate_post(url, body=body)

    updated_fragment = fragment.set_references((reference,)).set_text(
        parallel_line_injector.inject_transliteration(fragment.text)
    )
    expected_json = create_response_dto(
        updated_fragment,
        user,
        fragment.number == MuseumNumber("K", "1"),
    )

    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json
    # A subsequent GET must reflect the persisted update.
    get_result = client.simulate_get(f"/fragments/{fragment.number}")
    assert get_result.json == expected_json
def test_generate_annotations(annotations_repository, photo_repository, changelog, when):
    """EblAiClient turns the AI service's boundary results into annotations."""
    fragment_number = MuseumNumber.of("X.0")
    boundary = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    # Stub the AI service endpoint.
    httpretty.register_uri(
        httpretty.POST,
        "http://mock-localhost:8001/generate",
        body=json.dumps({"boundaryResults": [boundary]}),
        content_type="image/jpeg",
    )
    ebl_ai_client = EblAiClient("http://mock-localhost:8001")

    annotations = ebl_ai_client.generate_annotations(
        fragment_number, create_test_photo(fragment_number)
    )

    assert annotations.fragment_number == fragment_number
    assert len(annotations.annotations) == 1
    geometry = annotations.annotations[0].geometry
    assert geometry.x == 0.0
    assert geometry.y == 0.0
def rank_line_to_vec(self, candidate: str) -> LineToVecRanking:
    """Score every transliterated fragment against *candidate* and return
    the ranking (plain and weighted scores).

    The candidate itself is excluded from the ranking; if it has no
    line-to-vec encoding the ranking is empty.

    Fix: ``MuseumNumber.of(candidate)`` was re-parsed inside the filter
    lambda for every entry — it is loop-invariant and now hoisted.
    """
    candidate_line_to_vecs = self._parse_candidate(candidate)
    line_to_vec_entries = (
        self._fragment_repository.query_transliterated_line_to_vec()
    )
    ranker = LineToVecRanker()
    if candidate_line_to_vecs:
        candidate_number = MuseumNumber.of(candidate)
        for entry in line_to_vec_entries:
            if entry.museum_number == candidate_number:
                continue  # never rank the candidate against itself
            line_to_vec_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score(candidate_line_to_vecs, entry.line_to_vec),
            )
            line_to_vec_weighted_score = LineToVecScore(
                entry.museum_number,
                entry.script,
                score_weighted(candidate_line_to_vecs, entry.line_to_vec),
            )
            ranker.insert_score(line_to_vec_score, line_to_vec_weighted_score)
    return ranker.ranking
def test_generate_annotations(client, photo_repository):
    """GET with generateAnnotations=True proxies the AI service's boundaries."""
    fragment_number = MuseumNumber.of("K.2")
    boundary = {
        "top_left_x": 0.0,
        "top_left_y": 0.0,
        "width": 10.0,
        "height": 10.0,
        "probability": 0.99,
    }
    # Stub the AI service endpoint.
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8001/generate",
        body=json.dumps({"boundaryResults": [boundary]}),
        content_type="image/jpeg",
    )

    result = client.simulate_get(
        f"/fragments/{fragment_number}/annotations",
        params={"generateAnnotations": True},
    )

    assert result.status == falcon.HTTP_OK
    assert result.json is not None
    assert result.json["fragmentNumber"] == "K.2"
    geometry = result.json["annotations"][0]["geometry"]
    assert geometry["x"] == 0.0
    assert geometry["y"] == 0.0
def test_update_lemmatization(client, fragmentarium, user, database):
    """POSTing a lemmatization updates the fragment and writes a changelog
    entry for the acting user."""
    transliterated_fragment = TransliteratedFragmentFactory.build()
    fragmentarium.create(transliterated_fragment)
    tokens = [
        list(line) for line in transliterated_fragment.text.lemmatization.tokens
    ]
    # Lemmatize a single token to produce a minimal update.
    tokens[1][3] = LemmatizationToken(tokens[1][3].value, ("aklu I",))
    lemmatization = Lemmatization(tokens)

    url = f"/fragments/{transliterated_fragment.number}/lemmatization"
    post_result = client.simulate_post(
        url, body=LemmatizationSchema().dumps(lemmatization)
    )

    expected_json = create_response_dto(
        transliterated_fragment.update_lemmatization(lemmatization),
        user,
        transliterated_fragment.number == MuseumNumber("K", "1"),
    )
    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == expected_json

    get_result = client.simulate_get(
        f"/fragments/{transliterated_fragment.number}"
    )
    assert get_result.json == expected_json

    assert database["changelog"].find_one(
        {
            "resource_id": str(transliterated_fragment.number),
            "resource_type": "fragments",
            "user_profile.name": user.profile["name"],
        }
    )
def _find_adjacent_museum_number_from_sequence(
    museum_number: MuseumNumber, cursor: Sequence[dict], is_endpoint: bool = False
) -> Tuple[Optional[MuseumNumber], Optional[MuseumNumber]]:
    """Scan *cursor* for the museum numbers adjacent to *museum_number*.

    Returns a ``(previous, next)`` pair. When ``is_endpoint`` is True the
    result wraps around: a missing previous falls back to the overall last
    number in the sequence, and a missing next to the overall first.
    Either element may be ``None`` when the cursor is empty.
    """
    first = None
    last = None
    current_prev = None
    current_next = None
    for current_cursor in cursor:
        museum_number_dict = current_cursor["museumNumber"]
        # Not use MuseumNumber().load(current_cursor["museumNumber"]) because of
        # performance reasons
        current_museum_number = MuseumNumber(
            prefix=museum_number_dict["prefix"],
            number=museum_number_dict["number"],
            suffix=museum_number_dict["suffix"],
        )
        # NOTE(review): the helper presumably keeps the candidate closest to
        # the target on the given side (lt = below, gt = above) — confirm.
        current_prev = _select_museum_between_two_values(
            museum_number, current_museum_number, current_prev, operator.lt
        )
        current_next = _select_museum_between_two_values(
            museum_number, current_museum_number, current_next, operator.gt
        )
        if is_endpoint:
            # Track the global extremes for wrap-around at the sequence ends.
            first, last = _min_max_museum_numbers([first, last, current_museum_number])
    if is_endpoint:
        current_prev = current_prev or last
        current_next = current_next or first
    return current_prev, current_next
def test_find_not_found(fragment_finder, fragment_repository, when):
    """find propagates NotFoundError raised by the repository."""
    number = MuseumNumber("unknown", "id")
    when(fragment_repository).query_by_museum_number(number).thenRaise(NotFoundError)

    with pytest.raises(NotFoundError):
        fragment_finder.find(number)
def test_number_deserialization():
    """FragmentSchema loads the museumNumber field into a MuseumNumber."""
    number = MuseumNumber.of("Z.1.b")
    data = {
        **FragmentSchema().dump(LemmatizedFragmentFactory.build()),
        "museumNumber": MuseumNumberSchema().dump(number),
    }
    fragment = FragmentSchema().load(data)

    assert fragment.number == number
class AnnotationsFactory(factory.Factory):
    """Builds Annotations with a sequential fragment number and two annotations."""

    class Meta:
        model = Annotations

    fragment_number = factory.Sequence(
        lambda counter: MuseumNumber("X", str(counter))
    )
    annotations = factory.List(
        [
            factory.SubFactory(AnnotationFactory),
            factory.SubFactory(AnnotationFactory),
        ]
    )
class JoinFactory(factory.Factory):
    """Builds Join instances with a sequential museum number and faked metadata."""

    class Meta:
        model = Join

    museum_number = factory.Sequence(
        lambda counter: MuseumNumber("X", str(counter))
    )
    is_checked = factory.Faker("boolean")
    joined_by = factory.Faker("last_name")
    date = factory.Faker("sentence")
    note = factory.Faker("sentence")
    legacy_data = factory.Faker("sentence")
class ParallelFragmentFactory(factory.Factory):
    """Builds ParallelFragment lines pointing at a sequential museum number."""

    class Meta:
        model = ParallelFragment

    has_cf = factory.Faker("boolean")
    museum_number = factory.Sequence(
        lambda counter: MuseumNumber("X", str(counter))
    )
    has_duplicates = factory.Faker("boolean")
    labels = factory.SubFactory(LabelsFactory)
    line_number = LineNumber(1)
    exists = None
def test_fragment_matcher_route(client, fragmentarium, user):
    """The match endpoint ranks other transliterated fragments against the id."""
    fragment_id = "X.15"
    candidate = TransliteratedFragmentFactory.build(number=MuseumNumber.of("X.15"))
    match = TransliteratedFragmentFactory.build(
        number=MuseumNumber.of("X.326"),
        line_to_vec=(LineToVecEncoding.from_list([1, 1, 2]),),
    )
    fragmentarium.create(candidate)
    fragmentarium.create(match)

    get_result = client.simulate_get(f"/fragments/{fragment_id}/match")

    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == {
        "score": [
            {"museumNumber": "X.326", "script": match.script, "score": 3}
        ],
        "scoreWeighted": [
            {"museumNumber": "X.326", "script": match.script, "score": 5}
        ],
    }
def test_interesting(client, fragmentarium):
    """The interesting filter returns matching fragments and no cache header."""
    interesting_fragment = InterestingFragmentFactory.build(
        number=MuseumNumber("K", "1")
    )
    fragmentarium.create(interesting_fragment)

    result = client.simulate_get("/fragments", params={"interesting": True})

    assert result.status == falcon.HTTP_OK
    assert result.json == [expected_fragment_info_dto(interesting_fragment)]
    assert "Cache-Control" not in result.headers
def test_empty_line_to_vec(fragment_matcher, when):
    """A candidate without a line-to-vec encoding yields an empty ranking."""
    candidate_id = "BM.11"
    other_line_to_vec = (LineToVecEncoding.from_list([2, 1, 1]),)
    candidate = FragmentFactory.build(number=MuseumNumber.of("BM.11"))
    other_one = FragmentFactory.build(
        number=MuseumNumber.of("X.1"), line_to_vec=other_line_to_vec
    )
    other_two = FragmentFactory.build(
        number=MuseumNumber.of("X.2"), line_to_vec=other_line_to_vec
    )
    repository = fragment_matcher._fragment_repository
    when(repository).query_by_museum_number(
        MuseumNumber.of(candidate_id)
    ).thenReturn(candidate)
    when(repository).query_transliterated_line_to_vec().thenReturn(
        [
            LineToVecEntry(candidate.number, "N/A", tuple()),
            LineToVecEntry(other_one.number, "N/A", other_line_to_vec),
            LineToVecEntry(other_two.number, "N/A", other_line_to_vec),
        ]
    )

    assert fragment_matcher.rank_line_to_vec(candidate_id) == LineToVecRanking([], [])
def test_update_transliteration_not_lowest_join(client, fragment_repository) -> None:
    """Transliterating a fragment that is not the lowest join gives 422."""
    number = MuseumNumber("X", "2")
    fragment = FragmentFactory.build(number=number)
    # X.1 is also in the join group, so X.2 is not the lowest join.
    fragment_repository.create_join([[Join(number)], [Join(MuseumNumber("X", "1"))]])

    url = f"/fragments/{fragment.number}/transliteration"
    body = json.dumps({"transliteration": "1. kururu", "notes": ""})
    post_result = client.simulate_post(url, body=body)

    assert post_result.status == falcon.HTTP_UNPROCESSABLE_ENTITY
    assert post_result.json == {
        "title": "422 Unprocessable Entity",
        "description": "Invalid transliteration",
        "errors": [{"description": "Invalid value", "lineNumber": 1}],
    }
def test_line_to_vec_ranking_schema():
    """LineToVecRankingSchema dumps both score lists with serialized numbers."""

    def make_scores(first_score, second_score):
        # Two fixed fragments, parameterized only by their scores.
        return [
            LineToVecScore(MuseumNumber.of("X.0"), "N/A", first_score),
            LineToVecScore(MuseumNumber.of("X.1"), "N/A", second_score),
        ]

    line_to_vec_ranking = LineToVecRanking(
        score=make_scores(10, 4),
        score_weighted=make_scores(15, 7),
    )

    assert LineToVecRankingSchema().dump(line_to_vec_ranking) == {
        "score": [
            {"museumNumber": "X.0", "score": 10, "script": "N/A"},
            {"museumNumber": "X.1", "score": 4, "script": "N/A"},
        ],
        "scoreWeighted": [
            {"museumNumber": "X.0", "score": 15, "script": "N/A"},
            {"museumNumber": "X.1", "score": 7, "script": "N/A"},
        ],
    }