def test_add_lowest_join_transliteration(user):
    """A non-empty transliteration on a non-lowest join member must be rejected."""
    # X.2 is not the lowest number of its join group (X.1 is), so the guarded
    # update is expected to raise NotLowestJoinError.
    not_lowest = FragmentFactory.build(
        number=MuseumNumber.of("X.2"),
        joins=Joins([[Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)]]),
    )
    update = TransliterationUpdate(parse_atf_lark(Atf("1. x x")), not_lowest.notes)

    with pytest.raises(NotLowestJoinError):
        not_lowest.update_lowest_join_transliteration(update, user)
def test_joins_lowest():
    """`lowest` is the smallest museum number among in-Fragmentarium joins."""
    first_group = (Join(MuseumNumber("B", "2"), is_in_fragmentarium=True),)
    second_group = (
        Join(MuseumNumber("B", "1"), is_in_fragmentarium=True),
        Join(MuseumNumber("A", "1"), is_in_fragmentarium=False),
    )
    joins = Joins((first_group, second_group))

    # A.1 sorts lower but is not in the Fragmentarium, so B.1 wins.
    assert_that(joins.lowest, equal_to(MuseumNumber("B", "1")))
def test_query_manuscripts_with_joins_by_chapter(database, text_repository) -> None:
    """Joins stored in their own collection are attached to chapter manuscripts."""
    when_chapter_in_collection(database)
    join = Join(MUSEUM_NUMBER)
    serialized_join = JoinSchema(exclude=["is_in_fragmentarium"]).dump(join)
    database[JOINS_COLLECTION].insert_one(
        {"fragments": [{**serialized_join, "group": 0}]}
    )

    expected = [attr.evolve(CHAPTER.manuscripts[0], joins=Joins(((join,),)))]
    assert (
        text_repository.query_manuscripts_with_joins_by_chapter(CHAPTER.id_)
        == expected
    )
def test_update_transliteration(
    number,
    ignore_lowest_join,
    fragment_updater,
    user,
    fragment_repository,
    changelog,
    parallel_line_injector,
    when,
):
    """A valid update is persisted, logged, and returned with injected parallels."""
    updated = TransliteratedFragmentFactory.build(
        number=number,
        joins=Joins([[Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)]]),
        line_to_vec=None,
    ).update_transliteration(
        TransliterationUpdate(parse_atf_lark(Atf("1. x x\n2. x")), "updated notes", "X X\nX"),
        user,
    )
    number = updated.number
    transliteration = TransliterationUpdate(
        parse_atf_lark(Atf("1. x x\n2. x")), "updated notes", "X X\nX"
    )
    injected_fragment = updated.set_text(
        parallel_line_injector.inject_transliteration(updated.text)
    )

    # Stub repository lookup, changelog entry creation, and persistence.
    when(fragment_repository).query_by_museum_number(number).thenReturn(updated)
    when(changelog).create(
        "fragments",
        user.profile,
        {"_id": str(number), **SCHEMA.dump(updated)},
        {"_id": str(number), **SCHEMA.dump(updated)},
    ).thenReturn()
    when(fragment_repository).update_transliteration(updated).thenReturn()

    result = fragment_updater.update_transliteration(
        number, transliteration, user, ignore_lowest_join
    )

    assert result == (injected_fragment, False)
class FragmentSchema(Schema):
    """Marshmallow schema (de)serializing ``Fragment`` domain objects.

    Loading rebuilds an immutable ``Fragment`` (``make_fragment``);
    dumping drops ``None``-valued keys (``filter_none``).
    """

    number = fields.Nested(MuseumNumberSchema, required=True, data_key="museumNumber")
    accession = fields.String(required=True)
    cdli_number = fields.String(required=True, data_key="cdliNumber")
    bm_id_number = fields.String(required=True, data_key="bmIdNumber")
    publication = fields.String(required=True)
    description = fields.String(required=True)
    collection = fields.String(required=True)
    script = fields.String(required=True)
    museum = fields.String(required=True)
    width = fields.Nested(MeasureSchema, required=True)
    length = fields.Nested(MeasureSchema, required=True)
    thickness = fields.Nested(MeasureSchema, required=True)
    # Pluck maps the "fragments" key of JoinsSchema to/from a Joins object;
    # missing joins load as an empty Joins.
    joins = fields.Pluck(JoinsSchema, "fragments", load_default=Joins())
    record = fields.Pluck(RecordSchema, "entries")
    folios = fields.Pluck(FoliosSchema, "entries")
    text = fields.Nested(TextSchema)
    signs = fields.String(load_default="")
    notes = fields.String(required=True)
    references = fields.Nested(ReferenceSchema, many=True, required=True)
    # None (as opposed to an empty list) marks "no uncurated references known".
    uncurated_references = fields.Nested(
        UncuratedReferenceSchema,
        many=True,
        data_key="uncuratedReferences",
        load_default=None,
    )
    genres = fields.Nested(GenreSchema, many=True, load_default=tuple())
    line_to_vec = fields.List(
        fields.List(ValueEnum(LineToVecEncoding)),
        load_default=tuple(),
        data_key="lineToVec",
    )

    @post_load
    def make_fragment(self, data, **kwargs):
        """Convert marshmallow's lists to tuples and build the Fragment."""
        data["references"] = tuple(data["references"])
        data["genres"] = tuple(data["genres"])
        data["line_to_vec"] = tuple(map(tuple, data["line_to_vec"]))
        if data["uncurated_references"] is not None:
            data["uncurated_references"] = tuple(data["uncurated_references"])
        return Fragment(**data)

    @post_dump
    def filter_none(self, data, **kwargs):
        """Omit None-valued keys from the dumped document."""
        return pydash.omit_by(data, pydash.is_none)
class ApiManuscriptSchema(ManuscriptSchema):
    """API-facing variant of ``ManuscriptSchema``.

    Overrides fields so that museum numbers and transliterations travel as
    plain ATF strings, and uses the API reference schema.
    """

    museum_number = MuseumNumberString(required=True, data_key="museumNumber")
    # Dump Text as its ATF source; load by parsing the ATF string back.
    colophon = fields.Function(
        lambda manuscript: manuscript.colophon.atf,
        _deserialize_transliteration,
        required=True,
    )
    unplaced_lines = fields.Function(
        lambda manuscript: manuscript.unplaced_lines.atf,
        _deserialize_transliteration,
        required=True,
        data_key="unplacedLines",
    )
    references = fields.Nested(ApiReferenceSchema, many=True, required=True)
    # Unlike the base schema, joins/is_in_fragmentarium are also dumped here
    # (no load_only flag).
    joins = fields.Pluck(JoinsSchema, "fragments", load_default=Joins())
    is_in_fragmentarium = fields.Boolean(load_default=False, data_key="isInFragmentarium")
def test_joins_fragments_sorting():
    """`fragments` sorts joins inside each group and orders the groups."""
    unsorted_joins = Joins(
        (
            (Join(MuseumNumber("B", "2")), Join(MuseumNumber("B", "1"))),
            (Join(MuseumNumber("Z", "0")), Join(MuseumNumber("A", "3"))),
        )
    )

    # The A/Z group comes first because A.3 sorts before B.1.
    assert_that(
        unsorted_joins.fragments,
        contains_exactly(
            contains_exactly(
                Join(MuseumNumber("A", "3")), Join(MuseumNumber("Z", "0"))
            ),
            contains_exactly(
                Join(MuseumNumber("B", "1")), Join(MuseumNumber("B", "2"))
            ),
        ),
    )
class Manuscript:
    """A manuscript witness with siglum data, transliterations, and joins.

    NOTE(review): presumably decorated with ``@attr.s(auto_attribs=True)``
    outside this view — the ``attr.ib`` validators below depend on attrs.
    """

    id: int
    siglum_disambiguator: str = ""
    museum_number: Optional[MuseumNumber] = None
    # attr.ib is needed here so validate_accession can be attached below.
    accession: str = attr.ib(default="")
    period_modifier: PeriodModifier = PeriodModifier.NONE
    period: Period = Period.NEO_ASSYRIAN
    # attr.ib is needed here so validate_provenance can be attached below.
    provenance: Provenance = attr.ib(default=Provenance.NINEVEH)
    type: ManuscriptType = ManuscriptType.LIBRARY
    notes: str = ""
    colophon: Text = Text()
    unplaced_lines: Text = Text()
    references: Sequence[Reference] = tuple()
    joins: Joins = Joins()
    is_in_fragmentarium: bool = False

    @accession.validator
    def validate_accession(self, _, value) -> None:
        # An accession number is only allowed when there is no museum number.
        if self.museum_number and value:
            raise ValueError("Accession given when museum number present.")

    @provenance.validator
    def validate_provenance(self, _, value) -> None:
        # Standard Text manuscripts must not carry period/type; all others must.
        if is_invalid_standard_text(value, self.period, self.type):
            raise ValueError(
                "Manuscript must not have period and type when provenance is Standard Text."
            )
        elif is_invalid_non_standard_text(value, self.period, self.type):
            raise ValueError(
                "Manuscript must have period and type unless provenance is Standard Text."
            )

    @property
    def text_lines(self) -> Sequence[TextLine]:
        """All text lines of the colophon followed by the unplaced lines."""
        return [*self.colophon.text_lines, *self.unplaced_lines.text_lines]

    @property
    def siglum(self) -> Siglum:
        """The manuscript's siglum derived from its classifying attributes."""
        return Siglum(self.provenance, self.period, self.type, self.siglum_disambiguator)
def test_update_update_transliteration_not_lowest_join(
    fragment_updater, user, fragment_repository, when
):
    """Without ignore_lowest_join, updating a non-lowest join member fails."""
    number = MuseumNumber.of("X.2")
    # X.1 is the lowest member of the join group, so X.2 must be rejected.
    not_lowest = TransliteratedFragmentFactory.build(
        number=number,
        joins=Joins([[Join(MuseumNumber.of("X.1"), is_in_fragmentarium=True)]]),
    )
    when(fragment_repository).query_by_museum_number(number).thenReturn(not_lowest)
    update = TransliterationUpdate(parse_atf_lark("1. x"), "updated notes", "X")

    with pytest.raises(NotLowestJoinError):
        fragment_updater.update_transliteration(number, update, user, False)
def test_query_by_museum_number_joins(database, fragment_repository):
    """Joins stored in their own collection are attached to the queried fragment."""
    museum_number = MuseumNumber("X", "1")
    joins = (
        Join(museum_number, is_in_fragmentarium=True),
        Join(MuseumNumber("X", "2"), is_in_fragmentarium=False),
    )
    fragment = LemmatizedFragmentFactory.build(
        number=museum_number, joins=Joins(((joins[0],), (joins[1],)))
    )
    # The fragment document itself is stored without joins...
    database[COLLECTION].insert_one(FragmentSchema(exclude=["joins"]).dump(fragment))
    # ...the joins live in a separate collection, each with its group index.
    database[JOINS_COLLECTION].insert_one(
        {
            "fragments": [
                {
                    **JoinSchema(exclude=["is_in_fragmentarium"]).dump(join),
                    "group": index,
                }
                for index, join in enumerate(joins)
            ]
        }
    )

    assert fragment_repository.query_by_museum_number(fragment.number) == fragment
def test_create_response_dto(user):
    """create_response_dto serializes a fragment into the API DTO shape,
    with None-valued keys omitted (mirrored here via pydash.omit_by)."""
    lemmatized_fragment = LemmatizedFragmentFactory.build(
        joins=Joins(((JoinFactory.build(),),))
    )
    has_photo = True
    assert create_response_dto(lemmatized_fragment, user, has_photo) == pydash.omit_by(
        {
            "museumNumber": attr.asdict(lemmatized_fragment.number),
            "accession": lemmatized_fragment.accession,
            "cdliNumber": lemmatized_fragment.cdli_number,
            "bmIdNumber": lemmatized_fragment.bm_id_number,
            "publication": lemmatized_fragment.publication,
            "description": lemmatized_fragment.description,
            # Joins are exposed as the raw "fragments" groups of JoinsSchema.
            "joins": JoinsSchema().dump(lemmatized_fragment.joins)["fragments"],
            # Measures drop their None attributes via the attr.asdict filter.
            "length": attr.asdict(
                lemmatized_fragment.length, filter=lambda _, value: value is not None
            ),
            "width": attr.asdict(
                lemmatized_fragment.width, filter=lambda _, value: value is not None
            ),
            "thickness": attr.asdict(
                lemmatized_fragment.thickness, filter=lambda _, value: value is not None
            ),
            "collection": lemmatized_fragment.collection,
            "script": lemmatized_fragment.script,
            "notes": lemmatized_fragment.notes,
            "museum": lemmatized_fragment.museum,
            "signs": lemmatized_fragment.signs,
            "record": [
                {"user": entry.user, "type": entry.type.value, "date": entry.date}
                for entry in lemmatized_fragment.record.entries
            ],
            # Folios are filtered by the requesting user's permissions.
            "folios": [
                attr.asdict(folio)
                for folio in lemmatized_fragment.folios.filter(user).entries
            ],
            "text": TextSchema().dump(lemmatized_fragment.text),
            "references": [
                {
                    "id": reference.id,
                    "type": reference.type.name,
                    "pages": reference.pages,
                    "notes": reference.notes,
                    "linesCited": list(reference.lines_cited),
                }
                for reference in lemmatized_fragment.references
            ],
            # None (not []) when no uncurated references exist; omit_by drops it.
            "uncuratedReferences": (
                [
                    attr.asdict(reference)
                    for reference in lemmatized_fragment.uncurated_references
                ]
                if lemmatized_fragment.uncurated_references is not None
                else None
            ),
            "atf": lemmatized_fragment.text.atf,
            "hasPhoto": has_photo,
            "genres": [
                {"category": genre.category, "uncertain": genre.uncertain}
                for genre in lemmatized_fragment.genres
            ],
            "lineToVec": [
                [
                    line_to_vec_encoding.value
                    for line_to_vec_encoding in line_to_vec_encodings
                ]
                for line_to_vec_encodings in lemmatized_fragment.line_to_vec
            ],
        },
        pydash.is_none,
    )
def test_serialization_and_deserialization():
    """Dumping then loading a fragment must round-trip to an equal object."""
    original = LemmatizedFragmentFactory.build(
        joins=Joins(((Join(MuseumNumber("X", "1")),),))
    )
    schema = FragmentSchema()

    assert schema.load(schema.dump(original)) == original
class ManuscriptSchema(Schema):
    """Marshmallow schema (de)serializing ``Manuscript`` objects.

    Enum-like fields (period, provenance, type) travel as their long names;
    joins and is_in_fragmentarium are load-only (not included in dumps).
    """

    id = fields.Integer(required=True)
    siglum_disambiguator = fields.String(required=True, data_key="siglumDisambiguator")
    museum_number: fields.Field = fields.Nested(
        MuseumNumberSchema, required=True, allow_none=True, data_key="museumNumber"
    )
    accession = fields.String(required=True)
    period_modifier = ValueEnum(PeriodModifier, required=True, data_key="periodModifier")
    period = fields.Function(
        lambda manuscript: manuscript.period.long_name,
        lambda value: Period.from_name(value),
        required=True,
    )
    provenance = fields.Function(
        lambda manuscript: manuscript.provenance.long_name,
        lambda value: Provenance.from_name(value),
        required=True,
    )
    type = fields.Function(
        lambda manuscript: manuscript.type.long_name,
        lambda value: ManuscriptType.from_name(value),
        required=True,
    )
    notes = fields.String(required=True)
    colophon: fields.Field = fields.Nested(
        TransliterationSchema, load_default=Transliteration()
    )
    unplaced_lines: fields.Field = fields.Nested(
        TransliterationSchema, load_default=Transliteration(), data_key="unplacedLines"
    )
    references = fields.Nested(ReferenceSchema, many=True, required=True)
    joins = fields.Pluck(JoinsSchema, "fragments", load_default=Joins(), load_only=True)
    is_in_fragmentarium = fields.Boolean(
        load_default=False, data_key="isInFragmentarium", load_only=True
    )

    @validates_schema
    def validate_provenance(self, data, **kwargs):
        """Reject period/type combinations inconsistent with the provenance."""
        # Fixed misspelled local name ("provenace").
        provenance = data["provenance"]
        period = data["period"]
        type_ = data["type"]
        if is_invalid_standard_text(provenance, period, type_):
            raise ValidationError(
                "period and type must be None if provenance is Standard Text")
        elif is_invalid_non_standard_text(provenance, period, type_):
            raise ValidationError(
                "period and type must not be None if provenance is not Standard Text"
            )

    @post_load
    def make_manuscript(self, data: dict, **kwargs) -> Manuscript:
        """Build a Manuscript from validated data.

        Keyword arguments are used instead of positional ones so the
        construction does not silently break if Manuscript's attribute
        order ever changes.
        """
        return Manuscript(
            id=data["id"],
            siglum_disambiguator=data["siglum_disambiguator"],
            museum_number=data["museum_number"],
            accession=data["accession"],
            period_modifier=data["period_modifier"],
            period=data["period"],
            provenance=data["provenance"],
            type=data["type"],
            notes=data["notes"],
            colophon=data["colophon"],
            unplaced_lines=data["unplaced_lines"],
            references=tuple(data["references"]),
            joins=data["joins"],
            is_in_fragmentarium=data["is_in_fragmentarium"],
        )
def test_default_joins():
    """A document dumped without joins loads back with the default empty Joins."""
    fragment = LemmatizedFragmentFactory.build(joins=Joins())
    dumped_without_joins = FragmentSchema(exclude=["joins"]).dump(fragment)

    assert FragmentSchema().load(dumped_without_joins) == fragment
def test_joins():
    """A freshly built fragment carries an empty Joins collection by default."""
    assert FragmentFactory.build().joins == Joins()
def test_joins_lowest_when_no_fragments():
    """An empty Joins collection has no lowest museum number."""
    empty = Joins()
    assert_that(empty.lowest, equal_to(None))
def make_joins(self, data, **kwargs):
    """Build an immutable Joins from the deserialized fragment groups."""
    groups = (tuple(group) for group in data["fragments"])
    return Joins(tuple(groups))
class Fragment:
    """A Fragmentarium fragment: museum identification, physical measures,
    transliterated text, and editorial metadata.

    Mutation methods return evolved copies (attr.evolve) rather than
    modifying the instance.
    """

    number: MuseumNumber
    accession: str = ""
    cdli_number: str = ""
    bm_id_number: str = ""
    publication: str = ""
    description: str = ""
    collection: str = ""
    script: str = ""
    museum: str = ""
    width: Measure = Measure()
    length: Measure = Measure()
    thickness: Measure = Measure()
    joins: Joins = Joins()
    record: Record = Record()
    folios: Folios = Folios()
    text: Text = Text()
    signs: str = ""
    notes: str = ""
    references: Sequence[Reference] = tuple()
    # None (as opposed to an empty tuple) marks "no uncurated references known".
    uncurated_references: Optional[Sequence[UncuratedReference]] = None
    genres: Sequence[Genre] = tuple()
    line_to_vec: Tuple[LineToVecEncodings, ...] = tuple()

    @property
    def is_lowest_join(self) -> bool:
        # A fragment with no joins (lowest is None) counts as the lowest.
        return (self.joins.lowest or self.number) == self.number

    def set_references(self, references: Sequence[Reference]) -> "Fragment":
        """Return a copy with the given references."""
        return attr.evolve(self, references=references)

    def set_text(self, text: Text) -> "Fragment":
        """Return a copy with the given text."""
        return attr.evolve(self, text=text)

    def update_lowest_join_transliteration(
            self, transliteration: TransliterationUpdate, user: User) -> "Fragment":
        """Apply the transliteration update only if allowed by join rules.

        An empty transliteration is always allowed; a non-empty one is only
        allowed on the lowest fragment of a join group.

        Raises:
            NotLowestJoinError: non-empty text on a non-lowest join member.
        """
        if transliteration.text.is_empty or self.is_lowest_join:
            return self.update_transliteration(transliteration, user)
        else:
            raise NotLowestJoinError(
                "Transliteration must be empty unless fragment is the lowest in join."
            )

    def update_transliteration(self, transliteration: TransliterationUpdate,
                               user: User) -> "Fragment":
        """Return a copy with merged text, updated notes/signs, a new record
        entry attributed to the user, and a recomputed line-to-vec encoding."""
        record = self.record.add_entry(self.text.atf, transliteration.text.atf, user)
        text = self.text.merge(transliteration.text)
        return attr.evolve(
            self,
            text=text,
            notes=transliteration.notes,
            signs=transliteration.signs,
            record=record,
            line_to_vec=create_line_to_vec(text.lines),
        )

    def set_genres(self, genres_new: Sequence[Genre]) -> "Fragment":
        """Return a copy with the given genres (stored as a tuple)."""
        return attr.evolve(self, genres=tuple(genres_new))

    def update_lemmatization(self, lemmatization: Lemmatization) -> "Fragment":
        """Return a copy with the lemmatization applied to the text."""
        text = self.text.update_lemmatization(lemmatization)
        return attr.evolve(self, text=text)

    def get_matching_lines(self, query: TransliterationQuery) -> Lines:
        """Return the ATF lines matched by the query, grouped per match.

        ``query.match`` yields (start, end) line-index pairs over the sign
        matrix; ``groupby`` (no key) collapses consecutive identical pairs so
        duplicate matches of the same range produce one group.
        """
        line_numbers = query.match(self.signs)
        lines = [line.atf for line in self.text.text_lines]
        return tuple(
            tuple(lines[numbers[0]:numbers[1] + 1])
            for numbers, _ in groupby(line_numbers))