def test_extent_overlapping_languages() -> None:
    """Overlapping translation extents are accepted when the languages differ."""

    def bu_line(number: int):
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    # Must not raise: "en" covers lines 1-2 while "de" covers line 2.
    Text.of_iterable(
        [
            bu_line(1),
            TranslationLine(tuple(), "en", Extent(LineNumber(2))),
            bu_line(2),
            TranslationLine(tuple(), "de"),
        ]
    )
def test_extent_before_translation() -> None:
    """An extent pointing at a line preceding the translation is rejected."""
    lines = [
        TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
        TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
        # Translation follows line 2 but its extent points back at line 1.
        TranslationLine(tuple(), "en", Extent(LineNumber(1))),
    ]
    with pytest.raises(ValueError):
        Text.of_iterable(lines)
@pytest.fixture
def text_with_labels():
    """Two text lines separated by column, surface, and object at-lines.

    NOTE(review): this function is requested as a fixture argument by
    test_labels in this file, so it must be registered with
    @pytest.fixture; the decorator was missing.
    """
    return Text.of_iterable(
        [
            TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
            ColumnAtLine(ColumnLabel.from_int(1)),
            SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")),
            ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig")),
            TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
        ]
    )
def test_exent_overlapping() -> None:
    """Two same-language translations with overlapping extents are rejected."""
    # NOTE(review): "exent" in the test name looks like a typo for "extent";
    # renaming is left to a separate change.
    lines = [
        TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]),
        TranslationLine(tuple(), extent=Extent(LineNumber(2))),
        TextLine.of_iterable(LineNumber(2), [Word.of([Reading.of_name("bu")])]),
        TranslationLine(tuple()),
    ]
    with pytest.raises(ValueError):
        Text.of_iterable(lines)
def test_labels(text_with_labels) -> None:
    """labels pairs each text line with the column/surface/object context in force."""
    expected = [
        # Line 1 precedes any at-lines, so it carries no labels.
        LineLabel(None, None, None, LineNumber(1)),
        LineLabel(
            ColumnLabel.from_int(1),
            SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"),
            ObjectLabel([], atf.Object.OBJECT, "Stone wig"),
            LineNumber(2),
        ),
    ]
    assert text_with_labels.labels == expected
def test_invalid_extent() -> None:
    """A translation extent past the chapter's last line raises ValueError."""
    translation = (TranslationLine(tuple(), extent=Extent(LineNumber(2))),)
    with pytest.raises(ValueError):
        Chapter(
            TextId(GENRE, 0, 0),
            manuscripts=(Manuscript(MANUSCRIPT_ID),),
            lines=(Line(LineNumber(1), (LINE_VARIANT_1,), translation=translation),),
        )
class ParallelCompositionFactory(factory.Factory):
    """Build ParallelComposition lines with randomized cf flag and name."""

    class Meta:
        model = ParallelComposition

    has_cf = factory.Faker("boolean")
    name = factory.Faker("sentence")
    line_number = LineNumber(1)
def make_line_number(self, data: dict, **kwargs) -> LineNumber:
    """Assemble a LineNumber from the deserialized payload fields."""
    fields = ("number", "has_prime", "prefix_modifier", "suffix_modifier")
    return LineNumber(*(data[field] for field in fields))
def test_dump_line() -> None:
    """TextSchema.dump emits lines, parser version, and the text-line count.

    Added the missing ``-> None`` return annotation for consistency with the
    other tests in this module.
    """
    text = Text(
        (
            TextLine.of_iterable(
                LineNumber(1),
                [
                    Word.of(
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ]
                    )
                ],
            ),
            EmptyLine(),
            ControlLine("#", " comment"),
        ),
        "1.0.0",
    )
    assert TextSchema().dump(text) == {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        # Only the TextLine counts; Empty/Control lines are excluded.
        "numberOfLines": 1,
    }
class ManuscriptLineFactory(factory.Factory):
    """Build ManuscriptLine instances with sequential ids and line numbers."""

    class Meta:
        model = ManuscriptLine

    manuscript_id = factory.Sequence(lambda n: n)
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # Each generated line reads "ku-[nu-ši]" at a sequential line number.
    line = factory.Sequence(
        lambda n: TextLine.of_iterable(
            LineNumber(n),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    paratext = (NoteLine((StringPart("note"),)), RulingDollarLine(Ruling.SINGLE))
    omitted_words = (1,)
def test_update_lemmatization() -> None:
    """update_lemmatization applies a token's unique lemma to the matching word."""
    tokens = [list(line) for line in TEXT.lemmatization.tokens]
    # Replace the first token of the first line with a lemmatized copy.
    tokens[0][0] = LemmatizationToken(tokens[0][0].value, (WordId("nu I"),))
    new_lemmatization = Lemmatization(tokens)

    expected = Text(
        (
            TextLine(
                LineNumber(1),
                (
                    Word.of(
                        unique_lemma=(WordId("nu I"),),
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ],
                    ),
                ),
            ),
            RulingDollarLine(atf.Ruling.SINGLE),
        ),
        TEXT.parser_version,
    )
    assert TEXT.update_lemmatization(new_lemmatization) == expected
def test_updating(client, bibliography, sign_repository, signs, text_repository) -> None:
    """POSTing edited lines updates the chapter; the update is visible via GET."""
    allow_signs(signs, sign_repository)
    chapter = ChapterFactory.build()
    allow_references(chapter, bibliography)
    text_repository.create_chapter(chapter)

    updated_chapter = attr.evolve(
        chapter,
        lines=(attr.evolve(chapter.lines[0], number=LineNumber(1, True)),),
        parser_version=ATF_PARSER_VERSION,
    )
    edited = [
        {"index": index, "line": line}
        for index, line in enumerate(create_chapter_dto(updated_chapter)["lines"])
    ]
    body = {"new": [], "deleted": [], "edited": edited}

    post_result = client.simulate_post(
        create_chapter_url(chapter, "/lines"), body=json.dumps(body)
    )
    assert post_result.status == falcon.HTTP_OK
    assert post_result.json == create_chapter_dto(updated_chapter)

    get_result = client.simulate_get(create_chapter_url(chapter))
    assert get_result.status == falcon.HTTP_OK
    assert get_result.json == create_chapter_dto(updated_chapter)
def test_extant_lines() -> None:
    """extant_lines groups surviving line numbers by manuscript siglum and labels."""
    manuscript = Manuscript(MANUSCRIPT_ID)
    manuscript_line = LINE_VARIANT_1.manuscripts[0]
    chapter = Chapter(
        TEXT_ID,
        manuscripts=(manuscript,),
        lines=(
            Line(LineNumber(1), (LINE_VARIANT_1,)),
            Line(LineNumber(2), (LINE_VARIANT_2,)),
        ),
    )
    expected = {
        manuscript.siglum: {
            manuscript_line.labels: [
                ExtantLine(manuscript_line.labels, LineNumber(1), True)
            ]
        }
    }
    assert chapter.extant_lines == expected
def test_overlapping_languages() -> None:
    """Overlapping translation extents are allowed across different languages."""
    english = (TranslationLine(tuple(), "en", Extent(LineNumber(2))),)
    german = (TranslationLine(tuple(), "de"),)
    # Must not raise: "en" spans lines 1-2, "de" covers line 2.
    Chapter(
        TextId(GENRE, 0, 0),
        manuscripts=(Manuscript(MANUSCRIPT_ID),),
        lines=(
            Line(LineNumber(1), (LINE_VARIANT_1,), translation=english),
            Line(LineNumber(2), (LINE_VARIANT_2,), translation=german),
        ),
    )
def test_statistics(database, fragment_repository) -> None:
    """Counts ignore control/empty lines and untransliterated fragments.

    Added the missing ``-> None`` annotation and named the three fixture
    fragments so the expected counts below are easy to trace.
    """
    # Fragment 1: one text line plus a control line and an empty line.
    first = FragmentFactory.build(
        text=Text(
            (
                TextLine(
                    LineNumber(1),
                    (
                        Word.of([Reading.of_name("first")]),
                        Word.of([Reading.of_name("line")]),
                    ),
                ),
                ControlLine("#", "ignore"),
                EmptyLine(),
            )
        )
    )
    # Fragment 2: three text lines interleaved with control lines.
    second = FragmentFactory.build(
        text=Text(
            (
                ControlLine("#", "ignore"),
                TextLine(LineNumber(1), (Word.of([Reading.of_name("second")]),)),
                TextLine(LineNumber(2), (Word.of([Reading.of_name("third")]),)),
                ControlLine("#", "ignore"),
                TextLine(LineNumber(3), (Word.of([Reading.of_name("fourth")]),)),
            )
        )
    )
    # Fragment 3: no text at all — not counted as transliterated.
    empty = FragmentFactory.build(text=Text())

    database[COLLECTION].insert_many(
        [SCHEMA.dump(first), SCHEMA.dump(second), SCHEMA.dump(empty)]
    )

    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4  # 1 + 3 text lines
def test_updating_lines_edit(
    corpus,
    text_repository,
    bibliography,
    changelog,
    signs,
    sign_repository,
    user,
    when,
) -> None:
    """update_lines with an edited line stores it and refreshes signs/parser version."""
    # Build the replacement bottom-up instead of nesting attr.evolve calls.
    edited_text_line = TextLine.of_iterable(
        LineNumber(1, True),
        (
            Word.of(
                [
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    BrokenAway.open(),
                    Reading.of_name("ku"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                    BrokenAway.close(),
                ]
            ),
        ),
    )
    edited_manuscript = attr.evolve(
        CHAPTER.lines[0].variants[0].manuscripts[0], line=edited_text_line
    )
    edited_variant = attr.evolve(
        CHAPTER.lines[0].variants[0], manuscripts=(edited_manuscript,)
    )
    edited_line = attr.evolve(
        CHAPTER.lines[0], number=LineNumber(1, True), variants=(edited_variant,)
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        lines=(edited_line,),
        signs=("ABZ075 KU ABZ207a\\u002F207b\\u0020X\nKU\nABZ075",),
        parser_version=ATF_PARSER_VERSION,
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )
    assert (
        corpus.update_lines(
            CHAPTER.id_, LinesUpdate([], set(), {0: updated_chapter.lines[0]}), user
        )
        == updated_chapter
    )
class ParallelTextFactory(factory.Factory):
    """Build ParallelText lines referencing a generated text id and chapter name."""

    class Meta:
        model = ParallelText

    has_cf = factory.Faker("boolean")
    text = factory.SubFactory(TextIdFactory)
    chapter = factory.SubFactory(ChapterNameFactory)
    line_number = LineNumber(1)
    exists = None
    implicit_chapter = None
class ParallelFragmentFactory(factory.Factory):
    """Build ParallelFragment lines with sequential X-collection museum numbers."""

    class Meta:
        model = ParallelFragment

    has_cf = factory.Faker("boolean")
    museum_number = factory.Sequence(lambda n: MuseumNumber("X", str(n)))
    has_duplicates = factory.Faker("boolean")
    labels = factory.SubFactory(LabelsFactory)
    line_number = LineNumber(1)
    exists = None
def test_parallel_composition(cf, display_value) -> None:
    """ParallelComposition exposes its fields and derives display/atf/lemmatization."""
    name = "my name"
    line_number = LineNumber(1)
    line = ParallelComposition(cf, name, line_number)

    assert line.has_cf is cf
    assert line.name == name
    assert line.line_number == line_number
    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value),)
def test_query_lemmas_ignores_in_value(
    parts, expected, fragment_repository, lemma_repository
) -> None:
    """query_lemmas finds the lemma regardless of markers inside the word parts.

    Added the missing ``-> None`` annotation for consistency with the other
    tests in this module.
    """
    fragment = FragmentFactory.build(
        text=Text.of_iterable(
            [
                TextLine.of_iterable(
                    LineNumber(1),
                    [Word.of(parts, unique_lemma=(WordId("ana I"),))],
                )
            ]
        ),
        signs="DIŠ",
    )
    fragment_repository.create(fragment)

    assert lemma_repository.query_lemmas("ana", False) == expected
def test_parallel_text(cf, chapter, display_value) -> None:
    """ParallelText exposes its fields and derives display/atf/lemmatization."""
    text_id = TextId(Genre.LITERATURE, 1, 1)
    line_number = LineNumber(1)
    line = ParallelText(cf, text_id, chapter, line_number)

    assert line.has_cf is cf
    assert line.text == text_id
    assert line.chapter == chapter
    assert line.line_number == line_number
    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value),)
def test_parallel_fragment(cf, duplicates, labels, display_value) -> None:
    """ParallelFragment exposes its fields and derives display/atf/lemmatization."""
    museum_number = MuseumNumber.of("K.1")
    line_number = LineNumber(1)
    line = ParallelFragment(cf, museum_number, duplicates, labels, line_number)

    assert line.has_cf is cf
    assert line.museum_number == museum_number
    assert line.has_duplicates is duplicates
    assert line.labels == labels
    assert line.line_number == line_number
    assert line.display_value == display_value
    assert line.atf == Atf(f"// {display_value}")
    assert line.lemmatization == (LemmatizationToken(display_value),)
class LineFactory(factory.Factory):
    """Build Line instances; Params.variant shares one manuscript id per line."""

    class Meta:
        model = Line

    class Params:
        manuscript_id = factory.Sequence(lambda n: n)
        variant = factory.SubFactory(
            LineVariantFactory,
            manuscript_id=factory.SelfAttribute("..manuscript_id"),
        )

    number = factory.Sequence(lambda n: LineNumber(n))
    variants = factory.List([factory.SelfAttribute("..variant")], TupleFactory)
    is_second_line_of_parallelism = factory.Faker("boolean")
    is_beginning_of_section = factory.Faker("boolean")
    translation = (TranslationLine((StringPart("foo"),), "en", None),)
class ManuscriptFactory(factory.Factory):
    """Build Manuscript instances, alternating museum numbers and accessions."""

    class Meta:
        model = Manuscript

    id = factory.Sequence(lambda n: n + 1)
    siglum_disambiguator = factory.Faker("word")
    # Odd sequence values get a museum number, even ones an accession, so the
    # two identifier styles are both exercised.
    museum_number = factory.Sequence(
        lambda n: MuseumNumber("M", str(n)) if pydash.is_odd(n) else None
    )
    accession = factory.Sequence(lambda n: f"A.{n}" if pydash.is_even(n) else "")
    period_modifier = factory.fuzzy.FuzzyChoice(PeriodModifier)
    period = factory.fuzzy.FuzzyChoice(set(Period) - {Period.NONE})
    provenance = factory.fuzzy.FuzzyChoice(set(Provenance) - {Provenance.STANDARD_TEXT})
    type = factory.fuzzy.FuzzyChoice(set(ManuscriptType) - {ManuscriptType.NONE})
    notes = factory.Faker("sentence")
    colophon = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("ku")]),))]
    )
    unplaced_lines = Transliteration.of_iterable(
        [TextLine.of_iterable(LineNumber(1, True), (Word.of([Reading.of_name("nu")]),))]
    )
    references = factory.List(
        [factory.SubFactory(ReferenceFactory, with_document=True)], TupleFactory
    )
def test_updating_manuscripts(
    corpus,
    text_repository,
    bibliography,
    changelog,
    signs,
    sign_repository,
    user,
    when,
) -> None:
    """update_manuscripts replaces colophon/unplaced lines/notes and uncertain fragments."""
    uncertain_fragments = (MuseumNumber.of("K.1"),)
    edited_manuscript = attr.evolve(
        CHAPTER.manuscripts[0],
        colophon=Transliteration.of_iterable(
            [
                TextLine.of_iterable(
                    LineNumber(1, True), (Word.of([Reading.of_name("ba")]),)
                )
            ]
        ),
        unplaced_lines=Transliteration.of_iterable(
            [
                TextLine.of_iterable(
                    LineNumber(1, True), (Word.of([Reading.of_name("ku")]),)
                )
            ]
        ),
        notes="Updated manuscript.",
    )
    updated_chapter = attr.evolve(
        CHAPTER,
        manuscripts=(edited_manuscript,),
        uncertain_fragments=uncertain_fragments,
        signs=("KU ABZ075 ABZ207a\\u002F207b\\u0020X\nBA\nKU",),
    )
    expect_find_and_update_chapter(
        bibliography,
        changelog,
        CHAPTER_WITHOUT_DOCUMENTS,
        updated_chapter,
        signs,
        sign_repository,
        text_repository,
        user,
        when,
    )
    manuscripts = (updated_chapter.manuscripts[0],)
    assert (
        corpus.update_manuscripts(CHAPTER.id_, manuscripts, uncertain_fragments, user)
        == updated_chapter
    )
def test_parse_normalized_akkadain_shift() -> None:
    """%n switches parsing to normalized Akkadian; %sux then switches to Sumerian."""
    # NOTE(review): "akkadain" in the test name looks like a typo for
    # "akkadian"; renaming is left to a separate change.
    word = "ha"
    line = f"1. {word} %n {word} %sux {word}"
    expected = Text(
        (
            TextLine.of_iterable(
                LineNumber(1),
                (
                    Word.of((Reading.of_name(word),), DEFAULT_LANGUAGE),
                    LanguageShift.normalized_akkadian(),
                    AkkadianWord.of((ValueToken.of(word),)),
                    LanguageShift.of("%sux"),
                    Word.of((Reading.of_name(word),), Language.SUMERIAN),
                ),
            ),
        )
    )
    assert parse_atf_lark(line).lines == expected.lines
def test_parse_atf_language_shifts(code: str, expected_language: Language) -> None:
    """A language shift applies to subsequent words until the next shift."""
    word = "ha-am"
    parts = [Reading.of_name("ha"), Joiner.hyphen(), Reading.of_name("am")]
    line = f"1. {word} {code} {word} %sb {word}"
    expected = Text(
        (
            TextLine.of_iterable(
                LineNumber(1),
                (
                    Word.of(parts, DEFAULT_LANGUAGE),
                    LanguageShift.of(code),
                    Word.of(parts, expected_language),
                    LanguageShift.of("%sb"),
                    Word.of(parts, Language.AKKADIAN),
                ),
            ),
        )
    )
    assert parse_atf_lark(line).lines == expected.lines
def test_extant_lines_schema() -> None:
    """ExtantLinesSchema keys by siglum and space-joined labels, dumping line numbers."""
    manuscript = Manuscript(1)
    manuscript_line = ManuscriptLine(1, LABELS, MANUSCRIPT_TEXT_1)
    variant = LineVariant(tuple(), manuscripts=(manuscript_line,))
    text_line = Line(LineNumber(1), (variant,))
    chapter = Chapter(
        TextId(Genre.LITERATURE, 0, 0),
        manuscripts=(manuscript,),
        lines=(text_line,),
    )

    label_key = " ".join(label.to_value() for label in manuscript_line.labels)
    assert ExtantLinesSchema().dump(chapter) == {
        "extantLines": {
            str(manuscript.siglum): {
                label_key: [
                    {
                        "isSideBoundary": False,
                        "lineNumber": OneOfLineNumberSchema().dump(text_line.number),
                    }
                ]
            }
        }
    }
def test_query_by_parallel_line_exists(database, fragment_repository) -> None:
    """A fragment whose parallel line references a stored fragment round-trips.

    Added the missing ``-> None`` annotation for consistency with the other
    tests in this module.
    """
    parallel_number = MuseumNumber.of("K.1")
    fragment = FragmentFactory.build(
        text=Text(
            (
                ParallelFragment(
                    False, parallel_number, True, Labels(), LineNumber(1), True
                ),
            )
        )
    )
    # The fragment referenced by the parallel line must also be stored.
    parallel_fragment = FragmentFactory.build(number=parallel_number)
    database[COLLECTION].insert_many(
        [
            FragmentSchema(exclude=["joins"]).dump(fragment),
            FragmentSchema(exclude=["joins"]).dump(parallel_fragment),
        ]
    )

    assert fragment_repository.query_by_museum_number(fragment.number) == fragment
def test_parse_dividers() -> None:
    """Dividers parse with their @-modifiers and flags (?, #, !, *) attached."""
    line = r'1. :? :#! :# ::? :.@v /@19* :"@20@c ;@v@19!'
    expected_tokens = [
        TextLine.of_iterable(
            LineNumber(1),
            (
                Divider.of(":", tuple(), (atf.Flag.UNCERTAIN,)),
                Divider.of(":", tuple(), (atf.Flag.DAMAGE, atf.Flag.CORRECTION)),
                Divider.of(":", tuple(), (atf.Flag.DAMAGE,)),
                Divider.of("::", tuple(), (atf.Flag.UNCERTAIN,)),
                Divider.of(":.", ("@v",), tuple()),
                Divider.of("/", ("@19",), (atf.Flag.COLLATION,)),
                Divider.of(':"', ("@20", "@c"), tuple()),
                Divider.of(";", ("@v", "@19"), (atf.Flag.CORRECTION,)),
            ),
        )
    ]
    assert parse_atf_lark(line).lines == Text.of_iterable(expected_tokens).lines