def test_note_line():
    """Check the parts, ATF and lemmatization exposed by a ``NoteLine``.

    The note is built from a plain string, an emphasised string and one
    transliteration part per language (Akkadian, Sumerian, Emesal).
    """
    languages = (Language.AKKADIAN, Language.SUMERIAN, Language.EMESAL)

    note = NoteLine(
        (
            StringPart("this is a note "),
            EmphasisPart("italic text"),
            *(
                LanguagePart.of_transliteration(language, TRANSLITERATION)
                for language in languages
            ),
        )
    )

    # Expected values are built from fresh objects so the comparison relies
    # on equality, not identity.
    expected_parts = (
        StringPart("this is a note "),
        EmphasisPart("italic text"),
        *(
            LanguagePart(language, expected_transliteration(language))
            for language in languages
        ),
    )
    assert note.parts == expected_parts

    expected_atf = (
        "#note: this is a note "
        "@i{italic text}"
        f"@akk{{{EXPECTED_ATF}}}@sux{{{EXPECTED_ATF}}}@es{{{EXPECTED_ATF}}}"
    )
    assert note.atf == expected_atf

    expected_lemmatization = (
        LemmatizationToken("this is a note "),
        LemmatizationToken("@i{italic text}"),
        LemmatizationToken(f"@akk{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@sux{{{EXPECTED_ATF}}}"),
        LemmatizationToken(f"@es{{{EXPECTED_ATF}}}"),
    )
    assert note.lemmatization == expected_lemmatization
class ManuscriptLineFactory(factory.Factory):
    """Factory producing ``ManuscriptLine`` objects for tests.

    ``manuscript_id`` and the number of the generated text line both follow
    the factory sequence; labels, paratext and omitted words are fixed.
    """

    class Meta:
        model = ManuscriptLine

    manuscript_id = factory.Sequence(lambda index: index)
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # A single-word text line whose line number is the sequence counter.
    line = factory.Sequence(
        lambda index: TextLine.of_iterable(
            LineNumber(index),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    paratext = (
        NoteLine((StringPart("note"),)),
        RulingDollarLine(Ruling.SINGLE),
    )
    omitted_words = (1,)
class ChapterListingFactory(factory.Factory):
    """Factory producing ``ChapterListing`` objects for tests.

    The stage is chosen from ``Stage`` and the name is a Faker sentence; the
    translation is a single English line and there are no uncertain fragments.
    """

    class Meta:
        model = ChapterListing

    stage = factory.fuzzy.FuzzyChoice(Stage)
    name = factory.Faker("sentence")
    translation = (
        TranslationLine((StringPart("foo"),), "en", None),
    )
    uncertain_fragments = ()
def test_line_display_of_line() -> None:
    """Check that ``LineDisplay.of_line`` copies data from a line's first variant."""
    translation_lines = (
        TranslationLine((StringPart("bar"),), "de", None),
        TranslationLine((StringPart("foo"),), DEFAULT_LANGUAGE, None),
    )
    line = LineFactory.build(translation=translation_lines)

    line_display = LineDisplay.of_line(line)

    # Variant-level fields are expected to come from the first variant.
    first_variant = line.variants[0]
    expected = LineDisplay(
        line.number,
        line.is_second_line_of_parallelism,
        line.is_beginning_of_section,
        first_variant.intertext,
        first_variant.reconstruction,
        translation_lines,
        first_variant.note,
        first_variant.parallel_lines,
    )
    assert line_display == expected
    assert line_display.title == make_title(translation_lines)
class LineVariantFactory(factory.Factory):
    """Factory producing ``LineVariant`` objects for tests.

    The reconstruction is fixed. ``note`` and ``intertext`` are each picked
    from two alternatives, and ``manuscripts`` holds the single manuscript
    line declared in ``Params``.
    """

    class Meta:
        model = LineVariant

    class Params:
        # The manuscript line is built with this variant's manuscript_id.
        manuscript_id = factory.Sequence(lambda index: index)
        manuscript = factory.SubFactory(
            ManuscriptLineFactory,
            manuscript_id=factory.SelfAttribute("..manuscript_id"),
        )

    reconstruction = (
        LanguageShift.normalized_akkadian(),
        AkkadianWord.of((ValueToken.of("buāru"),)),
        MetricalFootSeparator.uncertain(),
        BrokenAway.open(),
        UnknownNumberOfSigns.of(),
        Caesura.certain(),
        AkkadianWord.of(
            (
                UnknownNumberOfSigns.of(),
                BrokenAway.close(),
                Joiner.hyphen(),
                ValueToken.of("buāru"),
            ),
            (Flag.DAMAGE,),
        ),
    )
    note = factory.fuzzy.FuzzyChoice(
        [None, NoteLine((StringPart("a note"),))]
    )
    manuscripts = factory.List(
        [factory.SelfAttribute("..manuscript")],
        TupleFactory,
    )
    intertext = factory.fuzzy.FuzzyChoice([(), (StringPart("bar"),)])
    # One parallel line from each of the three parallel-line factories.
    parallel_lines = factory.List(
        [
            factory.SubFactory(ParallelCompositionFactory),
            factory.SubFactory(ParallelTextFactory),
            factory.SubFactory(ParallelFragmentFactory),
        ],
        TupleFactory,
    )
class LineFactory(factory.Factory):
    """Factory producing ``Line`` objects for tests.

    Each line gets a sequential ``LineNumber``, a single variant, Faker
    booleans for the two flags and one fixed English translation line.
    """

    class Meta:
        model = Line

    class Params:
        # The variant is built with this line's manuscript_id.
        manuscript_id = factory.Sequence(lambda index: index)
        variant = factory.SubFactory(
            LineVariantFactory,
            manuscript_id=factory.SelfAttribute("..manuscript_id"),
        )

    # The sequence counter is handed to LineNumber as its only argument.
    number = factory.Sequence(LineNumber)
    variants = factory.List(
        [factory.SelfAttribute("..variant")],
        TupleFactory,
    )
    is_second_line_of_parallelism = factory.Faker("boolean")
    is_beginning_of_section = factory.Faker("boolean")
    translation = (
        TranslationLine((StringPart("foo"),), "en", None),
    )
PERIOD_MODIFIER = PeriodModifier.LATE PERIOD = Period.OLD_BABYLONIAN PROVENANCE = Provenance.NINEVEH TYPE = ManuscriptType.LIBRARY NOTES = "some notes" COLOPHON = Transliteration.of_iterable( [TextLine(LineNumber(1, True), (Word.of([Reading.of_name("ku")]), ))]) UNPLACED_LINES = Transliteration.of_iterable( [TextLine(LineNumber(4, True), (Word.of([Reading.of_name("bu")]), ))]) REFERENCES = (ReferenceFactory.build(), ) LINE_NUMBER = LineNumber(1) LINE_RECONSTRUCTION = (AkkadianWord.of((ValueToken.of("buāru"), )), ) IS_SECOND_LINE_OF_PARALLELISM = True IS_BEGINNING_OF_SECTION = True LABELS = (SurfaceLabel.from_label(Surface.OBVERSE), ) PARATEXT = (NoteLine((StringPart("note"), )), RulingDollarLine(Ruling.SINGLE)) OMITTED_WORDS = (1, ) NOTE = None PARALLEL_LINES = (ParallelComposition(False, "a composition", LineNumber(7)), ) TRANSLATION = (TranslationLine((StringPart("foo"), ), "en", None), ) SIGNS = ("FOO BAR", ) MANUSCRIPT_TEXT_1 = TextLine(LineNumber(1), (Word.of([Reading.of([ValueToken.of("ku")])]), )) LINE_VARIANT_1 = LineVariant( LINE_RECONSTRUCTION, NOTE, (ManuscriptLine(MANUSCRIPT_ID, LABELS, MANUSCRIPT_TEXT_1, PARATEXT, OMITTED_WORDS), ),
def ebl_atf_text_line__string_part(self, text: str) -> StringPart:
    """Wrap ``text`` in a ``StringPart``."""
    part = StringPart(text)
    return part
from ebl.transliteration.domain.markup import StringPart from ebl.transliteration.domain.translation_line import TranslationLine GENRE = Genre.LITERATURE CATEGORY = 1 INDEX = 2 NAME = "Palm & Vine" HAS_DOI = True VERSES = 100 APPROXIMATE = True CLASSIFICATION = Classification.ANCIENT STAGE = Stage.NEO_BABYLONIAN INTRO = "**Intro**" CHAPTER_NAME = "I" TRANSLATION = ( TranslationLine([StringPart("not the title")], "de"), TranslationLine([StringPart("the title,")], "en"), ) UNCERTAIN_FRAGMENTS = (UncertainFragment(MuseumNumber("X", "1"), True), ) CHAPTER = ChapterListing(STAGE, CHAPTER_NAME, TRANSLATION, UNCERTAIN_FRAGMENTS) TEXT = Text(GENRE, CATEGORY, INDEX, NAME, HAS_DOI, VERSES, APPROXIMATE, INTRO, (CHAPTER, )) def test_text_constructor_sets_correct_fields() -> None: assert TEXT.id == TextId(GENRE, CATEGORY, INDEX) assert TEXT.genre == GENRE assert TEXT.category == CATEGORY assert TEXT.index == INDEX assert TEXT.name == NAME
from ebl.transliteration.domain.text_line_transformer import TextLineTransformer def parse_text(atf: str): tree = LINE_PARSER.parse(atf, start="ebl_atf_text_line__text") return TextLineTransformer().transform(tree) def expected_language_part(language: Language, transliteration: str) -> LanguagePart: return LanguagePart.of_transliteration(language, parse_text(transliteration)) @pytest.mark.parametrize( # pyre-ignore[56] "atf,expected", [ ("this is a note ", (StringPart("this is a note "),)), ("@i{italic text}", (EmphasisPart("italic text"),)), ("@akk{{d}kur}", (expected_language_part(Language.AKKADIAN, "{d}kur"),)), ("@sux{kur}", (expected_language_part(Language.SUMERIAN, "kur"),)), ("@es{kur}", (expected_language_part(Language.EMESAL, "kur"),)), ( "@bib{RN123@x 2-3a}", (BibliographyPart.of(BibliographyId("RN123"), "x 2-3a"),), ), ("@bib{RN1\\}@2}", (BibliographyPart.of(BibliographyId("RN1}"), "2"),)), ("@bib{RN1@1\\}2}", (BibliographyPart.of(BibliographyId("RN1"), "1}2"),)), ("@bib{RN12\\@3@3}", (BibliographyPart.of(BibliographyId("RN12@3"), "3"),)), ("@bib{RN@1\\}\\@2}", (BibliographyPart.of(BibliographyId("RN"), "1}@2"),)), ( "this is a note @i{italic text}@akk{kur}@sux{kur}", (
[ (EmptyLine(), False), (TextLine(LineNumber(2)), False), (TextLine(LineNumber(1)), True), ], ) def test_is_beginning_of_side(line, is_beginning) -> None: line = ManuscriptLineFactory.build(line=line) assert line.is_beginning_of_side is is_beginning @pytest.mark.parametrize( # pyre-ignore[56] "paratext,is_end", [ (tuple(), False), ((NoteLine((StringPart("note"), )), ), False), ((StateDollarLine(None, atf.Extent.SEVERAL, None, None, None), ), False), ((StateDollarLine(None, atf.Extent.END_OF, None, None, None), ), True), ( ( StateDollarLine(None, atf.Extent.SEVERAL, None, None, None), StateDollarLine(None, atf.Extent.END_OF, None, None, None), ), True, ), ], ) def test_is_end_of_side(paratext, is_end) -> None: line = ManuscriptLineFactory.build(line=EmptyLine(), paratext=paratext) assert line.is_end_of_side is is_end
( "@prism!", [ ObjectAtLine( ObjectLabel([atf.Status.CORRECTION], atf.Object.PRISM)) ], ), ("@prism", [ObjectAtLine(ObjectLabel([], atf.Object.PRISM))]), ("@object stone", [ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "stone"))]), ("@fragment 1", [ObjectAtLine(ObjectLabel([], atf.Object.FRAGMENT, "1"))]), ("@edge a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.EDGE, "a"))]), ("@face a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.FACE, "a"))]), ("@h1", [HeadingAtLine(1, tuple())]), ("@h1 foo", [HeadingAtLine(1, (StringPart("foo"), ))]), ("@column 1", [ColumnAtLine(ColumnLabel.from_int(1))]), ( "@column 1!", [ColumnAtLine(ColumnLabel.from_int(1, (atf.Status.CORRECTION, )))], ), ( "@column 1!*", [ ColumnAtLine( ColumnLabel.from_int( 1, (atf.Status.CORRECTION, atf.Status.COLLATION))) ], ), ("@date", [DiscourseAtLine(atf.Discourse.DATE)]), ],
import pytest from ebl.transliteration.domain.lark_parser import parse_translation_line from ebl.transliteration.domain.markup import StringPart from ebl.transliteration.domain.translation_line import Extent, TranslationLine from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel from ebl.transliteration.domain.line_number import LineNumber from ebl.transliteration.domain.atf import Surface @pytest.mark.parametrize( "atf,expected_line", [ ("#tr: translation", TranslationLine((StringPart("translation"),), "en", None)), ( "#tr.en: translation", TranslationLine((StringPart("translation"),), "en", None), ), ( "#tr.ar.(2): translation", TranslationLine( (StringPart("translation"),), "ar", Extent(LineNumber(2), tuple()) ), ), ( "#tr.(2): translation", TranslationLine( (StringPart("translation"),), "en", Extent(LineNumber(2), tuple()) ), ), (
Line( LineNumber(1), (LineVariant(RECONSTRUCTION, NOTE, tuple()), ), IS_SECOND_LINE_OF_PARALLELISM, IS_BEGINNING_OF_SECTION, ), Line(LineNumber(1), (LineVariant(RECONSTRUCTION, NOTE, tuple()), ), True, True), Line(LineNumber(1), (LineVariant(RECONSTRUCTION, NOTE, tuple()), ), True, True), ), ( Line( LineNumber(1), (LineVariant(RECONSTRUCTION, NoteLine( (StringPart("a note"), )), tuple()), ), IS_SECOND_LINE_OF_PARALLELISM, IS_BEGINNING_OF_SECTION, ), Line( LineNumber(1), (LineVariant(RECONSTRUCTION, NoteLine((StringPart("new note"), )), tuple()), ), IS_SECOND_LINE_OF_PARALLELISM, IS_BEGINNING_OF_SECTION, ), Line( LineNumber(1), (LineVariant(RECONSTRUCTION, NoteLine((StringPart("new note"), )), tuple()), ), IS_SECOND_LINE_OF_PARALLELISM,
import pytest from ebl.lemmatization.domain.lemmatization import LemmatizationToken from ebl.transliteration.domain.atf import Atf, Surface from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel from ebl.transliteration.domain.line_number import LineNumber from ebl.transliteration.domain.markup import StringPart from ebl.transliteration.domain.translation_line import Extent, TranslationLine @pytest.mark.parametrize( # pyre-ignore[56] "parts,language,extent,prefix,translation", [ ((StringPart("foo"), StringPart("bar")), "en", None, "#tr.en", "foobar"), ((StringPart("foo"), ), "de", Extent( LineNumber(1)), "#tr.de.(1)", "foo"), ( (StringPart("foo"), ), "de", Extent( LineNumber(1), (SurfaceLabel(tuple(), Surface.OBVERSE), ColumnLabel( tuple(), 2)), ), "#tr.de.(o ii 1)", "foo", ), ], ) def test_parallel_fragment(parts, language, extent, prefix,
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration.

    ``text`` contains five text lines (numbered 1, 2, 3, 6 and 7) followed by
    one instance of several dollar-, at-, note- and parallel-line types.
    ``signs``, ``folios``, ``record`` and ``line_to_vec`` are fixed values too.
    """

    text = Text(
        (
            # Text line 1
            TextLine.of_iterable(
                LineNumber(1, True),
                (
                    Word.of([UnidentifiedSign.of()]),
                    Word.of(
                        [
                            Logogram.of_name(
                                "BA",
                                surrogate=[
                                    Reading.of_name("ku"),
                                    Joiner.hyphen(),
                                    Reading.of_name("u", 4),
                                ],
                            )
                        ]
                    ),
                    Column.of(),
                    Tabulation.of(),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            Joiner.hyphen(),
                            Reading.of_name("ku"),
                            BrokenAway.close(),
                            Joiner.hyphen(),
                            Reading.of_name("nu"),
                            Joiner.hyphen(),
                            Reading.of_name("ši"),
                        ]
                    ),
                    Variant.of(Divider.of(":"), Reading.of_name("ku")),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        ]
                    ),
                    Column.of(2),
                    Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                    CommentaryProtocol.of("!qt"),
                    Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
                ),
            ),
            # Text line 2
            TextLine.of_iterable(
                LineNumber(2, True),
                (
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        ]
                    ),
                    Word.of([Logogram.of_name("GI", 6)]),
                    Word.of([Reading.of_name("ana")]),
                    Word.of(
                        [
                            Reading.of_name("u", 4),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("š"),
                                    BrokenAway.open(),
                                    ValueToken.of("u"),
                                )
                            ),
                        ]
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            # Text line 3
            TextLine.of_iterable(
                LineNumber(3, True),
                (
                    Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                    Word.of(
                        [
                            Reading.of(
                                (
                                    ValueToken.of("k"),
                                    BrokenAway.close(),
                                    ValueToken.of("i"),
                                )
                            ),
                            Joiner.hyphen(),
                            Reading.of_name("du"),
                        ]
                    ),
                    Word.of([Reading.of_name("u")]),
                    Word.of(
                        [
                            Reading.of_name("ba"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("t"),
                                    BrokenAway.open(),
                                    ValueToken.of("i"),
                                )
                            ),
                        ]
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            # Text line 6
            TextLine.of_iterable(
                LineNumber(6, True),
                (
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        ]
                    ),
                    Word.of([UnclearSign.of([Flag.DAMAGE])]),
                    Word.of([Reading.of_name("mu")]),
                    Word.of(
                        [
                            Reading.of_name("ta"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            InWordNewline.of(),
                            Joiner.hyphen(),
                            Reading.of_name("tu", 2),
                        ]
                    ),
                ),
            ),
            # Text line 7
            TextLine.of_iterable(
                LineNumber(7, True),
                (
                    Word.of(
                        [
                            Variant.of(
                                Reading.of_name("šu"),
                                CompoundGrapheme.of(["BI×IS"]),
                            )
                        ]
                    ),
                    LanguageShift.normalized_akkadian(),
                    AkkadianWord.of([ValueToken.of("kur")]),
                ),
            ),
            # Dollar lines
            StateDollarLine(
                atf.Qualification.AT_LEAST,
                1,
                ScopeContainer(atf.Surface.OBVERSE, ""),
                atf.State.MISSING,
                None,
            ),
            ImageDollarLine("1", None, "numbered diagram of triangle"),
            RulingDollarLine(atf.Ruling.SINGLE),
            LooseDollarLine("this is a loose line"),
            SealDollarLine(1),
            # At lines
            SealAtLine(1),
            HeadingAtLine(1),
            ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
            SurfaceAtLine(
                SurfaceLabel(
                    [atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig"
                )
            ),
            ObjectAtLine(
                ObjectLabel(
                    [atf.Status.COLLATION], atf.Object.OBJECT, "stone wig"
                )
            ),
            DiscourseAtLine(atf.Discourse.DATE),
            DivisionAtLine("paragraph", 5),
            CompositeAtLine(atf.Composite.DIV, "part", 1),
            # Note line
            NoteLine(
                (
                    StringPart("a note "),
                    EmphasisPart("italic"),
                    LanguagePart.of_transliteration(
                        Language.AKKADIAN,
                        (Word.of([Reading.of_name("bu")]),),
                    ),
                )
            ),
            # Parallel lines
            ParallelComposition(False, "my name", LineNumber(1)),
            ParallelText(
                True,
                TextId(CorpusGenre.LITERATURE, 1, 1),
                ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
                LineNumber(1),
                False,
            ),
            ParallelFragment(
                False,
                MuseumNumber.of("K.1"),
                True,
                Labels(),
                LineNumber(1),
                False,
            ),
        )
    )
    # One newline-separated row of signs per text line above.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|"
    )
    folios = Folios(
        (
            Folio("WGL", "3"),
            Folio("XXX", "3"),
        )
    )
    record = Record(
        (RecordEntry("test", RecordType.TRANSLITERATION),)
    )
    line_to_vec = (
        (
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.TEXT_LINE,
            LineToVecEncoding.SINGLE_RULING,
        ),
    )
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Transliterated fragment factory whose ``text`` carries lemmata.

    Overrides ``text`` with five text lines (numbered 1, 2, 3, 6 and 7) in
    which several words have a ``unique_lemma``, followed by one instance of
    several dollar-, at-, note- and parallel-line types.
    """

    text = Text(
        (
            # Text line 1 (no lemmata)
            TextLine.of_iterable(
                LineNumber(1, True),
                (
                    Word.of([UnidentifiedSign.of()]),
                    Word.of(
                        [
                            Logogram.of_name(
                                "BA",
                                surrogate=[
                                    Reading.of_name("ku"),
                                    Joiner.hyphen(),
                                    Reading.of_name("u", 4),
                                ],
                            )
                        ]
                    ),
                    Column.of(),
                    Tabulation.of(),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            Joiner.hyphen(),
                            Reading.of_name("ku"),
                            BrokenAway.close(),
                            Joiner.hyphen(),
                            Reading.of_name("nu"),
                            Joiner.hyphen(),
                            Reading.of_name("ši"),
                        ]
                    ),
                    Variant.of(Divider.of(":"), Reading.of_name("ku")),
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        ]
                    ),
                    Column.of(2),
                    Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                    CommentaryProtocol.of("!qt"),
                    Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
                ),
            ),
            # Text line 2
            TextLine.of_iterable(
                LineNumber(2, True),
                (
                    Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                    Word.of(
                        [Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"),),
                    ),
                    Word.of(
                        [Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"),),
                    ),
                    Word.of(
                        [
                            Reading.of_name("u₄"),
                            Joiner.hyphen(),
                            Reading.of_name("š[u"),
                        ],
                        unique_lemma=(WordId("ūsu I"),),
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            # Text line 3
            TextLine.of_iterable(
                LineNumber(3, True),
                (
                    Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                    Word.of(
                        unique_lemma=(WordId("kīdu I"),),
                        parts=[
                            Reading.of(
                                (
                                    ValueToken.of("k"),
                                    BrokenAway.close(),
                                    ValueToken.of("i"),
                                )
                            ),
                            Joiner.hyphen(),
                            Reading.of_name("du"),
                        ],
                    ),
                    Word.of(
                        unique_lemma=(WordId("u I"),),
                        parts=[Reading.of_name("u")],
                    ),
                    Word.of(
                        unique_lemma=(WordId("bamātu I"),),
                        parts=[
                            Reading.of_name("ba"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            Joiner.hyphen(),
                            Reading.of(
                                (
                                    ValueToken.of("t"),
                                    BrokenAway.open(),
                                    ValueToken.of("i"),
                                )
                            ),
                        ],
                    ),
                    Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
                ),
            ),
            # Text line 6
            TextLine.of_iterable(
                LineNumber(6, True),
                (
                    Word.of(
                        [
                            BrokenAway.open(),
                            UnknownNumberOfSigns.of(),
                            BrokenAway.close(),
                        ]
                    ),
                    Word.of([UnclearSign.of([Flag.DAMAGE])]),
                    Word.of(
                        unique_lemma=(WordId("mu I"),),
                        parts=[Reading.of_name("mu")],
                    ),
                    Word.of(
                        unique_lemma=(WordId("tamalāku I"),),
                        parts=[
                            Reading.of_name("ta"),
                            Joiner.hyphen(),
                            Reading.of_name("ma"),
                            InWordNewline.of(),
                            Joiner.hyphen(),
                            Reading.of_name("tu", 2),
                        ],
                    ),
                ),
            ),
            # Text line 7
            TextLine.of_iterable(
                LineNumber(7, True),
                (
                    Word.of(
                        [
                            Variant.of(
                                Reading.of_name("šu"),
                                CompoundGrapheme.of(["BI×IS"]),
                            )
                        ]
                    ),
                    LanguageShift.normalized_akkadian(),
                    AkkadianWord.of(
                        [ValueToken.of("kur")],
                        unique_lemma=(WordId("normalized I"),),
                    ),
                ),
            ),
            # Dollar lines
            StateDollarLine(
                atf.Qualification.AT_LEAST,
                1,
                ScopeContainer(atf.Surface.OBVERSE, ""),
                atf.State.MISSING,
                None,
            ),
            ImageDollarLine("1", None, "numbered diagram of triangle"),
            RulingDollarLine(atf.Ruling.SINGLE),
            LooseDollarLine("this is a loose line"),
            SealDollarLine(1),
            # At lines
            SealAtLine(1),
            HeadingAtLine(1),
            ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
            SurfaceAtLine(
                SurfaceLabel(
                    [atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig"
                )
            ),
            ObjectAtLine(
                ObjectLabel(
                    [atf.Status.COLLATION], atf.Object.OBJECT, "stone wig"
                )
            ),
            DiscourseAtLine(atf.Discourse.DATE),
            DivisionAtLine("paragraph", 5),
            CompositeAtLine(atf.Composite.DIV, "part", 1),
            # Note line
            NoteLine(
                (
                    StringPart("a note "),
                    EmphasisPart("italic"),
                    LanguagePart.of_transliteration(
                        Language.AKKADIAN,
                        (Word.of([Reading.of_name("bu")]),),
                    ),
                )
            ),
            # Parallel lines
            ParallelComposition(False, "my name", LineNumber(1)),
            ParallelText(
                True,
                TextId(CorpusGenre.LITERATURE, 1, 1),
                ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
                LineNumber(1),
                False,
            ),
            ParallelFragment(
                False,
                MuseumNumber.of("K.1"),
                True,
                Labels(),
                LineNumber(1),
                False,
            ),
        )
    )
from ebl.transliteration.domain.markup import StringPart from ebl.transliteration.domain.parallel_line import ParallelComposition from ebl.transliteration.domain.translation_line import TranslationLine REFERENCES = (ReferenceFactory.build(with_document=True), ) MANUSCRIPT = ManuscriptFactory.build(references=REFERENCES) UNCERTAIN_FRAGMENTS = (MuseumNumber.of("K.1"), ) FIRST_MANUSCRIPT_LINE = ManuscriptLineFactory.build( manuscript_id=MANUSCRIPT.id) SECOND_MANUSCRIPT_LINE = ManuscriptLineFactory.build( manuscript_id=MANUSCRIPT.id) LINE_VARIANT = LineVariantFactory.build( manuscripts=(FIRST_MANUSCRIPT_LINE, SECOND_MANUSCRIPT_LINE), parallel_lines=(ParallelComposition(False, "name", LineNumber(2)), ), intertext=(StringPart("bar"), ), ) TRANSLATION_LINE = TranslationLine((StringPart("foo"), ), "en", None) LINE = LineFactory.build(variants=(LINE_VARIANT, ), translation=(TRANSLATION_LINE, )) CHAPTER = ChapterFactory.build(manuscripts=(MANUSCRIPT, ), uncertain_fragments=UNCERTAIN_FRAGMENTS, lines=(LINE, )) def strip_documents(chapter: Chapter) -> Chapter: return attr.evolve( chapter, manuscripts=tuple( attr.evolve( manuscript,
from ebl.transliteration.domain.at_line import (
    DiscourseAtLine,
    SurfaceAtLine,
    ColumnAtLine,
    ObjectAtLine,
    CompositeAtLine,
    HeadingAtLine,
)
from ebl.transliteration.domain.labels import SurfaceLabel, ColumnLabel, ObjectLabel
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.atf import Atf


@pytest.mark.parametrize(  # pyre-ignore[56]
    "parts,parts_text",
    [
        ((), ""),
        ((StringPart("a"), StringPart("b c")), " ab c"),
    ],
)
def test_at_line_heading(parts, parts_text) -> None:
    """Check ATF, lemmatization and display value of a level-1 heading."""
    heading = HeadingAtLine(1, parts)

    assert heading.atf == Atf(f"@h1{parts_text}")
    assert heading.lemmatization == (LemmatizationToken(f"h1{parts_text}"),)
    assert heading.display_value == f"h1{parts_text}"


def test_at_line_column() -> None:
    """Check lemmatization and display value of a collated column at-line."""
    label = ColumnLabel.from_int(1, (atf.Status.COLLATION,))
    column = ColumnAtLine(label)

    expected_tokens = (LemmatizationToken("column 1*"),)
    assert column.lemmatization == expected_tokens
    assert column.display_value == "column 1*"
"displayValue": "seal 1", }, ), ( HeadingAtLine(1), { "prefix": "@", "content": [OneOfTokenSchema().dump(ValueToken.of("h1"))], "type": "HeadingAtLine", "number": 1, "displayValue": "h1", "parts": [], }, ), ( HeadingAtLine(2, (StringPart("foo"),)), { "prefix": "@", "content": [OneOfTokenSchema().dump(ValueToken.of("h2 foo"))], "type": "HeadingAtLine", "number": 2, "displayValue": "h2 foo", "parts": [{"type": "StringPart", "text": "foo"}], }, ), ( StateDollarLine( atf.Qualification.AT_LEAST, atf.Extent.BEGINNING_OF, ScopeContainer(atf.Surface.OBVERSE), atf.State.BLANK,
PUNCTUATION = ";,:.-–—" TEXT = "sed nec tortor varius, iaculis." LANGUAGE_PART = LanguagePart( Language.AKKADIAN, [Reading.of_name("kur"), Divider.of(":")]) BIBLIOGRAPHY_PART = BibliographyPart( Reference(BibliographyId("1"), ReferenceType.DISCUSSION, TEXT + PUNCTUATION)) @pytest.mark.parametrize( # pyre-ignore[56] "part,expected", [ ( StringPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"), StringPart(f"{PUNCTUATION}A{PUNCTUATION}A"), ), ( EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A{PUNCTUATION}"), EmphasisPart(f"{PUNCTUATION}A{PUNCTUATION}A"), ), (LANGUAGE_PART, LANGUAGE_PART), (BIBLIOGRAPHY_PART, BIBLIOGRAPHY_PART), ], ) def test_part_rstrip(part: MarkupPart, expected: MarkupPart) -> None: assert part.rstrip() == expected @pytest.mark.parametrize( # pyre-ignore[56]
( Text.of_iterable([ TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]) ]), Text.of_iterable([ TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]) ]), Text.of_iterable([ TextLine.of_iterable(LineNumber(1), [Word.of([Reading.of_name("bu")])]) ]), ), ( Text.of_iterable([NoteLine((StringPart("this is a note "), ))]), Text.of_iterable( [NoteLine((StringPart("this is another note "), ))]), Text.of_iterable( [NoteLine((StringPart("this is another note "), ))]), ), ( Text.of_iterable([NoteLine((StringPart("this is a note "), ))]), Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]), Text.of_iterable([NoteLine((EmphasisPart("this is a note "), ))]), ), ( Text.of_iterable([ NoteLine((LanguagePart.of_transliteration( Language.AKKADIAN, (ValueToken.of("bu"), )), )) ]),
def test_make_title() -> None:
    """Check the title that ``make_title`` derives from ``TRANSLATION``."""
    expected_title = (StringPart("The Title"),)

    assert make_title(TRANSLATION) == expected_title
def make_part(self, data, **kwargs) -> StringPart:
    """Build a ``StringPart`` from the ``"text"`` entry of ``data``.

    Extra keyword arguments are accepted and ignored.
    """
    text = data["text"]
    return StringPart(text)