class ManuscriptLineFactory(factory.Factory):
    """Factory that builds ``ManuscriptLine`` objects for tests.

    ``manuscript_id`` and the line number of ``line`` are driven by the
    factory sequence counter; ``labels``, ``paratext`` and ``omitted_words``
    are fixed values shared by every built instance.
    """

    class Meta:
        model = ManuscriptLine

    # The sequence counter itself is used as the manuscript id.
    manuscript_id = factory.Sequence(lambda n: n)
    # Obverse surface label plus column "iii" carrying two statuses.
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # One text line per instance, numbered with the sequence counter and
    # holding a single word "ku-[nu-ši]" (the broken-away span opens before
    # "nu" and closes after "ši").
    line = factory.Sequence(
        lambda n: TextLine.of_iterable(
            LineNumber(n),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    # A note line followed by a single ruling.
    paratext = (NoteLine((StringPart("note"),)), RulingDollarLine(Ruling.SINGLE))
    # NOTE(review): presumably indexes into the words of the reconstructed
    # line rather than of ``line`` above (which has only one word) - confirm.
    omitted_words = (1,)
def test_labels(text_with_labels) -> None:
    """Check the line labels computed for the ``text_with_labels`` fixture.

    The first text line is expected to carry no column, surface or object
    label; the second is expected to carry column 1 together with the
    "Stone wig" surface and object labels.
    """
    unlabelled_first_line = LineLabel(None, None, None, LineNumber(1))

    column = ColumnLabel.from_int(1)
    surface = SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")
    object_ = ObjectLabel([], atf.Object.OBJECT, "Stone wig")
    labelled_second_line = LineLabel(column, surface, object_, LineNumber(2))

    expected = [unlabelled_first_line, labelled_second_line]

    assert text_with_labels.labels == expected
def text_with_labels():
    """Return a text with two "bu" lines separated by column, surface and object at-lines."""

    def bu_line(number):
        # A text line with the given number containing the single word "bu".
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    lines = [
        bu_line(1),
        ColumnAtLine(ColumnLabel.from_int(1)),
        SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")),
        ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig")),
        bu_line(2),
    ]
    return Text.of_iterable(lines)
("l.e.", "", "@left", SurfaceLabel(tuple(), Surface.LEFT)), ("r.e.", "", "@right", SurfaceLabel(tuple(), Surface.RIGHT)), ("t.e.", "", "@top", SurfaceLabel(tuple(), Surface.TOP)), ("o", "'", "@obverse", SurfaceLabel((Status.PRIME, ), Surface.OBVERSE)), ("r", "?", "@reverse", SurfaceLabel((Status.UNCERTAIN, ), Surface.REVERSE)), ("b.e.", "!", "@bottom", SurfaceLabel((Status.CORRECTION, ), Surface.BOTTOM)), ("e.", "*", "@edge", SurfaceLabel((Status.COLLATION, ), Surface.EDGE)), ( "l.e.", "*!", "@left", SurfaceLabel((Status.COLLATION, Status.CORRECTION), Surface.LEFT), ), ("i", "", "@column 1", ColumnLabel(tuple(), 1)), ("ii", "", "@column 2", ColumnLabel(tuple(), 2)), ("iii", "", "@column 3", ColumnLabel(tuple(), 3)), ("iv", "", "@column 4", ColumnLabel(tuple(), 4)), ("v", "", "@column 5", ColumnLabel(tuple(), 5)), ("vi", "", "@column 6", ColumnLabel(tuple(), 6)), ("vii", "", "@column 7", ColumnLabel(tuple(), 7)), ("viii", "", "@column 8", ColumnLabel(tuple(), 8)), ("ix", "", "@column 9", ColumnLabel(tuple(), 9)), ("x", "", "@column 10", ColumnLabel(tuple(), 10)), ("i", "'", "@column 1", ColumnLabel((Status.PRIME, ), 1)), ("ii", "?", "@column 2", ColumnLabel((Status.UNCERTAIN, ), 2)), ("iii", "!", "@column 3", ColumnLabel((Status.CORRECTION, ), 3)), ("iv", "*", "@column 4", ColumnLabel((Status.COLLATION, ), 4)), ("v", "'?", "@column 5", ColumnLabel((Status.PRIME, Status.UNCERTAIN), 5)), ]
def test_at_line_column_no_status() -> None:
    """A column at-line without statuses renders as "column 1"."""
    rendered = "column 1"
    column_line = ColumnAtLine(ColumnLabel.from_int(1))

    expected_lemmatization = (LemmatizationToken(rendered), )
    assert column_line.lemmatization == expected_lemmatization
    assert column_line.display_value == rendered
def test_at_line_column() -> None:
    """A column at-line with the COLLATION status renders as "column 1*"."""
    rendered = "column 1*"
    statuses = (atf.Status.COLLATION, )
    column_line = ColumnAtLine(ColumnLabel.from_int(1, statuses))

    expected_lemmatization = (LemmatizationToken(rendered), )
    assert column_line.lemmatization == expected_lemmatization
    assert column_line.display_value == rendered
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Fragment factory whose ``text`` has lemmas assigned to some words.

    Overrides the ``text`` declaration inherited from
    ``TransliteratedFragmentFactory``. The text consists of five text lines
    (numbered 1, 2, 3, 6 and 7, each created with ``LineNumber(n, True)``)
    followed by dollar lines, at-lines, a note line and parallel lines.
    Words on lines 2, 3, 6 and 7 carry ``unique_lemma`` values; line 1
    carries none.
    """

    text = Text((
        # Line 1: no lemmas assigned.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: three lemmatized words.
        # NOTE(review): unlike the parent factory's line 2, the first word
        # here does not close its broken-away span, and the sub-index and
        # bracket are embedded in the reading names ("u₄", "š[u") instead of
        # being passed as a sub-index argument / BrokenAway token. Confirm
        # this difference is intentional.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 3: three lemmatized words; the broken-away markers sit inside
        # the readings "k]i" and "t[i".
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 6: two lemmatized words; the last one contains an in-word
        # newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: a variant word, a shift to normalized Akkadian and one
        # lemmatized Akkadian word.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line mixing plain, emphasized and transliterated parts.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration and derived data.

    Declares ``text`` (five text lines numbered 1, 2, 3, 6 and 7, each
    created with ``LineNumber(n, True)``, followed by dollar lines, at-lines,
    a note line and parallel lines) together with fixed ``signs``,
    ``folios``, ``record`` and ``line_to_vec`` values. No word in ``text``
    has a lemma assigned.
    """

    text = Text((
        # Line 1.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: the broken-away span opened inside "š[u" is closed by the
        # last word of the line.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 3: the broken-away markers sit inside the readings "k]i" and
        # "t[i".
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        # Line 6: the last word contains an in-word newline.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Line 7: a variant word, a shift to normalized Akkadian and one
        # Akkadian word.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # Note line mixing plain, emphasized and transliterated parts.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines: composition, text and fragment.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Five newline-separated rows of sign names.
    # NOTE(review): presumably one row per text line of ``text`` above, in
    # the same order - confirm against the sign-mapping code. The "\\u002F"
    # and "\\u0020" sequences are literal backslash escapes in the string,
    # not a slash and a space.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # A single encoding tuple: five TEXT_LINE entries and one SINGLE_RULING.
    # NOTE(review): the counts match the five text lines and the one single
    # ruling in ``text``; presumably the other line types are not encoded -
    # confirm against the line-to-vec encoder.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
@pytest.mark.parametrize( "line,expected_line", [ ( "// cf. F K.1 &d tablet* o! iii? 1", ParallelFragment( True, MuseumNumber.of("K.1"), True, Labels( ObjectLabel.from_object(atf.Object.TABLET, [atf.Status.COLLATION]), SurfaceLabel.from_label(atf.Surface.OBVERSE, [atf.Status.CORRECTION]), ColumnLabel.from_int(3, [atf.Status.UNCERTAIN]), ), LineNumber(1), ), ), ( "// F K.1 1", ParallelFragment(False, MuseumNumber.of("K.1"), False, Labels(), LineNumber(1)), ), ( '// cf. L I.1 OB "my name" 1', ParallelText( True, TextId(Genre.LITERATURE, 1, 1), ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
), { "prefix": "@", "content": [OneOfTokenSchema().dump(ValueToken.of("surface stone wig!*"))], "type": "SurfaceAtLine", "surface_label": { "status": ["CORRECTION", "COLLATION"], "surface": "SURFACE", "text": "stone wig", "abbreviation": "stone wig", }, "displayValue": "surface stone wig!*", }, ), ( ColumnAtLine(ColumnLabel([atf.Status.CORRECTION, atf.Status.COLLATION], 1)), { "prefix": "@", "content": [OneOfTokenSchema().dump(ValueToken.of("column 1!*"))], "type": "ColumnAtLine", "column_label": { "status": ["CORRECTION", "COLLATION"], "column": 1, "abbreviation": "i", }, "displayValue": "column 1!*", }, ), ( SealAtLine(1), {
"@prism!", [ ObjectAtLine( ObjectLabel([atf.Status.CORRECTION], atf.Object.PRISM)) ], ), ("@prism", [ObjectAtLine(ObjectLabel([], atf.Object.PRISM))]), ("@object stone", [ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "stone"))]), ("@fragment 1", [ObjectAtLine(ObjectLabel([], atf.Object.FRAGMENT, "1"))]), ("@edge a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.EDGE, "a"))]), ("@face a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.FACE, "a"))]), ("@h1", [HeadingAtLine(1, tuple())]), ("@h1 foo", [HeadingAtLine(1, (StringPart("foo"), ))]), ("@column 1", [ColumnAtLine(ColumnLabel.from_int(1))]), ( "@column 1!", [ColumnAtLine(ColumnLabel.from_int(1, (atf.Status.CORRECTION, )))], ), ( "@column 1!*", [ ColumnAtLine( ColumnLabel.from_int( 1, (atf.Status.CORRECTION, atf.Status.COLLATION))) ], ), ("@date", [DiscourseAtLine(atf.Discourse.DATE)]), ], )
def make_label(self, data, **kwargs) -> ColumnLabel:
    """Build a ``ColumnLabel`` from the "status" and "column" entries of *data*.

    Any other keys in *data*, and all of *kwargs*, are not used.
    """
    status = data["status"]
    column = data["column"]
    return ColumnLabel(status, column)
AnnotationFactory, AnnotationDataFactory, ) from ebl.tests.factories.fragment import TransliteratedFragmentFactory from ebl.transliteration.domain import atf from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel, ObjectLabel from ebl.transliteration.domain.line_label import LineLabel from ebl.transliteration.domain.line_number import LineNumber, LineNumberRange @pytest.mark.parametrize( "line_label, expected", [ ( LineLabel( ColumnLabel.from_int(1), SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"), ObjectLabel([], atf.Object.OBJECT, "Stone wig"), LineNumber(2), ), "i Stone wig Stone wig 2", ), ( LineLabel(None, None, None, LineNumberRange(LineNumber(1, True), LineNumber(3))), "1'-3", ), ], ) def test_format_line_label(line_label, expected, annotations_repository, photo_repository, fragment_repository):
"#tr.en: translation", TranslationLine((StringPart("translation"),), "en", None), ), ( "#tr.ar.(2): translation", TranslationLine( (StringPart("translation"),), "ar", Extent(LineNumber(2), tuple()) ), ), ( "#tr.(2): translation", TranslationLine( (StringPart("translation"),), "en", Extent(LineNumber(2), tuple()) ), ), ( "#tr.de.(o iii 1): translation", TranslationLine( (StringPart("translation"),), "de", Extent( LineNumber(1), (SurfaceLabel(tuple(), Surface.OBVERSE), ColumnLabel(tuple(), 3)), ), ), ), ], ) def test_parse_translation_line(atf, expected_line) -> None: assert parse_translation_line(atf) == expected_line
from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.domain.atf import Surface from ebl.transliteration.domain.enclosure_tokens import BrokenAway from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel from ebl.transliteration.domain.line_number import LineNumber from ebl.transliteration.domain.markup import StringPart from ebl.transliteration.domain.normalized_akkadian import AkkadianWord, Caesura from ebl.transliteration.domain.note_line import NoteLine from ebl.transliteration.domain.sign_tokens import Reading from ebl.transliteration.domain.text_line import TextLine from ebl.transliteration.domain.tokens import Joiner, ValueToken from ebl.transliteration.domain.word_tokens import Word from ebl.transliteration.domain.genre import Genre MANUSCRIPT_ID = 1 LABELS = (ColumnLabel.from_int(1), ) TEXT_LINE = TextLine( LineNumber(1), ( Word.of([Reading.of_name("kur")], unique_lemma=(WordId("word1"), ), alignment=0), Word.of([Reading.of_name("ra")], unique_lemma=(WordId("word2"), ), alignment=None), ), ) NEW_MANUSCRIPT_ID = 2 NEW_LABELS = (SurfaceLabel.from_label(Surface.REVERSE), ) NEW_TEXT_LINE = TextLine(
def test_parse_labels_multiple() -> None:
    """Parsing space-separated label values yields the labels in order."""
    surface = SurfaceLabel.from_label(Surface.OBVERSE)
    column = ColumnLabel.from_int(3)
    labels = (surface, column)

    values = [label.to_value() for label in labels]
    serialized = " ".join(values)

    assert parse_labels(serialized) == labels
def ebl_atf_text_line__column(self, number, statuses):
    """Wrap a column number and its statuses in a ``ColumnAtLine``."""
    label = ColumnLabel.from_int(number, statuses)
    return ColumnAtLine(label)
def test_load_and_dump_column_label_schema():
    """Dumping a status-less column 1 label and loading it back round-trips."""
    column_label = ColumnLabel([], 1)
    expected_dump = {"column": 1, "status": [], "abbreviation": "i"}

    serialized = ColumnLabelSchema().dump(column_label)
    assert serialized == expected_dump

    restored = ColumnLabelSchema().load(serialized)
    assert restored == column_label