class ManuscriptLineFactory(factory.Factory):
    """Factory for ``ManuscriptLine`` objects filled with fixed sample content.

    ``manuscript_id`` and the number of the text line are drawn from factory
    sequences; ``labels``, ``paratext`` and ``omitted_words`` are constants
    shared by every object the factory builds.
    """

    class Meta:
        model = ManuscriptLine

    # The sequence counter itself is used as the manuscript id.
    manuscript_id = factory.Sequence(lambda index: index)
    labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_label("iii", [Status.COLLATION, Status.CORRECTION]),
    )
    # One word, "ku-[nu-ši]", on a line numbered by the sequence counter.
    line = factory.Sequence(
        lambda index: TextLine.of_iterable(
            LineNumber(index),
            (
                Word.of(
                    [
                        Reading.of_name("ku"),
                        Joiner.hyphen(),
                        BrokenAway.open(),
                        Reading.of_name("nu"),
                        Joiner.hyphen(),
                        Reading.of_name("ši"),
                        BrokenAway.close(),
                    ]
                ),
            ),
        )
    )
    paratext = (
        NoteLine((StringPart("note"),)),
        RulingDollarLine(Ruling.SINGLE),
    )
    omitted_words = (1,)
示例#2
0
def test_labels(text_with_labels) -> None:
    """Check the ``labels`` of the ``text_with_labels`` fixture.

    The first entry carries only line number 1; the second carries the
    column, surface and object labels together with line number 2.
    """
    actual = text_with_labels.labels

    unlabelled_line = LineLabel(None, None, None, LineNumber(1))
    labelled_line = LineLabel(
        ColumnLabel.from_int(1),
        SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"),
        ObjectLabel([], atf.Object.OBJECT, "Stone wig"),
        LineNumber(2),
    )

    assert actual == [unlabelled_line, labelled_line]
def text_with_labels():
    """Build a text with two "bu" lines separated by column/surface/object at-lines.

    NOTE(review): ``test_labels`` requests this function by name as a pytest
    fixture, but no ``@pytest.fixture`` decorator is visible on it here --
    confirm it is registered as a fixture.
    """

    def bu_line(number):
        # A text line holding the single word "bu" under the given line number.
        return TextLine.of_iterable(
            LineNumber(number), [Word.of([Reading.of_name("bu")])]
        )

    lines = [
        bu_line(1),
        ColumnAtLine(ColumnLabel.from_int(1)),
        SurfaceAtLine(SurfaceLabel([], atf.Surface.SURFACE, "Stone wig")),
        ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "Stone wig")),
        bu_line(2),
    ]
    return Text.of_iterable(lines)
    ("l.e.", "", "@left", SurfaceLabel(tuple(), Surface.LEFT)),
    ("r.e.", "", "@right", SurfaceLabel(tuple(), Surface.RIGHT)),
    ("t.e.", "", "@top", SurfaceLabel(tuple(), Surface.TOP)),
    ("o", "'", "@obverse", SurfaceLabel((Status.PRIME, ), Surface.OBVERSE)),
    ("r", "?", "@reverse", SurfaceLabel((Status.UNCERTAIN, ),
                                        Surface.REVERSE)),
    ("b.e.", "!", "@bottom", SurfaceLabel((Status.CORRECTION, ),
                                          Surface.BOTTOM)),
    ("e.", "*", "@edge", SurfaceLabel((Status.COLLATION, ), Surface.EDGE)),
    (
        "l.e.",
        "*!",
        "@left",
        SurfaceLabel((Status.COLLATION, Status.CORRECTION), Surface.LEFT),
    ),
    ("i", "", "@column 1", ColumnLabel(tuple(), 1)),
    ("ii", "", "@column 2", ColumnLabel(tuple(), 2)),
    ("iii", "", "@column 3", ColumnLabel(tuple(), 3)),
    ("iv", "", "@column 4", ColumnLabel(tuple(), 4)),
    ("v", "", "@column 5", ColumnLabel(tuple(), 5)),
    ("vi", "", "@column 6", ColumnLabel(tuple(), 6)),
    ("vii", "", "@column 7", ColumnLabel(tuple(), 7)),
    ("viii", "", "@column 8", ColumnLabel(tuple(), 8)),
    ("ix", "", "@column 9", ColumnLabel(tuple(), 9)),
    ("x", "", "@column 10", ColumnLabel(tuple(), 10)),
    ("i", "'", "@column 1", ColumnLabel((Status.PRIME, ), 1)),
    ("ii", "?", "@column 2", ColumnLabel((Status.UNCERTAIN, ), 2)),
    ("iii", "!", "@column 3", ColumnLabel((Status.CORRECTION, ), 3)),
    ("iv", "*", "@column 4", ColumnLabel((Status.COLLATION, ), 4)),
    ("v", "'?", "@column 5", ColumnLabel((Status.PRIME, Status.UNCERTAIN), 5)),
]
def test_at_line_column_no_status() -> None:
    """A column at-line without status flags displays as ``column 1``."""
    expected_value = "column 1"
    column_line = ColumnAtLine(ColumnLabel.from_int(1))

    # The lemmatization is a single token carrying the same text as the
    # display value.
    assert column_line.lemmatization == (LemmatizationToken(expected_value),)
    assert column_line.display_value == expected_value
def test_at_line_column() -> None:
    """A column at-line with the COLLATION status displays as ``column 1*``."""
    expected_value = "column 1*"
    statuses = (atf.Status.COLLATION,)
    column_line = ColumnAtLine(ColumnLabel.from_int(1, statuses))

    # The lemmatization is a single token carrying the same text as the
    # display value.
    assert column_line.lemmatization == (LemmatizationToken(expected_value),)
    assert column_line.display_value == expected_value
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Fragment factory whose ``text`` carries ``unique_lemma`` values.

    Overrides only the ``text`` declaration of ``TransliteratedFragmentFactory``.
    The override contains five text lines (numbered 1, 2, 3, 6 and 7), followed
    by dollar-lines, at-lines, a note line and parallel lines. Several words on
    lines 2, 3, 6 and 7 are given a ``unique_lemma``; line 1 has none.

    NOTE(review): in this view the base class is defined further down than
    this class; it has to be defined or imported before this class statement
    runs -- confirm the actual module order.
    """

    text = Text((
        # Line 1: no word on this line has a lemma.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2: three lemmatized words between two broken-away words.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                # NOTE(review): the parent factory's line 2 also closes the
                # broken-away enclosure in this first word -- confirm the
                # difference is intended.
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        # NOTE(review): the parent factory builds these two
                        # readings as ``of_name("u", 4)`` and as value tokens
                        # around ``BrokenAway.open()``; here the subscript and
                        # the bracket are part of the name strings -- confirm.
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3: three lemmatized words between two broken-away words.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        # The enclosure closes between the two value tokens
                        # of this reading.
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        # The enclosure opens between the two value tokens
                        # of this reading.
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6: the last two words are lemmatized.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        # Line 7: a variant word, a language shift, then a lemmatized
        # AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        # Dollar-lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # A note made of plain, emphasised and transliterated parts.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines, one of each kind.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Fragment factory with a fixed transliteration and related fields.

    Declares ``text`` (five text lines numbered 1, 2, 3, 6 and 7, followed by
    dollar-lines, at-lines, a note line and parallel lines), ``signs``,
    ``folios``, ``record`` and ``line_to_vec``. No word in ``text`` is given a
    ``unique_lemma``.
    """

    text = Text((
        # Line 1.
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        # Line 2.
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    # The enclosure opens between the two value tokens of
                    # this reading.
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 3.
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    # The enclosure closes between the two value tokens of
                    # this reading.
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    # The enclosure opens between the two value tokens of
                    # this reading.
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        # Line 6.
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        # Line 7: a variant word, a language shift, then an AkkadianWord.
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar-lines.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        # At-lines.
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        # A note made of plain, emphasised and transliterated parts.
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        # Parallel lines, one of each kind.
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    # Five newline-separated rows of sign names -- presumably one per text
    # line above; verify against the sign conversion. The doubled backslashes
    # keep "\u002F" and "\u0020" as literal text instead of escape sequences.
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    # A one-element tuple holding a single encoding sequence: five TEXT_LINE
    # entries followed by one SINGLE_RULING.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
示例#9
0
@pytest.mark.parametrize(
    "line,expected_line",
    [
        (
            "// cf. F K.1 &d tablet* o! iii? 1",
            ParallelFragment(
                True,
                MuseumNumber.of("K.1"),
                True,
                Labels(
                    ObjectLabel.from_object(atf.Object.TABLET,
                                            [atf.Status.COLLATION]),
                    SurfaceLabel.from_label(atf.Surface.OBVERSE,
                                            [atf.Status.CORRECTION]),
                    ColumnLabel.from_int(3, [atf.Status.UNCERTAIN]),
                ),
                LineNumber(1),
            ),
        ),
        (
            "// F K.1 1",
            ParallelFragment(False, MuseumNumber.of("K.1"), False, Labels(),
                             LineNumber(1)),
        ),
        (
            '// cf. L I.1 OB "my name" 1',
            ParallelText(
                True,
                TextId(Genre.LITERATURE, 1, 1),
                ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
     ),
     {
         "prefix": "@",
         "content": [OneOfTokenSchema().dump(ValueToken.of("surface stone wig!*"))],
         "type": "SurfaceAtLine",
         "surface_label": {
             "status": ["CORRECTION", "COLLATION"],
             "surface": "SURFACE",
             "text": "stone wig",
             "abbreviation": "stone wig",
         },
         "displayValue": "surface stone wig!*",
     },
 ),
 (
     ColumnAtLine(ColumnLabel([atf.Status.CORRECTION, atf.Status.COLLATION], 1)),
     {
         "prefix": "@",
         "content": [OneOfTokenSchema().dump(ValueToken.of("column 1!*"))],
         "type": "ColumnAtLine",
         "column_label": {
             "status": ["CORRECTION", "COLLATION"],
             "column": 1,
             "abbreviation": "i",
         },
         "displayValue": "column 1!*",
     },
 ),
 (
     SealAtLine(1),
     {
示例#11
0
            "@prism!",
            [
                ObjectAtLine(
                    ObjectLabel([atf.Status.CORRECTION], atf.Object.PRISM))
            ],
        ),
        ("@prism", [ObjectAtLine(ObjectLabel([], atf.Object.PRISM))]),
        ("@object stone",
         [ObjectAtLine(ObjectLabel([], atf.Object.OBJECT, "stone"))]),
        ("@fragment 1",
         [ObjectAtLine(ObjectLabel([], atf.Object.FRAGMENT, "1"))]),
        ("@edge a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.EDGE, "a"))]),
        ("@face a", [SurfaceAtLine(SurfaceLabel([], atf.Surface.FACE, "a"))]),
        ("@h1", [HeadingAtLine(1, tuple())]),
        ("@h1 foo", [HeadingAtLine(1, (StringPart("foo"), ))]),
        ("@column 1", [ColumnAtLine(ColumnLabel.from_int(1))]),
        (
            "@column 1!",
            [ColumnAtLine(ColumnLabel.from_int(1, (atf.Status.CORRECTION, )))],
        ),
        (
            "@column 1!*",
            [
                ColumnAtLine(
                    ColumnLabel.from_int(
                        1, (atf.Status.CORRECTION, atf.Status.COLLATION)))
            ],
        ),
        ("@date", [DiscourseAtLine(atf.Discourse.DATE)]),
    ],
)
示例#12
0
 def make_label(self, data, **kwargs) -> ColumnLabel:
     """Build a ``ColumnLabel`` from the ``status`` and ``column`` entries of *data*.

     Extra keyword arguments are accepted and ignored.
     NOTE(review): the signature looks like a schema load hook -- confirm
     against the enclosing class, which is not visible here.
     """
     status, column = data["status"], data["column"]
     return ColumnLabel(status, column)
    AnnotationFactory,
    AnnotationDataFactory,
)
from ebl.tests.factories.fragment import TransliteratedFragmentFactory
from ebl.transliteration.domain import atf
from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel, ObjectLabel
from ebl.transliteration.domain.line_label import LineLabel
from ebl.transliteration.domain.line_number import LineNumber, LineNumberRange


@pytest.mark.parametrize(
    "line_label, expected",
    [
        (
            LineLabel(
                ColumnLabel.from_int(1),
                SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"),
                ObjectLabel([], atf.Object.OBJECT, "Stone wig"),
                LineNumber(2),
            ),
            "i Stone wig Stone wig 2",
        ),
        (
            LineLabel(None, None, None,
                      LineNumberRange(LineNumber(1, True), LineNumber(3))),
            "1'-3",
        ),
    ],
)
def test_format_line_label(line_label, expected, annotations_repository,
                           photo_repository, fragment_repository):
            "#tr.en: translation",
            TranslationLine((StringPart("translation"),), "en", None),
        ),
        (
            "#tr.ar.(2): translation",
            TranslationLine(
                (StringPart("translation"),), "ar", Extent(LineNumber(2), tuple())
            ),
        ),
        (
            "#tr.(2): translation",
            TranslationLine(
                (StringPart("translation"),), "en", Extent(LineNumber(2), tuple())
            ),
        ),
        (
            "#tr.de.(o iii 1): translation",
            TranslationLine(
                (StringPart("translation"),),
                "de",
                Extent(
                    LineNumber(1),
                    (SurfaceLabel(tuple(), Surface.OBVERSE), ColumnLabel(tuple(), 3)),
                ),
            ),
        ),
    ],
)
def test_parse_translation_line(atf, expected_line) -> None:
    """Parsing the ATF string must produce the expected translation line."""
    parsed_line = parse_translation_line(atf)

    assert parsed_line == expected_line
示例#15
0
from ebl.transliteration.domain.museum_number import MuseumNumber
from ebl.transliteration.domain.atf import Surface
from ebl.transliteration.domain.enclosure_tokens import BrokenAway
from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.markup import StringPart
from ebl.transliteration.domain.normalized_akkadian import AkkadianWord, Caesura
from ebl.transliteration.domain.note_line import NoteLine
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import Joiner, ValueToken
from ebl.transliteration.domain.word_tokens import Word
from ebl.transliteration.domain.genre import Genre

# Sample values: a manuscript id, a one-element label tuple and a two-word
# text line.
MANUSCRIPT_ID = 1
LABELS = (ColumnLabel.from_int(1), )
TEXT_LINE = TextLine(
    LineNumber(1),
    (
        # Both words carry a lemma; only the first has an alignment index.
        Word.of([Reading.of_name("kur")],
                unique_lemma=(WordId("word1"), ),
                alignment=0),
        Word.of([Reading.of_name("ra")],
                unique_lemma=(WordId("word2"), ),
                alignment=None),
    ),
)

# "NEW_" counterparts holding different values from the ones above.
NEW_MANUSCRIPT_ID = 2
NEW_LABELS = (SurfaceLabel.from_label(Surface.REVERSE), )
NEW_TEXT_LINE = TextLine(
def test_parse_labels_multiple() -> None:
    """Space-joined label values must parse back into the same label tuple."""
    expected_labels = (
        SurfaceLabel.from_label(Surface.OBVERSE),
        ColumnLabel.from_int(3),
    )
    label_values = [label.to_value() for label in expected_labels]
    joined_values = " ".join(label_values)

    assert parse_labels(joined_values) == expected_labels
示例#17
0
 def ebl_atf_text_line__column(self, number, statuses):
     """Wrap a column label built from *number* and *statuses* in a ``ColumnAtLine``.

     NOTE(review): the method name suggests a grammar-rule callback -- confirm
     against the enclosing class, which is not visible here.
     """
     column_label = ColumnLabel.from_int(number, statuses)
     return ColumnAtLine(column_label)
示例#18
0
def test_load_and_dump_column_label_schema():
    """Round-trip a status-free column label through ``ColumnLabelSchema``."""
    label = ColumnLabel([], 1)
    expected_dump = {"column": 1, "status": [], "abbreviation": "i"}

    serialized = ColumnLabelSchema().dump(label)
    assert serialized == expected_dump

    # Loading the dumped dictionary must give back an equal label.
    restored = ColumnLabelSchema().load(serialized)
    assert restored == label