def make_line(self, data, **kwargs) -> StateDollarLine:
    return StateDollarLine(
        data["qualification"],
        StateDollarLineSchema.load_extent(data["extent"]),
        data["scope"],
        data["state"],
        data["status"],
    )
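This reads like a marshmallow post_load hook for StateDollarLineSchema; below is a hedged round-trip sketch under that assumption (the schema and its load_extent helper are taken from the snippet itself, the round-trip behaviour is not confirmed by this excerpt):
# Hedged sketch, assuming make_line is wired up as StateDollarLineSchema's
# post_load hook: dumping and re-loading a line should reproduce it.
line = StateDollarLine(None, atf.Extent.SEVERAL,
                       ScopeContainer(atf.Scope.LINES), None, None)
assert StateDollarLineSchema().load(StateDollarLineSchema().dump(line)) == line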
def test_state_dollar_line_range() -> None:
    scope = ScopeContainer(atf.Scope.LINES)
    actual = StateDollarLine(None, (2, 4), scope, atf.State.MISSING, None)

    assert actual.scope == scope
    assert actual.lemmatization == (LemmatizationToken(" 2-4 lines missing"), )
    assert actual.display_value == "2-4 lines missing"
    assert actual.is_end_of is False
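For orientation, a hedged sketch connecting this constructor call to the parser (parse_atf_lark and Text are assumed to be imported as in the parser tests later in this section):
# Hedged sketch: the parser tests further below pair "2-4 lines missing"
# (with a "$ " prefix) with exactly this line, so parsing should agree with
# the constructor call above.
expected = StateDollarLine(None, (2, 4), ScopeContainer(atf.Scope.LINES),
                           atf.State.MISSING, None)
assert parse_atf_lark("$ 2-4 lines missing").lines == Text.of_iterable(
    [expected]).lines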
def ebl_atf_dollar_line__state(self,
                               qualification,
                               extent=None,
                               scope_container=None,
                               state=None,
                               status=None):
    return StateDollarLine(qualification, extent, scope_container, state,
                           status)
def test_state_dollar_line_end_of() -> None:
    scope = ScopeContainer(atf.Surface.OBVERSE)
    actual = StateDollarLine(None, atf.Extent.END_OF, scope, None, None)

    assert actual.scope == scope
    assert actual.lemmatization == (LemmatizationToken(" end of obverse"), )
    assert actual.display_value == "end of obverse"
    assert actual.is_end_of is True
def test_strict_dollar_line_with_none() -> None:
    scope = ScopeContainer(atf.Object.OBJECT, "what")
    actual = StateDollarLine(None, atf.Extent.SEVERAL, scope, None, None)

    assert scope.content == atf.Object.OBJECT
    assert scope.text == "what"

    assert actual.scope == scope
    assert actual.lemmatization == (
        LemmatizationToken(" several object what"), )
    assert actual.atf == "$ several object what"
    assert actual.display_value == "several object what"
    assert actual.is_end_of is False
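A brief hedged sketch of the relationship the last two assertions exercise, namely that atf is the display value with a dollar-sign prefix:
# Hedged sketch based on the assertions above: atf == "$ " + display_value.
line = StateDollarLine(None, atf.Extent.SEVERAL,
                       ScopeContainer(atf.Object.OBJECT, "what"), None, None)
assert line.atf == f"$ {line.display_value}"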
def test_state_dollar_line_content() -> None:
    scope = ScopeContainer(atf.Surface.OBVERSE)
    actual = StateDollarLine(
        atf.Qualification.AT_LEAST,
        1,
        scope,
        atf.State.BLANK,
        atf.DollarStatus.UNCERTAIN,
    )

    assert actual.scope == scope
    assert actual.lemmatization == (
        LemmatizationToken(" at least 1 obverse blank ?"), )
    assert actual.display_value == "at least 1 obverse blank ?"
    assert actual.is_end_of is False
def test_state_dollar_line() -> None:
    scope = ScopeContainer(atf.Scope.COLUMNS, "")
    actual = StateDollarLine(
        atf.Qualification.AT_LEAST,
        atf.Extent.SEVERAL,
        scope,
        atf.State.BLANK,
        atf.DollarStatus.UNCERTAIN,
    )

    assert actual.qualification == atf.Qualification.AT_LEAST
    assert actual.scope == scope
    assert actual.extent == atf.Extent.SEVERAL
    assert actual.state == atf.State.BLANK
    assert actual.status == atf.DollarStatus.UNCERTAIN
    assert actual.lemmatization == (
        LemmatizationToken(" at least several columns blank ?"), )
    assert actual.atf == "$ at least several columns blank ?"
    assert actual.display_value == "at least several columns blank ?"
    assert actual.is_end_of is False
Example #8
def test_combinations(
    qualification,
    extent,
    scope,
    state,
    status,
    expected_qualification,
    expected_extent,
    expected_scope,
    expected_state,
    expected_status,
):
    line = " ".join(["$", qualification, extent, scope, state, status])
    expected_line = StateDollarLine(
        expected_qualification,
        expected_extent,
        expected_scope,
        expected_state,
        expected_status,
    )
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
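A hypothetical parametrize case for test_combinations, assembled only from values that appear in the construction tests above (the tuple order follows the function's parameter list):
# Hypothetical case: "$ at least 1 obverse blank ?" and its expected parts,
# mirroring the "at least 1 obverse blank ?" construction test above.
EXAMPLE_CASE = (
    "at least", "1", "obverse", "blank", "?",
    atf.Qualification.AT_LEAST, 1, ScopeContainer(atf.Surface.OBVERSE, ""),
    atf.State.BLANK, atf.DollarStatus.UNCERTAIN,
)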
@pytest.mark.parametrize(
    "line,is_beginning",
    [
        (EmptyLine(), False),
        (TextLine(LineNumber(2)), False),
        (TextLine(LineNumber(1)), True),
    ],
)
def test_is_beginning_of_side(line, is_beginning) -> None:
    line = ManuscriptLineFactory.build(line=line)
    assert line.is_beginning_of_side is is_beginning


@pytest.mark.parametrize(  # pyre-ignore[56]
    "paratext,is_end",
    [
        (tuple(), False),
        ((NoteLine((StringPart("note"), )), ), False),
        ((StateDollarLine(None, atf.Extent.SEVERAL, None, None,
                          None), ), False),
        ((StateDollarLine(None, atf.Extent.END_OF, None, None, None), ), True),
        (
            (
                StateDollarLine(None, atf.Extent.SEVERAL, None, None, None),
                StateDollarLine(None, atf.Extent.END_OF, None, None, None),
            ),
            True,
        ),
    ],
)
def test_is_end_of_side(paratext, is_end) -> None:
    line = ManuscriptLineFactory.build(line=EmptyLine(), paratext=paratext)
    assert line.is_end_of_side is is_end
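The paratext check above appears to hinge on StateDollarLine.is_end_of; a minimal hedged sketch of that property on its own:
# Hedged sketch: only an END_OF extent marks the end of a side, matching the
# is_end_of assertions in the StateDollarLine tests earlier in this section.
assert StateDollarLine(None, atf.Extent.END_OF, None, None, None).is_end_of is True
assert StateDollarLine(None, atf.Extent.SEVERAL, None, None, None).is_end_of is False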
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([Logogram.of_name("GI", 6)],
                        unique_lemma=(WordId("ginâ I"), )),
                Word.of([Reading.of_name("ana")],
                        unique_lemma=(WordId("ana I"), )),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"), ),
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"), ),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(unique_lemma=(WordId("u I"), ),
                        parts=[Reading.of_name("u")]),
                Word.of(
                    unique_lemma=(WordId("bamātu I"), ),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(unique_lemma=(WordId("mu I"), ),
                        parts=[Reading.of_name("mu")]),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"), ),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")],
                                unique_lemma=(WordId("normalized I"), )),
            ),
        ),
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
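A hedged usage sketch for the factory above, assuming the same factory_boy build() semantics that ManuscriptLineFactory.build uses elsewhere in this section:
# Hedged sketch: build() should return a fragment carrying the Text declared
# above, dollar lines and note line included.
fragment = LemmatizedFragmentFactory.build()
assert fragment.text.lines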
class TransliteratedFragmentFactory(FragmentFactory):
    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v", ), (Flag.DAMAGE, )),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(),
                         UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(),
                         BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(Reading.of_name("šu"),
                               CompoundGrapheme.of(["BI×IS"]))
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE,
                         "stone wig")),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT,
                        "stone wig")),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]), )),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(False, MuseumNumber.of("K.1"), True, Labels(),
                         LineNumber(1), False),
    ))
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|")
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION), ))
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ), )
Example #12
def test_status(status, expected_status):
    line = f"$ {status}"
    expected_line = StateDollarLine(None, None, None, None, expected_status)
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
def ebl_atf_dollar_line__state_status(self, status):
    return StateDollarLine(None, None, None, None, status)

def ebl_atf_dollar_line__state_scope(self,
                                     scope_container,
                                     state=None,
                                     status=None):
    return StateDollarLine(None, None, scope_container, state, status)

def ebl_atf_dollar_line__state_extent(self,
                                      extent,
                                      scope_container=None,
                                      state=None,
                                      status=None):
    return StateDollarLine(None, extent, scope_container, state, status)
 (
     HeadingAtLine(2, (StringPart("foo"),)),
     {
         "prefix": "@",
         "content": [OneOfTokenSchema().dump(ValueToken.of("h2 foo"))],
         "type": "HeadingAtLine",
         "number": 2,
         "displayValue": "h2 foo",
         "parts": [{"type": "StringPart", "text": "foo"}],
     },
 ),
 (
     StateDollarLine(
         atf.Qualification.AT_LEAST,
         atf.Extent.BEGINNING_OF,
         ScopeContainer(atf.Surface.OBVERSE),
         atf.State.BLANK,
         atf.DollarStatus.UNCERTAIN,
     ),
     {
         "prefix": "$",
         "content": [
             OneOfTokenSchema().dump(
                 ValueToken.of(" at least beginning of obverse blank ?")
             )
         ],
         "type": "StateDollarLine",
         "qualification": "AT_LEAST",
         "extent": "BEGINNING_OF",
         "scope": {"type": "Surface", "content": "OBVERSE", "text": ""},
         "state": "BLANK",
Example #17
def test_qualification(qualification, expected_qualification):
    line = f"$ {qualification}"
    expected_line = StateDollarLine(expected_qualification, None, None, None, None)
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
def test_state_dollar_line_non_empty_string_error() -> None:
    with pytest.raises(ValueError):
        StateDollarLine(None, None, ScopeContainer(atf.Surface.REVERSE,
                                                   "test"), None, None)
Example #19
def test_scope(scope, expected_scope):
    line = f"$ {scope}"
    expected_line = StateDollarLine(None, None, expected_scope, None, None)
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
Example #20
def test_extent(extent, expected_extent):
    line = f"$ {extent}"
    expected_line = StateDollarLine(None, expected_extent, None, None, None)
    assert parse_atf_lark(line).lines == Text.of_iterable([expected_line]).lines
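A hypothetical concrete case for test_extent, using an extent value that already appears in these tests and the extent-only transformer shown above:
# Hypothetical case: "several" maps to atf.Extent.SEVERAL, so an extent-only
# state line should parse to the expected line below.
expected = StateDollarLine(None, atf.Extent.SEVERAL, None, None, None)
assert parse_atf_lark("$ several").lines == Text.of_iterable([expected]).lines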
from typing import List

import pytest

from ebl.transliteration.domain import atf
from ebl.transliteration.domain.at_line import SurfaceAtLine
from ebl.transliteration.domain.dollar_line import ScopeContainer, StateDollarLine
from ebl.transliteration.domain.labels import SurfaceLabel
from ebl.transliteration.domain.lark_parser import parse_atf_lark
from ebl.transliteration.domain.line import ControlLine, EmptyLine, Line
from ebl.transliteration.domain.text import Text
from ebl.transliteration.domain.transliteration_error import TransliterationError


@pytest.mark.parametrize("prefix", ["$ ", "$"])
@pytest.mark.parametrize("parenthesis", [False, True])
@pytest.mark.parametrize(
    "line,expected_line",
    [
        (
            "2-4 lines missing",
            StateDollarLine(None, (2, 4), ScopeContainer(atf.Scope.LINES),
                            atf.State.MISSING, None),
        ),
        (
            "at least 1 obverse missing",
            StateDollarLine(
                atf.Qualification.AT_LEAST,
                1,
                ScopeContainer(atf.Surface.OBVERSE, ""),
                atf.State.MISSING,
                None,
            ),
        ),
        (
            "2 lines",
            StateDollarLine(None, 2, ScopeContainer(atf.Scope.LINES), None,
                            None),
        ),
        (
            "#first\n \n#second",
            [
                ControlLine("#", "first"),
                EmptyLine(),
                ControlLine("#", "second")
            ],
        ),
        ("&K11111", [ControlLine("&", "K11111")]),
        ("@reverse", [SurfaceAtLine(SurfaceLabel([], atf.Surface.REVERSE))]),
        (
            "$ (end of side)",
            [
                StateDollarLine(
                    None,
                    atf.Extent.END_OF,
                    ScopeContainer(atf.Scope.SIDE, ""),
                    None,
                    None,
                )
            ],
        ),
        ("#some notes", [ControlLine("#", "some notes")]),
        ("=: continuation", [ControlLine("=:", " continuation")]),
    ],
)
def test_parse_atf(line: str, expected_tokens: List[Line]) -> None:
    assert parse_atf_lark(line).lines == Text.of_iterable(expected_tokens).lines


@pytest.mark.parametrize(