def make_line(self, data, **kwargs) -> StateDollarLine:
    """Deserialize schema *data* into a ``StateDollarLine`` domain object.

    The extent is decoded via ``StateDollarLineSchema.load_extent``; the
    remaining fields are passed through unchanged.
    """
    return StateDollarLine(
        data["qualification"],
        StateDollarLineSchema.load_extent(data["extent"]),
        data["scope"],
        data["state"],
        data["status"],
    )
def test_state_dollar_line_range() -> None:
    """A numeric ``(start, end)`` extent renders as ``<start>-<end>``."""
    scope = ScopeContainer(atf.Scope.LINES)
    line = StateDollarLine(None, (2, 4), scope, atf.State.MISSING, None)

    assert line.scope == scope
    assert line.display_value == "2-4 lines missing"
    assert line.lemmatization == (LemmatizationToken(" 2-4 lines missing"),)
    assert line.is_end_of is False
def ebl_atf_dollar_line__state(
    self, qualification, extent=None, scope_container=None, state=None, status=None
):
    # Lark transformer callback: assemble a state $-line from whichever of
    # the optional parts the grammar actually matched.
    return StateDollarLine(qualification, extent, scope_container, state, status)
def test_state_dollar_line_end_of() -> None:
    """An ``END_OF`` extent makes the line report ``is_end_of``."""
    scope = ScopeContainer(atf.Surface.OBVERSE)
    line = StateDollarLine(None, atf.Extent.END_OF, scope, None, None)

    assert line.scope == scope
    assert line.display_value == "end of obverse"
    assert line.lemmatization == (LemmatizationToken(" end of obverse"),)
    assert line.is_end_of is True
def test_strict_dollar_line_with_none() -> None:
    # NOTE(review): despite the "strict" name this exercises StateDollarLine
    # with a scope carrying free text — looks like a copy-paste misnomer;
    # kept unchanged so test discovery/reports stay stable.
    scope = ScopeContainer(atf.Object.OBJECT, "what")
    line = StateDollarLine(None, atf.Extent.SEVERAL, scope, None, None)

    assert scope.content == atf.Object.OBJECT
    assert scope.text == "what"
    assert line.scope == scope
    assert line.atf == "$ several object what"
    assert line.display_value == "several object what"
    assert line.lemmatization == (LemmatizationToken(" several object what"),)
    assert line.is_end_of is False
def test_state_dollar_line_content() -> None:
    """Qualification, numeric extent, state and status all render in order."""
    scope = ScopeContainer(atf.Surface.OBVERSE)
    line = StateDollarLine(
        atf.Qualification.AT_LEAST,
        1,
        scope,
        atf.State.BLANK,
        atf.DollarStatus.UNCERTAIN,
    )

    assert line.scope == scope
    assert line.display_value == "at least 1 obverse blank ?"
    assert line.lemmatization == (
        LemmatizationToken(" at least 1 obverse blank ?"),
    )
    assert line.is_end_of is False
def test_state_dollar_line() -> None:
    """All five constructor arguments are stored and rendered in order."""
    scope = ScopeContainer(atf.Scope.COLUMNS, "")
    line = StateDollarLine(
        atf.Qualification.AT_LEAST,
        atf.Extent.SEVERAL,
        scope,
        atf.State.BLANK,
        atf.DollarStatus.UNCERTAIN,
    )

    # Stored fields round-trip unchanged.
    assert line.qualification == atf.Qualification.AT_LEAST
    assert line.extent == atf.Extent.SEVERAL
    assert line.scope == scope
    assert line.state == atf.State.BLANK
    assert line.status == atf.DollarStatus.UNCERTAIN

    # Rendered forms.
    assert line.atf == "$ at least several columns blank ?"
    assert line.display_value == "at least several columns blank ?"
    assert line.lemmatization == (
        LemmatizationToken(" at least several columns blank ?"),
    )
    assert line.is_end_of is False
def test_combinations(
    qualification,
    extent,
    scope,
    state,
    status,
    expected_qualification,
    expected_extent,
    expected_scope,
    expected_state,
    expected_status,
):
    """Parsing ``$ <parts...>`` yields the expected ``StateDollarLine``."""
    atf_line = " ".join(["$", qualification, extent, scope, state, status])
    expected = StateDollarLine(
        expected_qualification,
        expected_extent,
        expected_scope,
        expected_state,
        expected_status,
    )
    assert parse_atf_lark(atf_line).lines == Text.of_iterable([expected]).lines
    # (continuation of a @pytest.mark.parametrize list whose opening is
    # outside this view)
    (EmptyLine(), False),
    (TextLine(LineNumber(2)), False),
    (TextLine(LineNumber(1)), True),
    ],
)
def test_is_beginning_of_side(line, is_beginning) -> None:
    # A manuscript line begins a side iff its line is TextLine(LineNumber(1)).
    line = ManuscriptLineFactory.build(line=line)
    assert line.is_beginning_of_side is is_beginning


@pytest.mark.parametrize(  # pyre-ignore[56]
    "paratext,is_end",
    [
        (tuple(), False),
        ((NoteLine((StringPart("note"),)),), False),
        ((StateDollarLine(None, atf.Extent.SEVERAL, None, None, None),), False),
        ((StateDollarLine(None, atf.Extent.END_OF, None, None, None),), True),
        (
            (
                StateDollarLine(None, atf.Extent.SEVERAL, None, None, None),
                StateDollarLine(None, atf.Extent.END_OF, None, None, None),
            ),
            True,
        ),
    ],
)
def test_is_end_of_side(paratext, is_end) -> None:
    # An END_OF state dollar line anywhere in the paratext marks the end of
    # a side; other paratext lines do not.
    line = ManuscriptLineFactory.build(line=EmptyLine(), paratext=paratext)
    assert line.is_end_of_side is is_end
class LemmatizedFragmentFactory(TransliteratedFragmentFactory):
    """Factory overriding ``text`` with a lemmatized transliteration.

    The structure mirrors the parent factory's ``text`` but attaches
    ``unique_lemma`` word IDs to several words.
    """

    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                # NOTE(review): unlike TransliteratedFragmentFactory's line 2,
                # this first word has no BrokenAway.close() — confirm intended.
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of(
                    [Logogram.of_name("GI", 6)], unique_lemma=(WordId("ginâ I"),)
                ),
                Word.of([Reading.of_name("ana")], unique_lemma=(WordId("ana I"),)),
                Word.of(
                    [
                        Reading.of_name("u₄"),
                        Joiner.hyphen(),
                        Reading.of_name("š[u"),
                    ],
                    unique_lemma=(WordId("ūsu I"),),
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of(
                    unique_lemma=(WordId("kīdu I"),),
                    parts=[
                        Reading.of((
                            ValueToken.of("k"),
                            BrokenAway.close(),
                            ValueToken.of("i"),
                        )),
                        Joiner.hyphen(),
                        Reading.of_name("du"),
                    ],
                ),
                Word.of(
                    unique_lemma=(WordId("u I"),), parts=[Reading.of_name("u")]
                ),
                Word.of(
                    unique_lemma=(WordId("bamātu I"),),
                    parts=[
                        Reading.of_name("ba"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        Joiner.hyphen(),
                        Reading.of((
                            ValueToken.of("t"),
                            BrokenAway.open(),
                            ValueToken.of("i"),
                        )),
                    ],
                ),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of(
                    unique_lemma=(WordId("mu I"),), parts=[Reading.of_name("mu")]
                ),
                Word.of(
                    unique_lemma=(WordId("tamalāku I"),),
                    parts=[
                        Reading.of_name("ta"),
                        Joiner.hyphen(),
                        Reading.of_name("ma"),
                        InWordNewline.of(),
                        Joiner.hyphen(),
                        Reading.of_name("tu", 2),
                    ],
                ),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(
                        Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"])
                    )
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of(
                    [ValueToken.of("kur")], unique_lemma=(WordId("normalized I"),)
                ),
            ),
        ),
        # Dollar lines, @-lines, note and parallel lines exercising the
        # remaining line types.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")
        ),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")
        ),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]),)
            ),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(
            False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False
        ),
    ))
class TransliteratedFragmentFactory(FragmentFactory):
    """Factory producing a fragment with a realistic transliteration.

    ``text`` covers text lines plus every dollar-, at-, note- and
    parallel-line variant; the sibling attributes (``signs``, ``folios``,
    ``record``, ``line_to_vec``) are companion fixture data.
    """

    text = Text((
        TextLine.of_iterable(
            LineNumber(1, True),
            (
                Word.of([UnidentifiedSign.of()]),
                Word.of([
                    Logogram.of_name(
                        "BA",
                        surrogate=[
                            Reading.of_name("ku"),
                            Joiner.hyphen(),
                            Reading.of_name("u", 4),
                        ],
                    )
                ]),
                Column.of(),
                Tabulation.of(),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    Joiner.hyphen(),
                    Reading.of_name("ku"),
                    BrokenAway.close(),
                    Joiner.hyphen(),
                    Reading.of_name("nu"),
                    Joiner.hyphen(),
                    Reading.of_name("ši"),
                ]),
                Variant.of(Divider.of(":"), Reading.of_name("ku")),
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Column.of(2),
                Divider.of(":", ("@v",), (Flag.DAMAGE,)),
                CommentaryProtocol.of("!qt"),
                Word.of([Number.of_name("10", flags=[Flag.DAMAGE])]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(2, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([Logogram.of_name("GI", 6)]),
                Word.of([Reading.of_name("ana")]),
                Word.of([
                    Reading.of_name("u", 4),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("š"),
                        BrokenAway.open(),
                        ValueToken.of("u"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(3, True),
            (
                Word.of([BrokenAway.open(), UnknownNumberOfSigns.of()]),
                Word.of([
                    Reading.of((
                        ValueToken.of("k"),
                        BrokenAway.close(),
                        ValueToken.of("i"),
                    )),
                    Joiner.hyphen(),
                    Reading.of_name("du"),
                ]),
                Word.of([Reading.of_name("u")]),
                Word.of([
                    Reading.of_name("ba"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    Joiner.hyphen(),
                    Reading.of((
                        ValueToken.of("t"),
                        BrokenAway.open(),
                        ValueToken.of("i"),
                    )),
                ]),
                Word.of([UnknownNumberOfSigns.of(), BrokenAway.close()]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(6, True),
            (
                Word.of([
                    BrokenAway.open(),
                    UnknownNumberOfSigns.of(),
                    BrokenAway.close(),
                ]),
                Word.of([UnclearSign.of([Flag.DAMAGE])]),
                Word.of([Reading.of_name("mu")]),
                Word.of([
                    Reading.of_name("ta"),
                    Joiner.hyphen(),
                    Reading.of_name("ma"),
                    InWordNewline.of(),
                    Joiner.hyphen(),
                    Reading.of_name("tu", 2),
                ]),
            ),
        ),
        TextLine.of_iterable(
            LineNumber(7, True),
            (
                Word.of([
                    Variant.of(
                        Reading.of_name("šu"), CompoundGrapheme.of(["BI×IS"])
                    )
                ]),
                LanguageShift.normalized_akkadian(),
                AkkadianWord.of([ValueToken.of("kur")]),
            ),
        ),
        # Dollar lines, @-lines, note and parallel lines exercising the
        # remaining line types.
        StateDollarLine(
            atf.Qualification.AT_LEAST,
            1,
            ScopeContainer(atf.Surface.OBVERSE, ""),
            atf.State.MISSING,
            None,
        ),
        ImageDollarLine("1", None, "numbered diagram of triangle"),
        RulingDollarLine(atf.Ruling.SINGLE),
        LooseDollarLine("this is a loose line"),
        SealDollarLine(1),
        SealAtLine(1),
        HeadingAtLine(1),
        ColumnAtLine(ColumnLabel([atf.Status.COLLATION], 1)),
        SurfaceAtLine(
            SurfaceLabel([atf.Status.COLLATION], atf.Surface.SURFACE, "stone wig")
        ),
        ObjectAtLine(
            ObjectLabel([atf.Status.COLLATION], atf.Object.OBJECT, "stone wig")
        ),
        DiscourseAtLine(atf.Discourse.DATE),
        DivisionAtLine("paragraph", 5),
        CompositeAtLine(atf.Composite.DIV, "part", 1),
        NoteLine((
            StringPart("a note "),
            EmphasisPart("italic"),
            LanguagePart.of_transliteration(
                Language.AKKADIAN, (Word.of([Reading.of_name("bu")]),)
            ),
        )),
        ParallelComposition(False, "my name", LineNumber(1)),
        ParallelText(
            True,
            TextId(CorpusGenre.LITERATURE, 1, 1),
            ChapterName(Stage.OLD_BABYLONIAN, "", "my name"),
            LineNumber(1),
            False,
        ),
        ParallelFragment(
            False, MuseumNumber.of("K.1"), True, Labels(), LineNumber(1), False
        ),
    ))
    # Sign readings, one row per text line of ``text`` (presumably the
    # expected sign-mapping output for this transliteration — verify against
    # the tests that consume it).
    signs = (
        "X BA KU ABZ075 ABZ207a\\u002F207b\\u0020X ABZ377n1/KU ABZ377n1 ABZ411\n"
        "MI DIŠ UD ŠU\n"
        "KI DU ABZ411 BA MA TI\n"
        "X MU TA MA UD\n"
        "ŠU/|BI×IS|"
    )
    folios = Folios((Folio("WGL", "3"), Folio("XXX", "3")))
    record = Record((RecordEntry("test", RecordType.TRANSLITERATION),))
    # Encoding of the lines above: five text lines followed by the single
    # ruling dollar line.
    line_to_vec = ((
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.TEXT_LINE,
        LineToVecEncoding.SINGLE_RULING,
    ),)
def test_status(status, expected_status):
    """A bare ``$ <status>`` parses into a status-only state dollar line."""
    expected = StateDollarLine(None, None, None, None, expected_status)
    assert (
        parse_atf_lark(f"$ {status}").lines == Text.of_iterable([expected]).lines
    )
def ebl_atf_dollar_line__state_status(self, status):
    # Transformer callback: a lone status forms a state $-line with all
    # other components absent.
    return StateDollarLine(None, None, None, None, status)
def ebl_atf_dollar_line__state_scope(self, scope_container, state=None, status=None):
    # Transformer callback: state $-line starting at the scope (no
    # qualification or extent).
    return StateDollarLine(None, None, scope_container, state, status)
def ebl_atf_dollar_line__state_extent(
    self, extent, scope_container=None, state=None, status=None
):
    # Transformer callback: state $-line starting at the extent (no
    # qualification).
    return StateDollarLine(None, extent, scope_container, state, status)
( HeadingAtLine(2, (StringPart("foo"),)), { "prefix": "@", "content": [OneOfTokenSchema().dump(ValueToken.of("h2 foo"))], "type": "HeadingAtLine", "number": 2, "displayValue": "h2 foo", "parts": [{"type": "StringPart", "text": "foo"}], }, ), ( StateDollarLine( atf.Qualification.AT_LEAST, atf.Extent.BEGINNING_OF, ScopeContainer(atf.Surface.OBVERSE), atf.State.BLANK, atf.DollarStatus.UNCERTAIN, ), { "prefix": "$", "content": [ OneOfTokenSchema().dump( ValueToken.of(" at least beginning of obverse blank ?") ) ], "type": "StateDollarLine", "qualification": "AT_LEAST", "extent": "BEGINNING_OF", "scope": {"type": "Surface", "content": "OBVERSE", "text": ""}, "state": "BLANK",
def test_qualification(qualification, expected_qualification):
    """``$ <qualification>`` parses into a qualification-only state line."""
    expected = StateDollarLine(expected_qualification, None, None, None, None)
    assert (
        parse_atf_lark(f"$ {qualification}").lines
        == Text.of_iterable([expected]).lines
    )
def test_state_dollar_line_non_empty_string_error() -> None:
    # Construction must raise ValueError for a REVERSE scope carrying free
    # text. The ScopeContainer is built inside the raises-block on purpose:
    # the error may originate there rather than in StateDollarLine.
    with pytest.raises(ValueError):
        StateDollarLine(
            None, None, ScopeContainer(atf.Surface.REVERSE, "test"), None, None
        )
def test_scope(scope, expected_scope):
    """``$ <scope>`` parses into a scope-only state dollar line."""
    expected = StateDollarLine(None, None, expected_scope, None, None)
    assert (
        parse_atf_lark(f"$ {scope}").lines == Text.of_iterable([expected]).lines
    )
def test_extent(extent, expected_extent):
    """``$ <extent>`` parses into an extent-only state dollar line."""
    expected = StateDollarLine(None, expected_extent, None, None, None)
    assert (
        parse_atf_lark(f"$ {extent}").lines == Text.of_iterable([expected]).lines
    )
from ebl.transliteration.domain import atf from ebl.transliteration.domain.dollar_line import ScopeContainer, StateDollarLine from ebl.transliteration.domain.lark_parser import parse_atf_lark from ebl.transliteration.domain.text import Text from ebl.transliteration.domain.transliteration_error import TransliterationError @pytest.mark.parametrize("prefix", ["$ ", "$"]) @pytest.mark.parametrize("parenthesis", [False, True]) @pytest.mark.parametrize( "line,expected_line", [ ( "2-4 lines missing", StateDollarLine(None, (2, 4), ScopeContainer(atf.Scope.LINES), atf.State.MISSING, None), ), ( "at least 1 obverse missing", StateDollarLine( atf.Qualification.AT_LEAST, 1, ScopeContainer(atf.Surface.OBVERSE, ""), atf.State.MISSING, None, ), ), ( "2 lines", StateDollarLine(None, 2, ScopeContainer(atf.Scope.LINES), None, None),
    # (continuation of a @pytest.mark.parametrize list whose opening is
    # outside this view)
    "#first\n \n#second",
    [ControlLine("#", "first"), EmptyLine(), ControlLine("#", "second")],
),
        ("&K11111", [ControlLine("&", "K11111")]),
        ("@reverse", [SurfaceAtLine(SurfaceLabel([], atf.Surface.REVERSE))]),
        (
            "$ (end of side)",
            [
                StateDollarLine(
                    None,
                    atf.Extent.END_OF,
                    ScopeContainer(atf.Scope.SIDE, ""),
                    None,
                    None,
                )
            ],
        ),
        ("#some notes", [ControlLine("#", "some notes")]),
        ("=: continuation", [ControlLine("=:", " continuation")]),
    ],
)
def test_parse_atf(line: str, expected_tokens: List[Line]) -> None:
    # Each ATF input must parse into exactly the expected line objects.
    assert parse_atf_lark(line).lines == Text.of_iterable(expected_tokens).lines


@pytest.mark.parametrize(