Example #1
def test_dump_line():
    text = Text(
        (
            TextLine.of_iterable(
                LineNumber(1),
                [
                    Word.of(
                        parts=[
                            Reading.of_name("ha"),
                            Joiner.hyphen(),
                            Reading.of_name("am"),
                        ]
                    )
                ],
            ),
            EmptyLine(),
            ControlLine("#", " comment"),
        ),
        "1.0.0",
    )

    assert TextSchema().dump(text) == {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }
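A follow-up sketch that could sit at the end of test_dump_line above (not part of the original test; it assumes the EmptyLine serialisation shown in the schema examples further below):

    dumped = TextSchema().dump(text)
    assert dumped["parser_version"] == "1.0.0"
    assert dumped["numberOfLines"] == 1
    # The second line of the text is the EmptyLine.
    assert dumped["lines"][1] == {"type": "EmptyLine", "prefix": "", "content": []}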
def parse_line_(line: str, line_number: int):
    try:
        parsed_line = parse_line(line) if line else EmptyLine()
        validate_line(parsed_line)
        return parsed_line, None
    except PARSE_ERRORS as ex:
        return (None,
                create_transliteration_error_data(ex, line, line_number))
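A hedged usage sketch (not from the original sources; parse_atf_ is a hypothetical wrapper name): because parse_line_ returns a (line, error) pair, a multi-line transliteration can be parsed line by line while the errors are collected separately.

def parse_atf_(atf: str):
    # Parse every line, pairing each result with its (possible) error data.
    results = [
        parse_line_(line, number)
        for number, line in enumerate(atf.split("\n"))
    ]
    lines = tuple(line for line, _ in results if line is not None)
    errors = [error for _, error in results if error is not None]
    return lines, errors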
def make_manuscript_line(self, data: dict, **kwargs) -> ManuscriptLine:
    has_text_line = len(data["number"]) > 0
    lines = data["atf"].split("\n")
    try:
        text = (parse_text_line(f"{data['number']}. {lines[0]}")
                if has_text_line else EmptyLine())
        paratext = lines[1:] if has_text_line else lines
        return ManuscriptLine(
            data["manuscript_id"],
            tuple(data["labels"]),
            text,
            tuple(parse_paratext(line) for line in paratext),
            tuple(data["omitted_words"]),
        )
    except PARSE_ERRORS as error:
        raise ValidationError(f"Invalid manuscript line: {data['atf']}.",
                              "atf") from error
def test_statistics(database, fragment_repository):
    database[COLLECTION].insert_many(
        [
            SCHEMA.dump(
                FragmentFactory.build(
                    text=Text(
                        (
                            TextLine(
                                LineNumber(1),
                                (
                                    Word.of([Reading.of_name("first")]),
                                    Word.of([Reading.of_name("line")]),
                                ),
                            ),
                            ControlLine("#", "ignore"),
                            EmptyLine(),
                        )
                    )
                )
            ),
            SCHEMA.dump(
                FragmentFactory.build(
                    text=Text(
                        (
                            ControlLine("#", "ignore"),
                            TextLine(
                                LineNumber(1), (Word.of([Reading.of_name("second")]),)
                            ),
                            TextLine(
                                LineNumber(2), (Word.of([Reading.of_name("third")]),)
                            ),
                            ControlLine("#", "ignore"),
                            TextLine(
                                LineNumber(3), (Word.of([Reading.of_name("fourth")]),)
                            ),
                        )
                    )
                )
            ),
            SCHEMA.dump(FragmentFactory.build(text=Text())),
        ]
    )
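    # Two of the three fragments are transliterated, and between them they
    # contain 1 + 3 = 4 text lines; the control lines, empty lines, and the
    # empty third fragment do not contribute to either count.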
    assert fragment_repository.count_transliterated_fragments() == 2
    assert fragment_repository.count_lines() == 4
def empty_line(self, _):
    return EmptyLine()
from ebl.dictionary.domain.word import WordId
from ebl.transliteration.domain import atf
from ebl.transliteration.domain.enclosure_tokens import BrokenAway, PerhapsBrokenAway
from ebl.transliteration.domain.line import ControlLine, EmptyLine, Line
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.sign_tokens import Reading
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.tokens import Joiner, LanguageShift
from ebl.transliteration.domain.word_tokens import Word


@pytest.mark.parametrize(  # pyre-ignore[56]
    "old,new,expected",
    [
        (
            EmptyLine(),
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
        ),
        (
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            ControlLine("#", " comment"),
            ControlLine("#", " comment"),
        ),
        (
            TextLine.of_iterable(LineNumber(1),
                                 [Word.of([Reading.of_name("bu")])]),
            TextLine.of_iterable(LineNumber(2),
def test_is_end_of_side(paratext, is_end) -> None:
    line = ManuscriptLineFactory.build(line=EmptyLine(), paratext=paratext)
    assert line.is_end_of_side is is_end
import pytest

from ebl.tests.factories.corpus import ManuscriptLineFactory
from ebl.transliteration.domain import atf
from ebl.transliteration.domain.dollar_line import StateDollarLine
from ebl.transliteration.domain.line import EmptyLine
from ebl.transliteration.domain.line_number import LineNumber
from ebl.transliteration.domain.text_line import TextLine
from ebl.transliteration.domain.note_line import NoteLine
from ebl.transliteration.domain.markup import StringPart


@pytest.mark.parametrize(
    "line,is_beginning",
    [
        (EmptyLine(), False),
        (TextLine(LineNumber(2)), False),
        (TextLine(LineNumber(1)), True),
    ],
)
def test_is_beginning_of_side(line, is_beginning) -> None:
    line = ManuscriptLineFactory.build(line=line)
    assert line.is_beginning_of_side is is_beginning


@pytest.mark.parametrize(  # pyre-ignore[56]
    "paratext,is_end",
    [
        (tuple(), False),
        ((NoteLine((StringPart("note"), )), ), False),
        ((StateDollarLine(None, atf.Extent.SEVERAL, None, None,
                           parse_text_line("1. kur")),
        ),
        (
            [f" {MANUSCRIPTS[0].siglum} 1. kur"],
            ManuscriptLine(MANUSCRIPTS[0].id, tuple(),
                           parse_text_line("1. kur")),
        ),
        (
            [
                f"{MANUSCRIPTS[0].siglum} o iii", "#note: a note",
                "$ single ruling"
            ],
            ManuscriptLine(
                MANUSCRIPTS[0].id,
                parse_labels("o iii"),
                EmptyLine(),
                (parse_paratext("#note: a note"),
                 parse_paratext("$ single ruling")),
            ),
        ),
        (
            [f"{MANUSCRIPTS[0].siglum}"],
            ManuscriptLine(MANUSCRIPTS[0].id, tuple(), EmptyLine()),
        ),
    ],
)
def test_parse_manuscript(lines, expected) -> None:
    atf = "\n".join(lines)
    assert parse_manuscript(atf) == expected

Example #11

def test_empty_line() -> None:
    line = EmptyLine()

    assert line.lemmatization == tuple()
    assert line.key == f"EmptyLine⁞⁞{hash(line)}"
    assert line.atf == ""


def test_control_line() -> None:
    prefix = "#"
    content = "only"
    line = ControlLine(prefix, content)

    assert line.prefix == prefix
    assert line.content == content
    assert line.key == f"ControlLine⁞#only⁞{hash(line)}"
    assert line.lemmatization == (LemmatizationToken(content),)


@pytest.mark.parametrize(  # pyre-ignore[56]
    "line,lemmatization",
    [
        (ControlLine("#", " a comment"), (LemmatizationToken(" a comment"),)),
        (EmptyLine(), tuple()),
    ],
)
def test_update_lemmatization(line, lemmatization) -> None:
    assert line.update_lemmatization(lemmatization) == line
    NOTE,
    (ManuscriptLine(MANUSCRIPT_ID, LABELS, MANUSCRIPT_TEXT_1, PARATEXT,
                    OMITTED_WORDS), ),
    PARALLEL_LINES,
)
LINE_1 = Line(
    LINE_NUMBER,
    (LINE_VARIANT_1, ),
    IS_SECOND_LINE_OF_PARALLELISM,
    IS_BEGINNING_OF_SECTION,
    TRANSLATION,
)

LINE_VARIANT_2 = LineVariant(
    LINE_RECONSTRUCTION, None,
    (ManuscriptLine(MANUSCRIPT_ID, tuple(), EmptyLine()), ))
LINE_2 = Line(LineNumber(2), (LINE_VARIANT_2, ))

MANUSCRIPT_TEXT_3 = attr.evolve(MANUSCRIPT_TEXT_1, line_number=LineNumber(3))
LINE_VARIANT_3 = LineVariant(
    LINE_RECONSTRUCTION,
    None,
    (ManuscriptLine(MANUSCRIPT_ID, tuple(), MANUSCRIPT_TEXT_3), ),
)
LINE_3 = Line(LineNumber(3), (LINE_VARIANT_3, ))

RECORD = Record(
    (Author("Author", "Test", AuthorRole.EDITOR, ""), ),
    (Translator("Author", "Test", "", "en"), ),
    "",
)
Example #13
     Text.of_iterable([
         TextLine.of_iterable(
             LineNumber(1),
             [
                 Word.of([
                     Reading.of_name("ha"),
                     Joiner.hyphen(),
                     Reading.of_name("am"),
                 ])
             ],
         ),
         ControlLine("#", " comment"),
     ]),
 ),
 (
     Text.of_iterable([EmptyLine()]),
     Text.of_iterable([RulingDollarLine(atf.Ruling.SINGLE)]),
     Text.of_iterable([RulingDollarLine(atf.Ruling.SINGLE)]),
 ),
 (
     Text.of_iterable([
         RulingDollarLine(atf.Ruling.DOUBLE),
         RulingDollarLine(atf.Ruling.SINGLE),
         EmptyLine(),
     ]),
     Text.of_iterable(
         [RulingDollarLine(atf.Ruling.DOUBLE),
          EmptyLine()]),
     Text.of_iterable(
         [RulingDollarLine(atf.Ruling.DOUBLE),
          EmptyLine()]),
                         ).set_enclosure_type(
                             frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})
                         )
                     ]
                 ).set_enclosure_type(
                     frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})
                 ),
                 DocumentOrientedGloss.close().set_enclosure_type(
                     frozenset({EnclosureType.DOCUMENT_ORIENTED_GLOSS})
                 ),
             ],
             many=True,
         ),
     },
 ),
 (EmptyLine(), {"type": "EmptyLine", "prefix": "", "content": []}),
 (
     LooseDollarLine("end of side"),
     {
         "type": "LooseDollarLine",
         "prefix": "$",
         "content": [OneOfTokenSchema().dump(ValueToken.of(" (end of side)"))],
         "text": "end of side",
         "displayValue": "(end of side)",
     },
 ),
 (
     ImageDollarLine("1", "a", "great"),
     {
         "type": "ImageDollarLine",
         "prefix": "$",
Example #15
def make_line(self, data, **kwargs) -> EmptyLine:
    return EmptyLine()
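Presumably a marshmallow post-load hook on a line schema; a minimal sketch of that assumption (EmptyLineSchema here is illustrative, not necessarily the project's actual class):

from marshmallow import Schema, post_load

class EmptyLineSchema(Schema):
    @post_load
    def make_line(self, data, **kwargs) -> EmptyLine:
        return EmptyLine()

# Loading an empty document yields the domain object.
assert EmptyLineSchema().load({}) == EmptyLine()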
Example #16
            ControlLine("#", " comment"),
        ),
        "1.0.0",
    )

    assert TextSchema().dump(text) == {
        "lines": OneOfLineSchema().dump(text.lines, many=True),
        "parser_version": text.parser_version,
        "numberOfLines": 1,
    }


@pytest.mark.parametrize(
    "lines",
    [
        [EmptyLine()],
        [ControlLine("#", " comment")],
        [
            TextLine.of_iterable(
                LineNumber(1),
                [
                    Word.of(
                        unique_lemma=(WordId("nu I"),), parts=[Reading.of_name("nu")]
                    ),
                    Word.of(alignment=1, parts=[Reading.of_name("nu")]),
                    LanguageShift.of("%sux"),
                    LoneDeterminative.of(
                        [Determinative.of([Reading.of_name("nu")])],
                        language=Language.SUMERIAN,
                    ),
                    Erasure.open(),