Пример #1
0
def test_simple_document():
    """A lone ``<p>`` fragment parses into one paragraph with one plain chunk.

    The expected value pins the whitespace handling for this input: the
    newline survives, the tab right after it is dropped and the tab between
    the last two words becomes a single space.
    """
    fragment = fragment_fromstring("<p>This is\n\tsimple\ttext.</p>")
    parsed = AnnotatedTextHandler.parse(fragment)

    only_paragraph = (("This is\nsimple text.", None), )
    assert parsed == [only_paragraph]
Пример #2
0
def test_empty_paragraph():
    """A whitespace-only ``<p>`` contributes nothing to the parsed output."""
    markup = "<div><p>Paragraph <p>\t  \n</div>"
    parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    # Only the first paragraph is expected, with its trailing space removed.
    expected = [
        (("Paragraph", None), ),
    ]
    assert parsed == expected
Пример #3
0
def test_process_paragraph_with_chunked_text():
    """Neighbouring chunks with equal annotations collapse into one chunk.

    Six input chunks carrying three distinct annotations are expected to
    come back as three chunks, each with its text joined and the leading
    space removed.
    """
    processor = AnnotatedTextHandler()
    chunks = [
        (" 1", ("b", "del")),
        (" 2", ("b", "del")),
        (" 3", None),
        (" 4", None),
        (" 5", None),
        (" 6", ("em", )),
    ]

    merged = processor._process_paragraph(chunks)

    expected = (
        ("1 2", ("b", "del")),
        ("3 4 5", None),
        ("6", ("em", )),
    )
    assert merged == expected
Пример #4
0
def test_process_paragraph_with_chunked_text():
    """Check that ``_process_paragraph`` merges runs of same-annotation chunks."""
    bold_deleted = ("b", "del")
    emphasis = ("em",)

    processor = AnnotatedTextHandler()
    result = processor._process_paragraph([
        (" 1", bold_deleted),
        (" 2", bold_deleted),
        (" 3", None),
        (" 4", None),
        (" 5", None),
        (" 6", emphasis),
    ])

    # One output chunk per run of equal annotations, text joined and stripped.
    assert result == (
        ("1 2", bold_deleted),
        ("3 4 5", None),
        ("6", emphasis),
    )
    def test_process_paragraph_with_chunked_text(self):
        """Runs of chunks with the same annotation are merged into one chunk."""
        chunks = [
            (" 1", ("b", "del")),
            (" 2", ("b", "del")),
            (" 3", None),
            (" 4", None),
            (" 5", None),
            (" 6", ("em",)),
        ]

        merged = AnnotatedTextHandler()._process_paragraph(chunks)

        # Three runs in, three chunks out; texts are joined and stripped.
        self.assertEqual(merged, (
            ("1 2", ("b", "del")),
            ("3 4 5", None),
            ("6", ("em",)),
        ))
Пример #6
0
def test_simple_document():
    """Parsing a single plain ``<p>`` yields one unannotated chunk."""
    html = "<p>This is\n\tsimple\ttext.</p>"

    result = AnnotatedTextHandler.parse(fragment_fromstring(html))

    # The annotation slot is None because the text sits in no inline tag.
    expected_chunk = ("This is\nsimple text.", None)
    assert result == [(expected_chunk,)]
Пример #7
0
def test_empty_paragraph():
    """Only the non-blank paragraph is expected in the parsed result."""
    root = fragment_fromstring("<div><p>Paragraph <p>\t  \n</div>")

    result = AnnotatedTextHandler.parse(root)

    # The second <p> holds nothing but whitespace and must not show up.
    surviving_paragraph = (("Paragraph", None),)
    assert result == [surviving_paragraph]
    def test_empty_paragraph(self):
        """A paragraph made only of whitespace is absent from the result."""
        markup = "<div><p>Paragraph <p>\t  \n</div>"
        parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

        # A single paragraph with a single unannotated chunk is expected.
        self.assertEqual(parsed, [(("Paragraph", None),)])
    def test_simple_document(self):
        """One ``<p>`` with plain text gives one paragraph with one chunk."""
        fragment = fragment_fromstring("<p>This is\n\tsimple\ttext.</p>")
        parsed = AnnotatedTextHandler.parse(fragment)

        # Tabs are expected to be normalized; the newline is expected to stay.
        paragraph = (("This is\nsimple text.", None),)
        self.assertEqual(parsed, [paragraph])
Пример #10
0
def test_multiple_paragraphs():
    """Three unclosed ``<p>`` tags produce three separate paragraphs.

    The expected texts show surrounding whitespace stripped, the tab turned
    into a space and the carriage return turned into a newline.
    """
    markup = "<div><p> 1 first<p> 2\tsecond <p>3\rthird   </div>"
    parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    expected_texts = ("1 first", "2 second", "3\nthird")
    # Every paragraph holds exactly one chunk and none carries an annotation.
    expected = [((text, None), ) for text in expected_texts]
    assert parsed == expected
Пример #11
0
def test_single_annotation():
    """Text inside ``<em>`` is reported as its own chunk tagged ``("em",)``."""
    root = fragment_fromstring("<div><p> text <em>emphasis</em> <p> last</div>")
    parsed = AnnotatedTextHandler.parse(root)

    # First paragraph: plain text followed by the emphasized chunk.
    first_paragraph = (
        ("text", None),
        ("emphasis", ("em", )),
    )
    # Second paragraph: one plain chunk.
    second_paragraph = (("last", None), )

    assert parsed == [first_paragraph, second_paragraph]
Пример #12
0
def test_annotations_without_explicit_paragraph():
    """Content placed directly in a ``<div>`` still forms one paragraph."""
    markup = "<div>text <strong>emphasis</strong>\t<b>hmm</b> </div>"
    parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    # One paragraph, three chunks: plain text, <strong> text, <b> text.
    implicit_paragraph = (
        ("text", None),
        ("emphasis", ("strong",)),
        ("hmm", ("b",)),
    )
    assert parsed == [implicit_paragraph]
Пример #13
0
def test_annotations_without_explicit_paragraph():
    """Inline annotations are captured even when no ``<p>`` wraps the text."""
    root = fragment_fromstring("<div>text <strong>emphasis</strong>\t<b>hmm</b> </div>")

    result = AnnotatedTextHandler.parse(root)

    expected = [
        (
            ("text", None),
            ("emphasis", ("strong", )),
            ("hmm", ("b", )),
        ),
    ]
    # All three chunks are expected to land in one and the same paragraph.
    assert result == expected
Пример #14
0
def test_include_heading():
    """The ``<h1>`` text is kept as a chunk annotated with ``("h1",)``.

    The snippet is loaded from ``h1_and_2_paragraphs.html`` and only its
    ``<body>`` element is handed to the parser.
    """
    document = document_fromstring(load_snippet("h1_and_2_paragraphs.html"))
    body = document.find("body")
    parsed = AnnotatedTextHandler.parse(body)

    # The heading chunk is expected in the same paragraph as the first
    # paragraph's text; the second paragraph stands alone.
    heading_and_intro = (
        ('Nadpis H1, ktorý chce byť prvý s textom ale predbehol ho "title"', ("h1", )),
        ("Toto je prvý odstavec a to je fajn.", None),
    )
    filler = (
        ("Tento text je tu aby vyplnil prázdne miesto v srdci súboru.\nAj súbory majú predsa city.", None),
    )
    assert parsed == [heading_and_intro, filler]
Пример #15
0
def test_recursive_annotation():
    """Nested inline tags yield one chunk listing each tag name once.

    The input nests ``<em><i><em>``; the expected annotation is
    ``("em", "i")``, so the repeated ``em`` is not duplicated.
    """
    markup = "<div><p> text <em><i><em>emphasis</em></i></em> <p> last</div>"
    parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    annotated_paragraph = (
        ("text", None),
        ("emphasis", ("em", "i")),
    )
    plain_paragraph = (("last", None),)

    assert parsed == [annotated_paragraph, plain_paragraph]
Пример #16
0
def test_include_heading():
    """Parsing the snippet body keeps the heading as an ``h1`` chunk."""
    snippet = load_snippet("h1_and_2_paragraphs.html")
    body = document_fromstring(snippet).find("body")

    result = AnnotatedTextHandler.parse(body)

    expected = [
        # Heading text and the first paragraph's text share one paragraph.
        (
            ('Nadpis H1, ktorý chce byť prvý s textom ale predbehol ho "title"', ("h1",)),
            ("Toto je prvý odstavec a to je fajn.", None),
        ),
        # The second paragraph keeps its embedded newline.
        (
            ("Tento text je tu aby vyplnil prázdne miesto v srdci súboru.\nAj súbory majú predsa city.", None),
        ),
    ]
    assert result == expected
    def test_single_annotation(self):
        """An ``<em>`` element becomes a chunk annotated with ``("em",)``."""
        markup = "<div><p> text <em>emphasis</em> <p> last</div>"
        parsed = AnnotatedTextHandler.parse(fragment_fromstring(markup))

        # Two paragraphs: plain text plus emphasized chunk, then plain text.
        with_emphasis = (
            ("text", None),
            ("emphasis", ("em",)),
        )
        plain_only = (("last", None),)
        self.assertEqual(parsed, [with_emphasis, plain_only])
    def test_recursive_annotation(self):
        """Nested ``<em><i><em>`` is expected to annotate as ``("em", "i")``."""
        root = fragment_fromstring("<div><p> text <em><i><em>emphasis</em></i></em> <p> last</div>")
        result = AnnotatedTextHandler.parse(root)

        # The inner, repeated <em> must not add a second "em" entry.
        self.assertEqual(result, [
            (
                ("text", None),
                ("emphasis", ("em", "i")),
            ),
            (
                ("last", None),
            ),
        ])
Пример #19
0
def test_multiple_paragraphs():
    """Each of the three ``<p>`` elements maps to its own paragraph tuple."""
    root = fragment_fromstring("<div><p> 1 first<p> 2\tsecond <p>3\rthird   </div>")

    result = AnnotatedTextHandler.parse(root)

    # Expected texts are stripped; the tab reads as a space and the carriage
    # return reads as a newline.
    first = (("1 first", None),)
    second = (("2 second", None),)
    third = (("3\nthird", None),)
    assert result == [first, second, third]