Exemplos de AnnotatedTextHandler em Python, exemplos de breadability.annotated_text.AnnotatedTextHandler em Python

Exemplo n.º 1

0

Exibir arquivo

def test_simple_document():
    """A lone paragraph yields one paragraph holding one unannotated chunk.

    The expected text shows the tab after the newline being dropped and the
    tab between the words turning into a single space.
    """
    fragment = fragment_fromstring("<p>This is\n\tsimple\ttext.</p>")
    paragraphs = AnnotatedTextHandler.parse(fragment)

    only_chunk = ("This is\nsimple text.", None)
    assert paragraphs == [(only_chunk,)]

Exemplo n.º 2

0

Exibir arquivo

def test_empty_paragraph():
    """A whitespace-only paragraph contributes nothing to the parsed output.

    Only the first paragraph is expected back, with its trailing space
    stripped.
    """
    markup = "<div><p>Paragraph <p>\t  \n</div>"
    paragraphs = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    expected = [
        (
            ("Paragraph", None),
        ),
    ]
    assert paragraphs == expected

Exemplo n.º 3

0

Exibir arquivo

def test_process_paragraph_with_chunked_text():
    """Neighbouring chunks that share an annotation are merged into one.

    Six raw chunks go in; three come out, one per run of equal annotations,
    with the leading space of each merged text removed.
    """
    bold_deleted = ("b", "del")
    emphasised = ("em", )
    raw_chunks = [
        (" 1", bold_deleted),
        (" 2", bold_deleted),
        (" 3", None),
        (" 4", None),
        (" 5", None),
        (" 6", emphasised),
    ]

    merged = AnnotatedTextHandler()._process_paragraph(raw_chunks)

    assert merged == (
        ("1 2", bold_deleted),
        ("3 4 5", None),
        ("6", emphasised),
    )

Exemplo n.º 4

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_process_paragraph_with_chunked_text():
    """Check that `_process_paragraph` joins runs of identically annotated text."""
    chunks = (
        [(text, ("b", "del")) for text in (" 1", " 2")]
        + [(text, None) for text in (" 3", " 4", " 5")]
        + [(" 6", ("em",))]
    )
    processor = AnnotatedTextHandler()

    result = processor._process_paragraph(chunks)

    # One output chunk per run of equal annotations, texts joined and stripped.
    expected = (
        ("1 2", ("b", "del")),
        ("3 4 5", None),
        ("6", ("em",)),
    )
    assert result == expected

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: pombredanne/readability.py

    def test_process_paragraph_with_chunked_text(self):
        # Consecutive chunks carrying the same annotation are expected to be
        # merged into a single chunk whose text has no leading space.
        # (Kept as a comment: a docstring would alter unittest's verbose output.)
        strike_bold = ("b", "del")
        raw_chunks = [
            (" 1", strike_bold),
            (" 2", strike_bold),
            (" 3", None),
            (" 4", None),
            (" 5", None),
            (" 6", ("em",)),
        ]

        merged = AnnotatedTextHandler()._process_paragraph(raw_chunks)

        self.assertEqual(merged, (
            ("1 2", strike_bold),
            ("3 4 5", None),
            ("6", ("em",)),
        ))

Exemplo n.º 6

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_simple_document():
    """Parsing one plain paragraph gives a single chunk with no annotation."""
    source = "<p>This is\n\tsimple\ttext.</p>"
    result = AnnotatedTextHandler.parse(fragment_fromstring(source))

    # The newline survives; the tabs do not appear in the expected text.
    expected_paragraph = (("This is\nsimple text.", None),)
    assert result == [expected_paragraph]

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_empty_paragraph():
    """The second, whitespace-only paragraph must not show up in the result."""
    root = fragment_fromstring("<div><p>Paragraph <p>\t  \n</div>")

    result = AnnotatedTextHandler.parse(root)

    assert result == [(("Paragraph", None),)]

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: pombredanne/readability.py

    def test_empty_paragraph(self):
        # The paragraph made only of whitespace is expected to be left out,
        # and "Paragraph " is expected back without its trailing space.
        # (Kept as a comment: a docstring would alter unittest's verbose output.)
        root = fragment_fromstring("<div><p>Paragraph <p>\t  \n</div>")
        paragraphs = AnnotatedTextHandler.parse(root)

        self.assertEqual(paragraphs, [(("Paragraph", None),)])

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: pombredanne/readability.py

    def test_simple_document(self):
        # One <p> in, one paragraph with one unannotated chunk out.
        # (Kept as a comment: a docstring would alter unittest's verbose output.)
        markup = "<p>This is\n\tsimple\ttext.</p>"
        paragraphs = AnnotatedTextHandler.parse(fragment_fromstring(markup))

        sole_chunk = ("This is\nsimple text.", None)
        self.assertEqual(paragraphs, [(sole_chunk,)])

Exemplo n.º 10

0

Exibir arquivo

def test_multiple_paragraphs():
    """Three unclosed <p> elements produce three separate paragraphs.

    Per the expected values, surrounding spaces are stripped, the tab turns
    into a space, and the carriage return comes back as a newline.
    """
    markup = "<div><p> 1 first<p> 2\tsecond <p>3\rthird   </div>"
    paragraphs = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    expected_texts = ("1 first", "2 second", "3\nthird")
    assert paragraphs == [((text, None), ) for text in expected_texts]

Exemplo n.º 11

0

Exibir arquivo

def test_single_annotation():
    """Text inside <em> is reported as its own chunk annotated with "em"."""
    root = fragment_fromstring("<div><p> text <em>emphasis</em> <p> last</div>")
    paragraphs = AnnotatedTextHandler.parse(root)

    first_paragraph = (
        ("text", None),
        ("emphasis", ("em", )),
    )
    second_paragraph = (("last", None), )
    assert paragraphs == [first_paragraph, second_paragraph]

Exemplo n.º 12

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_annotations_without_explicit_paragraph():
    """Inline markup directly inside a <div> still ends up in one paragraph."""
    markup = "<div>text <strong>emphasis</strong>\t<b>hmm</b> </div>"
    result = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    # Plain text first, then one chunk per inline element, tagged by its name.
    sole_paragraph = (
        ("text", None),
        ("emphasis", ("strong",)),
        ("hmm", ("b",)),
    )
    assert result == [sole_paragraph]

Exemplo n.º 13

0

Exibir arquivo

def test_annotations_without_explicit_paragraph():
    """A <div> with no <p> children is parsed as a single paragraph.

    The <strong> and <b> contents are expected as separate chunks, each
    annotated with the name of the element that wrapped it.
    """
    root = fragment_fromstring("<div>text <strong>emphasis</strong>\t<b>hmm</b> </div>")

    paragraphs = AnnotatedTextHandler.parse(root)

    expected = [
        (
            ("text", None),
            ("emphasis", ("strong", )),
            ("hmm", ("b", )),
        ),
    ]
    assert paragraphs == expected

Exemplo n.º 14

0

Exibir arquivo

def test_include_heading():
    """The <h1> text is kept as an "h1"-annotated chunk of the first paragraph.

    Parses the <body> of the ``h1_and_2_paragraphs.html`` snippet and expects
    two paragraphs: heading plus first paragraph text, then the second
    paragraph on its own.
    """
    document = document_fromstring(load_snippet("h1_and_2_paragraphs.html"))
    body = document.find("body")
    paragraphs = AnnotatedTextHandler.parse(body)

    heading_chunk = (
        'Nadpis H1, ktorý chce byť prvý s textom ale predbehol ho "title"',
        ("h1", ),
    )
    first_text_chunk = ("Toto je prvý odstavec a to je fajn.", None)
    second_text_chunk = (
        "Tento text je tu aby vyplnil prázdne miesto v srdci súboru.\nAj súbory majú predsa city.",
        None,
    )

    assert paragraphs == [
        (heading_chunk, first_text_chunk),
        (second_text_chunk, ),
    ]

Exemplo n.º 15

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_recursive_annotation():
    """Nested inline tags are collected into one annotation tuple.

    The text sits inside <em><i><em>; the expected annotation is
    ("em", "i"), so the repeated "em" is listed only once.
    """
    markup = "<div><p> text <em><i><em>emphasis</em></i></em> <p> last</div>"
    result = AnnotatedTextHandler.parse(fragment_fromstring(markup))

    with_nested_markup = (("text", None), ("emphasis", ("em", "i")))
    plain = (("last", None),)
    assert result == [with_nested_markup, plain]

Exemplo n.º 16

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_include_heading():
    """Heading text from the snippet body is parsed and annotated with "h1"."""
    html = load_snippet("h1_and_2_paragraphs.html")
    body_element = document_fromstring(html).find("body")

    result = AnnotatedTextHandler.parse(body_element)

    # First paragraph: the heading chunk followed by the opening text.
    heading_paragraph = (
        ('Nadpis H1, ktorý chce byť prvý s textom ale predbehol ho "title"', ("h1",)),
        ("Toto je prvý odstavec a to je fajn.", None),
    )
    # Second paragraph: a single unannotated chunk containing a newline.
    filler_paragraph = (
        ("Tento text je tu aby vyplnil prázdne miesto v srdci súboru.\nAj súbory majú predsa city.", None),
    )
    assert result == [heading_paragraph, filler_paragraph]

Exemplo n.º 17

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: appscluster/breadability

    def test_single_annotation(self):
        # <em> content is expected as a separate chunk annotated ("em",);
        # the surrounding plain text stays unannotated.
        # (Kept as a comment: a docstring would alter unittest's verbose output.)
        markup = "<div><p> text <em>emphasis</em> <p> last</div>"
        paragraphs = AnnotatedTextHandler.parse(fragment_fromstring(markup))

        first_paragraph = (("text", None), ("emphasis", ("em",)))
        second_paragraph = (("last", None),)
        self.assertEqual(paragraphs, [first_paragraph, second_paragraph])

Exemplo n.º 18

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: pombredanne/readability.py

    def test_recursive_annotation(self):
        # Text nested in <em><i><em> is expected with annotation ("em", "i"):
        # the repeated "em" appears only once.
        # (Kept as a comment: a docstring would alter unittest's verbose output.)
        root = fragment_fromstring("<div><p> text <em><i><em>emphasis</em></i></em> <p> last</div>")
        paragraphs = AnnotatedTextHandler.parse(root)

        self.assertEqual(paragraphs, [
            (("text", None), ("emphasis", ("em", "i"))),
            (("last", None),),
        ])

Exemplo n.º 19

0

Exibir arquivo

Arquivo: test_annotated_text.py Projeto: bookieio/breadability

def test_multiple_paragraphs():
    """Each <p> inside the <div> becomes its own single-chunk paragraph."""
    root = fragment_fromstring("<div><p> 1 first<p> 2\tsecond <p>3\rthird   </div>")

    result = AnnotatedTextHandler.parse(root)

    # Expected texts are stripped; the tab reads back as a space and the
    # carriage return as a newline.
    first = (("1 first", None),)
    second = (("2 second", None),)
    third = (("3\nthird", None),)
    assert result == [first, second, third]