Exemplo n.º 1
0
def test_extract_text_from_paragraph_that_contains_link():
    """Check text extraction for a paragraph that starts with a link.

    The paragraph's leading word is wrapped in an ``<a>`` element; the
    extracted text is expected to read as one sentence, with the anchor
    text followed by the rest of the paragraph.
    """
    markup = """
    <html>
    <p><a href="">Login</a> at this awesome website</p>
    </html>
    """
    expected = "Login at this awesome website"

    document = BeautifulSoup(markup, "html.parser")

    assert converter.extract_text(document) == expected
Exemplo n.º 2
0
def test_extract_text_with_comment_in_javascriptcode():
    """Check that a script block containing a comment yields no text.

    The only content in the document is a ``<script>`` element (nested in
    a paragraph) holding an HTML-style comment and a JavaScript function,
    so the extracted text is expected to be the empty string.
    """
    markup = """
    <html>
        <header>
        </header>
        <p>
            <script>
            <!-- Text in comment-->
            (function() {})
            </script>
        </p>
    </html>
    """
    expected = ""

    document = BeautifulSoup(markup, "html.parser")

    assert converter.extract_text(document) == expected
Exemplo n.º 3
0
def test_extract_text_from_multiple_paragraphs():
    """Check text extraction across two paragraphs with a script present.

    The document has a ``<script>`` inside ``<header>`` followed by two
    paragraphs; the expected result is the two paragraph texts separated
    by a single space, with none of the script content included.
    """
    markup = """
    <html>
    <header>
        <script>
            (function () {
                log.console('test javascript');
            })
        </script>
    </header>
    <p>
    This is a test text
    </p>
    <p>
    Another test text!
    </p>
    </html>
    """
    expected = "This is a test text Another test text!"

    document = BeautifulSoup(markup, "html.parser")

    assert converter.extract_text(document) == expected