Пример #1
0
def test_find_in_ahref_internal():
    """A hit found by the default search disappears with ``external=False``."""
    needle = "Python 3.8.6 is now available"
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    without_external = page.find_in_ahref(needle, external=False)
    unfiltered = page.find_in_ahref(needle)

    # The unrestricted search must match something ...
    check.is_true(unfiltered)
    # ... while the search with external=False must come back empty.
    check.equal(len(without_external), 0)
Пример #2
0
def test_find_in_ahref_fuzzy():
    """Searching "donate" with ``fuzzy_score=90`` puts the donation URL first."""
    expected_url = (
        "https://psfmember.org/civicrm/contribute/transact?reset=1&id=2"
    )
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    matches = page.find_in_ahref("donate", fuzzy_score=90)
    print(matches)  # debugging aid: dump everything that matched

    first_match = str(matches[0])
    assert first_match == expected_url
Пример #3
0
def test_css_first():
    """``css_first("p.select")`` yields the paragraph with the expected text."""
    markup = """<html>
    <head>
        <title>Een test pagina</title>
    </head>
    <body>
        <script>Een script tag</script>
        <style>Een style tag</style>
        <p class="select">Een stukje content</p>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl")

    selected = page.css_first("p.select")
    assert selected.text() == "Een stukje content"
Пример #4
0
def test_internal_links():
    """All internal links contain the page URL, and at least one exists."""
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    total = len(page.internal_links)
    # Count the internal links whose absolute URL contains the page URL.
    on_domain = sum(
        1 for link in page.internal_links if TEST_HTML_URL in link.absolute_url
    )

    check.equal(total, on_domain)
    check.is_true(page.internal_links)
Пример #5
0
def test_find_in_text():
    """``find_in_text`` returns the paragraph first for ``p`` and two hits for ``*``."""
    markup = """<html>
    <head>
        <title>Een test pagina</title>
    </head>
    <body>
        <script>Een script tag</script>
        <style>Een style tag</style>
        <p class="select">Een test tag</p>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.google.com")

    paragraph_hits = page.find_in_text("p", "test")
    wildcard_hits = page.find_in_text("*", "test")

    first_paragraph_html = paragraph_hits[0].html
    check.equal(first_paragraph_html, '<p class="select">Een test tag</p>')
    check.equal(len(wildcard_hits), 2)
Пример #6
0
def test_get_first_block():
    """``get_first_block`` on an ``li`` returns the node with class ``blockParent``."""
    markup = """<html>
    <head>
        <title>Test Page</title>
    </head>
    <body>
        <article>
            <ul class="blockParent">
                <li>test1</li>
                <li id="selectMe">test2</li>
                <li>test3</li>
            </ul>
        </article>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl/")

    list_item = page.css_first("li#selectMe")
    enclosing_block = page.get_first_block(list_item)

    block_class = enclosing_block.attributes["class"]
    assert block_class == "blockParent"
Пример #7
0
def test_get_first_block_with_inner_inline():
    """``get_first_block`` on a nested ``strong`` returns the ``BlockParent`` node.

    The start node is a ``<strong>`` inside an ``<li>``; the test asserts that
    the returned node carries ``class="BlockParent"`` (the ``<ul>``).
    """
    # The fixture previously opened with a misspelled ``<atricle>`` tag while
    # closing with ``</article>``; the opening tag is now spelled correctly so
    # the markup is well-formed.
    html_string = """<html>
    <head>
        <title>Test Page</title>
    </head>
    <body>
        <article>
            <ul class="BlockParent">
                <li>Test 1</li>
                <li>Test 2</li>
                <li>Test <strong id="SelectMe">Nog 1</strong></li>
            </ul>
        </article>
    </body>
    </html>"""
    mock_url = "https://www.getevents.nl/"
    page = BasePage(html_string, mock_url)
    strong_node = page.css_first("strong#SelectMe")
    block_parent = page.get_first_block(strong_node)
    assert block_parent.attributes["class"] == "BlockParent"
Пример #8
0
def test_next_page_rel_next():
    """``next_page_url`` equals the href of the ``<link rel="next">`` in the head."""
    markup = """<html>
    <head>
        <title>Page 1</title>
        <link rel="next" href="https://www.getevents.nl/p2" />
    </head>
    <body>
        <p>Een p tag</p>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl/")

    expected_next = "https://www.getevents.nl/p2"
    assert page.next_page_url == expected_next
Пример #9
0
def test_next_page_url():
    """``next_page_url`` is the absolute form of the relative ``a.next`` href."""
    markup = """<html>
    <head>
        <title>Page 1</title>
    </head>
    <body>
        <p>Een p tag</p>
        <a href="/p2" class="next">Next</a>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl/")

    # The fixture only has the relative "/p2"; the absolute URL is expected.
    expected_next = "https://www.getevents.nl/p2"
    assert page.next_page_url == expected_next
Пример #10
0
def test_base_url():
    """``scheme_domain`` of a long search URL is ``https://www.google.com/``."""
    # A URL with a long query string; only scheme and host should remain.
    long_search_url = (
        "https://www.google.com/search?source=hp&ei=gF1nX5HVE5GxkwWNkaWgAw&q=url+tester"
        "&oq=url+tester&gs_lcp=CgZwc3ktYWIQAzICCAAyAggAMgIIADIGCAAQFhAeMgYIABAWEB4yBggAE"
        "BYQHjIGCAAQFhAeMgYIABAWEB4yBggAEBYQHjIGCAAQFhAeOgsILhCxAxCDARCTAjoICAAQsQMQgwE6B"
        "QgAELEDOgUILhCxAzoICC4QsQMQgwE6AgguUJwKWMMVYL8WaAFwAHgAgAGAAYgBswaSAQM2LjOYAQCg"
        "AQGqAQdnd3Mtd2l6sAEA&sclient=psy-ab&ved=0ahUKEwiRld3T7_frAhWR2KQKHY1ICTQQ4dUDCA"
        "c&uact=5"
    )
    page = BasePage("<p>Een stukje html</p>", long_search_url)

    expected_base = "https://www.google.com/"
    assert page.scheme_domain == expected_base
Пример #11
0
def test_prev_page_rel_prev():
    """``previous_page_url`` equals the href of the ``<link rel="prev">`` in the head.

    Mirrors ``test_next_page_rel_next``: the page is built with the URL of
    page 2 and is expected to report the site root as its previous page.
    """
    # The ``<link>`` tag was previously left unclosed (no ``/>``), so the
    # fixture was malformed HTML. It is now closed the same way as the
    # rel="next" fixture.
    html_string = """<html>
    <head>
        <title>Page 2</title>
        <link rel="prev" href="https://www.getevents.nl/" />
    </head>
    <body>
        <p>Een p tag</p>
    </body>
    </html>"""
    mock_url = "https://www.getevents.nl/p2"
    page = BasePage(html_string, mock_url)
    assert page.previous_page_url == "https://www.getevents.nl/"
Пример #12
0
def test_get_parent():
    """``get_parent`` returns the direct parent, or the grandparent when given 2."""
    markup = """<html>
    <head>
        <title>Test page</title>
    </head>
    <body class="grand_parent">
        <article class="parent">
            <h1>Headline</h1>
            <p>Some text ...</p>
        </article>
        <sidebar>
            <div>
                Widget
            </div>
        </sidebar>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl/")
    paragraph = page.css_first("p")

    one_level_up = page.get_parent(paragraph)
    two_levels_up = page.get_parent(paragraph, 2)

    # <article class="parent"> wraps the paragraph; <body class="grand_parent">
    # wraps the article.
    check.equal(one_level_up.attributes["class"], "parent")
    check.equal(two_levels_up.attributes["class"], "grand_parent")
Пример #13
0
def test_get_children():
    """``BasePage.get_children`` reports four children for the ``article`` node."""
    markup = """<html>
    <head>
        <title>Test page</title>
    </head>
    <body class="grand_parent">
        <article class="parent">
            <h1>Headline</h1>
            <p>Some text ...</p>
            <p>Antother tag</p>
            <button>Click me!</button>
        </article>
        <sidebar>
            <div>
                Widget
            </div>
        </sidebar>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.getevents.nl/")
    article = page.css_first("article")

    # get_children is invoked on the class, not on the page instance.
    child_nodes = BasePage.get_children(article)

    # The article holds h1, p, p and button.
    assert len(child_nodes) == 4
Пример #14
0
def test_text():
    """``page.text`` is only the paragraph content of the fixture.

    The fixture also contains a title, HTML comments, a script and a style
    tag; the expected text contains none of them.
    """
    markup = """<html>
    <head>
        <title>Een test pagina</title>
        <!--
        - DEze
        - Multiline comment moet eruit
        -->
    </head>
    <body>
        <!-- Deze comment moet eruit -->
        <script>Een script tag</script>
        <style>Een style tag</style>
        <p>Deze content moet bewaard blijven
        </p>
    </body>
    </html>"""
    page = BasePage(markup, "https://www.google.com/")

    expected_text = "Deze content moet bewaard blijven"
    assert page.text == expected_text
Пример #15
0
def test_find_in_text_regex():
    """``find_in_text_regex`` with ``t\\wst`` returns "Een test tag" as first ``p``."""
    pattern = re.compile(r"t\wst")
    page = BasePage(TEST_HTML_1, "https://www.google.com/")

    matching_paragraphs = page.find_in_text_regex("p", pattern)

    first_text = matching_paragraphs[0].text()
    assert first_text == "Een test tag"
Пример #16
0
def test_find_in_ahref_text():
    """With ``in_path=False``, "documentation" yields ``/doc/`` as first href."""
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    matches = page.find_in_ahref("documentation", in_path=False)

    first_href = matches[0].href
    assert first_href == "/doc/"
Пример #17
0
def test_links():
    """The reference page exposes exactly 197 links."""
    expected_count = 197
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    link_count = len(page.links)
    assert link_count == expected_count
Пример #18
0
def test_external_links():
    """The page's own URL is not an external link, and external links exist."""
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)
    external_urls = [i.absolute_url for i in page.external_links]
    # The original compared against TEST_HTML_PAGE (the HTML document itself),
    # which can never equal a link URL, so the check was vacuous. Compare
    # against the page URL instead.
    check.is_not_in(TEST_HTML_URL, external_urls)
    check.is_true(page.external_links)
Пример #19
0
def test_find_in_ahref():
    """The first link found for "learn more" has the href ``/doc/``."""
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    matches = page.find_in_ahref("learn more")
    top_match = matches[0]

    assert top_match.href == "/doc/"
Пример #20
0
def test_find_in_ahref_internal_searchlist():
    """Passing a list of two search terms to ``find_in_ahref`` yields two links."""
    terms = ["donate", "facebook"]
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    matches = page.find_in_ahref(terms)
    print(matches)  # debugging aid: dump everything that matched

    match_count = len(matches)
    assert match_count == 2
Пример #21
0
def test_find_in_ahref_path():
    """With ``in_text=False, internal=True``, "doc" finds "documentation" first."""
    page = BasePage(TEST_HTML_PAGE, TEST_HTML_URL)

    matches = page.find_in_ahref("doc", in_text=False, internal=True)

    # Link text is compared case-insensitively.
    first_label = matches[0].text.lower()
    assert first_label == "documentation"