Пример #1
0
def test_extract_article_full_page_content_digest_node_indexes():
    """Run the article check with content digests and node indexes both on.

    Hands two file names (presumably an input HTML fixture and its expected
    JSON output -- confirm against check_extract_article) to the shared
    checker, enabling both optional flags.
    """
    source_html = "addictinginfo.com-1_full_page.html"
    expected_json = "addictinginfo.com-1_simple_article_from_full_page_content_digest_node_indexes.json"
    check_extract_article(
        source_html,
        expected_json,
        content_digests=True,
        node_indexes=True,
    )
Пример #2
0
def test_extract_article_list_items_node_indexes():
    """Run the article check on the list-items page with node indexes on.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "list_items_full_page.html"
    expected_json = "list_items_simple_article_from_full_page_node_indexes.json"
    check_extract_article(source_html, expected_json, node_indexes=True)
Пример #3
0
def test_extract_article_full_page():
    """Run the article check on a full page with no optional flags.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "addictinginfo.com-1_full_page.html"
    expected_json = "addictinginfo.com-1_simple_article_from_full_page.json"
    check_extract_article(source_html, expected_json)
Пример #4
0
def test_extract_article_headers_and_non_paragraph_blockquote_text():
    """Run the article check on the davidwolfe.com page with default options.

    Per the test name, this page covers headers and blockquote text that is
    not wrapped in paragraphs. The two names are presumably the input HTML
    fixture and the expected JSON result -- confirm against
    check_extract_article.
    """
    source_html = "davidwolfe.com-1_full_page.html"
    expected_json = "davidwolfe.com-1_simple_article_from_full_page.json"
    check_extract_article(source_html, expected_json)
Пример #5
0
def test_extract_article_list_items_content_digests():
    """Run the article check on the list-items page with content digests on.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "list_items_full_page.html"
    expected_json = "list_items_simple_article_from_full_page_content_digests.json"
    check_extract_article(source_html, expected_json, content_digests=True)
Пример #6
0
def test_extract_article_list_items():
    """Run the article check on the list-items page with default options.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "list_items_full_page.html"
    expected_json = "list_items_simple_article_from_full_page.json"
    check_extract_article(source_html, expected_json)
Пример #7
0
def test_extract_article_unicode_normalisation():
    """Run the article check on the conservativehq.com page with defaults.

    Per the test name, this page is meant to cover unicode normalisation.
    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "conservativehq.com-1_full_page.html"
    expected_json = "conservativehq.com-1_simple_article_from_full_page.json"
    check_extract_article(source_html, expected_json)
Пример #8
0
def test_extract_article_non_article():
    """Run the article check on a page that, per the test name, is not an article.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "non_article_full_page.html"
    expected_json = "non_article_full_page.json"
    check_extract_article(source_html, expected_json)
Пример #9
0
def test_extract_simple_article_with_readability_js():
    """Run the article check on the plain-content page with use_readability_js on.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "plain-content-test_full_article.html"
    expected_json = "plain-content-test_full_article_javascript.json"
    check_extract_article(source_html, expected_json, use_readability_js=True)
Пример #10
0
def test_extract_article_from_page_with_readability_js():
    """Run the article check on a full page with use_readability_js on.

    The two names are presumably the input HTML fixture and the expected
    JSON result -- confirm against check_extract_article.
    """
    source_html = "addictinginfo.com-1_full_page.html"
    expected_json = "addictinginfo.com-1_full_page_javascript.json"
    check_extract_article(source_html, expected_json, use_readability_js=True)