def test_incorrect_url():
    """
    Checks that scrape_page raises an AssertionError when it is
    given a URL from a non-WebMD domain (here, a drugs.com review page)
    """
    wrong_domain_url = 'https://www.drugs.com/comments/aripiprazole/abilify.html?page=1'
    webmd = WebMDScraper()
    with pytest.raises(AssertionError):
        webmd.scrape_page(wrong_domain_url)
# Example #2
# 0
def test_webmd_scrape_page():
    """
    Scrapes a WebMD citalopram review page and checks that reviews
    were collected and that the last one carries the expected fields
    """
    citalopram_url = 'https://www.webmd.com/drugs/drugreview-1701-citalopram-oral.aspx?drugid=1701&drugname=citalopram-oral'
    scraper = WebMDScraper()
    scraper.scrape_page(citalopram_url)

    # The scrape must have produced at least one review.
    assert scraper.review_list

    last_review_fields = list(scraper.review_list[-1].keys())
    for expected in ('comment', 'effectiveness', 'ease of use', 'satisfaction'):
        assert expected in last_review_fields
def test_scrape_page_with_parameters():
    """
    Tests to make sure that calling the scrape_page function
    on a scraper object with non-default parameters (collect_user_ids
    and collect_urls both True) collects exactly six fields for a
    review: 'comment', 'rating', 'date', 'drug', 'url', and 'user id'
    """
    scraper = WebMDScraper(collect_user_ids=True, collect_urls=True)
    scraper.scrape_page(
        'https://www.webmd.com/drugs/drugreview-64439-abilify.aspx?drugid=64439&drugname=abilify'
    )
    data_collected = list(scraper.reviews[0].keys())
    assert len(data_collected) == 6
    # Check every promised field, not only the two optional ones, so that a
    # renamed or missing base field cannot hide behind the length check.
    for field in ('comment', 'rating', 'date', 'drug', 'url', 'user id'):
        assert field in data_collected
def test_scrape_page_default_parameters():
    """
    Checks scrape_page on a scraper built with default parameters:
    the first review must hold exactly four fields ('comment',
    'rating', 'date', and 'drug'), and five reviews must have
    been collected in total
    """
    abilify_url = 'https://www.webmd.com/drugs/drugreview-64439-abilify.aspx?drugid=64439&drugname=abilify'
    default_scraper = WebMDScraper()
    default_scraper.scrape_page(abilify_url)

    first_review_fields = list(default_scraper.reviews[0].keys())
    assert len(first_review_fields) == 4
    for expected in ('comment', 'rating', 'date', 'drug'):
        assert expected in first_review_fields
    assert len(default_scraper.reviews) == 5
def test_no_reviews():
    """
    Checks that scrape_page returns 0 when the page it is
    pointed at yields no reviews
    """
    empty_page_url = 'https://www.webmd.com/drugs/drugreview-155251-T-Plus-topical.aspx?drugid=155251&drugname=T-Plus-topical'
    result = WebMDScraper().scrape_page(empty_page_url)
    assert result == 0