def test_incorrect_url():
    """Check that scrape_page rejects a URL from a non-WebMD domain.

    A drugs.com review URL is handed to a default WebMDScraper; the call
    is expected to fail with an AssertionError.
    """
    foreign_url = 'https://www.drugs.com/comments/aripiprazole/abilify.html?page=1'
    webmd = WebMDScraper()
    with pytest.raises(AssertionError):
        webmd.scrape_page(foreign_url)
def test_webmd_scrape_page():
    """Scrape one citalopram review page and check the last review's fields.

    After scraping, the scraper's ``review_list`` must be non-empty and its
    final entry must expose the 'comment', 'effectiveness', 'ease of use'
    and 'satisfaction' keys.
    """
    # NOTE(review): the other tests in this file read `scraper.reviews` and
    # expect a single 'rating' key, whereas this test reads `review_list`
    # and per-category rating keys -- confirm which matches the current
    # WebMDScraper API.
    scraper = WebMDScraper()
    scraper.scrape_page(
        'https://www.webmd.com/drugs/drugreview-1701-citalopram-oral.aspx?drugid=1701&drugname=citalopram-oral'
    )
    assert scraper.review_list
    last_review_fields = list(scraper.review_list[-1].keys())
    for expected_field in ('comment', 'effectiveness', 'ease of use', 'satisfaction'):
        assert expected_field in last_review_fields
def test_scrape_page_with_parameters():
    """Check the fields collected when the optional flags are enabled.

    A scraper built with ``collect_user_ids=True`` and ``collect_urls=True``
    scrapes the Abilify review page. The first review must then carry
    exactly six fields, and 'user id' and 'url' must be among them (the
    remaining four are not individually asserted here).
    """
    page_url = 'https://www.webmd.com/drugs/drugreview-64439-abilify.aspx?drugid=64439&drugname=abilify'
    scraper = WebMDScraper(collect_user_ids=True, collect_urls=True)
    scraper.scrape_page(page_url)
    first_review_fields = list(scraper.reviews[0].keys())
    assert len(first_review_fields) == 6
    for optional_field in ('user id', 'url'):
        assert optional_field in first_review_fields
def test_scrape_page_default_parameters():
    """Check the fields and review count collected with default settings.

    A default WebMDScraper scrapes the Abilify review page. The first
    review must carry exactly four fields ('comment', 'rating', 'date' and
    'drug'), and five reviews must have been collected in total.
    """
    page_url = 'https://www.webmd.com/drugs/drugreview-64439-abilify.aspx?drugid=64439&drugname=abilify'
    scraper = WebMDScraper()
    scraper.scrape_page(page_url)
    first_review_fields = list(scraper.reviews[0].keys())
    assert len(first_review_fields) == 4
    for default_field in ('comment', 'rating', 'date', 'drug'):
        assert default_field in first_review_fields
    assert len(scraper.reviews) == 5
def test_no_reviews():
    """Check that scrape_page returns 0 for a page that has no reviews."""
    empty_page_url = 'https://www.webmd.com/drugs/drugreview-155251-T-Plus-topical.aspx?drugid=155251&drugname=T-Plus-topical'
    scraper = WebMDScraper()
    assert scraper.scrape_page(empty_page_url) == 0