def test_get_top_level_genre_urls():
    # The first-page fixture should yield 16 top-level genre links; the
    # first two point at the Arts and Business genre pages.
    page_html = read_text_from_file(first_page_filename)
    top_level_urls = Scraper(page_html).get_top_level_genre_urls()
    expected_leading_hrefs = (
        "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2",
        "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2",
    )

    assert_equal(len(top_level_urls), 16)
    for position, expected_href in enumerate(expected_leading_hrefs):
        assert_equal(top_level_urls[position].href, expected_href)
def test_get_subgenre_tags():
    # The first-page fixture should list 6 subgenre tags, beginning with
    # "Design" and "Fashion & Beauty".
    page_html = read_text_from_file(first_page_filename)
    tags = Scraper(page_html).get_subgenre_tags()

    assert_equal(len(tags), 6)
    assert_equal(tags[0].string, "Design")
    assert_equal(tags[1].string, "Fashion & Beauty")
def test_get_top_level_genre_tags():
    # The first-page fixture should yield 16 top-level genre tags whose
    # first two labels are "Arts" and "Business".
    page_html = read_text_from_file(first_page_filename)
    genre_tags = Scraper(page_html).get_top_level_genre_tags()

    assert_equal(len(genre_tags), 16)
    for position, expected_label in enumerate(("Arts", "Business")):
        assert_equal(genre_tags[position].string, expected_label)
def test_get_subgenre_urls():
    # The first-page fixture should yield 6 subgenre links, the first of
    # which is the Arts > Design page.
    expected_first_href = (
        "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2"
    )
    page_html = read_text_from_file(first_page_filename)
    urls = Scraper(page_html).get_subgenre_urls()

    assert_equal(len(urls), 6)
    assert_equal(urls[0].href, expected_first_href)
def test_get_number_of_pages():
    # Each fixture is paired with the page count the scraper is expected
    # to report for it: 7 for the paginated fixture, 0 for the first page.
    expected_counts = (
        (page_num_filename, 7),
        (first_page_filename, 0),
    )
    for fixture_name, expected_pages in expected_counts:
        page_scraper = Scraper(read_text_from_file(fixture_name))
        assert_equal(page_scraper.get_number_of_pages(), expected_pages)
def test_get_itunes_podcast_urls():
    # The first-page fixture should contain 240 podcast URLs; the first
    # one is compared directly (the entries are plain values, not tags).
    expected_first_url = (
        "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2"
    )
    page_html = read_text_from_file(first_page_filename)
    podcast_urls = Scraper(page_html).get_itunes_podcast_urls()

    assert_equal(len(podcast_urls), 240)
    assert_equal(podcast_urls[0], expected_first_url)
def test_get_current_subgenre_tag():
    def selected_subgenre(fixture_name):
        # Build a scraper for the fixture and ask for its selected subgenre.
        page_scraper = Scraper(read_text_from_file(fixture_name))
        return page_scraper._get_currently_selected_subgenre_tag()

    # A page that has a subgenre selected
    food_tag = selected_subgenre(food_page_filename)
    assert_equal(food_tag.string, "Food")

    # A page that doesn't have one
    missing_tag = selected_subgenre(first_page_filename)
    assert_equal(missing_tag, None)
def test_get_top_level_genre_urls():
    # Checks the count of top-level genre links on the first-page fixture
    # and the hrefs of the first two entries.
    arts_href = "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2"
    business_href = (
        "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2"
    )

    first_page = Scraper(read_text_from_file(first_page_filename))
    links = first_page.get_top_level_genre_urls()

    assert_equal(len(links), 16)
    assert_equal(links[0].href, arts_href)
    assert_equal(links[1].href, business_href)
def test_return_urls_not_in_history_real_tags_single_element():
    # With only the first letter tag stored as the driver's history, every
    # remaining tag should be returned as not yet in the history.
    mock_fetcher = MockFetcher(fetch_values)
    driver = Driver(test_url3, mock_fetcher)
    driver.populate_state()

    candidate_scraper = Scraper(text_from_file(test_url3_file))
    candidate_tags = candidate_scraper.get_letter_urls()

    # Using two different scrapers because the tags will come from
    # two different scrapers in real life
    history_scraper = Scraper(text_from_file(test_url3_file))
    history_tags = history_scraper.get_letter_urls()
    driver.history = history_tags[0]

    unseen_urls = driver.return_urls_not_in_history(candidate_tags)
    assert_equal(unseen_urls, candidate_tags[1:])
def test_get_current_page():
    def selected_page(fixture_name):
        # Build a scraper for the fixture and ask for its selected page.
        page_scraper = Scraper(read_text_from_file(fixture_name))
        return page_scraper.get_currently_selected_page()

    # A page with a number selected
    assert_equal(selected_page(page_num_filename).string, "1")

    # A page that's not page 1
    assert_equal(selected_page(society_n2_filename).string, "2")

    # A page without a paginator
    assert_equal(selected_page(first_page_filename), None)
def test_get_current_letter():
    def selected_letter(fixture_name):
        # Build a scraper for the fixture and ask for its selected letter.
        page_scraper = Scraper(read_text_from_file(fixture_name))
        return page_scraper.get_currently_selected_letter()

    # A page with a letter selected
    assert_equal(selected_letter(page_num_filename).string, "A")

    # A page with a letter other than "A"
    assert_equal(selected_letter(society_n2_filename).string, "N")

    # A page without a letter selected
    assert_equal(selected_letter(first_page_filename), None)
def test_get_current_genre():
    # Each fixture is paired with the label of the genre expected to be
    # reported as currently selected.
    expected_genres = (
        # A page that has a genre selected
        (first_page_filename, "Arts"),
        # A page that isn't the base page
        (music_page_filename, "Music"),
        # A page that has a subgenre selected
        (food_page_filename, "Arts"),
    )
    for fixture_name, expected_label in expected_genres:
        page_scraper = Scraper(read_text_from_file(fixture_name))
        selected = page_scraper.get_currently_selected_genre()
        assert_equal(selected.string, expected_label)
def fetch(self, url):
    """
    Download ``url`` and wrap the response body in a Scraper.

    Args:
        url: address passed straight to ``requests.get``.

    Returns:
        A Scraper built from the response text.

    Raises:
        HTTPError: propagated from ``raise_for_status()``, which is
            invoked whenever the status code is not 200.
    """
    response = requests.get(url)
    succeeded = response.status_code == 200
    if not succeeded:
        # Let requests decide whether this status warrants an exception.
        response.raise_for_status()
    return Scraper(response.text)
def test_get_current_subgenre_tag():
    # A page that has a subgenre selected reports that subgenre's tag.
    food_scraper = Scraper(read_text_from_file(food_page_filename))
    food_tag = food_scraper._get_currently_selected_subgenre_tag()
    assert_equal(food_tag.string, "Food")

    # A page that doesn't have one reports None.
    first_page_scraper = Scraper(read_text_from_file(first_page_filename))
    absent_tag = first_page_scraper._get_currently_selected_subgenre_tag()
    assert_equal(absent_tag, None)
def test_get_current_page():
    # A page with a number selected
    numbered_scraper = Scraper(read_text_from_file(page_num_filename))
    first_selected = numbered_scraper.get_currently_selected_page()
    assert_equal(first_selected.string, "1")

    # A page that's not page 1
    second_page_scraper = Scraper(read_text_from_file(society_n2_filename))
    second_selected = second_page_scraper.get_currently_selected_page()
    assert_equal(second_selected.string, "2")

    # A page without a paginator
    unpaged_scraper = Scraper(read_text_from_file(first_page_filename))
    no_selection = unpaged_scraper.get_currently_selected_page()
    assert_equal(no_selection, None)
def test_get_current_letter():
    # A page with a letter selected
    letter_a_scraper = Scraper(read_text_from_file(page_num_filename))
    letter_a = letter_a_scraper.get_currently_selected_letter()
    assert_equal(letter_a.string, "A")

    # A page with a letter other than "A"
    letter_n_scraper = Scraper(read_text_from_file(society_n2_filename))
    letter_n = letter_n_scraper.get_currently_selected_letter()
    assert_equal(letter_n.string, "N")

    # A page without a letter selected
    no_letter_scraper = Scraper(read_text_from_file(first_page_filename))
    no_letter = no_letter_scraper.get_currently_selected_letter()
    assert_equal(no_letter, None)
def test_get_current_genre():
    def selected_genre_label(fixture_name):
        # Build a scraper for the fixture and return the text of the
        # genre it reports as currently selected.
        page_scraper = Scraper(read_text_from_file(fixture_name))
        return page_scraper.get_currently_selected_genre().string

    # A page that has a genre selected
    assert_equal(selected_genre_label(first_page_filename), "Arts")

    # A page that isn't the base page
    assert_equal(selected_genre_label(music_page_filename), "Music")

    # A page that has a subgenre selected
    assert_equal(selected_genre_label(food_page_filename), "Arts")
def test_get_subgenre_urls():
    # Checks the number of subgenre links on the first-page fixture and
    # the href of the first one.
    first_page = Scraper(read_text_from_file(first_page_filename))
    links = first_page.get_subgenre_urls()

    link_count = len(links)
    assert_equal(link_count, 6)

    design_href = (
        "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2"
    )
    assert_equal(links[0].href, design_href)
def test_get_letter_urls():
    # The letter links on the first-page fixture should run from "A"
    # through to "#" as the final entry.
    page_html = read_text_from_file(first_page_filename)
    letters = Scraper(page_html).get_letter_urls()

    first_letter = letters[0]
    assert_equal(first_letter.string, "A")
    last_letter = letters[-1]
    assert_equal(last_letter.string, "#")
def test_get_page_urls():
    # The page links on the paginated fixture should start at "2" and
    # end at "7".
    page_html = read_text_from_file(page_num_filename)
    pages = Scraper(page_html).get_page_urls()

    first_link = pages[0]
    assert_equal(first_link.string, "2")
    last_link = pages[-1]
    assert_equal(last_link.string, "7")
def test_get_itunes_podcast_urls():
    # Checks how many podcast URLs the first-page fixture yields and the
    # value of the first one.
    first_page = Scraper(read_text_from_file(first_page_filename))
    found_urls = first_page.get_itunes_podcast_urls()

    url_count = len(found_urls)
    assert_equal(url_count, 240)

    moth_url = (
        "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2"
    )
    assert_equal(found_urls[0], moth_url)
def test_make_soup():
    # The scraper's `soup` attribute should expose the parsed document;
    # the title text of the first-page fixture is used as the probe.
    page_html = read_text_from_file(first_page_filename)
    first_page = Scraper(page_html)
    page_title = first_page.soup.title.string
    assert_equal(page_title, "Arts - Podcasts Downloads on iTunes")
def fetch(self, url):
    """
    Return a Scraper for ``url`` without any network access.

    ``self.return_values[url]`` is handed to ``text_from_file`` and the
    resulting text is wrapped in a Scraper.
    """
    source = self.return_values[url]
    return Scraper(text_from_file(source))
def test_get_subgenre_tags_no_entries():
    # For the music-page fixture the scraper is expected to return None
    # rather than a collection of subgenre tags.
    page_html = read_text_from_file(music_page_filename)
    music_page = Scraper(page_html)
    tags = music_page.get_subgenre_tags()
    assert_equal(tags, None)
def fetch(self, url):
    """
    Return a Scraper built from ``self.return_text``.

    ``url`` is accepted but not used; the same text is wrapped on
    every call.
    """
    canned_text = self.return_text
    return Scraper(canned_text)