Exemplo n.º 1
0
def test_get_top_level_genre_urls():
    """The first page yields 16 top-level genre links; check the first two hrefs."""
    page = Scraper(read_text_from_file(first_page_filename))
    links = page.get_top_level_genre_urls()

    assert_equal(len(links), 16)

    arts_href = "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2"
    business_href = "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2"
    assert_equal(links[0].href, arts_href)
    assert_equal(links[1].href, business_href)
Exemplo n.º 2
0
def test_get_subgenre_tags():
    """The first page exposes 6 subgenre tags, led by "Design" and "Fashion & Beauty"."""
    page = Scraper(read_text_from_file(first_page_filename))
    tags = page.get_subgenre_tags()

    tag_count = len(tags)
    assert_equal(tag_count, 6)

    first_label = tags[0].string
    assert_equal(first_label, "Design")
    second_label = tags[1].string
    assert_equal(second_label, "Fashion & Beauty")
Exemplo n.º 3
0
def test_get_top_level_genre_tags():
    """The first page yields 16 top-level genre tags; the first two read Arts, Business."""
    page = Scraper(read_text_from_file(first_page_filename))
    genre_tags = page.get_top_level_genre_tags()

    assert_equal(len(genre_tags), 16)

    first_label = genre_tags[0].string
    assert_equal(first_label, "Arts")
    second_label = genre_tags[1].string
    assert_equal(second_label, "Business")
Exemplo n.º 4
0
def test_get_top_level_genre_tags():
    """Check the count and the leading labels of the top-level genre tags."""
    tags = Scraper(
        read_text_from_file(first_page_filename)
    ).get_top_level_genre_tags()

    assert_equal(len(tags), 16)

    # Only the first two labels are pinned.
    for position, label in enumerate(("Arts", "Business")):
        assert_equal(tags[position].string, label)
Exemplo n.º 5
0
def test_get_subgenre_tags():
    """Check the count and the leading labels of the first page's subgenre tags."""
    tags = Scraper(
        read_text_from_file(first_page_filename)
    ).get_subgenre_tags()

    assert_equal(len(tags), 6)

    # Only the first two labels are pinned.
    for position, label in enumerate(("Design", "Fashion & Beauty")):
        assert_equal(tags[position].string, label)
Exemplo n.º 6
0
def test_get_subgenre_urls():
    """The first page yields 6 subgenre links; check the href of the first one."""
    page = Scraper(read_text_from_file(first_page_filename))
    links = page.get_subgenre_urls()

    assert_equal(len(links), 6)

    design_href = "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2"
    assert_equal(links[0].href, design_href)
Exemplo n.º 7
0
def test_get_number_of_pages():
    """Page count is 7 for the page_num_filename page and 0 for the first page."""
    def page_count(filename):
        # Parse the stored page and ask the scraper for its page count.
        return Scraper(read_text_from_file(filename)).get_number_of_pages()

    assert_equal(page_count(page_num_filename), 7)
    assert_equal(page_count(first_page_filename), 0)
Exemplo n.º 8
0
def test_get_itunes_podcast_urls():
    """The first page yields 240 podcast URLs; check the first one."""
    page = Scraper(read_text_from_file(first_page_filename))
    podcast_urls = page.get_itunes_podcast_urls()

    assert_equal(len(podcast_urls), 240)

    first_expected = "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2"
    assert_equal(podcast_urls[0], first_expected)
Exemplo n.º 9
0
def test_get_current_subgenre_tag():
    """Selected subgenre is "Food" on the food page and None on the first page."""
    def selected_subgenre(filename):
        # Parse the stored page and look up its selected subgenre tag.
        page = Scraper(read_text_from_file(filename))
        return page._get_currently_selected_subgenre_tag()

    # Page with a subgenre selected
    assert_equal(selected_subgenre(food_page_filename).string, "Food")

    # Page with no subgenre selected
    assert_equal(selected_subgenre(first_page_filename), None)
Exemplo n.º 10
0
def test_get_top_level_genre_urls():
    """Check the count and the leading hrefs of the top-level genre links."""
    links = Scraper(
        read_text_from_file(first_page_filename)
    ).get_top_level_genre_urls()

    assert_equal(len(links), 16)

    # Only the first two hrefs are pinned.
    leading_hrefs = (
        "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2",
        "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2",
    )
    for position, href in enumerate(leading_hrefs):
        assert_equal(links[position].href, href)
Exemplo n.º 11
0
def test_return_urls_not_in_history_real_tags_single_element():
    """With the first letter tag as history, every other letter tag is returned."""
    driver = Driver(test_url3, MockFetcher(fetch_values))
    driver.populate_state()

    # The candidate tags and the history tag are parsed by two separate
    # Scraper instances, because in real use they come from different scrapers.
    candidate_tags = Scraper(text_from_file(test_url3_file)).get_letter_urls()
    history_tags = Scraper(text_from_file(test_url3_file)).get_letter_urls()

    driver.history = history_tags[0]
    unseen = driver.return_urls_not_in_history(candidate_tags)
    assert_equal(unseen, candidate_tags[1:])
Exemplo n.º 12
0
def test_get_current_page():
    """Selected page is "1", then "2", then None across the three stored pages."""
    def selected_page(filename):
        # Parse the stored page and look up its selected page tag.
        return Scraper(read_text_from_file(filename)).get_currently_selected_page()

    # Page with a number selected
    assert_equal(selected_page(page_num_filename).string, "1")

    # Page that is not page 1
    assert_equal(selected_page(society_n2_filename).string, "2")

    # Page without a paginator
    assert_equal(selected_page(first_page_filename), None)
Exemplo n.º 13
0
def test_get_current_letter():
    """Selected letter is "A", then "N", then None across the three stored pages."""
    def selected_letter(filename):
        # Parse the stored page and look up its selected letter tag.
        return Scraper(read_text_from_file(filename)).get_currently_selected_letter()

    # Page with a letter selected
    assert_equal(selected_letter(page_num_filename).string, "A")

    # Page with a letter other than "A"
    assert_equal(selected_letter(society_n2_filename).string, "N")

    # Page without a letter selected
    assert_equal(selected_letter(first_page_filename), None)
Exemplo n.º 14
0
def test_get_current_genre():
    """Selected genre is "Arts", "Music", then "Arts" across the three stored pages."""
    def selected_genre_label(filename):
        # Parse the stored page and read the selected genre tag's string.
        page = Scraper(read_text_from_file(filename))
        return page.get_currently_selected_genre().string

    # First page
    assert_equal(selected_genre_label(first_page_filename), "Arts")

    # Music page
    assert_equal(selected_genre_label(music_page_filename), "Music")

    # Food page, which has a subgenre selected
    assert_equal(selected_genre_label(food_page_filename), "Arts")
Exemplo n.º 15
0
    def fetch(self, url):
        """
        Get a url, Return a Scraper of the page

        Args:
            url: address passed to ``requests.get``.

        Returns:
            A Scraper constructed from the response text.

        Raises:
            HTTPError: if status code is not HTTP 200 OK
        """
        r = requests.get(url)

        # raise_for_status() only raises for 4xx/5xx responses, so on its own
        # it lets other non-200 statuses (e.g. 204) through.
        r.raise_for_status()
        if r.status_code != 200:
            # Enforce the documented "200 only" contract for the rest.
            raise requests.HTTPError(
                "Unexpected status code %s for url: %s" % (r.status_code, url),
                response=r)
        return Scraper(r.text)
Exemplo n.º 16
0
def test_get_number_of_pages():
    """Check the reported page count for two stored pages."""
    # The page_num_filename page is expected to report 7 pages.
    paged = Scraper(read_text_from_file(page_num_filename))
    assert_equal(paged.get_number_of_pages(), 7)

    # The first page is expected to report 0 pages.
    unpaged = Scraper(read_text_from_file(first_page_filename))
    assert_equal(unpaged.get_number_of_pages(), 0)
Exemplo n.º 17
0
def test_get_current_subgenre_tag():
    """Check the selected subgenre tag on a page that has one and one that does not."""
    # Food page: a subgenre is selected
    food_page = Scraper(read_text_from_file(food_page_filename))
    selected = food_page._get_currently_selected_subgenre_tag()
    assert_equal(selected.string, "Food")

    # First page: nothing is selected
    first_page = Scraper(read_text_from_file(first_page_filename))
    assert_equal(first_page._get_currently_selected_subgenre_tag(), None)
Exemplo n.º 18
0
def test_get_current_page():
    """Check the selected page tag on three stored pages."""
    # A number is selected
    numbered = Scraper(read_text_from_file(page_num_filename))
    assert_equal(numbered.get_currently_selected_page().string, "1")

    # The selected page is not page 1
    second = Scraper(read_text_from_file(society_n2_filename))
    assert_equal(second.get_currently_selected_page().string, "2")

    # There is no paginator
    unpaged = Scraper(read_text_from_file(first_page_filename))
    assert_equal(unpaged.get_currently_selected_page(), None)
Exemplo n.º 19
0
def test_get_current_letter():
    """Check the selected letter tag on three stored pages."""
    # A letter is selected
    lettered = Scraper(read_text_from_file(page_num_filename))
    assert_equal(lettered.get_currently_selected_letter().string, "A")

    # The selected letter is not "A"
    society = Scraper(read_text_from_file(society_n2_filename))
    assert_equal(society.get_currently_selected_letter().string, "N")

    # No letter is selected
    plain = Scraper(read_text_from_file(first_page_filename))
    assert_equal(plain.get_currently_selected_letter(), None)
Exemplo n.º 20
0
def test_get_current_genre():
    """Check the selected genre tag on three stored pages."""
    # First page
    first_page = Scraper(read_text_from_file(first_page_filename))
    assert_equal(first_page.get_currently_selected_genre().string, "Arts")

    # Music page
    music_page = Scraper(read_text_from_file(music_page_filename))
    assert_equal(music_page.get_currently_selected_genre().string, "Music")

    # Food page, which has a subgenre selected
    food_page = Scraper(read_text_from_file(food_page_filename))
    assert_equal(food_page.get_currently_selected_genre().string, "Arts")
Exemplo n.º 21
0
def test_return_urls_not_in_history_real_tags_single_element():
    """History holds only the first letter tag; the remaining tags must come back."""
    mock_fetcher = MockFetcher(fetch_values)
    driver = Driver(test_url3, mock_fetcher)
    driver.populate_state()

    def letter_tags():
        # A fresh Scraper per call: in real use the tags being compared
        # come from two different scrapers.
        return Scraper(text_from_file(test_url3_file)).get_letter_urls()

    candidates = letter_tags()
    seen = letter_tags()

    driver.history = seen[0]
    assert_equal(driver.return_urls_not_in_history(candidates), candidates[1:])
Exemplo n.º 22
0
def test_get_subgenre_urls():
    """Check the count and the first href of the first page's subgenre links."""
    links = Scraper(
        read_text_from_file(first_page_filename)
    ).get_subgenre_urls()

    link_count = len(links)
    assert_equal(link_count, 6)
    assert_equal(
        links[0].href,
        "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2",
    )
Exemplo n.º 23
0
def test_get_letter_urls():
    """The first page's letter tags start with "A" and end with "#"."""
    page = Scraper(read_text_from_file(first_page_filename))
    tags = page.get_letter_urls()

    first_label = tags[0].string
    assert_equal(first_label, "A")
    last_label = tags[-1].string
    assert_equal(last_label, "#")
Exemplo n.º 24
0
def test_get_page_urls():
    """The page_num_filename page's page tags start with "2" and end with "7"."""
    page = Scraper(read_text_from_file(page_num_filename))
    tags = page.get_page_urls()

    first_label = tags[0].string
    assert_equal(first_label, "2")
    last_label = tags[-1].string
    assert_equal(last_label, "7")
Exemplo n.º 25
0
def test_get_itunes_podcast_urls():
    """Check the count and the first entry of the first page's podcast URLs."""
    urls = Scraper(
        read_text_from_file(first_page_filename)
    ).get_itunes_podcast_urls()

    url_count = len(urls)
    assert_equal(url_count, 240)
    assert_equal(
        urls[0],
        "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2",
    )
Exemplo n.º 26
0
def test_get_page_urls():
    """Check the first and last labels of the page tags."""
    tags = Scraper(read_text_from_file(page_num_filename)).get_page_urls()

    # (index, expected label) pairs: the first and the last tag.
    for index, label in ((0, "2"), (-1, "7")):
        assert_equal(tags[index].string, label)
Exemplo n.º 27
0
def test_make_soup():
    """The scraper's soup for the first page carries the expected title string."""
    page = Scraper(read_text_from_file(first_page_filename))
    title_text = page.soup.title.string
    assert_equal(title_text, "Arts - Podcasts Downloads on iTunes")
Exemplo n.º 28
0
 def fetch(self, url):
     """Return a Scraper built from the file that ``self.return_values`` maps ``url`` to."""
     filename = self.return_values[url]
     return Scraper(text_from_file(filename))
Exemplo n.º 29
0
def test_get_subgenre_tags_no_entries():
    """get_subgenre_tags returns None for the music page."""
    music_page = Scraper(read_text_from_file(music_page_filename))
    assert_equal(music_page.get_subgenre_tags(), None)
Exemplo n.º 30
0
def test_get_subgenre_tags_no_entries():
    """The music page is expected to yield None instead of subgenre tags."""
    page_text = read_text_from_file(music_page_filename)
    result = Scraper(page_text).get_subgenre_tags()
    assert_equal(result, None)
Exemplo n.º 31
0
def test_get_letter_urls():
    """Check the first and last labels of the first page's letter tags."""
    tags = Scraper(read_text_from_file(first_page_filename)).get_letter_urls()

    # (index, expected label) pairs: the first and the last tag.
    for index, label in ((0, "A"), (-1, "#")):
        assert_equal(tags[index].string, label)
Exemplo n.º 32
0
 def fetch(self, url):
     """Wrap ``self.return_text`` in a Scraper; ``url`` is not used."""
     canned_text = self.return_text
     return Scraper(canned_text)