示例#1
0
def test_get_movielist_movie_attributes():
    """Check the dict built from a single movie-list ``<li>`` element.

    Three fixture files are parsed in turn:

    * ``normal``: ``my_ratings['tijl']`` is expected to hold only a ``psi``.
    * ``rated``: ``my_ratings['tijl']`` is expected to hold only a ``rating``.
    * ``nopsi``: no ``my_ratings`` key is expected at all.
    """
    scraper = CritickerScraper()
    common_keys = {'crit_id', 'crit_url', 'title', 'year', 'date_added'}
    keys_with_ratings = common_keys | {'my_ratings'}

    def scrape_first_item(fixture_path):
        # Feed the first <li> of the fixture document to the scraper.
        soup = BeautifulSoup(read_file(fixture_path), "lxml")
        return scraper.get_movielist_movie_attributes(soup.find('li'))

    # --- "normal" entry: every attribute is checked ---
    normal = scrape_first_item(
        'test/fixtures/criticker-normal-movie-in-movie-list.html')
    assert set(normal.keys()) == keys_with_ratings
    assert normal['my_ratings']['tijl'] == {'psi': 55}
    assert normal['crit_id'] == 26496
    assert normal['crit_url'] == 'https://www.criticker.com/film/Issiz-adam/'
    assert normal['title'] == 'Issiz adam'
    assert normal['year'] == 2008
    assert 'date_added' in normal
    # The timestamp is expected to be recent enough to humanize as "just now".
    added_at = arrow.get(normal['date_added'])
    assert added_at.humanize() == 'just now'

    # --- "rated" entry: same keys, rating instead of psi ---
    rated = scrape_first_item(
        'test/fixtures/criticker-rated-movie-in-movie-list.html')
    assert set(rated.keys()) == keys_with_ratings
    assert rated['my_ratings']['tijl'] == {'rating': 61}

    # --- "nopsi" entry: the my_ratings key is absent ---
    nopsi = scrape_first_item(
        'test/fixtures/criticker-nopsi-movie-in-movie-list.html')
    assert set(nopsi.keys()) == common_keys
示例#2
0
def test_get_movie_info_no_votes():
    """Expect ``crit_votes`` to be 0 for the "16 Fathoms Deep" fixture page."""
    scraper = CritickerScraper()
    film_url = 'https://www.criticker.com/film/16-Fathoms-Deep/'
    fixture_path = 'test/fixtures/criticker-16-fathoms-deep.html'
    with requests_mock.mock() as mocked:
        # Answer the GET for the film page with the saved fixture HTML.
        mocked.get(film_url, text=read_file(fixture_path))
        info = scraper.get_movie_info(film_url)
    votes = info.get('crit_votes')
    assert votes == 0
示例#3
0
def test_get_movie_info_no_poster():
    """Expect no ``poster_url`` value for the "8 Tire on the Ice" fixture page."""
    scraper = CritickerScraper()
    film_url = 'https://www.criticker.com/film/8-Tire-on-the-Ice/'
    fixture_path = 'test/fixtures/criticker-8-tire-on-the-ice.html'
    with requests_mock.mock() as mocked:
        # Answer the GET for the film page with the saved fixture HTML.
        mocked.get(film_url, text=read_file(fixture_path))
        info = scraper.get_movie_info(film_url)
    # .get() is used so a missing key and an explicit None both pass.
    poster = info.get('poster_url')
    assert poster is None
示例#4
0
def test_get_movie_info_no_rating_of_my_own():
    """Expect no ``rating`` under ``my_ratings['tijl']`` for "The Mask" fixture."""
    scraper = CritickerScraper()
    film_url = 'http://www.criticker.com/film/The-Mask/'
    fixture_path = 'test/fixtures/criticker-the-mask.html'
    with requests_mock.mock() as mocked:
        # Answer the GET for the film page with the saved fixture HTML.
        mocked.get(film_url, text=read_file(fixture_path))
        info = scraper.get_movie_info(film_url)
    # The 'my_ratings' and 'tijl' keys must exist; only 'rating' may be absent.
    own_ratings = info['my_ratings']['tijl']
    assert own_ratings.get('rating') is None
示例#5
0
def test_get_movie_info_no_trailer():
    """Expect ``trailer_url`` to be present but ``None`` for the "Daens" fixture."""
    scraper = CritickerScraper()
    film_url = 'http://www.criticker.com/film/Daens/'
    fixture_path = 'test/fixtures/criticker-daens.html'
    with requests_mock.mock() as mocked:
        # Answer the GET for the film page with the saved fixture HTML.
        mocked.get(film_url, text=read_file(fixture_path))
        info = scraper.get_movie_info(film_url)
    # Direct indexing: the key itself has to exist in the result.
    trailer = info['trailer_url']
    assert trailer is None
示例#6
0
def test_get_movie_list_html():
    """Check the page count and entry list returned for the movie-list fixture.

    ``get_movie_list_html`` is expected to return a ``(movie_list, nr_pages)``
    pair, where ``movie_list`` is a ``ResultSet`` of ``Tag`` objects.
    """
    scraper = CritickerScraper()
    list_url = 'https://www.criticker.com/films/?filter=or&view=all'
    fixture_path = 'test/fixtures/criticker-movie-list.html'
    with requests_mock.mock() as mocked:
        # Answer the GET for the list page with the saved fixture HTML.
        mocked.get(list_url, text=read_file(fixture_path))
        entries, page_count = scraper.get_movie_list_html(list_url)
    assert page_count == 2283
    assert len(entries) == 60
    assert isinstance(entries, ResultSet)
    first_entry = entries[0]
    assert isinstance(first_entry, Tag)
示例#7
0
def test_get_movie_info():
    """Check the fields ``get_movie_info`` extracts from "The Matrix" fixture.

    The film page request is answered from a saved fixture via
    ``requests_mock``. Exact values are asserted for the poster URL, IMDb id,
    own rating and trailer URL; the site rating, vote count and PSI are
    compared with an absolute tolerance.
    """
    crit_scraper = CritickerScraper()
    movie_url = 'http://www.criticker.com/film/The-Matrix/'
    with requests_mock.mock() as m:
        m.get(movie_url,
              text=read_file('test/fixtures/criticker-the-matrix.html'))
        movie_info = crit_scraper.get_movie_info(movie_url)
    assert movie_info[
        'poster_url'] == 'https://www.criticker.com/img/films/posters/The-Matrix.jpg'
    assert movie_info['imdbid'] == 133093
    # pytest.approx's second positional parameter is the *relative* tolerance
    # (a fraction of the expected value). Passing 1000 or 10 there makes the
    # check accept almost any number, so the tolerances are given explicitly
    # as absolute ones: 7.71 +/- 0.3, 27493 +/- 1000 and 80 +/- 10.
    assert movie_info['crit_rating'] == pytest.approx(7.71, abs=0.3)
    assert movie_info['crit_votes'] == pytest.approx(27493, abs=1000)
    assert movie_info['my_ratings']['tijl']['rating'] == 93
    assert movie_info['my_ratings']['tijl']['psi'] == pytest.approx(80, abs=10)
    assert movie_info[
        'trailer_url'] == 'https://www.youtube.com/watch?v=vKQi3bBA1y8'
示例#8
0
def test_get_movies_for_genre_page():
    """Check the page count and movie dicts parsed from a genre-page fixture.

    Test tables are created up front and removed in a ``finally`` clause, so
    they are cleaned up even when one of the assertions fails.
    """
    create_test_tables(variant='updates')
    db = MySQLDatabase(schema='qmdb_test', env='tst')
    try:
        netflix_scraper = NetflixScraper(db)
        with requests_mock.Mocker() as m:
            # NOTE(review): '{query}' and '{downloadable}' are registered
            # literally (the string is never formatted) - presumably the
            # scraper requests this exact URL; confirm against NetflixScraper.
            url = 'https://unogs-unogs-v1.p.mashape.com/aaapi.cgi?q={query}-!1800,2050-!0,5-!0,10-!10673-!Any-!Any-!Any-' \
                  '!Any-!{downloadable}&t=ns&cl=67&st=adv&ob=Relevance&p=1&sa=and'
            headers = {'X-RateLimit-requests-Remaining': '100'}
            m.get(url,
                  text=read_file('test/fixtures/unogs-get-genre-page.html'),
                  headers=headers)
            nr_pages, movies = netflix_scraper.get_movies_for_genre_page(
                10673, country_code=67, pagenr=1)
        assert nr_pages == 1
        assert len(movies) == 59
        assert set(movies[0].keys()) == {
            'netflix_id', 'netflix_title', 'netflix_rating', 'imdbid',
            'crit_id', 'unogs_updated'
        }
    finally:
        # Always drop the test tables, including after a failed assertion.
        remove_test_tables(db)