def test_http_client(
        self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
    """Smoke-test ``HttpClient.get`` against a stubbed ``requests.get``.

    ``requests.get`` is replaced by a stub that returns a mock response
    whose ``content`` is the ``podcasts_page_1_html`` fixture and whose
    ``raise_for_status`` does nothing. The test passes when ``get`` returns
    without raising; no return value is inspected.
    """
    def fake_get(x, headers):
        # Same signature as the stub this replaces; a fresh mock is built
        # for every call.
        response = MagicMock(
            content=podcasts_page_1_html,
            raise_for_status=lambda: False,
        )
        return response

    monkeypatch.setattr(requests, 'get', fake_get)
    # Run from the temporary directory -- presumably so anything the client
    # writes to disk stays out of the project tree (TODO confirm).
    monkeypatch.chdir(tmpdir)

    client = HttpClient(delay=0)
    client.get(base_url)
def test_http_client_cache_read_exception(
        self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
    """Expect ``DarkKeeperCacheReadError`` when every path looks like a file.

    ``requests.get`` is stubbed with a mock response and ``os.path.isfile``
    is forced to return ``True`` for any argument. With those patches in
    place, ``HttpClient.get`` must raise ``DarkKeeperCacheReadError``.
    """
    def fake_get(x, headers):
        # Same signature as the stub this replaces; a fresh mock is built
        # for every call.
        response = MagicMock(
            content=podcasts_page_1_html,
            raise_for_status=lambda: False,
        )
        return response

    def always_a_file(x):
        # Presumably makes the client believe a cache entry exists even
        # though nothing was written to ``tmpdir`` (TODO confirm).
        return True

    monkeypatch.setattr(requests, 'get', fake_get)
    monkeypatch.chdir(tmpdir)
    monkeypatch.setattr(os.path, 'isfile', always_a_file)

    client = HttpClient(delay=0)
    with pytest.raises(DarkKeeperCacheReadError):
        client.get(base_url)
def test_http_client_timeout_exception(
        self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
    """Expect a ``Timeout`` to surface as ``DarkKeeperRequestResponseError``.

    ``requests.get`` is stubbed with a mock response whose
    ``raise_for_status`` is whatever ``raise_exception(exc_type=Timeout,
    exc_msg='Timeout.')`` returns -- presumably a callable that raises
    ``Timeout('Timeout.')`` (helper defined outside this block; TODO
    confirm). ``HttpClient.get`` must then raise
    ``DarkKeeperRequestResponseError`` whose string form is ``'Timeout.'``.
    """
    def fake_get(x, headers):
        # Same signature as the stub this replaces; the helper is invoked
        # on every call, exactly as the original lambda did.
        return MagicMock(
            content=podcasts_page_1_html,
            raise_for_status=raise_exception(
                exc_type=Timeout, exc_msg='Timeout.'),
        )

    monkeypatch.setattr(requests, 'get', fake_get)
    monkeypatch.chdir(tmpdir)

    client = HttpClient(delay=0)
    with pytest.raises(DarkKeeperRequestResponseError) as exc_info:
        client.get(base_url)

    # The message check has to live outside the ``with`` block: the raising
    # call leaves the block immediately, so an assert placed after it inside
    # the block would never execute.
    assert str(exc_info.value) == 'Timeout.'
def build_kwargs_dark_keeper(base_url_raw, mongo_uri_raw):
    """Build the shared keyword arguments for a dark-keeper instance.

    Args:
        base_url_raw: Value passed to ``UrlsStorage`` as ``base_url``.
        mongo_uri_raw: Value passed to ``ExportMongo`` as ``mongo_uri``.

    Returns:
        A dict with the keys ``http_client``, ``urls_storage``,
        ``data_storage`` and ``export_mongo``, each holding a freshly
        constructed component. No ``parser`` entry is included.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )
    # Components are created in the same order as the keys are listed.
    return {
        'http_client': HttpClient(delay=0, user_agent=user_agent),
        'urls_storage': UrlsStorage(base_url=base_url_raw),
        'data_storage': DataStorage(),
        'export_mongo': ExportMongo(mongo_uri=mongo_uri_raw),
    }
data = [] for post_item in content.get_block_items( '.posts-list .posts-list-item'): post_data = dict( title=post_item.parse_text('.number-title'), desc=post_item.parse_text('.post-podcast-content'), mp3=post_item.parse_attr('.post-podcast-content audio', 'src'), ) if post_data['title'] and post_data['mp3']: data.append(post_data) return data if __name__ == '__main__': pk = DarkKeeper( http_client=HttpClient( delay=2, user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' 'AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125', ), parser=PodcastParser(), urls_storage=UrlsStorage(base_url='https://radio-t.com/'), data_storage=DataStorage(), export_mongo=ExportMongo( mongo_uri='mongodb://localhost/podcasts.radio-t.com'), ) pk.run()