示例#1
0
    def test_http_client(self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
        """Smoke-test ``HttpClient.get`` against a stubbed ``requests.get``.

        The test makes no assertion on the returned value; it passes as long
        as the call completes without raising.
        """
        def fake_get(x, headers):
            # Signature mirrors how the stub is expected to be invoked:
            # one positional argument plus a ``headers`` argument.
            return MagicMock(
                content=podcasts_page_1_html,
                raise_for_status=lambda: False,
            )

        monkeypatch.setattr(requests, 'get', fake_get)
        # Work from the temporary directory (presumably so any files the
        # client writes land there -- confirm against HttpClient).
        monkeypatch.chdir(tmpdir)

        client = HttpClient(delay=0)
        client.get(base_url)
示例#2
0
    def test_http_client_cache_read_exception(self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
        """Expect ``DarkKeeperCacheReadError`` when ``os.path.isfile`` always says yes.

        ``requests.get`` is stubbed with a successful fake response and
        ``os.path.isfile`` is patched to return ``True`` for every path
        (presumably this makes the client try to read a cache file that does
        not exist in the fresh temporary directory -- confirm against
        HttpClient).
        """
        def fake_get(x, headers):
            return MagicMock(
                content=podcasts_page_1_html,
                raise_for_status=lambda: False,
            )

        def always_a_file(x):
            return True

        monkeypatch.setattr(requests, 'get', fake_get)
        monkeypatch.chdir(tmpdir)
        # Patched only after chdir so the directory switch itself runs
        # against the real os.path.isfile.
        monkeypatch.setattr(os.path, 'isfile', always_a_file)

        client = HttpClient(delay=0)
        with pytest.raises(DarkKeeperCacheReadError):
            client.get(base_url)
示例#3
0
    def test_http_client_timeout_exception(self, base_url, podcasts_page_1_html, tmpdir, monkeypatch):
        """Expect ``DarkKeeperRequestResponseError`` carrying the message ``'Timeout.'``.

        ``requests.get`` is stubbed so that the fake response's
        ``raise_for_status`` is whatever ``raise_exception`` produces for a
        ``Timeout`` with message ``'Timeout.'``.
        """
        def fake_get(x, headers):
            # raise_exception(...) is evaluated on every stubbed call, exactly
            # when the client invokes requests.get.
            failing_status = raise_exception(exc_type=Timeout, exc_msg='Timeout.')
            return MagicMock(
                content=podcasts_page_1_html,
                raise_for_status=failing_status,
            )

        monkeypatch.setattr(requests, 'get', fake_get)
        monkeypatch.chdir(tmpdir)

        client = HttpClient(delay=0)
        with pytest.raises(DarkKeeperRequestResponseError) as e:
            client.get(base_url)

        raised_message = str(e.value)
        assert raised_message == 'Timeout.'
示例#4
0
def build_kwargs_dark_keeper(base_url_raw, mongo_uri_raw):
    """Build a dict of collaborator objects keyed by keyword-argument name.

    Args:
        base_url_raw: Passed to ``UrlsStorage`` as ``base_url``.
        mongo_uri_raw: Passed to ``ExportMongo`` as ``mongo_uri``.

    Returns:
        A dict with the keys ``http_client``, ``urls_storage``,
        ``data_storage`` and ``export_mongo``.
    """
    browser_user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )

    # Collaborators are created in the same order as the returned keys.
    client = HttpClient(delay=0, user_agent=browser_user_agent)
    urls = UrlsStorage(base_url=base_url_raw)
    storage = DataStorage()
    exporter = ExportMongo(mongo_uri=mongo_uri_raw)

    return {
        'http_client': client,
        'urls_storage': urls,
        'data_storage': storage,
        'export_mongo': exporter,
    }
示例#5
0
        data = []
        for post_item in content.get_block_items(
                '.posts-list .posts-list-item'):
            post_data = dict(
                title=post_item.parse_text('.number-title'),
                desc=post_item.parse_text('.post-podcast-content'),
                mp3=post_item.parse_attr('.post-podcast-content audio', 'src'),
            )

            if post_data['title'] and post_data['mp3']:
                data.append(post_data)

        return data


if __name__ == '__main__':
    # Script entry point: wire a DarkKeeper for https://radio-t.com/ and run it.
    desktop_user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )

    # Dict entries are evaluated top to bottom, so the collaborators are
    # constructed in the same order as a direct keyword call would create them.
    keeper_kwargs = {
        'http_client': HttpClient(delay=2, user_agent=desktop_user_agent),
        'parser': PodcastParser(),
        'urls_storage': UrlsStorage(base_url='https://radio-t.com/'),
        'data_storage': DataStorage(),
        'export_mongo': ExportMongo(
            mongo_uri='mongodb://localhost/podcasts.radio-t.com'),
    }

    pk = DarkKeeper(**keeper_kwargs)
    pk.run()