def test_runner_exceptions():
    """Expect ``ScrapBatchBadResponse`` when the search endpoint keeps failing.

    A stub web client answers the first search request with status 429 and
    every later search request with status 400, while the guest-token
    activation endpoint always answers 200 with a fixed token payload.
    """

    class TokenExpiryExceptionWebClient(st.WebClient):
        # Per-URL request counters. Declared on the class, so the mapping is
        # shared by every instance; a URL missing from it raises KeyError.
        count_dict = {
            'https://api.twitter.com/2/search/adaptive.json': 0,
            'https://api.twitter.com/1.1/guest/activate.json': 0,
        }

        def run_request(
                self,
                params: st.http_request.RequestDetails
        ) -> st.http_request.RequestResponse:
            """Count the request and return the canned response for its URL."""
            url = params.url
            self.count_dict[url] += 1
            hits = self.count_dict[url]
            if url != 'https://api.twitter.com/2/search/adaptive.json':
                return st.http_request.RequestResponse(
                    200, '{"guest_token":"1350356785648062465"}')
            if hits == 1:
                return st.http_request.RequestResponse(429, None)
            return st.http_request.RequestResponse(400, '')

    with pytest.raises(ScrapBatchBadResponse):
        task = st.SearchTweetsTask(all_words='#koronawirus')
        runner = st.TweetSearchRunner(
            search_tweets_task=task,
            tweet_outputs=[],
            web_client=TokenExpiryExceptionWebClient(),
            auth_token_provider_factory=st.auth.SimpleAuthTokenProviderFactory(),
        )
        runner.run()
def _scrap_tweets_with_count_assert(count: int):
    """Search ``#covid19`` with ``tweets_limit=count`` and assert the hit count.

    Args:
        count: Limit passed to the search task and the exact number of
            collected tweets the assertion requires.
    """
    phrase = '#covid19'
    task = st.SearchTweetsTask(all_words=phrase, tweets_limit=count)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()
    collected = collector.get_scrapped_tweets()
    assert len(collected) == count
def test_return_tweets_from_user():
    """Search tweets by ``from_username`` and check each tweet's ``user_name``."""
    username = '******'
    task = st.SearchTweetsTask(from_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def is_written_by_user(tweet):
        return tweet.user_name == username

    tweet_list_assert_condition(
        collector.get_scrapped_tweets(),
        is_written_by_user,
    )
def test_exact_words():
    """Search by exact phrase and check the phrase appears in each tweet text.

    Both the phrase and the tweet text are passed through ``to_base_text``
    before the containment check.
    """
    exact_phrase = 'duda kaczyński kempa'
    task = st.SearchTweetsTask(exact_words=exact_phrase)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def has_exact_phrase(tweet):
        return to_base_text(exact_phrase) in to_base_text(tweet.full_text)

    tweet_list_assert_condition(collector.get_raw_list(), has_exact_phrase)
def _run_test_between_dates(since: Arrow, until: Arrow):
    """Search within a date window and check every tweet's ``created_at``.

    Args:
        since: Lower bound passed to the search task; also the inclusive
            lower bound of the check.
        until: Upper bound passed to the search task; also the inclusive
            upper bound of the check.
    """
    task = st.SearchTweetsTask(
        any_word="#koronawirus #covid19",
        since=since,
        until=until,
    )
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def is_inside_window(tweet):
        return since <= tweet.created_at <= until

    tweet_list_assert_condition(
        collector.get_scrapped_tweets(),
        is_inside_window,
    )
def test_search_to_username():
    """Search by ``to_username`` and check the name occurs in each tweet text.

    Both the username and the tweet text are passed through ``to_base_text``
    before the containment check.
    """
    username = '******'
    task = st.SearchTweetsTask(to_username=username, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def mentions_user(tweet):
        return to_base_text(username) in to_base_text(tweet.full_text)

    tweet_list_assert_condition(
        collector.get_scrapped_tweets(),
        mentions_user,
    )
def _run_search_test_covid_tweets_in_language(language: st.Language):
    """Search ``#covid19`` in one language and check every tweet's ``lang``.

    Args:
        language: Language passed to the search task; each collected tweet's
            ``lang`` must equal its ``short_value``.
    """
    task = st.SearchTweetsTask(
        all_words='#covid19',
        tweets_limit=100,
        language=language,
    )
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def has_expected_language(tweet):
        return tweet.lang == language.short_value

    tweet_list_assert_condition(
        collector.get_scrapped_tweets(),
        has_expected_language,
    )
def get_tweets_to_tweet_output_test(tweet_output: List[st.TweetOutput]):
    """Run a ``#koronawirus`` search (limit 200) into the given outputs.

    Args:
        tweet_output: Outputs handed to the runner as ``tweet_outputs``.
    """
    phrase = '#koronawirus'
    task = st.SearchTweetsTask(all_words=phrase, tweets_limit=200)
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=tweet_output,
    )
    runner.run()
def run_test_for_single_language(language: st.Language):
    """Search ``#covid19`` (limit 10) in one language and check tweet ``lang``.

    Args:
        language: Language passed to the search task; each raw tweet's
            ``lang`` must be contained in its ``short_value``.
    """
    task = st.SearchTweetsTask(
        all_words='#covid19',
        tweets_limit=10,
        language=language,
    )
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def has_expected_language(tweet):
        # NOTE(review): this is a containment check, not equality; if
        # ``short_value`` is a plain string it is a substring test - confirm
        # that this is intended.
        return tweet.lang in language.short_value

    tweet_list_assert_condition(
        collector.get_raw_list(),
        has_expected_language,
    )
def search_by_hashtag():
    """Run a ``#koronawirus`` search (limit 200) and check the raw tweets."""
    phrase = '#koronawirus'
    task = st.SearchTweetsTask(all_words=phrase, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()
    scrapped_tweets = collector.get_raw_list()
    # NOTE(review): the filter is the same condition as the tested expression,
    # so only tweets that already contain the phrase are checked and this
    # assertion cannot fail - confirm whether the filter is intended.
    assert all(
        phrase in it.full_text
        for it in scrapped_tweets
        if phrase in it.full_text
    )
def test_using_proxy_client():
    """Run a ``#covid19`` search through a proxy-configured web client.

    The client points both HTTP and HTTPS at ``http://localhost:3128``. The
    test asserts the runner returns a ``SearchTweetsResult`` and that the
    number of collected tweets equals the task's ``tweets_limit``.
    """
    task = st.SearchTweetsTask(all_words='#covid19', tweets_limit=200)
    proxy_config = st.RequestsWebClientProxyConfig(
        http_proxy='http://localhost:3128',
        https_proxy='http://localhost:3128',
    )
    proxy_client = st.RequestsWebClient(proxy_config)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
        web_client=proxy_client,
    )
    result = runner.run()
    scrapped_tweets = collector.get_scrapped_tweets()
    assert isinstance(result, st.SearchTweetsResult)
    assert len(scrapped_tweets) == task.tweets_limit
def test_any_word():
    """Search with ``any_word`` and check each raw tweet matches some word.

    A tweet passes when ``contains_any_word`` accepts its text, its user's
    full name, or its user name, tried in that order.
    """
    any_phrase = 'kaczynski tusk'
    task = st.SearchTweetsTask(any_word=any_phrase, tweets_limit=100)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def matches_any_word(tweet):
        # Short-circuit chain: later fields are only read if earlier ones miss.
        return (
            contains_any_word(any_phrase, tweet.full_text)
            or contains_any_word(any_phrase, tweet.user_full_name)
            or contains_any_word(any_phrase, tweet.user_name)
        )

    tweet_list_assert_condition(collector.get_raw_list(), matches_any_word)
def twitter_report():
    """Render a tweet report for the submitted ``twitteruser`` form field.

    An empty username renders ``error.html``. Otherwise a ``#covid19`` search
    is run, its tweets are collected in memory and also written to
    ``output_file.csv``, and ``tweets.html`` is rendered with the tweets and
    the username.
    """
    username = request.form['twitteruser']
    if not username:
        return render_template("error.html")

    # Configure
    # NOTE(review): the search query is fixed and does not use ``username`` -
    # confirm whether the search was meant to be restricted to that user.
    task = st.SearchTweetsTask(all_words='#covid19', tweets_count=20)
    collector = st.CollectorTweetOutput()
    csv_output = st.CsvTweetOutput('output_file.csv')
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector, csv_output],
    )
    runner.run()
    tweets = collector.get_scrapped_tweets()
    return render_template("tweets.html", tweets=tweets, username=username)
def test_return_tweets_objects():
    """Run a ``#koronawirus`` search and check the result object and tweets.

    Asserts that the runner returns a ``SearchTweetsResult``, that its
    ``downloaded_count`` equals the number of collected tweets, and that this
    count is positive.
    """
    phrase = '#koronawirus'
    task = st.SearchTweetsTask(all_words=phrase, tweets_limit=200)
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    result = runner.run()
    scrapped_tweets = collector.get_scrapped_tweets()
    assert isinstance(result, st.SearchTweetsResult)
    assert result.downloaded_count == len(scrapped_tweets)
    assert result.downloaded_count > 0
    # NOTE(review): the filter is the same condition as the tested expression,
    # so this last assertion cannot fail - confirm whether the filter is
    # intended.
    assert all(
        phrase in it.full_text
        for it in scrapped_tweets
        if phrase in it.full_text
    )
def test_search_as_replay():
    """Search ``#covid19`` with ``ONLY_REPLIES`` and check reply identifiers.

    Each raw tweet passes when the concatenation of its
    ``in_reply_to_status_id_str`` and ``in_reply_to_user_id_str`` is non-empty.
    """
    task = st.SearchTweetsTask(
        all_words='#covid19',
        tweets_limit=500,
        replies_filter=st.RepliesFilter.ONLY_REPLIES,
    )
    collector = st.CollectorTweetOutput()
    runner = st.TweetSearchRunner(
        search_tweets_task=task,
        tweet_outputs=[collector],
    )
    runner.run()

    def has_reply_reference(tweet):
        reply_ids = (
            tweet.in_reply_to_status_id_str + tweet.in_reply_to_user_id_str
        )
        return len(reply_ids) > 0

    tweet_list_assert_condition(collector.get_raw_list(), has_reply_reference)
def get_tweets() -> List[st.Tweet]:
    """Run a ``#covid19`` search (limit 100) and return the collected tweets.

    Returns:
        The tweets gathered by a ``CollectorTweetOutput`` during the run.
    """
    collector = st.CollectorTweetOutput()
    search_task = st.SearchTweetsTask(all_words="#covid19", tweets_limit=100)
    runner = st.TweetSearchRunner(search_task, [collector])
    runner.run()
    return collector.get_scrapped_tweets()