import json

import pandas as pd
from flask import request

# get_tweets, compute_sentiment, get_topics_from_tweets, model, tokenizer,
# search_tweets_args, and NLTK_DATA_PATH are defined elsewhere in this module.


def get_sentiment():
    """Function triggered when the Flask API endpoint is requested.

    Returns:
        str: JSON containing analysis results
    """
    # USER REQUEST PARAMETERS
    hashtag = request.args.get('hashtag', '')
    if hashtag == "":
        return "Please specify a non-empty hashtag"
    nb_days = request.args.get('nb_days', 7, type=int)
    nb_days = int(min(max(nb_days, 1), 7))
    # type=int belongs to request.args.get, not max();
    # clamp nb_tweets to at least one tweet per requested day.
    nb_tweets = max(request.args.get('nb_tweets', nb_days * 10, type=int), nb_days)
    get_topic_words = bool(int(request.args.get('get_topic_words', "1")))
    n_topics = request.args.get('n_topics', 1, type=int)
    n_words_per_topic = request.args.get('n_words_per_topic', 10, type=int)
    lda_passes = request.args.get('lda_passes', 4, type=int)
    return_tweets = bool(int(request.args.get('return_tweets', "0")))
    language = request.args.get('language', "en")

    # TWITTER REQUEST PARAMETERS
    days_offsets = range(-nb_days + 1, 1)
    query_key_value = " -is:retweet -is:quote lang:" + language
    tweet_fields = "created_at,public_metrics,author_id"
    max_nb_tweets_per_day = nb_tweets // len(days_offsets)
    query_string = "#" + hashtag.strip() + query_key_value

    # COMPUTE RESULTS
    tweets = get_tweets(query_string, days_offsets, tweet_fields,
                        max_nb_tweets_per_day, nb_tweets, search_tweets_args)
    sentiments_df, cleaned_tweets_texts, filtered_tweets_df = compute_sentiment(
        tweets, model, tokenizer)

    if get_topic_words:
        top_topics = get_topics_from_tweets(NLTK_DATA_PATH,
                                            cleaned_tweets_texts,
                                            n_topics=n_topics,
                                            n_words_per_topic=n_words_per_topic,
                                            n_passes=lda_passes,
                                            force_download=False)

    if return_tweets:
        sentiments_tweets_df = pd.concat(
            (sentiments_df, filtered_tweets_df.reset_index(drop=True)), axis=1)
        results = {"sentiments_json": sentiments_tweets_df.to_json()}
    else:
        results = {"sentiments_json": sentiments_df.to_json()}

    if get_topic_words:
        results["top_topics_json"] = top_topics.to_json()

    return json.dumps(results)
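# A minimal sketch of wiring the handler into a Flask app and exercising it
# with the test client; the app object, the /sentiment route, and the query
# values below are assumptions for illustration, not part of the original code.
from flask import Flask

app = Flask(__name__)
app.add_url_rule("/sentiment", view_func=get_sentiment)

with app.test_client() as client:
    # Hypothetical request: 3 days of #python tweets, skipping topic words.
    response = client.get("/sentiment?hashtag=python&nb_days=3&get_topic_words=0")
    print(response.get_data(as_text=True))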
from sys import argv


def main():
    """Prints the timeline and stats."""
    if len(argv) != 4:
        print('usage: tweets.py screen_name tweet_count stat_count')
        return
    scr_name = argv[1]
    twt_count, stat_count = int(argv[2]), int(argv[3])
    tweets = get_tweets(scr_name, twt_count)
    # print(extract_tweets(tweets))
    print(len(tweets), 'tweets displayed')
    tags, mentions = get_stats(tweets, scr_name, stat_count)
    print_stats(tags, stat_count)
    print_stats(mentions, stat_count)
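# The helpers called above are not shown in this snippet. Below is a minimal
# sketch of what get_stats and print_stats could look like, assuming tweets
# are plain text strings and the stats are hashtag/mention frequency counts;
# the behavior here is an assumption, not the original implementation.
from collections import Counter


def get_stats(tweets, scr_name, stat_count):
    # Count hashtags and @-mentions across tweet texts, skipping
    # mentions of the queried user themselves.
    tags, mentions = Counter(), Counter()
    for text in tweets:
        for word in text.split():
            if word.startswith('#'):
                tags[word] += 1
            elif word.startswith('@') and word.lstrip('@') != scr_name:
                mentions[word] += 1
    return tags, mentions


def print_stats(counts, stat_count):
    # Print the stat_count most common entries, one per line.
    for item, n in counts.most_common(stat_count):
        print('%s\t%d' % (item, n))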
def test_get_tweets_total_nb_tweets(collect_results_mock):
    # collect_results_mock mocks collect_results from the Twitter API client
    expected_tweets = [[{"text": "hello"}, {}], [{"text": "hi"}, {}]]
    collect_results_mock.side_effect = expected_tweets
    expected_tweets = [et[0] for et in expected_tweets]
    total_nb_tweets = 10
    query_string = "#december -is:retweet -is:quote lang:en"
    days_offsets = range(-1, 1)
    tweet_fields = " "
    max_nb_tweets_per_day = 2
    search_tweets_args = {}

    returned_tweets = get_tweets(query_string, days_offsets, tweet_fields,
                                 max_nb_tweets_per_day, total_nb_tweets,
                                 search_tweets_args)

    assert len(returned_tweets) <= total_nb_tweets
    assert returned_tweets == expected_tweets
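# The test above relies on a collect_results_mock fixture. A minimal sketch of
# how it could be provided with pytest; the patch target
# "twitter_utils.collect_results" is an assumption about where get_tweets
# looks the function up, not part of the original test.
from unittest import mock

import pytest


@pytest.fixture
def collect_results_mock():
    # Patch collect_results in the module under test so the test
    # never hits the real Twitter API.
    with mock.patch("twitter_utils.collect_results") as patched:
        yield patched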
from utils import get_tweets, result_file, write_results
from constants import *


def tweets_place(tweets):
    # Count tweets per US city, ordered by count (descending);
    # the preceding sortByKey gives alphabetical tie-breaking.
    tuples = \
        tweets.filter(lambda t: t[COLUMNS.index('country_code')] == 'US'
                      and t[COLUMNS.index('place_type')] == 'city')\
              .map(lambda t: (t[COLUMNS.index('place_name')], 1))\
              .aggregateByKey(0, (lambda x, y: x + y),
                              (lambda count1, count2: count1 + count2))\
              .sortByKey()\
              .sortBy(lambda t: t[1], False)\
              .map(lambda t: '%s\t%s' % (t[0], t[1]))\
              .collect()
    return tuples


if __name__ == "__main__":
    task = '5'
    tweets = get_tweets(task, False)
    # Renamed from result_file to avoid shadowing the imported helper.
    out_file = open(result_file(task), "w")
    results = tweets_place(tweets)
    write_results(out_file, results, cols=['place_name', 'num_tweets'])
    out_file.close()
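# A minimal local sketch exercising tweets_place, assuming COLUMNS (from
# constants) is ['country_code', 'place_type', 'place_name']; the column
# order and sample rows below are made up for illustration.
from pyspark import SparkContext

sc = SparkContext("local[1]", "tweets_place_demo")
sample_rows = [
    ('US', 'city', 'Boston'),
    ('US', 'city', 'Boston'),
    ('US', 'city', 'Austin'),
    ('FR', 'city', 'Paris'),  # dropped by the filter: not a US tweet
]
print(tweets_place(sc.parallelize(sample_rows)))  # ['Boston\t2', 'Austin\t1']
sc.stop()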
def __init__(self, trainf_name, testf_name, smoothing_factor):
    """Load the vocabulary, training/test tweets, and per-class splits."""
    self.vocab, self.vocabLength = getFV(trainf_name)
    self.train_tweets = get_tweets(trainf_name)
    self.tweets_labeled_yes, self.tweets_labeled_no = split_into_classes(trainf_name)
    self.test_tweets = get_tweets(testf_name)
    self.smoothing_factor = smoothing_factor
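# Hypothetical usage of the classifier; the class name NaiveBayesClassifier
# and the file names are assumptions, not part of the original snippet. A
# smoothing_factor of 1.0 would correspond to Laplace (add-one) smoothing.
clf = NaiveBayesClassifier('train_tweets.txt', 'test_tweets.txt',
                           smoothing_factor=1.0)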