예제 #1
0
def get_sentiment():
    """Function triggered when the Flask API is requested.

    Reads the analysis parameters from the query string of the current
    request, fetches matching tweets, computes their sentiment and, when
    asked, extracts topic words.

    Query parameters:
        hashtag: Required. Surrounding whitespace is ignored.
        nb_days: Number of days to look back; default 7, clamped to [1, 7].
        nb_tweets: Total tweets requested; default ``nb_days * 10`` and
            never less than ``nb_days``.
        get_topic_words: Integer flag, default 1; non-zero adds topics.
        n_topics: Default 1.
        n_words_per_topic: Default 10.
        lda_passes: Default 4.
        return_tweets: Integer flag, default 0; non-zero adds the tweet
            columns to the sentiment table.
        language: Default "en"; appended to the search query as ``lang:``.

    Returns:
        str: JSON containing analysis results under "sentiments_json" and,
        when topics were requested, "top_topics_json". A plain-text message
        is returned instead when no hashtag was supplied.

    """
    # USER REQUEST PARAMETERS
    # Strip before validating so a whitespace-only hashtag is rejected too.
    hashtag = request.args.get('hashtag', '').strip()
    if not hashtag:
        return "Please specify a non null hashtag"
    nb_days = request.args.get('nb_days', 7, type=int)
    nb_days = int(min(max(nb_days, 1), 7))
    # `type=int` belongs to request.args.get(), not to max(): the built-in
    # rejects that keyword and the value would stay a string otherwise.
    nb_tweets = max(
        request.args.get('nb_tweets', nb_days * 10, type=int),
        nb_days)
    # Parsing the flags with type=int makes a malformed value fall back to
    # the default instead of raising ValueError.
    get_topic_words = bool(request.args.get('get_topic_words', 1, type=int))
    n_topics = request.args.get('n_topics', 1, type=int)
    n_words_per_topic = request.args.get('n_words_per_topic', 10, type=int)
    lda_passes = request.args.get('lda_passes', 4, type=int)
    return_tweets = bool(request.args.get('return_tweets', 0, type=int))
    language = request.args.get('language', "en")

    # TWITTER REQUEST PARAMETERS
    # One offset per requested day, ending at 0.
    days_offsets = range(-nb_days + 1, 1)
    query_key_value = " -is:retweet -is:quote lang:" + language
    tweet_fields = "created_at,public_metrics,author_id"
    max_nb_tweets_per_day = nb_tweets // len(days_offsets)
    query_string = "#" + hashtag + query_key_value

    # COMPUTE RESULTS
    tweets = get_tweets(query_string, days_offsets, tweet_fields,
                        max_nb_tweets_per_day, nb_tweets, search_tweets_args)
    sentiments_df, cleaned_tweets_texts, filtered_tweets_df = compute_sentiment(
        tweets, model, tokenizer)

    if return_tweets:
        # Reset the index so the tweet columns line up row by row.
        sentiments_df = pd.concat(
            (sentiments_df, filtered_tweets_df.reset_index(drop=True)), axis=1)

    results = {"sentiments_json": sentiments_df.to_json()}

    if get_topic_words:
        top_topics = get_topics_from_tweets(
            NLTK_DATA_PATH,
            cleaned_tweets_texts,
            n_topics=n_topics,
            n_words_per_topic=n_words_per_topic,
            n_passes=lda_passes,
            force_download=False)
        results["top_topics_json"] = top_topics.to_json()

    return json.dumps(results)
예제 #2
0
파일: tweets.py 프로젝트: cshintov/timeline
def main():
    """Print timeline size and stats for a screen name.

    Expects exactly three command-line arguments after the script name:
    screen_name, tweet_count and stat_count. With any other argument count
    the usage line is printed and the function returns without doing work.
    """
    if len(argv) != 4:
        print('usage: tweets.py screen_name tweet_count stat_count')
        return
    scr_name = argv[1]
    twt_count, stat_count = int(argv[2]), int(argv[3])
    tweets = get_tweets(scr_name, twt_count)
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('%d tweets displayed' % len(tweets))
    tags, mentions = get_stats(tweets, scr_name, stat_count)
    print_stats(tags, stat_count)
    print_stats(mentions, stat_count)
예제 #3
0
def test_get_tweets_total_nb_tweets(collect_results_mock):
    """get_tweets returns the mocked tweets and stays within the total cap."""
    # collect_results_mock mocks collect_results from twitter api; each
    # successive call yields one of these two-item lists.
    mocked_batches = [[{"text": "hello"}, {}], [{"text": "hi"}, {}]]
    collect_results_mock.side_effect = mocked_batches
    # Only the first item of every batch is expected back from get_tweets.
    wanted = [batch[0] for batch in mocked_batches]

    cap = 10
    per_day_cap = 2
    offsets = range(-1, 1)
    query = "#december -is:retweet -is:quote lang:en"
    fields = " "
    extra_args = {}

    got = get_tweets(query, offsets, fields, per_day_cap, cap, extra_args)

    assert len(got) <= cap
    assert got == wanted
예제 #4
0
from utils import get_tweets, result_file, write_results
from constants import *


def tweets_place(tweets):
    """Count tweets per US city and format the counts as text lines.

    Args:
        tweets: Collection of tweet records, each indexable by the
            positions listed in ``COLUMNS``. The method chain used here
            matches the PySpark RDD API (assumed; confirm against
            ``get_tweets``).

    Returns:
        list: One tab-separated "place_name, count" string per city,
        ordered by count descending and, for equal counts, by place name
        ascending.
    """
    # Resolve the column positions once rather than for every record.
    country_idx = COLUMNS.index('country_code')
    type_idx = COLUMNS.index('place_type')
    name_idx = COLUMNS.index('place_name')

    us_cities = tweets.filter(
        lambda t: t[country_idx] == 'US' and t[type_idx] == 'city')

    counts = us_cities\
        .map(lambda t: (t[name_idx], 1))\
        .aggregateByKey(0, (lambda acc, one: acc + one), (lambda left, right: left + right))

    # One sort on a composite key makes the tie-break by name explicit; a
    # key sort followed by a value sort depends on the second sort keeping
    # ties in their previous order, which is not guaranteed after a shuffle.
    ordered = counts.sortBy(lambda pair: (-pair[1], pair[0]))

    return ordered.map(lambda pair: '%s\t%s' % (pair[0], pair[1])).collect()


if __name__ == "__main__":
    # Script entry point: compute the per-city tweet counts for task 5 and
    # write them to that task's result file.
    task = '5'
    tweets = get_tweets(task, False)

    # The context manager guarantees the file is closed, and a distinct
    # name keeps the imported result_file() helper from being shadowed.
    with open(result_file(task), "w") as output:
        results = tweets_place(tweets)

        write_results(output, results, cols=['place_name', 'num_tweets'])
예제 #5
0
 def __init__(self, trainf_name, testf_name, smoothing_factor):
   """Load the vocabulary and tweets and store the smoothing factor.

   Args:
     trainf_name: Passed to getFV, get_tweets and split_into_classes
       (presumably the training file name; confirm against callers).
     testf_name: Passed to get_tweets (presumably the test file name).
     smoothing_factor: Stored unchanged on the instance.
   """
   vocab, vocab_length = getFV(trainf_name)
   train_tweets = get_tweets(trainf_name)
   labeled_yes, labeled_no = split_into_classes(trainf_name)
   test_tweets = get_tweets(testf_name)

   self.vocab = vocab
   self.vocabLength = vocab_length
   self.train_tweets = train_tweets
   self.tweets_labeled_yes = labeled_yes
   self.tweets_labeled_no = labeled_no
   self.test_tweets = test_tweets
   self.smoothing_factor = smoothing_factor