def extract_full_urls(tweets):
    """Resolve each tweet's shortened URLs and tag Newser matches.

    Mutates every tweet dict in place, adding 'full_urls',
    'newser_top100', and 'newser_all' keys, and prints a progress
    counter to stdout. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        expanded = [entry[u'expanded_url']
                    for entry in tweet[u"entities"][u'urls']]
        tweet[u'full_urls'] = TweetAnalysis.get_full_urls(expanded)
        tweet[u'newser_top100'] = TweetAnalysis.get_urls_in_newser(
            tweet[u'full_urls'])
        tweet[u'newser_all'] = TweetAnalysis.get_urls_in_newser_all(
            tweet[u'full_urls'])
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def extract_sentiment_scores(tweets):
    """Attach sentiment scores to each tweet, in place.

    Computes a SentiWordNet score from the tweet's 'filtered_text'
    (falling back to 0 when that key is missing), prints a progress
    counter, then runs the batch Sentiment140 pass over the whole set.

    Returns the list produced by
    TweetSetAnalysis.calculate_tweet140_sentiment.
    """
    # Dead commented-out blocks (happiness / sadness / emoticon
    # sentiment) removed; restore from VCS history if ever needed.
    last_index = len(tweets) - 1
    for key, tweet in enumerate(tweets):
        tweet[u'sentiment'] = {}
        try:
            tweet[u'sentiment'][u'sentiwordnet'] = \
                TweetAnalysis.calculate_tweet_sentiment(
                    tweet['filtered_text'])
        except KeyError:
            # Tweet was never preprocessed ('filtered_text' missing);
            # treat it as neutral.
            tweet[u'sentiment'][u'sentiwordnet'] = 0
        sys.stdout.write("\r%d of %d" % (key, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    tweets = TweetSetAnalysis.calculate_tweet140_sentiment(tweets)
    return tweets
def get_full_urls(tweets):
    """Return the resolved full URLs for all tweets, flattened.

    Fixes two defects: the accumulator was previously reassigned on
    every iteration (only the last tweet's URLs were returned), and the
    'urls' entity list was indexed with a string key instead of
    iterating its dicts (cf. extract_full_urls).
    """
    urls = []
    for tweet in tweets:
        expanded = [entry[u'expanded_url']
                    for entry in tweet[u"entities"][u'urls']]
        urls.extend(TweetAnalysis.get_full_urls(expanded))
    return urls
def extract_count_words(tweets):
    """Annotate every tweet with its word count, in place.

    Adds a 'word_count' key per tweet and prints a progress counter.
    Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        tweet[u'word_count'] = TweetAnalysis.word_count(tweet[u'text'])
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def get_proper_nouns(tweets):
    """Return one proper-noun extraction result per tweet text."""
    return [TweetAnalysis.get_proper_nouns(text)
            for text in get_tweet_texts(tweets)]
def extract_punctuations(tweets):
    """Annotate each tweet with its punctuation tokens and their count.

    Adds 'punctuations' and 'punctuation_count' keys in place, reading
    the preprocessed 'nlp' field. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        marks = TweetAnalysis.get_punctuation(tweet[u'nlp'])
        tweet[u'punctuations'] = marks
        tweet[u'punctuation_count'] = len(marks)
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def extract_numericals(tweets):
    """Annotate each tweet with its numerical mentions and their count.

    Adds 'numerical_mentions' and 'numerical_count' keys in place,
    reading the preprocessed 'nlp' field. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        found = TweetAnalysis.get_numericals(tweet[u'nlp'])
        tweet[u'numerical_mentions'] = found
        tweet[u'numerical_count'] = len(found)
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def extract_emoticons(tweets):
    """Annotate each tweet with its emoticons and their count.

    Adds 'emoticons' and 'emoticons_count' keys in place, reading the
    preprocessed 'nlp' field. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        found = TweetAnalysis.get_emoticons(tweet[u'nlp'])
        tweet[u'emoticons'] = found
        tweet[u'emoticons_count'] = len(found)
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def extract_proper_nouns(tweets):
    """Annotate each tweet with its proper nouns and their count.

    Adds 'proper_nouns' and 'proper_nouns_count' keys in place, reading
    the preprocessed 'nlp' field. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        found = TweetAnalysis.get_proper_nouns(tweet[u'nlp'])
        tweet[u'proper_nouns'] = found
        tweet[u'proper_nouns_count'] = len(found)
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets
def extract_wiki_entities(tweets):
    """Annotate each tweet with Wikipedia entities and their count.

    Adds 'wiki_entities' and 'wiki_entities_count' keys in place,
    computed from the raw 'text' field. Returns the same list.
    """
    last_index = len(tweets) - 1
    for position, tweet in enumerate(tweets):
        # NOTE(review): helper is named get_wiki_entities_count, yet
        # len() is applied to its result, so it presumably returns a
        # collection — confirm against TweetAnalysis.
        found = TweetAnalysis.get_wiki_entities_count(tweet[u'text'])
        tweet[u'wiki_entities'] = found
        tweet[u'wiki_entities_count'] = len(found)
        sys.stdout.write("\r%d of %d" % (position, last_index))
        sys.stdout.flush()
    sys.stdout.write("\n")
    return tweets