def linearRegressionObject():
    """Run the "LinearRegressionObject" analysis on a fixed sample and print it.

    A week of hard-coded dates plus one labelled GDELT series and one
    labelled Tweet series are handed to the factory; the two values returned
    by ``linearRegressionAnalysis()`` are printed one after the other.

    Returns:
        0, always.
    """
    AnalysisObjectFactory.initialFactory()

    sample_dates = [
        "2/17/2016",
        "2/18/2016",
        "2/19/2016",
        "2/20/2016",
        "2/21/2016",
        "2/22/2016",
        "2/23/2016",
    ]
    # Each series is passed as a (label, values) pair.
    gdelt_series = ("GDELT", [1, 2, 3, 7, 4, 3, 10])
    tweet_series = ("Tweet", [20, 10, 15, 20, 5, 17, 21])

    analysis = AnalysisObjectFactory.createObject(
        "LinearRegressionObject",
        "hillary clinton",
        sample_dates,
        gdelt_series,
        tweet_series,
    )
    text, data = analysis.linearRegressionAnalysis()
    print(text)
    print(data)
    return 0
def regressionObject():
    """Build a "RegressionObject" from made-up data and call ``Interpolate()``.

    The tweet and poll arrays are placeholders; edit them to try different
    points. Nothing is returned.
    """
    # Change these values to plot different points.
    tweet_samples = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
    poll_samples = np.array([3, 2, 5, 0, 4, 10, 7, 0, 5])

    AnalysisObjectFactory.initialFactory()
    regression = AnalysisObjectFactory.createObject(
        "RegressionObject",
        "Bernie",
        "Dates I guess",
        tweet_samples,
        poll_samples,
    )
    regression.Interpolate()
def main():
    """Run the "TimeSeriesObject" analysis on hourly "trump" counts.

    Fetches a dict from ``QueryingDavid.candidateCountRangeHourlyDict`` for
    the range "2016041100".."2016041700", orders it by key, and passes the
    keys (as dates) and the float-converted values (as counts) to the factory.

    Returns:
        0, always.
    """
    hourly = QueryingDavid.candidateCountRangeHourlyDict(
        "trump", "2016041100", "2016041700"
    )
    # Sort by key so the dates and counts line up in ascending key order.
    ordered = collections.OrderedDict(sorted(hourly.items()))
    dates = list(ordered.keys())
    counts = [float(value) for value in ordered.values()]

    AnalysisObjectFactory.initialFactory()
    series = AnalysisObjectFactory.createObject(
        "TimeSeriesObject", "trump", "Twitter", dates, counts
    )
    # The result of the analysis call is not used here.
    series.timeSeriesAnalysis()
    return 0
def main():
    """Build a "RegressionObject" from command-line arguments.

    Expects three positional arguments: candidate, start and end. The tweet
    and GDELT series are fetched with ``getTweets`` and ``getGDELT``, printed,
    and then passed to the factory together with the dates.

    Raises:
        IndexError: if fewer than three arguments were supplied.
    """
    candidate, start, end = sys.argv[1], sys.argv[2], sys.argv[3]

    tweets, dates = getTweets(candidate, start, end)
    gdelt = getGDELT(candidate, start, end)

    print('Tweets: ' + str(tweets))
    print('GDELT: ' + str(gdelt))

    # The created object is only bound locally; nothing else is done with it.
    types = AnalysisObjectFactory.createObject(
        "RegressionObject", candidate, dates, gdelt, tweets
    )
def main():
    """Run "TweetAnalWord2Vec" sentiment analysis for each listed candidate.

    After ``buildModel(300000)`` and factory initialisation, every candidate's
    rows are read from the ``topTweeters`` table; the ``user_id`` values (as
    strings) and ``list_of_tweets`` values are collected in parallel lists and
    passed to the factory, and ``sentimentAnalysis()`` is called on the result.

    Returns:
        0, always.
    """
    buildModel(300000)
    AnalysisObjectFactory.initialFactory()

    candidates = ["trump"]
    for candidate in candidates:
        # NOTE(review): the query is built by string concatenation; this is
        # only acceptable while the candidate names are hard-coded above.
        query = (
            "select * from topTweeters where candidate_name = '"
            + candidate
            + "'"
        )
        rows = session.execute(query)

        user_ids = []
        tweets_per_user = []
        for row in rows:
            user_ids.append(str(row.user_id))
            tweets_per_user.append(row.list_of_tweets)

        analyser = AnalysisObjectFactory.createObject(
            "TweetAnalWord2Vec",
            candidate,
            ["3/6/2016", "4/5/2016"],
            user_ids,
            tweets_per_user,
        )
        analyser.sentimentAnalysis()

    return 0
def plotGDELT(candidate, start, end):
    """Plot the normalised daily GDELT counts for a candidate as a blue line.

    Args:
        candidate: candidate name forwarded to the GDELT query and the factory.
        start: start of the range, forwarded to ``getGDELTCounts``.
        end: end of the range, forwarded to ``getGDELTCounts``.

    Returns:
        The list returned by ``getlist_para_two()`` on the
        "GraphAnalysisNormalized" object, i.e. the values that were plotted.

    Side effects:
        Draws on the current pyplot figure and appends the line handle and the
        label 'GDELT' to the module-level ``legend`` pair.
    """
    global legend

    # The dates returned by the query are not used by this function.
    GDELT_counts, _dates = GetDailyGDELTCounts.getGDELTCounts(candidate, start, end)

    # Change Nones to 0's (in place, so the container type is preserved).
    for index, count in enumerate(GDELT_counts):
        if count is None:
            GDELT_counts[index] = 0

    # Normalize counts.
    # NOTE(review): the "twitter" label and the 'dates go here' placeholder
    # look like leftovers for GDELT data; kept unchanged because the factory's
    # handling of them is not visible here - confirm.
    normalizer = AnalysisObjectFactory.createObject(
        "GraphAnalysisNormalized", candidate, "twitter", 'dates go here', GDELT_counts
    )
    GDELT_counts = normalizer.getlist_para_two()
    print('Normalized: ' + str(GDELT_counts) + '\n')

    line, = plt.plot(GDELT_counts, 'b')
    legend[0].append(line)
    legend[1].append('GDELT')

    return GDELT_counts
def plotTwitter(candidate, start, end):
    """Plot the normalised daily tweet counts for a candidate as a red line.

    Hourly counts are fetched, missing (None) hours are treated as 0, and each
    consecutive run of 24 hourly values is summed into one daily value before
    normalisation.

    Args:
        candidate: candidate name forwarded to the query, the factory and the
            plot title.
        start: start of the range, forwarded to ``hourlyTweets``.
        end: end of the range, forwarded to ``hourlyTweets``.

    Returns:
        The list returned by ``getlist_para_two()`` on the
        "GraphAnalysisNormalized" object, i.e. the values that were plotted.

    Side effects:
        Draws on the current pyplot figure, sets ticks/limits/title, and
        appends the line handle and the label 'Tweets' to the module-level
        ``legend`` pair.
    """
    global legend

    hours_per_day = 24

    twitter_counts, dates = GetHourlyTweetsDayRange.hourlyTweets(candidate, start, end)

    # Change Nones to 0's (in place, so the container type is preserved).
    for index, count in enumerate(twitter_counts):
        if count is None:
            twitter_counts[index] = 0

    # Turn hourly counts into daily counts. The slice must span all 24 hours;
    # the previous i:i+23 window silently dropped the last hour of every day.
    daily_counts = [
        sum(twitter_counts[day_start:day_start + hours_per_day])
        for day_start in range(0, len(twitter_counts), hours_per_day)
    ]

    # Normalize counts.
    normalizer = AnalysisObjectFactory.createObject(
        "GraphAnalysisNormalized", candidate, "twitter", 'dates go here', daily_counts
    )
    daily_counts = normalizer.getlist_para_two()
    print('Normalized: ' + str(daily_counts) + '\n')

    num_counts = len(daily_counts)

    # Generate x-axis: one tick per daily value, labelled with the dates.
    x = list(range(0, num_counts))
    line, = plt.plot(daily_counts, 'r')
    plt.xticks(x, dates, rotation='vertical')
    plt.xlim(0, num_counts)
    # Leave room at the bottom for the vertical tick labels.
    plt.gcf().subplots_adjust(bottom=0.20)
    plt.title('Daily tweet counts and article counts for ' + candidate)

    legend[0].append(line)
    legend[1].append('Tweets')

    return daily_counts
from AnalysisObjectFactory import AnalysisObjectFactory
import numpy as np
from sklearn import linear_model
import matplotlib as mpl
# Select the non-interactive Agg backend before pyplot is imported.
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import GetHourlyTweetsDayRange
from RegressionAnalysis import RegressionObject
import GetDailyGDELTCounts
import sys

AnalysisObjectFactory.initialFactory()


def getTweets(candidate, start, end):
    """Fetch hourly tweet counts for a candidate and aggregate them per day.

    Missing (None) hours are treated as 0, and each consecutive run of 24
    hourly values is summed into one daily value.

    Args:
        candidate: candidate name forwarded to ``hourlyTweets``.
        start: start of the range, forwarded to ``hourlyTweets``.
        end: end of the range, forwarded to ``hourlyTweets``.

    Returns:
        A ``(daily_counts, dates)`` pair: the list of daily sums and the dates
        exactly as returned by ``hourlyTweets``.
    """
    hours_per_day = 24

    # Get the tweets.
    twitter_counts, dates = GetHourlyTweetsDayRange.hourlyTweets(candidate, start, end)

    # Change Nones to 0's (in place, so the container type is preserved).
    for index, count in enumerate(twitter_counts):
        if count is None:
            twitter_counts[index] = 0

    # Turn hourly counts into daily counts. The slice must span all 24 hours;
    # the previous i:i+23 window silently dropped the last hour of every day.
    daily_counts = [
        sum(twitter_counts[day_start:day_start + hours_per_day])
        for day_start in range(0, len(twitter_counts), hours_per_day)
    ]

    # Callers unpack the result as (tweets, dates); without an explicit return
    # the function yielded None and that unpacking failed.
    return daily_counts, dates
def timeSeriesObject():
    """Create a "TimeSeriesObject" from fixed sample data and print its list.

    Prints whatever ``getlist_para_two()`` returns for the created object.
    Nothing is returned.
    """
    AnalysisObjectFactory.initialFactory()

    sample_x = [1, 2, 3, 4, 5, 6, 7, 8]
    sample_y = [10, 8, 4, 6, 7, 5, 9, 2]
    series = AnalysisObjectFactory.createObject(
        "TimeSeriesObject", "Trump", "Date", sample_x, sample_y
    )

    second_list = series.getlist_para_two()
    print(second_list)
def graphAnalysis():
    """Create a "GraphAnalysisNormalized" object from a sample and print it.

    Two hard-coded dates and two counts are passed under the "gdelt" label;
    the value of ``getlist_para_two()`` is printed. Nothing is returned.
    """
    AnalysisObjectFactory.initialFactory()

    sample_dates = ["2/23/2016", "2/22/2016"]
    sample_counts = [15, 23]
    graph = AnalysisObjectFactory.createObject(
        "GraphAnalysisNormalized",
        "hilary clinton",
        "gdelt",
        sample_dates,
        sample_counts,
    )

    second_list = graph.getlist_para_two()
    print(second_list)
def classificationAnalysis():
    """Create a "ClassificationAnalysis" object from a sample and print it.

    A single date, the "gdelt" label, three candidate names and three counts
    are passed to the factory; the value of ``getlist_para_two()`` is printed.
    Nothing is returned.
    """
    AnalysisObjectFactory.initialFactory()

    candidate_names = ["hilary clinton", "donaldtrump", "ted cruz"]
    candidate_counts = [9, 8, 2]
    classifier = AnalysisObjectFactory.createObject(
        "ClassificationAnalysis",
        "2/23/2016",
        "gdelt",
        candidate_names,
        candidate_counts,
    )

    second_list = classifier.getlist_para_two()
    print(second_list)