def evolving_list(self):
    """Detect events snap by snap and accumulate them in ``self.ResultDict``.

    Snaps holding fewer tweets than ``self.rate_threshold`` are skipped first.
    After that, for as long as ``self.Clustering.SnapIter.end`` is false, the
    current snap is clustered, tweet indices are grouped by cluster label, a
    vocabulary is built from the snap's text and every event yielded by
    ``self.buzz`` is appended to ``self.ResultDict`` and printed.
    """
    # Skip snaps that do not reach the tweet-count threshold.
    while len(self.Clustering.Snap["LOC"]) < self.rate_threshold:
        found = len(self.Clustering.Snap["LOC"])
        window = self.Clustering.Snap["TimeWindow"]
        print("Found only %d tweets skipping %s" % (found, window))
        self.Clustering.next()

    # NOTE(review): nothing visible in this loop advances the snapshot;
    # presumably build_clusters() or buzz() moves the iterator - confirm.
    while not self.Clustering.SnapIter.end:
        # Cluster the tweets of the current snap.
        self.Clustering.build_clusters()

        # Map every cluster label to the positions of the tweets carrying it.
        members_by_label = {}
        for position, label in enumerate(self.Clustering.labels):
            if label not in members_by_label:
                members_by_label[label] = []
            members_by_label[label].append(position)

        # Vocabulary of the snap's tweet text, tokenized with self.tokenize.
        snap_texts = list(self.Clustering.Snap["TEXT"])
        vocabulary = get_vocabulary(snap_texts, self.tokenize)

        # Record and echo every event detected in this snap.
        for event in self.buzz(members_by_label, vocabulary):
            self.ResultDict = self.ResultDict.append(event, ignore_index=True)
            print(event)
def folium_map(self):
    """Generate a leaflet map with the eventful tweets of the current time snap.

    Snaps holding fewer tweets than ``self.rate_threshold`` are skipped, then
    the user is asked on stdin whether to process the current snap.  On a
    positive answer ("yes", "y", "1" or "go", case-insensitive) the snap is
    clustered, events are searched with ``self.buzz`` and one marker per
    event is written to ``folium_map_<TimeWindow>.html``.  Any other answer
    returns without doing anything.
    """
    # Skip snaps that do not reach the tweet-count threshold.
    while len(self.Clustering.Snap["LOC"]) < self.rate_threshold:
        print(
            "Found only %d tweet(s) skipping for timeWindow %s"
            % (len(self.Clustering.Snap["LOC"]), self.Clustering.Snap["TimeWindow"])
        )
        self.Clustering.next()

    # Fixed map centre; a per-place lookup (location[self.place]) used to be
    # here but is disabled.  Presumably these are Boston, MA - confirm.
    lat, lon = (42.3606249, -71.0591155)

    go = raw_input("Look at next time snap %s ?" % self.Clustering.Snap["TimeWindow"])
    # raw_input() always returns a string, so the answer is compared against
    # the string "1" (an integer 1 could never match) after normalising
    # surrounding whitespace and letter case.
    if go.strip().lower() not in ("yes", "y", "1", "go"):
        return

    # Build clusters from the tweet snap.
    self.Clustering.build_clusters()

    # Collect the tweet indices of each cluster label.
    labels = {}
    for k, l in enumerate(self.Clustering.labels):
        labels.setdefault(l, []).append(k)

    # Make the vocabulary from the tokenized tweet text.
    vocabulary = get_vocabulary(self.Clustering.Snap["TEXT"], self.tokenize)

    map_1 = folium.Map(location=[lat, lon], zoom_start=8, tiles="Stamen Terrain")

    # One marker per detected event, with its ASCII-only summary as popup.
    for event in self.buzz(labels, vocabulary):
        popup = event.summary().encode("ascii", "ignore")
        print("Event :" + popup)
        map_1.simple_marker(location=[event.location[0], event.location[1]], popup=popup)

    map_1.create_map(path="folium_map_%s.html" % self.Clustering.Snap["TimeWindow"])
def _parse_report_time(value):
    """Return *value* as a time struct; strings are parsed as '%d %b %H:%M %Z %Y'."""
    if isinstance(value, str):
        return time.gmtime(time.mktime(time.strptime(value, "%d %b %H:%M %Z %Y")))
    return value


def _collect_hot_words(TS, TIME_END, HotWordSize):
    """Walk the snaps of *TS* until *TIME_END* and gather per-window hot-word statistics.

    Returns a tuple ``(volume, TimeList, ColorGradient, TweetCountDict)``:
    tweet count per window, window labels in visiting order, per-window
    ``{word: share of that window's hot-word count}`` and the total count of
    every hot word over all windows.
    """
    volume = []
    TimeList = []
    ColorGradient = {}
    TweetCountDict = {}
    while TS.time_start < TIME_END and not TS.end:
        # Fetch the snap first: the label below is built from TS.time_start as
        # it stands *after* next() (the original comment says next() also
        # initialises the start of the following snap).
        snap = TS.next()
        timeWindow = gmt_to_local(TS.time_start, make_string=True, format='%a %H:%M')

        # Volume of tweets in this window.
        volume.append(len(snap['LOC']))

        # The HotWordSize most common words of this window.
        Vocab_dict = dict(get_vocabulary(snap['TEXT']).most_common(HotWordSize))
        TimeList.append(timeWindow)

        # Sum once per window instead of once per word.
        window_total = float(sum(Vocab_dict.values()))
        shares = {}
        for word, count in Vocab_dict.items():
            shares[word] = count / window_total
            TweetCountDict[word] = TweetCountDict.get(word, 0) + count
        # NOTE(review): labels look like '%a %H:%M', so two windows a week
        # apart would share a key and the later one wins - confirm whether
        # reports may span more than a week.
        ColorGradient[timeWindow] = shares
    return volume, TimeList, ColorGradient, TweetCountDict


def _build_color_map(WordList, TimeList, ColorGradient):
    """Return a (words x windows) float array of hot-word shares, 0 where a word is absent."""
    ColorMap = np.zeros([len(WordList), len(TimeList)], dtype=float)
    for cl, timeWindow in enumerate(TimeList):
        shares = ColorGradient[timeWindow]
        for rw, word in enumerate(WordList):
            ColorMap[rw, cl] = shares.get(word, 0)
    return ColorMap


def print_vocabulary_report(db, scale=60*20, **kwargs):
    """Render a hot-word report for the tweets in *db* and save it as a PNG.

    The figure has four panels: tweet volume per time window (top right), a
    heat map of each hot word's share per window (bottom right), a bar chart
    of total counts per hot word (bottom left) and a text summary (top left).
    It is saved in the working directory as ``<start>_to_<end>.png``.

    Args:
        db: passed through to ``TweetSnap`` as its ``db`` argument.
        scale: width of a time window, passed to ``TweetSnap`` as
            ``timeWindow`` and reported as seconds.  Defaults to 20 minutes.
        **kwargs:
            TIME_START: start of the report, a time struct or a string in
                '%d %b %H:%M %Z %Y' format.  Defaults to ``time.gmtime(0)``.
            TIME_END: end of the report, same types.  Defaults to now.
            HotWordSize: number of most common words kept per window
                (default 8).

    Raises:
        ValueError: if no hot word was found in the requested range, so there
            is nothing to plot.
    """
    print("COLLECTING TWEETS....")
    TS = TweetSnap(db=db, timeWindow=scale, Placename2Geocode=False)
    print("COLLECTION OVER....")

    TIME_START = _parse_report_time(kwargs.get("TIME_START", time.gmtime(0)))
    TIME_END = _parse_report_time(kwargs.get("TIME_END", time.gmtime(time.time())))
    HotWordSize = kwargs.get("HotWordSize", 8)

    # If the report starts after the data does, skip ahead; presumably
    # move_on() takes seconds and the "- scale" leaves room for the first
    # TS.next() call - TODO confirm against TweetSnap.
    TIME_DIFF = time.mktime(TIME_START) - time.mktime(TS.time_start)
    if TIME_DIFF > 0:
        TS.move_on(TIME_DIFF - scale)

    volume, TimeList, ColorGradient, TweetCountDict = _collect_hot_words(TS, TIME_END, HotWordSize)
    print("LOOPING2")

    # Hot words ordered by ascending total count (bottom-to-top in the plots).
    SortedTweetCount = sorted(TweetCountDict.items(), key=operator.itemgetter(1))
    WordList = [item[0] for item in SortedTweetCount]
    TweetCountArray = np.array([item[1] for item in SortedTweetCount], dtype=int)
    del SortedTweetCount

    # Fail early and clearly: the min()/max() calls below raise an opaque
    # ValueError on empty data, after the large figure has been allocated.
    if not WordList:
        raise ValueError("No hot words found between TIME_START and TIME_END; nothing to plot")

    ColorMap = _build_color_map(WordList, TimeList, ColorGradient)

    word_positions = np.arange(len(WordList))
    # Only every 5th window gets a tick, so the labels must be the matching
    # every-5th entries of TimeList (not its first entries).
    time_ticks = np.arange(0, len(TimeList), 5)
    time_tick_labels = TimeList[::5]

    ###PRINT RESULTS
    gs = gridspec.GridSpec(2, 2, width_ratios=[1, 2], height_ratios=[1, 4])
    gs.update(left=0.05, right=0.48,
              wspace=0.00000000000000000000000000000000000000005,
              hspace=0.00000000000000000000000000000000000000005)
    fig1 = plt.figure(figsize=(36, 90), dpi=200)
    try:
        ax0 = fig1.add_subplot(gs[0, 1])
        ax1 = fig1.add_subplot(gs[1, 1])
        ax2 = fig1.add_subplot(gs[1, 0])
        ax3 = fig1.add_subplot(gs[0, 0])

        # TweetVolume
        # NOTE(review): set_axis_bgcolor() was replaced by set_facecolor() in
        # Matplotlib 2.x; kept as-is for the version this file targets.
        ax0.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax0.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)
        ax0.set_axis_bgcolor('0.95')
        ax0.plot(np.arange(len(TimeList)), volume, label='NumberOfTweets', linewidth=0.75)
        ax0.legend(loc='upper left', ncol=4)
        ax0.set_xlim(0, len(TimeList) - 1)
        ax0.xaxis.tick_top()
        ax0.yaxis.tick_right()
        ax0.set_xticks(time_ticks)
        ax0.set_xticklabels(time_tick_labels, rotation='vertical')

        # HotWordColorMap
        ax1.imshow(ColorMap, cmap=plt.cm.binary, vmin=ColorMap.min(), vmax=ColorMap.max(),
                   aspect='auto', origin='lower')
        ax1.yaxis.tick_right()
        ax1.set_yticks(word_positions)
        ax1.set_yticklabels(WordList)
        ax1.set_xticks(time_ticks)
        ax1.set_xticklabels(time_tick_labels, rotation='vertical')
        ax1.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax1.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)

        # TweetVolumeDistributionOverHotWords
        ax2.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax2.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)
        ax2.set_axis_bgcolor('0.95')
        ax2.invert_xaxis()
        ax2.barh(word_positions, TweetCountArray, align='center')
        # Add the number next to each bar; a run of equal counts (the bars
        # are sorted) is annotated only once.
        PreviousValue = None
        for p, ch in zip(word_positions, TweetCountArray):
            if ch != PreviousValue:
                ax2.annotate(str(ch), xy=(ch + 2.5, p - 0.25), va='center')
                PreviousValue = ch
        ax2.set_yticks(word_positions)
        ax2.set_yticklabels(WordList)
        ax2.set_ylim(0, len(WordList) - 1 + 0.25)

        # Summary panel: one "label : value" line per entry below.
        rowLabels = ['1. Date', '2. Start time', '3. End time', '4. Time Window (seconds)',
                     '5. No.Of HotWords per TimeWindow', '6. No. of unique hotwords',
                     '7. Max #of tweets for HotWord', '8. Min #of tweets for HotWord',
                     '9. Max #of tweets in a time window', '10. Min #of tweets in a time window']
        DateStart = gmt_to_local(TIME_START, make_string=True, format='%a %d %b %Y')
        DateEnd = gmt_to_local(TIME_END, make_string=True, format='%a %d %b %Y')
        Date = DateStart if DateStart == DateEnd else DateStart + ' to ' + DateEnd
        start_time = gmt_to_local(TIME_START, make_string=True, format='%d %b %H:%M')
        end_time = gmt_to_local(TIME_END, make_string=True, format='%d %b %H:%M')
        cellText = [Date, start_time, end_time, scale, HotWordSize, len(set(WordList)),
                    TweetCountArray.max(), TweetCountArray.min(),
                    str(max(volume)), str(min(volume))]
        # Text is placed bottom-up, so iterate in reverse to read top-down.
        for y, (label, text) in enumerate(zip(reversed(rowLabels), reversed(cellText))):
            ax3.text(0.05, (float(y) / 20) + 0.05, s='%s : %s' % (label, text), size=20)
        ax3.xaxis.set_visible(False)
        ax3.yaxis.set_visible(False)

        # The original name lacked the dot before the "png" extension.
        fig1.savefig('%s_to_%s.png' % (start_time, end_time), dpi=200, bbox_inches="tight")
    finally:
        # Always release the (very large) figure, even if plotting failed.
        plt.close(fig1)