Exemplo n.º 1
0
	def retreive(self):
		"""Append the tweets inside ``self.bbox`` and ``self.timerange`` to ``self.tweets``.

		Every tweet of ``self.TweetIter`` is a mapping with the keys
		``created_at`` (parsed as ``"%a %b %d %H:%M:%S +0000 %Y"``), ``lat``,
		``lon``, ``screen_name`` and ``text``.

		A tweet is kept when
		  * ``bbox[1][0] <= lat <= bbox[0][0]`` and ``bbox[0][1] <= lon <= bbox[1][1]``
		    (so ``bbox[0]`` carries the larger latitude / smaller longitude corner
		    and ``bbox[1]`` the opposite corner), and
		  * ``timerange[0] <= created_at <= timerange[1]``.

		Iteration stops at the first tweet later than ``timerange[1]``; that
		tweet is NOT recorded (it used to be appended before the loop stopped).
		NOTE(review): stopping early only makes sense if ``TweetIter`` is in
		chronological order -- confirm against the caller.

		Returns ``self.tweets``, the list the formatted lines are appended to;
		it is not cleared first.
		"""
		for tw in self.TweetIter:

			created = time.strptime(tw['created_at'],"%a %b %d %H:%M:%S +0000 %Y")

			#Past the end of the requested range: stop before recording anything
			if created>self.timerange[1]:
				break

			inside_bbox = (
				self.bbox[1][0]<=tw['lat']<=self.bbox[0][0]
				and self.bbox[0][1]<=tw['lon']<=self.bbox[1][1]
			)

			if inside_bbox and created>=self.timerange[0]:
				#gmt_to_local is defined elsewhere; presumably returns a local-time string
				self.tweets.append('@'+tw['screen_name']+'\t _tweeted_ \t'+tw['text'] + '\t at_time \t' + gmt_to_local(created,make_string=True))

		return self.tweets
Exemplo n.º 2
0
	def retreive(self):
		"""Append the tweets that mention one of ``self.keywords`` to ``self.tweets``.

		When ``self.timerange`` or ``self.bbox`` is set, every item of
		``self.TweetIter`` is expected to be an already formatted line
		``'@<screen_name>\\t _tweeted_ \\t<text>\\t at_time \\t<created_at>'``
		and is parsed back into its fields. Otherwise every item is a mapping
		with ``screen_name``, ``text`` and a raw ``created_at`` stamp, which is
		replaced IN PLACE by the value of ``gmt_to_local(..., make_string=True)``.

		The text is tokenized with ``self.tokenize``; only alphanumeric tokens
		that are not one of the placeholder tokens (``USERNAME``, ``URL``,
		``PHONENUMBER``, ``TIME``, ``NUMBER``) are compared with the keywords.

		Fixes over the previous version:
		  * the '@' already present in a formatted line is stripped before the
		    line is rebuilt, so the output no longer starts with '@@';
		  * the line is split once on the first ``_tweeted_`` marker and once on
		    the last ``at_time`` marker, so a text that itself contains a marker
		    is kept whole instead of being truncated.

		Returns ``self.tweets``. An ``IndexError`` is raised for a formatted
		line that lacks one of the two markers.
		"""
		tweeted_marker = '\t _tweeted_ \t'
		time_marker    = '\t at_time \t'
		placeholders   = ('USERNAME','URL','PHONENUMBER','TIME','NUMBER')

		for tw in self.TweetIter:

			if self.timerange is not None or self.bbox is not None:
				head = tw.split(tweeted_marker,1)
				tail = head[1].rsplit(time_marker,1)
				name = head[0]
				tw = {
					#Drop the '@' added when the line was formatted; it is re-added below
					'screen_name': name[1:] if name.startswith('@') else name,
					'text'       : tail[0],
					'created_at' : tail[1],
				}
			else:
				tw['created_at']  = gmt_to_local(time.strptime(tw['created_at'],"%a %b %d %H:%M:%S +0000 %Y"),make_string=True)

			words_in_tweet = (w for w in self.tokenize(tw['text']) if w.isalnum() and w not in placeholders)
			if any(w in self.keywords for w in words_in_tweet):
				self.tweets.append('@'+tw['screen_name']+tweeted_marker+tw['text'] + time_marker + tw['created_at'])

		return self.tweets
Exemplo n.º 3
0
	def __iter__(self):
		"""Yield one snapshot dict per time window of tweets read from ``self.ObjIter``.

		For every item:
		  * an item with ``lon == 0`` gets its coordinates from
		    ``GetGeocode(item['place'])``;
		  * it is skipped when ``lon`` is still 0 or the position is outside
		    ``Grid[0] <= lon <= Grid[2]`` and ``Grid[1] <= lat <= Grid[3]``;
		  * when the tweet is older than ``self.time_start`` or more than
		    ``self.timeWindow`` seconds after it (and ``timeWindow != -1``), the
		    current window is yielded if it holds any tweet, and a new, empty
		    window starting at this tweet's time is opened;
		  * the tweet is added to the current window, except that with
		    ``self.UsersUnique`` set only the first tweet of each ``user_id``
		    per window is kept.

		After the input is exhausted the last non-empty window is yielded too.

		Each yielded dict has the keys ``LOC`` (list of ``(lat, lon)`` floats),
		``TEXT``, ``PLACE``, ``CREATED_AT``, ``SCREEN_NAME`` (parallel lists) and
		``TimeWindow`` (``[start, end]`` strings formatted ``'%d%b%HHR%MMN'``).

		State is kept on the instance (``time_start``, ``UniqueUids``, ``tw``),
		so the window survives between iterations.
		"""

		def local_stamp(gmt_struct):
			#Shift a GMT struct_time by the current local-minus-GMT offset and format it
			offset = time.mktime(time.localtime())-time.mktime(time.gmtime())
			return time.strftime('%d%b%HHR%MMN',time.localtime(time.mktime(gmt_struct)+offset))

		def snapshot(window_end):
			#Package the lists of the current window with its local-time bounds
			return {'LOC':self.tw['LOC'],'TEXT':self.tw['TEXT'],'TimeWindow':[local_stamp(self.time_start),local_stamp(window_end)],'PLACE':self.tw['PLACE'],'CREATED_AT':self.tw['CREATED_AT'],'SCREEN_NAME':self.tw['SCREEN_NAME']}

		#Time of the last accepted tweet; falls back to the window start so the
		#closing snapshot cannot hit an unbound name when this pass accepts no
		#tweet but a previous pass left entries in self.tw
		last_time = self.time_start

		for item in self.ObjIter:

			#Tweets with no GPS are assigned place_name geocodes
			if item['lon']==0:
				item['lon'],item['lat'] = GetGeocode(item['place'])

			#Block all tweets outside place grid
			if item['lon']==0 or not (self.Grid[0]<=item['lon']<=self.Grid[2] and self.Grid[1]<=item['lat']<=self.Grid[3]):
				continue

			#Unfold tweet into its item variables
			text        = item['text']
			uid         = item['user_id']
			place       = item['place']
			loc         = (float(item['lat']),float(item['lon']))
			last_time   = time.strptime(item['created_at'],"%a %b %d %H:%M:%S +0000 %Y")
			created_at  = gmt_to_local(last_time,make_string=True)
			screen_name = item['screen_name']

			#TimeWindow update (timeWindow == -1 switches windowing off)
			shift_window = ((last_time<self.time_start) or (time.mktime(last_time)-time.mktime(self.time_start)>self.timeWindow)) and self.timeWindow!=-1

			if shift_window:
				#Hand out the finished window before its lists are replaced
				if self.tw['LOC']:
					yield snapshot(last_time)

				#Open a fresh window starting at this tweet
				self.time_start 		= last_time
				self.UniqueUids 		= []
				self.tw['LOC']			= []
				self.tw['TEXT']			= []
				self.tw['PLACE']    	= []
				self.tw['CREATED_AT'] 	= []
				self.tw['SCREEN_NAME']  = []

			#With UsersUnique only the first tweet of a user per window is kept
			if self.UsersUnique:
				if uid in self.UniqueUids:
					continue
				self.UniqueUids.append(uid)

			#Write the tweet's values to the window lists
			self.tw['LOC'].append(loc)
			self.tw['TEXT'].append(text)
			self.tw['PLACE'].append(place)
			self.tw['CREATED_AT'].append(created_at)
			self.tw['SCREEN_NAME'].append(screen_name)

		#Flush whatever the last window collected
		if self.tw['LOC']:
			yield snapshot(last_time)
Exemplo n.º 4
0
def _style_report_grid(ax):
  """Draw the white major / light-grey minor grid lines used by the report panels."""
  ax.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
  ax.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)


def print_vocabulary_report(db,scale=60*20,**kwargs):
  """Save a PNG report of tweet volume and hot words per time window.

  Tweets are read through ``TweetSnap(db=db, timeWindow=scale, ...)`` one
  time window at a time. For each window the ``HotWordSize`` most common
  vocabulary words (from ``get_vocabulary``) are recorded, and the figure
  shows four panels:

    * tweet volume per window (top right),
    * share of each hot word within each window as a grey map (bottom right),
    * total count of each hot word as horizontal bars (bottom left),
    * a text summary of the run (top left).

  Parameters
    db          -- passed unchanged to ``TweetSnap``.
    scale       -- window length handed to ``TweetSnap`` as ``timeWindow``;
                   shown in the summary as seconds. Default 20 minutes.
    TIME_START  -- keyword; ``time.struct_time`` or a string in the format
                   ``"%d %b %H:%M %Z %Y"``. Default ``time.gmtime(0)``.
    TIME_END    -- keyword; same forms as TIME_START. Default: now.
    HotWordSize -- keyword; hot words kept per window. Default 8.

  The figure is written to ``'<start>_to_<end>.png'`` in the current
  directory, where start/end are formatted ``'%d %b %H:%M'`` by
  ``gmt_to_local``. Nothing is returned. When no window or no hot word was
  collected a message is printed and no figure is written.
  """

  print("COLLECTING TWEETS....")
  TS = TweetSnap(db=db,timeWindow = scale,Placename2Geocode=False)
  print("COLLECTION OVER....")

  TIME_START = kwargs.get("TIME_START",time.gmtime(0))
  TIME_END   = kwargs.get("TIME_END",time.gmtime(time.time()))
  HotWordSize = kwargs.get("HotWordSize",8)

  #String bounds are parsed as local time and converted to GMT struct_time
  if isinstance(TIME_START,str):
    TIME_START  = time.gmtime(time.mktime(time.strptime(TIME_START,"%d %b %H:%M %Z %Y")))
  if isinstance(TIME_END,str):
    TIME_END    = time.gmtime(time.mktime(time.strptime(TIME_END,"%d %b %H:%M %Z %Y")))

  #Skip ahead when the requested start lies after the first stored window
  TIME_DIFF   = time.mktime(TIME_START)  - time.mktime(TS.time_start)

  if TIME_DIFF>0:
    TS.move_on(TIME_DIFF-scale)

  volume = []
  ColorGradient = {}
  TweetCountDict    = {}
  TimeList      = []

  while (TS.time_start<TIME_END and not TS.end):

    #Capture nextSnap; TS.time_start is read after the call to label the window
    snap = TS.next()
    timeWindow = gmt_to_local(TS.time_start,make_string=True,format='%a %H:%M')
    #Volume of tweets
    volume.append(len(snap['LOC']))

    #HotWords of this window and their counts
    Vocab_dict = dict(get_vocabulary(snap['TEXT']).most_common(HotWordSize))
    TimeList.append(timeWindow)

    window_total = float(sum(Vocab_dict.values()))
    ColorGradient[timeWindow] = {}

    for word,count in Vocab_dict.items():
      #Share of the word among this window's hot words
      ColorGradient[timeWindow][word] = count/window_total
      TweetCountDict[word] = TweetCountDict.get(word,0) + count

  #Every statistic below needs at least one window and one hot word
  if not TimeList or not TweetCountDict:
    print("NO TWEETS TO REPORT....")
    return

  #Words ordered by ascending total count
  SortedTweetCount = sorted(TweetCountDict.items(),key=operator.itemgetter(1))
  WordList         = [item[0] for item in SortedTweetCount]
  TweetCountArray = np.array([item[1] for item in SortedTweetCount],dtype=int)
  del SortedTweetCount

  #Rows are words, columns are windows; absent words stay at 0
  ColorMap = np.zeros([len(WordList),len(TimeList)],dtype=float)

  for rw,word in enumerate(WordList):
    for cl,timeWindow in enumerate(TimeList):
      ColorMap[rw][cl] = ColorGradient[timeWindow].get(word,0)

  #One x tick every 5th window, labelled with that same window
  xtick_positions = np.arange(0,len(TimeList),5)
  xtick_labels    = TimeList[::5]

  ###PRINT RESULTS
  panel_gap = 0.00000000000000000000000000000000000000005
  gs      = gridspec.GridSpec(2,2,width_ratios=[1,2],height_ratios=[1,4])
  gs.update(left=0.05,right=0.48,wspace=panel_gap,hspace=panel_gap)

  fig1    = plt.figure(figsize=(36,90),dpi=200)


  ax0     = fig1.add_subplot(gs[0,1])
  ax1     = fig1.add_subplot(gs[1,1])
  ax2     = fig1.add_subplot(gs[1,0])
  ax3     = fig1.add_subplot(gs[0,0])

  #TweetVolume
  _style_report_grid(ax0)
  ax0.set_axis_bgcolor('0.95')

  ax0.plot(np.arange(len(TimeList)),volume,label='NumberOfTweets',linewidth=0.75)
  ax0.legend(loc='upper left',ncol=4)
  ax0.set_xlim(0,len(TimeList)-1)
  ax0.xaxis.tick_top()
  ax0.yaxis.tick_right()
  ax0.set_xticks(xtick_positions)
  ax0.set_xticklabels(xtick_labels,rotation='vertical')

  #HotWordColorMap
  ax1.imshow(ColorMap,cmap=plt.cm.binary,vmin=ColorMap.min(),vmax=ColorMap.max(),aspect='auto',origin='lower')
  ax1.yaxis.tick_right()
  ax1.set_yticks(np.arange(len(WordList)))
  ax1.set_yticklabels(WordList)
  ax1.set_xticks(xtick_positions)
  ax1.set_xticklabels(xtick_labels,rotation='vertical')

  _style_report_grid(ax1)

  #TweetVolumeDistributionOverHotWords
  _style_report_grid(ax2)
  ax2.set_axis_bgcolor('0.95')

  ax2.invert_xaxis()
  ax2.barh(np.arange(len(WordList)),TweetCountArray,align='center')

  #add the numbers to the side of each bar; a run of equal counts is
  #annotated once, on its first bar
  PreviousValue = None
  for p, ch in enumerate(TweetCountArray):
    if ch!=PreviousValue:
      ax2.annotate(str(ch), xy=(ch + 2.5, p - 0.25), va='center')
      PreviousValue = ch


  ax2.set_yticks(np.arange(len(WordList)))
  ax2.set_yticklabels(WordList)
  ax2.set_ylim(0,len(WordList)-1+0.25)

  #Plot table with assisting information
  #1. Date : Day, Date Year and TIME_START to TIME_END
  #2. TIME_START
  #3. TIME_END
  #4. TIME_WINDOW
  #5. No. of HotWords per TimeWindow
  #6. Total No. of unique HotWords Found
  #7. Max #of Tweets for HotWord
  #8. Min #of Tweets for HotWord
  #9. Max #of Tweets in a timeWindow
  #10.Min #of Tweets in a timeWindow

  rowLabels = ['1. Date','2. Start time','3. End time','4. Time Window (seconds)','5. No.Of HotWords per TimeWindow','6. No. of unique hotwords','7. Max #of tweets for HotWord','8. Min #of tweets for HotWord','9. Max #of tweets in a time window','10. Min #of tweets in a time window']
  DateStart = gmt_to_local(TIME_START,make_string=True,format='%a %d %b %Y')
  DateEnd   = gmt_to_local(TIME_END,make_string=True,format='%a %d %b %Y')
  Date      = DateStart if DateStart==DateEnd else DateStart+' to '+DateEnd
  start_time= gmt_to_local(TIME_START,make_string=True,format='%d %b %H:%M')
  end_time  = gmt_to_local(TIME_END,make_string=True,format='%d %b %H:%M')
  cellText  = [Date,start_time,end_time,scale,HotWordSize,len(set(WordList)),TweetCountArray.max(),TweetCountArray.min(),str(max(volume)),str(min(volume))]

  #Text is placed bottom-up, so the rows are walked in reverse to read 1..10 from the top
  for y,(label,text) in enumerate(zip(reversed(rowLabels),reversed(cellText))):
    ax3.text(0.05,(float(y)/20)+0.05,s='%s : %s'%(label,text),size=20)
  ax3.xaxis.set_visible(False)
  ax3.yaxis.set_visible(False)

  fig1.savefig('%s_to_%s.png'%(start_time,end_time),dpi=200,bbox_inches="tight")
  plt.close(fig1)