Пример #1
0
def updateGrapWith(tweet):
    """Add one tweet to the sliding window and update the hashtag graph.

    Works on two module-level objects, both mutated in place:
    ``tweetWindow`` (used through ``append``, ``popleft`` and ``[0]``, e.g. a
    collections.deque) and ``graph`` (a mapping of tag -> list of adjacent
    tags, where the same adjacency may appear several times, once per tweet
    in the window that contributes it).

    tweet: mapping with a 'created_at' string such as
        'Thu Oct 29 17:51:01 +0000 2015' and an ['entities']['hashtags']
        list whose items each have a 'text' key.

    Returns the module-level ``graph``.

    Raises KeyError when one of those keys is missing and ValueError when
    'created_at' does not match the expected format.
    """
    # The offset is matched as the literal text "+0000", so the result is a
    # naive datetime that is treated as UTC.
    tweetTime = dt.datetime.strptime(
        tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')

    rawTags = tweet['entities']['hashtags']
    tags = set()
    if len(rawTags) > 1:
        for rawTag in rawTags:
            # Strip non-ASCII characters and fold case so equivalent tags
            # collapse into one set member.
            cleaned = replaceControlChars(
                rawTag['text'].encode('ascii', 'ignore')).lower()
            # A hashtag made only of non-ASCII characters is empty after
            # cleaning; keeping it would add a bogus "" node to the graph.
            if cleaned:
                tags.add(cleaned)

    # Every tweet enters the window so it can age out later, but only tweets
    # with at least two distinct tags carry (and contribute) edges.
    entry = {'time': tweetTime}
    if len(tags) >= 2:
        entry['hashtags'] = tags
        for tag in tags:
            graph.setdefault(tag, []).extend(
                other for other in tags if other != tag)
    tweetWindow.append(entry)

    # The entry appended above has age 0, so the window is never empty here
    # and the loop stops, at the latest, when it reaches that entry.
    while (tweetTime - tweetWindow[0]['time']).total_seconds() > 60:
        evicted = tweetWindow.popleft()
        evictedTags = evicted.get('hashtags', ())
        for tag in evictedTags:
            adjacent = graph[tag]
            for other in evictedTags:
                if other != tag:
                    # Removes a single occurrence: the one this tweet added.
                    adjacent.remove(other)
            # Drop the node only after all of this tweet's edges are gone, so
            # the lookups above never hit an already-deleted key.
            if not adjacent:
                del graph[tag]
    return graph
Пример #2
0
def getTextAndTimestamp(tweet):
    """Return the tweet's cleaned text followed by its timestamp.

    The text under tweet["text"] is encoded to ASCII (characters that cannot
    be encoded are dropped) and passed through replaceControlChars. The
    result is formatted together with the raw tweet["created_at"] value.

    Whenever the cleaned text differs from the original text, the function
    attribute ``tweetsWithUnicode`` is incremented. That attribute is not
    initialised here, so it has to be set before the first call.
    """
    originalText = tweet["text"]
    asciiText = originalText.encode("ascii", "ignore")
    cleanedText = replaceControlChars(asciiText)
    createdAt = tweet["created_at"]

    textChanged = originalText != cleanedText
    if textChanged:
        getTextAndTimestamp.tweetsWithUnicode += 1

    return "%s (timestamp: %s)" % (cleanedText, createdAt)