Exemplo n.º 1
0
# Decode the first four raw tweets, one numbered name per tweet.
tweet_1, tweet_2, tweet_3, tweet_4 = (
    tweetLibrary.decode_tweet(tweets[slot]) for slot in range(4)
)

# Each printed line is True when the decoded tweet's timestamp equals the
# expected string.
for checked_tweet, expected_stamp in (
    (tweet_1, 'Thu Oct 29 17:51:50 +0000 2015'),
    (tweet_2, 'Thu Oct 29 17:51:51 +0000 2015'),
    (tweet_3, "Thu Oct 29 18:10:49 +0000 2015"),
    (tweet_4, "Thu Oct 29 18:10:49 +0000 2015"),
):
    print(tweetLibrary.get_timestamp(checked_tweet) == expected_stamp)

# Dump the tweet texts; note the second tweet's text is printed before the
# first's, exactly as in the original ordering.
for shown_tweet in (tweet_2, tweet_1, tweet_3, tweet_4):
    print(shown_tweet["text"])

# Test 3: extract hashtags
print('TEST 3: Extracting Hashtags')

# Pull the text out of four sample tweets, picked by their position in `tweets`.
tweet_5, tweet_6, tweet_7, tweet_8 = (
    tweetLibrary.get_tweet_text(tweetLibrary.decode_tweet(tweets[position]))
    for position in (10, 23, 39, 88)
)

# Each printed line is True when the extracted hashtag list equals the
# expected list (same tags, same order).
for sample_text, expected_tags in (
    (tweet_5, ['#Trump', '#Election', '#News']),
    (tweet_6, ['#hiring', '#BusinessMgmt', '#NettempsJobs', '#MenloPark',
               '#Job', '#Jobs', '#CareerArc']),
    (tweet_7, ['#Hangry', '#Rhonda']),
    (tweet_8, ['#TUAUSENCIA']),
):
    print(tweetLibrary.extract_hashtags(sample_text) == expected_tags)

# Test 4: type of a decoded tweets[i]
print('TEST 4: Check that type of decoded tweet is dictionary')

# Prints True for each sampled index whose decoded tweet is exactly a dict
# (an exact type match, not a subclass check).
for sampled_index in (3, 77):
    print(type(tweetLibrary.decode_tweet(tweets[sampled_index])) is dict)

# Test 5: hashtag connections -- only the banner for this test is emitted here.
print('TEST 5: Hashtag connections')
Exemplo n.º 2
0
import json
import tweetLibrary


# Script: for every tweet in tweets.txt, write the average degree of the
# hashtag graph built from the tweets in the current time window to
# tweet_output/ft2.txt (one 'Current average degree:<value>' line per tweet).

# Read all raw tweet lines up front. The context manager closes the input
# file even if reading fails (the handle was previously never closed).
with open('tweets.txt') as tweets_file:
    tweets = tweets_file.readlines()

# Parallel lists: entry k holds the millisecond timestamp / hashtag list of
# tweet k.
tweet_timestamps = []
tweet_hashtags = []
for raw_tweet in tweets:
    tweet = tweetLibrary.decode_tweet(raw_tweet)
    tweet_timestamps.append(int(tweetLibrary.get_ms_timestamp(tweet)))
    tweet_hashtags.append(
        tweetLibrary.extract_hashtags(tweetLibrary.get_tweet_text(tweet)))

current_timestamps = []
lowest_index = 0

# `with` guarantees the output file is flushed and closed even when a helper
# raises part-way through the run.
with open('tweet_output/ft2.txt', 'w') as output:
    # Start at tweet 0 so the first tweet's timestamp takes part in the
    # window and every tweet produces an output line (the loop used to start
    # at 1, silently dropping the first timestamp).
    for i in range(len(tweet_timestamps)):
        current_timestamps.append(tweet_timestamps[i])
        print(i)  # progress indicator on stdout

        # Per the original author's note: lowest_index is the lowest index of
        # current_timestamps whose element is within 60 seconds of element i.
        # NOTE(review): compare_timestamp_list is not visible here -- confirm.
        lowest_index = tweetLibrary.compare_timestamp_list(current_timestamps)
        # Drop everything older than the window.
        del current_timestamps[0:lowest_index]

        # current_timestamps now holds the timestamps of the last
        # len(current_timestamps) tweets ending at tweet i *inclusive*, so the
        # matching hashtag slice must also end at i inclusive. The previous
        # slice [i - len : i] was shifted one tweet into the past: it left
        # out tweet i and included one tweet already evicted from the window.
        window_start = i + 1 - len(current_timestamps)
        current_hashtags = tweet_hashtags[window_start:i + 1]

        average_degree = tweetLibrary.get_average_degree(
            tweetLibrary.get_hashtag_dictionary(current_hashtags))
        output.write('Current average degree:' + str(average_degree) + '\n')