Exemplo n.º 1
0
# Test 9: Find the degree of a hashtag
# Prints the test banner, then one True/False line per (hashtag, expected
# degree) pair looked up in sample_dictionary.
print('TEST 9: Find the degree of a hashtag')
for hashtag, expected_degree in (('#Jobs', 6), ('#hiring!', 6), ('#LaLiga', 1)):
    actual_degree = tweetLibrary.get_hashtag_degree(sample_dictionary, hashtag)
    print(actual_degree == expected_degree)


# Test 10: Find average degree of a collection of hashtags
# Checks the average degree of sample_dictionary, then of dictionaries built
# from the first 20 and the first 2000 entries of `tweets`. Each check prints
# True or False.
# NOTE(review): the expected values are compared with exact float equality;
# this only holds if get_average_degree divides the same way -- confirm.
print('TEST 10: Find average degree')
print(tweetLibrary.get_average_degree(sample_dictionary) == 129 / float(54))

# Hashtag lists for tweets 0..19 (raw tweet -> decoded -> text -> hashtags).
tweets_hashtags2 = [
    tweetLibrary.extract_hashtags(
        tweetLibrary.get_tweet_text(tweetLibrary.decode_tweet(tweets[i])))
    for i in range(20)
]
sample_dictionary2 = tweetLibrary.get_hashtag_dictionary(tweets_hashtags2)
print(tweetLibrary.get_average_degree(sample_dictionary2) == 1.5)

# Same pipeline for tweets 0..1999.
tweets_hashtags3 = [
    tweetLibrary.extract_hashtags(
        tweetLibrary.get_tweet_text(tweetLibrary.decode_tweet(tweets[i])))
    for i in range(2000)
]
sample_dictionary3 = tweetLibrary.get_hashtag_dictionary(tweets_hashtags3)
print(tweetLibrary.get_average_degree(sample_dictionary3) == 1684 / float(458))


# Test 11: Compare timestamps of a list of timestamps
# Builds two parallel lists (integer millisecond timestamp and hashtag list,
# one entry per tweet) and passes the timestamps to compare_timestamp_list.
tweet_timestamps = []
tweet_hashtags = []
for raw_tweet in tweets:
    tweet = tweetLibrary.decode_tweet(raw_tweet)
    timestamp_ms = int(tweetLibrary.get_ms_timestamp(tweet))
    tweet_timestamps.append(timestamp_ms)
    tweet_text = tweetLibrary.get_tweet_text(tweet)
    tweet_hashtags.append(tweetLibrary.extract_hashtags(tweet_text))
# The return value is discarded here; this only exercises the call.
tweetLibrary.compare_timestamp_list(tweet_timestamps)
Exemplo n.º 2
0
# Rolling average hashtag degree: for every tweet in tweets.txt, write one
# 'Current average degree:<value>' line to tweet_output/ft2.txt, computed
# over the tweets still inside the time window ending at that tweet.

# Read all raw tweets; the context manager closes the input file as soon as
# its lines have been read.
with open('tweets.txt') as tweets_file:
    tweets = tweets_file.readlines()

# Parallel lists: integer millisecond timestamp and hashtag list per tweet.
tweet_timestamps = []
tweet_hashtags = []
for raw_tweet in tweets:
    tweet = tweetLibrary.decode_tweet(raw_tweet)
    tweet_timestamps.append(int(tweetLibrary.get_ms_timestamp(tweet)))
    tweet_hashtags.append(tweetLibrary.extract_hashtags(tweetLibrary.get_tweet_text(tweet)))

current_timestamps = []
lowest_index = 0
# The output file is opened only once parsing has succeeded, and is closed
# even if one of the tweetLibrary calls below raises.
with open('tweet_output/ft2.txt', 'w') as output:
    # Start at the first tweet so that it is part of the window too.
    for i, timestamp in enumerate(tweet_timestamps):
        current_timestamps.append(timestamp)
        print(i)  # progress indicator
        # lowest_index is the lowest index of current_timestamps where the
        # element at that index is within 60 seconds of element i.
        # NOTE(review): that contract is assumed from this comment; the
        # helper itself is not visible here -- confirm.
        lowest_index = tweetLibrary.compare_timestamp_list(current_timestamps)
        del current_timestamps[0:lowest_index]
        # current_timestamps now holds the timestamps of the tweets ending at
        # tweet i (inclusive); take the hashtag lists of exactly those tweets.
        window_start = i + 1 - len(current_timestamps)
        current_hashtags = tweet_hashtags[window_start:i + 1]
        average_degree = tweetLibrary.get_average_degree(
            tweetLibrary.get_hashtag_dictionary(current_hashtags))
        output.write('Current average degree:' + str(average_degree) + '\n')