class TweetListener(StreamListener):
    """Stream listener that forwards each received tweet's text to a TweetHandler.

    Only messages carrying both a 'text' and a 'created_at' field are treated
    as tweets; any other stream message (limit notices, etc.) is ignored.
    """

    def __init__(self, outputfile, sentimentGraph):
        # File handle kept so callers can still supply a log target, even
        # though raw logging is currently disabled.
        self.output_stream = outputfile
        self.tweetHandler = TweetHandler(sentimentGraph)

    def on_data(self, data):
        """Handle one raw JSON message from the stream; always keep listening.

        Returns True so the stream stays open even when a message is skipped.
        """
        try:
            # Parse the payload once instead of three separate json.loads()
            # calls on the same string.
            message = json.loads(data)
            if 'text' in message and 'created_at' in message:
                self.tweetHandler.handleTweet(message['text'])
        except UnicodeEncodeError:
            # Skip tweets whose text cannot be encoded. The original body was
            # a bare `None` expression; `pass` states the intent explicitly.
            pass
        return True

    def on_error(self, status):
        """Print the error status and return False to stop the stream."""
        print(status)
        return False
def __init__(self, include_entities, stall_warning, teams, timeout=False):
    """Build streaming POST parameters and OAuth credentials, then connect.

    :param include_entities: value for the stream's 'include_entities' param
    :param stall_warning:    value for the stream's 'stall_warning' param
    :param teams:            iterable of track keywords, joined with commas
    :param timeout:          int seconds for the low-speed restart limit, or
                             False to disable it
    """
    # str.join replaces the manual concatenate-then-trim-last-comma loop.
    trackWords = ','.join(teams)
    print(trackWords)
    self.post_params = {'include_entities': include_entities,
                        'stall_warning': stall_warning,
                        'track': trackWords}
    self.tweetHandler = TweetHandler(teams)
    # Set authorization tokens.
    self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'],
                                   secret=OAUTH_KEYS['access_token_secret'])
    self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'],
                                         secret=OAUTH_KEYS['consumer_secret'])
    self.conn = None
    self.buffer = ''
    self.timeout = timeout
    self.setup_connection()
def __init__(self, outputfile, sentimentGraph):
    """Remember the output file handle and create the tweet handler.

    :param outputfile:     writable file object, retained for optional logging
    :param sentimentGraph: passed straight through to TweetHandler
    """
    # The handle is stored but not written to by the current code path.
    self.output_stream = outputfile
    self.tweetHandler = TweetHandler(sentimentGraph)
class TwitterStream:
    """Persistent connection to the Twitter Streaming API via pycurl.

    Feeds every complete tweet message to a TweetHandler and reconnects with
    Twitter's recommended back-off strategy on network/HTTP errors.
    """

    post_params = {}
    tweetHandler = None

    def __init__(self, include_entities, stall_warning, teams, timeout=False):
        """Build streaming POST parameters and OAuth credentials, then connect.

        :param include_entities: value for the stream's 'include_entities' param
        :param stall_warning:    value for the stream's 'stall_warning' param
        :param teams:            iterable of track keywords, joined with commas
        :param timeout:          int seconds for the low-speed restart limit,
                                 or False to disable it
        """
        # str.join replaces the manual concatenate-then-trim-last-comma loop.
        trackWords = ','.join(teams)
        print(trackWords)
        self.post_params = {'include_entities': include_entities,
                            'stall_warning': stall_warning,
                            'track': trackWords}
        self.tweetHandler = TweetHandler(teams)
        # Set authorization tokens.
        self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'],
                                       secret=OAUTH_KEYS['access_token_secret'])
        self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'],
                                             secret=OAUTH_KEYS['consumer_secret'])
        self.conn = None
        self.buffer = ''
        self.timeout = timeout
        self.setup_connection()

    def setup_connection(self):
        """Create a persistent HTTP connection to the Streaming API endpoint
        using cURL, closing and resetting any previous connection first."""
        if self.conn:
            self.conn.close()
            self.buffer = ''
        self.conn = pycurl.Curl()
        # Restart connection if less than 1 byte/s is received during
        # "timeout" seconds.
        if isinstance(self.timeout, int):
            self.conn.setopt(pycurl.LOW_SPEED_LIMIT, 1)
            self.conn.setopt(pycurl.LOW_SPEED_TIME, self.timeout)
        self.conn.setopt(pycurl.URL, API_ENDPOINT_URL)
        self.conn.setopt(pycurl.USERAGENT, USER_AGENT)
        # Using gzip is optional but saves us bandwidth.
        self.conn.setopt(pycurl.ENCODING, 'deflate, gzip')
        self.conn.setopt(pycurl.POST, 1)
        self.conn.setopt(pycurl.POSTFIELDS, urllib.urlencode(self.post_params))
        # Set connection details with authorization settings.
        self.conn.setopt(pycurl.HTTPHEADER,
                         ['Host: stream.twitter.com',
                          'Authorization: %s' % self.get_oauth_header()])
        # self.handle_tweet is invoked by cURL whenever new data arrives.
        self.conn.setopt(pycurl.WRITEFUNCTION, self.handle_tweet)

    def get_oauth_header(self):
        """Create and return the signed OAuth 'Authorization' header value."""
        params = {'oauth_version': '1.0',
                  'oauth_nonce': oauth.generate_nonce(),
                  'oauth_timestamp': int(time.time())}
        req = oauth.Request(method='POST', parameters=params,
                            url='%s?%s' % (API_ENDPOINT_URL,
                                           urllib.urlencode(self.post_params)))
        req.sign_request(oauth.SignatureMethod_HMAC_SHA1(),
                         self.oauth_consumer, self.oauth_token)
        return req.to_header()['Authorization'].encode('utf-8')

    def start(self):
        """Listen to the Streaming endpoint indefinitely.

        Handles exceptions according to Twitter's recommendations: linear
        back-off for network errors, exponential back-off for HTTP errors
        and rate limiting.
        """
        backoff_network_error = 0.25
        backoff_http_error = 5
        backoff_rate_limit = 60
        while True:
            self.setup_connection()
            try:
                self.conn.perform()
            # Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit; `Exception` still covers pycurl errors and the
            # disconnect Exception raised from handle_tweet.
            except Exception:
                # Network error, use linear back off up to 16 seconds.
                print('Network error: %s' % self.conn.errstr())
                print('Waiting %s seconds before trying again' % backoff_network_error)
                time.sleep(backoff_network_error)
                backoff_network_error = min(backoff_network_error + 1, 16)
                continue
            # perform() returned: inspect the HTTP status code.
            sc = self.conn.getinfo(pycurl.HTTP_CODE)
            if sc == 420:
                # Rate limit, use exponential back off starting with 1 minute
                # and double each attempt.
                print('Rate limit, waiting %s seconds' % backoff_rate_limit)
                time.sleep(backoff_rate_limit)
                backoff_rate_limit *= 2
            else:
                # HTTP error, use exponential back off up to 320 seconds.
                print('HTTP error %s, %s' % (sc, self.conn.errstr()))
                print('Waiting %s seconds' % backoff_http_error)
                time.sleep(backoff_http_error)
                backoff_http_error = min(backoff_http_error * 2, 320)

    def handle_tweet(self, data):
        """Accumulate streamed chunks; once a complete message has arrived,
        parse it and dispatch tweet text to the TweetHandler.

        Called by cURL with each chunk received on the stream.
        """
        self.buffer += data
        if data.endswith('\r\n') and self.buffer.strip():
            # Complete message received. (Unused local `msg` removed.)
            message = json.loads(self.buffer)
            self.buffer = ''
            if message.get('limit'):
                print('Rate limiting caused us to miss %s tweets'
                      % (message['limit'].get('track')))
            elif message.get('disconnect'):
                raise Exception('Got disconnect: %s'
                                % message['disconnect'].get('reason'))
            elif message.get('warning'):
                print('Got warning: %s' % message['warning'].get('message'))
            else:
                # Tweet was received correctly. TweetHandler takes care of
                # the tweet string.
                self.tweetHandler.handleTweet(message.get('text'))
return i = -1 if is_reversed else 1 f = open(title + '.txt', mode='w') for j in range(0, N_USERS): next_item = heapq.heappop(q) f.write(detail1 + next_item[1] + detail2 + str((i * int(next_item[0])))) f.write('\n') f.close() if __name__ == '__main__': # TweetHandler th = TweetHandler() # number of users to show N_USERS = input("Enter number of users to show data for: ") # data sets tweets = [] user_number_of_tweets_overall = dict() user_number_of_tweets_per_hour = dict() user_number_of_followers = dict() number_of_retweets_per_tweet = dict() f = open('output.txt', 'r', encoding='utf-8') print('Reading output...') # go through each tweet