class TweetListener(StreamListener):
	"""Stream listener that forwards incoming tweets to a TweetHandler.

	Receives raw JSON messages from the Twitter stream and, for each
	status carrying both a 'text' and a 'created_at' field, hands the
	tweet text to the TweetHandler for sentiment processing.
	"""

	def __init__(self, outputfile, sentimentGraph):
		# File-like object kept for optional raw logging of tweets.
		self.output_stream = outputfile
		# Delegate that performs the actual per-tweet processing.
		self.tweetHandler = TweetHandler(sentimentGraph)

	def on_data(self, data):
		"""Handle one raw streaming message; always keep the stream alive.

		Returns True so the stream continues even when a message is
		skipped or cannot be encoded.
		"""
		try:
			# Parse the payload once instead of calling json.loads()
			# separately for the key check and for each field.
			message = json.loads(data)
			if 'text' in message and 'created_at' in message:
				self.tweetHandler.handleTweet(message['text'])
		except UnicodeEncodeError:
			# Best effort: silently skip tweets whose text cannot be
			# encoded rather than killing the stream.
			pass
		return True

	def on_error(self, status):
		"""Print the API error status and stop the stream."""
		print(status)
		return False
# --- Example #2 (scraped-sample separator) ---
    def __init__(self, include_entities, stall_warning, teams, timeout=False):
        """Build the streaming POST parameters and OAuth credentials.

        include_entities / stall_warning: forwarded verbatim to the API.
        teams: iterable of keyword strings to track.
        timeout: seconds of <1 byte/s before the connection is restarted
            (False disables the low-speed check).
        """
        # Comma-separated 'track' parameter, e.g. 'teamA,teamB'.
        # join() replaces the manual concat loop + trailing-comma trim.
        trackWords = ','.join(teams)
        print(trackWords)
        self.post_params = {'include_entities': include_entities,
                            'stall_warning': stall_warning,
                            'track': trackWords}
        self.tweetHandler = TweetHandler(teams)

        # Set authorization tokens.
        self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'],
                                       secret=OAUTH_KEYS['access_token_secret'])
        self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'],
                                             secret=OAUTH_KEYS['consumer_secret'])
        self.conn = None
        self.buffer = ''
        self.timeout = timeout
        self.setup_connection()
	def __init__(self, outputfile, sentimentGraph):
		"""Remember the output file and build the tweet-processing delegate."""
		# Handler that consumes incoming tweets for the sentiment graph.
		self.tweetHandler = TweetHandler(sentimentGraph)
		# File-like object available for writing raw tweet output.
		self.output_stream = outputfile
# --- Example #4 (scraped-sample separator) ---
class TwitterStream:
    """Persistent connection to the Twitter Streaming API via pycurl.

    Tracks a list of team keywords and forwards each complete tweet
    message to a TweetHandler.  Reconnects using the back-off policy
    recommended by Twitter: linear for network errors, exponential for
    HTTP errors and for rate limiting (HTTP 420).
    """

    post_params = {}
    tweetHandler = None

    def __init__(self, include_entities, stall_warning, teams, timeout=False):
        # Comma-separated 'track' parameter built from the team names;
        # join() replaces the manual concat loop + trailing-comma trim.
        trackWords = ','.join(teams)
        print(trackWords)
        self.post_params = {'include_entities': include_entities,
                            'stall_warning': stall_warning,
                            'track': trackWords}
        self.tweetHandler = TweetHandler(teams)

        # Set authorization tokens.
        self.oauth_token = oauth.Token(key=OAUTH_KEYS['access_token_key'],
                                       secret=OAUTH_KEYS['access_token_secret'])
        self.oauth_consumer = oauth.Consumer(key=OAUTH_KEYS['consumer_key'],
                                             secret=OAUTH_KEYS['consumer_secret'])
        self.conn = None
        self.buffer = ''
        self.timeout = timeout
        self.setup_connection()

    def setup_connection(self):
        """Create a persistent HTTP connection to the Streaming API endpoint using cURL."""
        if self.conn:
            self.conn.close()
            self.buffer = ''
        self.conn = pycurl.Curl()
        # Restart the connection if less than 1 byte/s is received during
        # "timeout" seconds.  NOTE(review): bool is a subclass of int, so the
        # default timeout=False also passes this check (with a time of 0) —
        # confirm that is the intended "disabled" behaviour.
        if isinstance(self.timeout, int):
            self.conn.setopt(pycurl.LOW_SPEED_LIMIT, 1)
            self.conn.setopt(pycurl.LOW_SPEED_TIME, self.timeout)
        self.conn.setopt(pycurl.URL, API_ENDPOINT_URL)
        self.conn.setopt(pycurl.USERAGENT, USER_AGENT)
        # Using gzip is optional but saves us bandwidth.
        self.conn.setopt(pycurl.ENCODING, 'deflate, gzip')
        self.conn.setopt(pycurl.POST, 1)
        self.conn.setopt(pycurl.POSTFIELDS, urllib.urlencode(self.post_params))
        # Set connection details with authorization settings.
        self.conn.setopt(pycurl.HTTPHEADER, ['Host: stream.twitter.com',
                                             'Authorization: %s' % self.get_oauth_header()])
        # self.handle_tweet is called by cURL for each received chunk.
        self.conn.setopt(pycurl.WRITEFUNCTION, self.handle_tweet)

    def get_oauth_header(self):
        """Create and return the signed OAuth Authorization header."""
        params = {'oauth_version': '1.0',
                  'oauth_nonce': oauth.generate_nonce(),
                  'oauth_timestamp': int(time.time())}
        req = oauth.Request(method='POST', parameters=params,
                            url='%s?%s' % (API_ENDPOINT_URL,
                                           urllib.urlencode(self.post_params)))
        req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), self.oauth_consumer, self.oauth_token)
        return req.to_header()['Authorization'].encode('utf-8')

    def start(self):
        """Listen to the Streaming endpoint forever, reconnecting on errors.

        Handles exceptions according to Twitter's recommendations:
        linear back-off up to 16s for network errors, exponential
        starting at 60s for rate limiting (HTTP 420), exponential capped
        at 320s for other HTTP errors.
        """
        backoff_network_error = 0.25
        backoff_http_error = 5
        backoff_rate_limit = 60
        while True:
            self.setup_connection()
            try:
                self.conn.perform()
            except pycurl.error:
                # Network error: linear back-off up to 16 seconds.
                # Catching pycurl.error instead of a bare except lets
                # KeyboardInterrupt/SystemExit actually stop the loop.
                print('Network error: %s' % self.conn.errstr())
                print('Waiting %s seconds before trying again' % backoff_network_error)
                time.sleep(backoff_network_error)
                backoff_network_error = min(backoff_network_error + 1, 16)
                continue
            # perform() returned, so we received an HTTP response code.
            sc = self.conn.getinfo(pycurl.HTTP_CODE)
            if sc == 420:
                # Rate limit: exponential back-off starting with 1 minute,
                # doubled on each attempt.
                print('Rate limit, waiting %s seconds' % backoff_rate_limit)
                time.sleep(backoff_rate_limit)
                backoff_rate_limit *= 2
            else:
                # HTTP error: exponential back-off up to 320 seconds.
                print('HTTP error %s, %s' % (sc, self.conn.errstr()))
                print('Waiting %s seconds' % backoff_http_error)
                time.sleep(backoff_http_error)
                backoff_http_error = min(backoff_http_error * 2, 320)

    def handle_tweet(self, data):
        """cURL write callback: buffer chunks and process complete messages.

        A message is complete when the chunk ends with CRLF and the
        buffer is non-blank (keep-alive newlines are ignored).
        """
        self.buffer += data
        if data.endswith('\r\n') and self.buffer.strip():
            # Complete message received; reset the buffer before handling.
            message = json.loads(self.buffer)
            self.buffer = ''
            if message.get('limit'):
                print('Rate limiting caused us to miss %s tweets' % (message['limit'].get('track')))
            elif message.get('disconnect'):
                raise Exception('Got disconnect: %s' % message['disconnect'].get('reason'))
            elif message.get('warning'):
                print('Got warning: %s' % message['warning'].get('message'))
            else:
                # Tweet was received correctly; TweetHandler takes care of
                # the tweet text.
                self.tweetHandler.handleTweet(message.get('text'))
# --- Example #5 (scraped-sample separator) ---
        return

    i = -1 if is_reversed else 1

    f = open(title + '.txt', mode='w')
    for j in range(0, N_USERS):
        next_item = heapq.heappop(q)
        f.write(detail1 + next_item[1] + detail2 + str((i * int(next_item[0]))))
        f.write('\n')

    f.close()


if __name__ == '__main__':
    # TweetHandler drives the per-tweet processing below.
    th = TweetHandler()

    # Number of users to show data for.  int() is required: input() returns
    # a str in Python 3 (this snippet is Python 3 — see the encoding=
    # argument to open below), and N_USERS is later used as a range() bound.
    N_USERS = int(input("Enter number of users to show data for: "))

    # Data sets accumulated while scanning the output file.
    tweets = []
    user_number_of_tweets_overall = dict()
    user_number_of_tweets_per_hour = dict()
    user_number_of_followers = dict()
    number_of_retweets_per_tweet = dict()

    f = open('output.txt', 'r', encoding='utf-8')

    print('Reading output...')
    # go through each tweet