class Twitter:
    """Wrapper around the Twitter ``Api`` client that exposes its sample stream.

    The client is stored on ``self.api``; ``stream_sample`` iterates over
    ``self.api.GetStreamSample()`` and reconnects after failures.
    """

    # Upper bound (seconds) for the reconnect delay, so that a long outage
    # cannot make the exponential backoff grow without limit.
    MAX_BACKOFF_SECONDS = 320

    def __init__(self, **kwargs):
        """Instantiate the Twitter API client from environment variables.

        Set the env variables with values from
        https://developer.twitter.com/apps

        Args:
            **kwargs: Accepted for call-site compatibility; not used here.

        Raises:
            KeyError: If any of the four ``TWITTER_*`` variables is missing.
        """
        self.api = Api(
            consumer_key=environ['TWITTER_CONSUMER_KEY'],
            consumer_secret=environ['TWITTER_CONSUMER_SECRET'],
            access_token_key=environ['TWITTER_ACCESS_TOKEN_KEY'],
            access_token_secret=environ['TWITTER_ACCESS_TOKEN_SECRET'],
        )

    def stream_sample(self):
        """Yield items from the sample stream forever, reconnecting on errors.

        Each failure is followed by a sleep that doubles on repeated errors
        (capped at ``MAX_BACKOFF_SECONDS``) and resets to one second as soon
        as an item is received again.

        Yields:
            Whatever ``self.api.GetStreamSample()`` produces, one item at a
            time.
        """
        import logging

        log = logging.getLogger(__name__)

        # Backoff to avoid being put in timeout by Twitter if errors occur.
        backoff = 1
        while True:
            try:
                for tweet in self.api.GetStreamSample():
                    # Reset backoff since the request succeeded.
                    backoff = 1
                    yield tweet
            except Exception as exc:
                # Sometimes the GetStreamSample connection fails; record why
                # instead of discarding the error, then wait and reconnect.
                log.warning(
                    "Twitter sample stream failed (%s); retrying in %s s",
                    exc,
                    backoff,
                )
                sleep(backoff)
                # Exponential backoff for repeated errors, bounded above.
                backoff = min(backoff * 2, self.MAX_BACKOFF_SECONDS)
def __init__(self):
    """Authenticate with Twitter through tweepy and keep the client on ``self``.

    On success ``self.auth`` holds the ``OAuthHandler`` and ``self.api`` the
    ``tweepy.API`` object used to fetch tweets. If building them fails, an
    error message is printed and ``self.api`` is left unset.

    Raises:
        KeyError: If one of the ``TWITTER_*`` environment variables is missing.
    """
    # NOTE(review): this def sits at top level in the source; presumably it
    # belongs to a client class whose header is not visible here - confirm.
    # Keys and tokens from the Twitter Dev Console. They are read from the
    # environment; the values that used to be hard-coded here were committed
    # in clear text and should be treated as compromised and rotated.
    consumer_key, consumer_secret, access_token, access_token_secret = (
        _twitter_credentials()
    )

    # Attempt authentication.
    try:
        # Create OAuthHandler object.
        self.auth = OAuthHandler(consumer_key, consumer_secret)
        # Set access token and secret.
        self.auth.set_access_token(access_token, access_token_secret)
        # Create tweepy API object to fetch tweets.
        self.api = tweepy.API(self.auth)
    except Exception:
        # Not a bare ``except``: Ctrl-C and interpreter exit must propagate.
        print("Error: Authentication Failed")


def _twitter_credentials():
    """Return (consumer key, consumer secret, access token, token secret)."""
    return (
        environ['TWITTER_CONSUMER_KEY'],
        environ['TWITTER_CONSUMER_SECRET'],
        environ['TWITTER_ACCESS_TOKEN_KEY'],
        environ['TWITTER_ACCESS_TOKEN_SECRET'],
    )


# Filled on first use by _english_stopwords().
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the English stop words as a frozenset, built once per process."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess(tweet, ascii=True, ignore_rt_char=True, ignore_url=True,
               ignore_mention=True, ignore_hashtag=True, letter_only=True,
               remove_stopwords=True, min_tweet_len=3):
    """Normalise a tweet into a lower-cased, space-separated token string.

    Args:
        tweet: Text of the tweet.
        ascii: If true, return ``''`` when any character has a code point
            outside 1..126.
        ignore_rt_char: Drop the token ``rt`` (the retweet marker).
        ignore_url: Drop tokens starting with ``http:`` or ``https:``.
        ignore_mention: Drop tokens starting with ``@``.
        ignore_hashtag: Drop tokens starting with ``#``.
        letter_only: Drop every token that is not purely alphabetic. When
            false, purely numeric tokens are replaced by ``<num>`` instead.
        remove_stopwords: Drop English stop words. The stop-word list is only
            loaded when this is true.
        min_tweet_len: If truthy, return ``''`` when fewer than this many
            tokens remain.

    Returns:
        The kept tokens joined by single spaces, or ``''`` if the tweet was
        rejected.
    """
    # Maybe reject tweets containing ANY non-ascii character.
    if ascii and not all(0 < ord(c) < 127 for c in tweet):
        return ''

    sword = _english_stopwords() if remove_stopwords else frozenset()

    res = []
    for token in tweet.lower().split():  # to lower, split
        if remove_stopwords and token in sword:  # ignore stopword
            continue
        if ignore_rt_char and token == 'rt':  # ignore 'retweet' symbol
            continue
        # Match both schemes; checking only 'https:' let plain-http links
        # through whenever letter_only was disabled.
        if ignore_url and token.startswith(('http:', 'https:')):
            continue
        if ignore_mention and token.startswith('@'):  # ignore mentions
            continue
        if ignore_hashtag and token.startswith('#'):  # ignore hashtags
            continue
        if letter_only:
            if not token.isalpha():  # ignore digits and anything non-letter
                continue
        elif token.isdigit():  # otherwise unify digits
            token = '<num>'
        res.append(token)

    # Ignore tweets with fewer than min_tweet_len tokens.
    if min_tweet_len and len(res) < min_tweet_len:
        return ''
    return ' '.join(res)


if __name__ == "__main__":
    # Guarded so that importing this module does not open an endless stream.
    api = Api(*_twitter_credentials())
    for line in api.GetStreamSample():
        # Step 1: keep only English tweets; .get() tolerates stream messages
        # that carry no 'lang' field.
        if 'text' in line and line.get('lang') == 'en':
            # Flatten newlines. The text stays a str: encoding it to bytes
            # first makes .replace('\n', ' ') raise TypeError on Python 3.
            text = line['text'].replace('\n', ' ')
            # Step 2: tokenise and filter.
            # NOTE(review): the source does nothing further with p_t.
            p_t = preprocess(text)