Пример #1
0
class Twitter:
    """Client that streams sample tweets and reconnects with backoff on errors."""

    def __init__(self, **kwargs):
        """Build the underlying ``Api`` client from environment variables.

        Credentials are read from ``TWITTER_CONSUMER_KEY``,
        ``TWITTER_CONSUMER_SECRET``, ``TWITTER_ACCESS_TOKEN_KEY`` and
        ``TWITTER_ACCESS_TOKEN_SECRET`` (values come from
        https://developer.twitter.com/apps).

        Args:
            **kwargs: Accepted for call-site compatibility; currently unused.

        Raises:
            KeyError: If any of the four environment variables is not set.
        """
        self.api = Api(
            consumer_key=environ['TWITTER_CONSUMER_KEY'],
            consumer_secret=environ['TWITTER_CONSUMER_SECRET'],
            access_token_key=environ['TWITTER_ACCESS_TOKEN_KEY'],
            access_token_secret=environ['TWITTER_ACCESS_TOKEN_SECRET'],
        )

    def stream_sample(self, initial_backoff=1, max_backoff=320):
        """Yield tweets from ``self.api.GetStreamSample()`` indefinitely.

        Whenever the stream raises (or simply ends), a new stream is opened.
        After an error the generator sleeps before reconnecting; the delay
        doubles on each consecutive failure and is reset as soon as a tweet
        is received again.

        Args:
            initial_backoff: Seconds to wait after the first failure.
            max_backoff: Upper bound, in seconds, for the delay between
                reconnect attempts. Without a ceiling the doubling delay
                grows without limit during a long outage. The default is
                presumably in line with Twitter's streaming reconnect
                guidance -- confirm against the current documentation.

        Yields:
            Each item produced by ``GetStreamSample()``, unchanged.
        """
        backoff = initial_backoff
        while True:
            try:
                for tweet in self.api.GetStreamSample():
                    # A tweet arrived, so the connection is healthy again.
                    backoff = initial_backoff
                    yield tweet
            except Exception:
                # The streaming connection fails from time to time; wait
                # before reconnecting so we are not rate-limited further.
                sleep(backoff)
                # Exponential backoff for repeated errors, capped so the
                # wait never exceeds ``max_backoff``.
                backoff = min(backoff * 2, max_backoff)
Пример #2
0
    def __init__(self):
        """Consume the sample stream through ``preprocess``, then set up tweepy.

        The constructor first opens ``Api(...).GetStreamSample()`` and runs
        every English tweet through the local ``preprocess`` helper. Only
        after that stream ends does it create the tweepy ``OAuthHandler`` and
        store ``self.auth`` / ``self.api``.

        NOTE(review): the streaming loop blocks the constructor for as long
        as the stream lasts and its result is discarded; this looks like two
        snippets merged together -- confirm the intended behaviour.
        """
        # keys and tokens from the Twitter Dev Console
        # SECURITY NOTE(review): these credentials are hard-coded in source.
        # They should be revoked and loaded from configuration or the
        # environment instead; left in place here to avoid changing how the
        # class is configured.
        consumer_key = '5a1tL5vH7qJ5k8btGtjFsziQb'
        consumer_secret = 'FC5nqWXQkJVKs1fmrb5InuDe0T7RE2H2FBWcqnoWNZXUZRe0lz'
        access_token = '1329456000-xxF6gN0EtV3kJueCWIagMEehqU1CNztcXFo0WlD'
        access_token_secret = 'mzCMqkWOLtOaWvQ1CRf8YztDgWCnfSKmXFaSa3VBXKqMY'
        api = Api(consumer_key, consumer_secret, access_token,
                  access_token_secret)

        # Built once instead of per tweet: the stop-word list is constant and
        # a set gives constant-time membership tests.
        # (``stopwords`` is presumably nltk.corpus.stopwords -- confirm.)
        stop_words = set(stopwords.words('english'))

        def preprocess(tweet,
                       ascii=True,
                       ignore_rt_char=True,
                       ignore_url=True,
                       ignore_mention=True,
                       ignore_hashtag=True,
                       letter_only=True,
                       remove_stopwords=True,
                       min_tweet_len=3):
            """Normalise one tweet into a space-joined string of kept tokens.

            Args:
                tweet: Tweet text (a text string, not bytes).
                ascii: Reject the whole tweet if any character falls outside
                    the code-point range 1..126.
                ignore_rt_char: Drop the token ``rt``.
                ignore_url: Drop tokens starting with ``http:`` or ``https:``.
                ignore_mention: Drop tokens starting with ``@``.
                ignore_hashtag: Drop tokens starting with ``#``.
                letter_only: Keep only purely alphabetic tokens. When false,
                    all-digit tokens are replaced by ``<num>`` instead.
                remove_stopwords: Drop English stop words.
                min_tweet_len: Minimum number of kept tokens; shorter tweets
                    are rejected. A falsy value disables the check.

            Returns:
                The kept tokens (lower-cased) joined by single spaces, or
                ``''`` when the tweet is rejected.
            """
            # Reject tweets containing ANY character outside 1..126.
            if ascii and any(not (0 < ord(c) < 127) for c in tweet):
                return ''

            res = []
            for token in tweet.lower().split():
                if remove_stopwords and token in stop_words:
                    continue
                if ignore_rt_char and token == 'rt':  # 'retweet' marker
                    continue
                if ignore_url and token.startswith(('http:', 'https:')):
                    continue
                if ignore_mention and token.startswith('@'):
                    continue
                if ignore_hashtag and token.startswith('#'):
                    continue
                if letter_only:
                    if not token.isalpha():
                        continue
                elif token.isdigit():  # otherwise unify digits
                    token = '<num>'

                res.append(token)

            # Ignore tweets with fewer than ``min_tweet_len`` kept tokens.
            if min_tweet_len and len(res) < min_tweet_len:
                return ''
            return ' '.join(res)

        for line in api.GetStreamSample():
            # step 1: keep only English messages that carry text; ``.get``
            # avoids a KeyError on messages without a 'lang' field.
            if 'text' in line and line.get('lang') == u'en':
                # step 2: flatten newlines. The text is kept as a text string
                # (no ``.encode``) so the str-based replace and the per-
                # character ``ord`` check also work on Python 3.
                text = line['text'].replace('\n', ' ')
                # NOTE(review): the preprocessed text is not used afterwards.
                p_t = preprocess(text)

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except Exception:
            # Best-effort: report the failure but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except:`` would swallow them).
            print("Error: Authentication Failed")