Пример #1
0
def coleta_tweets():
    """Collect Portuguese-language tweets about 'Harry potter'.

    Returns:
        A single-column pandas DataFrame of formatted tweet strings, or
        None when the API raises a TwitterSearchException.
    """
    try:
        client = TwitterSearch(consumer_key='',
                               consumer_secret='',
                               access_token='',
                               access_token_secret='')

        order = TwitterSearchOrder()
        order.set_keywords(['Harry potter'])
        order.set_language('pt')

        # One formatted line per tweet, each terminated with a comma.
        linhas = ['@%s tweeted: %s' % (t['user']['screen_name'], t['text']) + ','
                  for t in client.search_tweets_iterable(order)]
        print('Coleta finalizada!')

        return pd.DataFrame(linhas)
    except TwitterSearchException as e:
        print(e)
Пример #2
0
 def setup(self):
     """Build the TwitterSearch client from stored config and reset pagination state."""
     cfg = self._config
     self._client = TwitterSearch(consumer_key=cfg['consumer_key'],
                                  consumer_secret=cfg['consumer_secret'],
                                  access_token=cfg['access_token'],
                                  access_token_secret=cfg['access_token_secret'])
     self._last_id = None
Пример #3
0
def coleta_tweets():
    """Fetch Portuguese 'Harry potter' tweets and wrap them in a DataFrame.

    Returns:
        pandas.DataFrame with one formatted string per tweet, or None when
        the search library raises TwitterSearchException.
    """
    try:
        search_client = TwitterSearch(
            consumer_key='',
            consumer_secret='',
            access_token='',
            access_token_secret='',
        )

        order = TwitterSearchOrder()
        order.set_keywords(['Harry potter'])
        order.set_language('pt')

        collected = []
        for status in search_client.search_tweets_iterable(order):
            formatted = '@%s tweeted: %s' % (status['user']['screen_name'],
                                             status['text'])
            collected.append(formatted + ',')
        print('Coleta finalizada!')

        return pd.DataFrame(collected)
    except TwitterSearchException as e:
        print(e)
Пример #4
0
def SearchOnTwitter(keywords, language):
    """
    Allows to test twitter search library -> collect tweets of interest.
        Parameters:
            - keywords : string array that tweets must contain
            - language : string indicating the language of the interest tweets
        Return :
            - array of tweet texts (partial results if the API fails mid-search)
    """
    collected = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords(keywords)        # words every tweet must contain
        order.set_language(language)        # restrict results to one language
        order.set_include_entities(False)   # skip entity metadata

        # Authenticate with the module-level secret tokens.
        client = TwitterSearch(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)

        # Append inside the try so a mid-iteration failure keeps partial results.
        for status in client.search_tweets_iterable(order):
            collected.append(status['text'])

    except TwitterSearchException as e:
        print(e)

    return collected
Пример #5
0
def count_for_tag(support, hashtag):
    # Tally the country of origin for tweets matching `hashtag`, incrementing
    # per-country counters in `support` via __increment. Stops after roughly
    # 1000 tweets have been fetched.
    # NOTE(review): Python 2 code (print statements, dict.has_key,
    # dict.iteritems) -- will not run under Python 3.
    # NOTE(review): API credentials are hard-coded below; move them to
    # configuration / secret storage and rotate these keys.
    print "Searching for %s " % hashtag
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([hashtag])  # single search term: the hashtag itself
        # tso.setLanguage('en')
        tso.setCount(100)  # request the maximum page size (100 per page)
        tso.setIncludeEntities(False)  # entity metadata is not needed here

        # Create the TwitterSearch client with the (hard-coded) secret tokens.
        # NOTE(review): verify=False disables TLS certificate verification.
        ts = TwitterSearch(
            consumer_key="TNX9jTHJgTEyB1IcUECPJ4uSY",
            consumer_secret="5B1R0geyT1Iv2mBc601gaDwuBBjVkabab72UXbzVTDEJ7Z6XAb",
            access_token="143109809-5IAGEaGuuiBRjVVJT9WHUQnAQlOkVcemzhnOpMkx",
            access_token_secret="Yh5WeJo9Z01j42jbTk6tL47zl1Rdox1LJ1d2lJgAAPm0r",
            verify=False,
        )

        for tweet in ts.searchTweetsIterable(tso):
            # Preferred source: the tweet's own place record, when present.
            if tweet["place"] != None and tweet["place"].has_key("country"):
                country = tweet["place"]["country"]
                __increment(support, country)
                continue
            # Fallback: fuzzy-match the free-text user location field.
            location = tweet["user"]["location"]
            if len(location) == 0:
                continue
            country = None
            # country_desc maps country name -> list of descriptive aliases;
            # match either the name itself or any alias as a whole word.
            for cn, cd in country_desc.iteritems():
                if cn.lower() in location.lower():
                    country = cn
                    break
                for desc_part in cd:
                    desc_word = re.compile(r"\b%s\b" % desc_part)
                    if desc_word.search(location):
                        country = cn
                        break
                if country is not None:
                    break
            if country is None:
                pass
                # unparseable location -- silently skipped
            else:
                __increment(support, country)
            # Hard cap to keep API usage bounded.
            if ts.getStatistics()["tweets"] > 1000:
                break

    except TwitterSearchException as e:  # report API errors and return quietly
        print (e)
Пример #6
0
def collect_tweets_from_city(arg):
     # Fetch geocoded tweets for a known city and insert them into a MySQL
     # table, one row per tweet with coordinates, entities and user fields.
     # NOTE(review): Python 2 code (print statements).
     # NOTE(review): `self.table` is referenced below although this is a plain
     # function with no `self` parameter -- this looks pasted from a method
     # and will raise NameError as written; confirm against the original class.
     # NOTE(review): SQL is built via %-interpolation of tweet text -- SQL
     # injection risk; should use parameterized queries (curr.execute(sql, args)).

     if arg in location.keys():
         city = arg
     else:
         raise KeyError("[WARNING CASE-SENSITIVE] %s location geocode are not known, available locations %s"%(arg,location.keys()))


     try:
          tso = TwitterSearchOrder()
          tso.setCount(100)            # maximum page size
          tso.setIncludeEntities(True) # entities needed for hashtags/urls/media
          tso.setResultType('mixed')
          tso.setGeocode(**location[city])  # lat/lon/radius for the city
          ts = TwitterSearch(**twitter_auth1)

          conn = MySQLdb.connect(**mysql_auth)
          curr = conn.cursor()
          with conn:
               # Recreate the destination table from scratch on every run.
               curr.execute("DROP TABLE IF EXISTS %sTable"%self.table)
               print 'table dropped'
               curr.execute("CREATE TABLE IF NOT EXISTS %sTable (Id INT PRIMARY KEY AUTO_INCREMENT,lat DECIMAL(7,5),lon DECIMAL(8,5),place VARCHAR(200),created_at VARCHAR(40),hashtags VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,urls VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,user_mentions VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,media VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,favorite_count INT,filter_level VARCHAR(10),tid BIGINT,in_reply_to_screen_name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci,in_reply_to_status_id BIGINT,in_reply_to_user_id BIGINT,retweet_count INT,source VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,text VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,user_id BIGINT,screen_name VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci,user_location VARCHAR(40) CHARACTER SET utf8 COLLATE utf8_general_ci,retweeted_status_id BIGINT)"%self.table)


               for tweet in ts.searchTweetsIterable(tso):

                    # Coordinates arrive as [lon, lat]; default to 0/0 when absent.
                    if tweet['coordinates']!=None:
                         lat =  float(tweet['coordinates']['coordinates'][1])
                         lon =  float(tweet['coordinates']['coordinates'][0])
                    else:
                         lat = 0
                         lon = 0
                    place = tweet['place']['full_name']
                    created_at = tweet['created_at']
                    # Entity lists are flattened into single strings, items
                    # joined with the literal "%20" separator.
                    hashtags = "%20".join([ item['text'] for item in tweet['entities']['hashtags']])
                    urls = "%20".join([ item['url'] for item in tweet['entities']['urls']])
                    user_mentions = "%20".join([ item['id_str']+"%40"+item["screen_name"] for item in tweet['entities']['user_mentions']])
                    media = "%20".join([ item['id_str']+"%40"+item["media_url"] for item in tweet['entities']['media']]) if 'media' in tweet['entities'].keys() else ''
                    # Missing/None numeric fields default to 0, strings to ''.
                    favorite_count = tweet["favorite_count"] if tweet["favorite_count"]!=None else 0
                    filter_level = tweet["filter_level"] if 'filter_level' in tweet.keys() else ''
                    tid = tweet['id']
                    in_reply_to_screen_name = tweet["in_reply_to_screen_name"] if tweet["in_reply_to_screen_name"]!=None else 0
                    in_reply_to_status_id = tweet["in_reply_to_status_id"] if tweet["in_reply_to_status_id"]!=None else 0
                    in_reply_to_user_id = tweet["in_reply_to_user_id"] if tweet["in_reply_to_user_id"]!=None else 0
                    retweet_count = tweet["retweet_count"] if tweet["retweet_count"]!=None else 0
                    # Manual quote-escaping only; still unsafe against injection.
                    source = tweet["source"].replace("'","\\'").replace('"','\\"')
                    text = tweet["text"].replace("'","\\'").replace('"','\\"')
                    user_id = tweet["user"]["id"]
                    screen_name = tweet["user"]["screen_name"]
                    user_location = tweet["user"]["location"]
                    retweeted_status_id = tweet["retweeted_status"]["id"] if "retweeted_status" in tweet.keys() else 0
                    query = """INSERT INTO %sTable(lat,lon,place,created_at,hashtags,urls,user_mentions,media,favorite_count,filter_level,tid,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,retweet_count,source,text,user_id,screen_name,user_location,retweeted_status_id) VALUES ("%f","%f","%s","%s","%s","%s","%s","%s","%d","%s","%d","%s","%d","%d","%d","%s","%s","%d","%s","%s","%d")"""%(city,lat,lon,place,created_at,hashtags,urls,user_mentions,media,favorite_count,filter_level,tid,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,retweet_count,source,text,user_id,screen_name,user_location,retweeted_status_id)

                    curr.execute(query)


     except TwitterSearchException as e:
          print (e)
Пример #7
0
 def __init__(self):
     """Authenticate against Twitter with the embedded OAuth credentials."""
     # NOTE(review): credentials are hard-coded in source; move to config/secrets.
     self.twitter = TwitterSearch(
         consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
         consumer_secret='mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
         access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
         access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q')
 def __init__(self, search_query):
     """Prepare a tweet iterator for `search_query` using env-var credentials."""
     self.search_query = search_query
     env = os.getenv
     self.library = TwitterSearch(
         consumer_key=env("SEARCHTWEETS_CONSUMER_KEY"),
         consumer_secret=env("SEARCHTWEETS_CONSUMER_SECRET"),
         access_token=env("SEARCHTWEETS_ACCESS_TOKEN"),
         access_token_secret=env("SEARCHTWEETS_ACCESS_TOKEN_SECRET"))
     order = self.search_query.create_twitter_search_order()
     self.iter = iter(self.library.search_tweets_iterable(order))
Пример #9
0
 def __init__(self, movie, limit=10, language='en'):
     """Store search settings and build the Twitter client.

     Args:
         movie: Title (or query string) whose tweets will be fetched.
         limit: Maximum number of tweets to process (default 10).
         language: ISO language code filter (default 'en').
     """
     self.movie = movie
     # BUG FIX: the original hard-coded `self.limit = 10`, silently ignoring
     # the caller-supplied `limit` argument.
     self.limit = limit
     self.language = language
     self.client = TwitterSearch(
         consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
         consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
         access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
         access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET')
     )
Пример #10
0
 def __init__(self):
     """Configure a reusable search order and the authenticated API client."""
     order = TwitterSearchOrder()
     order.set_keywords([''])           # match-all placeholder keyword
     order.set_count(100)               # request the maximum page size
     order.set_include_entities(False)  # entity metadata not needed
     self.properties = order
     creds = DATASOURCES['twitter']
     self.api = TwitterSearch(
         consumer_key=creds['api_key'],
         consumer_secret=creds['api_secret'],
         access_token=creds['access_token'],
         access_token_secret=creds['access_token_secret'])
Пример #11
0
    def setup(self):
        """Create the TwitterSearch client and validate the sensor config.

        Raises:
            ValueError: when the configured "query" value is not a list.
        """
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret'])
        self._last_id = None

        # Idiom fix: isinstance() instead of `type(...) is not list` (PEP 8);
        # also accepts list subclasses.
        if not isinstance(self._config['query'], list):
            self._logger.exception('Twitter sensor failed. "query" config \
                                    value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')
Пример #12
0
def main(word):
    """Search tweets for `word`, round-trip-translate them, and emit a word-cloud HTML file."""
    searcher = TwitterSearch(word)
    tweets = searcher.search_start(amount=10)
    del searcher

    words = []
    print('単語リスト作成中')
    # Only the first 20 results are run through the ja -> en -> ja round trip.
    for status in tweets[:20]:
        words.extend(ja_en_ja(status))

    freq = Counter(words)
    print('htmlファイル作成')
    twodata_into_wordcloud(list(freq.keys()), list(freq.values()), minimamnum=0)
Пример #13
0
 def get_tweets(query):
     """Return up to `query['count']` (default 5) tweets matching `query['query']`."""
     from TwitterSearch import TwitterSearch, TwitterSearchOrder
     import itertools

     order = TwitterSearchOrder()
     order.set_keywords(query.get('query', '').split(' '))
     order.set_include_entities(False)

     client = TwitterSearch(
         consumer_key=app.config.get('TWITTER_CONSUMER_KEY'),
         consumer_secret=app.config.get('TWITTER_CONSUMER_SECRET'),
         access_token=app.config.get('TWITTER_ACCESS_TOKEN'),
         access_token_secret=app.config.get('TWITTER_ACCESS_TOKEN_SECRET'))

     limit = int(query.get('count', 5))
     return list(itertools.islice(client.search_tweets_iterable(order), 0, limit))
Пример #14
0
def getTweetsByWords(authdata, word, limit=100):
    """Collect at most `limit` tweet texts containing `word`.

    Args:
        authdata: dict with the four consumer/access OAuth keys.
        word: single keyword to search for.
        limit: maximum number of tweets to return (default 100).

    Returns:
        dict with a status message and the list of tweet texts.
    """
    order = TwitterSearchOrder()
    order.set_keywords([word])
    order.set_include_entities(False)
    client = TwitterSearch(consumer_key=authdata['consumer_key'],
                           consumer_secret=authdata['consumer_secret'],
                           access_token=authdata['access_token'],
                           access_token_secret=authdata['access_token_secret'])
    result = []
    for index, tweet in enumerate(client.search_tweets_iterable(order)):
        if index == limit:
            break
        result.append(tweet['text'])
        print(index)  # progress output, same values as the original counter
    return {'status': 'Task Completed', 'result': result}
Пример #15
0
def count_tweets_of_app(app_name):
    """Count votes (tweets) for an app.

    A vote is a tweet from a distinct user containing both the app's hashtag
    and COMPETITION_NAME, posted inside the competition time window.

    Args:
        app_name: name of the app whose tweets are to be counted.

    Returns:
        Number of distinct voting users, or -1 when the Twitter API fails.
    """
    from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
    try:
        tso = TwitterSearchOrder()
        # Both terms must appear in a tweet for it to count as a vote.
        tso.set_keywords([check_hashtag(app_name), COMPETITION_NAME])

        ts = TwitterSearch(consumer_key=TWITTER_API_KEY,
                           consumer_secret=TWITTER_API_KEY_SECRET,
                           access_token=TWITTER_ACCESS_TOKEN,
                           access_token_secret=TWITTER_ACCESS_TOKEN_SECRET)

        # Perf fix: track distinct users in a set (O(1) membership) instead of
        # the original list's O(n) scan per tweet.
        users = set()

        for tweet in ts.search_tweets_iterable(tso):
            user = tweet['user']['id']
            if user in users:
                continue  # one vote per user

            # Timestamp format per https://dev.twitter.com/overview/api/tweets
            time_tweet = datetime.datetime.strptime(
                tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')

            # Idiom fix: chained comparison instead of bitwise `&` on booleans.
            if COMPETITION_START_DATE < time_tweet < COMPETITION_END_DATE:
                users.add(user)

        return len(users)

    except TwitterSearchException as e:  # report API errors, signal failure
        print(e)
        return -1
Пример #16
0
 def initialize_twitter_client(self):
     """Instantiate the TwitterSearch client from the stored settings."""
     settings = self.settings
     self.twitter_client = TwitterSearch(
         consumer_key=settings['TWITTER_CONSUMER_KEY'],
         consumer_secret=settings['TWITTER_CONSUMER_SECRET'],
         access_token=settings['TWITTER_ACCESS_TOKEN'],
         access_token_secret=settings['TWITTER_ACCESS_SECRET'])
Пример #17
0
def reload_api():
    """
    Reinitialize the API client with a new API key. This method may block if no
    valid keys are currently available.
    """
    global API
    # KEYS.advance() yields the next key bundle, one credential per line.
    key_parts = KEYS.advance().splitlines()
    API = TwitterSearch(*key_parts)
    def init_tw_search_lib(self, domain_keyword):
        """
        Init TwitterSearch Library
        (Copyright (C) 2013 Christian Koepp
        https://github.com/ckoepp/TwitterSearch/tree/master)

        Arguments:
            domain_keyword {str} -- The keyword from <domain_keywords_dict>
                                    that will be used to search in Twitter

        Returns:
            [TwitterSearch] -- TwitterSearch object with our secret tokens
            [TwitterSearchOrder] -- TwitterSearchOrder object with initialized attributes

        Raises:
            TwitterSearchException: re-raised after being printed.
        """
        try:
            tso = TwitterSearchOrder()
            tso.add_keyword(domain_keyword)  # keyword to search Twitter for
            tso.set_language('en')           # English tweets only
            tso.set_include_entities(False)  # skip entity metadata

            # Create the TwitterSearch object with our secret tokens.
            ts = TwitterSearch(
                consumer_key='<your-CONSUMER_KEY>',
                consumer_secret='<your-CONSUMER_SECRET>',
                access_token='<your-ACCESS_TOKEN>',
                access_token_secret='<your-ACCESS_TOKEN_SECRET>')

        except TwitterSearchException as e:
            print(e)
            # BUG FIX: the original fell through to `return ts, tso`, which
            # raised NameError because the locals were never bound on failure.
            raise

        return ts, tso
def hello_world(keywords):
    """Search Twitter for `keywords` and return the tweet texts as JSON.

    The returned response carries permissive CORS headers; on a
    TwitterSearch failure an empty result list is returned instead of
    crashing on an unbound variable.
    """
    # BUG FIX: bind before the try so an early failure cannot leave
    # `tweets` undefined at the jsonify() call below.
    tweets = []
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keywords])
        # NOTE(review): `keys` is passed as a single positional argument --
        # confirm TwitterSearch accepts that (perhaps **keys was intended).
        ts = TwitterSearch(keys)
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])
    except TwitterSearchException as e:
        print(e)
    response = jsonify({'status': 200, 'results': tweets})
    # BUG FIX: the original attached the CORS headers to a throwaway
    # make_response() object and then discarded it; attach them to the
    # response that is actually returned.
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "*")
    response.headers.add("Access-Control-Allow-Methods", "*")
    return response
Пример #20
0
 def setup(self):
     """Construct the Twitter client from config and reset the last-seen id."""
     conf = self._config
     self._client = TwitterSearch(consumer_key=conf['consumer_key'],
                                  consumer_secret=conf['consumer_secret'],
                                  access_token=conf['access_token'],
                                  access_token_secret=conf['access_token_secret'])
     self._last_id = None
Пример #21
0
    def get(self, user_handle=None):
        """Return up to `numTweets` (default 20) recent tweets for `user_handle` as JSON."""
        from TwitterSearch import TwitterSearch, TwitterUserOrder, TwitterSearchException

        if user_handle is None:
            return jsonify({
                'response': [],
                'status': 400,
                'message': 'No handle provided'
            })

        try:
            import itertools

            user_profile = TwitterUserOrder(user_handle)

            # NOTE(review): API credentials are hard-coded in source; they
            # belong in configuration or a secret store.
            consumer = 'CedAugFXME85jW5MRraKTJFgO'
            consumer_secret = 'RjLOp02iZqQnGM5cOt4bBeFjFHtFyVW09NSH14rVEyPouFvWLs'
            access = '378294925-zdTFn1Gf8rcBzv6gshfjfONZG9ZSc8QFUlZd1YO8'
            access_secret = '0MV9lR9kFdoUkLnKoWgdZCl74vunMAoCR7INC7pQYrSfW'

            client = TwitterSearch(consumer_key=consumer,
                                   consumer_secret=consumer_secret,
                                   access_token=access,
                                   access_token_secret=access_secret)

            # Lazily iterate the user's timeline and take only what we need.
            num_tweets = int(request.args.get('numTweets', 20))
            resolved = list(itertools.islice(
                client.search_tweets_iterable(user_profile), num_tweets))

            return jsonify({'response': resolved, 'status': 200})

        except TwitterSearchException as e:
            return jsonify({
                'response': [],
                'status': 404,
                'message': 'There was a problem fetching the data for {}: {}'.format(
                    user_handle, e)
            })
Пример #22
0
 def __init__(self):
     """Set up a broad (empty-keyword) search order and the API client."""
     self.properties = TwitterSearchOrder()
     self.properties.set_keywords([''])           # placeholder keyword
     self.properties.set_count(100)               # maximum page size
     self.properties.set_include_entities(False)  # no entity metadata
     twitter_cfg = DATASOURCES['twitter']
     self.api = TwitterSearch(consumer_key=twitter_cfg['api_key'],
                              consumer_secret=twitter_cfg['api_secret'],
                              access_token=twitter_cfg['access_token'],
                              access_token_secret=twitter_cfg['access_token_secret'])
Пример #23
0
    def fetch_twitter_entries(self):
        """Fetch up to 50 recent geocoded tweets for this origin's area and
        persist them via save_tweets(); always records the last max_id seen,
        even when the fetch fails partway through.
        """
        # NOTE(review): uses the old camelCase TwitterSearch API
        # (setKeywords/setMaxID/...), i.e. an early library version.
        origin = self
        max_id = origin.max_id
        since_id = None
        area = origin.area
        try:
            count = 50
            tso = TwitterSearchOrder() # create a TwitterSearchOrder object
            tso.setKeywords(['']) # empty keyword: match anything in the area
            tso.setResultType('recent')
            if origin.max_id:
                tso.setMaxID(origin.max_id-1)  # as per twitter docs
            tso.setLanguage('en') # English tweets only
            tso.setGeocode(latitude=area.lat, longitude=area.long, radius=area.rad, km=True)
            tso.setCount(count) # page size
            tso.setIncludeEntities(False) # entity metadata not needed

            # Create a TwitterSearch object with our secret tokens.
            ts = TwitterSearch(
                **settings.TWITTER
            )
            total = 0
            for tweet in ts.searchTweetsIterable(tso): # iterate search results

                max_id = save_tweets(area, origin, tweet)
                if not since_id:
                    since_id = max_id  # remember the first (newest) id seen

                total += 1

                # Hard cap, matching the requested page size.
                if total >= 50:
                    break

        except TwitterSearchException as e: # report API errors, keep going
            print(e)
        finally:
            # Persist pagination progress even when the fetch failed.
            origin.max_id = max_id
            origin.save()
Пример #24
0
def getTweets(politician_id, searchOnlySexistWords):
    """Fetch tweets mentioning a politician.

    Args:
        politician_id: primary key of the Politician to search for.
        searchOnlySexistWords: when True, pair each configured sexist word
            with each politician name; when False, search the plain names.

    Returns:
        An iterable of tweets (unbounded when searchOnlySexistWords is True,
        otherwise the first page of up to 100 statuses), or None on API error.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        full_name = politician.first_name + " " + politician.last_name
        politician_names = [full_name, politician.username]

        tso = TwitterSearchOrder()

        searchTerms = []
        if searchOnlySexistWords:
            searchTerms = [word + ' ' + name
                           for word in CONFIG['SEXISTWORDS']
                           for name in politician_names]
        elif searchOnlySexistWords is False:
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Rewrite the query URL so extended (untruncated) tweet text is returned.
        tso.set_search_url(tso.create_search_url() + "&tweet_mode=extended")

        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " + str(full_name) + "**")
        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)
        # Not restricted to sexist words: a single page (max 100) is enough.
        tweets = ts.search_tweets(tso)
        return tweets['content']['statuses']

    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
    def getStats(self, url, proxy, headers, timeout):
        """Return the combined retweet + favorite count of tweets containing `url`.

        Args:
            url: URL that could appear in a tweet.
            proxy: 'ip:port' proxy string handed to TwitterSearch.
            headers: unused here; kept for interface compatibility.
            timeout: unused here; kept for interface compatibility.
        """
        order = TwitterSearchOrder()
        order.set_search_url('q=' + url)
        order.set_result_type(result_type='mixed')
        order.set_include_entities(False)
        order.set_count(100)

        client = TwitterSearch(consumer_key=self.ConsumerKey,
                               consumer_secret=self.ConsumerSecret,
                               access_token=self.AccessTokenKey,
                               access_token_secret=self.AccessTokenSecret,
                               proxy=proxy)

        return sum(t['retweet_count'] + t['favorite_count']
                   for t in client.search_tweets_iterable(order))
Пример #26
0
    def search(self):
        """Run the configured search, caching and persisting each tweet's text."""
        try:
            order = TwitterSearchOrder()
            # NOTE(review): `*` unpacks self.search_terms into separate
            # positional arguments, exactly as the original call did --
            # confirm set_keywords really expects that rather than a list.
            order.set_keywords(*self.search_terms)
            order.set_include_entities(False)
            order.set_count(100)

            client = TwitterSearch(
                consumer_key='aOUVcCWLIYEbUvHW5dLjVc7Gf',
                consumer_secret='8qb3LTAHbj43J40Rxm0RMLAOaP4QoEHfFVGTeJ3S6iUmSBq6JJ',
                access_token='4251433696-ulZx8dJ3QZE95ds0PhXNldeKFhjhBUoGSuGycSE',
                access_token_secret='wx65NQaBHHgwC4xLOgRxFSs4kWWzkg09KkgNkAKHZryks')

            for status in client.search_tweets_iterable(order):
                text = status['text']
                self.data.append(text)
                self.save_line(text)
        except TwitterSearchException as exception:
            print(exception)
Пример #27
0
    def setup(self):
        """Create the TwitterSearch client and validate the sensor config.

        Raises:
            ValueError: when the configured "query" value is not a list.
        """
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret']
        )
        self._last_id = None

        # Idiom fix: isinstance() instead of `type(...) is not list` (PEP 8);
        # also accepts list subclasses.
        if not isinstance(self._config['query'], list):
            self._logger.exception('Twitter sensor failed. "query" config \
                                    value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')
Пример #28
0
def get_search_api(config):
    """Return a module-level cached TwitterSearch client.

    The client is built from the [api] section of `config` on first call
    and reused on every subsequent call.
    """
    global _API
    if not _API:
        api_cfg = config['api']
        _API = TwitterSearch(
            consumer_key=api_cfg['twitterconsumerkey'],
            consumer_secret=api_cfg['twitterconsumersecret'],
            access_token=api_cfg['twitteraccesstoken'],
            access_token_secret=api_cfg['twitteraccesstokensecret'])
    return _API
Пример #29
0
class TwitterService(object):
    """Thin wrapper around TwitterSearch for location-based tweet lookups."""

    def __init__(self):
        """Prepare a match-all search order and authenticate the API client."""
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])           # match-all placeholder
        self.properties.set_count(100)               # maximum page size
        self.properties.set_include_entities(False)  # no entity metadata
        creds = DATASOURCES['twitter']
        self.api = TwitterSearch(consumer_key=creds['api_key'],
                                 consumer_secret=creds['api_secret'],
                                 access_token=creds['access_token'],
                                 access_token_secret=creds['access_token_secret'])

    def get_tweets(self, lat, lng):
        """Return raw search results within 1 km of (lat, lng)."""
        self.properties.set_geocode(float(lat), float(lng), 1)
        return self.api.search_tweets(self.properties)
Пример #30
0
def getTweets(politician_id):
    """Collect tweets pairing sexist words with a politician's names.

    Args:
        politician_id: primary key of the Politician to search for.

    Returns:
        An iterable of matching tweets, or None when the API raises.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        full_name = politician.first_name + " " + politician.last_name
        politician_names = [full_name, politician.last_name, politician.username]
        print("Getting Tweets for " + str(full_name))

        tso = TwitterSearchOrder()
        sexistWords = ['bitch', 'skank', 'rape']
        # Pair every sexist word with every name variant (same order as a
        # nested word-outer / name-inner loop).
        searchTerms = [word + ' ' + name
                       for word in sexistWords
                       for name in politician_names]

        tso.set_keywords(searchTerms, or_operator=True)
        print(searchTerms)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Rewrite the query URL so extended (untruncated) tweet text is returned.
        tso.set_search_url(tso.create_search_url() + "&tweet_mode=extended")

        ts = TwitterSearch(
            consumer_key=os.environ.get('CONSUMER_KEY', CONFIG['CONSUMER_KEY']),
            consumer_secret=os.environ.get('CONSUMER_SECRET', CONFIG['CONSUMER_SECRET']),
            access_token=os.environ.get('ACCESS_TOKEN', CONFIG['ACCESS_TOKEN']),
            access_token_secret=os.environ.get('ACCESS_TOKEN_SECRET', CONFIG['ACCESS_TOKEN_SECRET'])
        )

        return ts.search_tweets_iterable(tso)

    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))

# if __name__ == "__main__":
#     getTweets()
Пример #31
0
class TwitterScrape:
    """Methods to gather data from twitter searches"""
    def __init__(self):
        # Login to twitter handle using oauth.
        # NOTE(review): credentials are hard-coded in source; move them to
        # configuration or environment variables and rotate these keys.
        self.twitter = TwitterSearch(
            consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
            consumer_secret=
            'mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
            access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
            access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q'
        )

    def search(self, query, lang='en', n=10**5):
        """
        Search twitter for specified query.
        Function returns n tweets or as many as can be found for that query.

        Parameters:
        query -- Search query (String)
        lang -- Specify language of tweets, optional, default: 'en' (String)
        n -- Number of tweets to return, optional, default: 10**5 (Int)

        Returns:
        tweets_out -- Pandas series of tweets of length at most n
        """
        # Initialise container
        tweets_out = []
        # Setup twitter search
        tso = TwitterSearchOrder()
        tso.set_keywords([query])
        tso.set_language(lang)
        tso.set_include_entities(False)

        # Begin search
        sys.stdout.write("Tweet number out of {0}: ".format(n))
        for i, tweet in enumerate(self.twitter.search_tweets_iterable(tso)):
            # Break from loop when n tweets are reached
            if i == n:
                break
            # Output progress every 100 tweets
            if i % 100 == 0:
                sys.stdout.write('{0} '.format(i))
                sys.stdout.flush()
            # Add the next tweet to the container
            tweets_out.append('%s' % (tweet['text']))
        # NOTE(review): bare `print` prints a newline on Python 2 but is a
        # no-op expression on Python 3 -- likely a Python 2 leftover.
        print
        # Return as pandas series as it's easier to work with
        return pd.Series(tweets_out)
Пример #32
0
class TwitterService(object):
    """Thin wrapper around the TwitterSearch client for geocoded lookups."""

    def __init__(self):
        # Pre-build a reusable search order: empty keyword, 100 results per
        # page, and no entity metadata.
        order = TwitterSearchOrder()
        order.set_keywords([''])
        order.set_count(100)
        order.set_include_entities(False)
        self.properties = order

        # Authenticate with the credentials configured for this datasource.
        creds = DATASOURCES['twitter']
        self.api = TwitterSearch(
            consumer_key=creds['api_key'],
            consumer_secret=creds['api_secret'],
            access_token=creds['access_token'],
            access_token_secret=creds['access_token_secret'])

    def get_tweets(self, lat, lng):
        """Return raw search results for tweets within 1 km of (lat, lng)."""
        self.properties.set_geocode(float(lat), float(lng), 1)
        return self.api.search_tweets(self.properties)
Пример #33
0
def authenticate_twitter():
    """Create a TwitterSearch client from secret tokens stored in SSM.

    Each credential is kept encrypted in SSM and decrypted on retrieval.
    """
    def _secret(name):
        # Fetch and decrypt a single SSM parameter value.
        return ssm.get_parameter(
            Name=name, WithDecryption=True)['Parameter']['Value']

    return TwitterSearch(
        consumer_key=_secret('twitter-consumer-key'),
        consumer_secret=_secret('twitter-consumer-secret'),
        access_token=_secret('twitter-access-token'),
        access_token_secret=_secret('twitter-access-token-secret'))
Пример #34
0
class TwitterReviews(object):
    """Context manager that fetches Twitter 'reviews' for a movie.

    Searches for tweets tagged '#<movie>Movie' and wraps the results in a
    Reviews object, capped at ``limit`` entries.
    """

    def __init__(self, movie, limit=10, language='en'):
        # movie: movie name used to build the '#<movie>Movie' hashtag.
        self.movie = movie
        # Fix: the original assigned the literal 10 here, silently ignoring
        # the ``limit`` argument callers passed in.
        self.limit = limit
        # language: tweet language filter (ISO code).
        self.language = language
        self.client = TwitterSearch(
            consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
            consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
            access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
            access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET')
        )

    def __enter__(self):
        self.client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Log and clean up the client only for library-specific failures,
        # then always disconnect.
        if exc_type == TwitterSearchException:
            logging.exception(str(exc_val))
            self.client.cleanUp()
        self.client.disconnect()

    @property
    def reviews(self):
        """Reviews built from the live search results, capped at limit."""
        return Reviews(self._get_results(), limit=self.limit)

    def _prepare_request(self):
        # Build the search order for this movie/language.
        # NOTE(review): these camelCase setters (setKeywords, ...) belong to
        # an old TwitterSearch API version — confirm the installed version.
        tso = TwitterSearchOrder()
        tso.setKeywords(self._get_keywords())
        tso.setLanguage(self.language)
        tso.setIncludeEntities(False)
        return tso

    def _get_keywords(self):
        # e.g. movie='Batman' -> ['#BatmanMovie']
        return ['#' + self.movie + 'Movie']

    def _get_results(self):
        request = self._prepare_request()
        return self.client.getSearchResults(request)
def collect_tweets(keyword, count, force=False):
    """Collect tweets matching ``keyword`` and store them in MongoDB.

    Python 2 code (print statements, ``unicode``, ``ConfigParser``).
    Searches Twitter for ``keyword``, scores each tweet's sentiment with
    AlchemyAPI, wraps the result in an IMS Caliper-style "MessagingEvent"
    document and inserts it into a Mongo collection named after the keyword.

    Parameters:
    keyword -- search term; a leading '.' is rewritten to '#' so hashtags
               can be passed on a shell command line (str)
    count -- maximum number of documents to insert in this run (int)
    force -- when True, continue with placeholder sentiment values after
             the AlchemyAPI daily limit is exceeded (bool)
    """
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser

    # try:
    #     keyword = sys.argv[1]
    #     count = int(sys.argv[2])
    # except IndexError:
    # 	e_too_few_args = "You did not enter enough arguments. Two are required: keyword, and count"
    # 	raise Exception(e_too_few_args)
    # try:
    #     if sys.argv[3] == '-f':
    #         force = True
    #     else:
    #         e_invalid_argument = "The only option available is -f. It is used to force the script to continue when the Alchemy API limit is exceeded."
    #         raise Exception(e_invalid_argument)    
    # except IndexError:
    #     force = False

    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')

    # Connect to the Mongo database using MongoClient

    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()
    # Access/create the collection based on the command line argument
    tweets = db[keyword]

    #Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()

    # To accommodate for hashtags the user can substitute a . for the # in the command line. Lines 30 & 31 return it to a hashtag for the search.
    # NOTE(review): ``is`` on a string literal relies on CPython interning of
    # one-character strings; ``==`` would be the safe comparison — confirm.
    if keyword[0] is ".":
        keyword = keyword.replace('.', '#')

    # Lines 33-42 ensure that the query is not doing duplicate work.
    # First, it counts to see how many documents exist in the collection
    db_count = tweets.count()

    # If there are documents in the collection, the collection is queried, tweet objects are sorted by date, and the tweet_id of the most recent tweet is retrieved and later set as the "since_id"
    # NOTE(review): ``is not 0`` relies on CPython small-int caching; ``!= 0``
    # is the safe form. Also, the cursor below uses .limit(1) yet is indexed
    # with [db_count-1] — verify this behaves as intended when db_count > 1.
    if db_count is not 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If ther are no documents in the collection, no queries are done, and the since_id is left out of the API call.    
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'

    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder() 

    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])

    # Select language
    tso.set_language('en') 

    # Include Entity information
    tso.set_include_entities(True)

    if db_count is not 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
    	print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'


    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
     )

    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0

    # this is where the fun actually starts :)
    # Iterate tweets until ``count`` inserts are made; StopIteration is
    # raised deliberately to exit the loop and report the insert total.
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object (skeleton filled in below)
                caliper_tweet = {
              "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
              "type": "MessagingEvent",
              "startedAtTime": "",
              ## Can be used to query Twitter API for user information
              "actor": "",
              "verb": "tweetSent",
              "object": {
                "type": "MessagingEvent",
                "tweet_id": "",
                "tweet_uri": "",
                "subtype": "tweet",
                ## "to" should be calculated by checking in_reply_to_user_id_str is null. If it's not null, then it should be concatenated to "uri:twitter/user/" and stored in "object"['to']
                "to": "",
                "author": {
                    "author_uri": "",
                    "author_alias": "",
                    "author_name": "",
                    },
                "text": "",
                "sentiment": {
                    "type": "",
                    "score": "",
                    "color": ""
                },
                "parent": "",
                ## "mentions" is an array of the caliper IDs from the user_mentions objects array
                "user_mentions": [],
                ## "hashtags" is an array of the hashtag texts included in the tweet entities
                "hashtags": []
              }
            }

                 # Set the re-usable variables
                tweet_text = tweet['text']

                ## AlchemyAPI Sentiment Analysis
                # Positive scores map to green rgba colors, negative to red;
                # alpha is the absolute score.
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else: 
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)


                # Fill in the Caliper document from the raw tweet payload.
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:    
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list

                tweets.insert(caliper_tweet)

                db_inserts = db_inserts + 1

            else:
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
Пример #36
0
cities = f.read().splitlines()  # read all lines into cities
for city in cities:
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(['#travel', '#holiday', '#vacation'],
                         or_operator=True)
        tso.add_keyword(
            city)  # add the city name as one of the search keywords
        tso.set_language('en')  # we want to see English tweets only
        tso.set_count(100)  # search for 100 pages of tweets
        tso.set_result_type(
            'mixed')  # search for both the popular and real-time tweets
        tso.set_include_entities(False)

        ts = TwitterSearch(consumer_key='aaabbb',
                           consumer_secret='cccddd',
                           access_token='112233',
                           access_token_secret='445566')

        def my_callback_closure(
            current_ts_instance
        ):  # accepts ONE argument: an instance of TwitterSearch
            queries, tweets_seen = current_ts_instance.get_statistics()
            if queries > 0 and (queries %
                                5) == 0:  # trigger delay every 5th query
                time.sleep(60)  # sleep for 60 seconds

        filename = city + '.txt'
        f = open('./result/' + filename, 'w')
        for tweet in ts.search_tweets_iterable(tso,
                                               callback=my_callback_closure):
            if 'RT @' not in tweet['text']:
Пример #37
0
def Tweets():
    """Collect recent tweets for each company listed in the global MainDF.

    Relies on module-level state: ``MainDF`` (rows with 'company', 'twitter'
    and 'hashtag' entries), ``new_date``, ``max_feeds`` cap per timeline,
    ``tweets_datasets`` (dict of per-company DataFrames this function fills),
    ``tw_current_companies`` (list of companies that yielded tweets), and the
    helpers ``Find`` (URL extractor) and ``parse`` (date parser).
    For each company it runs a hashtag keyword search plus a user-timeline
    fetch, skipping retweets and non-English tweets.
    """
    try:

        # Cap on tweets taken from each company's own timeline.
        max_feeds = 10
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)
        # Request the full (untruncated) tweet text.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})

        # NOTE(review): credentials are blank and the proxy is a placeholder
        # ('http://proxy_address') — fill these in before running.
        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='',
                           proxy='http://proxy_address')

        for c in range(len(MainDF)):
            # Per-company counter for the timeline cap below.
            count = 0

            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)

            # Re-use the same search order, swapping in this company's hashtags.
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []

            # Timeline order for the company's own account.
            tuo = TwitterUserOrder(MainDF['twitter'][c])
            #            tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            #            tuo.set_until(days_ago)
            #            tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})

            #for tweet in ts.search_tweets_iterable(tso):
            #    print(tweet)
            #    tweets_list.append([tweet['user']['screen_name'],tweet['full_text']])

            # Pass 1: hashtag search — keep originals only (skip retweets).
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])

            # Pass 2: company timeline — English only, capped at max_feeds.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None
                else:

                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)

                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1

                    if count == max_feeds:
                        break

            # Package results for this company into a labelled DataFrame.
            if tweets_list != []:
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')

                # Normalise the 'Date' column from Twitter's timestamp string
                # to a plain date object, row by row.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):

                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()

                    #print(datasets[companies_names[count]])

                tw_current_companies.append(MainDF['company'][c])

            else:
                None

            #tweets_list.append()
            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
Пример #38
0
class TwitterSearchSensor(PollingSensor):
    """Polls the Twitter search API for configured keywords and dispatches a
    'twitter.matched_tweet' trigger for every new matching tweet."""

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(sensor_service=sensor_service,
                                                  config=config,
                                                  poll_interval=poll_interval)
        # Trigger reference emitted once per matched tweet.
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Build the authenticated client and validate the query config."""
        cfg = self._config
        self._client = TwitterSearch(consumer_key=cfg['consumer_key'],
                                     consumer_secret=cfg['consumer_secret'],
                                     access_token=cfg['access_token'],
                                     access_token_secret=cfg['access_token_secret'])
        self._last_id = None

        # "query" must be exactly a list (intentional strict type check).
        if type(cfg['query']) is not list:
            self._logger.exception('Twitter sensor failed. "query" config \
                                    value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Fetch tweets newer than the last seen id and dispatch triggers."""
        search_order = TwitterSearchOrder()
        search_order.set_keywords(self._config['query'], True)

        language = self._config.get('language', None)
        if language:
            search_order.set_language(language)

        search_order.set_result_type('recent')
        search_order.set_count(self._config.get('count', 30))
        search_order.set_include_entities(False)

        # Only ask for tweets newer than the last one we processed.
        last_id = self._get_last_id()
        if last_id:
            search_order.set_since_id(int(last_id))

        try:
            response = self._client.search_tweets(search_order)
            statuses = response['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # Process oldest-first so the stored last_id ends up the newest.
        ordered = list(reversed(statuses))
        if ordered:
            self._set_last_id(last_id=ordered[-1]['id'])

        for tweet in ordered:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        # Nothing to tear down.
        pass

    def add_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def update_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def remove_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def _get_last_id(self):
        """Return the cached last tweet id, loading it from the datastore
        on first use when the sensor service supports persistence."""
        can_read = hasattr(self._sensor_service, 'get_value')
        if not self._last_id and can_read:
            self._last_id = self._sensor_service.get_value(name='last_id')
        return self._last_id

    def _set_last_id(self, last_id):
        """Cache the newest processed tweet id and persist it if possible."""
        self._last_id = last_id
        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Emit the matched-tweet trigger with a trimmed tweet payload."""
        user = tweet['user']
        url = '%s/%s/status/%s' % (BASE_URL, user['screen_name'], tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': user['screen_name'],
                'name': user['name'],
                'location': user['location'],
                'description': user['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=self._trigger_ref, payload=payload)
Пример #39
0
import json
from TwitterSearch import TwitterSearch, TwitterSearchException, TwitterUserOrder
from watson_developer_cloud import AlchemyLanguageV1

alchemy_language = AlchemyLanguageV1(
    api_key='a04bf0cda38fd380a2e89b9b54d6076729b568ce')

# def getTweets(place):
try:
    place = raw_input("Enter a twitter handle: ")
    tuo = TwitterUserOrder(place)  # create a TwitterUserOrder

    # it's about time to create TwitterSearch object again
    ts = TwitterSearch(
        consumer_key='jP53etLOQHrdCtMc4j2Djas2z',
        consumer_secret='9UmpzmT1IPF6JuNzODHOyXZU19Vv1C0eYOQraQLwY04jAMGpu4',
        access_token='746046118652416000-BZC8oHZZ75dJe8Q8fGlMigNvKy6kVwK',
        access_token_secret='Nfl6UpuUUdvSy60tN6p7l3l1W0GOGKpQoIbqZg78cdrtd')

    def my_callback_closure(
        current_ts_instance
    ):  # accepts ONE argument: an instance of TwitterSearch
        queries, tweets_seen = current_ts_instance.get_statistics()
        # if queries > 0 and (queries % 60) == 0: # trigger delay every 5th query
        #     time.sleep(30) # sleep for 60 seconds

    tweetArray = []
    # start asking Twitter about the timeline
    for tweet in ts.search_tweets_iterable(tuo, callback=my_callback_closure):
        # tweetArray.append(tweet['text'])
        # if 'accessible' in tweet['text']:
class TwitterSearchSensor(PollingSensor):
    """Polling sensor that searches Twitter for configured keywords and
    dispatches a "twitter.matched_tweet" trigger per new matching tweet."""

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(
            sensor_service=sensor_service, config=config, poll_interval=poll_interval
        )
        # Trigger reference emitted once per matched tweet.
        self._trigger_ref = "twitter.matched_tweet"
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Create the authenticated search client and validate config."""
        self._client = TwitterSearch(
            consumer_key=self._config["consumer_key"],
            consumer_secret=self._config["consumer_secret"],
            access_token=self._config["access_token"],
            access_token_secret=self._config["access_token_secret"],
        )
        self._last_id = None

        # "query" must be exactly a list (strict type check by design).
        if type(self._config["query"]) is not list:
            self._logger.exception(
                'Twitter sensor failed. "query" config \
                                    value is not a list'
            )
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Fetch tweets newer than the stored last id and dispatch them."""
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config["query"])

        language = self._config.get("language", None)
        if language:
            tso.set_language(language)

        tso.set_result_type("recent")
        tso.set_count(self._config.get("count", 30))
        tso.set_include_entities(False)

        # Only request tweets newer than the last one processed.
        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets["content"]["statuses"]
        except Exception as e:
            self._logger.exception("Polling Twitter failed: %s" % (str(e)))
            return

        # Oldest-first so the saved last_id ends up being the newest tweet.
        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]["id"])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        # Nothing to tear down.
        pass

    def add_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def update_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def remove_trigger(self, trigger):
        # Trigger management is not used by this sensor.
        pass

    def _get_last_id(self):
        """Return the cached last tweet id, loading it from the sensor
        service's datastore on first use when persistence is available."""
        if not self._last_id and hasattr(self._sensor_service, "get_value"):
            self._last_id = self._sensor_service.get_value(name="last_id")

        return self._last_id

    def _set_last_id(self, last_id):
        """Cache the newest processed tweet id and persist it if possible."""
        self._last_id = last_id

        if hasattr(self._sensor_service, "set_value"):
            self._sensor_service.set_value(name="last_id", value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Emit the matched-tweet trigger with a trimmed tweet payload."""
        trigger = self._trigger_ref

        url = "%s/%s/status/%s" % (BASE_URL, tweet["user"]["screen_name"], tweet["id"])
        payload = {
            "id": tweet["id"],
            "created_at": tweet["created_at"],
            "lang": tweet["lang"],
            "place": tweet["place"],
            "retweet_count": tweet["retweet_count"],
            "favorite_count": tweet["favorite_count"],
            "user": {
                "screen_name": tweet["user"]["screen_name"],
                "name": tweet["user"]["name"],
                "location": tweet["user"]["location"],
                "description": tweet["user"]["description"],
            },
            "text": tweet["text"],
            "url": url,
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)
Пример #41
0
import json
import sys

BRAND = sys.argv[1]
COUCH_DATABASE_NAME = 'mt-twitter-' + BRAND
TWITTER_SEARCH_KEYWORDS = [BRAND]
TWITTER_CREDENTIALS = json.loads(
    Path(__file__).joinpath('..', '..', 'twitter.cfg.json').abspath().bytes())

# Establish connection to CouchDB and select the database to write into.
# The database must already exist; create it manually in the CouchDB control
# panel first.
database = couchdb.Server()[COUCH_DATABASE_NAME]

# Setup a twitter connection and configure its credentials:
twitter_connection = TwitterSearch(**TWITTER_CREDENTIALS)

# The twitter client may stop iterating the tweets at some point.
# In order to automatically continue at the last position, we put the
# import in a "while"-loop which will be stopped when there are no new
# tweets to import.
while True:
    # First, let's build a search query:
    twitter_query = TwitterSearchOrder()
    twitter_query.set_keywords(TWITTER_SEARCH_KEYWORDS)
    # Only import english tweets as our sentiment analysis will only work
    # with the English language for now.
    twitter_query.set_language('en')
    # We do not require entities (e.g. extracted URLs) as we are only
    # interested in the raw text of the tweet.
    twitter_query.set_include_entities(False)
Пример #42
0
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# Authenticated search client built from the credentials module.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    # Match tweets that mention either survey platform.
    tso = TwitterSearchOrder()
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)

    # Stream and print every matching tweet.
    for tweet in ts.search_tweets_iterable(tso):
        print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))

except TwitterSearchException as e:
    print(e)
tso.set_language('en') 

# Include Entity information
tso.set_include_entities(True)

if db_count is not 0:
    tso.set_since_id(latest_id_int)
    print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
else:
	print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'

    
# Create a TwitterSearch object with our secret tokens
ts = TwitterSearch(
    consumer_key = config.get('Twitter', 'consumer_key'),
    consumer_secret = config.get('Twitter', 'consumer_secret'),
    access_token = config.get('Twitter', 'access_token'),
    access_token_secret = config.get('Twitter', 'access_token_secret')
 )
 
# Perform the search
twitter_search = ts.search_tweets_iterable(tso)

# Start the insert count variable
db_inserts = 0

# this is where the fun actually starts :)
try:
    for tweet in twitter_search:
        if db_inserts < count:
            mentions_list = []
            hashtags_list = []
Пример #44
0
def twitter_search(params, start_time):
    """
    Retrieve recent tweets (since yesterday, UTC) matching the given keywords.

    Pages through Twitter's search API until results are exhausted or
    max_tweets have been counted, tracking unique tweeters and their
    follower counts along the way.

    :param params: keywords to search for, formatted as a list of strings.
        To search for a url, use this syntax:
            "url:\"gizmodo com\""
        in which the domain is separated by spaces instead of dots and the
        internal quotes are escaped with backslashes.
    :param start_time: passed through to make_dict for each result record.
    :return: list of four make_dict records:
      - tweets: the number of tweets, since yesterday, about the specified
        keywords (up to a maximum count of max_tweets)
      - tweets_followers: the number of unique followers of those tweets
        (a tweeter who tweets ten times is counted once, not ten times)
      - most_followed_name: the name of the tweeter with the most followers
      - most_followed_count: that tweeter's follower count
    """
    print('starting twitter_search')
    # Flow-control state.
    max_tweets = 10000     # hard cap on tweets to retrieve from the api
    more_tweets = True     # any more pages to fetch?
    need_to_sleep = False  # back off when approaching the api rate limit

    error = 'ok'

    # Initialise all counters BEFORE the try-block so the make_dict calls
    # at the bottom never hit a NameError when TwitterSearch() itself
    # raises (previously these lived inside the try and could be undefined
    # in the except path).
    tweets = 0                   # count of tweets about keywords, since yesterday
    unique_tweeters = {}         # screen_name -> followers_count
    tweets_followers = 0         # total followers across unique_tweeters
    min_id = 0                   # pagination ceiling for the next api call
    max_followers = (0, 'null')  # (count, screen_name) of the top tweeter

    try:
        # Authenticate using this app's tokens.
        ts = TwitterSearch(
            consumer_key=tw.CONSUMER_KEY,
            consumer_secret=tw.CONSUMER_SECRET,
            access_token=tw.ACCESS_TOKEN,
            access_token_secret=tw.ACCESS_TOKEN_SECRET
        )

        # Build the search order from the keywords.
        tso = TwitterSearchOrder()
        for param in params:
            tso.add_keyword(param)
        # Only search for tweets since yesterday (in UTC).
        yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(1)
        tso.set_since(yesterday)

        # Page through results until exhausted or the cap is reached.
        while more_tweets and tweets < max_tweets:
            # Sleep 60s when close to the search api's 180-calls-per-15-min
            # free-tier limit (need_to_sleep set at the end of the loop).
            if need_to_sleep:
                print("rate limit:", rate_limit)
                time.sleep(60)
            response = ts.search_tweets(tso)
            if len(response["content"]["statuses"]) == 0:
                # No more tweets to retrieve.
                more_tweets = False
            else:
                # Count each tweet in this batch and track unique tweeters.
                for tweet in response["content"]["statuses"]:
                    # Stop exactly at the cap. The original tested
                    # `tweets > max_tweets`, which overran the cap by one.
                    if tweets >= max_tweets:
                        break
                    tweets += 1
                    if (min_id == 0) or (tweet["id"] < min_id):
                        # The api returns tweets newest-first, so min_id is
                        # a lowering ceiling marking where the next
                        # paginated call should start.
                        min_id = tweet["id"]
                    if tweet["user"]["screen_name"] not in unique_tweeters:
                        # First sighting of this tweeter: record their
                        # follower count once.
                        tweeter = tweet["user"]["screen_name"]
                        unique_tweeters[tweeter] = tweet["user"]["followers_count"]
                # Subtract one so the last tweet of this batch is not
                # retrieved again on the next call.
                tso.set_max_id(min_id - 1)
            # With fewer than 15 api calls remaining, sleep on the next loop.
            rate_limit = int(ts.get_metadata()["x-rate-limit-remaining"])
            need_to_sleep = rate_limit < 15

        # Aggregate follower metrics over all unique tweeters.
        for tweeter in unique_tweeters:
            tweets_followers += unique_tweeters[tweeter]
            if unique_tweeters[tweeter] > max_followers[0]:
                max_followers = (unique_tweeters[tweeter], tweeter)

    except TwitterSearchException as e:
        tweets = None
        tweets_followers = None
        # Report the ACTUAL exception type; the original hard-coded
        # ValueError, mislabeling every failure in the status field.
        error = format_exception(type(e), e, e.__traceback__)

    tweets = make_dict(
        value=tweets,
        data_name='tweets',
        start_time=start_time,
        status=error
    )

    tweets_followers = make_dict(
        value=tweets_followers,
        data_name='tweets_followers',
        start_time=start_time,
        status=error
    )

    most_followed_name = make_dict(
        value=escape(max_followers[1], True),
        data_name='most_followed_name',
        start_time=start_time,
        status=error
    )

    most_followed_count = make_dict(
        value=max_followers[0],
        data_name='most_followed_count',
        start_time=start_time,
        status=error
    )

    return [tweets, tweets_followers, most_followed_name, most_followed_count]