Example #1
  def get_new_tweets(self, keywords: list) -> None:
    '''
    Use the TwitterSearch lib to fetch tweets that match the given keywords.
    Pass tweets to the _store method to update the database.
    '''
    tweets = []
    if self.DEBUG: print("Searching for tweets with {} as keywords.".format(keywords)) # DEBUG
    try:
      tso = TwitterSearchOrder()
      tso.setKeywords(keywords)
      tso.setLanguage('en')
      tso.setCount(1)
      tso.setIncludeEntities(False)

      ts = TwitterSearch(
          consumer_key = 'YOUR STUFF HERE',
          consumer_secret = 'YOUR STUFF HERE',
          access_token = 'YOUR STUFF HERE',
          access_token_secret = 'YOUR STUFF HERE'
        )
      ts.authenticate()
      for tweet in ts.searchTweetsIterable(tso):
        tweets.append(tweet)
    except TwitterSearchException as e:
      self.report_error(["TwitterSearchException",e])

    if self.DEBUG: print("Fetched {} new tweets with {} as keywords.".format(len(tweets),keywords)) # DEBUG
    self._store(tweets, keywords)
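
Example #1 hands the fetched tweets to a _store method that is not shown. A minimal sketch of what such a method might look like, assuming an sqlite3-backed store (the db_path attribute and the schema are assumptions, not part of the original class):

  def _store(self, tweets, keywords):
    '''Hypothetical sketch: persist fetched tweets keyed by tweet id.'''
    import sqlite3
    conn = sqlite3.connect(self.db_path)  # db_path is an assumed attribute
    conn.execute("CREATE TABLE IF NOT EXISTS tweets "
                 "(id TEXT PRIMARY KEY, user TEXT, text TEXT, keywords TEXT)")
    for tweet in tweets:
      conn.execute("INSERT OR IGNORE INTO tweets VALUES (?, ?, ?, ?)",
                   (tweet['id_str'], tweet['user']['screen_name'],
                    tweet['text'], ','.join(keywords)))
    conn.commit()
    conn.close()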
Example #2
def fetch_tweets(search_request):
    """
    fetches tweets from Twitter API extracts urls and updates db
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([search_request])  # define search request
        tso.setCount(settings.tweets_per_page)  # limit results per page
        tso.setIncludeEntities(True)  # give us entity information

        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key=twitter.TWITTER_CONSUMER_KEY,
            consumer_secret=twitter.TWITTER_CONSUMER_SECRET,
            access_token=twitter.TWITTER_ACCESS_TOKEN,
            access_token_secret=twitter.TWITTER_ACCESS_TOKEN_SECRET
        )

        ts.authenticate()  # user must authenticate first
        tweets = ts.searchTweetsIterable(tso)
        found_urls = extract_urls(tweets)
        search_keyword_object = SearchKeyWord()
        search_keyword_object.gifs = found_urls
        search_keyword_object.search_keyword = search_request
        search_keyword_object.updated_at = datetime.now()
        print(search_keyword_object)
        search_keyword_object.save()
        return found_urls

    except TwitterSearchException as e:  # to take care of errors
        message = str(e)
        return []  # return an empty result on error
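
Example #2 relies on an extract_urls helper that is not shown. A minimal sketch, assuming each tweet dict carries the standard entities payload requested via setIncludeEntities(True) (the function body is an assumption):

def extract_urls(tweets):
    """Hypothetical sketch: collect expanded URLs from tweet entities."""
    found_urls = []
    for tweet in tweets:
        for entity in tweet.get('entities', {}).get('urls', []):
            url = entity.get('expanded_url')
            if url:
                found_urls.append(url)
    return found_urls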
Example #4
def getTweetsForKeyword(keyword, last_id=None):
    """
    Get the (recent) tweets for a given keyword
    :param keyword: the query keyword
    :return: a list of tweets. List is empty if an error occurs
    """
    tweet_list = []

    try:
        print('*** Searching tweets for keyword:', keyword, '...')
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setResultType('recent')
        tso.setCount(100)
        tso.setIncludeEntities(True)

        if last_id is not None:
            tso.setSinceID(last_id)

        ts = TwitterSearch(
            consumer_key=params.CONSUMER_KEY,
            consumer_secret=params.CONSUMER_SECRET,
            access_token=params.ACCESS_TOKEN,
            access_token_secret=params.ACCESS_TOKEN_SECRET
        )

        ts.authenticate()

        counter = 0

        for tweet in ts.searchTweetsIterable(tso):
            counter += 1
            tweet_list.append(tweet)
        print('*** Found a total of %i tweets for keyword: %s' % (counter, keyword))
        return tweet_list

    except TwitterSearchException as e:
        print("[ERROR]", e)
        return tweet_list
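
One way the last_id parameter could be used for incremental polling; the loop below is an illustration, and process() is a hypothetical consumer:

import time

last_id = None
while True:
    tweets = getTweetsForKeyword('python', last_id=last_id)
    if tweets:
        last_id = max(int(t['id_str']) for t in tweets)  # remember the newest id seen
    process(tweets)   # hypothetical consumer of the fetched tweets
    time.sleep(900)   # stay inside the 15-minute rate-limit window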
Example #5
try:
    tso = TwitterSearchOrder() # create a TwitterSearchOrder object
    tso.setKeywords(['@vooruit']) # let's define all the words we would like to look for
    #tso.setLanguage('nl') # we only want to see Dutch tweets
    tso.setCount(1) # please dear Mr Twitter, only give us 1 result per page
    tso.setIncludeEntities(False) # and don't give us all that entity information

    # it's about time to create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key, 
        consumer_secret, 
        access_token, 
        access_token_secret 
     )

    ts.authenticate() # we need to use the oauth authentication first to be able to sign messages

    counter = 0 # just a small counter

    # "w" mode creates a new file or **overwrites an existing file's contents**.
    f = open("/mnt/GT3/GT2 Projects/tweets.txt", "w", encoding="utf-8")
    f.write("user;tweet;image\n")
    for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
        #print('@%s tweeted: %s' % (unescape(tweet['user']['screen_name']), unescape(tweet['text'])))
        try:
            f.write(unescape(tweet['user']['screen_name']))
            f.write(": ;")
            # assumed continuation (the source snippet is truncated here):
            f.write(unescape(tweet['text']))
            f.write("\n")
        except Exception:
            pass # skip tweets that fail to write
    f.close()

except TwitterSearchException as e: # assumed except clause (missing from the truncated source)
    print(e)
Example #6
    def Get_Data(self):
        
        MAX_PAGES = 15
        RESULTS_PER_PAGE = 100

        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.setKeywords(search_keywords) # let's define all the words we would like to look for
        tso.setLanguage('en') # we want to see English tweets only
        tso.setCount(RESULTS_PER_PAGE) # results per page
        tso.setIncludeEntities(True) # we need entity information (media, urls, mentions) below

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
                           consumer_key = 'YOUR CONSUMER KEY',
                           consumer_secret = 'YOUR CONSUMER SECRET',
                           access_token = 'YOUR ACCESS TOKEN',
                           access_token_secret = 'YOUR ACCESS TOKEN SECRET'
                           )
        ts.authenticate() # we need to use the oauth authentication first to be able to sign messages
        i = 0
        no_users = 0
        no_tweets = 0
        search_results = []
        twitter_dict = dict()

        #Retweet pattern to detect retweets
        rt_patterns = re.compile(r"(RT|via)((?:\b\W*@\w+)+)", re.IGNORECASE)   
        rt_origins_list = []
        rt_origins = ''
        
        db = Create_Couchdb_Instance(database_name)
        while True:
            tso.setKeywords([search_keywords[i]])
            for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)
                try:
                    user_id = tweet['user']['id_str']
                    tweet_text = tweet['text']
                    time_stmp = tweet['created_at']
                    # defaults, in case the tweet carries no usable entities
                    im_id = ''
                    im_url = ''
                    urls = ''
                    mentions = ''
                    try:
                        if 'media' in tweet['entities'] and len(tweet['entities']['media']) > 0:
                            im_id = tweet['entities']['media'][0]['id']
                            im_url = tweet['entities']['media'][0]['expanded_url']
                        if 'urls' in tweet['entities'] and len(tweet['entities']['urls']) > 0:
                            urls = tweet['entities']['urls']
                        if 'user_mentions' in tweet['entities'] and len(tweet['entities']['user_mentions']) > 0:
                            mentions = tweet['entities']['user_mentions']
                    except (KeyError, IndexError):
                        pass # keep the defaults set above
                    # if this user id is not in the database yet, add all the details
                    if user_id not in db:
                        twitter_dict=dict()
                        twitter_dict['_id'] = user_id
                        twitter_dict['text']=[{'urls':urls,'image':[im_id,im_url],'mentions':mentions,'tweet_id':tweet['id'],'text':tweet_text,'timestamp':time_stmp,'coordinates':tweet['coordinates'],'source':tweet['source'],'in_reply_to_screen_name':tweet['in_reply_to_screen_name'],'retweet_count':tweet['retweet_count']}]
                        twitter_dict['friends_count'] = tweet['user']['friends_count']
                        twitter_dict['location'] = tweet['user']['location']
                        twitter_dict['profile_description'] = tweet['user']['description']
                        twitter_dict['tweet_count'] = tweet['user']['statuses_count']
                        twitter_dict['followers_count'] = tweet['user']['followers_count']
                        twitter_dict['screen_name'] = tweet['user']['screen_name']
                        twitter_dict['profile_created_at'] = tweet['user']['created_at']
                        no_users += 1
                        no_tweets += 1
                        db.create(twitter_dict)
                    # If screen name is present then check if the tweet is present otherwise append the tweet details
                    else:
                        doc = db[user_id]
                        tweets = {}
                        for tweet_dict in doc['text']:
                            tweets[tweet_dict['text']] = 'tweet'
                        if tweet_text not in tweets:
                            doc['text'].append({'urls':urls,'image':[im_id,im_url],'mentions':mentions,'tweet_id':tweet['id'],'text':tweet_text,'timestamp':time_stmp,'coordinates':tweet['coordinates'],'source':tweet['source'],'in_reply_to_screen_name':tweet['in_reply_to_screen_name'],'retweet_count':tweet['retweet_count']})
                            no_tweets += 1
                            db[user_id] = doc
                    print('No of users: {}, No. of tweets: {}, Time: {}'.format(no_users, no_tweets, strftime("%Y-%m-%d %H:%M:%S")))
                    write_log(no_users,no_tweets)
                except Exception as e:
                    print('Exception: {}'.format(e))

            i = i + 1
            # in API version 1.1 the rate limits are fixed per 15 minutes instead of per hour
            if i == len(search_keywords):
                i = 0
                print("Sleeping for 15 mins")
                time.sleep(900)                                                      
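
The rt_patterns regex compiled above is never applied in the code shown. One way it could be used to pull retweet origins out of a tweet (this usage is an illustration, not part of the original):

def get_rt_origins(tweet_text):
    '''Hypothetical sketch: extract the @names a retweet points back to.'''
    origins = []
    for _, mention in rt_patterns.findall(tweet_text):
        origins.extend(name.lstrip('@') for name in mention.split())
    return origins

# e.g. get_rt_origins('RT @alice: hello world') -> ['alice']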
Example #7
try:
    tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
    tso.setKeywords(['FCX', 'Freeport McMoran'])  # let's define all the words we would like to look for
    tso.setLanguage('en')  # we want to see English tweets only
    tso.setCount(1)  # please dear Mr Twitter, only give us 1 result per page
    tso.setIncludeEntities(False)  # and don't give us all that entity information

    # it's about time to create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key='YOUR CONSUMER KEY',
        consumer_secret='YOUR CONSUMER SECRET',
        access_token='YOUR ACCESS TOKEN',
        access_token_secret='YOUR ACCESS TOKEN SECRET')

    ts.authenticate()  # we need to use the oauth authentication first to be able to sign messages

    counter = 0  # just a small counter
    for tweet in ts.searchTweetsIterable(tso):  # this is where the fun actually starts :)
        counter += 1
        print('@%s tweeted: %s' % (tweet['user']['screen_name'],
                                   tweet['created_at']))

    print('*** Found a total of %i tweets' % counter)

except TwitterSearchException as e:  # take care of all those ugly errors if there are some
    print(e)
Example #8
def viewtwitterevents(request):
    '''
    Query the Twitter API for a given hashtag and date range, using these Twitter-Python bindings:
    https://github.com/ckoepp/TwitterSearch
    '''
    try:
        # Hashtag is required.
        hashtag = None
        query = None
        if 'hashtag' in request.GET and len(request.GET['hashtag']) > 0:
            hashtag = __sanitize_input(request.GET['hashtag'])
        else:
            raise Exception('Hashtag is required!')
        # If datetime range is supplied, use that, else return all events.
        dtStart = dtEnd = None
        strStart = strEnd = None
        if 'date_start' in request.GET and request.GET['date_start']:
            strStart = request.GET['date_start']
            dtStart = datetime.datetime.strptime(strStart, "%Y-%m-%d").date()
        if 'date_end' in request.GET and request.GET['date_end']:
            strEnd = request.GET['date_end']
            dtEnd = datetime.datetime.strptime(strEnd, "%Y-%m-%d").date()
        # Check for the "no tweet cap" param.
        noTweetCap = False
        if 'no_tweet_cap' in request.GET and request.GET['no_tweet_cap'] == 'true':
            noTweetCap = True
        # First, check the cache for the Twitter API result.
        cacheKey = hashtag + '_' + (strStart or '') + '_' + (strEnd or '')
        secondaryCacheFilePath = settings.SECONDARY_CACHE_DIRECTORY + cacheKey + '.json' 
        response = cache.get(cacheKey)
        if response is None:
            if os.path.isfile(secondaryCacheFilePath):
                with open(secondaryCacheFilePath, "r") as textFile:
                    response = json.load(textFile)
        if response is None or len(response) == 0:
            totalEventCnt = 0
            totalEventCntThresh = 0
            events = []
            tsMax = 0
            tsMin = sys.maxsize
            # Authenticate with Twitter API.
            tso = TwitterSearchOrder()
            tso.setLanguage('en')
            tso.setCount(100)
            tso.setIncludeEntities(False)
            tso.setResultType('recent')
            # Create a TwitterSearch object with our secret tokens
            twitterSearch = TwitterSearch(
                consumer_key=settings.TWITTER_CONSUMER_KEY,
                consumer_secret=settings.TWITTER_CONSUMER_SECRET,
                access_token=settings.TWITTER_ACCESS_TOKEN,
                access_token_secret=settings.TWITTER_ACCESS_TOKEN_SECRET
             )
            twitterSearch.authenticate()
            # Construct and run the twitter search query.
            if dtStart is not None and dtEnd is not None:
                query = hashtag
                tso.setUntil(dtEnd)
            else:
                query = hashtag
            tso.setKeywords([query])
            maxId = 0
            tweetCnt = MAX_INT32
            doLoop = True
            # Page through the Twitter search API results until we either get no results or we arrive at the start date.
            while doLoop:
                # Exit condition: the previous page returned no tweets.
                if tweetCnt == 0:
                    break
                if maxId > 0:
                    tso.setMaxID(maxId)
                    tso.setKeywords([hashtag])
                # Reset counter.
                tweetCnt = 0
                # Reset last tweet.
                lastTweet = None
                # Create an additional retry loop for when Twitter refuses the next page.
                try:
                    for tweet in twitterSearch.searchTweetsIterable(tso):
                        dt = __getDateFromTweetCreatedAt(tweet['created_at'])
                        if dtStart is not None and dt.date() < dtStart:
                            doLoop = False
                            break
                        ts = time.mktime(dt.timetuple())
                        if ts > tsMax:
                            tsMax = ts
                        if ts < tsMin:
                            tsMin = ts
                        lastTweet = tweet
                        # Copy search results to the Event list.
                        events.append(
                                      {
                                       'event_key': hashtag,
                                       'event_datetime': str(tweet['created_at']),
                                       'event_timestamp': ts,
                                       'event_value': tweet['text'],
                                       'event_tags': [hashtag],
                                       'raw_data': tweet
                                       }
                                      )
                        # Increment counter.
                        tweetCnt += 1
                        totalEventCnt += 1
                        totalEventCntThresh += 1
                        if totalEventCntThresh >= 1000:
                            print('Processed ' + str(totalEventCnt) + ' tweets.')
                            totalEventCntThresh = 0
                        # Exit conditions:
                        if not noTweetCap and totalEventCnt >= settings.TWITTER_SEARCH_API_TWEET_CAP:
                            doLoop = False
                            break
                except Exception as ex:
                    # Wait and then try the last request again.
                    sleepDurationSeconds = 900  # 15 minutes.
                    print("Got exception when querying Twitter search API: " + str(ex))
                    # Save the portion of the events JSON collected so far.
                    with open(settings.SECONDARY_CACHE_DIRECTORY + cacheKey + '-part-' + str(totalEventCnt) + '.json', "w") as textFile:
                        textFile.write(json.dumps(events, default=json_util.default))
                    print("Sleeping for " + str(sleepDurationSeconds) + " seconds.")
                    time.sleep(sleepDurationSeconds)
                    # Reset the tweet counter to make sure we don't artificially trigger the loop exit condition.
                    tweetCnt = -1
                    print("Time to wake up and try again from maxId = " + str(maxId))
                if lastTweet is not None:
                    maxId = int(lastTweet['id_str'])
            # Return the file list as JSON.
            response = {
                        'heartbeat_events': events,
                        'timestamp_max': tsMax,
                        'timestamp_min': tsMin,
                        'allowed_event_keys': [hashtag]
                        }
            # Now cache response.
            cache.set(cacheKey, response, 43200)  # 12 hours
            # Finally, store the events in a text file (TODO: I may remove this later).
            with open(secondaryCacheFilePath, "w") as textFile:
                textFile.write(json.dumps(response, default=json_util.default))
        ser = json.dumps(response, default=json_util.default)
        return HttpResponse(ser, content_type="application/json")
    except Exception as ex:
        # Respond with error as JSON.
        return HttpResponse(ApiResponse.from_exception(ex).to_json(), content_type="application/json")
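
At its core the view above implements the standard max_id paging pattern: re-issue the search with setMaxID pinned just below the oldest tweet seen, until a page comes back empty or the tweets predate the requested window. The pattern in isolation, as a sketch (tweet_date() stands in for the view's __getDateFromTweetCreatedAt helper; caching and retry logic are omitted):

def page_through(ts, tso, stop_date):
    '''Hypothetical sketch of max_id paging with TwitterSearch.'''
    max_id = 0
    while True:
        if max_id > 0:
            tso.setMaxID(max_id)
        page = list(ts.searchTweetsIterable(tso))
        if not page:
            break  # no more results
        for tweet in page:
            if tweet_date(tweet) < stop_date:
                return  # reached the start of the requested window
            yield tweet
        max_id = int(page[-1]['id_str']) - 1  # step below the oldest tweet seen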