示例#1
0
    def backfill_tweets(self):
        """Search for tweets newer than ``self.last_tweet_id`` and attach them.

        Queries ``search`` for tweets mentioning (``@name``) or sent by
        (``from:name``) the author of the source tweet, passing
        ``self.last_tweet_id`` as the lower bound. Each result is converted
        with ``tweet_to_Tweet`` and merged through ``db.merge``. Pending stray
        tweets whose id is not among the new results are merged as well.
        Both groups are appended to ``self.tweets``, the last-tweet id is
        refreshed, and ``self.stray_tweets`` is emptied.

        Returns ``None``. When the search yields nothing the method returns
        early and leaves ``self.tweets`` and ``self.stray_tweets`` untouched.
        """
        name = self.source.user.screen_name
        found = search(['@' + name, 'from:' + name], self.last_tweet_id)
        if not found:
            return
        logger.info("Backfilling %s tweets for %s, last id %s",
                    len(found), name, self.last_tweet_id)

        merged = [db.merge(tweet_to_Tweet(t)) for t in found]
        # A set gives O(1) membership tests while filtering the stray tweets.
        merged_ids = {t.id for t in merged}
        stray = [db.merge(t) for t in self.stray_tweets
                 if t.id not in merged_ids]
        logger.info("%s other stray tweets", len(stray))
        # doing the db.merge already added the tweets

        self.tweets.extend(stray)
        self.tweets.extend(merged)
        self.update_last_tweet_id()
        self.stray_tweets = []
        logger.info("%s total tweets in %s", len(self.tweets), self)
示例#2
0
def receive_tweet(incidents, search_queue, tweet):
    """Process one incoming tweet: attach it to incidents or start a new one.

    Retweets are ignored. Any other tweet is merged through ``db.merge`` and
    offered to each active incident, walking ``incidents`` from the end and
    stopping at the first inactive one. If no incident accepts the tweet and
    ``incident_tweet(tweet)`` is true, a new ``Incident`` is created, appended
    to ``incidents`` and put on ``search_queue``. Finally, once at least
    ``search_interval`` seconds have elapsed since ``last_search_time``, the
    next incident taken from ``search_queue`` is backfilled, the session is
    committed, and the incident is re-queued if it is still active.

    Args:
        incidents: list of incidents. Stopping at the first inactive entry
            relies on the list being sorted by inactive time.
        search_queue: queue of incidents awaiting backfill (``put``/``get``).
        tweet: the incoming tweet object.

    Returns:
        None. Updates the module-level ``last_search_time`` after a backfill.
    """
    global last_search_time, search_interval

    logger.info("Received %s", tweet)
    # disregard retweets
    if tweet.retweet_of is not None:
        return

    tweet = db.merge(tweet)
    offered = False

    for inc in reversed(incidents):
        # stop when we reach the inactive incidents.
        # this depends on the incidents list being sorted by inactive time
        if not inc.active():
            break

        # try to add the tweet to an incident.
        if inc.offer_tweet(tweet):
            logger.info("Found %s for %s", inc, tweet)
            # TODO: check if it's okay to just discard this tweet because
            # incidents will find the tweet themselves when they backfill
            offered = True
            newlen = len(inc.tweets) + len(inc.stray_tweets)
            update_histo(newlen - 1, newlen)

    # make incidents for any tweets unrelated to current incidents
    if not offered and incident_tweet(tweet):
        inc = Incident(tweet)
        incidents.append(inc)
        update_histo(0, 1)
        search_queue.put(inc)
        logger.info("Created incident for %s", tweet)

    # every search-interval seconds, backfill the oldest-updated incident
    if time() - last_search_time >= search_interval:
        # NOTE(review): get() presumably blocks if the queue is empty --
        # confirm an incident is always queued by the time this runs.
        next_incident = search_queue.get()
        logger.info("Doing backfill on %s", next_incident)
        oldlen = len(next_incident.tweets) + len(next_incident.stray_tweets)
        next_incident.backfill_tweets()
        db.commit()
        newlen = len(next_incident.tweets) + len(next_incident.stray_tweets)
        update_histo(oldlen, newlen)
        # active is a method: it must be called, otherwise the bound method
        # is always truthy and inactive incidents are re-queued forever.
        if next_incident.active():
            search_queue.put(next_incident)
        last_search_time = time()
    logger.info("%s incidents: %s", len(incidents), get_histo())
示例#3
0
def query_twitter(how_long=0, interval=5):
    """Poll the search API and store the returned tweets in the database.

    Each round calls ``search(search_terms, last_tweet_id)``, converts the
    results with ``tweet_to_Tweet``, merges and commits them, then sleeps.
    ``last_tweet_id`` is tracked locally and only advanced after a successful
    commit, so a failed batch is requested again on the next round.

    Args:
        how_long: number of seconds to keep polling. A falsy value (the
            default ``0``) means poll indefinitely.
        interval: seconds to sleep between requests.
    """
    reset_location_cache()
    # can send 180 requests per 15 min = 5 sec
    start = time()

    # make sure we don't create duplicates.
    # keeping track of this ourselves saves many db hits
    last_tweet_id = 0
    # if how_long is not specified (falsy), go indefinitely
    while not how_long or time() - start < how_long:
        tweets = search(search_terms, last_tweet_id)
        # if we dont get anything back, just sleep and try again
        if tweets:
            # if a retrieved tweet has a loc/user with a matching ID already
            # in the db, that loc/user is updated instead of a new one added,
            # bc of merge
            try:
                db.add_all([db.merge(tweet_to_Tweet(t)) for t in tweets])
                db.commit()
            except OperationalError:
                # Assumes db is a SQLAlchemy-style session: discard the
                # failed transaction so the next round can use the session.
                db.rollback()
            else:
                last_tweet_id = tweets[0]['id_str']
        sleep(interval)