def backfill_tweets(self):
    """
    Get all tweets from the author of the source tweet since the last
    time this method was called, adding them to the database.
    """
    name = self.source.user.screen_name
    new_tweets = search(['@' + name, 'from:' + name], self.last_tweet_id)
    if not new_tweets:
        return
    logger.info("Backfilling %s tweets for %s, last id %s" %
                (len(new_tweets), name, self.last_tweet_id))
    new_tweets = [db.merge(tweet_to_Tweet(t)) for t in new_tweets]
    new_ids = [t.id for t in new_tweets]
    stray = [db.merge(t) for t in self.stray_tweets if t.id not in new_ids]
    logger.info("%s other stray tweets" % len(stray))
    # db.merge() already added the tweets to the session
    self.tweets.extend(stray)
    self.tweets.extend(new_tweets)
    self.update_last_tweet_id()
    self.stray_tweets = []
    logger.info("%s total tweets in %s" % (len(self.tweets), self))
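
# search() is called above (and in query_twitter() below) but is not defined
# in this section. What follows is a minimal sketch of one possible
# implementation, assuming Tweepy's legacy (pre-4.0) API.search endpoint.
# The credential placeholders and the `api` object are assumptions, not part
# of the original code; it returns raw JSON dicts because query_twitter()
# indexes tweets[0]['id_str'].
import tweepy

CONSUMER_KEY = CONSUMER_SECRET = ACCESS_TOKEN = ACCESS_SECRET = '...'  # fill in

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True)

def search(terms, since_id=0):
    """ Return tweets matching any of `terms` that are newer than `since_id`,
    newest first, as raw JSON dicts. """
    query = ' OR '.join(terms)
    try:
        statuses = api.search(q=query, since_id=since_id or None,
                              result_type='recent', count=100)
    except tweepy.TweepError:
        return []
    return [status._json for status in statuses]
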
from time import time, sleep

from sqlalchemy.exc import OperationalError

def query_twitter(how_long=0, interval=5):
    """ Interface function: poll Twitter search every `interval` seconds
    for up to `how_long` seconds (0 means poll indefinitely). """
    reset_location_cache()
    # can send 180 requests per 15 min = one request every 5 sec
    start = time()
    # make sure we don't create duplicates; keeping track of the newest
    # id ourselves saves many db hits
    last_tweet_id = 0
    # if we don't specify how_long, go indefinitely
    while not how_long or time() - start < how_long:
        tweets = search(search_terms, last_tweet_id)
        if not tweets:
            # if we don't get anything back, sleep and try again
            sleep(interval)
            continue
        # if a retrieved tweet has a loc/user with a matching ID already in
        # the db, that loc/user is updated instead of a new one added,
        # because of merge
        try:
            db.add_all([db.merge(tweet_to_Tweet(t)) for t in tweets])
            db.commit()
            # results come back newest first
            last_tweet_id = tweets[0]['id_str']
        except OperationalError:
            # roll back so the session stays usable after a failed commit
            db.rollback()
        sleep(interval)
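
# Usage sketch for query_twitter(): the default interval of 5 seconds matches
# the standard-search rate limit noted above (15 min * 60 s / 180 requests =
# 5 s between requests).
if __name__ == '__main__':
    query_twitter(how_long=3600)   # poll for one hour
    # query_twitter()              # how_long=0: poll indefinitely
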
def test_tweets_to_Tweets():
    """ Sample tweets should be converted properly """
    fields = ['text', 'id_str', 'user']
    tweets = [util.tweet_to_Tweet(sample.tweets[0], fields),
              util.tweet_to_Tweet(sample.tweets[1], fields)]
    sample.assert_matching_tweets(tweets)
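
# util.tweet_to_Tweet itself is not shown in this section; below is a rough
# sketch of the shape the test implies. The Tweet stand-in class and the
# field handling are assumptions for illustration only, not the project's
# actual converter or model.
class Tweet(object):
    """ Stand-in for the project's SQLAlchemy model (assumption). """
    def __init__(self, id=None):
        self.id = id

def tweet_to_Tweet(tweet, fields=('text', 'id_str', 'user')):
    """ Copy the listed fields from a raw tweet dict onto a Tweet object. """
    t = Tweet(id=int(tweet['id_str']))   # backfill_tweets() reads t.id
    for field in fields:
        # the real converter presumably maps 'user' to a User row rather
        # than storing the raw dict
        setattr(t, field, tweet[field])
    return t
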
def rec(tweet):
    """ Convert a raw tweet and pass it to receive_tweet() along with the
    shared incidents and search_queue state. """
    receive_tweet(incidents, search_queue, tweet_to_Tweet(tweet))
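
# rec() looks like a per-tweet callback; one plausible way it could be wired
# up, assuming Tweepy's pre-4.0 streaming classes plus the `auth` object
# sketched earlier and a module-level search_terms list. This plumbing is an
# assumption, not original code.
import tweepy

class RecListener(tweepy.StreamListener):
    def on_status(self, status):
        rec(status._json)   # hand the raw tweet dict to rec()

stream = tweepy.Stream(auth, RecListener())
stream.filter(track=search_terms, is_async=True)   # non-blocking stream
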