def get(self):
    """Crawl (or backfill) one Twitter list and update the matching games.

    Request parameters read here:
      list_id: required. When missing, a warning is logged, the message
        is written to the response and the handler returns.
      backfill_date: optional, parsed with ParseDate. When it yields a
        truthy value nothing is fetched from Twitter; tweets already
        stored for the week starting at that date are used instead.
      update_games_only: optional, only consulted while backfilling.
        When truthy the stored tweets are not loaded and an empty tweet
        list is passed to UpdateGames.
        NOTE(review): the raw request value is tested for truthiness, so
        presumably any non-empty string (even 'false') enables it.
      fake_data: forwarded unchanged to TwitterFetcher.ListStatuses.

    A twitter_fetcher.FetchError is logged, reported in the response and
    ends the request without updating any games.
    """
    list_id = self.request.get('list_id')
    if not list_id:
        msg = 'No list name specified'
        logging.warning(msg)
        self.response.write(msg)
        return

    last_tweet_id = self._LookupLatestTweet(list_id)
    crawl_state = CrawlState.FromRequest(self.request, last_tweet_id)

    division, age_bracket, league = (
        list_id_bimap.ListIdBiMap.GetStructuredPropertiesForList(
            crawl_state.list_id))

    backfill_date = ParseDate(self.request.get('backfill_date'))
    update_games_only = self.request.get('update_games_only')

    # The window of interest is the week that ends at games_start: "now"
    # for a regular crawl, or one week past the requested date when
    # backfilling.
    if backfill_date:
        games_start = backfill_date + timedelta(weeks=1)
    else:
        games_start = datetime.utcnow()
    window_start = games_start - timedelta(weeks=1)

    # Stored tweets are only consumed on the backfill path, so the query
    # is only issued there; a regular crawl gets its tweets from Twitter.
    twts_future = None
    if backfill_date and not update_games_only:
        tweet_query = tweets.Tweet.query(
            tweets.Tweet.from_list == list_id,
            tweets.Tweet.created_at > window_start,
            tweets.Tweet.created_at < games_start).order(
                -tweets.Tweet.created_at)
        twts_future = tweet_query.fetch_async()

    # In parallel: look up the games for this division that were
    # modified during the one-week window.
    twit_games_query = Game.query(
        Game.division == division,
        Game.age_bracket == age_bracket,
        Game.league == league,
        Game.last_modified_at > window_start,
        Game.last_modified_at < games_start).order(-Game.last_modified_at)
    twit_games_future = twit_games_query.fetch_async()

    tourney_ids = []
    if league == League.USAU:
        # NOTE(review): this query has no lower bound on end_date, no
        # explicit ordering and is capped at 100 results -- confirm that
        # recent tournaments cannot be cut off.
        tourneys_query = Tournament.query(
            Tournament.end_date < games_start + timedelta(days=3))
        for tourney in tourneys_query.fetch(100):
            # Record each tournament at most once, even when several of
            # its sub-tournaments match this division and age bracket.
            if any(st.division == division and
                   st.age_bracket == age_bracket
                   for st in tourney.sub_tournaments or []):
                tourney_ids.append(tourney.id_str)

    sr_games_future = None
    if tourney_ids:
        # Also pull up every game attached to those tournaments (the
        # original comment calls these "SR" games). This query filters
        # by tournament id only; it has no date restriction.
        sr_games_query = Game.query(
            Game.division == division,
            Game.age_bracket == age_bracket,
            Game.league == league,
            Game.tournament_id.IN(tourney_ids))
        sr_games_future = sr_games_query.fetch_async()

    if backfill_date:
        users = {}
        twts = twts_future.get_result() if twts_future is not None else []
    else:
        token_manager = oauth_token_manager.OauthTokenManager()
        fetcher = twitter_fetcher.TwitterFetcher(token_manager)
        try:
            json_obj = fetcher.ListStatuses(
                crawl_state.list_id,
                count=crawl_state.num_to_crawl,
                since_id=crawl_state.last_tweet_id,
                max_id=crawl_state.max_id,
                fake_data=self.request.get('fake_data'))
        except twitter_fetcher.FetchError as e:
            msg = 'Could not fetch statuses for list %s' % crawl_state.list_id
            logging.warning('%s: %s', msg, e)
            self.response.write(msg)
            # TODO: retry the request a fixed # of times
            return

        # Update the various datastores.
        twts, users = self.UpdateTweetDbWithNewTweets(json_obj, crawl_state)

    existing_games = twit_games_future.get_result()
    if sr_games_future is not None:
        # NOTE(review): a game may appear in both result sets; confirm
        # UpdateGames tolerates duplicates.
        existing_games.extend(sr_games_future.get_result())

    self.UpdateGames(twts, existing_games, users, division, age_bracket,
                     league)
def get(self):
    """Crawl (or backfill) one Twitter list and update the matching games.

    Request parameters read here:
      list_id: required. When missing, a warning is logged, the message
        is written to the response and the handler returns.
      backfill_date: optional, parsed with ParseDate. When it yields a
        truthy value nothing is fetched from Twitter; tweets already
        stored for the week starting at that date are used instead.
      update_games_only: optional, only consulted while backfilling.
        When truthy the stored tweets are not loaded and an empty tweet
        list is passed to UpdateGames.
        NOTE(review): the raw request value is tested for truthiness, so
        presumably any non-empty string (even 'false') enables it.
      fake_data: forwarded unchanged to TwitterFetcher.ListStatuses.

    A twitter_fetcher.FetchError is logged, reported in the response and
    ends the request without updating any games.
    """
    list_id = self.request.get('list_id')
    if not list_id:
        msg = 'No list name specified'
        logging.warning(msg)
        self.response.write(msg)
        return

    last_tweet_id = self._LookupLatestTweet(list_id)
    crawl_state = CrawlState.FromRequest(self.request, last_tweet_id)

    division, age_bracket, league = (
        list_id_bimap.ListIdBiMap.GetStructuredPropertiesForList(
            crawl_state.list_id))

    backfill_date = ParseDate(self.request.get('backfill_date'))
    update_games_only = self.request.get('update_games_only')

    # The window of interest is the week that ends at games_start: "now"
    # for a regular crawl, or one week past the requested date when
    # backfilling.
    if backfill_date:
        games_start = backfill_date + timedelta(weeks=1)
    else:
        games_start = datetime.utcnow()
    window_start = games_start - timedelta(weeks=1)

    # Stored tweets are only consumed on the backfill path, so the query
    # is only issued there; a regular crawl gets its tweets from Twitter.
    twts_future = None
    if backfill_date and not update_games_only:
        tweet_query = tweets.Tweet.query(
            tweets.Tweet.from_list == list_id,
            tweets.Tweet.created_at > window_start,
            tweets.Tweet.created_at < games_start).order(
                -tweets.Tweet.created_at)
        twts_future = tweet_query.fetch_async()

    # In parallel: look up the games for this division that were
    # modified during the one-week window.
    twit_games_query = Game.query(
        Game.division == division,
        Game.age_bracket == age_bracket,
        Game.league == league,
        Game.last_modified_at > window_start,
        Game.last_modified_at < games_start).order(-Game.last_modified_at)
    twit_games_future = twit_games_query.fetch_async()

    tourney_ids = []
    if league == League.USAU:
        # NOTE(review): this query has no lower bound on end_date, no
        # explicit ordering and is capped at 100 results -- confirm that
        # recent tournaments cannot be cut off.
        tourneys_query = Tournament.query(
            Tournament.end_date < games_start + timedelta(days=3))
        for tourney in tourneys_query.fetch(100):
            # Record each tournament at most once, even when several of
            # its sub-tournaments match this division and age bracket.
            if any(st.division == division and
                   st.age_bracket == age_bracket
                   for st in tourney.sub_tournaments or []):
                tourney_ids.append(tourney.id_str)

    sr_games_future = None
    if tourney_ids:
        # Also pull up every game attached to those tournaments (the
        # original comment calls these "SR" games). This query filters
        # by tournament id only; it has no date restriction.
        sr_games_query = Game.query(
            Game.division == division,
            Game.age_bracket == age_bracket,
            Game.league == league,
            Game.tournament_id.IN(tourney_ids))
        sr_games_future = sr_games_query.fetch_async()

    if backfill_date:
        users = {}
        twts = twts_future.get_result() if twts_future is not None else []
    else:
        token_manager = oauth_token_manager.OauthTokenManager()
        fetcher = twitter_fetcher.TwitterFetcher(token_manager)
        try:
            json_obj = fetcher.ListStatuses(
                crawl_state.list_id,
                count=crawl_state.num_to_crawl,
                since_id=crawl_state.last_tweet_id,
                max_id=crawl_state.max_id,
                fake_data=self.request.get('fake_data'))
        except twitter_fetcher.FetchError as e:
            msg = 'Could not fetch statuses for list %s' % crawl_state.list_id
            logging.warning('%s: %s', msg, e)
            self.response.write(msg)
            # TODO: retry the request a fixed # of times
            return

        # Update the various datastores.
        twts, users = self.UpdateTweetDbWithNewTweets(json_obj, crawl_state)

    existing_games = twit_games_future.get_result()
    if sr_games_future is not None:
        # NOTE(review): a game may appear in both result sets; confirm
        # UpdateGames tolerates duplicates.
        existing_games.extend(sr_games_future.get_result())

    self.UpdateGames(twts, existing_games, users, division, age_bracket,
                     league)