Пример #1
0
    def get(self):
        """Crawl (or backfill) the tweets of one list and update its games.

        Request parameters read directly by this method:
          list_id: required; the list to process.
          backfill_date: optional; when ParseDate returns a truthy value for
            it, Twitter is not contacted and stored tweets from the week
            following that date are used instead.
          update_games_only: optional; when non-empty during a backfill, no
            stored tweets are loaded and an empty tweet list is used.
          fake_data: optional; handed to the fetcher unchanged.
        CrawlState.FromRequest receives the request as well and may read
        further parameters.

        A short message is written to the response, and the method returns
        early, when list_id is missing or the fetcher raises FetchError.
        """
        list_id = self.request.get('list_id')
        if not list_id:
            msg = 'No list name specified'
            logging.warning(msg)
            self.response.write(msg)
            return

        last_tweet_id = self._LookupLatestTweet(list_id)
        crawl_state = CrawlState.FromRequest(self.request, last_tweet_id)

        division, age_bracket, league = (
            list_id_bimap.ListIdBiMap.GetStructuredPropertiesForList(
                crawl_state.list_id))

        backfill_date = ParseDate(self.request.get('backfill_date'))
        update_games_only = self.request.get('update_games_only')

        # Every query below looks at a one-week window. It ends now for a
        # live crawl, or one week after backfill_date for a backfill.
        if backfill_date:
            window_end = backfill_date + timedelta(weeks=1)
        else:
            window_end = datetime.utcnow()
        window_start = window_end - timedelta(weeks=1)

        # Start the datastore reads asynchronously so they overlap with the
        # work below; their results are collected at the end.
        twts_future = None
        if backfill_date and not update_games_only:
            # Stored tweets of this list inside the window, newest first.
            tweet_query = tweets.Tweet.query(
                tweets.Tweet.from_list == list_id,
                tweets.Tweet.created_at > window_start,
                tweets.Tweet.created_at < window_end
            ).order(-tweets.Tweet.created_at)
            twts_future = tweet_query.fetch_async()

        # Games of this division/age bracket/league modified inside the
        # one-week window, newest first.
        twit_games_query = Game.query(
            Game.division == division,
            Game.age_bracket == age_bracket,
            Game.league == league,
            Game.last_modified_at > window_start,
            Game.last_modified_at < window_end).order(-Game.last_modified_at)
        twit_games_future = twit_games_query.fetch_async()

        tourney_ids = []
        if league == League.USAU:
            # NOTE(review): this query has no lower bound and no ordering, so
            # the 100 tournaments fetched are not necessarily the most recent
            # ones -- confirm that this is intended.
            tourneys = Tournament.query(
                Tournament.end_date < window_end + timedelta(days=3)).fetch(100)
            for tourney in tourneys:
                for st in tourney.sub_tournaments or ():
                    if st.division == division and st.age_bracket == age_bracket:
                        tourney_ids.append(tourney.id_str)
                        # One entry per tournament is enough; stop here so
                        # the IN filter below is not handed repeated values.
                        break

        sr_games_future = None
        if tourney_ids:
            # Additionally load every game of the matching tournaments,
            # regardless of when it was last modified.
            sr_games_query = Game.query(
                Game.division == division,
                Game.age_bracket == age_bracket,
                Game.league == league,
                Game.tournament_id.IN(tourney_ids))
            sr_games_future = sr_games_query.fetch_async()

        if backfill_date:
            # Backfill: replay stored tweets (or none) with no user data.
            users = {}
            if twts_future is not None:
                twts = twts_future.get_result()
            else:
                twts = []
        else:
            token_manager = oauth_token_manager.OauthTokenManager()
            fetcher = twitter_fetcher.TwitterFetcher(token_manager)
            try:
                json_obj = fetcher.ListStatuses(
                    crawl_state.list_id,
                    count=crawl_state.num_to_crawl,
                    since_id=crawl_state.last_tweet_id,
                    max_id=crawl_state.max_id,
                    fake_data=self.request.get('fake_data'))
            except twitter_fetcher.FetchError as e:
                msg = 'Could not fetch statuses for list %s' % crawl_state.list_id
                logging.warning('%s: %s', msg, e)
                self.response.write(msg)

                # TODO: retry the request a fixed # of times
                return

            # Update the various datastores.
            twts, users = self.UpdateTweetDbWithNewTweets(
                json_obj, crawl_state)

        existing_games = twit_games_future.get_result()
        if sr_games_future is not None:
            existing_games.extend(sr_games_future.get_result())
        self.UpdateGames(twts, existing_games, users, division, age_bracket,
                         league)
Пример #2
0
  def get(self):
    """Crawl (or backfill) the tweets of one list and update its games.

    Request parameters read directly by this method:
      list_id: required; the list to process.
      backfill_date: optional; when ParseDate returns a truthy value for it,
        Twitter is not contacted and stored tweets from the week following
        that date are used instead.
      update_games_only: optional; when non-empty during a backfill, no
        stored tweets are loaded and an empty tweet list is used.
      fake_data: optional; handed to the fetcher unchanged.
    CrawlState.FromRequest receives the request as well and may read further
    parameters.

    A short message is written to the response, and the method returns
    early, when list_id is missing or the fetcher raises FetchError.
    """
    list_id = self.request.get('list_id')
    if not list_id:
      msg = 'No list name specified'
      logging.warning(msg)
      self.response.write(msg)
      return

    last_tweet_id = self._LookupLatestTweet(list_id)
    crawl_state = CrawlState.FromRequest(self.request, last_tweet_id)

    division, age_bracket, league = (
        list_id_bimap.ListIdBiMap.GetStructuredPropertiesForList(
            crawl_state.list_id))

    backfill_date = ParseDate(self.request.get('backfill_date'))
    update_games_only = self.request.get('update_games_only')

    # All queries share a one-week window: it ends now for a live crawl, or
    # one week after backfill_date for a backfill.
    one_week = timedelta(weeks=1)
    if backfill_date:
      window_end = backfill_date + one_week
    else:
      window_end = datetime.utcnow()
    window_start = window_end - one_week

    # Datastore reads are started asynchronously here and collected at the
    # end so that they overlap with the remaining work.
    twts_future = None
    if backfill_date and not update_games_only:
      # Stored tweets of this list inside the window, newest first.
      tweet_query = tweets.Tweet.query(
          tweets.Tweet.from_list == list_id,
          tweets.Tweet.created_at > window_start,
          tweets.Tweet.created_at < window_end
      ).order(-tweets.Tweet.created_at)
      twts_future = tweet_query.fetch_async()

    # Games of this division/age bracket/league modified inside the one-week
    # window, newest first.
    twit_games_query = Game.query(
        Game.division == division,
        Game.age_bracket == age_bracket,
        Game.league == league,
        Game.last_modified_at > window_start,
        Game.last_modified_at < window_end).order(-Game.last_modified_at)
    twit_games_future = twit_games_query.fetch_async()

    tourney_ids = []
    if league == League.USAU:
      # NOTE(review): this query has no lower bound and no ordering, so the
      # 100 tournaments fetched are not necessarily the most recent ones --
      # confirm that this is intended.
      tourneys = Tournament.query(
          Tournament.end_date < window_end + timedelta(days=3)).fetch(100)
      # any() lists each tournament at most once, even when several of its
      # sub-tournaments match, so the IN filter below gets no repeated values.
      tourney_ids = [
          tourney.id_str for tourney in tourneys
          if any(st.division == division and st.age_bracket == age_bracket
                 for st in tourney.sub_tournaments or ())]

    sr_games_future = None
    if tourney_ids:
      # Additionally load every game of the matching tournaments, regardless
      # of when it was last modified.
      sr_games_query = Game.query(
          Game.division == division,
          Game.age_bracket == age_bracket,
          Game.league == league,
          Game.tournament_id.IN(tourney_ids))
      sr_games_future = sr_games_query.fetch_async()

    if backfill_date:
      # Backfill: replay stored tweets (or none) with no user data.
      users = {}
      if twts_future is not None:
        twts = twts_future.get_result()
      else:
        twts = []
    else:
      token_manager = oauth_token_manager.OauthTokenManager()
      fetcher = twitter_fetcher.TwitterFetcher(token_manager)
      try:
        json_obj = fetcher.ListStatuses(
            crawl_state.list_id,
            count=crawl_state.num_to_crawl,
            since_id=crawl_state.last_tweet_id,
            max_id=crawl_state.max_id,
            fake_data=self.request.get('fake_data'))
      except twitter_fetcher.FetchError as e:
        msg = 'Could not fetch statuses for list %s' % crawl_state.list_id
        logging.warning('%s: %s', msg, e)
        self.response.write(msg)

        # TODO: retry the request a fixed # of times
        return

      # Update the various datastores.
      twts, users = self.UpdateTweetDbWithNewTweets(json_obj, crawl_state)

    existing_games = twit_games_future.get_result()
    if sr_games_future is not None:
      existing_games.extend(sr_games_future.get_result())
    self.UpdateGames(twts, existing_games, users, division, age_bracket, league)