Пример #1
0
    def testParseTourneyLandingPage_updateTourneyNewDate(self, mock_add_queue):
        """Crawling the landing page replaces out-of-date tournament dates.

        A tournament is stored with 2016-05-31 start/end dates and then the
        landing-page task is requested. The stored entity is expected to end
        up with the 2016-03-31 dates asserted below, and exactly one crawl
        task is expected to be queued for the OPEN / COLLEGE division.

        Args:
          mock_add_queue: Mock standing in for the task-queue add call
            (injected by a patch decorator that is outside this block).
        """
        tourney_url = '%s%s' % (score_reporter_handler.USAU_URL_PREFIX,
                                'my-tourney')
        key = game_model.tourney_key_full('my-tourney')

        # Seed the datastore with the tournament carrying the wrong dates.
        wrong_date_tourney = game_model.Tournament(
            last_modified_at=datetime.utcnow(),
            key=key,
            start_date=datetime(2016, 5, 31, 0, 0),
            end_date=datetime(2016, 5, 31, 0, 0),
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.COLLEGE)
            ],
            url=tourney_url,
            id_str='my-tourney',
            name='my tourney')
        wrong_date_tourney.put()

        self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
        # The tourney name is passed to the task as a query parameter.
        response = self.testapp.get(
            '/tasks/sr/list_tournament_details?name=my-tourney')
        self.assertEqual(200, response.status_int)

        calls = mock_add_queue.mock_calls
        self.assertEqual(1, len(calls))
        self.assertEqual(
            calls[0],
            mock.call(url='/tasks/sr/crawl_tournament',
                      method='GET',
                      params={
                          'url_suffix': 'schedule/Men/College-Men/',
                          'name': 'my-tourney',
                          'division': 'OPEN',
                          'age_bracket': 'COLLEGE'
                      },
                      queue_name='score-reporter'))

        got_tourney = key.get()
        # The stored tourney should now carry the dates asserted here.
        # image_url_https and last_modified_at are copied from the stored
        # entity, so they are deliberately not verified by this test.
        want_tourney = game_model.Tournament(
            key=key,
            url=tourney_url,
            id_str='my-tourney',
            name='my tourney',
            image_url_https=got_tourney.image_url_https,
            start_date=datetime(2016, 3, 31, 0, 0),
            end_date=datetime(2016, 3, 31, 0, 0),
            last_modified_at=got_tourney.last_modified_at,
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.COLLEGE)
            ])
        self.assertEqual(got_tourney, want_tourney)
  def testParseTournamentInfo(self):
    """Verify we can parse the correct tourney info from the landing page.

    Two landing pages are parsed: a linked (single-division) tournament and a
    multi-division tournament. Each parsed result is compared against a
    fully spelled-out expected Tournament.
    """
    url = 'http://a'
    # NOTE(review): '%M' is the strptime directive for minutes, not month.
    # The expected values below are left exactly as they were so this test
    # keeps matching whatever the crawler currently produces - confirm
    # against the crawler's format string and switch both to '%m' together.
    date_fmt = '%M/%d/%Y'
    datetime_fmt = '%M/%d/%Y:%H'

    # --- Linked tournament: OPEN division, no age restriction. ---
    content = self.testdata.GetLinkedTournamentLandingPage()
    expected_sectionals = game_model.Tournament(
        id_str='', name='East New England Men\'s Sectionals', url=url,
        sub_tournaments=[
            game_model.SubTournament(
                age_bracket=scores_messages.AgeBracket.NO_RESTRICTION,
                division=scores_messages.Division.OPEN)
        ],
        start_date=datetime.datetime.strptime('8/29/2015', date_fmt),
        end_date=datetime.datetime.strptime('8/30/2015:8', datetime_fmt))
    actual_sectionals = self.crawler.ParseTournamentInfo(
        content, url, '',
        scores_messages.Division.OPEN,
        scores_messages.AgeBracket.NO_RESTRICTION)
    self.assertEqual('', actual_sectionals.id_str)
    self.assertEqual(expected_sectionals, actual_sectionals)

    # --- Multi-division tournament: OPEN division, COLLEGE age bracket. ---
    # The expected sub-tournament is built separately here rather than by
    # mutating the sectionals expectation, so the two cases cannot leak
    # state into each other.
    content = self.testdata.GetMultiDivisionTournamentLandingPage()
    expected_nationals = game_model.Tournament(
        id_str='', name='USA Ultimate D-I College Championships', url=url,
        sub_tournaments=[
            game_model.SubTournament(
                age_bracket=scores_messages.AgeBracket.COLLEGE,
                division=scores_messages.Division.OPEN)
        ],
        start_date=datetime.datetime.strptime('5/22/2015', date_fmt),
        end_date=datetime.datetime.strptime('5/23/2015:8', datetime_fmt))
    actual_nationals = self.crawler.ParseTournamentInfo(
        content, url, '',
        scores_messages.Division.OPEN, scores_messages.AgeBracket.COLLEGE)

    self.assertEqual('', actual_nationals.id_str)
    self.assertEqual(expected_nationals, actual_nationals)
  def get(self):
    """Queues a crawl task for each division on a tourney landing page.

    The tournament name is read from the 'name' request parameter and the
    landing page is fetched. For every division the crawler reports, a
    '/tasks/sr/crawl_tournament' task is added to the 'score-reporter'
    queue. A Tournament entity is stored if none exists yet; an existing
    one is rewritten only when more divisions were found than are stored,
    or when the dates or image URL differ.
    """
    tourney_name = self.request.get('name', '')
    if not tourney_name:
      WriteError('No tournament name specified', self.response)
      return

    page = FetchUsauPage(tourney_name)
    if page.status_code != 200:
      WriteError('Tourney page not found', self.response)
      return

    crawler = score_reporter_crawler.ScoreReporterCrawler()
    divisions = crawler.GetDivisions(page.content)
    image_url = crawler.GetTourneyImageUrl(page.content)
    start_date, end_date = crawler.GetDates(page.content)

    key = game_model.tourney_key_full(tourney_name)
    crawled = game_model.Tournament(
        key=key,
        id_str=tourney_name,
        url='%s%s' % (USAU_URL_PREFIX, tourney_name),
        name=tourney_name.replace('-', ' '),
        start_date=start_date,
        end_date=end_date,
        image_url_https=image_url,
        last_modified_at=datetime.utcnow())

    # Each entry is indexed as (division, age bracket, url suffix).
    for info in divisions:
      division, age_bracket = info[0], info[1]
      crawled.sub_tournaments.append(
          game_model.SubTournament(
              division=division, age_bracket=age_bracket))
      taskqueue.add(
          url='/tasks/sr/crawl_tournament',
          method='GET',
          params={
              'url_suffix': info[2],
              'name': tourney_name,
              'division': division.name,
              'age_bracket': age_bracket.name,
          },
          queue_name='score-reporter')

    stored = key.get()
    if not stored:
      # First time this tournament is seen: store it as crawled.
      crawled.put()
      return

    # Only a growing division list counts as a change; a shorter or
    # equally long list does not trigger a rewrite on its own.
    needs_update = (
        len(crawled.sub_tournaments) > len(stored.sub_tournaments)
        or stored.start_date != crawled.start_date
        or stored.end_date != crawled.end_date
        or stored.image_url_https != crawled.image_url_https)
    if not needs_update:
      return

    refreshed_fields = (
        'image_url_https',
        'sub_tournaments',
        'last_modified_at',
        'start_date',
        'end_date',
    )
    for field in refreshed_fields:
      setattr(stored, field, getattr(crawled, field))
    stored.put()
Пример #4
0
    def testParseTourneyScores_updateDate(self, mock_add_queue):
        """Crawling scores marks the tournament as started and touches it.

        The tournament, two team-id lookups and one game (status UNKNOWN) are
        stored before the crawl task runs. Afterwards the game is expected to
        be FINAL, no new tasks are expected to be queued, and the tournament
        is expected to have has_started set and a last_modified_at no earlier
        than the start of the test.

        Args:
          mock_add_queue: Mock standing in for the task-queue add call
            (injected by a patch decorator that is outside this block).
        """
        self.SetHtmlResponse(FAKE_TOURNEY_SCORES_PAGE)
        params = {
            'url_suffix': 'schedule/Men/College-Men/',
            'name': 'my_tourney',
            'division': 'OPEN',
            'age_bracket': 'COLLEGE'
        }
        tourney_name = params['name']

        # The tournament, both team lookups and the game are stored up front.
        key = game_model.tourney_key_full(tourney_name)
        # Captured before the crawl so the last_modified_at bump can be
        # checked against it at the end.
        now = datetime.utcnow()
        tourney = game_model.Tournament(
            last_modified_at=datetime(2016, 5, 25, 0, 0),
            key=key,
            has_started=False,
            start_date=datetime(2016, 5, 31, 0, 0),
            end_date=datetime(2016, 5, 31, 0, 0),
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.COLLEGE)
            ],
            url='%s%s' % (score_reporter_handler.USAU_URL_PREFIX,
                          tourney_name),
            id_str=tourney_name,
            name='my tourney')
        tourney.put()

        game_model.TeamIdLookup(score_reporter_id='123',
                                score_reporter_tourney_id=['8%3d']).put()
        game_model.TeamIdLookup(score_reporter_id='456',
                                score_reporter_tourney_id=['g%3d']).put()

        game_info = score_reporter_crawler.GameInfo(
            '71984', 'my_tourney', 'my_tourney', scores_messages.Division.OPEN,
            scores_messages.AgeBracket.COLLEGE)
        game_info.status = 'Unknown'
        game = game_model.Game.FromGameInfo(game_info, {})
        self.assertEqual(scores_messages.GameStatus.UNKNOWN, game.game_status)
        game.put()

        response = self.testapp.get('/tasks/sr/crawl_tournament',
                                    params=params)
        self.assertEqual(200, response.status_int)

        # Nothing new should have been scheduled by the crawl.
        self.assertEqual(0, len(mock_add_queue.mock_calls))

        db_game = game_model.game_key(game).get()
        self.assertEqual(scores_messages.GameStatus.FINAL,
                         db_game.game_status)

        # Tournament details should have been updated.
        db_tourney = key.get()
        self.assertEqual(True, db_tourney.has_started)
        self.assertGreaterEqual(db_tourney.last_modified_at, now)
Пример #5
0
    def testTournamentSerialization(self):
        """Verify serialization between Tourney protobuf and ndb classes.

        Builds a game_model.Tournament with two sub-tournaments, converts it
        with ToProto() and compares the result against a hand-built
        scores_messages.Tournament. Dates are expected in the
        tweets.DATE_PARSE_FMT_STR text format.
        """
        now = datetime.datetime.utcnow()
        now_txt = now.strftime(tweets.DATE_PARSE_FMT_STR)
        tourney = game_model.Tournament(
            name='name',
            url='url',
            id_str='1234',
            start_date=now,
            end_date=now,
            last_modified_at=now,
            image_url_https='https_url',
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.NO_RESTRICTION),
                game_model.SubTournament(
                    division=scores_messages.Division.WOMENS,
                    age_bracket=scores_messages.AgeBracket.COLLEGE),
            ])
        tpb = tourney.ToProto()

        expected_tourney = scores_messages.Tournament()
        expected_tourney.name = 'name'
        expected_tourney.url = 'url'
        expected_tourney.id_str = '1234'
        expected_tourney.image_url_https = 'https_url'
        expected_tourney.start_date = now_txt
        expected_tourney.end_date = now_txt
        expected_tourney.last_modified_at = now_txt
        # NOTE(review): the expected order here (WOMENS, OPEN / COLLEGE,
        # NO_RESTRICTION) differs from the order the sub-tournaments are
        # given in above; presumably ToProto() reorders them - confirm.
        expected_tourney.divisions = [
            scores_messages.Division.WOMENS,
            scores_messages.Division.OPEN,
        ]
        expected_tourney.age_brackets = [
            scores_messages.AgeBracket.COLLEGE,
            scores_messages.AgeBracket.NO_RESTRICTION,
        ]
        self.assertEqual(expected_tourney, tpb)
Пример #6
0
    def testParseTourneyLandingPage(self, mock_add_queue):
        """Crawling a landing page stores the tourney and queues one crawl.

        The landing-page task is requested for a tournament that is not yet
        stored. One crawl task is expected for the OPEN / COLLEGE division,
        and the stored Tournament is expected to match the entity built
        below. Requesting the task a second time must not create a second
        Tournament entity.

        Args:
          mock_add_queue: Mock standing in for the task-queue add call
            (injected by a patch decorator that is outside this block).
        """
        self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
        # The tourney name is passed to the task as a query parameter.
        response = self.testapp.get(
            '/tasks/sr/list_tournament_details?name=my-tourney')
        self.assertEqual(200, response.status_int)

        calls = mock_add_queue.mock_calls
        self.assertEqual(1, len(calls))
        self.assertEqual(
            calls[0],
            mock.call(url='/tasks/sr/crawl_tournament',
                      method='GET',
                      params={
                          'url_suffix': 'schedule/Men/College-Men/',
                          'name': 'my-tourney',
                          'division': 'OPEN',
                          'age_bracket': 'COLLEGE'
                      },
                      queue_name='score-reporter'))

        key = game_model.tourney_key_full('my-tourney')
        got_tourney = key.get()
        image_url = '%s/%s' % (
            'https://play.usaultimate.org',
            'assets/1/15/EventLogoDimension/TCTLogo_510x340.jpg')
        # last_modified_at is copied from the stored entity, so it is
        # deliberately not verified by this comparison.
        want_tourney = game_model.Tournament(
            key=key,
            url='%s%s' %
            (score_reporter_handler.USAU_URL_PREFIX, 'my-tourney'),
            start_date=datetime(2016, 3, 31, 0, 0),
            image_url_https=image_url,
            end_date=datetime(2016, 3, 31, 0, 0),
            id_str='my-tourney',
            name='my tourney',
            last_modified_at=got_tourney.last_modified_at,
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.COLLEGE)
            ])
        self.assertEqual(got_tourney, want_tourney)

        # Crawl it again. There should still only be one tourney in the db.
        self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
        response = self.testapp.get(
            '/tasks/sr/list_tournament_details?name=my-tourney')
        self.assertEqual(200, response.status_int)
        all_tourneys = game_model.Tournament.query().fetch()
        self.assertEqual(1, len(all_tourneys))
Пример #7
0
    def ParseTournamentInfo(self, content, url, id, division, age_bracket):
        """Parses the tournament info from a tourney landing page.

        Args:
          content: Full HTML contents of tourney landing page.
          url: URL of tourney landing page; stored unchanged on the result.
          id: id of tourney landing page; stored as the result's id_str.
          division: Value accepted by scores_messages.Division(), e.g. the
            text form 'OPEN'.
          age_bracket: Value accepted by scores_messages.AgeBracket(), e.g.
            the text form 'COLLEGE'.

        Returns:
          A game_model.Tournament (not yet stored) holding the parsed name,
          the given id and url, and a single SubTournament for the given
          division / age bracket. start_date and end_date are set only when
          the page provides them; end_date is pushed 1 day and 8 hours past
          the parsed date.
        """
        parser = TournamentInfoParser()
        parser.feed(content)
        # The location is parsed but not stored yet; see the TODO below.
        city, state = parser.get_location()

        # TODO(P2): make call to Maps API to get geo pt.
        #   *OR* use URL from FieldMap link. But only do this if the
        #   tournament (or sub-tournament) is new. In crawling a tourney
        #   like nationals, first one division will be added and then
        #   other divisions need to be added correctly.
        tourney = game_model.Tournament(
            id_str=id,
            name=parser.get_name(),
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division(division),
                    age_bracket=scores_messages.AgeBracket(age_bracket))
            ],
            url=url)

        # '%m' is the month directive. The previous '%M' is the *minute*
        # directive, which left the month at its default of January and put
        # the month number into the minutes field.
        date_fmt_str = '%m/%d/%Y'
        # TODO(P2: use date/time appropriate for location)
        start_date_text = parser.get_start_date()
        if start_date_text:
            tourney.start_date = datetime.strptime(start_date_text,
                                                   date_fmt_str)

        # The end date needs to be after all the games are done.
        delta = timedelta(days=1, hours=8)
        end_date_text = parser.get_end_date()
        if end_date_text:
            tourney.end_date = datetime.strptime(end_date_text,
                                                 date_fmt_str) + delta

        return tourney
Пример #8
0
    def testGetTournaments(self, mock_add_queue, mock_app_identity):
        """Test non-trivial functionality of GetTournaments.

        Three tournaments are stored: one without games, one whose only game
        is 0-0, and one marked as started with a 0-0 game and a 1-2 game.
        GetTournaments is expected to return only the last tournament, with
        a single game attached.

        Args:
          mock_add_queue: Mock injected by a patch decorator outside this
            block; not used directly here.
          mock_app_identity: Mock injected by a patch decorator outside this
            block; not used directly here.
        """

        def put_tourney(name, has_started):
            # Stores a tournament that differs from the others only in its
            # name-derived fields and its has_started flag.
            game_model.Tournament(
                last_modified_at=datetime.utcnow(),
                key=game_model.tourney_key_full(name),
                has_started=has_started,
                start_date=datetime(2016, 5, 31, 0, 0),
                end_date=datetime(2016, 5, 31, 0, 0),
                sub_tournaments=[
                    game_model.SubTournament(
                        division=scores_messages.Division.OPEN,
                        age_bracket=scores_messages.AgeBracket.COLLEGE)
                ],
                url='%s%s' % (score_reporter_handler.USAU_URL_PREFIX, name),
                id_str=name,
                name=name).put()

        def put_game(tournament_id, id_str, scores):
            # Stores a game belonging to the given tournament.
            game_model.Game(scores=scores,
                            tournament_id=tournament_id,
                            id_str=id_str,
                            created_at=datetime.utcnow()).put()

        # Add 3 tournaments.
        # 1 with no games.
        put_tourney('no-games-tourney', has_started=False)

        # 1 with all games that have not started.
        put_tourney('not-started-tourney', has_started=False)
        put_game('not-started-tourney', 'a', [0, 0])

        # 1 with in-progress games and one game that hasn't started.
        in_progress_name = 'in-progress-tourney'
        put_tourney(in_progress_name, has_started=True)
        put_game(in_progress_name, 'b', [0, 0])
        put_game(in_progress_name, 'c', [1, 2])

        request = scores_messages.TournamentsRequest()
        response = self.api.GetTournaments(request)
        self.assertEqual(1, len(response.tournaments))
        self.assertEqual(in_progress_name, response.tournaments[0].name)
        self.assertEqual(1, len(response.tournaments[0].games))