def testParseTourneyLandingPage_updateTourneyNewDate(self, mock_add_queue):
    """Verify that re-listing a stored tournament replaces its stale dates.

    A tournament is first stored with start/end dates of 2016-05-31. The
    list_tournament_details handler is then run against
    FAKE_TOURNEY_LANDING_PAGE, after which the stored entity is expected to
    carry dates of 2016-03-31 and exactly one crawl task is expected to have
    been enqueued.

    Args:
      mock_add_queue: Mock whose recorded calls are compared with the
        expected crawl task. Presumably patched over taskqueue.add by a
        decorator outside this block -- TODO confirm.
    """
    tourney_id = 'my-tourney'
    tourney_url = '%s%s' % (score_reporter_handler.USAU_URL_PREFIX,
                            tourney_id)
    key = game_model.tourney_key_full(tourney_id)
    wrong_date_tourney = game_model.Tournament(
        last_modified_at=datetime.utcnow(),
        key=key,
        start_date=datetime(2016, 5, 31, 0, 0),
        end_date=datetime(2016, 5, 31, 0, 0),
        sub_tournaments=[
            game_model.SubTournament(
                division=scores_messages.Division.OPEN,
                age_bracket=scores_messages.AgeBracket.COLLEGE)
        ],
        url=tourney_url,
        id_str=tourney_id,
        name='my tourney')
    wrong_date_tourney.put()

    self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
    # The tournament id is handed to the handler as the 'name' query
    # parameter.
    response = self.testapp.get(
        '/tasks/sr/list_tournament_details?name=my-tourney')
    self.assertEqual(200, response.status_int)

    calls = mock_add_queue.mock_calls
    self.assertEqual(1, len(calls))
    self.assertEqual(
        calls[0],
        mock.call(
            url='/tasks/sr/crawl_tournament',
            method='GET',
            params={
                'url_suffix': 'schedule/Men/College-Men/',
                'name': 'my-tourney',
                'division': 'OPEN',
                'age_bracket': 'COLLEGE'
            },
            queue_name='score-reporter'))

    got_tourney = key.get()
    # The stored tourney should now carry the dates from the landing page.
    # image_url_https and last_modified_at are copied from the stored entity,
    # so their values are not checked by this comparison.
    want_tourney = game_model.Tournament(
        key=key,
        url=tourney_url,
        id_str=tourney_id,
        name='my tourney',
        image_url_https=got_tourney.image_url_https,
        start_date=datetime(2016, 3, 31, 0, 0),
        end_date=datetime(2016, 3, 31, 0, 0),
        last_modified_at=got_tourney.last_modified_at,
        sub_tournaments=[
            game_model.SubTournament(
                division=scores_messages.Division.OPEN,
                age_bracket=scores_messages.AgeBracket.COLLEGE)
        ])
    self.assertEqual(got_tourney, want_tourney)
def testParseTournamentInfo(self):
    """Verify we can parse the correct tourney info from the landing page.

    Two fixture pages are parsed: a single-division sectionals page and a
    multi-division nationals page. For each, the Tournament returned by
    ParseTournamentInfo is compared with a hand-built expectation.
    """
    url = 'http://a'
    # '%m' is the zero-padded-or-not month directive. ('%M' is *minutes* and
    # would leave the month at its default of January.)
    date_fmt = '%m/%d/%Y'
    # Expected end dates carry an explicit hour component after the colon.
    date_hour_fmt = '%m/%d/%Y:%H'

    # --- Sectionals: OPEN division, no age restriction. ---
    content = self.testdata.GetLinkedTournamentLandingPage()
    expected_sectionals = game_model.Tournament(
        id_str='',
        name='East New England Men\'s Sectionals',
        url=url,
        sub_tournaments=[
            game_model.SubTournament(
                age_bracket=scores_messages.AgeBracket.NO_RESTRICTION,
                division=scores_messages.Division.OPEN)
        ],
        start_date=datetime.datetime.strptime('8/29/2015', date_fmt),
        end_date=datetime.datetime.strptime('8/30/2015:8', date_hour_fmt))
    actual_sectionals = self.crawler.ParseTournamentInfo(
        content, url, '',
        scores_messages.Division.OPEN,
        scores_messages.AgeBracket.NO_RESTRICTION)
    # assertEqual (rather than assertTrue on an ==) reports both values when
    # the check fails.
    self.assertEqual('', actual_sectionals.id_str)
    expected_sectionals.id_str = actual_sectionals.id_str
    self.assertEqual(expected_sectionals, actual_sectionals)

    # --- Nationals: OPEN division, COLLEGE age bracket. ---
    content = self.testdata.GetMultiDivisionTournamentLandingPage()
    # A dedicated sub-tournament object is built here so that the nationals
    # expectation does not depend on mutating the sectionals one.
    expected_nationals = game_model.Tournament(
        id_str='',
        name='USA Ultimate D-I College Championships',
        url=url,
        sub_tournaments=[
            game_model.SubTournament(
                age_bracket=scores_messages.AgeBracket.COLLEGE,
                division=scores_messages.Division.OPEN)
        ],
        start_date=datetime.datetime.strptime('5/22/2015', date_fmt),
        end_date=datetime.datetime.strptime('5/23/2015:8', date_hour_fmt))
    actual_nationals = self.crawler.ParseTournamentInfo(
        content, url, '',
        scores_messages.Division.OPEN,
        scores_messages.AgeBracket.COLLEGE)
    self.assertEqual('', actual_nationals.id_str)
    expected_nationals.id_str = actual_nationals.id_str
    self.assertEqual(expected_nationals, actual_nationals)
def get(self):
    """Schedules crawling for each division on the tourney landing page.

    The tournament id is read from the 'name' request parameter and its page
    is fetched with FetchUsauPage. One '/tasks/sr/crawl_tournament' task is
    enqueued per division found on the page. The Tournament entity is stored
    when no entity exists for the key; an existing entity is rewritten only
    when a change is detected (more divisions than stored, or a different
    start date, end date or image URL).

    An error is written to the response, and nothing else happens, when the
    name is missing or the page fetch does not return status 200.
    """
    tourney_id = self.request.get('name', '')
    if not tourney_id:
        WriteError('No tournament name specified', self.response)
        return

    page = FetchUsauPage(tourney_id)
    if page.status_code != 200:
        WriteError('Tourney page not found', self.response)
        return

    crawler = score_reporter_crawler.ScoreReporterCrawler()
    divisions = crawler.GetDivisions(page.content)
    image_url = crawler.GetTourneyImageUrl(page.content)
    start_date, end_date = crawler.GetDates(page.content)

    key = game_model.tourney_key_full(tourney_id)
    fresh = game_model.Tournament(
        key=key,
        id_str=tourney_id,
        url='%s%s' % (USAU_URL_PREFIX, tourney_id),
        name=tourney_id.replace('-', ' '),
        start_date=start_date,
        end_date=end_date,
        image_url_https=image_url,
        last_modified_at=datetime.utcnow())

    # Each entry is indexed as (division, age bracket, schedule URL suffix).
    for info in divisions:
        division, age_bracket = info[0], info[1]
        fresh.sub_tournaments.append(
            game_model.SubTournament(
                division=division, age_bracket=age_bracket))
        task_params = {
            'url_suffix': info[2],
            'name': tourney_id,
            'division': division.name,
            'age_bracket': age_bracket.name,
        }
        taskqueue.add(url='/tasks/sr/crawl_tournament', method='GET',
                      params=task_params, queue_name='score-reporter')

    stored = key.get()
    if not stored:
        # First time this tournament is seen: store it as crawled.
        fresh.put()
        return

    # Only a growing division count is treated as a division change.
    changed = (
        len(fresh.sub_tournaments) > len(stored.sub_tournaments)
        or stored.start_date != fresh.start_date
        or stored.end_date != fresh.end_date
        or stored.image_url_https != fresh.image_url_https)
    if not changed:
        return

    stored.start_date = fresh.start_date
    stored.end_date = fresh.end_date
    stored.image_url_https = fresh.image_url_https
    stored.sub_tournaments = fresh.sub_tournaments
    stored.last_modified_at = fresh.last_modified_at
    stored.put()
def testParseTourneyScores_updateDate(self, mock_add_queue):
    """Verify crawling a scores page refreshes the stored tournament.

    The tournament, two team lookups and one game (status 'Unknown') are
    stored up front. After the crawl_tournament handler runs against
    FAKE_TOURNEY_SCORES_PAGE the test expects: no tasks enqueued, the game
    marked FINAL, and the tournament flagged as started with a
    last_modified_at no earlier than the start of the test.

    Args:
      mock_add_queue: Mock whose recorded calls are inspected. Presumably
        patched over taskqueue.add by a decorator outside this block --
        TODO confirm.
    """
    # Scores page for the crawl; the teams, the tournament and the game it
    # refers to are all added to the database below.
    self.SetHtmlResponse(FAKE_TOURNEY_SCORES_PAGE)
    params = {
        'url_suffix': 'schedule/Men/College-Men/',
        'name': 'my_tourney',
        'division': 'OPEN',
        'age_bracket': 'COLLEGE'
    }
    tourney_id = params['name']

    key = game_model.tourney_key_full(tourney_id)
    # Captured before the crawl so the final last_modified_at can be
    # compared against it.
    now = datetime.utcnow()
    tourney = game_model.Tournament(
        last_modified_at=datetime(2016, 5, 25, 0, 0),
        key=key,
        has_started=False,
        start_date=datetime(2016, 5, 31, 0, 0),
        end_date=datetime(2016, 5, 31, 0, 0),
        sub_tournaments=[
            game_model.SubTournament(
                division=scores_messages.Division.OPEN,
                age_bracket=scores_messages.AgeBracket.COLLEGE)
        ],
        url='%s%s' % (score_reporter_handler.USAU_URL_PREFIX, tourney_id),
        id_str=tourney_id,
        name='my tourney')
    tourney.put()

    game_model.TeamIdLookup(
        score_reporter_id='123',
        score_reporter_tourney_id=['8%3d']).put()
    game_model.TeamIdLookup(
        score_reporter_id='456',
        score_reporter_tourney_id=['g%3d']).put()

    game_info = score_reporter_crawler.GameInfo(
        '71984', 'my_tourney', 'my_tourney',
        scores_messages.Division.OPEN,
        scores_messages.AgeBracket.COLLEGE)
    game_info.status = 'Unknown'
    game = game_model.Game.FromGameInfo(game_info, {})
    self.assertEqual(scores_messages.GameStatus.UNKNOWN, game.game_status)
    game.put()

    response = self.testapp.get('/tasks/sr/crawl_tournament', params=params)
    self.assertEqual(200, response.status_int)

    # Everything is already known, so no follow-up tasks are expected.
    calls = mock_add_queue.mock_calls
    self.assertEqual(0, len(calls))

    db_game = game_model.game_key(game).get()
    self.assertEqual(scores_messages.GameStatus.FINAL, db_game.game_status)

    # Tournament details should have been updated.
    db_tourney = key.get()
    self.assertTrue(db_tourney.has_started)
    self.assertGreaterEqual(db_tourney.last_modified_at, now)
def testTournamentSerialization(self):
    """Verify serialization between Tourney protobuf and ndb classes.

    Builds a game_model.Tournament with two sub-tournaments, converts it
    with ToProto(), and compares the result with a hand-built
    scores_messages.Tournament.
    """
    now = datetime.datetime.utcnow()
    # The message carries timestamps as text in the tweets date format.
    now_txt = now.strftime(tweets.DATE_PARSE_FMT_STR)

    tourney = game_model.Tournament()
    tourney.name = 'name'
    tourney.url = 'url'
    tourney.id_str = '1234'
    tourney.start_date = now
    tourney.end_date = now
    tourney.last_modified_at = now
    tourney.image_url_https = 'https_url'
    tourney.sub_tournaments = [
        game_model.SubTournament(
            division=scores_messages.Division.OPEN,
            age_bracket=scores_messages.AgeBracket.NO_RESTRICTION),
        game_model.SubTournament(
            division=scores_messages.Division.WOMENS,
            age_bracket=scores_messages.AgeBracket.COLLEGE),
    ]

    expected_tourney = scores_messages.Tournament()
    expected_tourney.name = 'name'
    expected_tourney.url = 'url'
    expected_tourney.id_str = '1234'
    expected_tourney.image_url_https = 'https_url'
    expected_tourney.start_date = now_txt
    expected_tourney.end_date = now_txt
    expected_tourney.last_modified_at = now_txt
    # NOTE(review): divisions and age brackets are expected in a different
    # order from the sub_tournaments above; presumably ToProto() orders or
    # de-duplicates them -- confirm against ToProto().
    expected_tourney.divisions = [
        scores_messages.Division.WOMENS,
        scores_messages.Division.OPEN,
    ]
    expected_tourney.age_brackets = [
        scores_messages.AgeBracket.COLLEGE,
        scores_messages.AgeBracket.NO_RESTRICTION,
    ]

    self.assertEqual(expected_tourney, tourney.ToProto())
def testParseTourneyLandingPage(self, mock_add_queue):
    """Verify the landing-page handler stores a tourney and queues a crawl.

    Runs the list_tournament_details handler against
    FAKE_TOURNEY_LANDING_PAGE and checks that one crawl task is enqueued and
    that the stored Tournament matches the expected entity. The handler is
    then run a second time to check that only one Tournament exists.

    Args:
      mock_add_queue: Mock whose recorded calls are compared with the
        expected crawl task. Presumably patched over taskqueue.add by a
        decorator outside this block -- TODO confirm.
    """
    list_url = '/tasks/sr/list_tournament_details?name=my-tourney'

    self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
    # The tournament id is handed to the handler as the 'name' query
    # parameter.
    response = self.testapp.get(list_url)
    self.assertEqual(200, response.status_int)

    calls = mock_add_queue.mock_calls
    self.assertEqual(1, len(calls))
    self.assertEqual(
        calls[0],
        mock.call(
            url='/tasks/sr/crawl_tournament',
            method='GET',
            params={
                'url_suffix': 'schedule/Men/College-Men/',
                'name': 'my-tourney',
                'division': 'OPEN',
                'age_bracket': 'COLLEGE'
            },
            queue_name='score-reporter'))

    key = game_model.tourney_key_full('my-tourney')
    got_tourney = key.get()
    url = '%s/%s' % (
        'https://play.usaultimate.org',
        'assets/1/15/EventLogoDimension/TCTLogo_510x340.jpg')
    # last_modified_at is copied from the stored entity, so its value is not
    # checked by this comparison.
    want_tourney = game_model.Tournament(
        key=key,
        url='%s%s' % (score_reporter_handler.USAU_URL_PREFIX, 'my-tourney'),
        start_date=datetime(2016, 3, 31, 0, 0),
        image_url_https=url,
        end_date=datetime(2016, 3, 31, 0, 0),
        id_str='my-tourney',
        name='my tourney',
        last_modified_at=got_tourney.last_modified_at,
        sub_tournaments=[
            game_model.SubTournament(
                division=scores_messages.Division.OPEN,
                age_bracket=scores_messages.AgeBracket.COLLEGE)
        ])
    self.assertEqual(got_tourney, want_tourney)

    # Crawl it again. There should still only be one tourney in the db.
    self.SetHtmlResponse(FAKE_TOURNEY_LANDING_PAGE)
    response = self.testapp.get(list_url)
    self.assertEqual(200, response.status_int)
    all_tourneys = game_model.Tournament.query().fetch()
    self.assertEqual(1, len(all_tourneys))
def ParseTournamentInfo(self, content, url, id, division, age_bracket):
    """Parses the tournament info.

    Args:
      content: Full HTML contents of tourney landing page.
      url: URL of tourney landing page, relative to EVENT_PREFIX.
      id: id of tourney landing page. (The name shadows the builtin but is
        kept so existing callers keep working.)
      division: Value accepted by the scores_messages.Division constructor,
        e.g. the text form 'OPEN'.
      age_bracket: Value accepted by the scores_messages.AgeBracket
        constructor, e.g. the text form 'COLLEGE'.

    Returns:
      game_model.Tournament object that can be used for interacting with
      the datastore objects. Namely, the ID is unique and will be
      consistently returned for the same tournament. start_date and
      end_date are only set when the page supplies them; end_date is the
      parsed end date plus one day and eight hours.

    Raises:
      ValueError: if a date found on the page is not in month/day/year form
        (raised by datetime.strptime).
    """
    parser = TournamentInfoParser()
    parser.feed(content)
    # The location is parsed but not used yet; see the TODO below.
    city, state = parser.get_location()
    # TODO(P2): make call to Maps API to get geo pt.
    # *OR* use URL from FieldMap link. But only do this if the
    # tournament (or sub-tournament) is new. In crawling a tourney
    # like nationals, first one division will be added and then
    # other divisions need to be added correctly.
    tourney = game_model.Tournament(
        id_str=id,
        name=parser.get_name(),
        sub_tournaments=[
            game_model.SubTournament(
                division=scores_messages.Division(division),
                age_bracket=scores_messages.AgeBracket(age_bracket))
        ],
        url=url)

    # '%m' is the month directive. The previous '%M' is the *minute*
    # directive, which left every date in January with a stray minute
    # offset.
    date_fmt_str = '%m/%d/%Y'
    # TODO(P2: use date/time appropriate for location)
    start_text = parser.get_start_date()
    if start_text:
        tourney.start_date = datetime.strptime(start_text, date_fmt_str)

    end_text = parser.get_end_date()
    if end_text:
        # The end date needs to be after all the games are done.
        delta = timedelta(days=1, hours=8)
        tourney.end_date = datetime.strptime(end_text, date_fmt_str) + delta
    return tourney
def testGetTournaments(self, mock_add_queue, mock_app_identity):
    """Test non-trivial functionality of GetTournaments.

    Three tournaments are stored: one without games, one marked not started
    with a single 0-0 game, and one marked started with a 0-0 game and a 1-2
    game. The API response is expected to contain only the last tournament,
    with exactly one game.

    Args:
      mock_add_queue: Unused in the body; presumably supplied by a patch
        decorator outside this block -- TODO confirm.
      mock_app_identity: Unused in the body; presumably supplied by a patch
        decorator outside this block -- TODO confirm.
    """

    def put_tourney(name, has_started):
        # Stores a single-division (OPEN / COLLEGE) tournament keyed and
        # named after `name`.
        game_model.Tournament(
            last_modified_at=datetime.utcnow(),
            key=game_model.tourney_key_full(name),
            has_started=has_started,
            start_date=datetime(2016, 5, 31, 0, 0),
            end_date=datetime(2016, 5, 31, 0, 0),
            sub_tournaments=[
                game_model.SubTournament(
                    division=scores_messages.Division.OPEN,
                    age_bracket=scores_messages.AgeBracket.COLLEGE)
            ],
            url='%s%s' % (score_reporter_handler.USAU_URL_PREFIX, name),
            id_str=name,
            name=name).put()

    def put_game(tourney_name, id_str, scores):
        # Stores a game with the given scores for the named tournament.
        game_model.Game(
            scores=scores,
            tournament_id=tourney_name,
            id_str=id_str,
            created_at=datetime.utcnow()).put()

    # Add 3 tournaments.
    # 1 with no games.
    put_tourney('no-games-tourney', False)

    # 1 with all games that have not started.
    put_tourney('not-started-tourney', False)
    put_game('not-started-tourney', 'a', [0, 0])

    # 1 with in-progress games and one game that hasn't started.
    in_progress_name = 'in-progress-tourney'
    put_tourney(in_progress_name, True)
    put_game(in_progress_name, 'b', [0, 0])
    put_game(in_progress_name, 'c', [1, 2])

    request = scores_messages.TournamentsRequest()
    response = self.api.GetTournaments(request)
    self.assertEqual(1, len(response.tournaments))
    self.assertEqual(in_progress_name, response.tournaments[0].name)
    self.assertEqual(1, len(response.tournaments[0].games))