def _retrieve_all_teams(year, season_file=None):
    """
    Collect the raw stats rows for every NBA team in a season.

    Pulls the season page, extracts the team and opponent stats tables from
    it, and merges the rows of both tables into a single dictionary by way of
    ``_add_stats_data``.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    season_file : string (optional)
        Link with filename to the local season page.

    Returns
    -------
    tuple
        A ``tuple`` of the team_data_dict and the year that was actually
        used to pull stats. ``(None, None)`` is returned when neither stats
        table is found.
    """
    if not year:
        year = utils._find_year_for_season('nba')
        # The delays to the 2020 NBA season broke the default season
        # selection once the original season should have concluded. Probe
        # the 2021 page and drop back a year if it cannot be fetched. The
        # probe result itself is not needed.
        # NOTE(review): this compares against the integer 2021, so it only
        # matches if the helper above returns an int -- confirm.
        if year == 2021:
            try:
                pq(SEASON_PAGE_URL % year)
            except HTTPError:
                year = str(int(year) - 1)
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous_year):
                year = previous_year

    season_doc = utils._pull_page(SEASON_PAGE_URL % year, season_file)
    team_rows = utils._get_stats_table(season_doc, 'div#all_team-stats-base')
    opponent_rows = utils._get_stats_table(season_doc,
                                           'div#all_opponent-stats-base')
    if not (team_rows or opponent_rows):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in (team_rows, opponent_rows):
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
def _retrieve_all_teams(year, season_page=None, offensive_stats=None,
                        defensive_stats=None):
    """
    Collect the raw stats rows for every NCAAF team in a season.

    Pulls the season standings page plus the offensive and defensive stats
    pages, extracts one table from each, and merges the rows of all three
    tables into a single dictionary by way of ``_add_stats_data``.

    Note that this method is called directly once Teams is invoked and does
    not need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    season_page : string (optional)
        Link with filename to the local season stats page.
    offensive_stats : string (optional)
        Link with filename to the local offensive stats page.
    defensive_stats : string (optional)
        Link with filename to the local defensive stats page.

    Returns
    -------
    tuple
        A ``tuple`` of the team_data_dict and the year that was actually
        used to pull stats. ``(None, None)`` is returned when none of the
        three stats tables is found.
    """
    team_data_dict = {}

    if not year:
        year = utils._find_year_for_season('ncaaf')
        # If stats for the requested season do not exist yet (as is the case
        # right before a new season begins), attempt to pull the previous
        # year's stats. If it exists, use the previous year instead.
        if not utils._url_exists(SEASON_PAGE_URL % year) and \
           utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)):
            year = str(int(year) - 1)
    doc = utils._pull_page(SEASON_PAGE_URL % year, season_page)
    teams_list = utils._get_stats_table(doc, 'div#div_standings')
    offense_doc = utils._pull_page(OFFENSIVE_STATS_URL % year,
                                   offensive_stats)
    offense_list = utils._get_stats_table(offense_doc, 'table#offense')
    defense_doc = utils._pull_page(DEFENSIVE_STATS_URL % year,
                                   defensive_stats)
    defense_list = utils._get_stats_table(defense_doc, 'table#defense')
    if not teams_list and not offense_list and not defense_list:
        utils._no_data_found()
        # Stop here instead of falling through to the merge loop with three
        # empty tables.
        return None, None
    for stats_list in [teams_list, offense_list, defense_list]:
        team_data_dict = _add_stats_data(stats_list, team_data_dict)
    return team_data_dict, year
def _retrieve_all_teams(year, standings_file=None, teams_file=None):
    """
    Collect the raw stats rows for every MLB team in a season.

    Pulls the standings page and the team stats page, extracts the overall
    standings table plus the batting and pitching tables, and merges the
    rows of all three tables into a single dictionary by way of
    ``_add_stats_data``.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    standings_file : string (optional)
        Link with filename to the local standings page.
    teams_file : string (optional)
        Link with filename to the local teams page.

    Returns
    -------
    tuple
        A ``tuple`` of the team_data_dict and the year that was actually
        used to pull stats. ``(None, None)`` is returned when none of the
        three stats tables is found.
    """
    if not year:
        year = utils._find_year_for_season('mlb')
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(STANDINGS_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(STANDINGS_URL % previous_year):
                year = previous_year

    standings_doc = utils._pull_page(STANDINGS_URL % year, standings_file)
    standings = utils._get_stats_table(standings_doc,
                                       'div#all_expanded_standings_overall')

    # Batting and pitching tables both live on the team stats page and share
    # an id prefix.
    teams_doc = utils._pull_page(TEAM_STATS_URL % year, teams_file)
    table_id = 'div#all_teams_standard_%s'
    batting_stats = utils._get_stats_table(teams_doc, table_id % 'batting')
    pitching_stats = utils._get_stats_table(teams_doc, table_id % 'pitching')

    if not (standings or batting_stats or pitching_stats):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in (standings, batting_stats, pitching_stats):
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
def _pull_schedule(self, abbreviation, year):
    """
    Download a team's schedule page and register its games.

    Fetches the schedule page for the given team and season, extracts the
    regular season table and passes its rows to
    ``self._add_games_to_schedule``. If the page also contains a playoffs
    table, its rows are passed along as well, flagged with ``True``.

    Parameters
    ----------
    abbreviation : string
        A team's short name, such as 'DET' for the Detroit Pistons.
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    """
    if not year:
        year = utils._find_year_for_season('nba')
        team = abbreviation.lower()
        # The delays to the 2020 NBA season broke the default season
        # selection once the original season should have concluded. Probe
        # the 2021 page and drop back a year if it cannot be fetched. The
        # probe result itself is not needed.
        # NOTE(review): this compares against the integer 2021, so it only
        # matches if the helper above returns an int -- confirm.
        if year == 2021:
            try:
                pq(SCHEDULE_URL % (team, year))
            except HTTPError:
                year = str(int(year) - 1)
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(SCHEDULE_URL % (team, year)):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SCHEDULE_URL % (team, previous_year)):
                year = previous_year

    # NOTE(review): the existence checks above lowercase the abbreviation,
    # but this fetch uses it exactly as given -- confirm this is intended.
    doc = pq(SCHEDULE_URL % (abbreviation, year))
    schedule = utils._get_stats_table(doc, 'table#games')
    if not schedule:
        utils._no_data_found()
        return

    self._add_games_to_schedule(schedule)
    # The playoffs table is only looked up when its id appears in the page.
    if 'id="games_playoffs"' in str(doc):
        self._add_games_to_schedule(
            utils._get_stats_table(doc, 'table#games_playoffs'), True)
def _pull_schedule(self, abbreviation, year):
    """
    Download a team's schedule page and create a Game for each row.

    Fetches the schedule page for the given team and season, extracts the
    schedule table, and appends one ``Game`` instance per table row to
    ``self._games``. Rows containing ``class="thead"`` are skipped.

    Parameters
    ----------
    abbreviation : string
        A team's short name, such as 'PURDUE' for the Purdue Boilermakers.
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    """
    team = abbreviation.lower()
    if not year:
        year = utils._find_year_for_season('ncaab')
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(SCHEDULE_URL % (team, year)):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SCHEDULE_URL % (team, previous_year)):
                year = previous_year

    page = pq(SCHEDULE_URL % (team, year))
    schedule = utils._get_stats_table(page, 'table#schedule')
    if not schedule:
        utils._no_data_found()
        return

    for row in schedule:
        # Rows marked with the "thead" class are not turned into games.
        if 'class="thead"' not in str(row):
            self._games.append(Game(row))
def _pull_schedule(self, abbreviation, year):
    """
    Download a team's schedule page and register its games.

    Fetches the schedule page for the given team and season, extracts the
    regular season game log and passes its rows to
    ``self._add_games_to_schedule`` tagged as ``REGULAR_SEASON``. If the
    page also contains a playoff game log, its rows are passed along tagged
    as ``POST_SEASON``.

    Parameters
    ----------
    abbreviation : string
        A team's short name, such as 'NWE' for the New England Patriots.
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    """
    team = abbreviation.lower()
    if not year:
        year = utils._find_year_for_season('nfl')
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(SCHEDULE_URL % (team, year)):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SCHEDULE_URL % (team, previous_year)):
                year = previous_year

    page = pq(SCHEDULE_URL % (team, year))
    # The game log table ids on the page carry the season year as a suffix.
    schedule = utils._get_stats_table(page, 'table#gamelog%s' % year)
    if not schedule:
        utils._no_data_found()
        return

    self._add_games_to_schedule(schedule, REGULAR_SEASON, year)
    # The playoff table is only looked up when its id appears in the page.
    if 'playoff_gamelog%s' % year in str(page):
        self._add_games_to_schedule(
            utils._get_stats_table(page, 'table#playoff_gamelog%s' % year),
            POST_SEASON,
            year)
def _retrieve_all_teams(year, basic_stats=None, basic_opp_stats=None,
                        adv_stats=None, adv_opp_stats=None):
    """
    Collect the raw stats rows for every NCAAB team in a season.

    Pulls the basic, basic opponent, advanced and advanced opponent stats
    pages, extracts one table from each, and merges the rows of all four
    tables into a single dictionary by way of ``_add_stats_data``.

    Note that this method is called directly once Teams is invoked and does
    not need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When falsy, the season is
        selected automatically.
    basic_stats : string (optional)
        Link with filename to the local basic stats page.
    basic_opp_stats : string (optional)
        Link with filename to the local basic opponent stats page.
    adv_stats : string (optional)
        Link with filename to the local advanced stats page.
    adv_opp_stats : string (optional)
        Link with filename to the local advanced opponents stats page.

    Returns
    -------
    tuple
        A ``tuple`` of the team_data_dict and the year that was actually
        used to pull stats. ``(None, None)`` is returned when none of the
        four stats tables is found.
    """
    if not year:
        year = utils._find_year_for_season('ncaab')
        # Right before a new season begins the requested page may not exist
        # yet. Switch to the previous season, but only if its page exists.
        if not utils._url_exists(BASIC_STATS_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(BASIC_STATS_URL % previous_year):
                year = previous_year

    # (URL template, local file, table selector) for each stats page, in the
    # order the pages are pulled and merged.
    sources = (
        (BASIC_STATS_URL, basic_stats, 'table#basic_school_stats'),
        (BASIC_OPPONENT_STATS_URL, basic_opp_stats, 'table#basic_opp_stats'),
        (ADVANCED_STATS_URL, adv_stats, 'table#adv_school_stats'),
        (ADVANCED_OPPONENT_STATS_URL, adv_opp_stats, 'table#adv_opp_stats'),
    )
    tables = [
        utils._get_stats_table(utils._pull_page(url % year, local_file),
                               selector)
        for url, local_file, selector in sources
    ]
    if not any(tables):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in tables:
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
def test_invalid_url_exception_returns_false(self, *args, **kwargs):
    """``utils._url_exists`` must be falsy for the exception test URL."""
    # NOTE(review): *args/**kwargs presumably absorb arguments injected by
    # patch decorators that are not visible here -- confirm.
    assert not utils._url_exists('http://www.exception.com')
def test_404_url_returns_false(self, *args, **kwargs):
    """``utils._url_exists`` must be falsy for the 404 test URL."""
    # NOTE(review): *args/**kwargs presumably absorb arguments injected by
    # patch decorators that are not visible here -- confirm.
    assert not utils._url_exists('http://www.404.com/doesnt/exist')
def test_valid_url_returns_true(self, *args, **kwargs):
    """``utils._url_exists`` must be truthy for the valid test URL."""
    # NOTE(review): *args/**kwargs presumably absorb arguments injected by
    # patch decorators that are not visible here -- confirm.
    assert utils._url_exists('http://www.good_url.com/this/is/valid')