Пример #1
0
def _retrieve_all_teams(year, season_file=None):
    """
    Collect the NBA team and opponent stats for every team in a season.

    Pulls the season page, extracts the team and opponent base stats tables,
    and folds the rows of both tables into one dictionary by passing them
    through ``_add_stats_data``.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When empty, the season is
        looked up automatically and may fall back to the previous season if
        the current season's page is not available yet.
    season_file : string (optional)
        Link with filename to the local season page.

    Returns
    -------
    tuple
        Returns a ``tuple`` of the team_data_dict and the year that was used
        to pull the stats. ``(None, None)`` is returned when both stats tables
        come back empty.
    """
    if not year:
        year = utils._find_year_for_season('nba')
        # The 2020 NBA season was delayed, so the default season selection
        # is wrong once the original season should have ended. If the 2021
        # page cannot be fetched, step back to the previous season.
        # NOTE(review): this compares against the integer 2021, so the probe
        # only runs if `_find_year_for_season` returns an int - confirm.
        if year == 2021:
            try:
                pq(SEASON_PAGE_URL % year)
            except HTTPError:
                year = str(int(year) - 1)
        # Right before a new season begins its page does not exist yet. Use
        # the previous season instead, but only if that page does exist.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous_year):
                year = previous_year

    season_doc = utils._pull_page(SEASON_PAGE_URL % year, season_file)
    team_rows = utils._get_stats_table(season_doc, 'div#all_team-stats-base')
    opponent_rows = utils._get_stats_table(season_doc,
                                           'div#all_opponent-stats-base')
    if not (team_rows or opponent_rows):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in (team_rows, opponent_rows):
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
Пример #2
0
def _retrieve_all_teams(year, season_page=None, offensive_stats=None,
                        defensive_stats=None):
    """
    Collect the NCAAF standings, offensive and defensive stats for a season.

    Pulls the season standings page plus the offensive and defensive stats
    pages, extracts the relevant table from each, and folds the rows of all
    three tables into one dictionary by passing them through
    ``_add_stats_data``.

    Note that this method is called directly once Teams is invoked and does
    not need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When empty, the season is
        looked up automatically and may fall back to the previous season if
        the current season's page is not available yet.
    season_page : string (optional)
        Link with filename to the local season stats page.
    offensive_stats : string (optional)
        Link with filename to the local offensive stats page.
    defensive_stats : string (optional)
        Link with filename to the local defensive stats page.

    Returns
    -------
    tuple
        Returns a ``tuple`` of the team_data_dict and year which represent all
        stats for all teams, and the given year that should be used to pull
        stats from, respectively. ``(None, None)`` is returned when none of
        the three stats tables could be retrieved.
    """
    team_data_dict = {}

    if not year:
        year = utils._find_year_for_season('ncaaf')
        # If stats for the requested season do not exist yet (as is the case
        # right before a new season begins), attempt to pull the previous
        # year's stats. If it exists, use the previous year instead.
        if not utils._url_exists(SEASON_PAGE_URL % year) and \
           utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)):
            year = str(int(year) - 1)
    doc = utils._pull_page(SEASON_PAGE_URL % year, season_page)
    teams_list = utils._get_stats_table(doc, 'div#div_standings')
    offense_doc = utils._pull_page(OFFENSIVE_STATS_URL % year, offensive_stats)
    offense_list = utils._get_stats_table(offense_doc, 'table#offense')
    defense_doc = utils._pull_page(DEFENSIVE_STATS_URL % year, defensive_stats)
    defense_list = utils._get_stats_table(defense_doc, 'table#defense')
    if not teams_list and not offense_list and not defense_list:
        utils._no_data_found()
        # Nothing was retrieved, so stop here rather than handing the missing
        # tables to _add_stats_data below.
        return None, None
    for stats_list in [teams_list, offense_list, defense_list]:
        team_data_dict = _add_stats_data(stats_list, team_data_dict)
    return team_data_dict, year
Пример #3
0
def _retrieve_all_teams(year, standings_file=None, teams_file=None):
    """
    Collect the MLB standings, batting and pitching stats for a season.

    Pulls the standings page and the team stats page, extracts the expanded
    standings table plus the standard batting and pitching tables, and folds
    the rows of all three into one dictionary by passing them through
    ``_add_stats_data``.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When empty, the season is
        looked up automatically and may fall back to the previous season if
        the current season's standings page is not available yet.
    standings_file : string (optional)
        Link with filename to the local standings page.
    teams_file : string (optional)
        Link with filename to the local teams page.

    Returns
    -------
    tuple
        Returns a ``tuple`` of the team_data_dict and the year that was used
        to pull the stats. ``(None, None)`` is returned when all three stats
        tables come back empty.
    """
    if not year:
        year = utils._find_year_for_season('mlb')
        # Right before a new season begins its page does not exist yet. Use
        # the previous season instead, but only if that page does exist.
        if not utils._url_exists(STANDINGS_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(STANDINGS_URL % previous_year):
                year = previous_year

    standings_doc = utils._pull_page(STANDINGS_URL % year, standings_file)
    standings = utils._get_stats_table(standings_doc,
                                       'div#all_expanded_standings_overall')

    # Batting and pitching tables live on the same team stats page and only
    # differ in the suffix of their container id.
    teams_doc = utils._pull_page(TEAM_STATS_URL % year, teams_file)
    stats_div_template = 'div#all_teams_standard_%s'
    batting_stats = utils._get_stats_table(teams_doc,
                                           stats_div_template % 'batting')
    pitching_stats = utils._get_stats_table(teams_doc,
                                            stats_div_template % 'pitching')

    if not (standings or batting_stats or pitching_stats):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in (standings, batting_stats, pitching_stats):
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
Пример #4
0
    def _pull_schedule(self, abbreviation, year):
        """
        Download a team's NBA schedule and register its games.

        Fetches the team's schedule page as a PyQuery object, extracts the
        regular season games table and hands it to
        ``_add_games_to_schedule``. If the page also contains a playoffs
        table, those games are added as well with the playoff flag set.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'DET' for the Detroit Pistons.
        year : string
            The requested year to pull stats from. When empty, the season is
            looked up automatically and may fall back to the previous season
            if the current season's schedule is not available yet.
        """
        if not year:
            lowered = abbreviation.lower()
            year = utils._find_year_for_season('nba')
            # The 2020 NBA season was delayed, so the default season
            # selection is wrong once the original season should have ended.
            # If the 2021 schedule cannot be fetched, step back to the
            # previous season.
            # NOTE(review): this compares against the integer 2021, so the
            # probe only runs if `_find_year_for_season` returns an int -
            # confirm.
            if year == 2021:
                try:
                    pq(SCHEDULE_URL % (lowered, year))
                except HTTPError:
                    year = str(int(year) - 1)
            # Right before a new season begins its schedule does not exist
            # yet. Use the previous season instead, but only if that page
            # does exist.
            if not utils._url_exists(SCHEDULE_URL % (lowered, year)):
                previous_year = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (lowered, previous_year)):
                    year = previous_year

        # NOTE(review): the existence checks above use the lower-cased
        # abbreviation while this request uses it unchanged - confirm that
        # the difference is intentional.
        schedule_doc = pq(SCHEDULE_URL % (abbreviation, year))
        regular_season = utils._get_stats_table(schedule_doc, 'table#games')
        if not regular_season:
            utils._no_data_found()
            return
        self._add_games_to_schedule(regular_season)

        # The playoffs table is only present for teams that qualified.
        if 'id="games_playoffs"' not in str(schedule_doc):
            return
        postseason = utils._get_stats_table(schedule_doc,
                                            'table#games_playoffs')
        self._add_games_to_schedule(postseason, True)
Пример #5
0
    def _pull_schedule(self, abbreviation, year):
        """
        Download a team's NCAAB schedule and create a Game for every entry.

        Fetches the team's schedule page as a PyQuery object, extracts the
        schedule table, and appends a ``Game`` instance to the '_games'
        property for every row that is not a table header row.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'PURDUE' for the Purdue Boilermakers.
        year : string
            The requested year to pull stats from. When empty, the season is
            looked up automatically and may fall back to the previous season
            if the current season's schedule is not available yet.
        """
        team = abbreviation.lower()
        if not year:
            year = utils._find_year_for_season('ncaab')
            # Right before a new season begins its schedule does not exist
            # yet. Use the previous season instead, but only if that page
            # does exist.
            if not utils._url_exists(SCHEDULE_URL % (team, year)):
                previous_year = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (team, previous_year)):
                    year = previous_year

        schedule_doc = pq(SCHEDULE_URL % (team, year))
        rows = utils._get_stats_table(schedule_doc, 'table#schedule')
        if not rows:
            utils._no_data_found()
            return

        for row in rows:
            # Rows marked with the "thead" class are headers, not games.
            if 'class="thead"' not in str(row):
                self._games.append(Game(row))
Пример #6
0
    def _pull_schedule(self, abbreviation, year):
        """
        Download a team's NFL schedule and register its games.

        Fetches the team's schedule page as a PyQuery object, extracts the
        regular season game log for the requested year and hands it to
        ``_add_games_to_schedule``. If the page also contains a playoff game
        log for that year, those games are added as post-season games.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'NWE' for the New England Patriots.
        year : string
            The requested year to pull stats from. When empty, the season is
            looked up automatically and may fall back to the previous season
            if the current season's schedule is not available yet.
        """
        team = abbreviation.lower()
        if not year:
            year = utils._find_year_for_season('nfl')
            # Right before a new season begins its schedule does not exist
            # yet. Use the previous season instead, but only if that page
            # does exist.
            if not utils._url_exists(SCHEDULE_URL % (team, year)):
                previous_year = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (team, previous_year)):
                    year = previous_year

        schedule_doc = pq(SCHEDULE_URL % (team, year))
        regular_season = utils._get_stats_table(schedule_doc,
                                                'table#gamelog%s' % year)
        if not regular_season:
            utils._no_data_found()
            return
        self._add_games_to_schedule(regular_season, REGULAR_SEASON, year)

        # The playoff game log is only present when the team reached the
        # post-season; its table id carries the season year as a suffix.
        playoff_table_id = 'playoff_gamelog%s' % year
        if playoff_table_id not in str(schedule_doc):
            return
        postseason = utils._get_stats_table(schedule_doc,
                                            'table#%s' % playoff_table_id)
        self._add_games_to_schedule(postseason, POST_SEASON, year)
Пример #7
0
def _retrieve_all_teams(year,
                        basic_stats=None,
                        basic_opp_stats=None,
                        adv_stats=None,
                        adv_opp_stats=None):
    """
    Collect the basic and advanced NCAAB stats for every team in a season.

    Pulls four pages in turn (basic stats, basic opponent stats, advanced
    stats and advanced opponent stats), extracts the stats table from each,
    and folds the rows of all four tables into one dictionary by passing them
    through ``_add_stats_data``.

    Note that this method is called directly once Teams is invoked and does not
    need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. When empty, the season is
        looked up automatically and may fall back to the previous season if
        the current season's basic stats page is not available yet.
    basic_stats : string (optional)
        Link with filename to the local basic stats page.
    basic_opp_stats : string (optional)
        Link with filename to the local basic opponent stats page.
    adv_stats : string (optional)
        Link with filename to the local advanced stats page.
    adv_opp_stats : string (optional)
        Link with filename to the local advanced opponents stats page.

    Returns
    -------
    tuple
        Returns a ``tuple`` of the team_data_dict and the year that was used
        to pull the stats. ``(None, None)`` is returned when all four stats
        tables come back empty.
    """
    if not year:
        year = utils._find_year_for_season('ncaab')
        # Right before a new season begins its page does not exist yet. Use
        # the previous season instead, but only if that page does exist.
        if not utils._url_exists(BASIC_STATS_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(BASIC_STATS_URL % previous_year):
                year = previous_year

    # (URL template, optional local file, table selector) for each page, in
    # the order the pages are pulled and merged.
    sources = (
        (BASIC_STATS_URL, basic_stats, 'table#basic_school_stats'),
        (BASIC_OPPONENT_STATS_URL, basic_opp_stats, 'table#basic_opp_stats'),
        (ADVANCED_STATS_URL, adv_stats, 'table#adv_school_stats'),
        (ADVANCED_OPPONENT_STATS_URL, adv_opp_stats, 'table#adv_opp_stats'),
    )
    tables = [
        utils._get_stats_table(utils._pull_page(url % year, local), selector)
        for url, local, selector in sources
    ]
    if not any(tables):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in tables:
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
Пример #8
0
    def test_invalid_url_exception_returns_false(self, *args, **kwargs):
        """
        ``_url_exists`` reports a falsy result for the exception URL.

        Extra positional and keyword arguments are accepted and ignored.
        """
        assert not utils._url_exists('http://www.exception.com')
Пример #9
0
    def test_404_url_returns_false(self, *args, **kwargs):
        """
        ``_url_exists`` reports a falsy result for the 404 URL.

        Extra positional and keyword arguments are accepted and ignored.
        """
        assert not utils._url_exists('http://www.404.com/doesnt/exist')
Пример #10
0
    def test_valid_url_returns_true(self, *args, **kwargs):
        """
        ``_url_exists`` reports a truthy result for the valid URL.

        Extra positional and keyword arguments are accepted and ignored.
        """
        assert utils._url_exists('http://www.good_url.com/this/is/valid')