Пример #1
0
    def test_nfl_boxscore_string_representation(self):
        """The repr of a parsed boxscore names both teams and the date."""
        boxscore = Boxscore(BOXSCORE)

        expected = (
            'Boxscore for Houston Texans at Kansas City Chiefs '
            '(Thursday Sep 10, 2020)'
        )
        assert repr(boxscore) == expected
Пример #2
0
    def test_nfl_boxscore_players(self):
        """Both rosters have the expected size and no empty player frames."""
        game = Boxscore(BOXSCORE)

        # Roster sizes are checked first, home before away.
        assert len(game.home_players) == 33
        assert len(game.away_players) == 28

        # Every player on either side must carry a non-empty dataframe.
        for side in ('home_players', 'away_players'):
            for player in getattr(game, side):
                assert not player.dataframe.empty
Пример #3
0
    def test_invalid_url_yields_empty_class(self):
        """With no page retrieved, every attribute except `_uri` is None."""
        page_stub = flexmock(Boxscore).should_receive('_retrieve_html_page')
        page_stub.and_return(None)

        boxscore = Boxscore(BOXSCORE)

        for key, value in vars(boxscore).items():
            # `_uri` is the only attribute allowed to hold a value.
            if key != '_uri':
                assert value is None
Пример #4
0
    def test_game_summary_with_no_scores_returns_none(self):
        result = Boxscore(None)._parse_summary(
            pq("""<table class="linescore nohover stats_table no_freeze">
    <tbody>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
    </tbody>
</table>"""))

        assert result == {'away': [None], 'home': [None]}
Пример #5
0
def game_data_from_year(year):
    """Load the box score data for every game in a given year.

    The schedule for each week is fetched once, and the box score
    ``dataframe`` of every game in it is collected and tagged with the
    week number.

    Parameters
    ----------
    year : int
        The year to get the data from

    Returns
    -------
    DataFrame or int
        A DataFrame containing data for all the games in the year, with an
        added ``week`` column. If anything goes wrong while loading, an
        error message is printed and ``-1`` is returned instead (kept for
        backward compatibility with existing callers).
    """
    # Weeks
    FIRST_WEEK = 1
    LAST_WEEK = 21

    try:
        weekly_frames = []

        # Retrieve data for each week in the year
        for week in range(FIRST_WEEK, LAST_WEEK + 1):
            # Fetch the week's schedule once instead of once per game.
            games = Boxscores(week, year).games[str(week) + "-" + str(year)]

            # Box score stats for every game in the week; a game without a
            # dataframe (None) is skipped, as pd.concat would drop it anyway.
            frames = []
            for game in games:
                box_score = Boxscore(game['boxscore']).dataframe
                if box_score is not None:
                    frames.append(box_score)

            week_data = pd.concat(frames) if frames else pd.DataFrame()

            # Tag every row with the week number. A scalar is broadcast to
            # however many rows were actually loaded, so a missing box score
            # no longer causes a length mismatch.
            week_data["week"] = week

            weekly_frames.append(week_data)

        # Combine the data of all weeks into the data for the year
        game_data = pd.concat(weekly_frames)
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still propagate.
        print("ERROR: Data loading failed")
        return -1
    else:
        return game_data
Пример #6
0
    def setup_method(self, *args, **kwargs):
        """Store the expected game values and build a Boxscore from BOXSCORE.

        `utils._todays_date` is replaced with a fixed `MockDateTime` before
        the Boxscore is constructed.
        """
        (flexmock(utils)
            .should_receive('_todays_date')
            .and_return(MockDateTime(YEAR, MONTH)))

        # General game information.
        expected = {
            'date': 'Thursday Sep 10, 2020',
            'time': '8:20pm',
            'datetime': datetime(2020, 9, 10, 20, 20),
            'stadium': 'Arrowhead Stadium',
            'attendance': 15895,
            'duration': '2:53',
            'winner': HOME,
            'winning_name': 'Kansas City Chiefs',
            'winning_abbr': 'KAN',
            'losing_name': 'Houston Texans',
            'losing_abbr': 'HTX',
            'won_toss': 'Chiefs (deferred)',
            'weather': '56 degrees, relative humidity 95%, wind 7 mph',
            'vegas_line': 'Kansas City Chiefs -9.5',
            'surface': 'Astroturf',
            'roof': 'Outdoors',
            'over_under': '53.5 (over)',
        }

        # Away team statistics.
        expected.update({
            'away_points': 20,
            'away_first_downs': 21,
            'away_rush_attempts': 22,
            'away_rush_yards': 118,
            'away_rush_touchdowns': 2,
            'away_pass_completions': 20,
            'away_pass_attempts': 32,
            'away_pass_yards': 253,
            'away_pass_touchdowns': 1,
            'away_interceptions': 1,
            'away_times_sacked': 4,
            'away_yards_lost_from_sacks': 11,
            'away_net_pass_yards': 242,
            'away_total_yards': 360,
            'away_fumbles': 0,
            'away_fumbles_lost': 0,
            'away_turnovers': 1,
            'away_penalties': 5,
            'away_yards_from_penalties': 37,
            'away_third_down_conversions': 4,
            'away_third_down_attempts': 10,
            'away_fourth_down_conversions': 1,
            'away_fourth_down_attempts': 1,
            'away_time_of_possession': '25:13',
        })

        # Home team statistics.
        expected.update({
            'home_points': 34,
            'home_first_downs': 28,
            'home_rush_attempts': 34,
            'home_rush_yards': 166,
            'home_rush_touchdowns': 1,
            'home_pass_completions': 24,
            'home_pass_attempts': 32,
            'home_pass_yards': 211,
            'home_pass_touchdowns': 3,
            'home_interceptions': 0,
            'home_times_sacked': 1,
            'home_yards_lost_from_sacks': 8,
            'home_net_pass_yards': 203,
            'home_total_yards': 369,
            'home_fumbles': 0,
            'home_fumbles_lost': 0,
            'home_turnovers': 0,
            'home_penalties': 1,
            'home_yards_from_penalties': 5,
            'home_third_down_conversions': 7,
            'home_third_down_attempts': 13,
            'home_fourth_down_conversions': 1,
            'home_fourth_down_attempts': 1,
            'home_time_of_possession': '34:47',
        })

        self.results = expected
        self.boxscore = Boxscore(BOXSCORE)
def retrieve_schedule(season, team, first_game, cred='credentials.json'):
    """Retrieve the played games of a team and insert them into the NFL DB.

    For every game on the team's schedule a box score id is built from the
    game date and the team abbreviation. Games whose box score has no home
    abbreviation are reported and skipped; the others are appended to the
    ``dim_nfl_games`` table of the local MySQL server.

    season: int of year the season started in, takes into account spillover into Jan/Feb
    team: Used to specify which team will be retrieved
    first_game: YYYY/MM/DD date in which the first game of the season is played, used for week calculation
    cred: path to a JSON file providing the ``userId`` and ``password`` keys used for the DB login
    :return : None, uploads teams (home and away), final score, week, season, and date to games table
    """

    # Read the credentials inside a context manager so the file handle is
    # released even if the JSON cannot be parsed.
    with open(cred) as f:
        login = json.load(f)

    # NOTE(review): the credentials are interpolated into the URL unescaped;
    # a password containing characters such as '@' or '/' would presumably
    # need URL-quoting - confirm.
    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Establishing some necessary objects for iteration
    schedule = Team(team_name=team, year=season).schedule
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }

    # Iterating through each game and adding home games to the schedule
    for game in schedule:

        # The string form of a game is split on spaces; the first two tokens
        # are used as the month name and the day of the month.
        game_split = str(game).split(' ')
        month, day = game_split[0], game_split[1]

        # handling for Jan/Feb spillover into the next year
        game_year = season + 1 if month in ('January', 'February') else season

        # creating the necessary id and retrieving box score
        game_id = (str(game_year) + months_dict[month] + day.zfill(2) + '0'
                   + team.lower())
        boxscore = Boxscore(uri=game_id)

        # boxscore url's are only generated for home games, so handling for
        # such exceptions. The original check was against the string 'None';
        # a real None is treated the same way.
        if boxscore.home_abbreviation in (None, 'None'):
            print(
                'Game data does not exist. Game is either away game for {} or is yet to be played.'
                .format(team))
            continue

        # Computed once and reused for the row and for both messages.
        week_no = ((boxscore.datetime - dt.datetime.strptime(
            first_game, '%Y/%m/%d')).days // 7) + 1
        game_date = boxscore.datetime.strftime('%Y-%m-%d')

        game_data = {
            'game_id': game_id,
            'home': str(boxscore.home_abbreviation).upper(),
            'away': str(boxscore.away_abbreviation).upper(),
            'home_score': boxscore.home_points,
            'away_score': boxscore.away_points,
            'season': season,
            'week_no': week_no,
            'game_date': game_date
        }

        df = pd.DataFrame(game_data, index=[0])

        # Unique index already created to avoid duplicates in table when
        # updating, error handling for this. Only the insert is guarded.
        try:
            df.to_sql('dim_nfl_games',
                      con=engine,
                      index=False,
                      if_exists='append')

        except sql.exc.IntegrityError:
            print('Game: {} vs. {} on {} already present in table.'.format(
                str(boxscore.home_abbreviation),
                str(boxscore.away_abbreviation),
                game_date))

        else:
            print('Game inputted: {} vs. {} on {} (Week {})'.format(
                str(boxscore.home_abbreviation),
                str(boxscore.away_abbreviation),
                game_date,
                week_no))

    print('Schedule inputted: {} for {}'.format(team, season))
Пример #8
0
 def boxscore(self):
     """
     Build a Boxscore instance from this object's stored `_boxscore`
     value, giving access to more detailed stats on the game.
     """
     detailed_stats = Boxscore(self._boxscore)
     return detailed_stats
Пример #9
0
    def setup_method(self, *args, **kwargs):
        """Create a Boxscore whose `_parse_game_data` is stubbed out."""
        parse_stub = flexmock(Boxscore).should_receive('_parse_game_data')
        parse_stub.and_return(None)

        self.boxscore = Boxscore(None)
Пример #10
0
class TestNFLBoxscore:
    """Unit tests for individual Boxscore properties and parsing helpers.

    Each test runs against a Boxscore created with `_parse_game_data`
    stubbed out, and then injects the private values it needs.

    NOTE(review): most tests assign a `PropertyMock` to
    `type(self.boxscore)`, i.e. onto the class object itself, and nothing in
    this class removes it again - confirm that the resulting state carried
    over between tests is intended.
    """

    @patch('requests.get', side_effect=mock_pyquery)
    def setup_method(self, *args, **kwargs):
        """Build `self.boxscore` with `_parse_game_data` stubbed to None."""
        flexmock(Boxscore) \
            .should_receive('_parse_game_data') \
            .and_return(None)

        self.boxscore = Boxscore(None)

    def test_away_team_wins(self):
        """`winner` is AWAY when the away points exceed the home points."""
        fake_away_points = PropertyMock(return_value=28)
        fake_home_points = PropertyMock(return_value=21)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == AWAY

    def test_home_team_wins(self):
        """`winner` is HOME when the home points exceed the away points."""
        fake_away_points = PropertyMock(return_value=21)
        fake_home_points = PropertyMock(return_value=28)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == HOME

    def test_winning_name_is_home(self):
        """`winning_name` is the home name when the winner is HOME."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_name_is_away(self):
        """`winning_name` is the away name when the winner is AWAY."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_abbr_is_home(self):
        """`winning_abbr` returns the stubbed abbreviation for a HOME win."""
        expected_name = 'HOME'

        # `utils._parse_abbreviation` is stubbed to return the expected value.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.winning_abbr == expected_name

    def test_winning_abbr_is_away(self):
        """`winning_abbr` returns the stubbed abbreviation for an AWAY win."""
        expected_name = 'AWAY'

        # `utils._parse_abbreviation` is stubbed to return the expected value.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.winning_abbr == expected_name

    def test_losing_name_is_home(self):
        """`losing_name` is the home name when the winner is AWAY."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_name_is_away(self):
        """`losing_name` is the away name when the winner is HOME."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_abbr_is_home(self):
        """`losing_abbr` returns the stubbed abbreviation for an AWAY win."""
        expected_name = 'HOME'

        # `utils._parse_abbreviation` is stubbed to return the expected value.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.losing_abbr == expected_name

    def test_losing_abbr_is_away(self):
        """`losing_abbr` returns the stubbed abbreviation for a HOME win."""
        expected_name = 'AWAY'

        # `utils._parse_abbreviation` is stubbed to return the expected value.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.losing_abbr == expected_name

    def test_game_summary_with_no_scores_returns_none(self):
        """A line score with only empty cells parses to one None per team."""
        result = Boxscore(None)._parse_summary(
            pq("""<table class="linescore nohover stats_table no_freeze">
    <tbody>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
    </tbody>
</table>"""))

        assert result == {'away': [None], 'home': [None]}

    @patch('requests.get', side_effect=mock_pyquery)
    def test_invalid_url_returns_none(self, *args, **kwargs):
        """`_retrieve_html_page('bad')` returns None with `requests.get` patched."""
        result = Boxscore(None)._retrieve_html_page('bad')

        assert result is None

    def test_url_404_page_returns_none(self):
        """`_retrieve_html_page('404')` returns None."""
        # NOTE(review): unlike test_invalid_url_returns_none, this test is not
        # decorated with a `requests.get` patch - confirm whether that is
        # intentional.
        result = Boxscore(None)._retrieve_html_page('404')

        assert result is None

    def test_no_class_information_returns_dataframe_of_none(self):
        """`dataframe` is None when both point totals are None."""
        mock_points = PropertyMock(return_value=None)
        type(self.boxscore)._home_points = mock_points
        type(self.boxscore)._away_points = mock_points

        assert self.boxscore.dataframe is None

    def test_empty_attribute_returns_none(self):
        """A public attribute is None when its private value is None."""
        fake_rushes = PropertyMock(return_value=None)
        type(self.boxscore)._away_rush_attempts = fake_rushes

        assert self.boxscore.away_rush_attempts is None

    def test_non_int_value_returns_none(self):
        """`away_rush_attempts` is None when its private value is 'bad'."""
        fake_rushes = PropertyMock(return_value='bad')
        type(self.boxscore)._away_rush_attempts = fake_rushes

        assert self.boxscore.away_rush_attempts is None

    def test_nfl_game_information(self):
        """Date, time, stadium, attendance and duration are parsed from text."""
        fields = {
            'attendance': 62881,
            'date': 'Thursday Nov 8, 2018',
            'duration': '2:49',
            'stadium': 'Heinz Field',
            'time': '8:20pm'
        }

        mock_field = """Thursday Nov 8, 2018
Start Time: 8:20pm
Stadium: Heinz Field
Attendance: 62,881
Time of Game: 2:49
Logos via Sports Logos.net / About logos
"""

        m = MockBoxscoreData(MockField(mock_field))

        self.boxscore._parse_game_date_and_location(m)
        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_nfl_game_limited_information(self):
        """Missing start time and game duration lines yield None values."""
        fields = {
            'attendance': 22000,
            'date': 'Sunday Sep 8, 1940',
            'duration': None,
            'stadium': 'Forbes Field',
            'time': None
        }

        mock_field = """Sunday Sep 8, 1940
Stadium: Forbes Field
Attendance: 22,000
Logos via Sports Logos.net / About logos
"""

        m = MockBoxscoreData(MockField(mock_field))

        self.boxscore._parse_game_date_and_location(m)
        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_nfl_away_abbreviation(self):
        """`away_abbreviation` is taken from the team link in the away name."""
        away_name = PropertyMock(return_value='<a href="/teams/kan/2018.htm" \
itemprop="name">Kansas City Chiefs</a>')
        type(self.boxscore)._away_name = away_name

        assert self.boxscore.away_abbreviation == 'kan'

    def test_nfl_home_abbreviation(self):
        """`home_abbreviation` is taken from the team link in the home name."""
        home_name = PropertyMock(return_value='<a href="/teams/nwe/2018.htm" \
itemprop="name">New England Patriots</a>')
        type(self.boxscore)._home_name = home_name

        assert self.boxscore.home_abbreviation == 'nwe'

    def test_nfl_datetime_missing_time(self):
        """`datetime` holds only the date when no start time is available."""
        date = PropertyMock(return_value='Sunday Oct 7, 2018')
        time = PropertyMock(return_value=None)
        type(self.boxscore)._date = date
        type(self.boxscore)._time = time

        assert self.boxscore.datetime == datetime(2018, 10, 7)

    def test_nfl_game_details(self):
        """Game details are parsed from the `game_info` table.

        The expected roof and surface values are capitalized and stripped
        relative to the raw cell text in the mock table.
        """
        fields = {
            'won_toss': 'Dolphins',
            'roof': 'Outdoors',
            'surface': 'Fieldturf',
            'weather': '87 degrees, wind 4 mph',
            'vegas_line': 'Cincinnati Bengals -6.5',
            'over_under': '47.5 (under)'
        }

        mock_field = """<table id="game_info">
<tr><th data-stat="info">Won Toss</th><td data-stat="stat">Dolphins</td></tr>
<tr><th data-stat="info">Roof</th><td data-stat="stat">outdoors</td></tr>
<tr><th data-stat="info">Surface</th><td data-stat="stat">fieldturf </td></tr>
<tr><th data-stat="info">Duration</th><td data-stat="stat">3:02</td></tr>
<tr><th data-stat="info">Attendance</th><td data-stat="stat">52,708</td></tr>
<tr><th data-stat="info">Weather</th>
    <td data-stat="stat">87 degrees, wind 4 mph</td></tr>
<tr><th data-stat="info">Vegas Line</th>
    <td data-stat="stat">Cincinnati Bengals -6.5</td></tr>
<tr><th data-stat="info">Over/Under</th>
    <td data-stat="stat">47.5 <b>(under)</b></td></tr>
</table>
"""

        self.boxscore._parse_game_details(pq(mock_field))

        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_finding_home_team_with_no_abbrs(self):
        """A cell matching `home_abbreviation` is classified as HOME."""
        mock_html = pq('<td data-stat="team">KAN</td>')
        abbr = PropertyMock(return_value='KAN')
        # Both private abbreviations are unset for this test.
        self.boxscore._home_abbr = None
        self.boxscore._away_abbr = None
        type(self.boxscore).home_abbreviation = abbr
        team = self.boxscore._find_home_or_away(mock_html)

        assert team == HOME

    def test_finding_away_team_with_no_abbrs(self):
        """A cell not matching `home_abbreviation` is classified as AWAY."""
        mock_html = pq('<td data-stat="team">HTX</td>')
        abbr = PropertyMock(return_value='KAN')
        # Both private abbreviations are unset for this test.
        self.boxscore._home_abbr = None
        self.boxscore._away_abbr = None
        type(self.boxscore).home_abbreviation = abbr
        team = self.boxscore._find_home_or_away(mock_html)

        assert team == AWAY

    def test_missing_abbreviations(self):
        """`_alt_abbreviations` returns (None, None) for an empty stats table."""
        table = '<table id="team_stats"><thead></thead></table>'
        output = self.boxscore._alt_abbreviations(pq(table))

        assert output == (None, None)
Пример #11
0
    def test_url_404_page_returns_none(self):
        """Retrieving the '404' page yields None."""
        boxscore = Boxscore(None)

        assert boxscore._retrieve_html_page('404') is None
Пример #12
0
    def test_invalid_url_returns_none(self, *args, **kwargs):
        """Retrieving the 'bad' page yields None."""
        boxscore = Boxscore(None)

        assert boxscore._retrieve_html_page('bad') is None
    'interceptions', 'yards_returned_from_interception',
    'interceptions_returned_for_touchdown', 'longest_interception_return',
    'passes_defended', 'sacks', 'combined_tackles', 'solo_tackles',
    'assists_on_tackles', 'tackles_for_loss', 'quarterback_hits',
    'fumbles_recovered', 'yards_recovered_from_fumble',
    'fumbles_recovered_for_touchdown', 'fumbles_forced', 'kickoff_returns',
    'kickoff_return_yards', 'average_kickoff_return_yards',
    'kickoff_return_touchdown', 'longest_kickoff_return', 'punt_returns',
    'punt_return_yards', 'yards_per_punt_return', 'punt_return_touchdown',
    'longest_punt_return', 'extra_points_made', 'extra_points_attempted',
    'field_goals_made', 'field_goals_attempted', 'punts', 'total_punt_yards',
    'yards_per_punt', 'longest_punt'
])

# Build one player-level frame per side for every game listed in sb_app.
# sb_app maps a box score id to the value stored in the 'season' column.
for x in sb_app.keys():
    game_data = Boxscore(x)

    # Concatenate all player frames in one call instead of growing the frame
    # pairwise inside a loop, which re-copies the accumulated rows each time.
    home_df = pd.concat(
        [player.dataframe for player in game_data.home_players], axis=0)
    # One name per player; assumes each player frame contributes exactly one
    # row - TODO confirm. The comprehension no longer reuses the name `x`.
    home_df['name'] = [player.name for player in game_data.home_players]
    home_df['team'] = game_data.home_abbreviation
    home_df['season'] = sb_app[x]

    away_df = pd.concat(
        [player.dataframe for player in game_data.away_players], axis=0)
    away_df['name'] = [player.name for player in game_data.away_players]
    away_df['team'] = game_data.away_abbreviation
    away_df['season'] = sb_app[x]
def get_nfl_player_performance(season, cred='credentials.json'):
    """Upload the per-game player performance of every team in a season.

    For each team of the season and each game on its schedule, a box score
    id is built from the game date and the team abbreviation. Games whose
    box score has no home abbreviation are reported and skipped. For the
    others, the dataframe of every home and away player is tagged with
    ``game_id``, ``team_id`` and ``player_id`` and appended to the
    ``fact_nfl_performance`` table of the local MySQL server.

    season: int of year the season started in, takes into account spillover into Jan/Feb
    cred: path to a JSON file providing the ``userId`` and ``password`` keys used for the DB login
    :return : None
    """
    # Read the credentials inside a context manager so the file handle is
    # released even if the JSON cannot be parsed.
    with open(cred) as f:
        login = json.load(f)

    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Month name -> two digit month; built once instead of once per team.
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }

    # Iterating through teams
    teams = Teams.Teams(year=season)

    for team in teams:
        team_id = team.abbreviation.upper()

        schedule = Team(team_name=team_id, year=season).schedule

        # Iterating through the schedule of each one of those teams
        for game in schedule:
            # The string form of a game is split on spaces; the first two
            # tokens are used as the month name and the day of the month.
            game_split = str(game).split(' ')
            month, day = game_split[0], game_split[1]

            # handling for Jan/Feb spillover into the next year
            game_year = (season + 1 if month in ('January', 'February')
                         else season)

            game_id = (str(game_year) + months_dict[month] + day.zfill(2)
                       + '0' + team_id.lower())
            boxscore = Boxscore(uri=game_id)

            # boxscore url's are only generated for home games, so handling
            # for such exceptions. The original check was against the string
            # 'None'; a real None is treated the same way.
            if boxscore.home_abbreviation in (None, 'None'):
                print(
                    'Game data does not exist. Game is either away game for {} or is yet to be played.'
                    .format(team))
                continue

            print("Inputting {} for team {}".format(game_id, team_id))

            away_id = str(boxscore.away_abbreviation).upper()

            # Home players are stored under the scheduled team's id, away
            # players under the box score's away abbreviation.
            rosters = (
                (boxscore.home_players, team_id),
                (boxscore.away_players, away_id),
            )

            for players, roster_team_id in rosters:
                for player in players:
                    player_df = player.dataframe

                    player_df['game_id'] = game_id
                    player_df['team_id'] = roster_team_id
                    # The first index label of the frame is stored as the
                    # player id.
                    player_df['player_id'] = player_df.index[0]

                    try:
                        player_df.to_sql('fact_nfl_performance',
                                         con=engine,
                                         index=False,
                                         if_exists='append')
                        # Report the team the row was stored under; the away
                        # loop used to print the home team id here.
                        print('Inputted: {} {} for game {}.'.format(
                            player.name, roster_team_id, game_id))

                    except sql.exc.IntegrityError:
                        print(
                            'Player {} performance for {} already present in performance table.'
                            .format(player.name, game_id))

                    except AttributeError:
                        print('Exception: Empty String')