Exemplo n.º 1
0
    def setup_method(self, *args, **kwargs):
        """Store the expected game values and build a mocked NYY schedule.

        ``Boxscore._parse_game_data``, ``Boxscore.dataframe`` and
        ``utils._todays_date`` are stubbed with flexmock before
        ``Schedule('NYY')`` is created and saved on ``self.schedule``.
        """
        expected_game = {
            'game': 2,
            'boxscore_index': 'TBA/TBA201704040',
            'date': 'Tuesday, Apr 4',
            'datetime': datetime(2017, 4, 4),
            'game_number_for_day': 1,
            'location': AWAY,
            'opponent_abbr': 'TBR',
            'result': WIN,
            'runs_scored': 5,
            'runs_allowed': 0,
            'innings': 9,
            'record': '1-1',
            'rank': 3,
            'games_behind': 0.5,
            'winner': 'Sabathia',
            'loser': 'Odorizzi',
            'save': None,
            'game_duration': '3:07',
            'day_or_night': NIGHT,
            'attendance': 19366,
            'streak': '+',
        }
        self.results = expected_game

        # Boxscore parsing is stubbed out; its dataframe becomes a one-row
        # placeholder frame.
        flexmock(Boxscore).should_receive('_parse_game_data').and_return(None)
        (flexmock(Boxscore)
            .should_receive('dataframe')
            .and_return(pd.DataFrame([{'key': 'value'}])))
        # Pin "today" to the mocked YEAR / MONTH.
        (flexmock(utils)
            .should_receive('_todays_date')
            .and_return(MockDateTime(YEAR, MONTH)))

        self.schedule = Schedule('NYY')
Exemplo n.º 2
0
def schedule_df(team_name):
    """Return the 2019 schedule DataFrame for ``team_name``.

    The frame's index is moved into a regular column, a column called
    ``index`` is renamed to ``team_name``, and every row of the
    ``team_name`` column is then set to the given ``team_name``.
    """
    frame = (
        Schedule(team_name, year=2019)
        .dataframe
        .reset_index()
        .rename(columns={"index": "team_name"})
    )
    return frame.assign(team_name=team_name)
Exemplo n.º 3
0
    def test_empty_page_return_no_games(self):
        """A missing stats table produces a schedule with zero games."""
        # ``_no_data_found`` is expected to be called exactly once.
        flexmock(utils).should_receive('_no_data_found').once()
        # Simulate a page without a stats table.
        flexmock(utils).should_receive('_get_stats_table').and_return(None)

        empty_schedule = Schedule('NYY')

        assert len(empty_schedule) == 0
Exemplo n.º 4
0
    def test_no_dataframes_extended_returns_none(self):
        """``dataframe_extended`` is None when the game provides none."""
        # Skip pulling the schedule while constructing the object.
        flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
        schedule = Schedule('HOU')

        # Replace iteration on the schedule's class with a mock that hands
        # back a game whose extended dataframe is None.
        game_without_frame = flexmock(dataframe_extended=None)
        type(schedule).__iter__ = PropertyMock(
            return_value=game_without_frame)

        assert schedule.dataframe_extended is None
Exemplo n.º 5
0
    def test_invalid_dataframe_not_included_with_schedule_dataframes(self):
        """An invalid game DataFrame is left out of the schedule DataFrame.

        If a DataFrame is not valid, it should not be included with the
        dataframes property. If no dataframes are present, the DataFrame
        should return the default value of None.
        """
        # Skip pulling the schedule while constructing the object.
        flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
        schedule = Schedule('HOU')

        # A game with neither runs scored nor runs allowed.
        scoreless_game = flexmock(_runs_scored=None, _runs_allowed=None)
        type(schedule).__iter__ = PropertyMock(return_value=scoreless_game)

        assert schedule.dataframe is None
    def test_invalid_dataframe_not_included_with_schedule_dataframes(self):
        """An invalid game DataFrame is left out of the schedule DataFrame.

        If a DataFrame is not valid, it should not be included with the
        dataframes property. If no dataframes are present, a ValueError
        should be raised.
        """
        # Skip pulling the schedule while constructing the object.
        flexmock(Schedule).should_receive('_pull_schedule').and_return(None)
        schedule = Schedule('HOU')

        # A game with neither runs scored nor runs allowed.
        scoreless_game = flexmock(_runs_scored=None, _runs_allowed=None)
        type(schedule).__iter__ = PropertyMock(return_value=scoreless_game)

        with pytest.raises(ValueError):
            schedule.dataframe
Exemplo n.º 7
0
    def test_mlb_invalid_default_year_reverts_to_previous_year(
            self, *args, **kwargs):
        """The second NYY game matches the expected values.

        ``utils._find_year_for_season`` is stubbed to return 2018 and the
        ``Boxscore`` parsing is stubbed out before the schedule is built.
        """
        expected = {
            'game': 2,
            'boxscore_index': 'TBA/TBA201704040',
            'date': 'Tuesday, Apr 4',
            'datetime': datetime(2017, 4, 4),
            'game_number_for_day': 1,
            'location': AWAY,
            'opponent_abbr': 'TBR',
            'result': WIN,
            'runs_scored': 5,
            'runs_allowed': 0,
            'innings': 9,
            'record': '1-1',
            'rank': 3,
            'games_behind': 0.5,
            'winner': 'Sabathia',
            'loser': 'Odorizzi',
            'save': None,
            'game_duration': '3:07',
            'day_or_night': NIGHT,
            'attendance': 19366,
            'streak': '+',
        }
        flexmock(Boxscore).should_receive('_parse_game_data').and_return(None)
        (flexmock(Boxscore)
            .should_receive('dataframe')
            .and_return(pd.DataFrame([{'key': 'value'}])))
        (flexmock(utils)
            .should_receive('_find_year_for_season')
            .and_return(2018))

        schedule = Schedule('NYY')

        second_game = schedule[1]
        for name, wanted in expected.items():
            assert getattr(second_game, name) == wanted
Exemplo n.º 8
0
def get_day(str):
    """Return the day of the month from a `` <Month> <day>, <year>`` string.

    The text is parsed with the format ``" %B %d, %Y"`` (note the leading
    space), e.g. ``" April 4, 2017"`` gives ``4``.
    """
    parsed = datetime.strptime(str, " %B %d, %Y")
    return parsed.day


def to_duration(str):
    """Convert an ``H:MM`` string into a number of minutes (as a float).

    The text is parsed with the ``'%H:%M'`` format, e.g. ``'3:07'`` gives
    ``187.0``.
    """
    parsed = datetime.strptime(str, '%H:%M')
    return parsed.hour * 60.0 + parsed.minute


today = date.today()
teams = Teams()
for team in teams:
    abr = team.abbreviation
    sched = Schedule(abr)

    # Each team has a CSV named after its abbreviation; the last row holds
    # the month and day of the most recent game already stored.
    fname = abr + '.csv'
    old_df = pd.read_csv(fname)
    new_rows = []

    # Pull scalars with .iloc[-1]: calling int() on a one-element Series
    # (as int(old_df.tail(1)['month']) did) is deprecated in pandas.
    last_month = int(old_df['month'].iloc[-1])
    last_day = int(old_df['day'].iloc[-1])
    # The stored month/day is combined with the current year.
    last_date = datetime(today.year, last_month, last_day)

    for game in sched:
        # Keep only games after the last stored date whose boxscore index is
        # 16 characters long -- presumably the length of a complete index
        # such as 'TBA/TBA201704040'; TODO confirm.
        if game.datetime > last_date and len(game.boxscore_index) == 16:
            new_rows.append(Boxscore(game.boxscore_index).dataframe)
Exemplo n.º 9
0
from sportsreference.mlb.teams import Teams
from sportsreference.mlb.boxscore import Boxscore
from sportsreference.mlb.schedule import Schedule
import pandas as pd

##### Single Team Work ######
# Clean up date format: parse the 'date' column and re-emit it as MM/DD/YYYY
# text.
sea_df = Schedule('SEA').dataframe_extended
sea_df['date'] = pd.to_datetime(sea_df.date)
sea_df['date'] = sea_df['date'].dt.strftime('%m/%d/%Y')

# Add Home and Away Team fields.
# The 'winner' column says whether the 'Home' or 'Away' side won; the side
# that matches gets the winning abbreviation, any other value gets the losing
# one. The three columns are walked in a single pass instead of doing a
# chained label lookup (sea_df[col][ind]) per row, which is slow and fails
# when index labels repeat.
home_team = []
away_team = []

for winner, winning_abbr, losing_abbr in zip(sea_df['winner'],
                                             sea_df['winning_abbr'],
                                             sea_df['losing_abbr']):
    home_team.append(winning_abbr if winner == 'Home' else losing_abbr)
    away_team.append(winning_abbr if winner == 'Away' else losing_abbr)

# NOTE(review): positions 81 and 82 assume the frame already has at least 81
# columns -- confirm against the columns of dataframe_extended.
sea_df.insert(81, 'home_team', home_team, allow_duplicates=True)
sea_df.insert(82, 'away_team', away_team, allow_duplicates=True)

sea_df['team'] = 'SEA'
Exemplo n.º 10
0
class TestMLBSchedule:
    """Tests for ``Schedule`` run against a mocked NYY schedule."""

    @mock.patch('requests.get', side_effect=mock_pyquery)
    def setup_method(self, *args, **kwargs):
        """Store the expected game values and build the mocked schedule.

        ``requests.get`` is patched with ``mock_pyquery``, and the
        ``Boxscore`` parsing and ``utils._todays_date`` are stubbed with
        flexmock before ``Schedule('NYY')`` is created.
        """
        expected_game = {
            'game': 2,
            'boxscore_index': 'TBA/TBA201704040',
            'date': 'Tuesday, Apr 4',
            'datetime': datetime(2017, 4, 4),
            'game_number_for_day': 1,
            'location': AWAY,
            'opponent_abbr': 'TBR',
            'result': WIN,
            'runs_scored': 5,
            'runs_allowed': 0,
            'innings': 9,
            'record': '1-1',
            'rank': 3,
            'games_behind': 0.5,
            'winner': 'Sabathia',
            'loser': 'Odorizzi',
            'save': None,
            'game_duration': '3:07',
            'day_or_night': NIGHT,
            'attendance': 19366,
            'streak': '+',
        }
        self.results = expected_game

        flexmock(Boxscore).should_receive('_parse_game_data').and_return(None)
        (flexmock(Boxscore)
            .should_receive('dataframe')
            .and_return(pd.DataFrame([{'key': 'value'}])))
        (flexmock(utils)
            .should_receive('_todays_date')
            .and_return(MockDateTime(YEAR, MONTH)))

        self.schedule = Schedule('NYY')

    def test_mlb_schedule_returns_correct_number_of_games(self):
        """The schedule length equals ``NUM_GAMES_IN_SCHEDULE``."""
        game_count = len(self.schedule)

        assert game_count == NUM_GAMES_IN_SCHEDULE

    def test_mlb_schedule_returns_requested_match_from_index(self):
        """Indexing the schedule with 1 returns the expected second game."""
        second_game = self.schedule[1]

        for name, wanted in self.results.items():
            assert getattr(second_game, name) == wanted

    def test_mlb_schedule_returns_requested_match_from_date(self):
        """Calling the schedule with a date returns that day's game."""
        second_game = self.schedule(datetime(2017, 4, 4))

        for name, wanted in self.results.items():
            assert getattr(second_game, name) == wanted

    def test_mlb_schedule_returns_second_game_in_double_header(self):
        """Passing game number 2 selects the second game of the day."""
        nightcap = self.schedule(datetime(2017, 5, 14), 2)
        expected = {
            'game': 35,
            'date': 'Sunday, May 14 (2)',
            'datetime': datetime(2017, 5, 14),
            'game_number_for_day': 2,
            'location': HOME,
            'opponent_abbr': 'HOU',
            'result': LOSS,
            'runs_scored': 7,
            'runs_allowed': 10,
            'innings': 9,
            'record': '22-13',
            'rank': 1,
            'games_behind': -0.5,
            'winner': 'Morton',
            'loser': 'Tanaka',
            'save': None,
            'game_duration': '3:49',
            'day_or_night': NIGHT,
            'attendance': 47883,
            'streak': '-',
        }

        for name, wanted in expected.items():
            assert getattr(nightcap, name) == wanted

    def test_mlb_schedule_dataframe_returns_dataframe(self):
        """A single game's DataFrame holds the expected row."""
        expected_frame = pd.DataFrame([self.results], index=['NYY'])

        actual_frame = self.schedule[1].dataframe
        # Stack the expected and actual frames and discard every row that
        # occurs more than once. Whatever remains is a row found in only one
        # of the two frames, so an empty remainder means they hold the same
        # rows.
        leftovers = pd.concat([expected_frame, actual_frame])
        leftovers = leftovers.drop_duplicates(keep=False)

        assert leftovers.empty

    def test_mlb_schedule_dataframe_extended_returns_dataframe(self):
        """A game's extended DataFrame is the stubbed Boxscore frame."""
        expected_frame = pd.DataFrame([{'key': 'value'}])

        actual_frame = self.schedule[1].dataframe_extended

        # Same rows in both frames leaves nothing after de-duplication.
        leftovers = pd.concat([expected_frame, actual_frame])
        leftovers = leftovers.drop_duplicates(keep=False)

        assert leftovers.empty

    def test_mlb_schedule_all_dataframe_returns_dataframe(self):
        """The schedule DataFrame has one row per game and expected columns."""
        unique_rows = self.schedule.dataframe.drop_duplicates(keep=False)

        assert len(unique_rows) == NUM_GAMES_IN_SCHEDULE
        assert set(unique_rows.columns.values) == set(self.results)

    def test_mlb_schedule_all_dataframe_extended_returns_dataframe(self):
        """The extended schedule DataFrame has one row per game."""
        extended = self.schedule.dataframe_extended

        assert len(extended) == NUM_GAMES_IN_SCHEDULE

    def test_no_games_for_date_raises_value_error(self):
        """Requesting the current date raises ``ValueError``."""
        with pytest.raises(ValueError):
            self.schedule(datetime.now())

    def test_empty_page_return_no_games(self):
        """A missing stats table produces a schedule with zero games."""
        # ``_no_data_found`` is expected to be called exactly once.
        flexmock(utils).should_receive('_no_data_found').once()
        flexmock(utils).should_receive('_get_stats_table').and_return(None)

        empty_schedule = Schedule('NYY')

        assert len(empty_schedule) == 0

    def test_game_string_representation(self):
        """A game's repr is its date and opponent abbreviation."""
        first_game = self.schedule[0]

        assert repr(first_game) == 'Sunday, Apr 2 - TBR'

    def test_schedule_string_representation(self):
        """The schedule's repr lists every game, one per line."""
        expected = """Sunday, Apr 2 - TBR
Tuesday, Apr 4 - TBR
Wednesday, Apr 5 - TBR
Friday, Apr 7 - BAL
Saturday, Apr 8 - BAL
Sunday, Apr 9 - BAL
Monday, Apr 10 - TBR
Wednesday, Apr 12 - TBR
Thursday, Apr 13 - TBR
Friday, Apr 14 - STL
Saturday, Apr 15 - STL
Sunday, Apr 16 - STL
Monday, Apr 17 - CHW
Tuesday, Apr 18 - CHW
Wednesday, Apr 19 - CHW
Friday, Apr 21 - PIT
Saturday, Apr 22 - PIT
Sunday, Apr 23 - PIT
Wednesday, Apr 26 - BOS
Thursday, Apr 27 - BOS
Friday, Apr 28 - BAL
Saturday, Apr 29 - BAL
Sunday, Apr 30 - BAL
Monday, May 1 - TOR
Tuesday, May 2 - TOR
Wednesday, May 3 - TOR
Friday, May 5 - CHC
Saturday, May 6 - CHC
Sunday, May 7 - CHC
Monday, May 8 - CIN
Tuesday, May 9 - CIN
Thursday, May 11 - HOU
Friday, May 12 - HOU
Sunday, May 14 (1) - HOU
Sunday, May 14 (2) - HOU
Tuesday, May 16 - KCR
Wednesday, May 17 - KCR
Thursday, May 18 - KCR
Friday, May 19 - TBR
Saturday, May 20 - TBR
Sunday, May 21 - TBR
Monday, May 22 - KCR
Tuesday, May 23 - KCR
Wednesday, May 24 - KCR
Friday, May 26 - OAK
Saturday, May 27 - OAK
Sunday, May 28 - OAK
Monday, May 29 - BAL
Tuesday, May 30 - BAL
Wednesday, May 31 - BAL
Thursday, Jun 1 - TOR
Friday, Jun 2 - TOR
Saturday, Jun 3 - TOR
Sunday, Jun 4 - TOR
Tuesday, Jun 6 - BOS
Wednesday, Jun 7 - BOS
Thursday, Jun 8 - BOS
Friday, Jun 9 - BAL
Saturday, Jun 10 - BAL
Sunday, Jun 11 - BAL
Monday, Jun 12 - LAA
Tuesday, Jun 13 - LAA
Wednesday, Jun 14 - LAA
Thursday, Jun 15 - OAK
Friday, Jun 16 - OAK
Saturday, Jun 17 - OAK
Sunday, Jun 18 - OAK
Tuesday, Jun 20 - LAA
Wednesday, Jun 21 - LAA
Thursday, Jun 22 - LAA
Friday, Jun 23 - TEX
Saturday, Jun 24 - TEX
Sunday, Jun 25 - TEX
Monday, Jun 26 - CHW
Tuesday, Jun 27 - CHW
Wednesday, Jun 28 - CHW
Thursday, Jun 29 - CHW
Friday, Jun 30 - HOU
Saturday, Jul 1 - HOU
Sunday, Jul 2 - HOU
Monday, Jul 3 - TOR
Tuesday, Jul 4 - TOR
Wednesday, Jul 5 - TOR
Friday, Jul 7 - MIL
Saturday, Jul 8 - MIL
Sunday, Jul 9 - MIL
Friday, Jul 14 - BOS
Saturday, Jul 15 - BOS
Sunday, Jul 16 (1) - BOS
Sunday, Jul 16 (2) - BOS
Monday, Jul 17 - MIN
Tuesday, Jul 18 - MIN
Wednesday, Jul 19 - MIN
Thursday, Jul 20 - SEA
Friday, Jul 21 - SEA
Saturday, Jul 22 - SEA
Sunday, Jul 23 - SEA
Tuesday, Jul 25 - CIN
Wednesday, Jul 26 - CIN
Thursday, Jul 27 - TBR
Friday, Jul 28 - TBR
Saturday, Jul 29 - TBR
Sunday, Jul 30 - TBR
Monday, Jul 31 - DET
Tuesday, Aug 1 - DET
Wednesday, Aug 2 - DET
Thursday, Aug 3 - CLE
Friday, Aug 4 - CLE
Saturday, Aug 5 - CLE
Sunday, Aug 6 - CLE
Tuesday, Aug 8 - TOR
Wednesday, Aug 9 - TOR
Thursday, Aug 10 - TOR
Friday, Aug 11 - BOS
Saturday, Aug 12 - BOS
Sunday, Aug 13 - BOS
Monday, Aug 14 - NYM
Tuesday, Aug 15 - NYM
Wednesday, Aug 16 - NYM
Thursday, Aug 17 - NYM
Friday, Aug 18 - BOS
Saturday, Aug 19 - BOS
Sunday, Aug 20 - BOS
Tuesday, Aug 22 - DET
Wednesday, Aug 23 - DET
Thursday, Aug 24 - DET
Friday, Aug 25 - SEA
Saturday, Aug 26 - SEA
Sunday, Aug 27 - SEA
Monday, Aug 28 - CLE
Wednesday, Aug 30 (1) - CLE
Wednesday, Aug 30 (2) - CLE
Thursday, Aug 31 - BOS
Friday, Sep 1 - BOS
Saturday, Sep 2 - BOS
Sunday, Sep 3 - BOS
Monday, Sep 4 - BAL
Tuesday, Sep 5 - BAL
Thursday, Sep 7 - BAL
Friday, Sep 8 - TEX
Saturday, Sep 9 - TEX
Sunday, Sep 10 - TEX
Monday, Sep 11 - TBR
Tuesday, Sep 12 - TBR
Wednesday, Sep 13 - TBR
Thursday, Sep 14 - BAL
Friday, Sep 15 - BAL
Saturday, Sep 16 - BAL
Sunday, Sep 17 - BAL
Monday, Sep 18 - MIN
Tuesday, Sep 19 - MIN
Wednesday, Sep 20 - MIN
Friday, Sep 22 - TOR
Saturday, Sep 23 - TOR
Sunday, Sep 24 - TOR
Monday, Sep 25 - KCR
Tuesday, Sep 26 - TBR
Wednesday, Sep 27 - TBR
Thursday, Sep 28 - TBR
Friday, Sep 29 - TOR
Saturday, Sep 30 - TOR
Sunday, Oct 1 - TOR"""

        assert repr(self.schedule) == expected
Exemplo n.º 11
0
# The objects below expose their data as pandas DataFrames.

# Print the DataFrame for each team.
for team in Teams():
    print(team.name)
    print(team.dataframe)
# Use .loc[] on a DataFrame to access a single row.

# Walk every team's schedule through the team object.
# NOTE(review): points_scored / points_allowed look like basketball-style
# attribute names, while the Boxscore further down is read through
# away_runs / home_runs -- confirm these attributes exist on the games
# returned by the Teams / Schedule classes imported for this script.
for team in Teams():
    print(team.name)
    schedule = team.schedule  # Request the current team's schedule
    for game in schedule:
        print(game.date, game.points_scored, game.points_allowed)
# A different way to access a schedule: build it from the team abbreviation.

houston_schedule = Schedule('HOU')
for game in houston_schedule:
    print(game.date, game.points_scored, game.points_allowed)

# Get a boxscore directly from its index.

game_data = Boxscore('BOS/BOS201808020')
print(game_data.away_runs, game_data.home_runs)
print(game_data.dataframe)

# A different way to get boxscores: take the index from each scheduled game.

houston_schedule = Schedule('HOU')
for game in houston_schedule:
    print(game.boxscore_index)  # Prints the boxscore URI for each game
    # Returns an instance of the Boxscore class for this specific game
    # (the statement this comment refers to is not part of this snippet).
Exemplo n.º 12
0
            elif re.search(sPattern2, tLinkText):
                teams.append(tLinkText)
    else:
        for tLink in tLinks:
            tLinkText = tLink.text
            if re.search(sPattern1, tLinkText):
                teams.append(tLinkText)

    #removing duplicates
    teams = list(dict.fromkeys(teams))
    baseBall.append(teams)

    #loop through each baseball team, if any, and see if they are playing at home today
    if len(teams) > 0:
        for team in teams:
            sch = Schedule(mlbAbbr[team])
            for game in sch:
                if game.date == today and game.location == 'Home':
                    isPlaying.append(team)
    whosePlaying.append(isPlaying)

# Build the data frame: the parallel lists are zipped into rows, and each
# list supplies one named column.
df = pd.DataFrame(
    data=list(
        zip(
            Names,
            States,
            Lat,
            Long,
            TZ,
            Pops,
            Area,
            Demonyms,
            featColl,
            baseBall,
            whosePlaying,
            weather,
        )
    ),
    columns=[
        'City Name',
        'State',
        'Latitude',
        'Longitude',
        'Time Zone',
        'Population',
        'Area',
        'Demonyms',
        'Notable Educational Insitiutiions',
        'Baseball Team(s)',
        'What team is playing at home today',
        'the weather',
    ],
)
Exemplo n.º 13
0
def predict_mlb_game(team1, team2):
    """Forecast runs scored for two MLB teams with a linear regression.

    For each team, the ``runs_scored`` column of its 2019 schedule is
    standardised and used as the single feature; the target is the
    ``runs_scored`` value ``forecast_out`` rows later. A ``LinearRegression``
    is fitted on a random 80 % split of those pairs and then applied to the
    last ``forecast_out`` rows, which have no target of their own.

    Args:
        team1: Team identifier passed to ``MLB_Schedule``; also used as the
            key for this team in the result.
        team2: Same as ``team1``, for the second team.

    Returns:
        A JSON string mapping each team to an object with ``confidence``
        (the model's ``score`` on the held-out split) and
        ``predicted_score`` (the list of forecast values). Values that JSON
        cannot encode natively are converted with ``str``.
    """
    forecast_out = 1  # how many rows ahead the model predicts

    dataset = {}
    for team in (team1, team2):
        # NOTE (original author): started at 130 on August 24th 2019 - now on
        # 147 Sep 7, hasn't run yet.
        # .copy() gives an independent frame, so adding 'Prediction' below
        # does not write into a slice of the schedule DataFrame.
        df = MLB_Schedule(team, year=2019).dataframe[['runs_scored']].copy()
        print(df.shape)

        # Target column: the runs scored ``forecast_out`` rows later.
        df['Prediction'] = df['runs_scored'].shift(-forecast_out)

        # ``columns=`` instead of a positional axis argument, which newer
        # pandas releases no longer accept.
        X = np.array(df.drop(columns=['Prediction']))
        X = preprocessing.scale(X)

        # The trailing rows have no target yet; they are what gets forecast.
        X_forecast = X[-forecast_out:]
        X = X[:-forecast_out]

        y = np.array(df['Prediction'])
        y = y[:-forecast_out]

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)

        clf = LinearRegression()
        clf.fit(X_train, y_train)

        confidence = clf.score(X_test, y_test)
        forecast_prediction = clf.predict(X_forecast)

        dataset[team] = {
            "confidence": confidence,
            "predicted_score": forecast_prediction.tolist()
        }

    return json.dumps(dataset, default=str)