def setup_method(self, *args, **kwargs):
        """Build the expected game fields and a mocked ``Schedule('KANSAS')``.

        ``utils._todays_date`` is stubbed to return
        ``MockDateTime(YEAR, MONTH)``, ``Boxscore._parse_game_data`` is
        stubbed to return ``None`` and ``Boxscore.dataframe`` to return a
        one-row frame. The expected attribute values are stored on
        ``self.results`` and the schedule on ``self.schedule``.
        """
        self.results = dict(
            game=2,
            boxscore_index='2017-11-14-21-kansas',
            date='Tue, Nov 14, 2017',
            time='9:30 pm/est',
            datetime=datetime(2017, 11, 14, 21, 30),
            type=REGULAR_SEASON,
            location=NEUTRAL,
            opponent_abbr='kentucky',
            opponent_name='Kentucky',
            opponent_rank=7,
            opponent_conference='SEC',
            result=WIN,
            points_for=65,
            points_against=61,
            overtimes=0,
            season_wins=2,
            season_losses=0,
            streak='W 2',
            arena='United Center',
        )

        # Pin "today" to the mocked date.
        todays_date_call = flexmock(utils).should_receive('_todays_date')
        todays_date_call.and_return(MockDateTime(YEAR, MONTH))

        # Replace the boxscore parsing and dataframe with fixed stubs.
        parse_call = flexmock(Boxscore).should_receive('_parse_game_data')
        parse_call.and_return(None)
        dataframe_call = flexmock(Boxscore).should_receive('dataframe')
        dataframe_call.and_return(pd.DataFrame([{'key': 'value'}]))

        self.schedule = Schedule('KANSAS')
Пример #2
0
 def _pull_team_stats(self):
     """Add the extended schedule of every team for 2015-2020 to ``self.df``.

     For each year in ``range(2015, 2021)`` and each entry of
     ``self.teams`` the ``dataframe_extended`` of the matching ``Schedule``
     is concatenated onto ``self.df``. A team/season whose download raises
     ``HTTPError`` is reported on stdout and skipped.
     """
     # Local import keeps the block self-contained; the module's own import
     # section is not visible from here.
     import pandas as pd

     for year in range(2015, 2021):
         for team in self.teams:
             # Only the download can raise HTTPError, so keep the try minimal.
             try:
                 season_frame = Schedule(team, year).dataframe_extended
             except HTTPError:
                 print("HTTP Error")
                 continue
             # DataFrame.append was removed in pandas 2.0; pd.concat is the
             # supported equivalent (and it ignores a None frame).
             self.df = pd.concat([self.df, season_frame])
Пример #3
0
def check_if_game_today():
    """Report whether San Diego State has a game on today's date.

    Prints the current date, then walks ``Schedule('san-diego-state')`` and
    compares each game's date (``YYYY-MM-DD``) with today's local date. For
    the first match the game details are printed.

    Returns:
        bool: True if a game falls on today's date, otherwise False.
    """
    today = datetime.datetime.now().strftime("%Y-%m-%d")

    print("Current date:")
    print(today)
    print()

    for game in Schedule('san-diego-state'):
        game_date = game.datetime.strftime("%Y-%m-%d")
        if game_date != today:
            continue

        # Shift by three hours with timedelta so that starts between 00:00
        # and 02:59 wrap to 21-23 instead of producing a negative hour.
        # NOTE(review): assumes game.datetime is Eastern time - confirm.
        game_hour_pst = (game.datetime - datetime.timedelta(hours=3)).hour

        print("Hey there's a game today!")
        print(game.datetime)
        print(game_date)
        print(game_hour_pst)
        print(game.opponent_name)
        return True

    return False




    
    def test_empty_page_return_no_games(self):
        """A missing stats table yields an empty schedule.

        ``utils._get_stats_table`` is stubbed to return ``None`` and
        ``utils._no_data_found`` is expected to be called exactly once.
        """
        no_data_call = flexmock(utils).should_receive('_no_data_found')
        no_data_call.once()
        stats_table_call = flexmock(utils).should_receive('_get_stats_table')
        stats_table_call.and_return(None)

        empty_schedule = Schedule('KANSAS')
        game_count = len(empty_schedule)

        assert game_count == 0
Пример #5
0
    def test_no_dataframes_extended_returns_none(self):
        """``dataframe_extended`` is None when the iterated game has none.

        ``Schedule._pull_schedule`` is stubbed out, and ``__iter__`` on the
        schedule's type is replaced by a ``PropertyMock`` returning a fake
        game whose ``dataframe_extended`` is ``None``.
        """
        pull_call = flexmock(Schedule).should_receive('_pull_schedule')
        pull_call.and_return(None)
        purdue_schedule = Schedule('PURDUE')

        game_without_frame = flexmock(dataframe_extended=None)
        iter_stub = PropertyMock(return_value=game_without_frame)
        type(purdue_schedule).__iter__ = iter_stub

        assert purdue_schedule.dataframe_extended is None
Пример #6
0
def isNeutral(team, date):
    """Return True if ``team`` plays at a neutral site on ``date``.

    The team name is passed through ``nameConversion`` and its schedule is
    scanned; every game whose ``date`` matches ``date`` according to
    ``dateTransform`` is considered, and the location of the last matching
    game decides the result.

    Args:
        team: Team name accepted by ``nameConversion``.
        date: Date string accepted as the second argument of
            ``dateTransform``.

    Returns:
        bool: True when the matching game's location is ``"Neutral"``.
        False when no game matches or when the lookup fails.
    """
    team = nameConversion(team)
    try:
        location = ""
        for game in Schedule(team):
            if dateTransform(game.date, date):
                # No break on purpose: the last matching game wins.
                location = game.location
                print("same date!")
        return location == "Neutral"
    except Exception:
        # Best-effort lookup; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("some error happened, possibly 404 not found")

    return False
    def test_invalid_default_year_reverts_to_previous_year(self,
                                                           *args,
                                                           **kwargs):
        """The second Kansas game matches the expected fields.

        ``utils._find_year_for_season`` is stubbed to return 2018, and the
        ``Boxscore`` parsing and dataframe are replaced by fixed stubs. Every
        expected attribute is then compared against ``schedule[1]``.
        """
        expected = dict(
            game=2,
            boxscore_index='2017-11-14-21-kansas',
            date='Tue, Nov 14, 2017',
            time='9:30 pm/est',
            datetime=datetime(2017, 11, 14, 21, 30),
            type=REGULAR_SEASON,
            location=NEUTRAL,
            opponent_abbr='kentucky',
            opponent_name='Kentucky',
            opponent_rank=7,
            opponent_conference='SEC',
            result=WIN,
            points_for=65,
            points_against=61,
            overtimes=0,
            season_wins=2,
            season_losses=0,
            streak='W 2',
            arena='United Center',
        )

        season_call = flexmock(utils).should_receive('_find_year_for_season')
        season_call.and_return(2018)
        parse_call = flexmock(Boxscore).should_receive('_parse_game_data')
        parse_call.and_return(None)
        dataframe_call = flexmock(Boxscore).should_receive('dataframe')
        dataframe_call.and_return(pd.DataFrame([{'key': 'value'}]))

        kansas_schedule = Schedule('KANSAS')

        for field_name in expected:
            assert getattr(kansas_schedule[1], field_name) == \
                expected[field_name]
def get_remaining_schedule(conference):
    """Collect the unplayed conference games and current conference wins.

    Args:
        conference: Conference descriptor accepted by ``teams_list``.

    Returns:
        tuple: ``(schedule, current_records)`` where ``schedule`` is a
        sorted, de-duplicated list of ``[home, away]`` pairs and
        ``current_records`` maps each team to the value returned by
        ``get_conference_wins``.
    """
    # remaining_schedule is a list of lists with the inner list being
    # the home first, followed by the away team (ie. [home, away])
    remaining_schedule = []
    current_records = {}
    # Resolve the conference members once instead of once per game.
    conference_teams = list(teams_list(conference))

    for team in conference_teams:
        current_records[team] = get_conference_wins(team)
        for game in Schedule(team):
            # Find all conference matchups that the team hasn't played yet.
            if game.opponent_abbr not in conference_teams or game.points_for:
                continue
            # Compare case-insensitively so both 'AWAY' and 'Away' match.
            # NOTE(review): the library appears to report title-case
            # locations ('Neutral', 'Away') - confirm.
            if str(game.location).upper() == 'AWAY':
                remaining_schedule.append([game.opponent_abbr, team])
            else:
                remaining_schedule.append([team, game.opponent_abbr])

    remaining_schedule.sort()
    # Return a list of non-duplicate matches (groupby needs sorted input).
    schedule = [match for match, _ in itertools.groupby(remaining_schedule)]
    return schedule, current_records
Пример #9
0

def dateTransform(date1, date2):
    """Tell whether two differently formatted date strings name the same day.

    Args:
        date1: Date formatted like ``"Tue, Nov 14, 2017"``.
        date2: String whose first eight characters are ``YYYYMMDD``; anything
            after them is ignored.

    Returns:
        bool: True when both strings resolve to the same calendar date.
    """
    schedule_day = datetime.strptime(date1, "%a, %b %d, %Y").date()
    # Only the leading YYYYMMDD part of the second string is parsed.
    requested_day = datetime.strptime(date2[:8], "%Y%m%d").date()
    return schedule_day == requested_day


# print(dateTransform("Sat, Mar 28, 2021", "20210328 12:30:00"))

# schedule testing

# Ad-hoc check that runs at import time: fetch the Gonzaga schedule and print
# each game's location and date (both are concatenated as strings).
testschedule = Schedule("Gonzaga")
for game in testschedule:
    print("location : " + game.location)
    print("date : " + game.date)


# need to check typeof game.date/game.location, figure out how to transform date, get date (from comparison?)
def isNeutral(team, date):
    # NOTE(review): this definition is cut off after the visible lines; only
    # the start of the schedule scan is shown here.
    # team = teamNameConversion(team)
    # Convert the incoming team name before it is used for the lookup.
    team = nameConversion(team)
    # print(team)
    try:
        schedule = Schedule(team)
        # Starts out empty; any later assignment is outside the visible lines.
        neutral1 = ""
        for game in schedule:
            gameDate = game.date
Пример #10
0
import os
import pandas as pd

from sportsreference.ncaab.teams import Teams
from sportsreference.ncaab.schedule import Schedule

# NOTE(review): presumably collects failed downloads; nothing is added to it
# within the visible lines.
errors = []

# Walk the seasons 2010 through 2019.
for year in range(2010, 2020):
    # The year is used as a string from here on (Teams, Schedule and the
    # 'year' column all receive the string form).
    year = str(year)
    print(year)
    for team in Teams(year=year):
        try:
            # One frame per team and season, filled one game at a time.
            df = pd.DataFrame()
            index = 0
            schedule = Schedule(team.abbreviation, year=year)
            for game in schedule:
                # Single-row frame describing this game.
                temp_df = pd.DataFrame(
                    {
                        'year': year,
                        'arena': game.arena,
                        'location': game.location,
                        'team': team.abbreviation,
                        'opponent': game.opponent_abbr,
                        'team_points': game.points_for,
                        'opponent_points': game.points_against
                    },
                    index=[index])
                # ignore_index=True renumbers the rows of the combined frame.
                df = pd.concat([df, temp_df], ignore_index=True)
                index += 1
            # The output path starts under data/sportsreference_data; the rest
            # of this call is cut off after the visible lines.
            output_path = os.path.join('data', 'sportsreference_data',
Пример #11
0
    def random_forest_regressor(self, year):
        """Predict the winner of team1 vs. team2 with random-forest regressors.

        The extended box scores of both teams' schedules for ``year`` are
        split by a home/away filter on the box-score index and combined into
        two training sets. One regressor is fitted per set and each predicts
        a ``[home_points, away_points]`` pair for the matchup. The two
        predictions are summed per team and the team with the higher total
        wins.

        Args:
            year (int): Season year

        returns:
            team (team): the predicted winning team; when both predicted
            totals are equal one of the two teams is picked at random
        """
        #fields brought in by sports reference api that we don't want
        FIELDS_TO_DROP = ['away_points', 'home_points', 'date', 'location', 'losing_abbr',
                          'losing_name', 'winner', 'winning_abbr', 'winning_name', 'home_ranking',
                          'away_ranking', 'away_defensive_rebounds', 'home_defensive_rebounds',
                          'away_two_point_field_goal_attempts', 'away_two_point_field_goal_percentage',
                          'away_two_point_field_goals', 'home_two_point_field_goal_attempts',
                          'home_two_point_field_goal_percentage', 'home_two_point_field_goals', 'pace',
                          'away_defensive_rating', 'away_defensive_rebound_percentage',
                          'home_defensive_rating', 'home_defensive_rebound_percentage']
        #columns the regressors are trained to predict
        TARGET_FIELDS = ['home_points', 'away_points']

        def schedule_name(team):
            #turn the display name into the identifier used for the schedule lookup
            name = team.get_team_name().replace(" NCAA", "").replace(" ", "-")
            name = name.replace("(", "").replace(")", "").replace("'", "")
            return "CALIFORNIA-IRVINE" if name == "UC-Irvine" else name

        def split_home_away(frame, name):
            #rows whose box-score index contains the team's identifier count as home games.
            #the same normalised name as the schedule lookup is used so that renamed
            #teams (UC-Irvine) and names with apostrophes are matched as well
            #NOTE(review): a substring match also hits longer identifiers that contain
            #this one (e.g. "kansas" in "kansas-state") - confirm this is acceptable
            is_home = frame.index.str.contains(name.lower(), regex=False)
            return frame[is_home], frame[~is_home]

        def training_sets(dataset):
            #feature columns are everything that is not explicitly dropped
            feature_columns = list(dataset.drop(columns=FIELDS_TO_DROP).columns)
            #filter features and targets together so every feature row keeps its own
            #score row (trimming the targets from the end would mis-pair them)
            rows = dataset.dropna(subset=feature_columns + TARGET_FIELDS)
            rows = rows.drop_duplicates(subset=feature_columns)
            return rows[feature_columns], rows[TARGET_FIELDS]

        #pull in the scores for all games played in a certain season for both teams
        team1_name = schedule_name(self.team1)
        team2_name = schedule_name(self.team2)

        team1_schedule = Schedule(team1_name, year)
        team2_schedule = Schedule(team2_name, year)
        print("got schedules")

        team1_df_home, team1_df_away = split_home_away(team1_schedule.dataframe_extended, team1_name)
        team2_df_home, team2_df_away = split_home_away(team2_schedule.dataframe_extended, team2_name)

        print("seperated home and away")
        #compile into one dataset
        dataset_1 = pd.concat([team1_df_home, team2_df_away])
        dataset_2 = pd.concat([team2_df_home, team1_df_away])

        print('concated proper dataframes')
        #create training sets from dataset
        X_train_1, Y_train_1 = training_sets(dataset_1)
        X_train_2, Y_train_2 = training_sets(dataset_2)

        print('created training sets')

        #create the x test, once for each home/away arrangement
        X_test_1 = self.get_regeressor_info(self.team1, self.team2)
        X_test_2 = self.get_regeressor_info(self.team2, self.team1)

        print('got test sets')
        #parameters for model (could use tweaking to improve accuracy in the future)
        #NOTE(review): min_samples_leaf=50 prevents any split when a training set has
        #fewer than 100 rows - confirm this is intended
        parameters = {
            'bootstrap': True,
            'max_depth': 6,
            'max_features': None,
            'min_samples_leaf': 50,
            'min_samples_split': 12,
            'n_estimators': 100}
        #create model
        model_1 = RandomForestRegressor(**parameters)
        model_2 = RandomForestRegressor(**parameters)

        print('finished join')

        model_1.fit(X_train_1, Y_train_1)
        model_2.fit(X_train_2, Y_train_2)

        #predict outcome of game based of season statistics for both teams
        spread_1 = model_1.predict(X_test_1).astype(int)
        spread_2 = model_2.predict(X_test_2).astype(int)

        print('predicted spreads')
        #read the scores straight from the arrays; parsing the printed array breaks
        #when numpy pads numbers of different widths with extra spaces
        home_points_1, away_points_1 = int(spread_1[0][0]), int(spread_1[0][1])
        home_points_2, away_points_2 = int(spread_2[0][0]), int(spread_2[0][1])

        #team1 is the home side of the first prediction and the away side of the second
        team1_score = home_points_1 + away_points_2
        team2_score = away_points_1 + home_points_2

        print("Team 1 score", team1_score, self.team1.get_team_name())
        print("Team 2 score", team2_score, self.team2.get_team_name())

        if team1_score > team2_score:
            return self.team1
        if team1_score < team2_score:
            return self.team2
        #equal scores: pick a winner at random
        return random.choice([self.team1, self.team2])