class Odds_Calculator:
    """Builds per-team statistics from loaded season data and reports odds.

    The season data passed around as ``data`` / ``original_data`` is a list
    with one dict per season. The methods below read the keys 'year',
    'other_team', 'home_away', 'game_scores', 'dates' and 'period_scores';
    'game_scores'[i] is indexed as [this team's score, opponent's score].
    """

    opener = None
    scraper = None
    algo = None
    universal = None
    user_agents = []

    #can be nba, nhl, nfl, mlb
    league = "nba"

    num_periods = {'nba': 4, 'nhl': 3, 'nfl': 4, 'mlb': 9}

    def __init__(self, league):
        """Store the lower-cased league and create the helper objects."""
        self.league = league.lower()
        self.universal = Universal_Functions(self.league)
        self.espn_scraper = ESPN_Scraper(self.league)

    def _analysis_path(self, team, suffix):
        """Return the single-analysis path for ``team`` ending in ``suffix``."""
        return ("./" + str(self.league) + "/analyze/single_analysis/" +
                str(team[1]) + "/" + str(team[1]) + suffix)

    @staticmethod
    def _fair_moneyline(win_percentage):
        """Convert a win percentage into the magnitude of a moneyline.

        Returns infinity for a 100% chance instead of dividing by zero.
        """
        remaining = 100 - win_percentage
        if remaining == 0:
            return float("inf")
        return (100 / remaining - 1) * 100

    #analyzes a single team
    def single_team_analysis(self, team):
        """Prompt for the season year, refresh the data and analyze ``team``."""
        cur_year = input("Current season year: ")
        self.espn_scraper.update_data(team, cur_year)
        data = self.universal.load_data(team, "", cur_year)
        self.analyze(team, data, cur_year)

    #analyzes 2 teams and compares to determine which has best chance of winning
    def team_comparison(self, algo_version, team1, team2, date, cur_year):
        """Compare an away team (``team1``) with a home team (``team2``).

        ``algo_version`` must be "Algo_V1" or "Algo_V2". Returns the report
        as a list of text lines.

        Raises:
            ValueError: if ``algo_version`` is not one of the two versions.
        """
        #fails early instead of hitting an unbound variable further down
        if algo_version not in ("Algo_V1", "Algo_V2"):
            raise ValueError("Unknown algo_version: " + str(algo_version))

        self.algo = Algo(self.league)

        self.espn_scraper.update_data(team1, cur_year)
        self.espn_scraper.update_data(team2, cur_year)

        data1 = self.universal.load_data(team1, date, cur_year)
        data2 = self.universal.load_data(team2, date, cur_year)

        returned1 = self.analyze2(team1, team2, data1, "away")
        returned2 = self.analyze2(team2, team1, data2, "home")

        if algo_version == "Algo_V1":
            algo_data = self.algo.calculate(date, returned1, returned2)
        else:
            algo_data = self.algo.calculate_V2(date, returned1, returned2)

        record_points = algo_data['record_points']
        home_away_points = algo_data['home_away_points']
        home_away_10_games_points = algo_data['home_away_10_games_points']
        last_10_games_points = algo_data['last_10_games_points']
        avg_points = algo_data['avg_points']
        avg_points_10_games = algo_data['avg_points_10_games']
        total = algo_data['total']

        def scaled(label, value, factor):
            #formats a line such as "Seasonal Record: 5/10 = 0.5"
            return (label + str(value * factor) + "/" + str(factor) + " = " +
                    str(value))

        to_output = [
            "",
            "Date: " + str(date),
            "Away: " + str(team1[1]) + " | Home: " + str(team2[1]),
        ]

        if algo_version == "Algo_V1":
            win_streak = algo_data['win_streak']
            win_streak_home_away = algo_data['win_streak_home_away']

            if self.league == "nba":
                to_output.append(scaled("Seasonal Record: ", record_points, 10))
                to_output.append(scaled("Home Away: ", home_away_points, 10))
                to_output.append(
                    scaled("Home away 10: ", home_away_10_games_points, 5))
                to_output.append(
                    scaled("Last 10 games: ", last_10_games_points, 5))
                to_output.append(scaled("Avg points: ", avg_points, 8))
                to_output.append(
                    scaled("Avg points 10: ", avg_points_10_games, 8))
            else:
                to_output.append(scaled("Seasonal Record: ", record_points, 5))
                to_output.append(scaled("Home Away: ", home_away_points, 5))
                to_output.append(
                    scaled("Home away 10: ", home_away_10_games_points, 5))
                to_output.append(
                    scaled("Last 10 games: ", last_10_games_points, 5))
                to_output.append("Avg points: " + str(avg_points / 2) +
                                 "*2 = " + str(avg_points))
                to_output.append("Avg points 10: " +
                                 str(avg_points_10_games / 2) + "*2 = " +
                                 str(avg_points_10_games))
            to_output.append(scaled("Win streak: ", win_streak, 3))
            to_output.append(
                scaled("Win streak home away: ", win_streak_home_away, 3))
            to_output.append("--------")
            to_output.append("Total: " + str(total))
            to_output.append("--------")
        else:
            to_output.append("Seasonal Record: " + str(record_points) + "%")
            to_output.append("Home Away: " + str(home_away_points) + "%")
            to_output.append("Home away 10: " +
                             str(home_away_10_games_points) + "%")
            to_output.append("Last 10 games: " + str(last_10_games_points) +
                             "%")
            to_output.append("Avg points: " + str(avg_points) + "%")
            to_output.append("Avg points 10: " + str(avg_points_10_games) +
                             "%")
            #the home/away win streak is only read for the nhl
            if self.league == "nhl":
                to_output.append("Win streak home away: " +
                                 str(algo_data['win_streak_home_away']) + "%")
            to_output.append("--------")
            to_output.append("Total: " + str(total) + "%")
            to_output.append("--------")

        #chance of favorable team winning
        if algo_version == "Algo_V1":
            winning_odds = self.get_odds(total)
        else:
            winning_odds = abs(total)

        to_output.append("Perc chance to win: " + str(winning_odds) + "%")

        #both sides are derived from the same percentage, so the magnitudes match
        favorable_odds = self._fair_moneyline(winning_odds)
        underdog_odds = self._fair_moneyline(winning_odds)
        to_output.append("Favorable team odds: -" + str(favorable_odds))
        to_output.append("Underdog team odds: +" + str(underdog_odds))

        return to_output

    #gets odds of winning for algo_V1
    def get_odds(self, total_points):
        """Map an Algo_V1 point total to a win percentage of at least 50.

        The magnitude of ``total_points`` is capped at 27, scaled to 0-10 and
        fed through a quadratic. Every league uses the same coefficients.
        """
        #puts total points at a max of 27
        max_points = 27
        capped = min(abs(total_points), max_points)

        x = capped / max_points * 10

        #2D polynomial that follows the percentage chance of winning per level of ranking 1-10
        y = -0.23 * (x**2) + 7.25 * x + 47.9

        if y < 50:
            y = 50

        return y

    #analyzes current team
    def analyze(self, team, data, end_year):
        """Run the interactive single-team analysis and save its files."""
        #creates missing parent folders too, and is a no-op if it already exists
        os.makedirs("./" + str(self.league) + "/analyze/single_analysis/" +
                    str(team[1]),
                    exist_ok=True)

        home_away = input("Are they home or away: ").lower()
        other_team = input("Playing against (letter abbreviation): ")

        returned = self.analyze2(team, other_team, data, home_away)
        self.save_analysis(team, data, returned, home_away)

        returned['output'] = self.get_output_analysis("", team, returned,
                                                      home_away)

        more_output = self.analyze_wins_ranked_teams(team, data, end_year)
        for line in more_output:
            returned['output'].append(line)

        self.universal.save_to_txt(self._analysis_path(team, "_analysis.txt"),
                                   returned['output'])

    #analyzes whatever team needed for self.analyze()
    def analyze2(self, team, other_team, data, home_away):
        """Collect every statistic for ``team`` into one dict.

        ``other_team`` may be an abbreviation or a list whose first item is
        the abbreviation. ``home_away`` is accepted but not used here.
        """
        print("Analyzing " + str(team))

        #definition only accepts "lal" and not ["lal", "los-angeles-lakers"], so check
        if isinstance(other_team, list):
            opponent = other_team[0]
        else:
            opponent = other_team

        to_return = {}
        #seasonal win-loss ratio
        to_return['seasonal_records'] = self.get_seasonal_records(data)
        #average point stats
        to_return['avg_game_points'] = self.get_avg_points(data)
        #stats in home vs away games
        to_return['home_away_record'] = self.get_home_away_record(data)
        #win-loss ratio game by game in the latest season
        to_return['current_win_ratio'] = self.get_current_win_ratio(data)
        #last 10 games win ratio
        to_return['10_game_win_ratio'] = self.analyze_10_games_win_ratio(data)
        #winning or losing streaks against specified team
        to_return['win_loss_streaks_against'] = self.get_win_streaks_against(
            opponent, data)

        return to_return

    def save_analysis(self, team, data, returned, home_away):
        """Write each part of ``returned`` to its own CSV file for ``team``."""
        #seasonal win-loss ratio
        records = returned['seasonal_records']
        to_save = []
        for x in range(0, len(records)):
            to_save.append(
                ["1-1-" + str(data[x]['year']), records[x][0] - records[x][1]])
        path = self._analysis_path(team, "_seasonal_records.csv")
        self.universal.save_to_csv(path, to_save)
        print("Saved to " + str(path))

        #average point stats
        avg_points = returned['avg_game_points']
        to_save = []
        for x in range(0, len(avg_points['avg_game_points'])):
            to_add = [
                "1-1-" + str(data[x]['year']),
                avg_points['avg_game_points'][x],
                avg_points['avg_other_game_points'][x],
                avg_points['avg_game_points'][x] +
                avg_points['avg_other_game_points'][x],
            ]
            to_add.extend(avg_points['avg_quarter_points'][x])
            to_save.append(to_add)
        path = self._analysis_path(team, "_avg_game_points.csv")
        self.universal.save_to_csv(path, to_save)
        print("Saved to " + str(path))

        #stats in home vs away games
        home_away_records = returned['home_away_record']
        to_save = []
        for x in range(0, len(home_away_records['home_record'])):
            to_save.append([
                "1-1-" + str(data[x]['year']),
                home_away_records['home_record'][x][0],
                home_away_records['home_record'][x][1],
            ])
        #three blank rows separate the home table from the away table
        to_save.append(["", "", ""])
        to_save.append(["", "", ""])
        to_save.append(["", "", ""])
        for x in range(0, len(home_away_records['away_record'])):
            to_save.append([
                "1-1-" + str(data[x]['year']),
                home_away_records['away_record'][x][0],
                home_away_records['away_record'][x][1],
            ])
        path = self._analysis_path(team, "_home_away_record.csv")
        self.universal.save_to_csv(path, to_save)
        print("Saved to " + str(path))

        #win-loss ratio game by game in the latest season
        win_loss = returned['current_win_ratio']
        path = self._analysis_path(team, "_current_win_ratio.csv")
        self.universal.save_to_csv(path, win_loss)
        print(path)

        #last 10 games win ratio
        last_10_games = returned['10_game_win_ratio']
        #turns -4 into "3-7"
        labels = {
            '-10': '"0-10"',
            '-8': '"1-9"',
            '-6': '"2-8"',
            '-4': '"3-7"',
            '-2': '"4-6"',
            '0': '"5-5"',
            '2': '"6-4"',
            '4': '"7-3"',
            '6': '"8-2"',
            '8': '"9-1"',
            '10': '"10-0"'
        }
        to_save = []
        to_save.append(["Year", "win-loss", "num wins", "num games"])
        for x in range(0, len(last_10_games)):
            for y in range(-10, 11, 2):
                key = str(y)
                to_add = []
                #only has year at beginning of listing
                if y == -10:
                    to_add.append(data[x]['year'])
                else:
                    to_add.append("")
                to_add.append(labels[key])
                to_add.append(last_10_games[x][key][0])
                to_add.append(last_10_games[x][key][1])
                #gets win percentage as a spreadsheet formula on this row
                if last_10_games[x][key][1] != 0:
                    row = str(len(to_save) + 1)
                    to_add.append("=C" + row + "/D" + row + "*100")
                else:
                    to_add.append(0)
                to_save.append(to_add)
            #blank row after each season
            to_save.append(["", "", "", ""])
        path = self._analysis_path(team, "_10_game_win_ratio.csv")
        self.universal.save_to_csv(path, to_save)
        print(path)

        #winning or losing streaks against specified team
        to_save = []
        wins_against = returned['win_loss_streaks_against']
        to_save.append(["Losing streak", wins_against['games_since_last_win']])
        to_save.append(
            ["Winning streak", wins_against['games_since_last_loss']])
        if home_away in ("away", "home"):
            to_save.append([
                "Losing streak " + home_away,
                wins_against['games_since_last_win_' + home_away]
            ])
            to_save.append([
                "Winning streak " + home_away,
                wins_against['games_since_last_loss_' + home_away]
            ])
        path = self._analysis_path(team, "_win_loss_streaks_against.csv")
        self.universal.save_to_csv(path, to_save)
        print(path)

    def get_output_analysis(self, indent, team, returned, home_away):
        """Return the text report lines for one team.

        ``returned`` is the dict built by analyze2(). The trend line compares
        the last two entries of 'seasonal_records', so at least two seasons
        are needed.
        """
        records = returned['seasonal_records']
        avg_points = returned['avg_game_points']
        home_away_records = returned['home_away_record']
        win_loss = returned['current_win_ratio']
        last_10_games = returned['10_game_win_ratio']
        wins_against = returned['win_loss_streaks_against']

        #### output ####
        to_output = []
        to_output.append("")
        to_output.append("")
        to_output.append(indent + team[1])

        latest_diff = records[-1][0] - records[-1][1]
        previous_diff = records[-2][0] - records[-2][1]
        if latest_diff > previous_diff:
            trend = "uptrend"
        else:
            trend = "downtrend"
        to_output.append(indent + "Season: " + str(latest_diff) + " on " +
                         str(trend))

        if home_away in ("away", "home"):
            season = home_away_records[home_away + '_record'][-1]
            recent = home_away_records[home_away + '_10_games'][-1]
            to_output.append(indent + "Home-Away: " + str(season[0]) + "-" +
                             str(season[1]) + " " + home_away)
            to_output.append(indent + " Last 10 " + home_away + " games: " +
                             str(recent[0]) + "-" + str(recent[1]))

        #only looks at games that exist, so a short season no longer wraps
        #around to negative indexes and counts games twice
        recent_games = win_loss[-10:]
        win_10_games = sum(game[2] for game in recent_games)
        num_won = sum(1 for game in recent_games if game[2] > 0)
        to_output.append(indent + "10 Games: " + str(num_won) + "-" +
                         str(len(recent_games) - num_won))

        #records from fewer than 10 games have no entry in the table
        won, num_games = last_10_games[-1].get(str(win_10_games), [0, 0])
        if num_games != 0:
            to_output.append(indent + " " + str(won) + " won out of " +
                             str(num_games) + " games | " +
                             str(won / num_games * 100) + "%")
        else:
            to_output.append(indent + " " + str(won) + " won out of " +
                             str(num_games) + " games | N/A%")

        to_output.append(indent + "Avg points: " +
                         str(avg_points['avg_game_points'][-1]) + " - " +
                         str(avg_points['avg_other_game_points'][-1]))
        to_output.append(indent + " Last 10 games: " +
                         str(avg_points['avg_10_games'][-1]) + " - " +
                         str(avg_points['avg_other_10_games'][-1]))

        #on winning streak
        if wins_against['games_since_last_loss'] > 0:
            to_output.append(indent + "Winning streak against " +
                             str(wins_against['other_team']) + ": " +
                             str(wins_against['games_since_last_loss']))
            to_output.append(
                indent + " Winning streak " + home_away + ": " +
                str(wins_against['games_since_last_loss_' + str(home_away)]))
        elif wins_against['games_since_last_win'] > 0:
            to_output.append(indent + "Losing streak against " +
                             str(wins_against['other_team']) + ": " +
                             str(wins_against['games_since_last_win']))
            to_output.append(
                indent + " Losing streak " + home_away + ": " +
                str(wins_against['games_since_last_win_' + str(home_away)]))

        return to_output

    #analyzes number of wins against teams of certain rankings. Like # wins against even teams (23-25 to 27-25) or against good teams (30-15) or bad teams (15-30)... etc
    def analyze_wins_ranked_teams(self, team, data, end_year):
        """Report both teams' form for each of the latest games.

        Walks backwards through up to the last 10 games of the latest season
        in ``data`` and returns all report lines as one list.
        """
        total_output = []
        indent = " "

        #loaded once since the list does not depend on the game
        league_teams = self.universal.load_league_teams()

        latest = data[-1]
        num_games = len(latest['other_team'])

        #stops at the first game so a short season is not walked twice
        for x in range(num_games - 1, max(num_games - 11, -1), -1):
            other_team = [latest['other_team'][x], ""]
            date = latest['dates'][x]
            home_away = latest['home_away'][x]

            #anything that is not "home" is treated as an away game
            other_home_away = "away" if home_away == "home" else "home"

            #gets "los-angeles-lakers" if given "lal"
            for name in league_teams:
                if name[0] == other_team[0]:
                    other_team[1] = name[1]

            cur_data = self.universal.load_data(team, date, end_year)
            print(cur_data[-1]['other_team'][-1])
            returned = self.analyze2(team, other_team[0], cur_data, home_away)
            output = self.get_output_analysis(indent, team, returned,
                                              home_away)
            for line in output:
                total_output.append(line)

            other_data = self.universal.load_data(other_team, date, end_year)
            print(
                str(other_data[-1]['other_team'][-1]) + " | " + str(date) +
                " | " + str(other_data[-1]['dates'][-5]))
            returned = self.analyze2(other_team, team[0], other_data,
                                     other_home_away)
            output = self.get_output_analysis(indent, other_team, returned,
                                              other_home_away)
            print()
            for line in output:
                print(line)
                total_output.append(line)
            total_output.append("")

            #adds winner and scores
            cur_team_score = latest['game_scores'][x][0]
            other_team_score = latest['game_scores'][x][1]
            if cur_team_score > other_team_score:
                total_output.append(indent + "Winner: " + team[1] + " | " +
                                    str(cur_team_score) + "-" +
                                    str(other_team_score))
            else:
                total_output.append(indent + "Winner: " + other_team[1] +
                                    " | " + str(other_team_score) + "-" +
                                    str(cur_team_score))
            total_output.append(indent +
                                "----------------------------------------")
            print()

        return total_output

    #returns wins/loss streaks against other_team
    def get_win_streaks_against(self, other_team, original_data):
        """Count the current streaks against ``other_team``.

        Every counter starts at 0. A win adds one to the matching
        'games_since_last_loss' counters and resets 'games_since_last_win';
        any other result does the opposite. Games not marked "away" are
        counted as home games.
        """
        to_return = {
            'other_team': other_team,
            'games_since_last_win': 0,
            'games_since_last_loss': 0,
            'games_since_last_win_away': 0,
            'games_since_last_win_home': 0,
            'games_since_last_loss_away': 0,
            'games_since_last_loss_home': 0,
        }

        for data in original_data:
            for y in range(0, len(data['other_team'])):
                if data['other_team'][y] != other_team:
                    continue

                if data['home_away'][y] == "away":
                    place = "away"
                else:
                    place = "home"

                #if won
                if data['game_scores'][y][0] > data['game_scores'][y][1]:
                    reset, grow = 'games_since_last_win', 'games_since_last_loss'
                #if lost
                else:
                    reset, grow = 'games_since_last_loss', 'games_since_last_win'

                to_return[reset] = 0
                to_return[grow] += 1
                to_return[reset + "_" + place] = 0
                to_return[grow + "_" + place] += 1

        return to_return

    # #gets percentage of games won if ahead after 1st quarter, 2nd quarter, etc.
    # def get_perc_win_quarters_ahead(self, data):

    # #gets total goals for and goals against
    # def get_goals_for_against(self, data):

    #determines whether teams win or lose more often if they have a good or bad last 10 games
    def analyze_10_games_win_ratio(self, original_data):
        """Tally results by the record of the 10 games played just before.

        Returns one dict per season, keyed by str(wins - losses) of the
        previous 10 games ('-10' to '10' in steps of 2). Each value is
        [games won, games played] for games that followed such a record.
        """
        to_return = []
        for data in original_data:
            #win_data['4'] will hold data for last 10 games with ratio 7-3
            #increments by 2 since subtracting losses from wins of last 10 games will never have odd number
            win_data = {}
            for y in range(-10, 11, 2):
                win_data[str(y)] = [0, 0]

            last_10_record = []
            for y in range(0, len(data['other_team'])):
                won = data['game_scores'][y][0] > data['game_scores'][y][1]

                #only counts this game if 10 records already present
                if len(last_10_record) == 10:
                    bucket = win_data[str(sum(last_10_record))]
                    if won:
                        bucket[0] += 1
                    bucket[1] += 1

                #adding 1 or -1 is same as subtracting num losses from num wins
                last_10_record.append(1 if won else -1)
                if len(last_10_record) > 10:
                    last_10_record.pop(0)

            to_return.append(win_data)
        return to_return

    #gets win-loss ratio during each game during the current season
    def get_current_win_ratio(self, original_data):
        """Return one row per game of the latest season.

        Each row is [own score, other score, 1 or -1, running total].
        """
        data = original_data[-1]

        to_return = []
        cur_score = 0
        for scores in data['game_scores']:
            if scores[0] > scores[1]:
                result = 1
            else:
                result = -1
            cur_score += result
            to_return.append([scores[0], scores[1], result, cur_score])
        return to_return

    #gets wins-losses while at home or away
    def get_home_away_record(self, original_data):
        """Return [wins, losses] at home and away for every season.

        Also returns the same counts restricted to the last 10 home games
        and the last 10 away games of each season. Games marked neither
        "home" nor "away" are ignored.
        """
        to_return = {
            'home_record': [],
            'away_record': [],
            'home_10_games': [],
            'away_10_games': [],
        }

        for data in original_data:
            home_away = data['home_away']
            game_scores = data['game_scores']

            results = {'home': [], 'away': []}
            for y in range(0, len(home_away)):
                if home_away[y] in results:
                    if game_scores[y][0] > game_scores[y][1]:
                        results[home_away[y]].append(1)
                    else:
                        results[home_away[y]].append(-1)

            for place in ('home', 'away'):
                played = results[place]
                to_return[place + '_record'].append(
                    [played.count(1), played.count(-1)])
                #gets stats on last 10 games
                to_return[place + '_10_games'].append(
                    [played[-10:].count(1), played[-10:].count(-1)])

        return to_return

    #calculates a bunch of average points stats
    def get_avg_points(self, original_data):
        """Return average scores per season.

        Seasons with no games are skipped, so the returned lists can be
        shorter than ``original_data``. The "10 games" averages use the last
        10 games, or every game when fewer than 10 were played. Period
        scores that are missing or not numeric are left out of the totals.
        """
        avg_game_points = []
        avg_other_game_points = []
        avg_10_games = []
        avg_other_10_games = []
        avg_quarters = []

        for data in original_data:
            num_games = len(data['other_team'])
            if num_games == 0:
                continue

            #gets avg_game_points
            total_points = 0
            other_total_points = 0
            for y in range(0, num_games):
                total_points += data['game_scores'][y][0]
                other_total_points += data['game_scores'][y][1]
            avg_game_points.append(
                self.universal.convert_number(total_points / num_games))
            avg_other_game_points.append(
                self.universal.convert_number(other_total_points / num_games))

            #gets average points for last 10 games
            #stops at the first game and divides by the games actually used
            recent = range(num_games - 1, max(num_games - 11, -1), -1)
            total_points = 0
            other_total_points = 0
            for y in recent:
                total_points += data['game_scores'][y][0]
                other_total_points += data['game_scores'][y][1]
            avg_10_games.append(
                self.universal.convert_number(total_points / len(recent)))
            avg_other_10_games.append(
                self.universal.convert_number(other_total_points /
                                              len(recent)))

            #gets average period scores, own periods first then the other team's
            num_periods = self.num_periods[self.league]
            total_quarters = [0] * num_periods * 2
            for y in range(0, num_games):
                #adds current team's periods, keeping what was added before a bad value
                try:
                    for z in range(0, num_periods):
                        total_quarters[z] += int(
                            data['period_scores'][y][0][z])
                except (IndexError, KeyError, TypeError, ValueError):
                    pass

                #adds other team's periods
                try:
                    for z in range(0, len(data['period_scores'][y][1])):
                        total_quarters[z + num_periods] += int(
                            data['period_scores'][y][1][z])
                except (IndexError, KeyError, TypeError, ValueError):
                    pass

            #gets average quarter scores
            for y in range(0, len(total_quarters)):
                total_quarters[y] = total_quarters[y] / num_games

            avg_quarters.append(total_quarters)

        to_return = {}
        to_return['avg_game_points'] = avg_game_points
        to_return['avg_other_game_points'] = avg_other_game_points
        to_return['avg_10_games'] = avg_10_games
        to_return['avg_other_10_games'] = avg_other_10_games
        to_return['avg_quarter_points'] = avg_quarters
        return to_return

    #gets records like 2016: 49-20 for all seasons
    def get_seasonal_records(self, original_data):
        """Return [wins, losses] for every season in ``original_data``."""
        records = []
        for data in original_data:
            num_wins = 0
            for y in range(0, len(data['other_team'])):
                if data['game_scores'][y][0] > data['game_scores'][y][1]:
                    num_wins += 1
            records.append([num_wins, len(data['game_scores']) - num_wins])
        return records
class Backtester: odds_calculator = None universal = None user_agents = [] league_teams = [] #can be nba, nhl, nfl, mlb league = "nba" num_periods = {'nba': 4, 'nhl': 3, 'nfl': 4, 'mlb': 9} #algo_version = "Algo_V1" or "Algo_V2" def __init__(self, league, algo_version): self.league = league.lower() self.algo_version = algo_version self.odds_calculator = Odds_Calculator(self.league) self.universal = Universal_Functions(self.league) self.league_teams = self.universal.load_league_teams() #backtests algorithm for games played on certain days #goes from start_date to end_date one day at a time during the season, and analyzes games played on those days #each day takes about 4 seconds on my desktop def backtest_csv_output(self, start_date, end_date): #breaks up date temp = start_date.split("-") month = int(temp[0]) day = int(temp[1]) year = int(temp[2]) cur_date = start_date content = [] #this is actually a do-while loop while True: games = self.universal.get_games(cur_date) print("date: " + cur_date) for game in games: print(" Game: " + str(game)) to_save = [] to_save.append([ cur_date, "Away", "Home", "Algo points", "Proj winner", "Winner", "", "", "Score" ]) for x in range(0, len(games)): #team might not exist anymore, so don't count that game if len(games[x]['team1']) != 0 and len(games[x]['team2']) != 0: data1 = self.universal.load_data(games[x]['team1'], games[x]['date']) data2 = self.universal.load_data(games[x]['team2'], games[x]['date']) # print("Teams: "+str(games[x]['team1'])+" | "+str(games[x]['team2'])) # print("date: "+str(games[x]['date'])) # print(data1) # print(data1[0]['dates'][0]) # print(data1[0]['dates'][-1]) returned1 = self.odds_calculator.analyze2( games[x]['team1'], games[x]['team2'], data1, "away") returned2 = self.odds_calculator.analyze2( games[x]['team2'], games[x]['team1'], data2, "home") # print(returned1) # print() # print(returned2) # print() algo = Algo(self.league) if self.algo_version == "Algo_V1": algo_data = 
algo.calculate(games[x]['date'], returned1, returned2) elif self.algo_version == "Algo_V2": algo_data = algo.calculate_V2(games[x]['date'], returned1, returned2) total = algo_data['total'] to_add = [] to_add.append("") to_add.append(games[x]['team1'][0]) to_add.append(games[x]['team2'][0]) to_add.append(total) if self.algo_version == "Algo_V1": #categorizes odds (points) levels = [] levels.append([0, 3]) levels.append([3, 6]) levels.append([6, 9]) levels.append([9, 12]) levels.append([12, 15]) levels.append([15, 18]) levels.append([18, 21]) levels.append([21, 24]) levels.append([24, 27]) levels.append([27, 100]) elif self.algo_version == "Algo_V2": #categorizes odds (percentage) levels = [] levels.append([50, 55]) levels.append([55, 60]) levels.append([60, 65]) levels.append([65, 70]) levels.append([70, 75]) levels.append([75, 80]) levels.append([80, 85]) levels.append([85, 90]) levels.append([90, 95]) levels.append([95, 100]) level = 0 for y in range(0, len(levels)): if (total >= levels[y][0] and total < levels[y][1] ) or (total * -1 >= levels[y][0] and total * -1 < levels[y][1]): level = y + 1 #appends projected team if total > 0: to_add.append(games[x]['team1'][0]) elif total <= 0: to_add.append(games[x]['team2'][0]) #appends winning team if games[x]['game_scores'][0] > games[x]['game_scores'][1]: to_add.append(games[x]['team1'][0]) else: to_add.append(games[x]['team2'][0]) #appends score score = str(games[x]['game_scores'][0]) + "-" + str( games[x]['game_scores'][1]) to_add.append(score) # #appends algo data # if to_add[-2]==to_add[-3]: # to_add.append("") # if self.algo_version=="Algo_V1": # to_add.append(str(level)) # elif self.algo_version=="Algo_V2": # to_add.append(total) # elif to_add[-3]!="": # if self.algo_version=="Algo_V1": # to_add.append(str(level)) # elif self.algo_version=="Algo_V2": # to_add.append(total) # to_add.append("") # else: # to_add.append("") # to_add.append("") #appends algo data if to_add[-2] == to_add[-3]: to_add.append("") 
to_add.append(str(level)) elif to_add[-3] != "": to_add.append(str(level)) to_add.append("") else: to_add.append("") to_add.append("") #appends betting odds if self.algo_version == "Algo_V1": odds_calculator = Odds_Calculator(self.league) odds = odds_calculator.get_odds(total) elif self.algo_version == "Algo_V2": odds = abs(total) favorable_odds = (100 / (100 - abs(odds)) - 1) * 100 underdog_odds = (100 / (100 - abs(odds)) - 1) * 100 if total > 0: to_add.append("-" + str(favorable_odds)) to_add.append("+" + str(underdog_odds)) else: to_add.append("+" + str(underdog_odds)) to_add.append("-" + str(favorable_odds)) to_save.append(to_add) #space between data to_save.append(["", "", "", "", "", "", "", "", ""]) #only saves day's games if there were actually games on that day if len(to_save) > 2: content.append(to_save) #breaks loop to act like do-while if cur_date == end_date: break day += 1 if day > 31: month += 1 day = 1 #doesn't increment year since the season's year doesn't change if month > 12: month = 1 day = 1 #increments season at the end of the season to sometime in the middle if self.league == "nba": if "4-1-" in cur_date: year += 1 month = 2 day = 1 elif self.league == "nhl": if "4-1-" in cur_date: year += 1 month = 2 day = 1 elif self.league == "mlb": if "10-1-" in cur_date: year += 1 month = 7 day = 1 cur_date = str(month) + "-" + str(day) + "-" + str(year) #has most recent games first content.reverse() to_save = [] for x in range(0, len(content)): for y in range(0, len(content[x])): to_save.append(content[x][y]) if start_date != end_date: self.universal.save_to_csv( "./" + str(self.league) + "/analyze/" + str(self.league) + "_" + str(self.algo_version) + "_" + str(start_date) + "_" + str(end_date) + "_analysis.csv", to_save) else: self.universal.save_to_csv( "./" + str(self.league) + "/analyze/" + str(self.league) + "_" + str(self.algo_version) + "_" + str(end_date) + "_analysis.csv", to_save) #backtests algo_V2 for games played on each day #goes from 
# start_date to end_date one day at a time during the season, runs algo_V2 on those days, compares projected odds versus oddsportal odds, and simulates betting
# each day takes about 4 seconds on my desktop
def backtest_odds(self, start_date, end_date):
    """Simulate six $100 betting strategies against oddsportal odds; save a CSV.

    Every game returned by ``self.universal.get_games`` for each date from
    ``start_date`` to ``end_date`` (``"M-D-YYYY"`` strings) is projected with
    ``Algo.calculate_V2`` and compared with ``self.universal.get_odds_game``.

    - Strategy 0.0: bet on algo's projected winner, no matter the odds.
    - Strategy 0.1: bet on oddsmaker's projected winner, no matter the odds.

    All strategies below only bet when the oddsmaker's odds for a team are
    higher than the algo's projected odds for it:

    - Strategy 1: default strategy.
    - Strategy 2: the team is also the algo's favorite.
    - Strategy 3: the team is the algo's favorite and the oddsmaker's underdog.
    - Strategy 4: the odds also differ from the projection by a league
      specific amount (45 for mlb, 100 for nba).

    The loop only stops when the stepped date equals ``end_date`` exactly, so
    ``end_date`` must be a date the stepping actually produces.
    """
    #breaks up date
    parts = start_date.split("-")
    month = int(parts[0])
    day = int(parts[1])
    year = int(parts[2])

    cur_date = start_date
    content = []
    #this is actually a do-while loop
    while True:
        games = self.universal.get_games(cur_date)
        print("date: " + cur_date)
        for game in games:
            print(" Game: " + str(game))

        #one running total per strategy, in order: 0.0, 0.1, 1, 2, 3, 4
        strats = [{"total_bet": 0, "total_win": 0} for _ in range(6)]
        to_save = []
        for game in games:
            row = self._backtest_odds_row(cur_date, year, game, strats)
            if row:
                to_save.append(row)

        #only days with more than two priced games are saved
        #NOTE(review): days with one or two games are dropped - confirm that is intended
        if len(to_save) > 2:
            to_save.append(self._backtest_odds_summary(strats))
            #space between data
            to_save.append([""] * 25)
            content.append(to_save)

        #breaks loop to act like do-while
        if cur_date == end_date:
            break

        #the NBA odds backtest resumes the next season on 1-15
        month, day, year = self._next_backtest_date(
            cur_date, month, day, year, nba_resume=(1, 15))
        cur_date = str(month) + "-" + str(day) + "-" + str(year)

    #header has one Bet/To win/Won group per strategy (rows carry six groups)
    header = [
        "Date", "Away", "Home", "Algo proj", "Away proj", "Home proj",
        "Away odds", "Home odds", "Diff away", "Diff home"
    ] + ["Bet", "To win", "Won"] * 6
    to_save = [header]
    for day_rows in content:
        to_save.extend(day_rows)

    prefix = "./" + str(self.league) + "/analyze/" + str(
        self.league) + "_Algo_V2_"
    if start_date != end_date:
        path = prefix + str(start_date) + "_" + str(
            end_date) + "_backtest_odds.csv"
    else:
        path = prefix + str(end_date) + "_backtest_odds.csv"
    self.universal.save_to_csv(path, to_save)


def _backtest_odds_row(self, cur_date, year, game, strats):
    """Build the CSV row for one game and add its bets to ``strats``.

    Returns ``None`` when either team is empty or when the first value
    returned by ``get_odds_game`` is 0 (treated as "no odds for this game").
    """
    #team might not exist anymore, so don't count that game
    if len(game['team1']) == 0 or len(game['team2']) == 0:
        return None

    data1 = self.universal.load_data(game['team1'], game['date'])
    data2 = self.universal.load_data(game['team2'], game['date'])
    returned1 = self.odds_calculator.analyze2(game['team1'], game['team2'],
                                              data1, "away")
    returned2 = self.odds_calculator.analyze2(game['team2'], game['team1'],
                                              data2, "home")

    algo = Algo(self.league)
    algo_data = algo.calculate_V2(game['date'], returned1, returned2)
    total = algo_data['total']

    #turns the projected percentage into an odds line; the favorite gets the
    #negative line and the other team the positive one
    odds = abs(total)
    line = round((100 / (100 - abs(odds)) - 1) * 100)

    oddsportal_odds = self.universal.get_odds_game(
        year, game['team1'], game['team2'], game['game_scores'])
    if oddsportal_odds[0] == 0:
        return None

    away_odds = oddsportal_odds[0]
    home_odds = oddsportal_odds[1]
    away_proj = line if total < 0 else line * -1
    home_proj = line if total > 0 else line * -1

    #differences above 200 are reduced by 200
    away_diff = abs(away_proj - away_odds)
    if away_diff > 200:
        away_diff -= 200
    home_diff = abs(home_proj - home_odds)
    if home_diff > 200:
        home_diff -= 200

    row = [
        cur_date,
        game['team1'][1],
        game['team2'][1],
        str(total) + "%",
        away_proj,
        home_proj,
        away_odds,
        home_odds,
        away_diff,
        home_diff,
    ]

    #leagues without a tuned threshold fall back to the documented 45
    #(diff_amount used to be left unset for them, which crashed)
    diff_amount = {"mlb": 45, "nba": 100}.get(self.league, 45)

    def pick(away_ok, home_ok):
        #0 bets on the away team, 1 on the home team, None places no bet
        if away_ok:
            return 0
        if home_ok:
            return 1
        return None

    away_value = away_odds > away_proj
    home_value = home_odds > home_proj
    picks = [
        #Strategy 0.0
        pick(away_proj < 0, True),
        #Strategy 0.1
        pick(away_odds < 0 and away_odds < home_odds, True),
        #Strategy 1
        pick(away_value, home_value),
        #Strategy 2
        pick(away_value and away_proj < 0, home_value and home_proj < 0),
        #Strategy 3
        pick(away_value and away_proj < 0 and away_odds > 0,
             home_value and home_proj < 0 and home_odds > 0),
        #Strategy 4
        pick(away_value and away_diff >= diff_amount,
             home_value and home_diff >= diff_amount),
    ]
    for strat, side in zip(strats, picks):
        self._record_bet(row, strat, side, oddsportal_odds,
                         game['game_scores'])
    return row


def _record_bet(self, row, strat, side, oddsportal_odds, game_scores):
    """Append the Bet / To win / Won cells for a $100 bet on ``side``.

    ``side`` is 0 (away), 1 (home) or ``None`` (no bet: three empty cells).
    """
    if side is None:
        row.extend(["", "", ""])
        return

    line = oddsportal_odds[side]
    #Bet
    row.append("$100")
    strat['total_bet'] += 100
    #To Win
    if line > 0:
        to_win = 100 * (line / 100)
    else:
        to_win = 100 / (line * -1 / 100)
    row.append("$" + str(to_win))
    #Won
    if game_scores[side] > game_scores[1 - side]:
        row.append("$" + str(100 + to_win))
        strat['total_win'] += (100 + to_win)
    else:
        row.append("$0")


def _backtest_odds_summary(self, strats):
    """Return the per-day summary row: total bet, profit and profit percent."""
    #initializes with buffer columns
    summary = [""] * 10
    for strat in strats:
        profit = strat['total_win'] - strat['total_bet']
        #a strategy that placed no bets reports 0% instead of dividing by zero
        if strat['total_bet'] > 0:
            perc = profit / strat['total_bet'] * 100
        else:
            perc = 0
        summary.append("$" + str(strat['total_bet']))
        summary.append("$" + str(profit))
        summary.append(str(perc) + "%")
    return summary


def _next_backtest_date(self, cur_date, month, day, year, nba_resume=(2, 1)):
    """Return the ``(month, day, year)`` that follows ``cur_date``.

    Days simply count up to 31 for every month. When ``cur_date`` is the
    league's cut-off date the season year is incremented and the date jumps
    to the league's resume point; ``nba_resume`` is the (month, day) used
    for the NBA.
    """
    day += 1
    if day > 31:
        month += 1
        day = 1

    #doesn't increment year since the season's year doesn't change
    if month > 12:
        month = 1
        day = 1

    #increments season at the end of the season to sometime in the middle
    jumps = {
        "nba": ("4-1-", nba_resume),
        "nhl": ("4-1-", (2, 1)),
        "mlb": ("10-1-", (7, 1)),
    }
    jump = jumps.get(self.league)
    if jump is not None and jump[0] in cur_date:
        year += 1
        month, day = jump[1]
    return month, day, year


#backtests algorithm for games played on certain days
def backtest_algo(self, start_date, end_date, algo, num_processes=16):
    """Backtest ``algo`` over a date range and save a win/loss report.

    The dates are split across ``num_processes`` worker processes running
    ``backtest_algo2``; each worker writes a temp CSV that is summed and then
    deleted here. Nothing is done when the report file already exists.

    The loop that collects dates only stops when the stepped date equals
    ``end_date`` exactly.
    """
    #breaks up date
    parts = start_date.split("-")
    month = int(parts[0])
    day = int(parts[1])
    year = int(parts[2])

    cur_date = start_date

    backtests_dir = "./" + str(self.league) + "/analyze/backtests/"
    algo_list = str(algo).replace("[", "").replace("]", "").replace(" ", "")
    result_path = backtests_dir + str(self.algo_version) + "_" + str(
        algo_list) + "_" + str(start_date) + "_" + str(end_date) + ".txt"

    #this backtest was already run
    if os.path.exists(result_path):
        return

    #gets list of dates to backtest
    dates = []
    while True:
        dates.append(cur_date)

        #breaks loop to act like do-while
        if cur_date == end_date:
            break

        print("Cur date: " + str(cur_date) + " | End date: " + str(end_date))

        #increments season once April is reached since it's so close to the end of the season
        month, day, year = self._next_backtest_date(cur_date, month, day, year)
        cur_date = str(month) + "-" + str(day) + "-" + str(year)

    # 6 processes takes up 75% of my desktop CPU, and backtest takes 20 min to complete
    # c4.4xlarge has 16 logical processors, and takes 4.5 min to complete
    # 6 processes for the c4.4xlarge takes ~10 min to complete
    temp_paths = [
        backtests_dir + str(algo_list) + "_temp" + str(x) + ".csv"
        for x in range(num_processes)
    ]

    #creates processes, skipping workers whose temp file already exists
    processes = []
    for x, temp_path in enumerate(temp_paths):
        if not os.path.exists(temp_path):
            processes.append(
                Process(
                    target=self.backtest_algo2,
                    args=(x, dates, algo, num_processes)))

    #starts every process before joining any, so they run concurrently
    for process in processes:
        process.start()

    #waits for all of them to finish
    for process in processes:
        process.join()

    #loads results from files since processes can't return anything
    wins = [0] * 11
    losses = [0] * 11
    for temp_path in temp_paths:
        contents = self.universal.read_from_csv(temp_path)
        for y in range(0, len(contents)):
            losses[y] += int(contents[y][0])
            wins[y] += int(contents[y][1])
        os.remove(temp_path)

    to_output = [str(algo), ""]

    total_wins = 0
    total_losses = 0
    #starts at 1 since levels start at 1
    for x in range(1, len(wins)):
        total_wins += wins[x]
        total_losses += losses[x]
        if wins[x] + losses[x] != 0:
            perc_won = wins[x] / (wins[x] + losses[x]) * 100
        else:
            perc_won = "N/A"
        to_output.append(
            str(x) + ": " + str(losses[x]) + " - " + str(wins[x]) + ": " +
            str(perc_won))

    to_output.append("")
    to_output.append(str(total_losses) + " - " + str(total_wins))

    self.universal.save_to_txt(result_path, to_output)


#used in backtest_algo()
def backtest_algo2(self, number, dates, algo, num_processes=16):
    """Worker for ``backtest_algo``: backtest one slice of ``dates``.

    ``number`` is this worker's index; it handles the ``number``-th of
    ``num_processes`` slices of ``dates``. Losses and wins per odds level
    are written to ``<algo>_temp<number>.csv``, one ``[losses, wins]`` row
    per level index.
    """
    #level number corresponds to an index
    wins = [0] * 11
    losses = [0] * 11

    start = int(number * (len(dates) / num_processes))
    end = int((number + 1) * (len(dates) / num_processes))

    print()
    print(number)
    print(start)
    print(end)
    print()

    #categorizes odds as [low, high) ranges; level numbers start at 1
    if self.algo_version == "Algo_V1":
        levels = [[low, low + 3] for low in range(0, 27, 3)] + [[27, 100]]
    elif self.algo_version == "Algo_V2":
        levels = [[low, low + 5] for low in range(50, 100, 5)]
    else:
        levels = []

    for x in range(start, end):
        print("At " + str(dates[x]) + " END: " + str(dates[end - 1]))

        games = self.universal.get_games(dates[x])

        for game in games:
            #team might not exist anymore, so don't count that game
            if len(game['team1']) == 0 or len(game['team2']) == 0:
                continue

            data1 = self.universal.load_data(game['team1'], game['date'])
            data2 = self.universal.load_data(game['team2'], game['date'])
            returned1 = self.odds_calculator.analyze2(
                game['team1'], game['team2'], data1, "away")
            returned2 = self.odds_calculator.analyze2(
                game['team2'], game['team1'], data2, "home")

            algorithm = Algo(self.league)
            #sets algo to backtest
            algorithm.algorithm[self.league] = algo
            if self.algo_version == "Algo_V1":
                algo_data = algorithm.calculate(game['date'], returned1,
                                                returned2)
            elif self.algo_version == "Algo_V2":
                algo_data = algorithm.calculate_V2(game['date'], returned1,
                                                   returned2)
            total = algo_data['total']

            #level stays 0 when the total falls outside every range
            level = 0
            for z, (low, high) in enumerate(levels):
                if (low <= total < high) or (low <= total * -1 < high):
                    level = z + 1

            #0 is team1, and 1 is team2
            #team1 is projected to win on a positive total, otherwise go with home team
            projected_team = 0 if total > 0 else 1

            #0 is team1, and 1 is team2
            if game['game_scores'][0] > game['game_scores'][1]:
                winning_team = 0
            else:
                winning_team = 1

            #if algo was right
            if projected_team == winning_team:
                wins[level] += 1
            else:
                losses[level] += 1

    temp = [[losses[i], wins[i]] for i in range(0, len(wins))]

    algo_list = str(algo).replace("[", "").replace("]", "").replace(" ", "")
    path = "./" + str(self.league) + "/analyze/backtests/" + str(
        algo_list) + "_temp" + str(number) + ".csv"
    self.universal.save_to_csv(path, temp)
class ESPN_Scraper: opener = None universal = None user_agents = [] output = [] output_path = "" #can be nba, nhl, nfl, mlb league = "nba" def __init__(self, league): self.league = league.lower() self.universal = Universal_Functions(league.lower()) self.initialize_user_agents() self.output = [] today = self.universal.get_today() time = self.universal.get_current_time() #10:45 becomes 1045 time = time.replace(":", "") self.output_path = "./" + league + "/output/espn_scraper_" + str( today['month']) + "-" + str(today['day']) + "-" + str( today['year']) + "_" + str(time) + ".txt" self.output.append("---- Start output ----") def update_data(self, team, year): #scrapes all years worth of data if year == "": years = self.get_seasons(team) start = 0 end = len(years) #scrapes certain years worth fo data else: years = [year] start = 0 end = 1 for x in range(start, end): if self.league == "nhl": url = "http://espn.com/" + str( self.league) + "/team/schedule/_/name/" + str( team[0]) + "/year/" + str(years[x]) + "/" + str( team[1]) data = self.scrape_game_scores(url) elif self.league == "nba": #seasontype of 2 refers to regular season, while 1 and 3 refer to pre and post season respectively url = "http://espn.go.com/" + str( self.league) + "/team/schedule/_/name/" + str( team[0]) + "/year/" + str( years[x]) + "/seasontype/2/" + str(team[1]) data = self.scrape_game_scores(url) elif self.league == "mlb": #seasontype of 2 refers to regular season. 
MLB splits season into 2 halves url = "http://espn.go.com/" + str( self.league) + "/team/schedule/_/name/" + str( team[0]) + "/year/" + str( years[x]) + "/seasontype/2/half/1/" + str(team[1]) url2 = "http://espn.go.com/" + str( self.league) + "/team/schedule/_/name/" + str( team[0]) + "/year/" + str( years[x]) + "/seasontype/2/half/2/" + str(team[1]) data = self.scrape_game_scores(url) data2 = self.scrape_game_scores(url2) for y in range(0, len(data2['dates'])): data['dates'].append(data2['dates'][y]) data['home_away'].append(data2['home_away'][y]) data['game_urls'].append(data2['game_urls'][y]) data['game_scores'].append(data2['game_scores'][y]) path = "./" + str(self.league) + "/team_data/" + str( years[x]) + "/" + team[1] + ".csv" self.to_print("Loading existing data") #gets proper season/year of data existing_data = self.universal.load_data(team, "", years[x]) for y in range(0, len(existing_data)): if str(existing_data[y]['year']) == str(years[x]): existing_data = existing_data[y] break data['other_team'] = [] data['period_scores'] = [] for y in range(0, len(data['dates'])): #check if game data already scraped exists = False for z in range(0, len(existing_data['dates'])): if existing_data['dates'][z] == data['dates'][y]: exists = True break #if game data hasn't been scraped, scrape it and add it if exists == False: game_url = data['game_urls'][y] self.to_print("Returned " + str(data['dates'][y]) + "'s game results") #goes to playbyplay page since it has the same info and loads more quickly game_period_url = game_url.replace("recap", "playbyplay") #scrapes for period score data self.to_print("Scraping " + str(data['dates'][y]) + "'s period data: " + str(game_period_url)) # time.sleep(1) period_data = self.scrape_period_data( team, url, game_period_url) other_team = period_data['other_team'] period_scores = period_data['scores'] if period_data['other_team'] == -1: self.to_print("Scraping " + str(data['dates'][y]) + "'s period data again") time.sleep(5) 
period_data = self.scrape_period_data( team, url, game_period_url) other_team = period_data['other_team'] period_scores = period_data['scores'] # other_team="lad" # period_scores=[[0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0]] #goes to boxscore page since it has the player's data for the game game_players_url = game_url.replace("recap", "boxscore") # #scrapes for players stats # player_stats=self.scrape_player_data(years[x], team, url, game_players_url) # if len(player_stats['away'])==0: # time.sleep(5) # self.scrape_player_data(years[x], team, url, game_players_url) # input("Pausing...") data['other_team'].append(other_team) # data['player_stats'].append(player_stats) data['game_scores'][y] = data['game_scores'][y].split("-") existing_data['dates'].append(data['dates'][y]) existing_data['other_team'].append(other_team) existing_data['home_away'].append(data['home_away'][y]) existing_data['game_scores'].append(data['game_scores'][y]) existing_data['period_scores'].append(period_scores) self.save_output() to_save = [] for y in range(0, len(existing_data['game_scores'])): if existing_data['other_team'][y] != "" and existing_data[ 'period_scores'][y][0][0] != -1: score = existing_data['game_scores'][y] temp = [] temp.append(str(existing_data['dates'][y])) temp.append(existing_data['other_team'][y]) temp.append(existing_data['home_away'][y]) temp.append(score[0]) temp.append(score[1]) for period in existing_data['period_scores'][y][0]: temp.append(period) for period in existing_data['period_scores'][y][1]: temp.append(period) to_save.append(temp) self.to_print("Saving data to " + str(path)) self.universal.save_to_csv(path, to_save) self.save_output() def update_player_data(self, team, year): #scrapes all years worth of data if year == "": years = self.get_seasons(team) start = 0 end = len(years) #scrapes certain years worth fo data else: years = [year] start = 0 end = 1 for x in range(start, end): url = "http://espn.go.com/" + str( self.league) + "/team/schedule/_/name/" + 
str( team[0]) + "/year/" + str(years[x]) + "/" + str(team[1]) path = "./" + str(self.league) + "/team_data/" + str( years[x]) + "/" + team[1] + ".csv" print("Scraping: " + str(url)) data = self.scrape_game_scores(url) data['other_team'] = [] data['period_scores'] = [] for y in range(0, len(data['dates'])): game_url = data['game_urls'][y] #goes to boxscore page since it has the player's data for the game game_players_url = game_url.replace("recap", "boxscore") #scrapes for players stats player_stats = self.scrape_player_data(years[x], team, url, game_players_url) if len(player_stats['away']) == 0: time.sleep(5) self.scrape_player_data(years[x], team, url, game_players_url) #gets teams playing today def get_schedule(self): url = "http://espn.go.com/" + str(self.league) + "/schedule" data = self.scrape_webpage(url) ### OLD html ### # to_find="<caption>" # start=data.find(to_find)+len(to_find) # end=data[start:].find(to_find) to_start = "<tbody>" to_end = "</tbody>" start = data.find(to_start) + len(to_start) end = data[start:].find(to_end) new_data = data[start:start + end] abbrs = new_data.split("<abbr") abbrs.pop(0) teams = self.load_league_teams() games = [] temp = [] for x in range(0, len(abbrs)): start = abbrs[x].index('">') + 2 name_abbr = abbrs[x][start:abbrs[x].index("</abbr>")].lower() full_team_name = [] for y in range(0, len(teams)): if teams[y][0] == name_abbr: full_team_name = teams[y] if x % 2 == 0: temp.append(full_team_name) else: temp.append(full_team_name) games.append(temp) temp = [] return games #gets years for listed seasons on ESPN's website def get_seasons(self, team): url = "http://espn.go.com/" + str( self.league) + "/team/schedule/_/name/" + str(team[0]) data = self.scrape_webpage(url) start = data.index("Year:") end = data[start:].index("</form>") new_data = data[start:start + end] # print(new_data) split = new_data.split('<option value="') #removes excess form data split.pop(0) #removes current season's url because we'll get it later 
split.pop(0) #retrieves season's year from URL in select HTML element to_return = [] for item in split: temp = item.split('"') url = temp[0] index = url.index("year/") year = url[index + 5:index + 5 + 4] to_return.append(int(year)) #Sorts smallest to largest then increments latest year to get current year #since ESPN's website doesn't include current season's to_return.sort() to_return.append(to_return[-1] + 1) return to_return #gets scores and game urls #if new data, retrieve period scores and add all to global lists def scrape_game_scores(self, url): data = {} data['dates'] = [] data['home_away'] = [] data['game_urls'] = [] data['game_scores'] = [] try: self.to_print("Scraping game scores from " + str(url)) content = self.scrape_webpage(url) start = content.index("Regular Season Schedule") #espn has preseason stats for NHL teams, and that messes up the html if self.league == "nhl" and "preseason schedule" in content[ start:].lower(): end = content.index("Preseason Schedule") else: end = content.index("<!-- begin sponsored links -->") new_data = content[start:end] #separates each game temp = new_data.split('<li class="team-name">') #gets scores and game urls for better scores old_date = "" for x in range(0, len(temp)): try: #if lost game lost = "game-status loss" in temp[x] #determines whether game was at home or away if "@" in temp[x - 1]: home_away = "away" else: home_away = "home" try: #gets game date temp_split = temp[x].split('<tr class="evenrow') if len(temp_split) == 1: temp_split = temp[x].split('<tr class="oddrow') #turns ...][ team-90-24"><td>Wed, Oct 14</td><td><ul... 
into [ team-90-24"><td>Wed, Oct 14] temp_split = temp_split[1][:temp_split[1].index("</td>" )] #turns [ team-90-24"><td>Wed, Oct 14] into "Wed, Oct 14" string_date = temp_split.split("<td>")[1] #turns "Wed, Oct 14" into "Oct 14" string_date = string_date.split(", ")[1] #turns "Oct 14" into ["Oct", "14"] split_date = string_date.split(" ") months = { "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12 } #gets year from url split_url = url.split("/year/") year = int(split_url[1][:split_url[1].index("/")]) date = str(months[split_date[0]] ) + "-" + split_date[1] + "-" + str(year) if x == 0: old_date = date except Exception as error: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split( exc_tb.tb_frame.f_code.co_filename)[1] to_print = exc_type, fname, exc_tb.tb_lineno self.to_print("scrape_game_scores(), scraping date: " + str(to_print)) # self.to_print("Date: "+str(date)) # self.to_print("Old date: "+str(date)) string_to_find = '<li class="score"><a href="' # self.to_print("String to find: "+str(string_to_find)) # print() game_link = temp[x][temp[x].index(string_to_find) + len(string_to_find):] score = game_link[game_link.index('">') + 2:game_link.index("</a>")] # self.to_print("Score: "+str(score)) #if lost game, switch score order since site always lists highest score first if lost: temp2 = score.split("-") score = temp2[1] + "-" + temp2[0] #removes extra innings string "F/12" from scores temp2 = score.split("-") temp2[0] = temp2[0].split(" ") temp2[0] = temp2[0][0] temp2[1] = temp2[1].split(" ") temp2[1] = temp2[1][0] score = temp2[0] + "-" + temp2[1] game_link = game_link[:game_link.index('"')] # game_link="http://espn.go.com"+game_link game_link = "http:" + game_link data['dates'].append(old_date) data['game_urls'].append(game_link) data['game_scores'].append(score) data['home_away'].append(home_away) old_date = date except Exception as error: exc_type, exc_obj, exc_tb = 
sys.exc_info() fname = os.path.split( exc_tb.tb_frame.f_code.co_filename)[1] to_print = exc_type, fname, exc_tb.tb_lineno self.to_print("scrape_game_scores(): " + str(to_print)) return data except Exception as error: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] to_print = exc_type, fname, exc_tb.tb_lineno self.to_print("scrape_game_scores(): " + str(to_print)) return data #gets a game's scores for each period def scrape_period_data(self, team, team_url, game_url): #espn uses old HTML code for nhl games if self.league == "nhl": print() data = self.scrape_webpage(game_url) print("Game url: " + str(game_url)) # if data=="" or game_url=="http://espn.go.com": if data == "" or game_url == "http:": return { 'other_team': -1, 'scores': [[-1, -1, -1], [-1, -1, -1]] } #gets teams playing in this game start = data.index("gameInfo:") end = data[start:].index(",") + start #now should have gameInfo:"nhl:game:gameid=400884409-ana+at+dal", # print(data[start:end]) split = data[start:end].split("-") temp = split[1] #temp should now be ana+at+dal", split = temp.split("+") team1 = split[0].replace('"', "") team2 = split[2].replace('"', "") #team1 and team2 are the 3 letter abbreviations of teams EX: ana for anaheim-ducks print("Team1: " + str(team1) + " | Team2: " + str(team2)) print("Cur team: " + str(team[0]) + " | " + str(team[1])) if team1 == team[0]: other_team = team2 else: other_team = team1 # input() # print(data) start = data.index( '<table cellspacing="0" id="gp-linescore" class="linescore" >' ) end = start + data[start:].index("</table>") new_data = data[start:end].replace("\n", "").replace("\t", "") #separates each game rows = new_data.split('<a href="') if len(rows) == 2: #if first team listed is one with an old name if '<td class="team">' not in rows[1]: temp = rows[0].split('<td class="team">') temp.pop(0) temp.pop(0) temp.pop() rows[0] = temp[-1] #if second team listed is one with old name else: rows.pop(0) 
rows = rows[0].split('<td class="team">') else: rows.pop(0) for x in range(0, len(rows)): print(str(x) + " | " + str(rows[x])) scores = [] for row in rows: #separates each quarter quarters = row.split('text-align:center" >') temp = [] for quarter in quarters: score = quarter[:quarter.index("</td>")].strip() temp.append(score) scores.append(temp) #if team is listed 2nd, make it listed 1st for consistency #can't do last 2 characters because they could be in url even though not correct team. 5 guarenttes a / in test url for best comparison print("URL: " + str(team_url)) print("Scores: " + str(scores)) if len(scores) != 0: if team_url[-5:] in scores[1][0]: temp = scores[1] scores[1] = scores[0] scores[0] = temp scores[0].pop(0) scores[1].pop(0) #some games don't include a 3rd quarter while len(scores[0]) < 3: scores[0].append(0) while len(scores[1]) < 3: scores[1].append(0) to_return = {} to_return['other_team'] = other_team to_return['scores'] = scores # return scores return to_return else: return { 'other_team': "", 'scores': [[-1, -1, -1], [-1, -1, -1]] } elif self.league == "nba": data = self.scrape_webpage(game_url) print("Game url: " + str(game_url)) if data == "": return { 'other_team': -1, 'scores': [[0, 0, 0, 0], [0, 0, 0, 0]] } try: start = data.index('<div id="custom-nav"') end = data[start:].index( '<div id="gamepackage-links-wrap">') + start except Exception as error: print("scrape_period_data.py" + str(error)) return { 'other_team': -1, 'scores': [[0, 0, 0, 0], [0, 0, 0, 0]] } # split_data[start:end].split('class="abbrev"') split = data[start:end].split('<span class="abbrev"') split.pop(0) #returns [lal, lal, okc, okc] for x in range(0, len(split)): split[x] = split[x][split[x].index(">") + 1:split[x].index("<")] # split.pop(0) # split.pop(1) team1 = split[0].lower() team2 = split[1].lower() print("Team1: " + str(team1) + " | Team2: " + str(team2)) print("Cur team: " + str(team[0]) + " | " + str(team[1])) # start=data.index('Final</span>') start = 
data.index('<div id="custom-nav"') end = start + data[start:].index("</table>") new_data = data[start:end].replace("\n", "").replace("\t", "") #separates each game rows = new_data.split('final-score">') rows.pop(0) rows.pop() for x in range(0, len(rows)): rows[x] = rows[x].split('team-name">')[-1] # temp=rows[0].replace("<", "").replace(">","").split("td") for x in range(0, len(rows)): rows[x] = rows[x].split("<td") rows[x].pop(0) rows[x].pop() for y in range(0, len(rows[x])): rows[x][y] = rows[x][y].replace("</td>", "") rows[x][y] = rows[x][y].replace(">", "") scores = rows if team1 == team[0]: other_team = team2 else: temp = scores[1] scores[1] = scores[0] scores[0] = temp other_team = team1 #some games don't include a 4th period while len(scores[0]) < 4: scores[0].append(0) while len(scores[1]) < 4: scores[1].append(0) to_return = {} to_return['other_team'] = other_team to_return['scores'] = scores # return scores return to_return # Baseball MLB elif self.league == "mlb": print() data = self.scrape_webpage(game_url) self.to_print("Game url: " + str(game_url)) if data == "" or game_url == "http://espn.go.com" or game_url == "http://espn.go.com#": return { 'other_team': -1, 'scores': [[-1, -1, -1, -1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1, -1, -1, -1]] } #gets teams playing in this game start = data.index("var omniPageName =") end = data[start:].index(";") + start #gets other team playing split = data[start:end].split("+") split.pop(0) split.pop(0) team1 = split[0].replace('"', "") team2 = split[2].replace('"', "") self.to_print("Team1: " + str(team1) + " | Team2: " + str(team2)) self.to_print("Cur team: " + str(team[0]) + " | " + str(team[1])) if team1 == team[0]: other_team = team2 else: other_team = team1 # else: # return {'other_team': "", 'scores': [[-1,-1,-1-1,-1,-1,-1,-1,-1], [-1,-1,-1,-1,-1,-1,-1,-1,-1]]} self.to_print("Other team: " + str(other_team)) #isolates period data html start = data.index('class="linescore"') end = start + 
data[start:].index("</table>") new_data = data[start:end].replace("\n", "").replace("\t", "") #separates each team rows = new_data.split('<a href="') if len(rows) == 2: #if first team listed is one with an old name if '<td class="team" style="width: 3em !important">' not in rows[ 1]: temp = rows[0].split( '<td class="team" style="width: 3em !important">') temp.pop(0) temp.pop(0) temp.pop() rows[0] = temp[-1] #if second team listed is one with old name else: rows.pop(0) rows = rows[0].split( '<td class="team" style="width: 3em !important">') #removes column headers else: rows.pop(0) print() for x in range(0, len(rows)): print(str(x) + " | " + str(rows[x])) # input() scores = [] for row in rows: #separates each quarter quarters = row.split('text-align:center">') temp = [] for quarter in quarters: score = quarter[:quarter.index("</td>")].strip() temp.append(score) scores.append(temp) #if team is listed 2nd, make it listed 1st for consistency #can't do last 2 characters because they could be in url even though not correct team. 
def scrape_player_data(self, season, team, team_url, game_url):
    """Scrape one box score and store every player's stat line in SQLite.

    The page at ``game_url`` is fetched with ``self.scrape_webpage``, the
    away and home halves of the box score are parsed with
    ``self.scrape_player_data2``, and one row per player is written to
    ``./<league>/player_data/<player_id>.sqlite`` (table ``Games``).

    Args:
        season: Value stored in the ``season`` column.
        team: Accepted for interface compatibility; not used here.
        team_url: Accepted for interface compatibility; not used here.
        game_url: Box-score URL. The game id is whatever remains after the
            two known ``http://espn.go.com/nba/boxscore?...=`` prefixes are
            stripped.

    Returns:
        ``{'away': [], 'home': []}`` when the page could not be fetched or
        ``game_url`` is the bare site root; otherwise
        ``{'away': <dict>, 'home': <dict>}`` holding the two dicts returned
        by ``scrape_player_data2``. Each stats dict in them gains a
        ``'home_away'`` key.

    Raises:
        ValueError: If the game id is not numeric or the box-score markup
            is missing from the page.
    """
    page = self.scrape_webpage(game_url)
    print("Game url: " + str(game_url))

    # Guard first: the bare site root carries no game id, so parsing the id
    # before this check would raise instead of returning the empty result.
    if page == "" or game_url == "http://espn.go.com":
        return {'away': [], 'home': []}

    game_id = int(
        game_url.replace("http://espn.go.com/nba/boxscore?gameId=", "")
        .replace("http://espn.go.com/nba/boxscore?id=", ""))

    # Isolate the box-score article, then cut it into away / home halves.
    start = page.index('<article class="boxscore-tabs')
    end = start + page[start:].index("</article>")
    boxscore = page[start:end]

    away_marker = 'gamepackage-away-wrap">'
    home_marker = 'gamepackage-home-wrap">'
    away_html = boxscore[boxscore.find(away_marker):boxscore.find(home_marker)]
    home_html = boxscore[boxscore.find(home_marker):]

    away_player_stats = self.scrape_player_data2(away_html)
    home_player_stats = self.scrape_player_data2(home_html)

    # Concatenate into a NEW list; extending the away list in place would
    # leak the home ids into the 'away' dict that is returned below.
    player_ids = (away_player_stats['player_ids'] +
                  home_player_stats['player_ids'])
    for player_id in home_player_stats['player_ids']:
        print(player_id)

    # Tag every stat line with its side (the dicts themselves are tagged, so
    # the caller sees 'home_away' in the returned structures as well).
    player_stats = []
    for side, scraped in (("away", away_player_stats),
                          ("home", home_player_stats)):
        for stats in scraped['player_stats']:
            stats['home_away'] = side
            player_stats.append(stats)

    create_sql = '''CREATE TABLE IF NOT EXISTS "Games" (
        game_id TEXT PRIMARY KEY,
        season INTEGER,
        home_away TEXT,
        pts INTEGER,
        min INTEGER,
        fg TEXT,
        pt3 TEXT,
        ft TEXT,
        rb INTEGER,
        oreb INTEGER,
        dreb INTEGER,
        ast INTEGER,
        stl INTEGER,
        blk INTEGER,
        turn INTEGER,
        pf INTEGER
    )'''
    insert_sql = '''INSERT INTO "Games" (
        game_id, season, home_away, pts, min, fg, pt3, ft,
        rb, oreb, dreb, ast, stl, blk, turn, pf
    ) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'''
    # Stat keys in the same order as the columns after game_id and season.
    stat_keys = ('home_away', 'points', 'minutes_played', 'field_goals',
                 'three_pointers', 'free_throws', 'rebounds',
                 'offensive_rebounds', 'defensive_rebounds', 'assists',
                 'steals', 'blocks', 'turnovers', 'personal_fouls')

    for player_id, stats in zip(player_ids, player_stats):
        try:
            row = (game_id, season) + tuple(stats[key] for key in stat_keys)
        except KeyError:
            # Players who did not play only carry 'minutes_played'; their
            # file and table are still created, but no row is written.
            row = None

        db = sqlite3.connect("./" + str(self.league) + "/player_data/" +
                             str(player_id) + ".sqlite")
        try:
            cursor = db.cursor()
            try:
                cursor.execute(create_sql)
                if row is not None:
                    cursor.execute(insert_sql, row)
            except sqlite3.IntegrityError:
                # This game is already stored for the player; keep the old row.
                pass
            except sqlite3.Error as error:
                # Storage stays best-effort, but real failures are reported.
                print(error)
            db.commit()
        finally:
            db.close()

    return {'away': away_player_stats, 'home': home_player_stats}
def scrape_player_data2(self, data):
    """Parse one team's box-score HTML into player ids and stat lines.

    ``data`` is split on ``<tbody>``; the first section after the split is
    treated as the starters table and the second as the bench table. Rows
    are then handed to ``self.scrape_player_data3``.

    Args:
        data: HTML fragment for a single team.

    Returns:
        ``{'player_ids': [...], 'player_stats': [...]}`` with starters first
        and bench players after them.

    Raises:
        IndexError: If ``data`` holds fewer than two ``<tbody>`` sections.
    """
    sections = data.split('<tbody>')[1:]
    starters, bench = sections[0], sections[1]

    # The piece before the first row is discarded, as are the trailing
    # piece of the starters table and the last two of the bench table
    # (presumably totals rows -- confirm against the live markup).
    # Slicing means a table too short to trim simply yields no rows.
    starter_rows = starters.split("<tr>")[1:-1]
    bench_rows = bench.split("<tr")[1:-2]

    starter_returned = self.scrape_player_data3(starter_rows)
    bench_returned = self.scrape_player_data3(bench_rows)

    return {
        'player_ids': (starter_returned['player_ids'] +
                       bench_returned['player_ids']),
        'player_stats': (starter_returned['player_stats'] +
                         bench_returned['player_stats']),
    }


#scrapes player data from rows of players
def scrape_player_data3(self, rows):
    """Extract the player id and stat line from each box-score row.

    Args:
        rows: HTML fragments, one per player row.

    Returns:
        ``{'player_ids': [...], 'player_stats': [...]}``; the two lists are
        index-aligned. A row whose stat cells cannot be parsed (the player
        did not play) contributes ``{'minutes_played': '0'}``. A row with
        no ``href="`` link is skipped entirely.

    Raises:
        ValueError: If a row's link does not reduce to a numeric id once the
            ``http://espn.go.com/nba/player/_/id/`` prefix is removed.
    """
    link_marker = 'href="'
    id_prefix = "http://espn.go.com/nba/player/_/id/"
    # (stat name, cell position after the name cell, converter).
    # Cell 12 is deliberately not read.
    columns = (
        ('minutes_played', 0, int),
        ('field_goals', 1, str),
        ('three_pointers', 2, str),
        ('free_throws', 3, str),
        ('offensive_rebounds', 4, int),
        ('defensive_rebounds', 5, int),
        ('rebounds', 6, int),
        ('assists', 7, int),
        ('steals', 8, int),
        ('blocks', 9, int),
        ('turnovers', 10, int),
        ('personal_fouls', 11, int),
        ('points', 13, int),
    )

    def cell_text(cell):
        # Text between the first '>' and the first '<' of a cell fragment.
        return cell[cell.index(">") + 1:cell.index("<")]

    player_ids = []
    player_stats = []
    for row in rows:
        link_at = row.find(link_marker)
        if link_at == -1:
            # No player link: not a player row, and slicing from a -1
            # offset would only produce garbage for int() to choke on.
            continue
        start = link_at + len(link_marker)
        url = row[start:row.index('"', start)]
        player_ids.append(int(url.replace(id_prefix, "")))

        #fails if player didn't play
        try:
            # Drop the text before the first cell and the name cell itself.
            cells = row.split("<td")[2:]
            stats = {}
            for key, position, convert in columns:
                stats[key] = convert(cell_text(cells[position]))
        except (ValueError, IndexError):
            # Missing markers, non-numeric text or too few cells.
            stats = {'minutes_played': '0'}
        player_stats.append(stats)

    return {'player_ids': player_ids, 'player_stats': player_stats}
def scrape_webpage(self, url):
    """Fetch ``url`` and return its body as unescaped text.

    A random entry of ``self.user_agents`` is installed as the
    ``User-agent`` header on ``self.opener`` before the request is made.

    Args:
        url: Address to fetch.

    Returns:
        The response body decoded as UTF-8 (undecodable bytes dropped) with
        HTML character references converted to characters, or ``""`` if
        anything goes wrong. Callers test for the empty string, so every
        failure is reported on stdout and mapped to that sentinel.
    """
    try:
        #initializes url variables
        self.opener.addheaders = [('User-agent',
                                   random.choice(self.user_agents))]
        response = self.opener.open(url, timeout=30)
        try:
            raw = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        # html.unescape replaces HTMLParser().unescape, which no longer
        # exists on Python 3.9+; there the old call raised AttributeError
        # and this method returned "" for every page.
        return html.unescape(raw.decode('UTF-8', errors='ignore'))
    except Exception as exception:
        print(exception)
        return ""


#loads list of league teams
def load_league_teams(self):
    """Read ``./<league>/<league>_teams.txt`` into a list of teams.

    Returns:
        One list per line of the file, holding that line's ``|``-separated
        fields with surrounding whitespace stripped.
    """
    path = './' + str(self.league) + '/' + str(self.league) + '_teams.txt'
    # The context manager closes the file; it was previously left open.
    with open(path) as team_file:
        return [[field.strip() for field in line.split("|")]
                for line in team_file]


def to_print(self, to_print):
    """Print a timestamped message and remember it in ``self.output``.

    Args:
        to_print: Message (any object; converted with ``str``).
    """
    stamped = ("[" + str(self.universal.get_current_time()) + "] " +
               str(to_print))
    print(stamped)
    self.output.append(stamped)
def save_output(self):
    """Write the collected output lines to ``self.output_path``.

    Delegates to ``self.universal.save_to_txt``.
    """
    self.universal.save_to_txt(self.output_path, self.output)


def initialize_user_agents(self):
    """Fill ``self.user_agents`` and build the URL opener.

    The list is replaced in place rather than appended to, so calling this
    more than once no longer piles up duplicate entries, and any other
    holder of the same list object still sees the six agents.
    ``self.opener`` is rebuilt on every call with one randomly chosen agent
    as its ``User-agent`` header.
    """
    self.user_agents[:] = [
        "Mozilla/5.0 (X10; Ubuntu; Linux x86_64; rv:25.0)",
        "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:12.0)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/540 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/540",
        "Mozilla/5.0 (Windows; U; Windows NT 5.2; it; rv:1.8.1.11) Gecko/20071327 Firefox/2.0.0.10",
        "Opera/9.3 (Windows NT 5.1; U; en)",
    ]

    #initializes url variables
    self.opener = urllib.request.build_opener(
        urllib.request.HTTPRedirectHandler(),
        urllib.request.HTTPHandler(debuglevel=0))
    self.opener.addheaders = [('User-agent',
                               random.choice(self.user_agents))]