def load_url(player_id):
    """Fetch the Baseball-Reference page for *player_id* into the global
    ``temp_pages`` cache, unless the player already exists in the database.

    Stores ``None`` for players already in the DB or whose download fails.
    """
    global temp_pages
    db = DatabaseConnection(sandbox_mode)
    try:
        # NOTE(review): player_id is concatenated straight into the SQL —
        # acceptable only for trusted, internally-generated ids.
        known = db.read('select * from players where playerid = "' + player_id + '";')
    finally:
        # Close even when the read raises; the original leaked the
        # connection on error and held it open across the network call.
        db.close()
    if len(known) == 0:
        try:
            temp_pages[player_id] = BeautifulSoup(
                urlopen("https://www.baseball-reference.com/players/" + player_id[0]
                        + "/" + player_id + ".shtml"),
                'html.parser')
        except Exception:
            # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
            # are no longer swallowed; download failures still yield None.
            temp_pages[player_id] = None
    else:
        temp_pages[player_id] = None
def write_to_file(stats, year, stat_type):
    """Assign a 1-based rank per AL/NL team for *stat_type* and persist it
    into the matching ``<stat_type>_year`` column of team_years."""
    logger.log("\tWriting " + stat_type + " data")
    begin = time.time()
    connection = DatabaseConnection(sandbox_mode)
    rank = 0
    for entry in stats:
        team_id = translate_team_id(entry[0], year)
        league_rows = connection.read('select league from team_years where teamId = "' + team_id +
                                      '" and year = ' + str(year) + ';')
        # Skip non-AL/NL entries (e.g. historical leagues) — they get no rank.
        if league_rows[0][0].upper() not in ['AL', 'NL']:
            continue
        rank += 1
        connection.write('update team_years set ' + stat_type + '_year = ' + str(rank) +
                         ' where teamId = "' + team_id + '" and year = ' + str(year) + ';')
    connection.close()
    logger.log("\t\tTime = " + time_converter(time.time() - begin))
def consolidate_hitter_spots(ty_uid):
    """Build ``{player_id: {matchup: {spot: value}}}`` from the hitter_spots
    table for one team-year (ty_uid), covering both 'vr' and 'vl' matchups."""
    db = DatabaseConnection(sandbox_mode=True)
    # Columns one..nine name the nine batting-order spots.
    spot_columns = ', '.join(num_to_word(n) for n in range(1, 10))
    batting_spots = {}
    for match_up in ['vr', 'vl']:
        rows = db.read('select playerId, ' + spot_columns + ' from hitter_spots where ty_uniqueidentifier = ' +
                       str(ty_uid) + ' and matchup = "' + match_up + '";')
        for row in rows:
            player_id = row[0]
            # Keep only spots with a truthy entry; spot numbers start at 1.
            batting_spots.setdefault(player_id, {})[match_up] = {
                spot: value for spot, value in enumerate(row[1:], start=1) if value
            }
    db.close()
    return ensure_both_match_ups_are_present(batting_spots)
def get_year_data(year):
    """Download, assemble, and persist league-wide batting/pitching/fielding
    totals for *year* from Baseball-Reference.

    Side effects: resets the module-level ``pages``/``strings`` caches,
    inserts a row into the ``years`` table when missing, calls
    ``write_opening_day``, and logs timings throughout.
    """
    driver_logger.log('\tGathering year data')
    print("Gathering year data")
    start_time = time.time()
    # Shared caches filled by the threaded load_url / assemble_stats steps below.
    global pages
    global strings
    pages = {}
    strings = {}
    logger.log('Beginning year_data download for ' + str(year) + ' || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    # Maps Baseball-Reference data-stat attribute names -> DB column names.
    batting_list = {
        'PA': 'pa', 'AB': 'ab', 'R': 'r', 'H': 'h', '2B': '2b', '3B': '3b', 'HR': 'hr', 'RBI': 'rbi',
        'SB': 'sb', 'BB': 'bb', 'SO': 'so', 'batting_avg': 'ba', 'onbase_perc': 'obp',
        'slugging_perc': 'slg', 'onbase_plus_slugging': 'ops'
    }
    pitching_list = {
        'earned_run_avg': 'era', 'SV': 'sv', 'IP': 'ip', 'ER': 'er', 'whip': 'whip',
        'strikeouts_per_nine': 'k_9', 'strikeouts_per_base_on_balls': 'k_bb'
    }
    fielding_list = {'E_def': 'e', 'fielding_perc': 'f_percent'}
    stat_list = {"batting": batting_list, "pitching": pitching_list, "fielding": fielding_list}
    db = DatabaseConnection(sandbox_mode)
    if len(db.read('select * from years where year = ' + str(year) + ';')) == 0:
        # Index dropped and re-added around the insert — presumably to work
        # around a key constraint; TODO confirm why this is necessary.
        db.write('alter table years drop index year;')
        db.write('insert into years (year) values (' + str(year) + ');')
        db.write('alter table years add index(year);')
    db.close()
    write_opening_day(year)
    download_start = time.time()
    logger.log("making HTTP requests for year data")
    # One worker per stat category; load_url is expected to populate `pages`.
    with ThreadPoolExecutor(3) as executor1:
        for key, value in stat_list.items():
            executor1.submit(load_url, year, key)
    logger.log("\tdone making HTTP requests: time = " + time_converter(time.time() - download_start))
    # Sequential parse step: assemble_stats presumably fills `strings` — confirm.
    for key, dictionary in stat_list.items():
        assemble_stats(key, dictionary, pages[key])
    write_start = time.time()
    logger.log("writing to database")
    with ThreadPoolExecutor(3) as executor2:
        for key, value in stat_list.items():
            executor2.submit(write_to_db, year, strings[key], key)
    logger.log("\tdone writing to database: time = " + time_converter(time.time() - write_start))
    total_time = time_converter(time.time() - start_time)
    logger.log('year_data download completed: time = ' + total_time + '\n\n')
    driver_logger.log('\t\tTime = ' + total_time)
def write_to_file(innings_url, player_type, player_id, team_id, year, month, day, match_up, count, pitch_type,
                  ball_strike, swing_take, outcome, trajectory, field, direction, original_player_id, x_coord,
                  y_coord, velocity):
    """Persist one pitch-FX record into the ``<player_type>_pitches`` table.

    When *player_id* is None (the id could not be resolved to a single
    player), the would-be INSERT statement is appended to
    ``multiple_players.csv`` for manual resolution instead of being executed.

    ``x_coord``/``y_coord``/``velocity`` are optional: the literal string
    'None' marks a missing value and selects one of four INSERT variants.

    NOTE(review): player_type appears to be e.g. 'batter'/'pitcher' — the
    code derives 'player_batting'/'player_pitching' via player_type[:-2] +
    'ing' and 'Pb_'/'Pp_uniqueidentifier' via player_type[0] — confirm.
    """
    if player_id is None:
        # Ambiguous player: log the statement (with the source URL appended
        # as a SQL comment) for manual review.
        with open(
                os.path.join("..", "..", "baseball-sync", "src", "import_data", "player_data", "pitch_fx",
                             "multiple_players.csv"), 'a') as file:
            file.write(
                str(original_player_id) + ': --> insert into ' + player_type + '_pitches (pitchid, playerid, '
                'year, matchup, count, pitch_type, swing_take, ball_strike, outcome, trajectory, field, '
                'direction, P' + player_type[0] + '_uniqueidentifier) values (default, ' + str(player_id) + ', ' +
                str(year) + ', "' + match_up + '", "' + str(count) + '", "' + pitch_type + '", "' + swing_take +
                '", "' + ball_strike + '", "' + outcome + '", "' + trajectory + '", "' + field + '", "' +
                direction + '", (select P' + player_type[0] + '_uniqueidentifier from player_' +
                player_type[:-2] + 'ing where year = ' + str(year) + ' and pt_uniqueidentifier = (select '
                'pt_uniqueidentifier from player_teams where playerid = "' + str(player_id) + '" and teamid = "' +
                team_id + '"))); -- ' + innings_url + '\n')
    else:
        db_to_read = DatabaseConnection(sandbox_mode=True)
        # if len(db_to_read.read('select p' + player_type[0] + '_uniqueidentifier from player_' + player_type + ' where '
        #                        'playerdId = ' + player_id + ' and year = ' + str(year) + ';')[0]) > 1:
        #     team_id = 'TOT' <---this code would've made sure that all players that played for more than one team
        # Resolve the player-season unique identifier for this player/team/year.
        p_uid = str(
            db_to_read.read(
                'select P' + player_type[0] + '_uniqueidentifier from player_' + player_type[:-2] +
                'ing where year = ' + str(year) + ' and pt_uniqueidentifier = (select '
                'pt_uniqueidentifier from player_teams where playerid = "' + player_id + '" and '
                'teamid = "' + team_id + '");')[0][0])
        db_to_read.close()
        db = PitchFXDatabaseConnection(sandbox_mode=True)
        # Four INSERT variants depending on which optional fields are present.
        if x_coord == 'None' or y_coord == 'None':
            if velocity == 'None':
                # No coordinates, no velocity.
                db.write(
                    'insert into ' + player_type + '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, P' +
                    player_type[0] + '_uniqueidentifier) values (default, "' + player_id + '", ' + str(year) +
                    ', ' + month + ', ' + day + ', "' + match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' + outcome + '", "' + trajectory + '", "' +
                    field + '", "' + direction + '", ' + p_uid + ');')
            else:
                # Velocity only.
                db.write(
                    'insert into ' + player_type + '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, velocity, P' +
                    player_type[0] + '_uniqueidentifier) values (default, "' + player_id + '", ' + str(year) +
                    ', ' + month + ', ' + day + ', "' + match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' + outcome + '", "' + trajectory + '", "' +
                    field + '", "' + direction + '", ' + velocity + ', ' + p_uid + ');')
        else:
            if velocity == 'None':
                # Coordinates only.
                db.write(
                    'insert into ' + player_type + '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, x, y, P' +
                    player_type[0] + '_uniqueidentifier) values (default, "' + player_id + '", ' + str(year) +
                    ', ' + month + ', ' + day + ', "' + match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' + outcome + '", "' + trajectory + '", "' +
                    field + '", "' + direction + '", ' + x_coord + ', ' + y_coord + ', ' + p_uid + ');')
            else:
                # Coordinates and velocity.
                db.write(
                    'insert into ' + player_type + '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, x, y, velocity,'
                    ' P' + player_type[0] + '_uniqueidentifier) values (default, "' + player_id + '", ' +
                    str(year) + ', ' + month + ', ' + day + ', "' + match_up + '", "' +
                    str(count) + '", "' + pitch_type + '", "' + swing_take + '", "' + ball_strike + '", "' +
                    outcome + '", "' + trajectory + '", "' + field + '", "' + direction + '", ' + x_coord + ', ' +
                    y_coord + ', ' + velocity + ', ' + p_uid + ');')
        db.close()
def get_teams(year):
    """Return the raw teamid rows stored in team_years for *year*."""
    connection = DatabaseConnection(sandbox_mode)
    rows = connection.read('select teamid from team_years where year = ' + str(year) + ';')
    connection.close()
    return rows
def gather_team_home_numbers(team_id, team_key, year, team_count):
    """Scrape a team's Baseball-Reference season page (cached in the global
    ``pages``) for home-park hit-location/trajectory factors, manager ids,
    manager/team images, and the ballpark name, then hand everything to
    ``write_to_db``.

    Does nothing when a ballpark_years row already exists for this
    team/year.  NOTE(review): on that early path — and on the
    IndexError/ZeroDivisionError fallback path — the DatabaseConnection is
    never closed; confirm whether the wrapper tolerates that.
    """
    logger.log("\tCalculating home numbers and downloading manager data: " + team_id)
    db = DatabaseConnection(sandbox_mode)
    if len(db.read('select BY_uniqueidentifier from ballpark_years where teamId = "' + team_id +
                   '" and year = ' + str(year) + ';')) == 0:
        global pages
        table = str(pages[team_key])
        manager_ids = {}
        try:
            # Slice the raw HTML by known section headers (no parser walk).
            manager_data = table.split('<strong>Manager')[1].split('<p>')[0].split('<a href="/managers/')[1:]
            home_row = table.split('<h2>Home or Away</h2>')[1].split('<tbody>')[1].split('</tbody>')[0] \
                .split('<tr >')[1]
            ballpark_platoon_splits = table.split('<h2>Hit Location</h2>')[1].split('<tbody>')[1].split('</tbody>')[0]\
                .split('<tr >')
            hit_trajectory_table = table.split('<h2>Hit Trajectory</h2>')[1].split('<tbody>')[1].split('</tbody>')[0]\
                .split('<tr >')
            stats = ['AB', 'H', '2B', '3B', 'HR']
            overall_stats = {}
            home_percent = {}
            season_total_row = table.split('<h2>Season Totals</h2>')[1].split('<tbody>')[1].split('</tbody>')[0].\
                split('<tr>')[1]
            for stat in stats:
                league_total = int(db.read("select " + stat + " from years where year = " + str(year) + ";")[0][0])
                team_total = int(season_total_row.split('data-stat="' + stat + '">')[1].split('<')[0])
                # Team total relative to the per-team average of the rest of
                # the league (team_count - 2: presumably excludes this team
                # plus one other entry — TODO confirm).
                overall_stats[stat] = team_total / ((league_total - team_total) / (team_count - 2))
                home = int(home_row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                home_percent[stat] = home / team_total
            db.close()
            stat_translate = {'AB': 'AB', 'H': 'H', '2B': 'double', '3B': 'triple', 'HR': 'homerun'}
            # Pulled/opposite field flip between right- and left-handed batters.
            r_location = {'Up Mdle': 'centerfield', 'Opp Fld': 'rightfield', 'Pulled': 'leftfield'}
            l_location = {'Up Mdle': 'centerfield', 'Opp Fld': 'leftfield', 'Pulled': 'rightfield'}
            location = {'AB_centerfield': 0, 'AB_leftfield': 0, 'AB_rightfield': 0, 'H_centerfield': 0,
                        'H_leftfield': 0, 'H_rightfield': 0, 'double_centerfield': 0, 'double_leftfield': 0,
                        'double_rightfield': 0, 'triple_centerfield': 0, 'triple_leftfield': 0,
                        'triple_rightfield': 0,
                        'homerun_centerfield': 0, 'homerun_leftfield': 0, 'homerun_rightfield': 0}
            trajectory = {"Ground_Balls_PA": 0, "Ground_Balls_AB": 0, "Ground_Balls_H": 0, "Ground_Balls_2B": 0,
                          "Ground_Balls_3B": 0, "Ground_Balls_HR": 0, "Fly_Balls_PA": 0, "Fly_Balls_AB": 0,
                          "Fly_Balls_H": 0, "Fly_Balls_2B": 0, "Fly_Balls_3B": 0, "Fly_Balls_HR": 0,
                          "Line_Drives_PA": 0, "Line_Drives_AB": 0, "Line_Drives_H": 0, "Line_Drives_2B": 0,
                          "Line_Drives_3B": 0, "Line_Drives_HR": 0}
            for row in ballpark_platoon_splits:
                if '-RHB' in row:
                    handedness = "R"
                elif '-LHB' in row:
                    handedness = "L"
                else:
                    continue
                # Accumulate each stat scaled by home share / league-relative rate.
                for stat in stats:
                    if handedness == "R":
                        location[stat_translate[stat] + '_' + r_location[row.split('data-stat="split_name" >')[1]
                                 .split('-')[0]]] += (int(row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                                                      * home_percent[stat]) / overall_stats[stat]
                    else:
                        location[stat_translate[stat] + '_' + l_location[row.split('data-stat="split_name" >')[1]
                                 .split('-')[0]]] += (int(row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                                                      * home_percent[stat]) / overall_stats[stat]
            for row in hit_trajectory_table:
                for key, value in trajectory.items():
                    # Key format: "<Kind>_<Balls|Drives>_<stat>", e.g. Fly_Balls_HR.
                    if key.split('_')[0] + ' ' + key.split('_')[1] in row:
                        stat = key.split('_')[-1]
                        if stat != 'PA':
                            trajectory[key] = int(row.split('data-stat="' + key.split('_')[2] + '" >')[1].
                                                  split('<')[0]) * home_percent[stat]
        except (IndexError, ZeroDivisionError):
            # Cached page unusable: re-download and keep only manager data;
            # location/trajectory are written empty.
            table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/teams/' + team_key + '/' +
                                              str(year) + '.shtml'), 'html.parser'))
            manager_data = table.split('<strong>Manager')[1].split('<p>')[0].split('<a href="/managers/')[1:]
            location = {}
            trajectory = {}
        manager_page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/managers/' + manager_data[0]
                                                 .split('.shtml')[0] + '.shtml'), 'html.parser'))
        try:
            manager_pic_url = manager_page.split('<img class="" src="')[1].split('"')[0]
            urlretrieve(manager_pic_url, os.path.join("interface", "static", "images", "model", "managers",
                                                      manager_data[0].split('.shtml')[0] + ".jpg"))
        except Exception as e:
            # Best-effort image download; failures are only logged.
            logger.log('\t\t' + str(e))
        try:
            team_pic_url = table.split('<div class="media-item logo loader">')[1].split('<')[1].split('src="')[1].\
                split('"')[0]
        except IndexError:
            # Logo block missing from the cached page: re-download once.
            table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/teams/' + team_key + '/' +
                                              str(year) + '.shtml'), 'html.parser'))
            team_pic_url = table.split('<div class="media-item logo loader">')[1].split('<')[1].split('src="')[1].\
                split('"')[0]
        try:
            urlretrieve(team_pic_url, os.path.join("interface", "static", "images", "model", "teams",
                                                   team_id + str(year) + ".jpg"))
        except Exception as e:
            logger.log('\t\t' + str(e))
        # manager_data entries look like "<id>.shtml...(<name>)..." — id -> name.
        for i in manager_data:
            manager_ids[i.split('.shtml')[0]] = i.split('(')[1].split(')')[0]
        try:
            park_name = table.split('<strong>Ballpark')[1].split('</strong> ')[1].split('</p>')[0][:-1]
        except IndexError as e:
            logger.log('\t\t' + str(e))
            park_name = "No Home Field"
        write_to_db(team_id, location, trajectory, manager_ids, year, park_name)
def consolidate_player_stats(ty_uid, player_type, year):
    """Collect per-player stats for one team-year (*ty_uid*).

    For 'batting'/'pitching': returns
    ``{player_id: {'standard_<type>_stats': {team_id: ...},
                   'advanced_<type>_stats': {table: ...}}}``.
    For 'fielding': returns ``{player_id: <consolidated fielding stats>}``,
    iterating both batting and pitching direction tables.

    NOTE(review): in the fielding branch the per-team loop overwrites
    player_stats[player_id] on each uid, so only the last uid's stats
    survive — confirm whether that is intended.
    """
    player_stats = {}
    db = DatabaseConnection(sandbox_mode=True)
    if player_type != 'fielding':
        # One row per distinct player uid in the direction_<type> table.
        for p_uid in db.read('select p_uid from direction_' + player_type + ' group by p_uid;'):
            p_uids = [p_uid[0]]
            if player_is_on_this_team(ty_uid, p_uid, player_type, year):
                if player_was_on_more_than_one_team(p_uid, player_type, year):
                    # Also pull the uid of this player's stint with this team.
                    p_uids.append(
                        get_uid_of_player_for_this_team(
                            ty_uid, p_uid, player_type, year))
                player_id = get_player_id(p_uid, player_type)
                player_stats[player_id] = {}
                player_stats[player_id]['standard_' + player_type + '_stats'] = {}
                for uid in p_uids:
                    player_stats[player_id]['standard_' + player_type + '_stats'][get_team_id(uid, player_type)] = \
                        consolidate_traditional_player_stats(
                            db.read('select * from player_' + player_type + ' where p' + player_type[0] +
                                    '_uniqueidentifier = ' + str(uid) + ';')[0],
                            get_db_field_names(
                                db.read('describe player_' + player_type + ';')))
                # The list of pitch-FX table names lives in a CSV whose path
                # depends on the working directory; try both known locations.
                try:
                    with open(
                            os.path.join(
                                "..", "background", player_type + "_pitch_fx_tables.csv")) as tables_file:
                        tables = tables_file.readlines()
                except FileNotFoundError:
                    with open(os.path.join("..", "..", "..", "background", player_type + "_pitch_fx_tables.csv")) as \
                            tables_file:
                        tables = tables_file.readlines()
                player_stats[player_id]['advanced_' + player_type + '_stats'] = {}
                for table in tables:
                    # table[:-1] strips the trailing newline from the CSV line.
                    player_stats[player_id]['advanced_' + player_type + '_stats'][table[:-1]] = \
                        consolidate_pitch_fx(db.read('select * from ' + table[:-1] + ' where p_uid = ' +
                                                     str(p_uid[0]) + ';'),
                                             table[:-1],
                                             get_db_field_names(db.read('describe ' + table[:-1] + ';')))
    else:
        # Fielding: players appear via their batting/pitching direction rows.
        for player_role in ['batting', 'pitching']:
            for p_uid in db.read('select p_uid from direction_' + player_role + ' group by p_uid;'):
                p_uids = [p_uid[0]]
                try:
                    if player_is_on_this_team(
                            ty_uid,
                            db.read(
                                player_fielding_uid_query(
                                    p_uid[0], player_role, year,
                                    selector='pf_uniqueidentifier'))[0],
                            player_type, year):
                        if player_was_on_more_than_one_team(
                                p_uid, player_type, year):
                            p_uids.append(
                                get_uid_of_player_for_this_team(
                                    ty_uid, p_uid, player_type, year))
                        player_id = get_player_id(p_uid, player_role)
                        for uid in p_uids:
                            player_stats[
                                player_id] = consolidate_traditional_player_stats(
                                    db.read(
                                        player_fielding_uid_query(
                                            uid, player_role, year))[0],
                                    get_db_field_names(
                                        db.read('describe player_fielding;')))
                except IndexError:
                    # No fielding row for this uid/year: skip the player.
                    continue
    db.close()
    return player_stats
logger.log('\t' + winner.get_team_id() + ' wins the ' + str(games) + ' game series - ' + str(away_wins) + ' to ' + str(home_wins)) else: winner = home_team logger.log('\t' + winner.get_team_id() + ' wins the ' + str(games) + ' game series - ' + str(home_wins) + ' to ' + str(away_wins)) return winner.get_team_id() + ' wins the ' + str(games) + ' game series: ' + away_team.get_team_id()\ + ' ' + str(away_wins) + ' | ' + home_team.get_team_id() + ' ' + str(home_wins) else: return 'The ' + str(away_wins + home_wins) + ' game series has ended in a tie' from utilities.database.wrappers.baseball_data_connection import DatabaseConnection from ast import literal_eval away_year = 2017 home_year = 2017 db = DatabaseConnection(sandbox_mode=True) away_year_info = literal_eval( db.read('select year_info from years where year = ' + str(away_year) + ';')[0][0]) home_year_info = literal_eval( db.read('select year_info from years where year = ' + str(home_year) + ';')[0][0]) db.close() simulation('MIN', away_year, away_year_info, 'LAD', home_year, home_year_info, 1)
# Ad-hoc inspection script: print one nested slice of a stored team_info blob.
from utilities.database.wrappers.baseball_data_connection import DatabaseConnection
from ast import literal_eval

db = DatabaseConnection(sandbox_mode=True)
raw = db.read('select team_info from team_years where teamid = "min" and year = 2017;')[0][0]
team_info = literal_eval(raw)
print(team_info['batter_stats']['doziebr01']['advanced_batting_stats']['trajectory_batting']['vl']['0-0'])  # .keys())
db.close()
def retrieve_league(self):
    """Return the upper-cased league stored for this home team and year."""
    connection = DatabaseConnection(sandbox_mode)
    query = ('select league from team_years where teamid = "' + self.home_team +
             '" and year = ' + str(self.year) + ';')
    result = connection.read(query)[0][0].upper()
    connection.close()
    return result
def get_ws_winner(year):
    """Return the World Series champion recorded for *year*."""
    connection = DatabaseConnection(sandbox_mode)
    rows = connection.read('select ws_champ from years where year = ' + str(year) + ';')
    connection.close()
    return rows[0][0]
def set_batting_handedness(self):
    """Load this player's batting hand from the players table into self.bats_with."""
    connection = DatabaseConnection(sandbox_mode=True)
    query = 'select batsWith from players where playerId = "' + self.player_id + '";'
    self.bats_with = connection.read(query)[0][0]
    connection.close()
def retrieve_throwing_handedness(self):
    """Load this player's throwing hand from the players table into self.throws_with."""
    connection = DatabaseConnection(sandbox_mode)
    query = 'select throwsWith from players where playerId = "' + self.player_id + '";'
    self.throws_with = connection.read(query)[0][0]
    connection.close()
def reduce_functionality(year, ent):
    """Scrape a pitcher's hit-location and hit-trajectory splits for *year*
    from Baseball-Reference and pass them to ``write_to_file``.

    ``ent[0]`` is a PT_uniqueidentifier; the scrape only runs when the
    player_pitching row still has NULL pa_infield (i.e. not yet filled).
    On a download failure the work item is queued on the global
    ``bad_gateway_data`` for a later retry.

    NOTE(review): the DatabaseConnection opened here is never closed on any
    path — confirm whether the wrapper cleans up on its own.
    """
    # stats1: split-table row label -> [db column name, scraped value].
    # Key suffix _R/_L selects the RHB/LHB row of the Hit Location table.
    stats1 = {
        "To Infield_PA": ["pa_infield", ""], "To Outfield_PA": ["pa_outfield", ""],
        "To Infield_H": ["infield_hits", ""], "Ball In Play_PA": ["pa_balls_in_play", ""],
        "Pulled_PA_R": ["r_pa_pulled", ""], "Up Mdle_PA_R": ["r_pa_middle", ""], "Opp Fld_PA_R": ["r_pa_oppo", ""],
        "To Infield_AB": ["ab_infield", ""], "To Outfield_AB": ["ab_outfield", ""],
        "Ball In Play_AB": ["ab_balls_in_play", ""],
        "Pulled_AB_R": ["r_ab_pulled", ""], "Up Mdle_AB_R": ["r_ab_middle", ""], "Opp Fld_AB_R": ["r_ab_oppo", ""],
        "Pulled_H_R": ["r_h_pull", ""], "Up Mdle_H_R": ["r_h_middle", ""], "Opp Fld_H_R": ["r_h_oppo", ""],
        "Pulled_2B_R": ["r_double_pull", ""], "Up Mdle_2B_R": ["r_double_middle", ""],
        "Opp Fld_2B_R": ["r_double_oppo", ""],
        "Pulled_3B_R": ["r_triple_pull", ""], "Up Mdle_3B_R": ["r_triple_middle", ""],
        "Opp Fld_3B_R": ["r_triple_oppo", ""],
        "Pulled_HR_R": ["r_hr_pull", ""], "Up Mdle_HR_R": ["r_hr_middle", ""], "Opp Fld_HR_R": ["r_hr_oppo", ""],
        "Pulled_PA_L": ["l_pa_pulled", ""], "Up Mdle_PA_L": ["l_pa_middle", ""], "Opp Fld_PA_L": ["l_pa_oppo", ""],
        "Pulled_AB_L": ["l_ab_pulled", ""], "Up Mdle_AB_L": ["l_ab_middle", ""], "Opp Fld_AB_L": ["l_ab_oppo", ""],
        "Pulled_H_L": ["l_h_pull", ""], "Up Mdle_H_L": ["l_h_middle", ""], "Opp Fld_H_L": ["l_h_oppo", ""],
        "Pulled_2B_L": ["l_double_pull", ""], "Up Mdle_2B_L": ["l_double_middle", ""],
        "Opp Fld_2B_L": ["l_double_oppo", ""],
        "Pulled_3B_L": ["l_triple_pull", ""], "Up Mdle_3B_L": ["l_triple_middle", ""],
        "Opp Fld_3B_L": ["l_triple_oppo", ""],
        "Pulled_HR_L": ["l_hr_pull", ""], "Up Mdle_HR_L": ["l_hr_middle", ""], "Opp Fld_HR_L": ["l_hr_oppo", ""]
    }
    # stats2: Hit Trajectory table, keyed "<Kind>_<Balls|Drives>_<stat>".
    stats2 = {
        "Ground_Balls_PA": "", "Ground_Balls_AB": "", "Ground_Balls_H": "", "Ground_Balls_2B": "",
        "Ground_Balls_3B": "", "Ground_Balls_HR": "", "Fly_Balls_PA": "", "Fly_Balls_AB": "", "Fly_Balls_H": "",
        "Fly_Balls_2B": "", "Fly_Balls_3B": "", "Fly_Balls_HR": "", "Line_Drives_PA": "", "Line_Drives_AB": "",
        "Line_Drives_H": "", "Line_Drives_2B": "",
        "Line_Drives_3B": "", "Line_Drives_HR": ""
    }
    db = DatabaseConnection(sandbox_mode)
    player_id = db.read(
        "select playerId from player_teams where PT_uniqueidentifier = " + str(ent[0]) + ";")[0][0]
    if db.read(
            'select pa_infield from player_pitching where PT_uniqueidentifier = ' + str(ent[0]) + ' and year = ' +
            str(year) + ';')[0][0] is None:
        try:
            page = str(
                BeautifulSoup(
                    urlopen(
                        'https://www.baseball-reference.com/players/split.fcgi?id=' + player_id + '&year=' +
                        str(year) + '&t=p'),
                    'html.parser'))
        except Exception:
            # Download failed (e.g. 502): queue for retry and bail out.
            global bad_gateway_data
            bad_gateway_data.append([ent, stats1, stats2])
            return
        try:
            table_rows1 = page.split('<h2>Hit Location</h2>')[1].split(
                '<tbody>')[1].split('</tbody>')[0].split('<tr')
            for row in table_rows1:
                for key, value in stats1.items():
                    if key.split('_')[0] in row:
                        # Handed keys must also match the row's R/L marker
                        # (the char just before 'HB</th><td').
                        if key not in [
                                "To Infield_PA", "To Outfield_PA", "To Infield_H", "Ball In Play_PA"
                        ]:
                            if key[-1] != row.split('HB</th><td')[0][-1]:
                                continue
                        try:
                            stats1[key][1] = row.split('data-stat="' + key.split('_')[1] + '" >')[1].split('<')[0]
                        except IndexError:
                            # Cell absent in this row: leave value as-is.
                            continue
            table_rows2 = page.split('<h2>Hit Trajectory</h2>')[1].split(
                '<tbody>')[1].split('</tbody>')[0].split('<tr')
            for row in table_rows2:
                for key, value in stats2.items():
                    this_key = key.split('_')
                    if this_key[0] + ' ' + this_key[1] in row:
                        try:
                            stats2[key] = row.split('data-stat="' + key.split('_')[2] + '" >')[1].split('<')[0]
                        except IndexError:
                            continue
        except IndexError:
            # Expected sections missing entirely: zero everything out.
            for key, value in stats1.items():
                stats1[key][1] = '0'
            for key, value in stats2.items():
                stats2[key] = '0'
        finally:
            # Always persist whatever was gathered (or the zero fallback).
            write_to_file(year, ent[0], stats1, stats2)
def get_year_list():
    """Return the stored years as a reversed iterator (newest first,
    assuming the table yields ascending order)."""
    connection = DatabaseConnection(sandbox_mode=False)
    rows = connection.read('select year from years;')
    connection.close()
    ordered = [row[0] for row in rows]
    return reversed(ordered)
def get_oldest_year():
    """Return the smallest year present in the years table, as an int."""
    connection = DatabaseConnection(sandbox_mode)
    rows = connection.read("select year from years order by year limit 1;")
    connection.close()
    return int(rows[0][0])