def team_certainties(year): print('aggregating team statistic certainties') driver_logger.log("\tAggregating team statistic certainties") start_time = time.time() logger.log("Calculating team certainties || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) db = DatabaseConnection(sandbox_mode) stat_types = ["batting", "pitching"] for stat_type in stat_types: ty_uids = db.read('select ty_uniqueidentifier, teamid from team_years where year = ' + str(year)) for ty_uid in ty_uids: pau = 0 player_list = list(db.read('select playerid from player_positions where ty_uniqueidentifier = ' + str(ty_uid[0]) + ';')) for player in player_list: pt_uid = db.read('select pt_uniqueidentifier from player_teams where playerid = "' + player[0] + '" and' ' teamid = "' + ty_uid[1] + '";')[0][0] try: ent = db.read('select pa, certainty from player_' + stat_type + ' where year = ' + str(year) + ' and pt_uniqueidentifier = ' + str(pt_uid) + ';') pau += int(ent[0][0]) - (int(ent[0][0]) * float(ent[0][1])) except IndexError: continue except TypeError: continue pa = int(db.read('select pa from team_years where ty_uniqueidentifier = ' + str(ty_uid[0]) + ';')[0][0]) db.write('update team_years set certainty = ' + str((pa - pau) / pa) + ' where ty_uniqueidentifier = ' + str(ty_uid[0]) + ';') db.close() total_time = time_converter(time.time() - start_time) logger.log("Done calculating team certainties: time = " + total_time + '\n\n') driver_logger.log("\t\tTime = " + total_time)
def hof_finder(): print("adding HOF data") driver_logger.log("\tAdding HOF data") start_time = time.time() logger.log("Begin finding hall of famers || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) hof_table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/hof.shtml'), 'html.parser')).\ split('<tbody>')[1].split('</tbody>')[0] rows = hof_table.split('<tr>')[1:] db = DatabaseConnection(sandbox_mode) for row in rows: person = row.split('data-append-csv="')[1].split('"')[0] year = row.split('<a href="/awards/hof_')[1].split('.shtml')[0] induction_type = row.split('data-stat="category_hof">')[1].split( '<')[0] if induction_type == 'Player': db.write('update players set HOF = ' + str(year) + ' where playerId = "' + person + '";') elif induction_type == 'Manager': db.write('update managers set HOF = ' + str(year) + ' where managerId = "' + person + '";') else: continue db.close() total_time = time_converter(time.time() - start_time) logger.log("Done finding hall of famers: time = " + total_time + '\n\n') driver_logger.log("\t\tTime = " + total_time)
def write_to_file(final_data, greater_than, field): db = DatabaseConnection(sandbox_mode) counter = 0 while len(final_data) > 0: target = None target_year = None for a, b in final_data.items(): if target is not None: if greater_than: if b[0][1] > target[1]: target = b[0] target_year = a else: continue else: if b[0][1] < target[1]: target = b[0] target_year = a else: continue else: target = b[0] target_year = a if db.read('select league from team_years where teamId = "' + translate_team_id(target[0], target_year) + '" and year = ' + str(target_year) + ';')[0][0].upper() in ['AL', 'NL']: counter += 1 db.write('update team_years set ' + field + ' = ' + str(counter) + ' where teamId = "' + translate_team_id(target[0], target_year) + '" and year = ' + str(target_year) + ';') del final_data[target_year][0] if len(final_data[target_year]) == 0: del final_data[target_year] else: continue db.close()
def write_pickoff(pitcher, team_id, year, base, attempts_successes): db = DatabaseConnection(sandbox_mode=True) if len( db.read( 'select pp_uniqueidentifier from player_pitching where playerdId = ' + pitcher + ' and year = ' + str(year) + ';')[0]) > 1: team_id = 'TOT' pt_uid = db.read( 'select pt_uniqueidentifier from player_teams where playerid = "' + pitcher + '" and teamid = "' + team_id + '";')[0][0] if db.read('select pickoff_' + base + '_' + attempts_successes + ' from player_pitching where pt_uniqueidentifier' ' = ' + str(pt_uid) + ' and year = ' + str(year) + ';')[0][0] is None: db.write('update player_pitching set pickoff_' + base + '_' + attempts_successes + ' = 1 where ' 'pt_uniqueidentifier = ' + str(pt_uid) + ' and year = ' + str(year) + ';') else: db.write( 'update player_pitching set pickoff_' + base + '_' + attempts_successes + ' = ' + str( int( db.read('select pickoff_' + base + '_' + attempts_successes + ' from player_pitching where ' 'pt_uniqueidentifier = ' + str(pt_uid) + ' and year = ' + str(year) + ';')[0][0]) + 1) + ' where pt_uniqueidentifier = ' + str(pt_uid) + ' and year = ' + str(year) + ';') db.close()
def write_to_file(year): for team_id, table in pages.items(): logger.log("\t\tgathering and writing " + team_id + " positions") db = DatabaseConnection(sandbox_mode) primary_keys = [] try: for row in table: if 'data-append-csv="' in row: this_string = "" primary_keys.append( row.split('data-append-csv="')[1].split('"') [0].replace("'", "\'")) this_string += '"' + primary_keys[-1] + '","' position_summary = row.split( 'data-stat="pos_summary" >')[1].split('<')[0] if '-' in position_summary: positions = position_summary.split('-') for position_index in range(len(positions)): if this_is_position_player_pitching(primary_keys[-1], positions, position_index, team_id, year) or \ this_is_pitcher_playing_in_the_field(primary_keys[-1], positions, position_index, team_id, year): continue # don't give positions players RP eligibility who threw mop-up innings else: if position_index != len(positions) - 1: this_string += positions[ position_index] + "," else: this_string += positions[position_index] else: this_string += position_summary this_string += '"' ty_uid = str( db.read( 'select TY_uniqueidentifier from team_years where teamId = "' + team_id + '" and year = ' + str(year) + ';')[0][0]) if len( db.read( 'select PPos_uniqueidentifier from player_positions where playerId=' + this_string.split(',')[0] + ' and TY_uniqueidentifier = ' + ty_uid + ';')) == 0: db.write( 'insert into player_positions (PPos_uniqueidentifier, playerId, positions, ' 'TY_uniqueidentifier) values (default, ' + this_string + ', ' + ty_uid + ');') else: split_positions = this_string.split(',')[1:] if split_positions[-1] == '"': del split_positions[-1] split_positions[-1] += '"' db.write('update player_positions set positions = ' + ','.join(split_positions) + ' where ' 'playerId = ' + this_string.split(',')[0] + ' and TY_uniqueidentifier = ' + ty_uid + ';') except IndexError: pass db.close()
def clean_up_deadlocked_file(): driver_logger.log("\tCleaning up deadlocked records") with open(os.path.join("utilities", "deadlocked.txt"), 'r') as f: db = DatabaseConnection(sandbox_mode) for line in f: db.write(line) db.close() file = open(os.path.join("utilities", "deadlocked.txt"), "w").close()
def write_to_file(year, awards_dict_list): if len(awards_dict_list[0]) + len(awards_dict_list[1]) + len(awards_dict_list[2]) + len(awards_dict_list[3]) \ + len(awards_dict_list[4]) + len(awards_dict_list[5]) > 0: db = DatabaseConnection(sandbox_mode) this_string = "" for dictionary in awards_dict_list: for key, value in dictionary.items(): this_string += key + ' = "' + value.replace("'", "\'") + '", ' db.write('update years set ' + this_string[:-2] + ' where year = ' + str(year) + ';') db.close()
def write_playoff_data(year, playoff_data): db = DatabaseConnection(sandbox_mode) sets = '' for accomplishment, team_id in playoff_data.items(): if team_id is not None: if 'lds' not in accomplishment: sets += accomplishment[:-1] + ' = "' + team_id + '", ' else: sets += accomplishment + ' = "' + team_id + '", ' if len(sets) > 0: db.write('update years set ' + sets[:-2] + ' where year = ' + str(year) + ';') db.close()
def write_to_file(team_data, year): db = DatabaseConnection(sandbox_mode) for team, data in team_data.items(): logger.log("\tWriting " + team + " data to database") sets = '' for field, value in data.items(): if value != '': sets += field + ' = ' + value + ', ' else: continue db.write('update team_years set ' + sets[:-2] + ' where teamid = "' + team + '" and year = ' + str(year) + ';') db.close()
def write_to_file(year, manager_team, tendencies): db = DatabaseConnection(sandbox_mode) sets = '' for stat, total in tendencies.items(): if len(total) > 0: sets += stat + ' = ' + total + ', ' db.write( 'update manager_year set ' + sets[:-2] + ' where year = ' + str(year) + ' and mt_uniqueidentifier = ' '(select mt_uniqueidentifier from manager_teams where managerid = "' + manager_team.split(';')[0] + '" and teamid = "' + manager_team.split(';')[1] + '");') db.close()
def write_to_db(this_string, team_id, year): logger.log("\tWriting " + team_id + " to team_years") db = DatabaseConnection(sandbox_mode) if len( db.read( 'select TY_uniqueidentifier from team_years where teamId = "' + team_id + '" and year = ' + str(year) + ';')) == 0: db.write( 'Insert into team_years (TY_uniqueidentifier, teamId, year, league, division, wins, loses, playoffs, ' 'BY_uniqueidentifier) values (default,' + this_string + ', (select BY_uniqueidentifier from ' 'ballpark_years where teamId = "' + team_id + '" and year = ' + str(year) + '));') db.close()
def write_to_file(year, pt_uid, stat_list1, stat_list2): db = DatabaseConnection(sandbox_mode) query_string = "" for key, value in stat_list1.items(): if value[1] == '': value[1] = '0' query_string += value[0] + ' = ' + value[1] + ', ' for key, value in stat_list2.items(): if value == '': value = '0' query_string += key + ' = ' + value + ', ' db.write('update player_pitching set ' + query_string[:-2] + ' where PT_uniqueidentifier = ' + str(pt_uid) + ' and year = ' + str(year) + ';') db.close()
def write_to_file(year, all_stars): db = DatabaseConnection(sandbox_mode) for player in all_stars: pt_uids = db.read('select PT_uniqueidentifier from player_teams ' + 'where playerId = "' + player + '";') for pt_uid in pt_uids: db.write( 'update player_batting set all_star = TRUE where PT_uniqueidentifier = ' + str(pt_uid[0]) + ' and ' 'year = ' + str(year) + ';') db.write( 'update player_pitching set all_star = TRUE where PT_uniqueidentifier = ' + str(pt_uid[0]) + ' and' ' year = ' + str(year) + ';') db.close()
def write_league_champs_non_ws(champs, year): db = DatabaseConnection(sandbox_mode) sets = '' team_count = 0 for league, team in champs.items(): if league in ['AL', 'NL']: sets += league + 'cs_champ = "' + team + '", ' else: sets += league + '_champ = "' + team + '", ' team_count += 1 if len(sets) > 0: sets = 'update years set ' + sets[:-2] + ' where year = ' + str( year) + ';' db.write(sets) db.close()
def write_to_file(stats, year, stat_type): logger.log("\tWriting " + stat_type + " data") start_time = time.time() db = DatabaseConnection(sandbox_mode) counter = 0 for team in stats: team_id = translate_team_id(team[0], year) if db.read('select league from team_years where teamId = "' + team_id + '" and year = ' + str(year) + ';')[0][0].upper() in ['AL', 'NL']: counter += 1 db.write('update team_years set ' + stat_type + '_year = ' + str(counter) + ' where teamId = "' + team_id + '" and year = ' + str(year) + ';') else: continue db.close() logger.log("\t\tTime = " + time_converter(time.time() - start_time))
def write_to_file(player, positions_dict): db = DatabaseConnection(sandbox_mode) positions = sorted(positions_dict.items(), key=lambda kv: kv[1], reverse=True) secondary = positions[1:] secondary_positions = [] for ent in secondary: secondary_positions.append(ent[0]) if len(secondary_positions) == 0: db.write('update players set primaryPosition = "' + positions[0][0] + '", secondaryPositions' + ' = Null where ' 'playerId = "' + player + '";') else: db.write('update players set primaryPosition = "' + positions[0][0] + '", secondaryPositions' + ' = "' + ','.join(secondary_positions) + '" where playerId = "' + player + '";') db.close()
def write_to_file(year, player_id, stat_list): db = DatabaseConnection(sandbox_mode) pa = [] records = [] pt_uids = list( db.read( 'select PT_uniqueidentifier from player_teams where playerId = "' + player_id + '";')) for pt_uid in pt_uids: if len( db.read( "select PP_uniqueidentifier from player_pitching where PT_uniqueidentifier = " + str(pt_uid[0]) + " and year = " + str(year) + ";")) != 0: try: pa.append( int( db.read( "select pa from player_pitching where pt_uniqueidentifier = " + str(pt_uid[0]) + " and year = " + str(year) + ";")[0][0])) except TypeError: continue records.append(pt_uid) if len(pa) != 0: for pt in records: query_string = "" for key, value in stat_list.items(): if key != "PA_unknown": query_string += value[0] + ' = ' + value[1] + ', ' else: try: query_string += value[0] + ' = ' + str( pa[0] / (pa[0] + int(value[1]))) + ', ' except ZeroDivisionError: query_string += value[0] + ' = ' + '1.0' + ', ' if len( db.read( 'select pp_pa from player_pitching where PT_uniqueidentifier = ' + str(pt[0]) + ' and year = ' + str(year) + ';')): db.write('update player_pitching set ' + query_string[:-2] + ' where PT_uniqueidentifier = ' + str(pt[0]) + ' and year = ' + str(year) + ';') db.close()
def write_opening_day(year): start_time = time.time() db = DatabaseConnection(sandbox_mode) logger.log('Getting date of opening day') mlb_schedule = str( BeautifulSoup( urlopen('https://www.baseball-reference.com/leagues/MLB/' + str(year) + '-schedule.shtml'), 'html.parser')) possible_opening_dates = mlb_schedule.split('<h3>') months = {'March': '03', 'April': '04', 'May': '05'} for possible_opening_date in possible_opening_dates: if ', ' + str(year) in possible_opening_date: opening_day = months[possible_opening_date.split('</h3>')[0].split(', ')[1].split(' ')[0]] + '-'\ + possible_opening_date.split('</h3>')[0].split(', ')[1].split(' ')[1].split(',')[0] break db.write('update years set opening_day = "' + opening_day + '" where year = ' + str(year) + ';') db.close() logger.log('\tComplete (opening day): time = ' + time_converter(time.time() - start_time))
def manager_table_constructor(): driver_logger.log('\tGathering manager data (all-time)') print("Gathering manager data (all-time)") start_time = time.time() logger.log('Begin populating teams table || Timestamp: ' + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) table = str( bs( urllib.request.urlopen( 'https://www.baseball-reference.com/managers/'), 'html.parser')) rows = table.split('<tr') db = DatabaseConnection(sandbox_mode=True) db.write('ALTER TABLE managers DROP INDEX managerId;') db.close() with ThreadPoolExecutor(os.cpu_count()) as executor: for row in rows: if '<td class="left" csk="' in row: this_row = row.split('</tr>')[0] try: manager_id = this_row.split( '<a href="/managers/')[1].split('.shtml')[0].replace( "'", "\'") last_first = this_row.split('</tr>')[0].split( '<td class="left" csk="')[1].split('"')[0] last = last_first.split(',')[0].replace("'", "\'") first = last_first.split(',')[1].replace("'", "\'") wins = this_row.split('data-stat="W">')[1].split('<')[0] loses = this_row.split('data-stat="L">')[1].split('<')[0] executor.submit( write_to_file, '"' + manager_id + '","' + last + '","' + first + '",' + wins + ',' + loses) except AttributeError: continue db = DatabaseConnection(sandbox_mode=True) db.write('ALTER TABLE managers ADD INDEX(managerId);') db.close() total_time = time.time() - start_time logger.log('Constructing manager table completed: time = ' + time_converter(total_time)) driver_logger.log('\t\tTime = ' + time_converter(total_time))
def populate_teams_table(year): driver_logger.log('\tPopulating teams table') print("Populating teams table") start_time = time.time() logger.log('Begin populating teams table for ' + str(year) + ' || Timestamp: ' + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) with open(os.path.join("..", "background", "yearTeams.txt"), 'rt') as file: db = DatabaseConnection(sandbox_mode) db.write('ALTER TABLE teams DROP INDEX teamId;') for line in file: if str(year) in line: temp_line = line.split(',')[1:-1] for team in temp_line: team_id = team.split(';')[0] db.write('insert into teams (teamId, teamName) values ("' + team_id + '", "' + translate_team_name(team_id).replace("'", "\'") + '");') break db.write('ALTER TABLE teams ADD INDEX(teamId);') db.close() total_time = time.time() - start_time logger.log('Populating teams table completed: ' + time_converter(total_time)) driver_logger.log('\t\tTime = ' + time_converter(total_time))
def write_teams_and_stats(player_id, stats, team, year, catcher_stats): stat_nums = {} for index, numbers in stats.items(): for field, value in numbers['stats'].items(): if field in stat_nums: stat_nums[field] += value else: stat_nums[field] = value db = DatabaseConnection(sandbox_mode) if len( db.read( 'select pt_uniqueidentifier from player_teams where playerid = "' + player_id + '" and teamid = "' + team + '";')) == 0: db.write( 'insert into player_teams (pt_uniqueidentifier, playerid, teamid) values (default, "' + player_id + '", "' + team + '");') if len( db.read( 'select pf_uniqueidentifier from player_fielding where year = ' + str(year) + ' and' ' pt_uniqueidentifier = (select pt_uniqueidentifier from player_teams where playerid = "' + player_id + '" and teamid = "' + team + '");')) == 0: fields = '' values = '' for field, value in stat_nums.items(): fields += ', ' + field values += ', ' + str(value) for field, value in catcher_stats.items(): if len(value) > 0: fields += ', ' + field values += ', ' + value db.write( 'insert into player_fielding (pf_uniqueidentifier, year, pt_uniqueidentifier, complete_year' + fields + ') values (default, ' + str(year) + ', (select pt_uniqueidentifier from player_teams where playerid' ' = "' + player_id + '" and teamid = "' + team + '"), FALSE' + values + ');') db.close()
def write_to_file(comparisons, stat_type): db = DatabaseConnection(sandbox_mode) with ThreadPoolExecutor(os.cpu_count()) as executor: for ty_uid, comp in comparisons.items(): if comp is not None: comp_pull = 1 - float( db.read( 'select certainty from team_years where ty_uniqueidentifier = ' + str(ty_uid) + ';')[0][0]) if len( db.read('select comp_id from comparisons_team_' + stat_type + '_overall where ' 'ty_uniqueidentifier = ' + str(ty_uid) + ';')) == 0: executor.submit( transact, 'insert into comparisons_team_' + stat_type + '_overall (comp_id, ' 'ty_uniqueidentifier, comp, comp_pull) values (default, ' + str(ty_uid) + ', ' + str(comp) + ', ' + str(comp_pull) + ');') else: db.write('update comparisons_team_' + stat_type + '_overall set comp = ' + str(comp) + ', comp_pull = ' + str(comp_pull) + ' where ty_uniqueidentifier = ' + str(ty_uid) + ';') else: if len( db.read('select comp_id from comparisons_team_' + stat_type + '_overall where ' 'ty_uniqueidentifier = ' + str(ty_uid) + ';')) == 0: executor.submit( transact, 'insert into comparisons_team_' + stat_type + '_overall (comp_id, ty_' 'uniqueidentifier) values (default, ' + str(ty_uid) + ');') db.close()
def transact(hitter, team, year, place, matchup): db = DatabaseConnection(sandbox_mode=True) ty_uid = db.read( 'select TY_uniqueidentifier from team_years where teamId = "' + team + '" and year = ' + str(year))[0][0] if len( db.read('select * from hitter_spots where playerId = "' + hitter.split(',')[0] + '" and matchup = "' + matchup + '" and TY_uniqueidentifier = ' + str(ty_uid) + ';')) > 0: db.write('update hitter_spots set ' + num_to_word(place + 1) + ' = ' + hitter.split(',')[1] + ' where playerId ' '= "' + hitter.split(',')[0] + '" and matchup = "' + matchup + '" and TY_uniqueidentifier = (select ' 'TY_uniqueidentifier from team_years where teamId = "' + team + '" and year = ' + str(year) + ');') else: db.write( 'insert into hitter_spots (HS_uniqueidentifier, playerId, matchup, ' + num_to_word(place + 1) + ', TY_uniqueidentifier) values (default, "' + hitter.split(',')[0] + '", "' + matchup + '", ' + hitter.split(',')[1] + ', ' + str(ty_uid) + ');') db.close()
def determine_pitcher_roles_year(year): driver_logger.log("\tDetermining Pitcher Roles") print("Determining Pitcher Roles") start_time = time.time() logger.log("Determining Pitcher Roles || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) db = DatabaseConnection(sandbox_mode) with ThreadPoolExecutor(os.cpu_count()) as executor: for pt_uid in db.read('select pt_uniqueidentifier from player_pitching where year = ' + str(year) + ';'): player_id_team_id = db.read('select playerid, teamid from player_teams where pt_uniqueidentifier=' + str(pt_uid[0]) + ';')[0] player_id = player_id_team_id[0] team_id = player_id_team_id[1] if team_id == 'TOT': continue ty_uid = str(db.read('select ty_uniqueidentifier from team_years where teamId = "' + team_id + '" and year = ' + str(year) + ';')[0][0]) try: positions = db.read('select positions from player_positions where playerId = "' + player_id + '" and ' 'ty_uniqueidentifier = ' + ty_uid + ';')[0][0] except IndexError: continue update_positions = [] if 'P' in positions: appearances_starts = db.read('select G, GS from player_pitching where pt_uniqueidentifier = ' + str(pt_uid[0]) + ' and year = ' + str(year) + ';')[0] appearances = appearances_starts[0] starts = appearances_starts[1] start_percent = starts / appearances if start_percent > 0.75: role = ['SP'] elif start_percent > 0.50: role = ['SP', 'RP'] elif start_percent > 0.25: role = ['RP', 'SP'] else: role = ['RP'] for position in positions.split(','): if position == 'P': update_positions += role else: update_positions.append(position) executor.submit(db.write('update player_positions set positions = "' + ','.join(update_positions) + '" where ty_uniqueidentifier = ' + ty_uid + ' and playerId = "' + player_id + '";')) db.close() total_time = time_converter(time.time() - start_time) logger.log("Done: Time = " + total_time + '\n\n') driver_logger.log("\t\tTime = " + total_time)
def write_to_db(player_id, player_attributes): fields = '' values = '' for field, value in player_attributes.items(): fields += ', ' + field values += '", "' + value db = DatabaseConnection(sandbox_mode) if len( db.read('select * from players where playerid = "' + player_id + '";')) == 0: db.write('ALTER TABLE players DROP INDEX playerId;') db.write('insert into players (playerid ' + fields + ') values ("' + player_id + values + '");') db.write('ALTER TABLE players ADD INDEX(playerId);') db.close()
def league_table_constructor(): driver_logger.log('\tPopulating leagues table (all-time)') print('Populating leagues table (all-time)') logger.log('Begin populating leagues table || Timestamp: ' + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) leagues = { 'NL': 'National League', 'AL': 'American League', 'AA': 'American Association', 'FL': 'Federal League', 'PL': 'Players League', 'UA': 'Union Association', 'NA': 'National Association' } db = DatabaseConnection(sandbox_mode) db.write('ALTER TABLE leagues DROP INDEX leagueId;') for league_id, league_name in leagues.items(): db.write('insert into leagues (leagueId, leagueName) values ("' + league_id + '", "' + league_name + '");') db.write('ALTER TABLE leagues ADD INDEX(leagueId);') db.close() logger.log('Populating leagues table completed\n\n') driver_logger.log('\t\tPopulating leagues table completed')
def write_to_file(data): db = DatabaseConnection(sandbox_mode) db.write( 'insert into managers (managerId, lastName, firstName, wins, loses) values (' + data + ');') db.close()
def get_year_data(year): driver_logger.log('\tGathering year data') print("Gathering year data") start_time = time.time() global pages global strings pages = {} strings = {} logger.log('Beginning year_data download for ' + str(year) + ' || Timestamp: ' + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')) batting_list = { 'PA': 'pa', 'AB': 'ab', 'R': 'r', 'H': 'h', '2B': '2b', '3B': '3b', 'HR': 'hr', 'RBI': 'rbi', 'SB': 'sb', 'BB': 'bb', 'SO': 'so', 'batting_avg': 'ba', 'onbase_perc': 'obp', 'slugging_perc': 'slg', 'onbase_plus_slugging': 'ops' } pitching_list = { 'earned_run_avg': 'era', 'SV': 'sv', 'IP': 'ip', 'ER': 'er', 'whip': 'whip', 'strikeouts_per_nine': 'k_9', 'strikeouts_per_base_on_balls': 'k_bb' } fielding_list = {'E_def': 'e', 'fielding_perc': 'f_percent'} stat_list = { "batting": batting_list, "pitching": pitching_list, "fielding": fielding_list } db = DatabaseConnection(sandbox_mode) if len(db.read('select * from years where year = ' + str(year) + ';')) == 0: db.write('alter table years drop index year;') db.write('insert into years (year) values (' + str(year) + ');') db.write('alter table years add index(year);') db.close() write_opening_day(year) download_start = time.time() logger.log("making HTTP requests for year data") with ThreadPoolExecutor(3) as executor1: for key, value in stat_list.items(): executor1.submit(load_url, year, key) logger.log("\tdone making HTTP requests: time = " + time_converter(time.time() - download_start)) for key, dictionary in stat_list.items(): assemble_stats(key, dictionary, pages[key]) write_start = time.time() logger.log("writing to database") with ThreadPoolExecutor(3) as executor2: for key, value in stat_list.items(): executor2.submit(write_to_db, year, strings[key], key) logger.log("\tdone writing to database: time = " + time_converter(time.time() - write_start)) total_time = time_converter(time.time() - start_time) logger.log('year_data download completed: time = ' + total_time + '\n\n') driver_logger.log('\t\tTime = ' + total_time)
def fill_fields(year): db = DatabaseConnection(sandbox_mode) db.write('update player_pitching set certainty = 0.0 where year = ' + str(year) + ';') db.close()
def fill_pitchers_with_0_pa(year): db = DatabaseConnection(sandbox_mode) db.write( "update player_pitching set certainty = 0.0 where pa = 0 and year = " + str(year) + ";") db.close()