Exemplo n.º 1
0
def load_url(player_id):
    global temp_pages
    db = DatabaseConnection(sandbox_mode)
    if len(
            db.read('select * from players where playerid = "' + player_id +
                    '";')) == 0:
        try:
            temp_pages[player_id] = BeautifulSoup(
                urlopen("https://www.baseball-reference.com/players/" +
                        player_id[0] + "/" + player_id + ".shtml"),
                'html.parser')
        except:
            temp_pages[player_id] = None
    else:
        temp_pages[player_id] = None
    db.close()
Exemplo n.º 2
0
def write_to_file(stats, year, stat_type):
    logger.log("\tWriting " + stat_type + " data")
    start_time = time.time()
    db = DatabaseConnection(sandbox_mode)
    counter = 0
    for team in stats:
        team_id = translate_team_id(team[0], year)
        if db.read('select league from team_years where teamId = "' + team_id +
                   '" and year = ' + str(year) +
                   ';')[0][0].upper() in ['AL', 'NL']:
            counter += 1
            db.write('update team_years set ' + stat_type + '_year = ' +
                     str(counter) + ' where teamId = "' + team_id +
                     '" and year = ' + str(year) + ';')
        else:
            continue
    db.close()
    logger.log("\t\tTime = " + time_converter(time.time() - start_time))
Exemplo n.º 3
0
def consolidate_hitter_spots(ty_uid):
    db = DatabaseConnection(sandbox_mode=True)
    batting_spots = {}
    query_fields = ''
    for num in range(9):
        query_fields += num_to_word(num + 1) + ', '
    for match_up in ['vr', 'vl']:
        for data in db.read('select playerId, ' + query_fields[:-2] +
                            ' from hitter_spots where ty_uniqueidentifier = ' +
                            str(ty_uid) + ' and matchup = "' + match_up +
                            '";'):
            player_id = data[0]
            if player_id not in batting_spots:
                batting_spots[player_id] = {}
            batting_spots[player_id][match_up] = {}
            spot = 1
            for ent in data[1:]:
                if ent:
                    batting_spots[player_id][match_up][spot] = ent
                spot += 1
    db.close()
    return ensure_both_match_ups_are_present(batting_spots)
Exemplo n.º 4
0
def get_year_data(year):
    driver_logger.log('\tGathering year data')
    print("Gathering year data")
    start_time = time.time()
    global pages
    global strings
    pages = {}
    strings = {}
    logger.log('Beginning year_data download for ' + str(year) +
               ' || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    batting_list = {
        'PA': 'pa',
        'AB': 'ab',
        'R': 'r',
        'H': 'h',
        '2B': '2b',
        '3B': '3b',
        'HR': 'hr',
        'RBI': 'rbi',
        'SB': 'sb',
        'BB': 'bb',
        'SO': 'so',
        'batting_avg': 'ba',
        'onbase_perc': 'obp',
        'slugging_perc': 'slg',
        'onbase_plus_slugging': 'ops'
    }
    pitching_list = {
        'earned_run_avg': 'era',
        'SV': 'sv',
        'IP': 'ip',
        'ER': 'er',
        'whip': 'whip',
        'strikeouts_per_nine': 'k_9',
        'strikeouts_per_base_on_balls': 'k_bb'
    }
    fielding_list = {'E_def': 'e', 'fielding_perc': 'f_percent'}
    stat_list = {
        "batting": batting_list,
        "pitching": pitching_list,
        "fielding": fielding_list
    }
    db = DatabaseConnection(sandbox_mode)
    if len(db.read('select * from years where year = ' + str(year) +
                   ';')) == 0:
        db.write('alter table years drop index year;')
        db.write('insert into years (year) values (' + str(year) + ');')
        db.write('alter table years add index(year);')
    db.close()
    write_opening_day(year)
    download_start = time.time()
    logger.log("making HTTP requests for year data")
    with ThreadPoolExecutor(3) as executor1:
        for key, value in stat_list.items():
            executor1.submit(load_url, year, key)
    logger.log("\tdone making HTTP requests: time = " +
               time_converter(time.time() - download_start))
    for key, dictionary in stat_list.items():
        assemble_stats(key, dictionary, pages[key])
    write_start = time.time()
    logger.log("writing to database")
    with ThreadPoolExecutor(3) as executor2:
        for key, value in stat_list.items():
            executor2.submit(write_to_db, year, strings[key], key)
    logger.log("\tdone writing to database: time = " +
               time_converter(time.time() - write_start))
    total_time = time_converter(time.time() - start_time)
    logger.log('year_data download completed: time = ' + total_time + '\n\n')
    driver_logger.log('\t\tTime = ' + total_time)
Exemplo n.º 5
0
def write_to_file(innings_url, player_type, player_id, team_id, year, month,
                  day, match_up, count, pitch_type, ball_strike, swing_take,
                  outcome, trajectory, field, direction, original_player_id,
                  x_coord, y_coord, velocity):
    if player_id is None:
        with open(
                os.path.join("..", "..", "baseball-sync", "src", "import_data",
                             "player_data", "pitch_fx",
                             "multiple_players.csv"), 'a') as file:
            file.write(
                str(original_player_id) + ': --> insert into ' + player_type +
                '_pitches (pitchid, playerid, '
                'year, matchup, count, pitch_type, swing_take, ball_strike, outcome, trajectory, field, '
                'direction, P' + player_type[0] +
                '_uniqueidentifier) values (default, ' + str(player_id) +
                ', ' + str(year) + ', "' + match_up + '", "' + str(count) +
                '", "' + pitch_type + '", "' + swing_take + '", "' +
                ball_strike + '", "' + outcome + '", "' + trajectory + '", "' +
                field + '", "' + direction + '", (select P' + player_type[0] +
                '_uniqueidentifier from player_' + player_type[:-2] +
                'ing where year = ' + str(year) +
                ' and pt_uniqueidentifier = (select '
                'pt_uniqueidentifier from player_teams where playerid = "' +
                str(player_id) + '" and teamid = "' + team_id + '"))); -- ' +
                innings_url + '\n')
    else:
        db_to_read = DatabaseConnection(sandbox_mode=True)
        # if len(db_to_read.read('select p' + player_type[0] + '_uniqueidentifier from player_' + player_type + ' where '
        #                        'playerdId = ' + player_id + ' and year = ' + str(year) + ';')[0]) > 1:
        #     team_id = 'TOT'  <---this code would've made sure that all players that played for more than one team
        p_uid = str(
            db_to_read.read(
                'select P' + player_type[0] +
                '_uniqueidentifier from player_' + player_type[:-2] +
                'ing where year = ' + str(year) +
                ' and pt_uniqueidentifier = (select '
                'pt_uniqueidentifier from player_teams where playerid = "' +
                player_id + '" and '
                'teamid = "' + team_id + '");')[0][0])
        db_to_read.close()
        db = PitchFXDatabaseConnection(sandbox_mode=True)
        if x_coord == 'None' or y_coord == 'None':
            if velocity == 'None':
                db.write(
                    'insert into ' + player_type +
                    '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, P'
                    + player_type[0] +
                    '_uniqueidentifier) values (default, "' + player_id +
                    '", ' + str(year) + ', ' + month + ', ' + day + ', "' +
                    match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' +
                    outcome + '", "' + trajectory + '", "' + field + '", "' +
                    direction + '", ' + p_uid + ');')
            else:
                db.write(
                    'insert into ' + player_type +
                    '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, velocity, P'
                    + player_type[0] +
                    '_uniqueidentifier) values (default, "' + player_id +
                    '", ' + str(year) + ', ' + month + ', ' + day + ', "' +
                    match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' +
                    outcome + '", "' + trajectory + '", "' + field + '", "' +
                    direction + '", ' + velocity + ', ' + p_uid + ');')
        else:
            if velocity == 'None':
                db.write(
                    'insert into ' + player_type +
                    '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, x, y, P'
                    + player_type[0] +
                    '_uniqueidentifier) values (default, "' + player_id +
                    '", ' + str(year) + ', ' + month + ', ' + day + ', "' +
                    match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' +
                    outcome + '", "' + trajectory + '", "' + field + '", "' +
                    direction + '", ' + x_coord + ', ' + y_coord + ', ' +
                    p_uid + ');')
            else:
                db.write(
                    'insert into ' + player_type +
                    '_pitches (pitchid, playerid, year, month, day, matchup, count,'
                    ' pitch_type, swing_take, ball_strike, outcome, trajectory, field, direction, x, y, velocity,'
                    ' P' + player_type[0] +
                    '_uniqueidentifier) values (default, "' + player_id +
                    '", ' + str(year) + ', ' + month + ', ' + day + ', "' +
                    match_up + '", "' + str(count) + '", "' + pitch_type +
                    '", "' + swing_take + '", "' + ball_strike + '", "' +
                    outcome + '", "' + trajectory + '", "' + field + '", "' +
                    direction + '", ' + x_coord + ', ' + y_coord + ', ' +
                    velocity + ', ' + p_uid + ');')
        db.close()
Exemplo n.º 6
0
def get_teams(year):
    db = DatabaseConnection(sandbox_mode)
    teams = db.read('select teamid from team_years where year = ' + str(year) +
                    ';')
    db.close()
    return teams
def gather_team_home_numbers(team_id, team_key, year, team_count):
    logger.log("\tCalculating home numbers and downloading manager data: " + team_id)
    db = DatabaseConnection(sandbox_mode)
    if len(db.read('select BY_uniqueidentifier from ballpark_years where teamId = "' + team_id + '" and year = '
                   + str(year) + ';')) == 0:
        global pages
        table = str(pages[team_key])
        manager_ids = {}
        try:
            manager_data = table.split('<strong>Manager')[1].split('<p>')[0].split('<a href="/managers/')[1:]
            home_row = table.split('<h2>Home or Away</h2>')[1].split('<tbody>')[1].split('</tbody>')[0] \
                .split('<tr >')[1]
            ballpark_platoon_splits = table.split('<h2>Hit Location</h2>')[1].split('<tbody>')[1].split('</tbody>')[0]\
                .split('<tr >')
            hit_trajectory_table = table.split('<h2>Hit Trajectory</h2>')[1].split('<tbody>')[1].split('</tbody>')[0]\
                .split('<tr >')
            stats = ['AB', 'H', '2B', '3B', 'HR']
            overall_stats = {}
            home_percent = {}
            season_total_row = table.split('<h2>Season Totals</h2>')[1].split('<tbody>')[1].split('</tbody>')[0].\
                split('<tr>')[1]
            for stat in stats:
                league_total = int(db.read("select " + stat + " from years where year = " + str(year) + ";")[0][0])
                team_total = int(season_total_row.split('data-stat="' + stat + '">')[1].split('<')[0])
                overall_stats[stat] = team_total / ((league_total - team_total) / (team_count - 2))
                home = int(home_row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                home_percent[stat] = home / team_total
            db.close()
            stat_translate = {'AB': 'AB', 'H': 'H', '2B': 'double', '3B': 'triple', 'HR': 'homerun'}
            r_location = {'Up Mdle': 'centerfield', 'Opp Fld': 'rightfield', 'Pulled': 'leftfield'}
            l_location = {'Up Mdle': 'centerfield', 'Opp Fld': 'leftfield', 'Pulled': 'rightfield'}
            location = {'AB_centerfield': 0, 'AB_leftfield': 0, 'AB_rightfield': 0,
                        'H_centerfield': 0, 'H_leftfield': 0, 'H_rightfield': 0,
                        'double_centerfield': 0, 'double_leftfield': 0, 'double_rightfield': 0,
                        'triple_centerfield': 0, 'triple_leftfield': 0, 'triple_rightfield': 0,
                        'homerun_centerfield': 0, 'homerun_leftfield': 0, 'homerun_rightfield': 0}
            trajectory = {"Ground_Balls_PA": 0, "Ground_Balls_AB": 0, "Ground_Balls_H": 0, "Ground_Balls_2B": 0,
                          "Ground_Balls_3B": 0, "Ground_Balls_HR": 0, "Fly_Balls_PA": 0, "Fly_Balls_AB": 0,
                          "Fly_Balls_H": 0, "Fly_Balls_2B": 0, "Fly_Balls_3B": 0, "Fly_Balls_HR": 0,
                          "Line_Drives_PA": 0, "Line_Drives_AB": 0, "Line_Drives_H": 0, "Line_Drives_2B": 0,
                          "Line_Drives_3B": 0, "Line_Drives_HR": 0}
            for row in ballpark_platoon_splits:
                if '-RHB' in row:
                    handedness = "R"
                elif '-LHB' in row:
                    handedness = "L"
                else:
                    continue
                for stat in stats:
                    if handedness == "R":
                        location[stat_translate[stat] + '_' + r_location[row.split('data-stat="split_name" >')[1]
                            .split('-')[0]]] += (int(row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                                                 * home_percent[stat]) / overall_stats[stat]
                    else:
                        location[stat_translate[stat] + '_' + l_location[row.split('data-stat="split_name" >')[1]
                            .split('-')[0]]] += (int(row.split('data-stat="' + stat + '" >')[1].split('<')[0])
                                                 * home_percent[stat]) / overall_stats[stat]
            for row in hit_trajectory_table:
                for key, value in trajectory.items():
                    if key.split('_')[0] + ' ' + key.split('_')[1] in row:
                        stat = key.split('_')[-1]
                        if stat != 'PA':
                            trajectory[key] = int(row.split('data-stat="' + key.split('_')[2] + '" >')[1].
                                                  split('<')[0]) * home_percent[stat]
        except (IndexError, ZeroDivisionError):
            table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/teams/' + team_key + '/' + str(year)
                                              + '.shtml'), 'html.parser'))
            manager_data = table.split('<strong>Manager')[1].split('<p>')[0].split('<a href="/managers/')[1:]
            location = {}
            trajectory = {}
        manager_page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/managers/' + manager_data[0]
                                                 .split('.shtml')[0] + '.shtml'), 'html.parser'))
        try:
            manager_pic_url = manager_page.split('<img class="" src="')[1].split('"')[0]
            urlretrieve(manager_pic_url, os.path.join("interface", "static", "images", "model", "managers",
                                                      manager_data[0].split('.shtml')[0] + ".jpg"))
        except Exception as e:
            logger.log('\t\t' + str(e))
        try:
            team_pic_url = table.split('<div class="media-item logo loader">')[1].split('<')[1].split('src="')[1].\
                split('"')[0]
        except IndexError:
            table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/teams/' + team_key + '/' + str(year)
                                              + '.shtml'), 'html.parser'))
            team_pic_url = table.split('<div class="media-item logo loader">')[1].split('<')[1].split('src="')[1].\
                split('"')[0]
        try:
            urlretrieve(team_pic_url, os.path.join("interface", "static", "images", "model", "teams", team_id
                                                   + str(year) + ".jpg"))
        except Exception as e:
            logger.log('\t\t' + str(e))
        for i in manager_data:
            manager_ids[i.split('.shtml')[0]] = i.split('(')[1].split(')')[0]
        try:
            park_name = table.split('<strong>Ballpark')[1].split('</strong> ')[1].split('</p>')[0][:-1]
        except IndexError as e:
            logger.log('\t\t' + str(e))
            park_name = "No Home Field"
        write_to_db(team_id, location, trajectory, manager_ids, year, park_name)
Exemplo n.º 8
0
def consolidate_player_stats(ty_uid, player_type, year):
    player_stats = {}
    db = DatabaseConnection(sandbox_mode=True)
    if player_type != 'fielding':
        for p_uid in db.read('select p_uid from direction_' + player_type +
                             ' group by p_uid;'):
            p_uids = [p_uid[0]]
            if player_is_on_this_team(ty_uid, p_uid, player_type, year):
                if player_was_on_more_than_one_team(p_uid, player_type, year):
                    p_uids.append(
                        get_uid_of_player_for_this_team(
                            ty_uid, p_uid, player_type, year))
                player_id = get_player_id(p_uid, player_type)
                player_stats[player_id] = {}
                player_stats[player_id]['standard_' + player_type +
                                        '_stats'] = {}
                for uid in p_uids:
                    player_stats[player_id]['standard_' + player_type + '_stats'][get_team_id(uid, player_type)] = \
                        consolidate_traditional_player_stats(
                            db.read('select * from player_' + player_type + ' where p' + player_type[0]
                                    + '_uniqueidentifier = ' + str(uid) + ';')[0], get_db_field_names(
                                db.read('describe player_' + player_type + ';')))
                try:
                    with open(
                            os.path.join(
                                "..", "background", player_type +
                                "_pitch_fx_tables.csv")) as tables_file:
                        tables = tables_file.readlines()
                except FileNotFoundError:
                    with open(os.path.join("..", "..", "..", "background", player_type + "_pitch_fx_tables.csv")) as \
                            tables_file:
                        tables = tables_file.readlines()
                player_stats[player_id]['advanced_' + player_type +
                                        '_stats'] = {}
                for table in tables:
                    player_stats[player_id]['advanced_' + player_type + '_stats'][table[:-1]] = \
                        consolidate_pitch_fx(db.read('select * from ' + table[:-1] + ' where p_uid = ' + str(p_uid[0])
                                                     + ';'), table[:-1],
                                             get_db_field_names(db.read('describe ' + table[:-1] + ';')))
    else:
        for player_role in ['batting', 'pitching']:
            for p_uid in db.read('select p_uid from direction_' + player_role +
                                 ' group by p_uid;'):
                p_uids = [p_uid[0]]
                try:
                    if player_is_on_this_team(
                            ty_uid,
                            db.read(
                                player_fielding_uid_query(
                                    p_uid[0],
                                    player_role,
                                    year,
                                    selector='pf_uniqueidentifier'))[0],
                            player_type, year):
                        if player_was_on_more_than_one_team(
                                p_uid, player_type, year):
                            p_uids.append(
                                get_uid_of_player_for_this_team(
                                    ty_uid, p_uid, player_type, year))
                        player_id = get_player_id(p_uid, player_role)
                        for uid in p_uids:
                            player_stats[
                                player_id] = consolidate_traditional_player_stats(
                                    db.read(
                                        player_fielding_uid_query(
                                            uid, player_role, year))[0],
                                    get_db_field_names(
                                        db.read('describe player_fielding;')))
                except IndexError:
                    continue
    db.close()
    return player_stats
Exemplo n.º 9
0
            logger.log('\t' + winner.get_team_id() + ' wins the ' +
                       str(games) + ' game series - ' + str(away_wins) +
                       ' to ' + str(home_wins))
        else:
            winner = home_team
            logger.log('\t' + winner.get_team_id() + ' wins the ' +
                       str(games) + ' game series - ' + str(home_wins) +
                       ' to ' + str(away_wins))
        return winner.get_team_id() + ' wins the ' + str(games) + ' game series: ' + away_team.get_team_id()\
               + ' ' + str(away_wins) + ' | ' + home_team.get_team_id() + ' ' + str(home_wins)
    else:
        return 'The ' + str(away_wins +
                            home_wins) + ' game series has ended in a tie'


from utilities.database.wrappers.baseball_data_connection import DatabaseConnection
from ast import literal_eval

away_year = 2017
home_year = 2017
db = DatabaseConnection(sandbox_mode=True)
away_year_info = literal_eval(
    db.read('select year_info from years where year = ' + str(away_year) +
            ';')[0][0])
home_year_info = literal_eval(
    db.read('select year_info from years where year = ' + str(home_year) +
            ';')[0][0])
db.close()
simulation('MIN', away_year, away_year_info, 'LAD', home_year, home_year_info,
           1)
Exemplo n.º 10
0
from utilities.database.wrappers.baseball_data_connection import DatabaseConnection
from ast import literal_eval

db = DatabaseConnection(sandbox_mode=True)
print(
    literal_eval(
        db.read(
            'select team_info from team_years where teamid = "min" and year = 2017;'
        )[0][0])['batter_stats']['doziebr01']['advanced_batting_stats']
    ['trajectory_batting']['vl']['0-0'])  #.keys())
db.close()
Exemplo n.º 11
0
 def retrieve_league(self):
     db = DatabaseConnection(sandbox_mode)
     league = db.read('select league from team_years where teamid = "' + self.home_team + '" and year = '
                      + str(self.year) + ';')[0][0].upper()
     db.close()
     return league
Exemplo n.º 12
0
def get_ws_winner(year):
    db = DatabaseConnection(sandbox_mode)
    champ = db.read('select ws_champ from years where year = ' + str(year) +
                    ';')[0][0]
    db.close()
    return champ
Exemplo n.º 13
0
 def set_batting_handedness(self):
     db = DatabaseConnection(sandbox_mode=True)
     self.bats_with = db.read(
         'select batsWith from players where playerId = "' +
         self.player_id + '";')[0][0]
     db.close()
Exemplo n.º 14
0
 def retrieve_throwing_handedness(self):
     db = DatabaseConnection(sandbox_mode)
     self.throws_with = db.read(
         'select throwsWith from players where playerId = "' +
         self.player_id + '";')[0][0]
     db.close()
def reduce_functionality(year, ent):
    stats1 = {
        "To Infield_PA": ["pa_infield", ""],
        "To Outfield_PA": ["pa_outfield", ""],
        "To Infield_H": ["infield_hits", ""],
        "Ball In Play_PA": ["pa_balls_in_play", ""],
        "Pulled_PA_R": ["r_pa_pulled", ""],
        "Up Mdle_PA_R": ["r_pa_middle", ""],
        "Opp Fld_PA_R": ["r_pa_oppo", ""],
        "To Infield_AB": ["ab_infield", ""],
        "To Outfield_AB": ["ab_outfield", ""],
        "Ball In Play_AB": ["ab_balls_in_play", ""],
        "Pulled_AB_R": ["r_ab_pulled", ""],
        "Up Mdle_AB_R": ["r_ab_middle", ""],
        "Opp Fld_AB_R": ["r_ab_oppo", ""],
        "Pulled_H_R": ["r_h_pull", ""],
        "Up Mdle_H_R": ["r_h_middle", ""],
        "Opp Fld_H_R": ["r_h_oppo", ""],
        "Pulled_2B_R": ["r_double_pull", ""],
        "Up Mdle_2B_R": ["r_double_middle", ""],
        "Opp Fld_2B_R": ["r_double_oppo", ""],
        "Pulled_3B_R": ["r_triple_pull", ""],
        "Up Mdle_3B_R": ["r_triple_middle", ""],
        "Opp Fld_3B_R": ["r_triple_oppo", ""],
        "Pulled_HR_R": ["r_hr_pull", ""],
        "Up Mdle_HR_R": ["r_hr_middle", ""],
        "Opp Fld_HR_R": ["r_hr_oppo", ""],
        "Pulled_PA_L": ["l_pa_pulled", ""],
        "Up Mdle_PA_L": ["l_pa_middle", ""],
        "Opp Fld_PA_L": ["l_pa_oppo", ""],
        "Pulled_AB_L": ["l_ab_pulled", ""],
        "Up Mdle_AB_L": ["l_ab_middle", ""],
        "Opp Fld_AB_L": ["l_ab_oppo", ""],
        "Pulled_H_L": ["l_h_pull", ""],
        "Up Mdle_H_L": ["l_h_middle", ""],
        "Opp Fld_H_L": ["l_h_oppo", ""],
        "Pulled_2B_L": ["l_double_pull", ""],
        "Up Mdle_2B_L": ["l_double_middle", ""],
        "Opp Fld_2B_L": ["l_double_oppo", ""],
        "Pulled_3B_L": ["l_triple_pull", ""],
        "Up Mdle_3B_L": ["l_triple_middle", ""],
        "Opp Fld_3B_L": ["l_triple_oppo", ""],
        "Pulled_HR_L": ["l_hr_pull", ""],
        "Up Mdle_HR_L": ["l_hr_middle", ""],
        "Opp Fld_HR_L": ["l_hr_oppo", ""]
    }
    stats2 = {
        "Ground_Balls_PA": "",
        "Ground_Balls_AB": "",
        "Ground_Balls_H": "",
        "Ground_Balls_2B": "",
        "Ground_Balls_3B": "",
        "Ground_Balls_HR": "",
        "Fly_Balls_PA": "",
        "Fly_Balls_AB": "",
        "Fly_Balls_H": "",
        "Fly_Balls_2B": "",
        "Fly_Balls_3B": "",
        "Fly_Balls_HR": "",
        "Line_Drives_PA": "",
        "Line_Drives_AB": "",
        "Line_Drives_H": "",
        "Line_Drives_2B": "",
        "Line_Drives_3B": "",
        "Line_Drives_HR": ""
    }
    db = DatabaseConnection(sandbox_mode)
    player_id = db.read(
        "select playerId from player_teams where PT_uniqueidentifier = " +
        str(ent[0]) + ";")[0][0]
    if db.read(
            'select pa_infield from player_pitching where PT_uniqueidentifier = '
            + str(ent[0]) + ' and year = ' + str(year) + ';')[0][0] is None:
        try:
            page = str(
                BeautifulSoup(
                    urlopen(
                        'https://www.baseball-reference.com/players/split.fcgi?id='
                        + player_id + '&year=' + str(year) + '&t=p'),
                    'html.parser'))
        except Exception:
            global bad_gateway_data
            bad_gateway_data.append([ent, stats1, stats2])
            return
        try:
            table_rows1 = page.split('<h2>Hit Location</h2>')[1].split(
                '<tbody>')[1].split('</tbody>')[0].split('<tr')
            for row in table_rows1:
                for key, value in stats1.items():
                    if key.split('_')[0] in row:
                        if key not in [
                                "To Infield_PA", "To Outfield_PA",
                                "To Infield_H", "Ball In Play_PA"
                        ]:
                            if key[-1] != row.split('HB</th><td')[0][-1]:
                                continue
                        try:
                            stats1[key][1] = row.split('data-stat="' +
                                                       key.split('_')[1] +
                                                       '" >')[1].split('<')[0]
                        except IndexError:
                            continue
            table_rows2 = page.split('<h2>Hit Trajectory</h2>')[1].split(
                '<tbody>')[1].split('</tbody>')[0].split('<tr')
            for row in table_rows2:
                for key, value in stats2.items():
                    this_key = key.split('_')
                    if this_key[0] + ' ' + this_key[1] in row:
                        try:
                            stats2[key] = row.split('data-stat="' +
                                                    key.split('_')[2] +
                                                    '" >')[1].split('<')[0]
                        except IndexError:
                            continue
        except IndexError:
            for key, value in stats1.items():
                stats1[key][1] = '0'
            for key, value in stats2.items():
                stats2[key] = '0'
        finally:
            write_to_file(year, ent[0], stats1, stats2)
Exemplo n.º 16
0
def get_year_list():
    db = DatabaseConnection(sandbox_mode=False)
    years = db.read('select year from years;')
    db.close()
    return reversed([year[0] for year in years])
Exemplo n.º 17
0
def get_oldest_year():
    db = DatabaseConnection(sandbox_mode)
    oldest_year = int(
        db.read("select year from years order by year limit 1;")[0][0])
    db.close()
    return oldest_year