def drop_columns(pitch_data):
    """Strip identifier columns and unused pitch columns from the dataset.

    The columns are removed in two passes through
    ``utils.drop_columns_by_list``: first the ID-style columns, then the
    pitch-description columns.

    Args:
        pitch_data: The pitch dataset to trim (presumably a pandas
            DataFrame -- confirm against callers).

    Returns:
        Whatever ``utils.drop_columns_by_list`` returns after the second pass.
    """
    # Identifier columns.
    identifier_columns = [
        'p1_pitch_id', 'p0_pitch_id', 'pitch_data_id', 'team_id', 'game_id',
        'inning_id', 'half_inning_id', 'at_bat_id', 'gid', 'b1_id',
        'b1_team_id', 'team_abbrev',
    ]

    # Pitch-level columns that are not wanted downstream.
    unused_pitch_columns = [
        'p0_pitch_seqno', 'p1_pitch_seqno', 'p0_inning', 'result_type',
        'type_confidence', 'p0_at_bat_o', 'p0_pitch_des', 'nasty',
    ]

    # Optional pitchf/x columns that could also be dropped (left disabled):
    #   'pitch_count_atbat', 'pitch_count_team', 'start_speed', 'spin_dir',
    #   'x', 'y', 'sz_top', 'sz_bot', 'pfx_x', 'pfx_z', 'px', 'pz',
    #   'x0', 'y0', 'z0', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'break_y'

    trimmed = pitch_data
    for column_group in (identifier_columns, unused_pitch_columns):
        trimmed = utils.drop_columns_by_list(trimmed, column_group)

    print("dropped cols")
    return trimmed
def add_run_diff(pitch_data):
    """Add a ``run_diff`` column and drop the two source run columns.

    ``run_diff`` is ``runs_pitcher_team`` minus ``runs_batter_team``. The new
    column is assigned on the object passed in before the source columns are
    removed via ``utils.drop_columns_by_list``.

    Args:
        pitch_data: Pitch dataset holding ``runs_pitcher_team`` and
            ``runs_batter_team`` columns.

    Returns:
        The result of ``utils.drop_columns_by_list`` with both source
        columns removed.
    """
    pitcher_runs = pitch_data['runs_pitcher_team']
    batter_runs = pitch_data['runs_batter_team']
    pitch_data['run_diff'] = pitcher_runs - batter_runs

    # The raw run totals are dropped once the differential has been stored.
    source_columns = ['runs_pitcher_team', 'runs_batter_team']
    result = utils.drop_columns_by_list(pitch_data, source_columns)

    print("added run diff")
    return result
def add_crunch_time(pitch_data):
    """Add a binary ``crunch_time`` column and drop the ``inning`` column.

    ``crunch_time`` is 1 for rows whose inning is greater than 7 and 0
    otherwise. Rows with a missing inning are treated as inning 0 (unknown)
    and therefore get ``crunch_time`` 0.

    Args:
        pitch_data: Pitch dataset with an ``inning`` column (presumably a
            pandas DataFrame -- confirm against callers). The ``inning`` and
            ``crunch_time`` columns are assigned on this object.

    Returns:
        The result of ``utils.drop_columns_by_list`` after removing
        ``inning``.
    """
    # Fill missing innings BEFORE the integer cast: casting a column that
    # still contains NaN to int64 raises, so doing the cast first would make
    # the fill unreachable for exactly the rows it is meant to handle.
    # 0 stands for "unknown inning".
    innings = pitch_data['inning'].fillna(0).astype(dtype='int64')
    pitch_data['inning'] = innings

    pitch_data['crunch_time'] = np.where(innings > 7, 1, 0)

    cols_to_drop = ['inning']
    pitch_data = utils.drop_columns_by_list(pitch_data, cols_to_drop)

    print("added crunch time")
    return pitch_data
def drop_season_pitch_id_cols(pd_train, pd_test):
    """Drop the ``season`` and ``pitcher_id`` columns from both datasets.

    Args:
        pd_train: Training dataset.
        pd_test: Test dataset.

    Returns:
        A ``(train, test)`` tuple holding the results of
        ``utils.drop_columns_by_list`` for each dataset.
    """
    unwanted = ['season', 'pitcher_id']

    # The test set is processed first, then the training set.
    trimmed_test = utils.drop_columns_by_list(pd_test, unwanted)
    trimmed_train = utils.drop_columns_by_list(pd_train, unwanted)

    return trimmed_train, trimmed_test