def drop_columns(pitch_data):
    """Strip identifier and pitch-bookkeeping columns from ``pitch_data``.

    Two fixed column lists are handed, in order, to
    ``utils.drop_columns_by_list``; the value returned by the second call
    is what the caller gets back. A progress message is printed before
    returning.

    NOTE(review): ``pitch_data`` is presumably a pandas DataFrame; how the
    helper treats columns that are absent is not visible here -- confirm
    against ``utils``.
    """
    identifier_columns = [
        'p1_pitch_id', 'p0_pitch_id', 'pitch_data_id', 'team_id', 'game_id',
        'inning_id', 'half_inning_id', 'at_bat_id', 'gid', 'b1_id',
        'b1_team_id', 'team_abbrev',
    ]
    pitch_columns = [
        'p0_pitch_seqno', 'p1_pitch_seqno', 'p0_inning', 'result_type',
        'type_confidence', 'p0_at_bat_o', 'p0_pitch_des', 'nasty',
    ]

    # Optional pitchf/x columns are currently kept. To drop them too, pass
    # the following list through utils.drop_columns_by_list as well:
    #   ['pitch_count_atbat', 'pitch_count_team', 'start_speed', 'spin_dir',
    #    'x', 'y', 'sz_top', 'sz_bot', 'pfx_x', 'pfx_z', 'px', 'pz',
    #    'x0', 'y0', 'z0', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'break_y']

    # One helper call per group, ID columns first, then pitch columns.
    for unwanted in (identifier_columns, pitch_columns):
        pitch_data = utils.drop_columns_by_list(pitch_data, unwanted)

    print("dropped cols")
    return pitch_data
def add_run_diff(pitch_data):
    """Add a ``run_diff`` column and remove the two run-count columns.

    ``run_diff`` is ``runs_pitcher_team`` minus ``runs_batter_team``. The
    new column is written onto the object passed in; the two source
    columns are then removed via ``utils.drop_columns_by_list`` and that
    helper's return value is returned. A progress message is printed.
    """
    pitcher_runs = pitch_data['runs_pitcher_team']
    batter_runs = pitch_data['runs_batter_team']
    pitch_data['run_diff'] = pitcher_runs - batter_runs

    # The raw run counts are not kept once the differential exists.
    consumed = ['runs_pitcher_team', 'runs_batter_team']
    result = utils.drop_columns_by_list(pitch_data, consumed)

    print("added run diff")
    return result
def add_crunch_time(pitch_data):
    """Add a binary ``crunch_time`` column and remove ``inning``.

    ``crunch_time`` is 1 where ``inning`` is greater than 7 (i.e. after
    the 7th inning) and 0 otherwise. Missing innings are replaced with 0,
    the marker for "unknown inning", so they produce ``crunch_time == 0``.

    The ``inning`` column is rewritten and ``crunch_time`` is added on the
    object passed in; ``inning`` is then removed via
    ``utils.drop_columns_by_list`` and that helper's return value is
    returned. A progress message is printed.

    NOTE(review): ``pitch_data`` is presumably a pandas DataFrame whose
    ``inning`` values are numeric or numeric strings -- confirm with the
    loader.
    """
    # Fill missing values BEFORE casting: pandas raises when asked to cast
    # NaN to an integer dtype, so casting first made the fallback to 0
    # unreachable whenever an inning was actually missing.
    pitch_data['inning'] = (
        pitch_data['inning'].fillna(0).astype(dtype='int64')
    )
    pitch_data['crunch_time'] = np.where(pitch_data['inning'] > 7, 1, 0)

    # 'inning' is only needed to derive the flag.
    cols_to_drop = ['inning']
    pitch_data = utils.drop_columns_by_list(pitch_data, cols_to_drop)
    print("added crunch time")
    return pitch_data
def drop_season_pitch_id_cols(pd_train, pd_test):
    """Remove the ``season`` and ``pitcher_id`` columns from both inputs.

    Each input is passed through ``utils.drop_columns_by_list`` (the test
    set first, then the training set) and the results are returned as a
    ``(train, test)`` pair.
    """
    unwanted = ['season', 'pitcher_id']
    # Generator unpacking evaluates left to right: test, then train.
    pd_test, pd_train = (
        utils.drop_columns_by_list(frame, unwanted)
        for frame in (pd_test, pd_train)
    )
    return pd_train, pd_test