Exemplo n.º 1
0
    'L1_XGB_vH1t', 'L1_NN_vD2t', 'L1_XGB_vG1t', 'L1_XGB_vF1t', 'L1_NN_vE1t',
    'L1_RF_vD1t', 'L1_XGB_vE1t'
] + [
    'L1_RF_vA1t', 'L1_NN_vA1t', 'L1_NN_vC1t', 'L1_RF_vH1t', 'L1_XGB_vA1t',
    'L1_NN_vH2t'
] + ['L1_RF_vE1t', 'L1_XGB_vH1t', 'L1_ET_vH1t', 'L1_NN_vD2t', 'L1_RF_vH1t'] + [
    'L1_ET_vA2t', 'L1_XGB_vD1t', 'L1_RF_vH1t', 'L1_NN_vD2t', 'L1_RF_vE1t'
]
# Collapse repeated model names; np.unique also returns them sorted.
files_txt = np.unique(files_txt)

# Level-1 training features, one loader call per artefact.
all_train_x = fd.load_L1_train(files_txt)

# Targets for sequences 1..10, keeping only the entries whose sum along
# axis 1 is finite (drops anything containing NaN/inf).
all_train_y = fd.load_train_y(list(range(1, 11)))
all_train_y = all_train_y[np.isfinite(all_train_y.sum(axis=1))]

all_test_x = fd.load_submissions(files_txt)
train_seq = fd.get_clean_sequences(list(range(1, 11)))

# Only `rows` is used in this section; the second value returned by
# load_test is kept in `tmp`.
rows, tmp = fd.load_test(['ds_pir_v0'])

# Bundle everything loaded so far under fixed keys.
dataset = dict(
    train_x=all_train_x,
    train_y=all_train_y,
    train_seq=train_seq,
    test_x=all_test_x,
)

# Add past data
dataset = {
    'train_x':
    np.c_[all_train_x,
          fd.get_past_data(all_train_x, train_seq, 1, -9999, 200),
          fd.get_past_data(all_train_x, train_seq, 2, -9999, 200),
          fd.get_future_data(all_train_x, train_seq, 1, -9999, 200)],
# Read the class weights from ../public_data/class_weights.json into a NumPy
# array. The context manager closes the file handle as soon as parsing is
# done instead of leaving the open file to the garbage collector.
with open('../public_data/class_weights.json', 'r') as class_weights_file:
    class_weights = np.asarray(json.load(class_weights_file))
# Training uses sequences 1 through 10.
sequence_train = list(range(1, 11))

# Names of the feature sets passed to the loaders, in loading order.
data_source = [
    # accelerometer
    'ds_accel_M01_v1A_s2',
    'ds_accel_M01_v1B_s2',
    'ds_accel_M01_v2A',
    'ds_accel_M01_v2B',
    'ds_accel_M01_v2C',
    'ds_accel_M01_v2D',
    # rssi / pir
    'ds_rssi_v0',
    'ds_pir_M1s_v1',
    # video "fig"
    'ds_video_fig_M00_v1A_s2',
    'ds_video_fig_M00_v1B_s2',
    'ds_video_fig_M00_v2A',
    'ds_video_fig_M00_v2B',
    # video "mov"
    'ds_video_mov_M10_v1A_s2',
    'ds_video_mov_M10_v1B_s2',
    'ds_video_mov_M10_v2A',
    'ds_video_mov_M10_v2B',
    # video "sta"
    'ds_video_sta_M10_v1A_s2',
    'ds_video_sta_M10_v1B_s2',
    'ds_video_sta_M10_v2A',
    'ds_video_sta_M10_v2B',
]

# Load the training triple first, then the test rows/features, both from
# the same list of feature sets.
(all_train_x,
 all_train_y,
 train_seq) = fd.load_sequences(sequence_train, data_source)
rows, all_test_x = fd.load_test(data_source)

# Run the shared preprocessing step over train and test data together.
# NOTE(review): the option names suggest NaN-target removal and
# most-frequent imputation; confirm against fd.whole_preprocess.
prepwd_params = dict(
    remove_nan_targets=True,
    imputer_strategy='most_frequent',
)
(all_train_x,
 all_train_y,
 train_seq,
 rows,
 all_test_x) = fd.whole_preprocess(
    all_train_x, all_train_y, train_seq, rows, all_test_x,
    params=prepwd_params)

# Add preprocessed data