Пример #1
0
    'L1_XGB_vH1t', 'L1_ET_vC1t', 'L1_NN_vG2t', 'L1_GLM_vA1t', 'L1_GLM_vA1'
] + [
    'L1_XGB_vH1t', 'L1_NN_vD2t', 'L1_XGB_vG1t', 'L1_XGB_vF1t', 'L1_NN_vE1t',
    'L1_RF_vD1t', 'L1_XGB_vE1t'
] + [
    'L1_RF_vA1t', 'L1_NN_vA1t', 'L1_NN_vC1t', 'L1_RF_vH1t', 'L1_XGB_vA1t',
    'L1_NN_vH2t'
] + ['L1_RF_vE1t', 'L1_XGB_vH1t', 'L1_ET_vH1t', 'L1_NN_vD2t', 'L1_RF_vH1t'] + [
    'L1_ET_vA2t', 'L1_XGB_vD1t', 'L1_RF_vH1t', 'L1_NN_vD2t', 'L1_RF_vE1t'
]
# Drop repeated model names. np.unique returns the distinct names as a sorted
# NumPy array, and that array is what the loaders below receive.
files_txt = np.unique(files_txt)

# Training-side inputs for the selected models, plus the targets.
# NOTE(review): 1..10 are presumably the training sequence ids -- confirm.
all_train_x = fd.load_L1_train(files_txt)
all_train_y = fd.load_train_y(list(range(1, 11)))
# Keep only the target rows whose row-sum is finite.
all_train_y = all_train_y[np.isfinite(all_train_y.sum(axis=1))]

# Test-side inputs for the same models, and the sequence data for ids 1..10.
all_test_x = fd.load_submissions(files_txt)
train_seq = fd.get_clean_sequences(list(range(1, 11)))

# `rows` is used further down when the submission frame is assembled;
# `tmp` is not referenced again in the visible code.
rows, tmp = fd.load_test(['ds_pir_v0'])

# Bundle everything loaded above under fixed keys.
dataset = dict(
    train_x=all_train_x,
    train_y=all_train_y,
    train_seq=train_seq,
    test_x=all_test_x,
)

# Add past data
dataset = {
    'train_x':
    np.c_[all_train_x,
          fd.get_past_data(all_train_x, train_seq, 1, -9999, 200),
# One weight per model file. Every weight is 1, so the blends below reduce to
# a plain mean over the entries of files_txt.
weights = np.ones(len(files_txt))

if validate:
    # Weighted sum of the per-model training-side arrays, then normalise.
    # NOTE(review): the hard-coded (16124, 20) shape has to match what
    # fd.load_L1_train returns for a single file -- confirm.
    validated = np.zeros((16124, 20))
    for file_txt, weight in zip(files_txt, weights):
        validated = validated + fd.load_L1_train([file_txt]) * weight
    validated = validated / weights.sum()

    # Reload the targets and drop rows with a non-finite row-sum before
    # scoring the blend against them.
    all_train_y = fd.load_train_y(list(range(1, 11)))
    all_train_y = all_train_y[np.isfinite(all_train_y.sum(axis=1))]
    local_cv_score = fp.brier_score(all_train_y, validated, class_weights)
    print("Final Submission local-CV score: {}".format(local_cv_score))

# Predict L3 test data: the same blend, applied to the per-model submissions.
# NOTE(review): (16600, 20) likewise has to match the shape returned by
# fd.load_submissions for a single file -- confirm.
predicted = np.zeros((16600, 20))
for file_txt, weight in zip(files_txt, weights):
    predicted = predicted + fd.load_submissions([file_txt]) * weight
predicted = predicted / weights.sum()

name_to_save = 'L3_WA_vD2'

# Save files: create the output directory on first use.
directory = '../final_submission/'
if not os.path.exists(directory):
    os.makedirs(directory)

# Submission: `rows` goes on the left, the blended predictions on the right.
# NOTE(review): presumably `rows` carries the record_id/start/end columns and
# `activity_names` has one entry per prediction column -- confirm.
submission = pd.concat([pd.DataFrame(rows), pd.DataFrame(predicted)], axis=1)
submission.columns = ['record_id', 'start', 'end'] + activity_names
submission.to_csv(
    '{}{}_submission.csv'.format(directory, name_to_save), index=False)