'L1_XGB_vH1t', 'L1_ET_vC1t', 'L1_NN_vG2t', 'L1_GLM_vA1t', 'L1_GLM_vA1' ] + [ 'L1_XGB_vH1t', 'L1_NN_vD2t', 'L1_XGB_vG1t', 'L1_XGB_vF1t', 'L1_NN_vE1t', 'L1_RF_vD1t', 'L1_XGB_vE1t' ] + [ 'L1_RF_vA1t', 'L1_NN_vA1t', 'L1_NN_vC1t', 'L1_RF_vH1t', 'L1_XGB_vA1t', 'L1_NN_vH2t' ] + ['L1_RF_vE1t', 'L1_XGB_vH1t', 'L1_ET_vH1t', 'L1_NN_vD2t', 'L1_RF_vH1t'] + [ 'L1_ET_vA2t', 'L1_XGB_vD1t', 'L1_RF_vH1t', 'L1_NN_vD2t', 'L1_RF_vE1t' ] files_txt = np.unique(files_txt) all_train_x = fd.load_L1_train(files_txt) all_train_y = fd.load_train_y([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) all_train_y = all_train_y[np.isfinite(all_train_y.sum(1))] all_test_x = fd.load_submissions(files_txt) train_seq = fd.get_clean_sequences([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) rows, tmp = fd.load_test(['ds_pir_v0']) dataset = { 'train_x': all_train_x, 'train_y': all_train_y, 'train_seq': train_seq, 'test_x': all_test_x } # Add past data dataset = { 'train_x': np.c_[all_train_x, fd.get_past_data(all_train_x, train_seq, 1, -9999, 200),
# Every L1 model gets the same weight (all ones), so the blend below is a
# plain arithmetic mean expressed as a weighted average.
weights = np.ones(len(files_txt))

if validate:
    # Accumulate the weighted train-side predictions of each L1 model.
    # NOTE(review): 16124 is presumably the number of training rows that
    # survive the finite-label filter below, and 20 the number of classes
    # -- confirm against fd.load_L1_train.
    validated = np.zeros((16124, 20))
    for model_weight, model_name in zip(weights, files_txt):
        weighted_train = fd.load_L1_train([model_name]) * model_weight
        validated = validated + weighted_train
    validated = validated / np.sum(weights)

    # Reload the targets and keep only the rows whose label sum is finite.
    all_train_y = fd.load_train_y([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    finite_rows = np.isfinite(all_train_y.sum(1))
    all_train_y = all_train_y[finite_rows]

    local_cv_score = fp.brier_score(all_train_y, validated, class_weights)
    print("Final Submission local-CV score: {}".format(local_cv_score))

# Predict L3 test data: same weighted average, applied to the test-side
# predictions returned by fd.load_submissions.
# NOTE(review): 16600 is presumably the number of test rows -- confirm.
predicted = np.zeros((16600, 20))
for model_weight, model_name in zip(weights, files_txt):
    weighted_test = fd.load_submissions([model_name]) * model_weight
    predicted = predicted + weighted_test
predicted = predicted / np.sum(weights)

name_to_save = 'L3_WA_vD2'

# Save files: create the output directory if it is not there yet.
directory = '../final_submission/'
if not os.path.exists(directory):
    os.makedirs(directory)

# Submission: the columns of `rows` followed by the blended predictions,
# written without the DataFrame index.
rows_frame = pd.DataFrame(rows)
predicted_frame = pd.DataFrame(predicted)
submission = pd.concat((rows_frame, predicted_frame), axis=1)
submission.columns = ['record_id', 'start', 'end'] + activity_names
submission_path = '{}{}_submission.csv'.format(directory, name_to_save)
submission.to_csv(submission_path, index=False)