# Optimise the blending weights on the level-2 (stacked) train predictions.
# Start the search from the manual init_weights; optim.x holds the best weights found.
optim = optimise_weights(all_train_preds, train_targets, init_weights, minimise=True)
optimised_weights = optim.x
print('optimised loss:', optim.fun)

# Compare the manual blend against the optimised blend on the train set.
train_preds = ktools.ensemble_preds(all_train_preds, init_weights)
score = tools.get_mae_loss(train_targets, train_preds)
print('score ens manual:', score, init_weights)

train_preds = ktools.ensemble_preds(all_train_preds, optimised_weights)
score = tools.get_mae_loss(train_targets, train_preds)
print('score ens with optim:', score, optimised_weights)

# Apply the optimised weights to the level-2 test predictions.
all_test_preds = convert_preds_to_list(new_test)
optim_preds = ktools.ensemble_preds(all_test_preds, optimised_weights)

print('generating predictions for the test set. weights:{}'.format(weights))
final_preds = tools.ensemble_preds(
    [preds_test_xgb, optim_preds, preds_test_nn], weights)
final_preds = final_preds * 1.1
sub_file_name = 'stk_3lvl2_models_x{}_f{}'.format(n_models, n_folds)
data.generate_simple_kaggle_file(final_preds, sub_file_name)

# Sanity check: score the same three-way blend on the train set.
final_preds_train = tools.ensemble_preds(
    [preds_train_xgb, train_preds, preds_train_nn], weights)
score = tools.get_mae_loss(train_targets, final_preds_train)
print('train score:{}'.format(score))
# msg = 'score ens:{}, w:{}.weights, file:{}'.format(score, optimised_weights, sub_file_name)
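# Hedged sketch, not the project's code: optimise_weights comes from a helper module
# that is not shown here. One plausible implementation, assuming scipy is available,
# wraps scipy.optimize.minimize to search for blend weights that minimise the MAE of
# the weighted ensemble. The name optimise_weights_sketch is hypothetical.
import numpy as np
from scipy.optimize import minimize

def optimise_weights_sketch(all_preds, targets, start_weights, minimise=True):
    """Hypothetical stand-in for optimise_weights: search for blending weights."""
    targets = np.asarray(targets)
    stacked = np.column_stack(all_preds)          # shape: (n_samples, n_models)

    def loss(weights):
        blended = np.average(stacked, axis=1, weights=weights)
        mae = np.mean(np.abs(targets - blended))
        return mae if minimise else -mae

    # keep each weight in [0, 1] and force the weights to sum to 1
    bounds = [(0.0, 1.0)] * stacked.shape[1]
    constraints = {'type': 'eq', 'fun': lambda w: 1.0 - np.sum(w)}
    return minimize(loss, start_weights, method='SLSQP',
                    bounds=bounds, constraints=constraints)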
######## keras
batch_size, epochs = 64, 30
model = model_params.get_keras(x_train.shape[1])
# nb_epoch is the Keras 1.x argument name; Keras 2+ expects epochs= instead.
history = model.fit(df_train.values, targets, nb_epoch=epochs, batch_size=batch_size)
model.history = history
preds_keras = model.predict(df_test.values).squeeze()
print("keras done!")

##### COMBINE XGBOOST RESULTS
ENS_WEIGHT = 0.80  # (try this weight)
weights = [0.53, 0.32, 0.14]
xgb_pred = tools.ensemble_preds([xgb_pred1, preds_cat, preds_keras], weights)
print("\nCombined XGBoost predictions:")
print(pd.DataFrame(xgb_pred).head())

# Free memory before the next stage.
del train_df, x_train, x_test, properties, dtest, dtrain, xgb_pred1
gc.collect()

################
################
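# Hedged sketch, not the project's tools module: ensemble_preds is used throughout
# these scripts as a weighted blend of prediction arrays. Assuming that behaviour,
# a minimal version could look like the hypothetical helper below.
import numpy as np

def ensemble_preds_sketch(pred_list, weights):
    """Hypothetical weighted average of several aligned prediction arrays."""
    stacked = np.column_stack(pred_list)          # shape: (n_samples, n_models)
    return np.average(stacked, axis=1, weights=list(weights))

# e.g. blending three models with the weights used above:
# blended = ensemble_preds_sketch([xgb_pred1, preds_cat, preds_keras], [0.53, 0.32, 0.14])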
dtrain = xgb.DMatrix(df_train.values, targets)
dtest = xgb.DMatrix(df_test.values)
params = model_params.get_xtune11k()

# Bagged xgboost: retrain on a slightly different train set each round and
# accumulate the test predictions across the bags.
sub_preds = np.zeros(len(df_test))
num_boost_rounds = 110
for i in range(n_bags):
    model = xgb.train(params, dtrain, num_boost_round=num_boost_rounds)
    sub_preds = model.predict(dtest) + sub_preds

    # prepare for the next iteration: drop some outliers and rebuild the DMatrix
    df_bag, bag_targets = delete_some_outliers(df_train, targets)
    dtrain = xgb.DMatrix(df_bag.values, bag_targets)
    print(i, df_bag.shape)
    num_boost_rounds = 155
    # params['seed'] = i

sub_preds = sub_preds / n_bags
print(sub_preds[0:10])

# Blend the bagged predictions with a constant baseline prediction.
weights = (1 - BASELINE_WEIGHT, BASELINE_WEIGHT)
print(weights)
final_preds = tools.ensemble_preds(
    [sub_preds, np.repeat(BASELINE_PRED, len(sub_preds))], weights)
print(final_preds[0:10])
data.generate_simple_kaggle_file(final_preds, 'bagged_{}'.format(n_bags))
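# Hedged sketch: delete_some_outliers is a project helper whose body is not shown.
# One plausible bagging variant, assuming a pandas DataFrame and an array-like target,
# randomly drops part of the rows with extreme targets before the next round. The
# name, threshold and drop fraction below are all hypothetical.
import numpy as np

def delete_some_outliers_sketch(df, targets, threshold=0.4, drop_frac=0.5, seed=None):
    """Hypothetical stand-in: randomly remove a fraction of high-|target| rows."""
    rng = np.random.RandomState(seed)
    targets = np.asarray(targets)
    outlier_idx = np.where(np.abs(targets) > threshold)[0]
    if len(outlier_idx) == 0:
        return df, targets
    drop_idx = rng.choice(outlier_idx,
                          size=int(len(outlier_idx) * drop_frac),
                          replace=False)
    keep_mask = np.ones(len(targets), dtype=bool)
    keep_mask[drop_idx] = False
    return df[keep_mask], targets[keep_mask]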
df_train, targets, df_test = data.split_data(df, logerror)
dtrain = xgb.DMatrix(df_train.values, targets)
dtest = xgb.DMatrix(df_test.values)

sub_model = xgb.train(
    model_params.get_xtune11k(),
    dtrain,
    num_boost_round=105,
)
xgb_preds = sub_model.predict(dtest)
print("\nXGB predictions:")
print(pd.DataFrame(xgb_preds).head())

################
##  LightGBM  ##
################
sub_model = LGBMRegressor(**model_params.get_ltune7k())
sub_model.fit(df_train, targets)
lgb_preds = sub_model.predict(df_test)
print("\nLGB predictions:")
print(pd.DataFrame(lgb_preds).head())

# Weighted blend of the two models, written out as a submission file.
weights = (xgb_weight, 1 - xgb_weight)
final_preds = tools.ensemble_preds([xgb_preds, lgb_preds], weights)
data.generate_simple_kaggle_file(final_preds, 'ensemble')
print("\nEnsemble predictions:")
print(pd.DataFrame(final_preds).head())
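# Hedged sketch: generate_simple_kaggle_file lives in the project's data module and
# is not shown here. Assuming the usual Kaggle workflow of copying the sample
# submission and filling every prediction column, a minimal version might look like
# this; the function name and sample_path are hypothetical.
import pandas as pd

def generate_simple_kaggle_file_sketch(preds, file_name,
                                       sample_path='sample_submission.csv'):
    """Hypothetical stand-in: write predictions into a Kaggle submission CSV."""
    submission = pd.read_csv(sample_path)
    for col in submission.columns[1:]:            # every column except the id column
        submission[col] = preds
    out_path = 'sub_{}.csv'.format(file_name)
    submission.to_csv(out_path, index=False, float_format='%.4f')
    return out_path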