Exemplo n.º 1
0
        )
        if j % 10 == 0:
            print "----BEST RESULT best_loss%.6f lr=%.3f,decay=%.3f,tol=%.4f ------ " % (
                best_loss,
                best_lr,
                best_decay,
                best_tol,
            )
    lr = best_lr
    decay = best_decay
    tol = best_tol
    print "----BEST RESULT best_loss%.6f lr=%.3f,decay=%f,tol=%f ------ " % (best_loss, best_lr, best_decay, best_tol)

# Build the stacking regressor from the searched (or predefined) hyper-parameters.
clf = StackingLinearRegression.StackingLinearRegression(lr, decay, max_epoch, tol)

# train with searched or defined parameters
clf.fit(preds_train, labels)

# NOTE(review): `df` and the counter `i` are not read anywhere in the visible
# code; they are kept in case code outside this view relies on them.
df = pd.DataFrame(index=sample.id.values)
i = 0

# Load the saved test-set predictions of every base model listed in
# param_list, one array per (loss, ntree, param) entry, in list order.
preds_test = []
for loss, ntree, param in param_list:
    part_test = pd.read_csv(otto_utils.get_test_preds_from_param(param, train_name, ntree), index_col=0)
    preds_test.append(np.asarray(part_test))
    i += 1

# predict and output results
preds = clf.predict(preds_test)
preds = pd.DataFrame(preds, index=sample.id.values, columns=sample.columns[1:])
preds.to_csv("xgb_" + train_name + "_my_stacking_benchmark.csv", index_label="id")

# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed even if dumping raises, instead of leaking the handle
# returned by a bare open() call.
with open("xgb_" + train_name + "_my_stacking.model", "wb") as model_file:
    cp.dump(clf, model_file, protocol=1)
Exemplo n.º 2
0
    full_param = other.copy()
    full_param.update(param)
    plst = full_param.items()
    # train on the training set
    bst,loss,ntree = xgb.train(full_param, dtrain, num_round, watchlist)

    # dump bst model
    bst_dump_model = otto_utils.get_model_name_from_param(param, train_name, ntree)
    bst.save_model(bst_dump_model)

    # output train predictions
    preds = bst.predict(dtrain)
    preds = pd.DataFrame(preds, index=train_ids, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_train_preds_from_param(param, train_name, ntree), index_label='id')

    # output test predictions
    preds = bst.predict(dtest)
    preds = pd.DataFrame(preds, index=sample.id.values, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_test_preds_from_param(param, train_name, ntree), index_label='id')

    if avg_preds is None:
        avg_preds = preds
    else:
        avg_preds += preds
    
# Convert the accumulated sum of predictions into a mean over all parameter
# sets; the division is done in place on the accumulator.
n_param_sets = len(param_list)
avg_preds /= n_param_sets

# create submission file
test_ids = sample.id.values
class_columns = sample.columns[1:]
avg_preds = pd.DataFrame(data=avg_preds, index=test_ids, columns=class_columns)
submission_path = "".join(['xgb_avg_benchmark_', train_name, '.csv'])
avg_preds.to_csv(submission_path, index_label='id')