示例#1
0
def update_best(task, score, ans):
    """Store ``ans`` as the best answer for ``task`` if ``score`` beats the stored one.

    Best answers are kept as ``best/{task}@{score}@.txt``; the stored score is
    read back from the file name. Nothing is written when an existing file
    already carries a score greater than or equal to ``score``.

    Args:
        task: Task identifier used as the file-name prefix.
        score: Score achieved by ``ans``; compared against the integer score
            embedded in the existing file names.
        ans: Answer payload, passed through unchanged to ``write_answer``.

    Raises:
        ValueError: If the score field of a matching file name is not an integer.
    """
    files = glob(f"best/{task}@*@.txt")
    logger.debug(f"Have files for {files}")

    # glob returns matches in arbitrary order, so compare against the highest
    # stored score instead of whichever file happens to be listed first.
    stored_scores = [int(path.split('@')[1]) for path in files]
    if stored_scores and max(stored_scores) >= score:
        return

    # Write the new best before deleting anything: a failed write must not
    # leave the task without a stored answer.
    write_answer(ans, f"best/{task}@{score}@.txt")

    # Every previously stored file is now superseded (all scores are lower).
    for stale_path in files:
        os.remove(stale_path)
示例#2
0
def main():
    """Train an XGBoost model and a random forest; write the forest's predictions.

    Steps, in order:
      1. Load the training and test sets with ``read_data`` from
         ``train_fname`` and ``test_fname`` and print a quick summary.
      2. Run 5-fold XGBoost cross-validation, train a boosted model and
         predict on the test set.
      3. Fit a random forest, print its score on the training data, and pass
         its predicted labels (``output_fname``) and column 1 of its predicted
         probabilities (``"prob_" + output_fname``) to ``write_answer``.
    """
    train_x, train_y = read_data(train_fname)
    ID, test_x = read_data(test_fname, train_mode=False)
    print(ID)
    print(train_x)
    print(np.sum(train_y))

    # NOTE: no train/validation split is made here. The previous attempt at
    # shuffling indices could not run (random.shuffle works in place, returns
    # None, and cannot shuffle a range) and its result was never used.
    # TODO: wire in a validation split and a matching validation DMatrix.
    tr = xgb.DMatrix(train_x, label=train_y)
    te = xgb.DMatrix(test_x)

    # NOTE(review): the 'bst:'-prefixed keys are the legacy spelling; confirm
    # that the installed xgboost version still honours them.
    param = {'bst:max_depth':3, 'bst:eta':0.1, 'silent':0, 'objective':'binary:logistic' }
    param['nthread'] = 16
    param['eval_metric'] = 'auc'

    num_round = 100

    xgb.cv(param, tr, num_round, nfold=5, metrics={"error"})

    # Early stopping needs at least one evaluation set. Only the training
    # matrix exists at this point, so it is used as the watch list; replace it
    # with a held-out DMatrix once a validation split is available.
    model = xgb.train(param, tr, num_round, evals=[(tr, 'train')],
                      early_stopping_rounds=10)

    # Predict on the test matrix built above.
    # NOTE: ypred is not written anywhere; only the forest's output is saved.
    ypred = model.predict(te, ntree_limit=model.best_ntree_limit)

    clf = RandomForestClassifier(n_estimators=500,
                                 criterion='entropy',
                                 max_features='sqrt',
                                 max_depth=None,
                                 oob_score=False,
                                 n_jobs=-1,
                                 verbose=1)

    clf.fit(X=train_x, y=train_y)
    # Score on the same data the forest was fitted on.
    print(clf.score(train_x, train_y))
    pred = clf.predict(X=test_x)
    pred_prob = clf.predict_proba(X=test_x)[:,1]

    write_answer(output_fname, ID, pred, print_prob=False)
    write_answer("prob_"+output_fname, ID, pred_prob, print_prob=True)
示例#3
0
def main():
    """Fit a random forest on the training data and save its test-set predictions.

    Loads both data sets with ``read_data``, prints the IDs returned for the
    test file, the training features and the sum of the training labels, fits
    the classifier, prints its score on the training data, and hands two
    outputs to ``write_answer``: the predicted labels (under ``output_fname``)
    and column 1 of the predicted probabilities (under
    ``"prob_" + output_fname``).
    """
    features, labels = read_data(train_fname)
    ids, test_features = read_data(test_fname, train_mode=False)

    # Quick look at what was loaded.
    print(ids)
    print(features)
    print(np.sum(labels))

    forest_options = {
        'n_estimators': 500,
        'criterion': 'entropy',
        'max_features': 'sqrt',
        'max_depth': None,
        'oob_score': False,
        'n_jobs': -1,
        'verbose': 1,
    }
    forest = RandomForestClassifier(**forest_options)
    forest.fit(X=features, y=labels)

    # Score on the same data the forest was fitted on.
    train_score = forest.score(features, labels)
    print(train_score)

    hard_labels = forest.predict(X=test_features)
    class_probs = forest.predict_proba(X=test_features)
    second_column = class_probs[:, 1]

    write_answer(output_fname, ids, hard_labels, print_prob=False)
    write_answer("prob_" + output_fname, ids, second_column, print_prob=True)