Пример #1
0
def getData():
    """Load the data set, engineer features and return the split.

    Calls ``loadData`` with ``p=0.1``, passes the result through
    ``featureEngineer`` and hands it to ``preprocessing``.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. Note that this is a
        different order from the ``(x_train, x_test, y_train, y_test)``
        sequence that ``preprocessing`` yields.
    """
    load_p = 0.1
    engineered = featureEngineer(loadData(p=load_p))

    # Pre-process the training data; preprocessing must yield exactly
    # four values, in the order unpacked here.
    x_train, x_test, y_train, y_test = preprocessing(engineered)

    # Regroup so that each feature set is followed by its labels.
    return x_train, y_train, x_test, y_test
Пример #2
0
def predict_clf(model):
    """Feed every batch of the janestreet test environment through ``model``.

    For each ``(test_df, sample_prediction_df)`` pair produced by
    ``env.iter_test()``:

    * if the batch's ``'weight'`` value is greater than 0, the batch is run
      through ``featureEngineer``, the columns whose name contains
      ``'feature'`` are selected, missing values are replaced by ``0.0`` and
      the first element of ``model.predict(...)`` is used as the action;
    * otherwise the action is ``0``.

    The action is written to ``sample_prediction_df.action`` and submitted
    with ``env.predict``.

    Args:
        model: Object exposing ``predict(X)`` whose result can be indexed
            with ``[0]``.
    """
    env = janestreet.make_env()
    for test_df, sample_prediction_df in env.iter_test():
        # Default action; only replaced when the batch has positive weight.
        action = 0
        # NOTE(review): ``.item()`` presumably relies on each batch holding a
        # single row - confirm against the environment's contract.
        if test_df['weight'].item() > 0:
            engineered = featureEngineer(test_df)
            feature_mask = engineered.columns.str.contains('feature')
            X_test = engineered.loc[:, feature_mask].fillna(0.0)
            action = model.predict(X_test)[0]
        sample_prediction_df.action = action
        env.predict(sample_prediction_df)
Пример #3
0
        print("%f (%f) with: %r" % (mean, stdev, param))
        y.append(mean)
    plt.plot(y)
    plt.savefig("./output/" + param_name + ".png")


# Script entry point: switch to the project directory, load and
# feature-engineer the train/test frames, pre-process the training data and
# define the XGBoost hyper-parameter candidates.
if __name__ == "__main__":
    # All relative paths used by the script are resolved against this
    # directory.
    newpath = "/home/code"
    os.chdir(newpath)

    # data_explore()

    # The real work starts here.
    # `p` is forwarded to loadData as a keyword argument.
    p = 0.0001
    train = loadData(p=p)
    train = featureEngineer(train)
    # print(train.head())

    # Compute the model score (currently disabled; the disabled print below
    # would emit the score with a "model score" label).
    # score = Score(model, train)
    # print("模型评分:%.2f" % score)
    # NOTE(review): `test` is produced by the same loadData(p=p) call as
    # `train`; whether it yields a different sample is not visible here.
    test = loadData(p=p)
    test = featureEngineer(test)

    # Pre-process the training data; here preprocessing is unpacked into
    # exactly two values.
    X_train, y_train = preprocessing(train)

    # xgboost
    print("XGBoost")
    # Candidate values for the max_depth and subsample hyper-parameters.
    max_depth = [3, 4, 5]
    subsample = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
Пример #4
0
    iter_test = env.iter_test()
    for (test_df, sample_prediction_df) in iter_test:
        if test_df['weight'].item() > 0:
            X_test = test_df.loc[:, test_df.columns.str.contains('feature')]
            X_test = X_test.fillna(-999)
            y_preds = model.predict(X_test)[0]
        else:
            y_preds = 0
        # print(y_preds)
        sample_prediction_df.action = y_preds
        env.predict(sample_prediction_df)


# Script entry point: switch to the project directory, read the training
# data, build a model from it and run the prediction loop.
if __name__ == "__main__":
    # All relative paths below are resolved against this directory.
    newpath = "/home/code"
    os.chdir(newpath)

    # data_explore()

    # The real work starts here.
    # Only the first 10000 rows of train.csv are read.
    train = pd.read_csv("./train.csv", nrows=10000)
    # NOTE(review): `feature` is not used anywhere in the visible code.
    feature = pd.read_csv("./features.csv")
    train = featureEngineer(train)
    model = modeling(train)
    # Compute the model score (currently disabled; the disabled print below
    # would emit the score with a "model score" label).
    # score = Score(model, train)
    # print("模型评分:%.2f" % score)

    # Run the predictions.
    predict_clf(model)