示例#1
0
def test_multiple_runs():
    """Run two models through one, unspecified, and listed tournaments."""

    def check(prediction, n_columns, names=None):
        # Verify shape[1] first, then (only when given) the tournament names.
        ok_(prediction.shape[1] == n_columns, 'wrong number of tournaments')
        if names is not None:
            ok_(prediction.tournaments() == names, 'wrong tournaments')

    data = testing.play_data()
    model_list = [nx.logistic(), nx.fifty()]

    with testing.HiddenPrints():

        # A single tournament, passed either as a name or as a number.
        check(nx.production(model_list, data, 'bernie'), 2)
        check(nx.backtest(model_list, data, 2), 2)
        check(nx.run(model_list, nx.ValidationSplitter(data), 'ken'), 2)

        # No tournament argument at all.
        check(nx.production(model_list, data), 10)
        check(nx.backtest(model_list, data), 10)
        check(nx.run(model_list, nx.ValidationSplitter(data)), 10)

        # A list of tournaments; names are expected back as
        # ['bernie', 'charles'] whichever way the list was spelled.
        check(nx.production(model_list, data, [1, 5]),
              4, ['bernie', 'charles'])
        check(nx.backtest(model_list, data, ['charles', 'bernie']),
              4, ['bernie', 'charles'])
        check(nx.run(model_list, nx.ValidationSplitter(data), ['ken']),
              2, ['ken'])
示例#2
0
def test_multiple_runs():
    """Run two models through production, backtest and run in several ways."""

    play = testing.play_data()
    model_pair = [nx.linear(), nx.fifty()]
    bad_count = 'wrong number of tournaments'
    bad_names = 'wrong tournaments'

    with testing.HiddenPrints():

        # Tournament given as a single name or number.
        result = nx.production(model_pair, play, 'kazutsugi')
        ok_(result.shape[1] == 2, bad_count)
        result = nx.backtest(model_pair, play, 8)
        ok_(result.shape[1] == 2, bad_count)
        result = nx.run(model_pair, nx.ValidationSplitter(play), 'kazutsugi')
        ok_(result.shape[1] == 2, bad_count)

        # Tournament argument omitted.
        result = nx.production(model_pair, play)
        ok_(result.shape[1] == 2, bad_count)
        result = nx.backtest(model_pair, play)
        ok_(result.shape[1] == 2, bad_count)
        result = nx.run(model_pair, nx.ValidationSplitter(play))
        ok_(result.shape[1] == 2, bad_count)

        # Tournament given as a one-element list; names are checked as well.
        result = nx.production(model_pair, play, [8])
        ok_(result.shape[1] == 2, bad_count)
        ok_(result.tournaments() == ['kazutsugi'], bad_names)
        result = nx.backtest(model_pair, play, ['kazutsugi'])
        ok_(result.shape[1] == 2, bad_count)
        ok_(result.tournaments() == ['kazutsugi'], bad_names)
        result = nx.run(model_pair, nx.ValidationSplitter(play), ['kazutsugi'])
        ok_(result.shape[1] == 2, bad_count)
        ok_(result.tournaments() == ['kazutsugi'], bad_names)
示例#3
0
def test_backtest_production():
    """Smoke test: backtest and production run at verbosity 0 through 3."""
    data = testing.micro_data()
    fifty_model = fifty()
    with testing.HiddenPrints():
        for level in range(4):
            nx.backtest(fifty_model, data, kfold=2, verbosity=level)
            nx.production(fifty_model, data, verbosity=level)
        # Verbosity 3 is the last level, so the extra named production run
        # that belongs to it follows directly after the loop.
        nx.production(fifty_model, data, name='test', verbosity=3)
示例#4
0
def test_prediction_setitem():
    """Compare Prediction.__setitem__ with Prediction.merge."""

    play = nx.play_data()
    prod1 = nx.production(nx.logistic(), play, 'model1', verbosity=0)
    prod2 = nx.production(nx.logistic(1e-5), play, 'model2', verbosity=0)
    prod3 = nx.production(nx.logistic(1e-6), play, 'model3', verbosity=0)
    back1 = nx.backtest(nx.logistic(), play, 'model1', verbosity=0)

    # Build one prediction through item assignment ...
    by_item = nx.Prediction()
    assignments = (
        ('model1', prod1),
        ('model2', prod2),
        ('model3', prod3),
        ('model1', back1),
    )
    for key, piece in assignments:
        by_item[key] = piece

    # ... and a second one through successive merges of the same pieces.
    by_merge = nx.Prediction()
    for piece in (prod1, prod2, prod3, back1):
        by_merge = by_merge.merge(piece)

    pd.testing.assert_frame_equal(by_item.df, by_merge.df)

    # Assigning to 'model1' again is expected to raise ValueError.
    for value in (prod1, by_item):
        assert_raises(ValueError, by_item.__setitem__, 'model1', value)
示例#5
0
def test_prediction_setitem():
    """Compare Prediction.__setitem__ with Prediction.merge."""

    play = nx.play_data()

    prod1 = nx.production(nx.linear(), play, 'kazutsugi', verbosity=0)
    prod2 = nx.production(nx.linear(), play, 8, verbosity=0)
    prod3 = nx.production(nx.linear(), play, 8, verbosity=0)
    back1 = nx.backtest(nx.linear(), play, 8, verbosity=0)
    pieces = (prod1, prod2, prod3, back1)

    # Build one prediction through item assignment under the keys
    # ('linear', 1) .. ('linear', 4) ...
    by_item = nx.Prediction()
    for number, piece in enumerate(pieces, start=1):
        by_item[('linear', number)] = piece

    # ... and a second one through successive merges of the same pieces.
    by_merge = nx.Prediction()
    for piece in pieces:
        by_merge = by_merge.merge(piece)

    pd.testing.assert_frame_equal(by_item.df, by_merge.df)

    # Assigning to ('linear', 1) again is expected to raise ValueError.
    for value in (prod1, by_item):
        assert_raises(ValueError, by_item.__setitem__, ('linear', 1), value)
示例#6
0
def test_prediction_setitem():
    """Compare Prediction.__setitem__ with Prediction.merge."""

    play = nx.play_data()
    prod1 = nx.production(nx.logistic(), play, 'bernie', verbosity=0)
    prod2 = nx.production(nx.logistic(1e-5), play, 2, verbosity=0)
    prod3 = nx.production(nx.logistic(1e-6), play, 3, verbosity=0)
    back4 = nx.backtest(nx.logistic(), play, 4, verbosity=0)
    pieces = (prod1, prod2, prod3, back4)

    # Build one prediction through item assignment under the keys
    # ('logistic', 1) .. ('logistic', 4) ...
    by_item = nx.Prediction()
    for number, piece in enumerate(pieces, start=1):
        by_item[('logistic', number)] = piece

    # ... and a second one through successive merges of the same pieces.
    by_merge = nx.Prediction()
    for piece in pieces:
        by_merge = by_merge.merge(piece)

    pd.testing.assert_frame_equal(by_item.df, by_merge.df)

    # Assigning to ('logistic', 1) again is expected to raise ValueError.
    for value in (prod1, by_item):
        assert_raises(ValueError, by_item.__setitem__, ('logistic', 1), value)
示例#7
0
def test_backtest_production():
    """Check that backtest and production run on micro data with nx.fifty."""
    micro = testing.micro_data()
    fifty_model = nx.fifty()
    bad_count = 'wrong number of tournaments'
    bad_names = 'wrong tournaments'
    with testing.HiddenPrints():
        # With no tournament argument the result is compared against
        # nx.tournament_all().
        pred = nx.production(fifty_model, micro)
        ok_(pred.shape[1] == 5, bad_count)
        ok_(pred.tournaments() == nx.tournament_all(), bad_names)
        pred = nx.backtest(fifty_model, micro, kfold=2)
        ok_(pred.shape[1] == 5, bad_count)
        ok_(pred.tournaments() == nx.tournament_all(), bad_names)

        # Exercise verbosity 0 through 3 with tournaments given by number,
        # by name, and as None.
        for level in range(4):
            nx.backtest(fifty_model, micro, tournament=3, kfold=2,
                        verbosity=level)
            for which in ('ken', 4, None):
                nx.production(fifty_model, micro, tournament=which,
                              verbosity=level)

        # Verbosity 3 is the last level, so its two extra production runs
        # follow directly after the loop.
        for which in (5, 'charles'):
            nx.production(fifty_model, micro, tournament=which, verbosity=3)
示例#8
0
def backtest_example(data):
    """Backtest a default nx.logistic model on `data`; returns nothing."""
    nx.backtest(nx.logistic(), data)
示例#9
0
        # y_train remains the same
        y_train = y[train_index]

        print(">> running split #", counter)

        # Grid-search XGBRegressor over `parameters`, scored by negative mean
        # squared error, with `kfold_split` passed as the cv argument.
        print(">> finding best params")
        xgreg = model_selection.GridSearchCV(xgb.XGBRegressor(),
                                             parameters,
                                             scoring="neg_mean_squared_error",
                                             cv=kfold_split)
        xgreg.fit(X_train, y_train)
        best_params = xgreg.best_params_
        print(">> best params: ", best_params)

        # create a new xgboost model from the best parameters found above
        model = xgboost(best_params)

        print(">> training info:")
        train = nx.backtest(model, data, verbosity=2)

        # print (">> validation info:")
        #validation = nx.production(model, data)

        # NOTE(review): the only visible assignment to `validation` is the
        # commented-out line above. Unless it is bound somewhere outside this
        # view, the to_csv call below raises NameError -- confirm.
        print(">> saving validation info: ")
        validation.to_csv(MODEL_NAME + "-" + tournament + "-" + str(counter) +
                          ".csv")
        print(">> done saving validation info")

        print("\n")

        # advance the split number used in the log line and the file name
        counter = counter + 1
示例#10
0
def backtest(data, tournament='kazutsugi'):
    """Backtest a default nx.linear model on `data` for `tournament`.

    The resulting prediction is not returned.
    """
    nx.backtest(nx.linear(), data, tournament)
示例#11
0
def backtest(data, tournament='bernie'):
    """Backtest a default nx.logistic model on `data` for `tournament`.

    The resulting prediction is not returned.
    """
    nx.backtest(nx.logistic(), data, tournament)
        # take the train_index entries of X, then slice [:, 3:] on the result
        # (presumably drops the first three columns -- TODO confirm X layout)
        X_train = X[train_index][:,3:]
        # y_train remains the same
        y_train = y[train_index]
        
        print ">> running split #", counter
        
        # Grid-search LogisticRegression over `parameters`, scored by negative
        # log loss, with `kfold_split` passed as the cv argument.
        print ">> finding best params"
        clf = model_selection.GridSearchCV(linear_model.LogisticRegression(), parameters, scoring="neg_log_loss", cv=kfold_split, n_jobs=-1)
        clf.fit(X_train, y_train)
        best_params = clf.best_params_
        print ">> best params: ", best_params

        # create a new logistic regression model for the tournament
        model = logistic(best_params)

        print ">> training info:"
        train = nx.backtest(model, data, tournament, verbosity=1)

        print ">> validation info:"
        validation = nx.production(model, data, tournament, verbosity=1)

        # one CSV per split: MODEL_NAME-<tournament>-<counter>.csv
        print ">> saving validation info: "
        validation.to_csv(MODEL_NAME + "-" + tournament + "-" + str(counter) + ".csv")
        print ">> done saving validation info"

        print "\n"
        
        # advance the split number used in the log line and the file name
        counter=counter+1
    

示例#13
0
def backtest_example():
    """Backtest a default nx.logistic model on nx.play_data().

    The resulting prediction is not returned.
    """
    play = nx.play_data()
    nx.backtest(nx.logistic(), play)
示例#14
0
    # Collect the metrics in a fixed order:
    # [log_loss, roc_auc_score, accuracy_score, yhat.std()].
    m = []
    m.append(log_loss(y, yhat))
    m.append(roc_auc_score(y, yhat))
    # accuracy_score is given hard labels: 1 where yhat >= 0.5, else 0
    yh = np.zeros(yhat.size)
    yh[yhat >= 0.5] = 1
    m.append(accuracy_score(y, yh))
    m.append(yhat.std())
    return m


if __name__ == '__main__':
    # test prediction.performance()
    import numerox as nx
    data = nx.load_data('/data/nx/numerai_dataset_20171024.hdf')
    model = nx.model.logistic()
    prediction1 = nx.backtest(model, data, verbosity=1)
    prediction2 = nx.production(model, data)
    """
    prediction = prediction1 + prediction2
    print prediction
    prediction.performance(data)
    prediction.save('/data/nx/pred/logistic_1e-5.pred')
    """

    """
    for c in (1e-1, 1e-2, 1e-3, 1e-4, 1e-5):
        print c
        model = nx.model.logistic(c)
        prediction1 = nx.backtest(model, data, verbosity=1)
        prediction2 = nx.production(model, data)
        prediction = prediction1 + prediction2