Example #1
0
def test_prediction_setitem():
    """Check that Prediction.__setitem__ agrees with Prediction.merge."""

    data = nx.play_data()

    # Four predictions: three production runs and one backtest.
    parts = [nx.production(nx.linear(), data, 'kazutsugi', verbosity=0),
             nx.production(nx.linear(), data, 8, verbosity=0),
             nx.production(nx.linear(), data, 8, verbosity=0),
             nx.backtest(nx.linear(), data, 8, verbosity=0)]

    # Combine them via __setitem__ under distinct (name, number) keys.
    via_setitem = nx.Prediction()
    for idx, part in enumerate(parts):
        via_setitem[('linear', idx + 1)] = part

    # Combine the same predictions via merge.
    via_merge = nx.Prediction()
    for part in parts:
        via_merge = via_merge.merge(part)

    pd.testing.assert_frame_equal(via_setitem.df, via_merge.df)

    # Reusing an already-assigned key must raise.
    assert_raises(ValueError, via_setitem.__setitem__, ('linear', 1),
                  parts[0])
    assert_raises(ValueError, via_setitem.__setitem__, ('linear', 1),
                  via_setitem)
Example #2
0
def test_model_rename():
    """Renaming a model, both in place and via the returned model."""
    m = nx.linear()
    ok_(m.name == 'linear', 'wrong name')
    # rename mutates in place...
    m.rename('LR')
    ok_(m.name == 'LR', 'wrong name')
    # ...and also returns the model, so the result can be rebound
    m = m.rename('logreg')
    ok_(m.name == 'logreg', 'wrong name')
    ok_(repr(m).startswith('logreg'), 'wrong name')
    m = nx.linear()
    # renaming to None keeps the default name
    ok_(m.rename(None).name == 'linear', 'wrong name')
    # non-string names are rejected
    assert_raises(ValueError, m.rename, 1)
Example #3
0
def test_prediction_regression():
    """regression test of prediction performance evaluation"""
    data = nx.play_data()
    pred_all = nx.production(nx.linear(), data, tournament=None, verbosity=0)
    for number, name in nx.tournament_iter():
        pred_one = nx.production(nx.linear(), data, tournament=name,
                                 verbosity=0)
        # mse taken from the all-tournament run, averaged per tournament
        perf = pred_all.performance_mean(data['validation'],
                                         mean_of='tournament')
        mse_all = float('%.3f' % (perf.loc[name]['mse']))
        # mse taken from the single-tournament run's summary
        summary = pred_one.summary(data['validation'])
        mse_one = float('%.3f' % (summary.loc['mean']['mse']))
        # both rounded to 3 decimals, so they should agree exactly
        ok_(np.abs(mse_all - mse_one) < 1e-6, f"failed on {name}")
Example #4
0
def improve_model(data, tournament='kazutsugi'):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then change the data, rerun and compare performance with and without the
    change.
    """
    # the five models we will compare
    models = [nx.linear(),
              nx.extratrees(),
              nx.randomforest(),
              nx.mlpc(),
              nx.linearPCA()]

    print('\nStandard dataset:\n')

    # base case: run every model on the unmodified data
    baseline = nx.production(models, data, tournament, verbosity=1)

    # make a change to the data; as an example, append the square of
    # each feature as extra columns
    expanded = data.xnew(np.hstack((data.x, data.x * data.x)))

    print('\nDataset expanded with squared features:\n')

    # rerun every model on the expanded data
    rerun = nx.production(models, expanded, tournament, verbosity=1)

    # side-by-side performance comparison on the validation split
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(baseline.compare(data['validation'], rerun, tournament))
Example #5
0
def test_multiple_runs():
    """test running multiple models through multiple tournaments"""

    d = testing.play_data()
    models = [nx.linear(), nx.fifty()]
    nmsg = 'wrong number of tournaments'

    with testing.HiddenPrints():

        # tournament given explicitly as a name or a number
        for make in (lambda: nx.production(models, d, 'kazutsugi'),
                     lambda: nx.backtest(models, d, 8),
                     lambda: nx.run(models, nx.ValidationSplitter(d),
                                    'kazutsugi')):
            ok_(make().shape[1] == 2, nmsg)

        # tournament left at its default
        for make in (lambda: nx.production(models, d),
                     lambda: nx.backtest(models, d),
                     lambda: nx.run(models, nx.ValidationSplitter(d))):
            ok_(make().shape[1] == 2, nmsg)

        # tournament given as a one-element list
        for make in (lambda: nx.production(models, d, [8]),
                     lambda: nx.backtest(models, d, ['kazutsugi']),
                     lambda: nx.run(models, nx.ValidationSplitter(d),
                                    ['kazutsugi'])):
            p = make()
            ok_(p.shape[1] == 2, nmsg)
            ok_(p.tournaments() == ['kazutsugi'], 'wrong tournaments')
Example #6
0
def test_prediction_check():
    """make sure prediction.check runs"""
    data = nx.play_data()
    base = nx.production(nx.linear(), data, 'kazutsugi', verbosity=0)
    # check() needs an 'example_predictions' column to compare against
    example = base.copy().rename('example_predictions')
    combined = base + example
    with nx.testing.HiddenPrints():
        result = combined.check(data)
    ok_(isinstance(result, dict), 'expecting a dictionary')
Example #7
0
def concordance(data, tournament='kazutsugi'):
    """
    Example showing how to calculate concordance.
    Concordance must be less than 0.12 to pass numerai's check.
    For an accurate concordance calculation `data` must be the full dataset.
    """
    # run three models and report concordance for each
    p = nx.production([nx.linear(), nx.extratrees(), nx.mlpc()], data,
                      tournament)
    print("\nA concordance less than 0.12 is passing")
    print(p.concordance(data))
Example #8
0
def get_models():
    """Return the list of example models used throughout the demos."""
    return [
        nx.linear(),
        nx.ridge_mean(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.linearPCA(),
        nx.example_predictions(),
        nx.fifty(),
    ]
Example #9
0
def numerox_example():
    """
    Example of how to prepare a submission for the Numerai tournament.
    It uses Numerox which you can install with: pip install numerox
    For more information see: https://github.com/kwgoodman/numerox
    """
    # grab the numerai dataset (download, save, then load)
    data = nx.download('numerai_dataset.zip')

    # a simple linear-regression model; substitute your own model here
    model = nx.linear()

    # fit on the train split and predict for the tournament split
    prediction = nx.production(model, data, tournament='kazutsugi')

    # write the predictions out as a csv submission file
    prediction.to_csv('linear.csv', verbose=True)
Example #10
0
def test_run():
    "Make sure run runs"
    d = testing.play_data()

    models = [nx.linear(), nx.fifty()]
    # every splitter flavour the package offers
    splitters = [nx.TournamentSplitter(d),
                 nx.ValidationSplitter(d),
                 nx.CheatSplitter(d),
                 nx.CVSplitter(d, kfold=2),
                 nx.SplitSplitter(d, fit_fraction=0.5)]

    # tournament=None means run all tournaments
    for mdl in models:
        for split in splitters:
            pred = nx.run(mdl, split, tournament=None, verbosity=0)
            ok_(pred.shape[1] == 1, 'wrong number of tournaments')
            ok_(pred.tournaments() == nx.tournament_all(),
                'wrong tournaments')

    # bad model and bad tournament arguments are rejected
    assert_raises(ValueError, nx.run, None, nx.TournamentSplitter(d))
    assert_raises(ValueError, nx.run, nx.fifty(), nx.TournamentSplitter(d),
                  {})
Example #11
0
def run_all_examples(data=None):
    "Run most of the numerox examples"

    if data is None:
        data = nx.numerai.download_data_object(verbose=True)

    # for each example: show its source, then execute it on the data
    for example in (nx.examples.backtest,
                    nx.examples.concordance,
                    nx.examples.improve_model):
        print_source(example)
        example(data)

    # cv_warning takes different arguments, so handle it separately
    cv_warning = nx.examples.cv_warning
    print_source(cv_warning)
    cv_warning(nx.linear(), data['train'], nsamples=2)
Example #12
0
def backtest(data, tournament='kazutsugi'):
    "Simple cross validation on training data using linear regression"
    prediction = nx.backtest(nx.linear(), data, tournament)  # noqa
Example #13
0
def test_prediction_concordance():
    """make sure prediction.concordance runs"""
    data = nx.testing.play_data()
    prediction = nx.production(nx.linear(), data, 8, verbosity=0)
    result = prediction.concordance(data)
    ok_(isinstance(result, pd.DataFrame), 'expecting a dataframe')