def test_prediction_setitem():
    """compare prediction._setitem__ with merge"""
    data = nx.play_data()
    # four predictions to combine, built once and reused by both paths
    parts = [nx.production(nx.linear(), data, 'kazutsugi', verbosity=0),
             nx.production(nx.linear(), data, 8, verbosity=0),
             nx.production(nx.linear(), data, 8, verbosity=0),
             nx.backtest(nx.linear(), data, 8, verbosity=0)]
    # path 1: insert each prediction via __setitem__
    p = nx.Prediction()
    for i, part in enumerate(parts):
        p[('linear', i + 1)] = part
    # path 2: fold the same predictions in via merge
    pp = nx.Prediction()
    for part in parts:
        pp = pp.merge(part)
    # both paths must produce the same underlying dataframe
    pd.testing.assert_frame_equal(p.df, pp.df)
    # reinserting an existing key must raise
    assert_raises(ValueError, p.__setitem__, ('linear', 1), parts[0])
    assert_raises(ValueError, p.__setitem__, ('linear', 1), p)
def test_model_rename():
    """Test renaming a model"""
    m = nx.linear()
    ok_(m.name == 'linear', 'wrong name')
    # rename mutates the model in place
    m.rename('LR')
    ok_(m.name == 'LR', 'wrong name')
    # rename also hands the model back, so it can be reassigned
    m = m.rename('logreg')
    ok_(m.name == 'logreg', 'wrong name')
    ok_(m.__repr__().startswith('logreg'), 'wrong name')
    # renaming to None keeps the default name
    m = nx.linear()
    ok_(m.rename(None).name == 'linear', 'wrong name')
    # non-string names are rejected
    assert_raises(ValueError, m.rename, 1)
def test_prediction_regression():
    """regression test of prediction performance evaluation

    Checks that the per-tournament mse reported by performance_mean agrees
    with the mse reported by summary for a single-tournament prediction.
    """
    d = nx.play_data()
    p = nx.production(nx.linear(), d, tournament=None, verbosity=0)
    # performance_mean depends only on p and d, not on the loop variable,
    # so compute it once instead of on every iteration
    df = p.performance_mean(d['validation'], mean_of='tournament')
    for number, name in nx.tournament_iter():
        p2 = nx.production(nx.linear(), d, tournament=name, verbosity=0)
        # round both values to 3 decimals before comparing so tiny
        # floating-point noise cannot fail the test
        mse1 = float('%.3f' % (df.loc[name]['mse']))
        mse2 = float('%.3f' % (p2.summary(d['validation']).loc['mean']['mse']))
        diff = np.abs(mse1 - mse2)
        msg = f"failed on {name}"
        ok_(diff < 1e-6, msg)
def improve_model(data, tournament='kazutsugi'):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then change the data, rerun and compare performance with and without the
    change.
    """
    # the five models under comparison
    models = [nx.linear(), nx.extratrees(), nx.randomforest(), nx.mlpc(),
              nx.linearPCA()]

    # baseline: run every model on the unmodified dataset
    print('\nStandard dataset:\n')
    prediction = nx.production(models, data, tournament, verbosity=1)

    # variation: append the square of every feature to the feature matrix
    # (any dataset change could be tried here)
    squared = np.hstack((data.x, data.x * data.x))
    data_sq = data.xnew(squared)

    print('\nDataset expanded with squared features:\n')
    prediction_sq = nx.production(models, data_sq, tournament, verbosity=1)

    # side-by-side comparison of the two runs on the validation split
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(prediction.compare(data['validation'], prediction_sq, tournament))
def test_multiple_runs():
    """test running multiple models through multiple tournaments"""
    d = testing.play_data()
    models = [nx.linear(), nx.fifty()]

    def check(p, names=None):
        # every run of two models must yield two prediction columns;
        # optionally also verify which tournaments were run
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        if names is not None:
            ok_(p.tournaments() == names, 'wrong tournaments')

    with testing.HiddenPrints():
        # tournament given by name or number
        check(nx.production(models, d, 'kazutsugi'))
        check(nx.backtest(models, d, 8))
        check(nx.run(models, nx.ValidationSplitter(d), 'kazutsugi'))
        # tournament defaulted
        check(nx.production(models, d))
        check(nx.backtest(models, d))
        check(nx.run(models, nx.ValidationSplitter(d)))
        # tournament given as a one-element list
        check(nx.production(models, d, [8]), ['kazutsugi'])
        check(nx.backtest(models, d, ['kazutsugi']), ['kazutsugi'])
        check(nx.run(models, nx.ValidationSplitter(d), ['kazutsugi']),
              ['kazutsugi'])
def test_prediction_check():
    """make sure prediction.check runs"""
    data = nx.play_data()
    base = nx.production(nx.linear(), data, 'kazutsugi', verbosity=0)
    # clone the prediction under the name numerai's checks expect
    example = base.copy().rename('example_predictions')
    combined = base + example
    with nx.testing.HiddenPrints():
        result = combined.check(data)
    ok_(isinstance(result, dict), 'expecting a dictionary')
def concordance(data, tournament='kazutsugi'):
    """
    Example showing how to calculate concordance.

    Concordance must be less than 0.12 to pass numerai's check. For an
    accurate concordance calculation `data` must be the full dataset.
    """
    # fit three model types and predict on the tournament data
    models = [nx.linear(), nx.extratrees(), nx.mlpc()]
    prediction = nx.production(models, data, tournament)
    print("\nA concordance less than 0.12 is passing")
    print(prediction.concordance(data))
def get_models():
    """Return a fresh instance of every model used by the examples."""
    constructors = (nx.linear, nx.ridge_mean, nx.extratrees, nx.randomforest,
                    nx.mlpc, nx.linearPCA, nx.example_predictions, nx.fifty)
    return [make() for make in constructors]
def numerox_example():
    """
    Example of how to prepare a submission for the Numerai tournament.
    It uses Numerox which you can install with: pip install numerox
    For more information see: https://github.com/kwgoodman/numerox
    """
    # grab the latest dataset from numerai and load it
    data = nx.download('numerai_dataset.zip')
    # linear regression stands in here for your own model
    model = nx.linear()
    # train on the train split, then predict the tournament split
    prediction = nx.production(model, data, tournament='kazutsugi')
    # write the submission file
    prediction.to_csv('linear.csv', verbose=True)
def test_run():
    "Make sure run runs"
    d = testing.play_data()
    # every splitter flavour the package offers
    splitters = (nx.TournamentSplitter(d),
                 nx.ValidationSplitter(d),
                 nx.CheatSplitter(d),
                 nx.CVSplitter(d, kfold=2),
                 nx.SplitSplitter(d, fit_fraction=0.5))
    for model in (nx.linear(), nx.fifty()):
        for splitter in splitters:
            p = nx.run(model, splitter, tournament=None, verbosity=0)
            ok_(p.shape[1] == 1, 'wrong number of tournaments')
            ok_(p.tournaments() == nx.tournament_all(), 'wrong tournaments')
    # a model is required
    assert_raises(ValueError, nx.run, None, nx.TournamentSplitter(d))
    # tournament must not be a dict
    assert_raises(ValueError, nx.run, nx.fifty(), nx.TournamentSplitter(d), {})
def run_all_examples(data=None):
    "Run most of the numerox examples"
    if data is None:
        data = nx.numerai.download_data_object(verbose=True)
    # these examples all take the dataset as their only argument
    for example in (nx.examples.backtest,
                    nx.examples.concordance,
                    nx.examples.improve_model):
        print_source(example)
        example(data)
    # cv_warning has a different signature, so run it on its own
    cv_warning = nx.examples.cv_warning
    print_source(cv_warning)
    cv_warning(nx.linear(), data['train'], nsamples=2)
def backtest(data, tournament='kazutsugi'):
    "Simple cross validation on training data using linear regression"
    prediction = nx.backtest(nx.linear(), data, tournament)  # noqa
def test_prediction_concordance():
    """make sure prediction.concordance runs"""
    data = nx.testing.play_data()
    prediction = nx.production(nx.linear(), data, 8, verbosity=0)
    result = prediction.concordance(data)
    ok_(isinstance(result, pd.DataFrame), 'expecting a dataframe')