def test_RandomForestRegressionLolo_2():
    """Non-trivial test case, including standard deviation."""

    n, m, xlen = 100, 600, 10
    train_inputs = np.reshape(np.linspace(-xlen / 2, +xlen / 2, n), (n, 1))
    train_labels = (train_inputs * 2 + 1).flatten()
    train_data = smlb.TabularData(data=train_inputs, labels=train_labels)
    train_data = smlb.LabelNoise(noise=smlb.NormalNoise(rng=0)).fit(train_data).apply(train_data)

    valid_inputs = np.reshape(np.linspace(-xlen / 2, +xlen / 2, m), (m, 1))
    valid_labels = (valid_inputs * 2 + 1).flatten()
    valid_data = smlb.TabularData(data=valid_inputs, labels=valid_labels)
    valid_data = smlb.LabelNoise(noise=smlb.NormalNoise(rng=1)).fit(valid_data).apply(valid_data)

    # 12 trees meets minimal requirements for jackknife estimates
    rf = RandomForestRegressionLolo()
    preds = rf.fit(train_data).apply(valid_data)
    mae = smlb.MeanAbsoluteError().evaluate(valid_data.labels(), preds)

    # for perfect predictions, expect MAE of 2 / sqrt(pi), approximately 1.1284
    # (expected absolute difference between draws from two unit normal distributions)
    assert np.allclose(mae, 1.13, atol=0.25)
    assert np.allclose(np.median(preds.stddev), 1, atol=0.5)
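
# Illustrative sketch (not part of the smlb test suite; the helper name is
# hypothetical): the expected MAE above follows from E|X - Y| for independent
# unit normals X, Y. Since X - Y ~ N(0, sqrt(2)) and E|N(0, s)| = s * sqrt(2 / pi),
# the expectation is 2 / sqrt(pi), roughly 1.1284; a quick Monte Carlo check agrees.
def _sketch_expected_mae_of_two_unit_normals():
    rng = np.random.default_rng(0)
    x, y = rng.standard_normal(1_000_000), rng.standard_normal(1_000_000)
    empirical = np.mean(np.abs(x - y))  # Monte Carlo estimate of E|X - Y|
    analytical = 2 / np.sqrt(np.pi)  # about 1.12838
    assert np.allclose(empirical, analytical, atol=1e-2)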
def test_GaussianProcessRegressionSklearn_3():
    """All predictive distributions.

    Linear noisy function, linear kernel + white noise kernel.
    The optimized noise level is expected to go to its true value.
    """

    kernel = skl.gaussian_process.kernels.DotProduct(
        sigma_0=0, sigma_0_bounds="fixed"
    ) + skl.gaussian_process.kernels.WhiteKernel(noise_level=1, noise_level_bounds=(1e-5, 1e5))
    gpr = GaussianProcessRegressionSklearn(kernel=kernel, random_state=1)

    n, nlsd = 100, 0.5
    data = smlb.TabularData(data=np.ones(shape=(n, 1)) * 2, labels=np.ones(shape=n) * 3)
    data = smlb.LabelNoise(noise=smlb.NormalNoise(stddev=nlsd, rng=1)).fit(data).apply(data)

    preds = gpr.fit(data).apply(data)
    assert preds.has_signal_part and preds.has_noise_part
    conf, noise = preds.signal_part, preds.noise_part

    assert np.allclose(conf.mean, np.ones(n) * 3, atol=1e-1)
    assert np.allclose(conf.stddev, np.ones(n) * nlsd, atol=1e-1)

    assert (preds.mean == conf.mean).all()
    assert np.allclose(preds.stddev, np.sqrt(np.square(conf.stddev) + np.square(nlsd)), atol=1e-1)

    assert np.allclose(noise.mean, np.zeros(shape=n))
    assert np.allclose(noise.stddev, nlsd, atol=1e-1)
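
# Illustrative sketch (helper name is hypothetical, not smlb API): the combined
# stddev assertion above relies on the variance decomposition of independent
# Gaussian signal and noise parts, i.e., the total stddev is the square root of
# the summed variances.
def _sketch_gaussian_variance_decomposition(signal_stddev, noise_stddev):
    """Combine independent Gaussian signal and noise standard deviations."""
    return np.sqrt(np.square(signal_stddev) + np.square(noise_stddev))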
def test_NormalNoise():
    """Test Gaussian noise."""

    # fail without specifying pseudo-random number generator seed
    with pytest.raises(smlb.InvalidParameterError):
        smlb.NormalNoise()

    # unit normal
    noise = smlb.NormalNoise(rng=1).noise(100)
    assert sp.stats.normaltest(noise)[1] > 0.05

    # same seed leads to identical noise
    noise2 = smlb.NormalNoise(rng=1).noise(100)
    assert (noise == noise2).all()

    # non-unit normal
    noise = smlb.NormalNoise(mean=10, stddev=0.5, rng=1).noise(100)
    assert sp.stats.normaltest(noise)[1] > 0.05
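
# Illustrative sketch (helper name is hypothetical): scipy.stats.normaltest tests
# the null hypothesis that a sample comes from a normal distribution, so p > 0.05
# means normality is not rejected, as asserted above. A plain-numpy comparison:
def _sketch_normaltest_interpretation():
    rng = np.random.default_rng(0)
    p_gaussian = sp.stats.normaltest(rng.normal(size=1000))[1]
    p_uniform = sp.stats.normaltest(rng.uniform(size=1000))[1]
    # typically p_gaussian > 0.05 (normality not rejected) and p_uniform is near zero
    return p_gaussian, p_uniform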
def test_LabelNoise_NormalNoise(fixture_TabularData_ComputedLabels):
    """Test LabelNoise with NormalNoise."""

    arange = np.arange(0, 100)
    data1 = fixture_TabularData_ComputedLabels(size=100, labelf=lambda arg: arg.flatten())
    data2 = smlb.LabelNoise(noise=smlb.NormalNoise(rng=1)).fit(data1).apply(data1)

    assert sp.stats.normaltest(data2.labels(arange) - arange)[1] > 0.05
    assert sp.stats.normaltest(data2.labels(arange))[1] < 0.05

    # repeated evaluation of labels will yield different values
    assert (data2.labels(arange) != data2.labels(arange)).any()
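
# Illustrative sketch (class and names are hypothetical, not smlb API): the last
# assertion above relies on noise being re-drawn for every labels() query; a
# minimal plain-numpy analogue of that behaviour looks like this.
class _SketchResampledNoisyLabels:
    def __init__(self, base_labels, stddev=1.0, seed=None):
        self._base = np.asarray(base_labels, dtype=float)
        self._stddev = stddev
        self._rng = np.random.default_rng(seed)

    def labels(self, indices):
        # fresh noise sample on each call, so repeated queries return different values
        indices = np.asarray(indices)
        return self._base[indices] + self._rng.normal(scale=self._stddev, size=indices.shape)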