def test_covariate_shift(self):
    """Train on a biased sample, then predict on an unbiased one.

    The forest receives a kernel built jointly over train + test
    inputs so its GP-style prediction (depth=0) can correct for the
    covariate shift; the test passes when the summed squared error
    against the true polynomial stays below 2.4.
    """
    num = 100
    # Biased training inputs: normal with shifted mean and narrow variance.
    bias_var = .5 ** 2
    bias_mean = .7
    x_train = SP.random.randn(num) * SP.sqrt(bias_var) + bias_mean
    y_train = self.complete_sample(x_train)
    # Unbiased test inputs: zero-mean normal.
    test_var = .3 ** 2
    test_mean = 0
    x_test = SP.random.randn(num) * SP.sqrt(test_var) + test_mean
    # Joint kernel over the concatenated inputs: quadratic component
    # plus a scaled linear (outer-product) component, then rescaled.
    x_all = SP.hstack((x_train, x_test))
    linear_part = 10 * SP.dot(x_all.reshape(-1, 1), x_all.reshape(1, -1))
    kernel = utils.scale_K(utils.getQuadraticKernel(x_all, d=1) + linear_part)
    # Train/train and test/train sub-kernels via plain slicing.
    n_train = x_train.size
    kernel_train = kernel[:n_train, :n_train]
    kernel_test = kernel[n_train:, :n_train]
    mf = MF(n_estimators=100, kernel=kernel_train, min_depth=0,
            subsampling=False)
    mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))
    # depth=0: prediction comes from the random-effect (GP) part --
    # presumably; verify against MF.predict's contract.
    response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0)
    self.assertTrue(((response_gp - self.polynom(x_test)) ** 2).sum() < 2.4)
def test_covariate_shift(self): n_sample = 100 # Biased training var_bias = .5**2 mean_bias = .7 x_train = SP.random.randn(n_sample) * SP.sqrt(var_bias) + mean_bias y_train = self.complete_sample(x_train) # Unbiased test set var = .3**2 mean = 0 x_test = SP.random.randn(n_sample) * SP.sqrt(var) + mean x_complete = SP.hstack((x_train, x_test)) kernel = utils.getQuadraticKernel(x_complete, d=1) +\ 10 * SP.dot(x_complete.reshape(-1, 1), x_complete.reshape(1, -1)) kernel = utils.scale_K(kernel) kernel_train = kernel[SP.ix_(SP.arange(x_train.size), SP.arange(x_train.size))] kernel_test = kernel[SP.ix_(SP.arange(x_train.size, x_complete.size), SP.arange(x_train.size))] mf = MF(n_estimators=100, kernel=kernel_train, min_depth=0, subsampling=False) mf.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1)) response_gp = mf.predict(x_test.reshape(-1, 1), kernel_test, depth=0) self.assertTrue(((response_gp - self.polynom(x_test))**2).sum() < 2.4)
def test_delta_updating(self):
    """Fit forests with and without delta re-estimation on confounded data.

    Builds a step-function signal confounded by a smooth GP draw, then
    fits one forest with delta held fixed (update_delta=False) and one
    that re-estimates it (update_delta=True).

    NOTE(review): no assertion on response_lmf / response_rf is visible
    in this chunk -- as shown, the test only verifies that fitting and
    prediction run without error.
    """
    num = 100
    # Covariates: an evenly spaced column and a uniform-random column.
    X = SP.empty((num, 2))
    X[:, 0] = SP.arange(0, 1, 1.0 / num)
    X[:, 1] = SP.random.rand(num)
    sd_noise = .5
    sd_conf = .5
    # Observation noise on top of a step function of the first column.
    noise = SP.random.randn(num, 1) * sd_noise
    step_signal = (X[:, 0:1] > .5) * 1.0
    y_fn = step_signal + noise
    # Random 2/3 training split expressed as a boolean mask.
    training_sample = SP.zeros(num, dtype='bool')
    n_train = SP.int_(.66 * num)
    training_sample[SP.random.permutation(num)[:n_train]] = True
    test_sample = ~training_sample
    # Smooth kernel on the first column plus diagonal jitter.
    kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) + \
        1e-3 * SP.eye(num)
    # Confounder: a single draw from a zero-mean GP with that kernel.
    y_conf = sd_conf * SP.random.multivariate_normal(
        SP.zeros(num), kernel, 1).reshape(-1, 1)
    y_tot = y_fn + y_conf
    # Train x train and test x train sub-kernels.
    kernel_train = kernel[SP.ix_(training_sample, training_sample)]
    kernel_test = kernel[SP.ix_(test_sample, training_sample)]
    # Mixed forest with delta held fixed.
    lm_forest = MF(kernel=kernel_train, update_delta=False, max_depth=1,
                   verbose=0)
    lm_forest.fit(X[training_sample], y_tot[training_sample])
    response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)
    # Forest that re-estimates delta while fitting.
    random_forest = MF(kernel=kernel_train, update_delta=True, max_depth=5,
                       verbose=0)
    random_forest.fit(X[training_sample], y_tot[training_sample])
    response_rf = random_forest.predict(X[test_sample], k=kernel_test)
def test_delta_updating(self): n_sample = 100 # A 20 x 2 random integer matrix X = SP.empty((n_sample, 2)) X[:, 0] = SP.arange(0, 1, 1.0/n_sample) X[:, 1] = SP.random.rand(n_sample) sd_noise = .5 sd_conf = .5 noise = SP.random.randn(n_sample, 1)*sd_noise # print 'true delta equals', (sd_noise**2)/(sd_conf**2) # Here, the observed y is just a linear function of the first column # in X and # a little independent gaussian noise y_fixed = (X[:, 0:1] > .5)*1.0 y_fn = y_fixed + noise # Divide into training and test sample using 2/3 of data for training training_sample = SP.zeros(n_sample, dtype='bool') training_sample[ SP.random.permutation(n_sample)[:SP.int_(.66*n_sample)]] = True test_sample = ~training_sample kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\ 1e-3*SP.eye(n_sample) # The confounded version of y_lin is computed as y_conf = sd_conf*SP.random.multivariate_normal(SP.zeros(n_sample), kernel, 1).reshape(-1, 1) y_tot = y_fn + y_conf # Selects rows and columns kernel_train = kernel[SP.ix_(training_sample, training_sample)] kernel_test = kernel[SP.ix_(test_sample, training_sample)] lm_forest = MF(kernel=kernel_train, update_delta=False, max_depth=1, verbose=0) # Returns prediction for random effect lm_forest.fit(X[training_sample], y_tot[training_sample]) response_lmf = lm_forest.predict(X[test_sample], k=kernel_test) # print 'fitting forest (delta-update)' # earn random forest, not accounting for the confounding random_forest = MF(kernel=kernel_train, update_delta=True, max_depth=5, verbose=0) random_forest.fit(X[training_sample], y_tot[training_sample]) response_rf = random_forest.predict(X[test_sample], k=kernel_test)