def test_augmented_data_regression(self):
    """get_augmented_data keeps rows/attributes and appends one prediction
    meta column per model plus one fold column."""
    data = Table("housing")
    scenarios = (
        ([LinearRegressionLearner()], ['Linear Regression'], 1),
        ([LinearRegressionLearner(), MeanLearner()],
         ['Linear Regression', 'Mean Learner'], 2),
    )
    for learners, names, n_models in scenarios:
        res = CrossValidation(store_data=True)(data, learners)
        augmented = res.get_augmented_data(names)
        self.assertEqual(len(augmented), len(data))
        self.assertEqual(len(augmented.domain.attributes),
                         len(data.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(data.domain.class_vars))
        # n_models prediction columns for the class, +1 for the fold index
        self.assertEqual(len(augmented.domain.metas),
                         len(data.domain.metas) + n_models + 1)
def test_error_model(self):
    """ErrorModelNC produces narrow intervals and ~90% coverage on housing."""
    # Interval width check for both loo modes on the fixture split.
    for loo in (False, True):
        icr = InductiveRegressor(
            ErrorModelNC(LinearRegressionLearner(), LinearRegressionLearner(),
                         loo=loo),
            self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)
    # Coverage check across cross-sampled housing for three score variants.
    scores = (
        AbsError(RandomForestRegressionLearner()),
        ErrorModelNC(RandomForestRegressionLearner(),
                     LinearRegressionLearner()),
        ErrorModelNC(RandomForestRegressionLearner(),
                     LinearRegressionLearner(), loo=True),
    )
    for score in scores:
        icr = InductiveRegressor(score)
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())
def test_nc_type(self):
    """Regressors accept regression nonconformity scores and reject
    classification ones."""
    regression_score = AbsError(LinearRegressionLearner())
    classification_score = InverseProbability(LogisticRegressionLearner())
    # A regression score constructs without complaint...
    InductiveRegressor(regression_score)
    # ...while a classification score trips the type assertion.
    self.assertRaises(AssertionError, InductiveRegressor, classification_score)
    CrossRegressor(regression_score, 5)
    self.assertRaises(AssertionError, CrossRegressor, classification_score, 5)
def test_raise_no_classifier_error(self):
    """A regression learner must raise no_classifier; classifiers must not."""
    widget = self.widget
    # A regression learner on the input shows the error...
    self.send_signal(widget.Inputs.learner, LinearRegressionLearner())
    self.assertTrue(widget.Error.no_classifier.is_shown())
    # ...and clearing the input hides it again.
    self.send_signal(widget.Inputs.learner, None)
    self.assertFalse(widget.Error.no_classifier.is_shown())
    # None of the classification learners should trigger the error.
    for classifier in (LogisticRegressionLearner(), TreeLearner(),
                       RandomForestLearner(), SVMLearner()):
        self.send_signal(widget.Inputs.learner, classifier)
        self.assertFalse(widget.Error.no_classifier.is_shown())
def test_explain_predictions(self):
    """explain_predictions returns one SHAP matrix per class (classification)
    or a single matrix (regression), plus matching prediction shapes."""
    # Classification: iris has three classes.
    model = LogisticRegressionLearner()(self.iris)
    shap_values, predictions, _, _ = explain_predictions(
        model, self.iris[:3], self.iris)
    self.assertEqual(3, len(shap_values))
    for class_matrix in shap_values:
        self.assertTupleEqual((3, self.iris.X.shape[1]), class_matrix.shape)
    self.assertTupleEqual(
        (3, len(self.iris.domain.class_var.values)), predictions.shape)
    # Regression: a single SHAP matrix and one prediction column.
    model = LinearRegressionLearner()(self.housing)
    shap_values, predictions, _, _ = explain_predictions(
        model, self.housing[:3], self.housing)
    self.assertEqual(1, len(shap_values))
    self.assertTupleEqual(
        (3, self.housing.X.shape[1]), shap_values[0].shape)
    self.assertTupleEqual((3, 1), predictions.shape)
def test_linear_scorer(self):
    """score_data ranks LSTAT highest on housing, one score per attribute."""
    data = Table('housing')
    scores = LinearRegressionLearner().score_data(data)
    top_attribute = data.domain.attributes[np.argmax(scores)]
    self.assertEqual('LSTAT', top_attribute.name)
    self.assertEqual(len(scores), len(data.domain.attributes))
def test_apply(self):
    """apply() builds plot items when data is set and leaves them None
    when it is not."""
    widget = self.widget

    def assert_plots_present():
        # Both plot items must exist after a successful apply.
        self.assertNotEqual(widget.plot_item, None)
        self.assertNotEqual(widget.scatterplot_item, None)

    widget.set_data(self.data)
    widget.apply()
    assert_plots_present()
    widget.set_data(None)
    widget.apply()
    # TODO: output will be checked when it available in GuiTest
    # Without data the plots stay None (consistent with test_set_data).
    self.assertEqual(widget.plot_item, None)
    self.assertEqual(widget.scatterplot_item, None)
    widget.set_data(self.data)
    widget.set_learner(LinearRegressionLearner())
    widget.apply()
    assert_plots_present()
    widget.set_learner(RandomForestRegressionLearner())
    widget.apply()
    assert_plots_present()
    widget.set_preprocessor((Normalize(), ))
    assert_plots_present()
def test_linear_scorer_feature(self):
    """Scoring one feature at a time matches the all-feature score vector."""
    data = Table('housing')
    learner = LinearRegressionLearner()
    all_scores = learner.score_data(data)
    for index, attribute in enumerate(data.domain.attributes):
        single_score = learner.score_data(data, attribute)
        self.assertEqual(single_score, all_scores[index])
def setUp(self):
    """Create the Rank widget plus shared datasets, learners and projector."""
    self.widget = self.create_widget(OWRank)  # type: OWRank
    # Datasets: one classification, one regression.
    self.iris = Table("iris")
    self.housing = Table("housing")
    # Matching learners and a projection method for scoring.
    self.log_reg = LogisticRegressionLearner()
    self.lin_reg = LinearRegressionLearner()
    self.pca = PCA()
def test_scorer(self):
    """Every linear-family learner scores LSTAT as the top housing feature."""
    attributes = self.housing.domain.attributes
    for learner in (LinearRegressionLearner(),
                    RidgeRegressionLearner(),
                    LassoRegressionLearner(alpha=0.01),
                    ElasticNetLearner(alpha=0.01)):
        scores = learner.score_data(self.housing)
        self.assertEqual('LSTAT', attributes[np.argmax(scores)].name)
        self.assertEqual(len(scores), len(attributes))
def test_scorer_feature(self):
    """Per-feature scores agree with the all-feature vector for each learner."""
    attributes = self.housing.domain.attributes
    for learner in (LinearRegressionLearner(),
                    RidgeRegressionLearner(),
                    LassoRegressionLearner(alpha=0.01),
                    ElasticNetLearner(alpha=0.01)):
        all_scores = learner.score_data(self.housing)
        for index, attribute in enumerate(attributes):
            single_score = learner.score_data(self.housing, attribute)
            self.assertEqual(single_score, all_scores[index])
def test_abs_error_normalized(self):
    """Compare plain AbsError against AbsErrorNormalized variants.

    Runs 10 repetitions of 10-fold cross-sampling on span-normalized
    housing data, accumulating results per predictor, then checks each
    variant's empirical coverage stays within 0.03 of the 1 - eps target.

    Fixes: the original ended with a stray unterminated triple-quote
    (removed), and duplicated the run/print/assert code five times
    (collapsed into parallel lists; per-fold evaluation order preserved).
    """
    normalizer = Normalize(zero_based=True,
                           norm_type=Normalize.NormalizeBySpan)
    tab = normalizer(Table('housing'))
    # Predictors evaluated in the same order as the original variables
    # r, r_knn, r_norm, r_norm_exp, r_norm_rf.
    predictors = [
        InductiveRegressor(AbsError(LinearRegressionLearner())),
        InductiveRegressor(AbsError(KNNRegressionLearner(4))),
        InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               exp=False)),
        InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               exp=True)),
        InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               rf=RandomForestRegressor())),
    ]
    results = [ResultsRegr() for _ in predictors]
    eps = 0.05
    for _ in range(10):
        for train, test in CrossSampler(tab, 10):
            # Hold out 100 calibration instances from each training fold.
            train, calibrate = next(
                RandomSampler(train, len(train) - 100, 100))
            for predictor, res in zip(predictors, results):
                res.concatenate(
                    run_train_test(predictor, eps, train, test, calibrate))
    for res in results:
        print(res.median_range(), res.interdecile_mean(), 1 - res.accuracy())
    for res in results:
        # Allow a small slack below the nominal 1 - eps coverage.
        self.assertGreater(res.accuracy(), 1 - eps - 0.03)
def test_run(self):
    """run() gathers rep * (test fraction) predictions for both samplers."""
    tab = Table('iris')
    classifier = CrossClassifier(
        InverseProbability(LogisticRegressionLearner()), 5)
    results = run(classifier, 0.1, RandomSampler(tab, 4, 1), rep=3)
    # 3 repetitions, each predicting on the 1/5 test split.
    self.assertEqual(len(results.preds), 3 * 1 / 5 * len(tab))
    tab = Table('housing')
    regressor = InductiveRegressor(AbsError(LinearRegressionLearner()))
    results = run(regressor, 0.1, CrossSampler(tab, 4), rep=3)
    # Cross-sampling predicts every instance once per repetition.
    self.assertEqual(len(results.preds), 3 * len(tab))
def test_loo(self):
    """Leave-one-out conformal predictors handle a single held-out instance."""
    # Classification: first iris instance is predicted as setosa.
    train, test = get_instance(Table('iris'), 0)
    loo_classifier = LOOClassifier(
        InverseProbability(LogisticRegressionLearner()), train)
    prediction = loo_classifier(test.x, 0.1)
    self.assertEqual(prediction, ['Iris-setosa'])
    # Regression: interval for the first housing instance stays narrow.
    train, test = get_instance(Table('housing'), 0)
    loo_regressor = LOORegressor(AbsError(LinearRegressionLearner()), train)
    lo, hi = loo_regressor(test.x, 0.1)
    self.assertLess(hi - lo, 20)
def test_validate_cross_regression(self):
    """Cross-conformal regression attains ~(1 - eps) empirical coverage.

    Leave-one-out over shuffled housing: fit a 5-fold CrossRegressor on
    the remainder and count how often the interval covers the target.

    Fixes: local `all` shadowed the builtin (renamed to `total`); the
    `num` interval-width accumulator was computed but never asserted
    (removed as dead state).
    """
    tab = shuffle_data(Table('housing'))
    eps = 0.1
    correct, total = 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        ccr = CrossRegressor(AbsError(LinearRegressionLearner()), 5,
                             shuffle_data(train))
        y_min, y_max = ccr(test.x, eps)
        if y_min <= test.y <= y_max:
            correct += 1
    # Empirical coverage should sit near the nominal 1 - eps level.
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)
def test_validate_regression(self):
    """Inductive conformal regression attains ~(1 - eps) empirical coverage.

    Leave-one-out over housing: split the remainder 2:1 into proper
    training and calibration sets, then count interval coverage.

    Fixes: local `all` shadowed the builtin (renamed to `total`); the
    `num` interval-width accumulator was computed but never asserted
    (removed as dead state).
    """
    tab = Table('housing')
    eps = 0.1
    correct, total = 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        train, calibrate = split_data(shuffle_data(train), 2, 1)
        icr = InductiveRegressor(AbsError(LinearRegressionLearner()),
                                 train, calibrate)
        y_min, y_max = icr(test.x, eps)
        if y_min <= test.y <= y_max:
            correct += 1
    # Empirical coverage should sit near the nominal 1 - eps level.
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)
def test_scorer(self):
    """Every linear-family learner ranks NOX first on this dataset."""
    data = Table('housing')
    attributes = data.domain.attributes
    for learner in (LinearRegressionLearner(),
                    RidgeRegressionLearner(),
                    LassoRegressionLearner(alpha=0.01),
                    ElasticNetLearner(alpha=0.01)):
        scores = learner.score_data(data)
        self.assertEqual('NOX', attributes[np.argmax(scores)].name)
        self.assertEqual(len(scores), len(attributes))
def test_explain_regression(self):
    """Regression SHAP returns one ndarray matrix shaped like the data."""
    model = LinearRegressionLearner()(self.housing)
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.housing, self.housing)
    self.assertIsInstance(shap_values, list)
    self.assertEqual(len(shap_values), 1)
    self.assertIsInstance(shap_values[0], np.ndarray)
    self.assertTupleEqual(shap_values[0].shape, self.housing.X.shape)
    # Number of cases is too short to be subsampled, so all rows are kept.
    self.assertEqual(len(shap_values[0]), sample_mask.sum())
    self.assertTupleEqual((1, ), base_value.shape)
def test_LinearRegression(self):
    """Fit on half of a noisy linear dataset; predictions on the other half
    must stay within the noise bound.

    Fix: the original drew from the unseeded global np.random, making the
    test nondeterministic and mutating global RNG state; use a local
    seeded RandomState instead (identical distributions, reproducible).
    """
    nrows, ncols = 1000, 3
    rng = np.random.RandomState(42)
    x = rng.randint(-20, 51, (nrows, ncols))
    c = rng.rand(ncols, 1) * 10 - 3      # true coefficients in [-3, 7)
    e = rng.rand(nrows, 1) - 0.5          # uniform noise in [-0.5, 0.5)
    y = np.dot(x, c) + e
    # Train on the first half, evaluate on the second.
    x1, x2 = np.split(x, 2)
    y1, y2 = np.split(y, 2)
    t = Table(x1, y1)
    clf = LinearRegressionLearner()(t)
    z = clf(x2)
    # Noise is bounded by 0.5, so 2.0 is a comfortable error margin.
    self.assertTrue((abs(z.reshape(-1, 1) - y2) < 2.0).all())
def test_PolynomialLearner(self):
    """Higher polynomial degree fits this wavy 1-D data with lower RMSE."""
    x = np.array([0.172, 0.167, 0.337, 0.420, 0.355, 0.710, 0.801, 0.876])
    y = np.array([0.784, 0.746, 0.345, 0.363, 0.366, 0.833, 0.490, 0.445])
    data = Table(x.reshape(-1, 1), y)
    data.domain = Domain([ContinuousVariable('x')],
                         class_vars=[ContinuousVariable('y')])
    linear = LinearRegressionLearner()
    learners = [linear,
                PolynomialLearner(linear, degree=2),
                PolynomialLearner(linear, degree=3)]
    rmse = RMSE(TestOnTrainingData(data, learners))
    # Each added degree should reduce the training error.
    self.assertGreater(rmse[0], rmse[1])
    self.assertGreater(rmse[1], rmse[2])
def test_abs_error_rf(self):
    """AbsErrorRF keeps intervals narrow and coverage above 85%."""
    # Interval width check with two underlying regressors.
    for base_learner in (RandomForestRegressionLearner(),
                         LinearRegressionLearner()):
        icr = InductiveRegressor(
            AbsErrorRF(base_learner, RandomForestRegressor()),
            self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)
    # Coverage check over cross-sampled housing data.
    icr = InductiveRegressor(
        AbsErrorRF(RandomForestRegressionLearner(), RandomForestRegressor()))
    r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
    self.assertGreater(r.accuracy(), 0.85)
    print(r.median_range(), r.interdecile_mean())
def test_coefficients(self):
    """The exact relation y = x - 11 yields intercept -11 and one unit
    coefficient."""
    data = Table([[11], [12], [13]], [0, 1, 2])
    model = LinearRegressionLearner()(data)
    self.assertEqual(len(model.coefficients), 1)
    self.assertAlmostEqual(float(model.coefficients[0]), 1)
    self.assertAlmostEqual(float(model.intercept), -11)
def test_linear_scorer(self):
    """Row 0 of the score matrix ranks LSTAT first; one column per
    attribute."""
    scores = LinearRegressionLearner().score_data(self.housing)
    attributes = self.housing.domain.attributes
    self.assertEqual("LSTAT", attributes[np.argmax(scores[0])].name)
    self.assertEqual(scores.shape[1], len(attributes))
def test_linear_regression_repr(self):
    """repr() of the learner evaluates back into an equivalent learner."""
    original = LinearRegressionLearner()
    # Round-trip through the textual representation.
    recreated = eval(repr(original))
    self.assertIsInstance(recreated, LinearRegressionLearner)
def test_linreg(self):
    """The linear regression learner must advertise weight support."""
    learner = LinearRegressionLearner()
    self.assertTrue(
        learner.supports_weights,
        "Either LinearRegression no longer supports weighted tables or "
        "SklLearner.supports_weights is out-of-date.")
def test_continuous(self):
    """3-fold cross-validation on housing keeps RMSE below 5."""
    results = CrossValidation(k=3)(self.housing, [LinearRegressionLearner()])
    self.assertLess(RMSE(results), 5)