def test_wrong_class_type(self):
    """Scorers must reject ``self.housing`` with ``ValueError`` where asserted.

    Gini, InfoGain, GainRatio and Chi2 are applied to feature 0 and ANOVA
    to feature 2; each of those calls has to raise ``ValueError``.
    UnivariateLinearRegression is then applied to feature 2 with no
    assertion around it, so the test only passes if that call does not
    raise.
    """
    # These three are instantiated up front, outside any assertRaises.
    prebuilt_scorers = (Gini(), InfoGain(), GainRatio())
    for score_feature in prebuilt_scorers:
        self.assertRaises(ValueError, score_feature, self.housing, 0)

    # Chi2 and ANOVA are constructed inside the assertRaises context.
    for scorer_cls, feature in ((Chi2, 0), (ANOVA, 2)):
        with self.assertRaises(ValueError):
            scorer_cls()(self.housing, feature)

    # Deliberately unguarded: an exception here fails the test.
    UnivariateLinearRegression()(self.housing, 2)
def test_chi2(self):
    """Chi2 must give its highest score to feature 1 on synthetic data.

    A 500 x 5 matrix of random integers in ``[0, 4)`` is generated, and
    the class is computed from columns 1 and 3 only, with column 1 carrying
    three times the weight of column 3. After discretization, the feature
    with the largest Chi2 score is expected to be column 1.
    """
    nrows, ncols = 500, 5
    X = np.random.randint(4, size=(nrows, ncols))
    # Column 1 has weight -3, column 3 has weight 1; with inputs in 0..3
    # the result ranges over 5..11.
    y = 10 + (-3 * X[:, 1] + X[:, 3]) // 2
    domain = Domain.from_numpy(X, y)
    # Keep the inferred attributes but replace the class variable with a
    # discrete one whose values are the distinct entries of y.
    domain = Domain(domain.attributes,
                    DiscreteVariable('c', values=np.unique(y)))
    table = Table(domain, X, y)
    data = preprocess.Discretize()(table)
    scorer = Chi2()
    sc = [scorer(data, a) for a in range(ncols)]
    # assertEqual reports the actual winning index on failure, unlike
    # assertTrue(... == 1), which only reports "False is not true".
    self.assertEqual(np.argmax(sc), 1)
def test_discrete_scores_on_continuous_features(self):
    """Gini and Chi2 must work on ``self.iris`` through SelectBestFeatures.

    For each scorer, the attributes of the selected data must equal
    (petal_length, petal_width, sepal_length, sepal_width) in that order.
    Scoring the whole result must yield 4 scores, and scoring the single
    ``petal_length`` column must yield a ``float``.
    """
    columns = self.iris.columns
    # The expected order is the same for both scorers, so build it once.
    expected_order = (
        columns.petal_length,
        columns.petal_width,
        columns.sepal_length,
        columns.sepal_width,
    )

    for scorer in (Gini(), Chi2()):
        selected = SelectBestFeatures(method=scorer)(self.iris)
        self.assertSequenceEqual(selected.domain.attributes, expected_order)

        all_scores = scorer(selected)
        self.assertEqual(len(all_scores), 4)

        single_score = scorer(selected, columns.petal_length)
        self.assertIsInstance(single_score, float)