示例#1
0
    def test_on_numeric_madelon(self):
        # Fit a default DecisionTree on the 'madelon' data and require an
        # accuracy above 0.97 on the split returned with test=True.
        train_features, train_labels = get_data('madelon')
        eval_features, eval_labels = get_data('madelon', test=True)

        tree = DecisionTree()
        tree.fit(train_features, train_labels)
        eval_accuracy = accuracy_score(eval_labels, tree.predict(eval_features))
        self.assertTrue(eval_accuracy > 0.97)
 def test_fit(self):
     # Fit LinearGradientDescent on 'table2' and compare its accuracy on the
     # very data it was fitted on against the pinned value.
     model = LinearGradientDescent()
     features, labels = get_data('table2')
     model.fit(features, labels)
     fitted_accuracy = accuracy_score(labels, model.predict(features))
     assert_almost_equal(fitted_accuracy, 0.833333333)
 def test_fit(self):
     # Fit a Perceptron with zero-initialised weights and shuffling disabled
     # on 'table2', then compare its accuracy on that same data against the
     # pinned value.
     model = Perceptron(weight_initialization='zeros', shuffle=False)
     features, labels = get_data('table2')
     model.fit(features, labels)
     fitted_accuracy = accuracy_score(labels, model.predict(features))
     assert_almost_equal(fitted_accuracy, 0.8333333)
示例#4
0
 def test_fit_madelon(self):
     # Fit an SVM for a single epoch (no shuffling, zero-initialised weights)
     # on 'madelon' and compare its accuracy on that same data against the
     # pinned value, to four decimal places.
     model = SVM(shuffle=False, epochs=1, weight_initialization='zeros')
     features, labels = get_data('madelon')
     model.fit(features, labels)
     fitted_accuracy = accuracy_score(labels, model.predict(features))
     assert_almost_equal(fitted_accuracy, 0.5505, decimal=4)
示例#5
0
 def test_fit_madelon(self):
     # Fit SVMRandomForest(N=5, m=20) on 'madelon' with NumPy's global RNG
     # seeded to 0 and compare its accuracy on that same data against the
     # expected value.
     np.random.seed(0)
     forest = SVMRandomForest(N=5, m=20)
     features, labels = get_data('madelon')
     forest.fit(features, labels)
     fitted_accuracy = accuracy_score(labels, forest.predict(features))
     # NOTE(review): an expected accuracy of -1 looks like a placeholder;
     # presumably accuracy_score returns a fraction in [0, 1], in which case
     # this assertion can never pass. Confirm and pin the real value.
     assert_almost_equal(fitted_accuracy, -1)
示例#6
0
 def test_fit_handwriting(self):
     # Fit SVMRandomForest(N=5, m=100) on 'handwriting' with NumPy's global
     # RNG seeded to 0 and expect an accuracy of 1 on that same data.
     np.random.seed(0)
     forest = SVMRandomForest(N=5, m=100)
     features, labels = get_data('handwriting')
     forest.fit(features, labels)
     fitted_accuracy = accuracy_score(labels, forest.predict(features))
     assert_almost_equal(fitted_accuracy, 1)
示例#7
0
    def test_numerical_data_works_with_missing_method_choices(self):
        # Fit a DecisionTree configured with missing_method='majority_value'
        # on 'blobs' and require an accuracy above 0.97 on that same data.
        features, labels = get_data('blobs')

        tree = DecisionTree(missing_method='majority_value')
        tree.fit(features, labels)
        fitted_accuracy = accuracy_score(labels, tree.predict(features))
        self.assertTrue(fitted_accuracy > 0.97)
示例#8
0
    def test_numerical_data(self):
        # Fit a default DecisionTree on 'blobs' and require an accuracy above
        # 0.97 on that same data.
        features, labels = get_data('blobs')

        tree = DecisionTree()
        tree.fit(features, labels)
        fitted_accuracy = accuracy_score(labels, tree.predict(features))
        self.assertTrue(fitted_accuracy > 0.97)
示例#9
0
    def test_setting_a(self):
        # Train a DecisionTree on the mushroom 'SettingA' data and require an
        # accuracy of exactly 1 on the split returned with test=True; then fit
        # sklearn's DecisionTreeClassifier on a one-hot encoding of the same
        # data as a point of comparison.
        X, y = get_data('mushroom', setting='SettingA')
        X_test, y_test = get_data('mushroom', setting='SettingA', test=True)

        clf = DecisionTree()
        clf.fit(X, y)
        y_hat = clf.predict(X_test)
        score = accuracy_score(y_test, y_hat)
        self.assertEqual(score, 1)

        # how does it compare to sklearn
        clf = DecisionTreeClassifier()
        train_dummies = pd.get_dummies(pd.DataFrame(X))
        # Encoding the two splits independently only lines up when both hold
        # exactly the same category values. Reindex the test encoding onto the
        # training columns so every feature keeps its position: categories
        # absent from the test split become all-zero columns, and categories
        # the training split never saw are dropped.
        test_dummies = pd.get_dummies(pd.DataFrame(X_test)).reindex(
            columns=train_dummies.columns, fill_value=0)
        Xt = train_dummies.values
        # Cast to float so columns introduced by the reindex cannot leave a
        # mixed-dtype (object) array.
        Xt_test = test_dummies.astype(float).values
        clf.fit(Xt, y)
        y_hat = clf.predict(Xt_test)
        score = accuracy_score(y_test, y_hat)
        self.assertEqual(score, 1)
示例#10
0
    def test_stuff(self):
        return # TODO

        # TODO how do you use the same base classifier over & over?
        # nothing makes it any different, so how do you get it
        # to select different things...
        X,y = get_data('table2')
        clf = AdaBoost([
            DecisionTree(max_depth=1),
            DecisionTree(max_depth=2),
            DecisionTree(max_depth=3),
            DecisionTree(max_depth=1, best_attr_method='gini'),
            DecisionTree(max_depth=2, best_attr_method='gini'),
            DecisionTree(max_depth=3, best_attr_method='gini'),
        ], T=5)
        clf.fit(X, y)
        y_hat = clf.predict(X)
        score = accuracy_score(y, y_hat)
        self.assertEqual(score, 1)