# Shared third-party test imports; the classifiers (DecisionTree, SVM,
# Perceptron, SVMRandomForest, AdaBoost, ...) and get_data are
# project-local, so their import paths are omitted here.
import numpy as np
import pandas as pd
from numpy.testing import assert_almost_equal
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


def test_on_numeric_madelon(self):
    X, y = get_data('madelon')
    X_test, y_test = get_data('madelon', test=True)
    clf = DecisionTree()
    clf.fit(X, y)
    y_hat = clf.predict(X_test)
    score = accuracy_score(y_test, y_hat)
    self.assertTrue(score > 0.97)
def test_fit(self):
    clf = LinearGradientDescent()
    X, y = get_data('table2')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    assert_almost_equal(score, 0.833333333)
def test_fit(self):
    clf = Perceptron(weight_initialization='zeros', shuffle=False)
    X, y = get_data('table2')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    assert_almost_equal(score, 0.8333333)
def test_fit_madelon(self):
    # Deterministic setup (zero-initialized weights, no shuffling, a
    # single epoch) so the expected score is reproducible.
    clf = SVM(shuffle=False, epochs=1, weight_initialization='zeros')
    X, y = get_data('madelon')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    assert_almost_equal(score, 0.5505, decimal=4)
def test_fit_madelon(self):
    np.random.seed(0)
    clf = SVMRandomForest(N=5, m=20)
    X, y = get_data('madelon')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    # NOTE: accuracy_score is bounded to [0, 1], so this assertion can
    # never pass; -1 reads as a placeholder for an expected score that
    # was never filled in.
    assert_almost_equal(score, -1)
def test_fit_handwriting(self):
    np.random.seed(0)
    clf = SVMRandomForest(N=5, m=100)
    X, y = get_data('handwriting')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    assert_almost_equal(score, 1)
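# A guess at SVMRandomForest's shape, for orientation only: assuming N is
# the number of ensemble members and m is the size of the random feature
# subset each member trains on (both inferred from the call sites above),
# the classic feature-bagging recipe looks roughly like the sketch below.
# Every name here is hypothetical, not the project's actual API.
class FeatureBaggedEnsemble:
    """Sketch: bootstrap rows, subsample features, majority-vote."""

    def __init__(self, make_base, N=5, m=20, rng=None):
        self.make_base = make_base          # factory for the base learner
        self.N, self.m = N, m
        self.rng = rng or np.random.default_rng(0)
        self.members = []                   # (feature indices, fitted model)

    def fit(self, X, y):
        n_samples, n_features = X.shape
        for _ in range(self.N):
            feats = self.rng.choice(n_features, size=self.m, replace=False)
            rows = self.rng.choice(n_samples, size=n_samples, replace=True)
            model = self.make_base()
            model.fit(X[np.ix_(rows, feats)], y[rows])
            self.members.append((feats, model))
        return self

    def predict(self, X):
        # Each member votes using only its own feature subset; labels are
        # assumed to be in {-1, +1}.
        votes = np.stack([model.predict(X[:, feats])
                          for feats, model in self.members])
        return np.sign(votes.sum(axis=0))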
def test_numerical_data_works_with_missing_method_choices(self):
    X, y = get_data('blobs')
    clf = DecisionTree(missing_method='majority_value')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    self.assertTrue(score > 0.97)
def test_numerical_data(self):
    X, y = get_data('blobs')
    clf = DecisionTree()
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    self.assertTrue(score > 0.97)
def test_setting_a(self):
    X, y = get_data('mushroom', setting='SettingA')
    X_test, y_test = get_data('mushroom', setting='SettingA', test=True)
    clf = DecisionTree()
    clf.fit(X, y)
    y_hat = clf.predict(X_test)
    score = accuracy_score(y_test, y_hat)
    self.assertEqual(score, 1)

    # How does it compare to sklearn? One-hot encode the categorical
    # mushroom features first, since DecisionTreeClassifier only accepts
    # numeric input.
    clf = DecisionTreeClassifier()
    Xt = pd.get_dummies(pd.DataFrame(X)).values
    Xt_test = pd.get_dummies(pd.DataFrame(X_test)).values
    clf.fit(Xt, y)
    y_hat = clf.predict(Xt_test)
    score = accuracy_score(y_test, y_hat)
    self.assertEqual(score, 1)
def test_stuff(self):
    return  # TODO: re-enable once the question below is answered.
    # TODO: how do you use the same base classifier over & over?
    # Nothing makes it any different, so how do you get it to select
    # different things? (See the reweighting sketch after this test.)
    X, y = get_data('table2')
    clf = AdaBoost([
        DecisionTree(max_depth=1),
        DecisionTree(max_depth=2),
        DecisionTree(max_depth=3),
        DecisionTree(max_depth=1, best_attr_method='gini'),
        DecisionTree(max_depth=2, best_attr_method='gini'),
        DecisionTree(max_depth=3, best_attr_method='gini'),
    ], T=5)
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    self.assertEqual(score, 1)
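# On the TODO above: AdaBoost does not need distinct base classifiers.
# Each round re-fits a fresh copy of the *same* learner on a reweighted
# view of the data, where previously misclassified points carry more
# weight, so every copy ends up different. A minimal sketch of that loop,
# assuming X, y are NumPy arrays, labels are in {-1, +1}, and the base
# learner lacks sample-weight support (hence the weighted bootstrap);
# all names here are hypothetical.
import copy


def adaboost_fit(base, X, y, T=5, rng=np.random.default_rng(0)):
    n = len(y)
    w = np.full(n, 1.0 / n)                   # start with uniform weights
    stages = []                               # (alpha_t, h_t) pairs
    for _ in range(T):
        rows = rng.choice(n, size=n, p=w)     # weighted bootstrap sample
        h = copy.deepcopy(base)               # fresh copy of the same learner
        h.fit(X[rows], y[rows])
        pred = h.predict(X)
        err = w[pred != y].sum()              # weighted training error
        if err >= 0.5:                        # no better than chance: stop
            break
        alpha = 0.5 * np.log((1 - err) / max(err, 1e-12))
        w *= np.exp(-alpha * y * pred)        # up-weight the mistakes
        w /= w.sum()
        stages.append((alpha, h))
    return stages


def adaboost_predict(stages, X):
    return np.sign(sum(alpha * h.predict(X) for alpha, h in stages))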