def test_fit(self):
    """A zero-initialized, non-shuffling Perceptron reaches ~83.3% on table2."""
    model = Perceptron(weight_initialization='zeros', shuffle=False)
    features, labels = get_data('table2')
    model.fit(features, labels)
    predictions = model.predict(features)
    assert_almost_equal(accuracy_score(labels, predictions), 0.8333333)
def test_fit(self):
    """LinearGradientDescent reaches ~83.3% training accuracy on table2."""
    model = LinearGradientDescent()
    features, labels = get_data('table2')
    model.fit(features, labels)
    predictions = model.predict(features)
    assert_almost_equal(accuracy_score(labels, predictions), 0.833333333)
def test_fit_madelon(self):
    """A single-epoch, zero-initialized, non-shuffling SVM scores ~0.5505 on madelon."""
    model = SVM(weight_initialization='zeros', epochs=1, shuffle=False)
    features, labels = get_data('madelon')
    model.fit(features, labels)
    predictions = model.predict(features)
    assert_almost_equal(accuracy_score(labels, predictions), 0.5505, decimal=4)
def test_fit_madelon(self):
    """SVMRandomForest on madelon -- skipped until a real baseline is chosen.

    accuracy_score yields a value in [0, 1], so the original expectation of
    -1 could never be met; the assertion was a placeholder that always fails.
    An explicit skip makes the unfinished state visible in the test report.
    """
    self.skipTest('TODO: establish expected SVMRandomForest accuracy on madelon')
    # Body kept intact so the test can be re-enabled by deleting the skip.
    np.random.seed(0)  # the forest subsamples randomly; pin the seed
    clf = SVMRandomForest(N=5, m=20)
    X, y = get_data('madelon')
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    assert_almost_equal(score, -1)  # TODO: replace -1 with the real expected accuracy
def test_fit_handwriting(self):
    """A 5-tree SVMRandomForest (m=100) perfectly fits the handwriting set."""
    np.random.seed(0)  # forest construction/fitting is stochastic; pin the seed
    forest = SVMRandomForest(N=5, m=100)
    features, labels = get_data('handwriting')
    forest.fit(features, labels)
    predictions = forest.predict(features)
    assert_almost_equal(accuracy_score(labels, predictions), 1)
def test_numerical_data_works_with_missing_method_choices(self):
    """DecisionTree with missing_method='majority_value' still separates blobs."""
    features, labels = get_data('blobs')
    tree = DecisionTree(missing_method='majority_value')
    tree.fit(features, labels)
    predictions = tree.predict(features)
    self.assertTrue(accuracy_score(labels, predictions) > 0.97)
def test_numerical_data(self):
    """A default DecisionTree nearly perfectly fits the numeric blobs set."""
    features, labels = get_data('blobs')
    tree = DecisionTree()
    tree.fit(features, labels)
    predictions = tree.predict(features)
    self.assertTrue(accuracy_score(labels, predictions) > 0.97)
def test_on_numeric_madelon(self):
    """A default DecisionTree generalizes to the held-out madelon split."""
    features, labels = get_data('madelon')
    test_features, test_labels = get_data('madelon', test=True)
    tree = DecisionTree()
    tree.fit(features, labels)
    predictions = tree.predict(test_features)
    self.assertTrue(accuracy_score(test_labels, predictions) > 0.97)
def test_setting_a(self):
    """Our DecisionTree matches sklearn's perfect score on mushroom SettingA."""
    X, y = get_data('mushroom', setting='SettingA')
    X_test, y_test = get_data('mushroom', setting='SettingA', test=True)

    clf = DecisionTree()
    clf.fit(X, y)
    y_hat = clf.predict(X_test)
    self.assertEqual(accuracy_score(y_test, y_hat), 1)

    # How does it compare to sklearn on a one-hot encoding of the data?
    # The two frames must be encoded against the SAME column set: calling
    # get_dummies independently on train and test can produce different
    # column sets/orders whenever a category value is absent from one split,
    # silently misaligning the features fed to the sklearn tree.
    dummies_train = pd.get_dummies(pd.DataFrame(X))
    dummies_test = pd.get_dummies(pd.DataFrame(X_test)).reindex(
        columns=dummies_train.columns, fill_value=0)
    sk_clf = DecisionTreeClassifier()
    sk_clf.fit(dummies_train.values, y)
    y_hat = sk_clf.predict(dummies_test.values)
    self.assertEqual(accuracy_score(y_test, y_hat), 1)
def test_stuff(self):
    """AdaBoost over a pool of DecisionTree stumps -- skipped, unfinished.

    Open question carried over from the original TODO: with a deterministic
    base learner, how does each boosting round pick a *different* hypothesis?
    Nothing differentiates repeated fits unless the learner responds to the
    reweighted sample, so the ensemble below may be degenerate.
    """
    # An explicit skip shows up in the test report; the original bare
    # `return` made this look like a passing test while the whole body
    # below it was dead code.
    self.skipTest('TODO: AdaBoost base-classifier selection not worked out yet')
    X, y = get_data('table2')
    clf = AdaBoost([
        DecisionTree(max_depth=1),
        DecisionTree(max_depth=2),
        DecisionTree(max_depth=3),
        DecisionTree(max_depth=1, best_attr_method='gini'),
        DecisionTree(max_depth=2, best_attr_method='gini'),
        DecisionTree(max_depth=3, best_attr_method='gini'),
    ], T=5)
    clf.fit(X, y)
    y_hat = clf.predict(X)
    score = accuracy_score(y, y_hat)
    self.assertEqual(score, 1)
def test_hw_example(self):
    """Four axis-sign stumps boosted for 4 rounds solve the homework toy set."""
    stumps = [
        RuleClassifier(lambda pts: np.sign(pts[:, 0])),
        RuleClassifier(lambda pts: np.sign(pts[:, 0] - 2)),
        RuleClassifier(lambda pts: -np.sign(pts[:, 0])),
        RuleClassifier(lambda pts: -np.sign(pts[:, 1])),
    ]
    booster = AdaBoost(stumps, T=4)
    points = np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]])
    labels = np.array([-1, 1, -1, -1])
    booster.fit(points, labels)
    predictions = booster.predict(points)
    self.assertEqual(accuracy_score(labels, predictions), 1)
def test_classifier(self):
    """Three threshold stumps boosted for 5 rounds separate the 10-point set."""
    stumps = [
        RuleClassifier(lambda pts: np.sign(1.5 - pts[:, 0])),
        RuleClassifier(lambda pts: np.sign(4.5 - pts[:, 0])),
        RuleClassifier(lambda pts: np.sign(pts[:, 1] - 5)),
    ]
    booster = AdaBoost(stumps, T=5)
    points = np.array([
        [1, 2], [1, 4], [2.5, 5.5], [3.5, 6.5], [4, 5.4],
        [2, 1], [2, 4], [3.5, 3.5], [5, 2], [5, 5.5],
    ])
    labels = np.array([1, 1, 1, 1, 1, -1, -1, -1, -1, -1])
    booster.fit(points, labels)
    predictions = booster.predict(points)
    self.assertEqual(accuracy_score(labels, predictions), 1)
def test_accuracy(self):
    """accuracy_score counts 4 agreements out of 10 labels -> 0.4."""
    truth = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    guesses = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
    assert_almost_equal(accuracy_score(truth, guesses), 0.4)