def evaluate(category, clf, datamanager, data=(None, None)):
    """Evaluate the stored classifier of one category.

    A log file named after the current timestamp is placed under
    ``datamanager.PATHS["LOGS"]/evaluation/<class_name(clf)>/<category>/``.
    The object stored as "Classifier" for the category is then loaded and
    installed as the detector's classifier before evaluating.

    Args:
        category: Name of the category to evaluate.
        clf: Classifier; used to derive the log directory name and passed
            on to ``load_object``.
        datamanager: Data manager providing ``PATHS`` and handed to
            ``VisualConceptDetection``.
        data: Pair ``(X_test, y_test)``. If either element is None, the
            evaluation falls back to ``evaluate_test_set(category)``.

    Returns:
        Whatever the selected evaluation call returns.
    """
    log_dir = os.path.join(
        datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category)
    log_file = os.path.join(log_dir, str(datetime.now()) + ".log")

    vcd = VisualConceptDetection(None, datamanager, log_file=log_file)
    vcd.classifier = vcd.load_object("Classifier", category, clf)

    # Explicit samples are only used when both halves of the pair are given.
    if data[0] is not None and data[1] is not None:
        return vcd.evaluate(X_test=data[0], y_test=data[1])
    return vcd.evaluate_test_set(category)
class TestVisualConceptDetection(unittest.TestCase):
    """Unit tests for a few VisualConceptDetection helper methods."""

    def setUp(self):
        """Build a detector around an AdaBoost classifier and test data."""
        self.datamanager = MyTestDataManager()
        booster = AdaBoostClassifier(n_estimators=14)
        booster.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(booster, self.datamanager)

    def test_base_dir(self):
        """The test data manager must point "BOW" at testdata/bow."""
        expected = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], expected)

    def test_dump_and_load_object(self):
        """A dumped classifier can be loaded back as the same class."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: only the class of the reloaded object is checked, not its
        # contents.
        restored = self.vcd.load_object("test_classifier")
        self.assertIsInstance(restored, self.vcd.classifier.__class__)

    def test_count_methods(self):
        """Positives and negatives are counted from a 0/1 label vector."""
        labels = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(labels), 5)
        self.assertEqual(self.vcd.count_negatives(labels), 7)

    def test_sample_weights(self):
        """Expect weight 1.0 per positive and 3/7 per negative.

        The label vector below holds 3 positives and 7 negatives.
        """
        labels = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        positive_weight = 1.0
        negative_weight = 3.0 / 7.0
        expected = np.where(labels == 1, positive_weight, negative_weight)
        actual = self.vcd.calculate_weights(labels)
        message = "Should be:\n%s\nbut is:\n%s" % (expected, actual)
        self.assertTrue((actual == expected).all(), message)
class TestVisualConceptDetection(unittest.TestCase):
    """Unit tests for a few VisualConceptDetection helper methods."""

    def setUp(self):
        """Build a detector around an AdaBoost classifier and test data."""
        self.datamanager = MyTestDataManager()
        booster = AdaBoostClassifier(n_estimators=14)
        booster.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(booster, self.datamanager)

    def test_base_dir(self):
        """The test data manager must point "BOW" at testdata/bow."""
        expected = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], expected)

    def test_dump_and_load_object(self):
        """A dumped classifier can be loaded back as the same class."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: only the class of the reloaded object is checked, not its
        # contents.
        restored = self.vcd.load_object("test_classifier")
        self.assertIsInstance(restored, self.vcd.classifier.__class__)

    def test_count_methods(self):
        """Positives and negatives are counted from a 0/1 label vector."""
        labels = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(labels), 5)
        self.assertEqual(self.vcd.count_negatives(labels), 7)

    def test_sample_weights(self):
        """Expect weight 1.0 per positive and 3/7 per negative.

        The label vector below holds 3 positives and 7 negatives.
        """
        labels = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        positive_weight = 1.0
        negative_weight = 3.0 / 7.0
        expected = np.where(labels == 1, positive_weight, negative_weight)
        actual = self.vcd.calculate_weights(labels)
        message = "Should be:\n%s\nbut is:\n%s" % (expected, actual)
        self.assertTrue((actual == expected).all(), message)
def setUp(self):
    """Prepare the fixture shared by the test methods.

    Stores a ``MyTestDataManager`` on ``self.datamanager`` and a
    ``VisualConceptDetection`` wrapping a 14-estimator AdaBoost classifier
    on ``self.vcd``.
    """
    self.datamanager = MyTestDataManager()
    booster = AdaBoostClassifier(n_estimators=14)
    # NOTE(review): assumes ``base_estimator`` is already an estimator
    # object right after construction -- confirm for the installed
    # scikit-learn version.
    booster.base_estimator.max_depth = 10
    self.vcd = VisualConceptDetection(booster, self.datamanager)
return result  # NOTE(review): tail of a definition that starts above this chunk; keep its original indentation


if __name__ == "__main__":
    # Script entry point: load the classifier stored for one Caltech
    # category, score every sample of the chosen dataset and build one
    # image title per sample.

    # Earlier AdaBoost configuration, kept for reference:
    # ada = AdaBoostClassifier()
    # ada.n_estimators = 50
    # ada.base_estimator.max_depth = 1
    random_forest = RandomForestClassifier(n_estimators=100)
    category = "trilobite"
    dataset = "all"
    datamanager = CaltechManager()
    # Redirect the "RESULTS" path to a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(datamanager.PATHS["BASE"], "results_trilobite_rf_testing")
    # vcd = VisualConceptDetection(ada, datamanager)
    vcd = VisualConceptDetection(random_forest, datamanager)
    # The classifier used below is the object returned by load_object, not
    # the freshly constructed random_forest instance.
    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_
    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.predict_proba(sample_matrix)
    vis = EnsembleVisualization(datamanager)
    # The classifier is not referenced again in the visible code;
    # presumably dropped here to release memory -- TODO confirm.
    del clf
    # One title per (prediction, true class) pair, paired in order.
    image_titles = [vis.get_image_title(prediction, real) for prediction, real in izip(pred, class_vector)]
    # Titles are built, so the raw label vector and sample matrix are
    # released as well.
    del class_vector
    del sample_matrix
# Imports are kept in their original order. Several of them (sys, os, svm,
# np, pl, GridSearch) are not referenced by the visible script, and
# AdaBoostClassifier, LinearSVC and SVC are only used by the commented-out
# alternatives below.
import sys
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC
from vcd import VisualConceptDetection
import os
import time
from util import svm
from datamanagers.CaltechManager import CaltechManager
import numpy as np
import pylab as pl
from runGridSearch import GridSearch

if __name__ == "__main__":
    # Script entry point: run the visual concept detection pipeline for a
    # single category and report the wall-clock time it took.
    category = "airplanes"
    total = time.time()  # start timestamp, taken before the classifier is built

    clf = RandomForestClassifier(n_estimators=2000)
    # Alternative classifiers, kept for reference:
    # clf = AdaBoostClassifier(n_estimators = 2000)
    # clf.base_estimator.max_depth = 4
    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)
    vcd.run(category)

    # Single-argument parenthesised form: prints the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Total execution time: %f minutes" % ((time.time() - total) / 60.0))
# Imports are kept in their original order. Several of them (sys, os, svm,
# np, pl, GridSearch) are not referenced by the visible script, and
# AdaBoostClassifier, LinearSVC and SVC are only used by the commented-out
# alternatives below.
import sys
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC
from vcd import VisualConceptDetection
import os
import time
from util import svm
from datamanagers.CaltechManager import CaltechManager
import numpy as np
import pylab as pl
from runGridSearch import GridSearch

if __name__ == "__main__":
    # Script entry point: run the visual concept detection pipeline for a
    # single category and report the wall-clock time it took.
    category = "airplanes"
    total = time.time()  # start timestamp, taken before the classifier is built

    clf = RandomForestClassifier(n_estimators=2000)
    # Alternative classifiers, kept for reference:
    # clf = AdaBoostClassifier(n_estimators = 2000)
    # clf.base_estimator.max_depth = 4
    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)
    vcd.run(category)

    # Single-argument parenthesised form: prints the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Total execution time: %f minutes" % ((time.time() - total) / 60.0))
if __name__ == "__main__":
    # Script entry point: load the classifier stored for one Caltech
    # category, score every sample of the chosen dataset and build one
    # image title per sample.

    # Earlier AdaBoost configuration, kept for reference:
    # ada = AdaBoostClassifier()
    # ada.n_estimators = 50
    # ada.base_estimator.max_depth = 1
    random_forest = RandomForestClassifier(n_estimators=100)
    category = "trilobite"
    dataset = "all"
    datamanager = CaltechManager()
    # Redirect the "RESULTS" path to a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(
        datamanager.PATHS["BASE"], "results_trilobite_rf_testing")
    # vcd = VisualConceptDetection(ada, datamanager)
    vcd = VisualConceptDetection(random_forest, datamanager)
    # The classifier used below is the object returned by load_object, not
    # the freshly constructed random_forest instance.
    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_
    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.predict_proba(sample_matrix)
    vis = EnsembleVisualization(datamanager)
    # The classifier is not referenced again in the visible code;
    # presumably dropped here to release memory -- TODO confirm.
    del clf
    # One title per (prediction, true class) pair, paired in order.
    image_titles = [
        vis.get_image_title(prediction, real)
        for prediction, real in izip(pred, class_vector)
    ]
    # Titles are built, so the raw label vector is released.
    del class_vector
def get_svm_importances(coef):
    """Normalize the SVM weights and return them as a flat array.

    The weights are scaled by the reciprocal of ``np.linalg.norm(coef)``,
    so the returned vector has unit norm.

    Args:
        coef: Array of SVM coefficients (e.g. ``clf.coef_``).

    Returns:
        1-D array holding the scaled coefficients. If every coefficient is
        zero the norm is zero and no scaling is possible; a zero vector of
        the same size is returned instead of NaNs.
    """
    norm = np.linalg.norm(coef)
    if norm == 0:
        # Dividing by a zero norm would yield inf/NaN (plus a runtime
        # warning); an all-zero weight vector simply stays all-zero.
        return np.zeros(np.size(coef))
    factor = 1.0 / norm
    return (coef * factor).ravel()


if __name__ == "__main__":
    # Script entry point: load the stored linear SVM for one Caltech
    # category, derive normalized weight importances and build one image
    # title per sample.
    svm = LinearSVC(C=0.1)
    category = "Faces"
    dataset = "all"
    datamanager = CaltechManager()
    # Redirect the "RESULTS" path to a run-specific directory below "BASE".
    datamanager.PATHS["RESULTS"] = os.path.join(
        datamanager.PATHS["BASE"], "results_Faces_LinearSVC_normalized")
    vcd = VisualConceptDetection(svm, datamanager)
    # The classifier used below is the object returned by load_object, not
    # the freshly constructed svm instance.
    clf = vcd.load_object("Classifier", category)
    importances = get_svm_importances(clf.coef_)
    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.decision_function(sample_matrix)
    # The classifier is not referenced again in the visible code;
    # presumably dropped here to release memory -- TODO confirm.
    del clf
    # NOTE(review): get_image_title is not defined in the visible code;
    # presumably provided elsewhere in this file -- confirm.
    image_titles = [
        get_image_title(prediction, real)
        for prediction, real in izip(pred, class_vector)
    ]
    # Titles are built, so the raw label vector and sample matrix are
    # released as well.
    del class_vector
    del sample_matrix