result = "" result += "True " if p == real else "False " result += "positive" if p == 1 else "negative" result += " - confidence: %.5f" % prediction[p] return result if __name__ == "__main__": # ada = AdaBoostClassifier() # ada.n_estimators = 50 # ada.base_estimator.max_depth = 1 random_forest = RandomForestClassifier(n_estimators=100) category = "trilobite" dataset = "all" datamanager = CaltechManager() datamanager.PATHS["RESULTS"] = os.path.join(datamanager.PATHS["BASE"], "results_trilobite_rf_testing") # vcd = VisualConceptDetection(ada, datamanager) vcd = VisualConceptDetection(random_forest, datamanager) clf = vcd.load_object("Classifier", category) feature_importances = clf.feature_importances_ sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category) class_vector = vcd.datamanager.build_class_vector(dataset, category) pred = clf.predict_proba(sample_matrix) vis = EnsembleVisualization(datamanager) del clf image_titles = [vis.get_image_title(prediction, real) for prediction, real in
class TestCaltechManager(unittest.TestCase):
    def setUp(self):
        self.datamanager = CaltechManager()
        self.datamanager.change_base_path(os.path.join(BASE_PATH, "testdata"))

    def test_invalid_dataset_caltech(self):
        self.assertRaises(InvalidDatasetException,
                          self.datamanager.build_sample_matrix, "rubbish", "test")

    def test_invalid_dataset_caltech2(self):
        self.assertRaises(InvalidDatasetException,
                          self.datamanager.build_class_vector, "rubbish", "test")

    def test_invalid_category_caltech(self):
        self.assertRaises(NoSuchCategoryException,
                          self.datamanager.get_positive_samples, "test", "rubbish")

    def test_invalid_category_caltech2(self):
        self.assertRaises(NoSuchCategoryException,
                          self.datamanager.build_sample_matrix, "test", "rubbish")

    def test_training_sample_matrix(self):
        samples = self.datamanager.build_sample_matrix("train", "TestFake")
        should_be = np.array(
            [[0.44842428, 0.50402522, 0.45302102, 0.54796243, 0.82176286, 0.11623112],
             [0.31588301, 0.05166245, 0.16203263, 0.02196996, 0.96935761, 0.9854272],
             [0.12442154, 0.57743013, 0.9548108, 0.22592719, 0.10155164, 0.60750473],
             [0.53320956, 0.18181397, 0.60112703, 0.09004746, 0.31448245, 0.85619318],
             [0.18139255, 0.83218205, 0.87969971, 0.81630158, 0.57571691, 0.08127511]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    def test_test_sample_matrix(self):
        samples = self.datamanager.build_sample_matrix("test", "TestFake")
        should_be = np.array(
            [[0.64663881, 0.55629711, 0.11966438, 0.04559849, 0.69156636, 0.4500224],
             [0.38948518, 0.33885501, 0.567841, 0.36167425, 0.18220702, 0.57701336],
             [0.08660618, 0.83642531, 0.9239062, 0.53778457, 0.56708116, 0.13766008],
             [0.31313366, 0.88874122, 0.20000355, 0.56186443, 0.15771926, 0.81349361]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    def test_all_sample_matrix(self):
        samples = self.datamanager.build_sample_matrix("all", "TestFake")
        should_be = np.array(
            [[0.44842428, 0.50402522, 0.45302102, 0.54796243, 0.82176286, 0.11623112],
             [0.31588301, 0.05166245, 0.16203263, 0.02196996, 0.96935761, 0.9854272],
             [0.64663881, 0.55629711, 0.11966438, 0.04559849, 0.69156636, 0.4500224],
             [0.38948518, 0.33885501, 0.567841, 0.36167425, 0.18220702, 0.57701336],
             [0.12442154, 0.57743013, 0.9548108, 0.22592719, 0.10155164, 0.60750473],
             [0.53320956, 0.18181397, 0.60112703, 0.09004746, 0.31448245, 0.85619318],
             [0.18139255, 0.83218205, 0.87969971, 0.81630158, 0.57571691, 0.08127511],
             [0.08660618, 0.83642531, 0.9239062, 0.53778457, 0.56708116, 0.13766008],
             [0.31313366, 0.88874122, 0.20000355, 0.56186443, 0.15771926, 0.81349361]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    def test_all_sample_matrix_exclude_feature(self):
        self.datamanager.exclude_feature = 4
        samples = self.datamanager.build_sample_matrix("all", "TestFake")
        should_be = np.array(
            [[0.44842428, 0.50402522, 0.45302102, 0.54796243, 0.11623112],
             [0.31588301, 0.05166245, 0.16203263, 0.02196996, 0.9854272],
             [0.64663881, 0.55629711, 0.11966438, 0.04559849, 0.4500224],
             [0.38948518, 0.33885501, 0.567841, 0.36167425, 0.57701336],
             [0.12442154, 0.57743013, 0.9548108, 0.22592719, 0.60750473],
             [0.53320956, 0.18181397, 0.60112703, 0.09004746, 0.85619318],
             [0.18139255, 0.83218205, 0.87969971, 0.81630158, 0.08127511],
             [0.08660618, 0.83642531, 0.9239062, 0.53778457, 0.13766008],
             [0.31313366, 0.88874122, 0.20000355, 0.56186443, 0.81349361]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    @unittest.expectedFailure  # TODO: dependent on file order
    def test_complete_sample_matrix(self):
        samples = self.datamanager.build_complete_sample_matrix("train")
        should_be = np.array(
            [[0.31313366, 0.88874122, 0.20000355, 0.56186443, 0.15771926, 0.81349361],
             [0.12442154, 0.57743013, 0.9548108, 0.22592719, 0.10155164, 0.60750473],
             [0.53320956, 0.18181397, 0.60112703, 0.09004746, 0.31448245, 0.85619318],
             [0.18139255, 0.83218205, 0.87969971, 0.81630158, 0.57571691, 0.08127511],
             [0.44842428, 0.50402522, 0.45302102, 0.54796243, 0.82176286, 0.11623112],
             [0.31588301, 0.05166245, 0.16203263, 0.02196996, 0.96935761, 0.9854272]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    @unittest.expectedFailure  # TODO: dependent on file order
    def test_complete_sample_matrix_exclude_feature(self):
        self.datamanager.exclude_feature = 1
        samples = self.datamanager.build_complete_sample_matrix("train")
        should_be = np.array(
            [[0.31313366, 0.20000355, 0.56186443, 0.15771926, 0.81349361],
             [0.12442154, 0.9548108, 0.22592719, 0.10155164, 0.60750473],
             [0.53320956, 0.60112703, 0.09004746, 0.31448245, 0.85619318],
             [0.18139255, 0.87969971, 0.81630158, 0.57571691, 0.08127511],
             [0.44842428, 0.45302102, 0.54796243, 0.82176286, 0.11623112],
             [0.31588301, 0.16203263, 0.02196996, 0.96935761, 0.9854272]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.00000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))

    def test_complete_sample_matrix_fail(self):
        self.assertRaises(NotImplementedError,
                          self.datamanager.build_complete_sample_matrix, "all")

    def test_training_class_vector(self):
        classes = self.datamanager.build_class_vector("train", "TestFake")
        should_be = np.array([1, 1, 0, 0, 0])
        self.assertTrue((classes == should_be).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, classes))

    def test_test_class_vector(self):
        classes = self.datamanager.build_class_vector("test", "TestFake")
        should_be = np.array([1, 1, 0, 0])
        self.assertTrue((classes == should_be).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, classes))

    def test_complete_class_vector(self):
        classes = self.datamanager.build_class_vector("all", "TestFake")
        should_be = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0])
        self.assertTrue((classes == should_be).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, classes))

    def test_sample_matrix_pca(self):
        self.datamanager.use_pca(n_components=1)
        samples = self.datamanager.build_sample_matrix("all", "TestFake")
        should_be = np.array(
            [[-0.24263228], [0.85717554], [0.29054203], [0.03857126], [-0.18379566],
             [0.44021899], [-0.78841356], [-0.65111911], [-0.08255303]],
            dtype=np.float32)
        difference_matrix = np.abs(samples - should_be)
        self.assertTrue((difference_matrix < 0.000001).all(),
                        "Should be:\n%s\nbut is:\n%s" % (should_be, samples))
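
# --- Sketch (assumption, not shown in this excerpt): a conventional entry point
# --- so the TestCaltechManager suite above can be run directly as a script.
if __name__ == "__main__":
    unittest.main()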
result += "True " if p == real else "False " result += "positive" if p == 1 else "negative" result += " - distance: %.5f" % prediction return result def get_svm_importances(coef): """Normalize the SVM weights.""" factor = 1.0 / np.linalg.norm(coef) return (coef * factor).ravel() if __name__ == "__main__": svm = LinearSVC(C=0.1) category = "Faces" dataset = "all" datamanager = CaltechManager() datamanager.PATHS["RESULTS"] = os.path.join(datamanager.PATHS["BASE"], "results_Faces_LinearSVC_normalized") vcd = VisualConceptDetection(svm, datamanager) clf = vcd.load_object("Classifier", category) importances = get_svm_importances(clf.coef_) sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category) class_vector = vcd.datamanager.build_class_vector(dataset, category) pred = clf.decision_function(sample_matrix) del clf image_titles = [get_image_title(prediction, real) for prediction, real in izip(pred, class_vector)] del class_vector del sample_matrix
        gammas.append(gamma)
        kernels.append(chi2_kernel(X, X, gamma=1.0 / gamma))
    return kernels, gammas


if __name__ == "__main__":
    total = time.time()
    params = {
        "n_estimators": [10, 50, 100, 200, 400, 750, 800, 1000, 2000],
        "base_estimator__max_depth": [1, 2, 3, 5],
        "base_estimator__random_state": [0],
        "random_state": [0]
    }
    # params = {"C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}

    datamanager = CaltechManager()
    categories = [c for c in os.listdir(datamanager.PATHS["CATEGORIES_DIR"])
                  if c != datamanager.BACKGROUND and os.path.splitext(c)[1] != ".py"]

    # kernels, gammas = build_train_kernels(categories, datamanager)
    # print "Finished building kernels"
    # grids = (GridSearch(SVC(kernel="precomputed"), c) for c in categories)
    # grids = (GridSearch(RandomForestClassifier(), c) for c in categories)
    grids = [GridSearch(AdaBoostClassifier(), datamanager, c) for c in categories]
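
# --- Sketch (assumption): how a precomputed chi2 kernel, as built above, would be
# --- fed to SVC(kernel="precomputed") -- the classifier hinted at in the
# --- commented-out GridSearch line. The tiny arrays are placeholders, not project
# --- data; at predict time the kernel is computed between test and training rows.
def _chi2_precomputed_svc_example():
    import numpy as np  # local imports to keep the sketch self-contained
    from sklearn.svm import SVC
    from sklearn.metrics.pairwise import chi2_kernel

    X_train = np.array([[0.2, 0.8], [0.9, 0.1], [0.3, 0.7], [0.8, 0.2]])
    y_train = np.array([1, 0, 1, 0])
    X_test = np.array([[0.25, 0.75]])

    gamma = 0.5
    K_train = chi2_kernel(X_train, X_train, gamma=gamma)  # (n_train, n_train) Gram matrix
    K_test = chi2_kernel(X_test, X_train, gamma=gamma)    # test rows vs. training rows
    svc = SVC(kernel="precomputed").fit(K_train, y_train)
    return svc.predict(K_test)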
import os
import sys
import time

import numpy as np
import pylab as pl
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC

from datamanagers.CaltechManager import CaltechManager
from runGridSearch import GridSearch
from util import svm
from vcd import VisualConceptDetection

if __name__ == "__main__":
    category = "airplanes"
    total = time.time()

    clf = RandomForestClassifier(n_estimators=2000)
    # clf = AdaBoostClassifier(n_estimators=2000)
    # clf.base_estimator.max_depth = 4
    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)
    vcd.run(category)

    print "Total execution time: %f minutes" % ((time.time() - total) / 60.0)
result += "True " if p == real else "False " result += "positive" if p == 1 else "negative" result += " - confidence: %.5f" % prediction[p] return result if __name__ == "__main__": # ada = AdaBoostClassifier() # ada.n_estimators = 50 # ada.base_estimator.max_depth = 1 random_forest = RandomForestClassifier(n_estimators=100) category = "trilobite" dataset = "all" datamanager = CaltechManager() datamanager.PATHS["RESULTS"] = os.path.join( datamanager.PATHS["BASE"], "results_trilobite_rf_testing") # vcd = VisualConceptDetection(ada, datamanager) vcd = VisualConceptDetection(random_forest, datamanager) clf = vcd.load_object("Classifier", category) feature_importances = clf.feature_importances_ sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category) class_vector = vcd.datamanager.build_class_vector(dataset, category) pred = clf.predict_proba(sample_matrix) vis = EnsembleVisualization(datamanager) del clf
result += " - distance: %.5f" % prediction return result def get_svm_importances(coef): """Normalize the SVM weights.""" factor = 1.0 / np.linalg.norm(coef) return (coef * factor).ravel() if __name__ == "__main__": svm = LinearSVC(C=0.1) category = "Faces" dataset = "all" datamanager = CaltechManager() datamanager.PATHS["RESULTS"] = os.path.join( datamanager.PATHS["BASE"], "results_Faces_LinearSVC_normalized") vcd = VisualConceptDetection(svm, datamanager) clf = vcd.load_object("Classifier", category) importances = get_svm_importances(clf.coef_) sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category) class_vector = vcd.datamanager.build_class_vector(dataset, category) pred = clf.decision_function(sample_matrix) del clf image_titles = [ get_image_title(prediction, real) for prediction, real in izip(pred, class_vector)