class TestVisualConceptDetection(unittest.TestCase):
    """Unit tests for ``VisualConceptDetection`` using ``MyTestDataManager``."""

    def setUp(self):
        """Build a fresh data manager and detector before each test."""
        self.datamanager = MyTestDataManager()
        clf = AdaBoostClassifier(n_estimators=14)
        # NOTE(review): assumes the classifier exposes a ready-made
        # ``base_estimator`` instance -- confirm for the installed
        # scikit-learn version.
        clf.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(clf, self.datamanager)

    def test_base_dir(self):
        """The manager's "BOW" path must be ``BASE_PATH/testdata/bow``."""
        expected = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], expected)

    def test_dump_and_load_object(self):
        """An object passed to ``dump_object`` loads back as the same type."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: weak check -- only the type of the reloaded object is
        # verified, not its contents.
        self.assertIsInstance(self.vcd.load_object("test_classifier"),
                              self.vcd.classifier.__class__)

    def test_count_methods(self):
        """Five ones and seven zeros are counted as positives/negatives."""
        classes = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(classes), 5)
        self.assertEqual(self.vcd.count_negatives(classes), 7)

    def test_sample_weights(self):
        """Entries equal to 1 are weighted 1.0, entries equal to 0 get 3/7."""
        vec = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        p = 1.0
        n = 3.0 / 7.0
        weights = np.array([p, n, n, n, p, p, n, n, n, n])
        calculated = self.vcd.calculate_weights(vec)
        # Compare with a tolerance instead of exact ``==``: the expected
        # value 3/7 is not exactly representable, so a mathematically
        # equivalent computation may differ in the last bits.
        np.testing.assert_allclose(
            calculated, weights,
            err_msg="Should be:\n%s\nbut is:\n%s" % (weights, calculated))
class TestVisualConceptDetection(unittest.TestCase):
    """Unit tests for ``VisualConceptDetection`` using ``MyTestDataManager``."""

    def setUp(self):
        """Build a fresh data manager and detector before each test."""
        self.datamanager = MyTestDataManager()
        clf = AdaBoostClassifier(n_estimators=14)
        # NOTE(review): assumes the classifier exposes a ready-made
        # ``base_estimator`` instance -- confirm for the installed
        # scikit-learn version.
        clf.base_estimator.max_depth = 10
        self.vcd = VisualConceptDetection(clf, self.datamanager)

    def test_base_dir(self):
        """The manager's "BOW" path must be ``BASE_PATH/testdata/bow``."""
        expected = os.path.join(BASE_PATH, "testdata", "bow")
        self.assertEqual(self.datamanager.PATHS["BOW"], expected)

    def test_dump_and_load_object(self):
        """An object passed to ``dump_object`` loads back as the same type."""
        self.vcd.dump_object(self.vcd.classifier, "test_classifier")
        # TODO: weak check -- only the type of the reloaded object is
        # verified, not its contents.
        self.assertIsInstance(self.vcd.load_object("test_classifier"),
                              self.vcd.classifier.__class__)

    def test_count_methods(self):
        """Five ones and seven zeros are counted as positives/negatives."""
        classes = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0])
        self.assertEqual(self.vcd.count_positives(classes), 5)
        self.assertEqual(self.vcd.count_negatives(classes), 7)

    def test_sample_weights(self):
        """Entries equal to 1 are weighted 1.0, entries equal to 0 get 3/7."""
        vec = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        p = 1.0
        n = 3.0 / 7.0
        weights = np.array([p, n, n, n, p, p, n, n, n, n])
        calculated = self.vcd.calculate_weights(vec)
        # Exact float equality is brittle here (3/7 has no exact binary
        # representation), so the arrays are compared within a tolerance.
        np.testing.assert_allclose(
            calculated, weights,
            err_msg="Should be:\n%s\nbut is:\n%s" % (weights, calculated))
def evaluate(category, clf, datamanager, data=(None, None)):
    """Evaluate the classifier of one category and return the result.

    A ``VisualConceptDetection`` instance is created with a time-stamped
    log file below ``datamanager.PATHS["LOGS"]``, and the classifier
    obtained from its ``load_object`` call is attached to it.

    If both entries of ``data`` are set they are passed on as ``X_test``
    and ``y_test``; otherwise the test set of ``category`` is evaluated.
    """
    log_dir = os.path.join(datamanager.PATHS["LOGS"], "evaluation",
                           class_name(clf), category)
    log_path = os.path.join(log_dir, str(datetime.now()) + ".log")

    detector = VisualConceptDetection(None, datamanager, log_file=log_path)
    detector.classifier = detector.load_object("Classifier", category, clf)

    # Explicit data is only used when both halves of the pair are present.
    if data[0] is not None and data[1] is not None:
        return detector.evaluate(X_test=data[0], y_test=data[1])
    return detector.evaluate_test_set(category)
def evaluate(category, clf, datamanager, data=(None, None)):
    """Run the evaluation of a classifier for a single category.

    ``data`` is a ``(samples, labels)`` pair. When either element is
    ``None`` the test set of ``category`` is evaluated instead.
    Evaluation output is logged to a time-stamped ``.log`` file below
    ``datamanager.PATHS["LOGS"]``.
    """
    evaluation_dir = os.path.join(
        datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category)
    log_name = str(datetime.now()) + ".log"

    runner = VisualConceptDetection(
        None, datamanager, log_file=os.path.join(evaluation_dir, log_name))
    runner.classifier = runner.load_object("Classifier", category, clf)

    has_explicit_data = (data[0] is not None) and (data[1] is not None)
    if not has_explicit_data:
        # Fall back to the test set when no complete data pair is given.
        return runner.evaluate_test_set(category)
    return runner.evaluate(X_test=data[0], y_test=data[1])
if __name__ == "__main__":
    # Script entry point: visualise the feature importances of the stored
    # "trilobite" classifier on every image of the data set.
    #
    # An AdaBoostClassifier (n_estimators=50, base_estimator.max_depth=1)
    # was used here previously instead of the random forest.
    forest = RandomForestClassifier(n_estimators=100)
    category = "trilobite"
    dataset = "all"

    datamanager = CaltechManager()
    results_dir = os.path.join(datamanager.PATHS["BASE"],
                               "results_trilobite_rf_testing")
    datamanager.PATHS["RESULTS"] = results_dir

    vcd = VisualConceptDetection(forest, datamanager)
    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    probabilities = clf.predict_proba(sample_matrix)

    vis = EnsembleVisualization(datamanager)
    # The classifier is not needed past this point; drop the reference.
    del clf

    image_titles = [
        vis.get_image_title(predicted, actual)
        for predicted, actual in izip(probabilities, class_vector)
    ]
    del class_vector, sample_matrix

    image_names = list(vcd.datamanager.get_image_names(dataset, category))
    vis.visualize_images(image_names, feature_importances, image_titles)
# ada = AdaBoostClassifier() # ada.n_estimators = 50 # ada.base_estimator.max_depth = 1 random_forest = RandomForestClassifier(n_estimators=100) category = "trilobite" dataset = "all" datamanager = CaltechManager() datamanager.PATHS["RESULTS"] = os.path.join( datamanager.PATHS["BASE"], "results_trilobite_rf_testing") # vcd = VisualConceptDetection(ada, datamanager) vcd = VisualConceptDetection(random_forest, datamanager) clf = vcd.load_object("Classifier", category) feature_importances = clf.feature_importances_ sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category) class_vector = vcd.datamanager.build_class_vector(dataset, category) pred = clf.predict_proba(sample_matrix) vis = EnsembleVisualization(datamanager) del clf image_titles = [ vis.get_image_title(prediction, real) for prediction, real in izip(pred, class_vector) ] del class_vector del sample_matrix