def evaluate(category, clf, datamanager, data=(None, None)):
    """Evaluate a stored classifier on a single category.

    The classifier object for ``category`` is fetched through
    ``VisualConceptDetection.load_object`` and attached to a fresh
    ``VisualConceptDetection`` instance whose log file is placed at
    ``<PATHS["LOGS"]>/evaluation/<class_name(clf)>/<category>/<now>.log``.

    ``data`` is an ``(X_test, y_test)`` pair.  When either element is
    ``None`` (the default), the evaluation falls back to the test set via
    ``evaluate_test_set``; otherwise the supplied pair is evaluated.

    Returns whatever the chosen ``VisualConceptDetection`` evaluation
    method returns.
    """
    log_dir = os.path.join(
        datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category)
    log_file = os.path.join(log_dir, str(datetime.now()) + ".log")

    vcd = VisualConceptDetection(None, datamanager, log_file=log_file)
    vcd.classifier = vcd.load_object("Classifier", category, clf)

    # Explicit data is used only when both halves of the pair are present.
    if data[0] is not None and data[1] is not None:
        return vcd.evaluate(X_test=data[0], y_test=data[1])
    return vcd.evaluate_test_set(category)
def setUp(self):
    """Create the test data manager and an AdaBoost-backed detector.

    Sets ``self.datamanager`` to a ``MyTestDataManager`` and ``self.vcd``
    to a ``VisualConceptDetection`` wrapping an ``AdaBoostClassifier`` with
    14 estimators whose base learner is a decision tree of depth 10.
    """
    # Local import keeps this method self-contained; scikit-learn is
    # already a dependency through AdaBoostClassifier.
    from sklearn.tree import DecisionTreeClassifier

    self.datamanager = MyTestDataManager()

    # Build the depth-10 base tree explicitly rather than assigning to
    # ``clf.base_estimator.max_depth`` after construction: depending on the
    # scikit-learn release that attribute is either None (AttributeError)
    # or a default instance shared between classifiers, so mutating it
    # would leak the depth into unrelated AdaBoostClassifier objects.
    # The estimator is passed positionally because the keyword name has
    # changed between scikit-learn releases.
    clf = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=10), n_estimators=14)

    self.vcd = VisualConceptDetection(clf, self.datamanager)
import os
import sys
import time

import numpy as np
import pylab as pl
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC, SVC

from datamanagers.CaltechManager import CaltechManager
from runGridSearch import GridSearch
from util import svm
from vcd import VisualConceptDetection

# Script entry point: run the visual concept detection pipeline for one
# Caltech category with a random forest and report the wall-clock time.
if __name__ == "__main__":
    category = "airplanes"
    total = time.time()

    clf = RandomForestClassifier(n_estimators=2000)
    # Alternative classifiers that can be swapped in for experiments:
    # clf = AdaBoostClassifier(n_estimators=2000)
    # clf.base_estimator.max_depth = 4
    # clf = LinearSVC(C=100)
    # clf = SVC(C=10)

    dm = CaltechManager()
    vcd = VisualConceptDetection(classifier=clf, datamanager=dm)
    vcd.run(category)

    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("Total execution time: %f minutes" % ((time.time() - total) / 60.0))
# Script entry point: load the stored classifier for one category, predict
# class probabilities for every sample of the chosen dataset and derive a
# title per image from the (prediction, ground truth) pairs.
if __name__ == "__main__":
    # An AdaBoost classifier can be used instead of the random forest:
    # ada = AdaBoostClassifier()
    # ada.n_estimators = 50
    # ada.base_estimator.max_depth = 1
    random_forest = RandomForestClassifier(n_estimators=100)

    category = "trilobite"
    dataset = "all"

    datamanager = CaltechManager()
    results_dir = os.path.join(
        datamanager.PATHS["BASE"], "results_trilobite_rf_testing")
    datamanager.PATHS["RESULTS"] = results_dir

    # vcd = VisualConceptDetection(ada, datamanager)
    vcd = VisualConceptDetection(random_forest, datamanager)

    clf = vcd.load_object("Classifier", category)
    feature_importances = clf.feature_importances_

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.predict_proba(sample_matrix)

    vis = EnsembleVisualization(datamanager)
    # The classifier is not needed once the predictions exist.
    del clf

    image_titles = []
    for prediction, real in izip(pred, class_vector):
        image_titles.append(vis.get_image_title(prediction, real))
    del class_vector
def get_svm_importances(coef):
    """Return the SVM weights scaled to unit norm, as a flat array.

    ``coef`` is converted to a float array, divided by its norm as computed
    by ``np.linalg.norm`` and flattened to one dimension.

    An all-zero ``coef`` has norm 0; in that case a flat array of zeros is
    returned instead of dividing by zero, which would fill the result with
    NaN.
    """
    weights = np.asarray(coef, dtype=float)
    norm = np.linalg.norm(weights)
    if norm == 0:
        # Nothing to scale: avoid 1.0 / 0 followed by 0 * inf == nan.
        return np.zeros(weights.size)
    factor = 1.0 / norm
    return (weights * factor).ravel()


# Script entry point: load the stored linear SVM for one category, compute
# its normalized weights and decision values for every sample, and derive a
# title per image from the (decision value, ground truth) pairs.
if __name__ == "__main__":
    svm = LinearSVC(C=0.1)

    category = "Faces"
    dataset = "all"

    datamanager = CaltechManager()
    datamanager.PATHS["RESULTS"] = os.path.join(
        datamanager.PATHS["BASE"], "results_Faces_LinearSVC_normalized")

    vcd = VisualConceptDetection(svm, datamanager)
    clf = vcd.load_object("Classifier", category)
    importances = get_svm_importances(clf.coef_)

    sample_matrix = vcd.datamanager.build_sample_matrix(dataset, category)
    class_vector = vcd.datamanager.build_class_vector(dataset, category)
    pred = clf.decision_function(sample_matrix)
    # The classifier is not needed once the decision values exist.
    del clf

    image_titles = [
        get_image_title(prediction, real)
        for prediction, real in izip(pred, class_vector)
    ]
    # Release the large inputs once the titles have been built.
    del class_vector
    del sample_matrix