def adult(dataType):
    """Produce the AdaBoost diagnostic plots for the requested data set.

    The data package is built with ``data.createData(dataType)`` and three
    plots are drawn through the ``plotter`` module, in this order:

    1. a validation curve over ``n_estimators`` (1, 11, ..., 151) on the
       training split,
    2. a learning curve on the training split with ``n_estimators`` fixed
       at 40,
    3. a confusion matrix on the test split, after fitting on the training
       split.

    Args:
        dataType: Identifier handed to ``data.createData``; it is also
            interpolated into the title of the first two plots.
    """
    plot_title = '{0} Ada Boost'.format(dataType)

    # Pull the four splits out of the data package up front.
    dataset = data.createData(dataType)
    xTrain = dataset.xTrain
    xTest = dataset.xTest
    yTrain = dataset.yTrain
    yTest = dataset.yTest

    # Only the ensemble size is swept; every other setting stays at the
    # classifier's defaults.
    swept_param = 'n_estimators'
    swept_values = list(range(1, 160, 10))
    booster = AdaBoostClassifier()
    plotter.plotValidationCurve(
        booster,
        xTrain,
        yTrain,
        swept_param,
        swept_values,
        graphTitle=plot_title,
    )

    # Fix the ensemble size for the learning curve and the final fit.
    booster.n_estimators = 40
    plotter.plotLearningCurve(
        booster,
        title=plot_title,
        xTrain=xTrain,
        yTrain=yTrain,
    )

    # The confusion matrix is drawn under a shorter title.
    confusion_title = 'Adult'
    class_labels = ['>50K', '<=50K']
    booster.fit(xTrain, yTrain)
    plotter.plotConfusion(booster, confusion_title, class_labels, xTest, yTest)
"""Run evaluation of a classifier, for one category. If data isn't set explicitly, the test set is used by default. """ log_file = os.path.join(datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category) log_file = os.path.join(log_file, str(datetime.now()) + ".log") vcd = VisualConceptDetection(None, datamanager, log_file=log_file) clf = vcd.load_object("Classifier", category, clf) vcd.classifier = clf if (data[0] is None) or (data[1] is None): return vcd.evaluate_test_set(category) else: return vcd.evaluate(X_test=data[0], y_test=data[1]) if __name__ == '__main__': # classifier = RandomForestClassifier() classifier = AdaBoostClassifier() classifier.n_estimators = 2000 classifier.base_estimator.max_depth = 4 # classifier = LinearSVC(C=100) category = "airplanes" datamanager = CaltechManager() evaluate(category, classifier, datamanager)
def evaluate(category, clf, datamanager, data=(None, None)):
    """Run evaluation of a classifier, for one category.

    If data isn't set explicitly, the test set is used by default.

    Args:
        category: Category to evaluate. Used as a component of the log
            path and passed on to ``load_object`` / ``evaluate_test_set``.
        clf: Classifier object. ``class_name(clf)`` selects the log
            sub-directory, and the object is handed to
            ``vcd.load_object("Classifier", category, clf)``, whose result
            is the classifier actually evaluated.
        datamanager: Object exposing ``PATHS["LOGS"]``; also passed to
            ``VisualConceptDetection``.
        data: Indexable pair ``(X_test, y_test)``. When either of the
            first two entries is ``None`` the test set is evaluated
            instead.

    Returns:
        The result of ``vcd.evaluate_test_set(category)`` or of
        ``vcd.evaluate(X_test=..., y_test=...)``, unchanged.
    """
    # Log path: <LOGS>/evaluation/<classifier class>/<category>/<timestamp>.log
    log_dir = os.path.join(
        datamanager.PATHS["LOGS"], "evaluation", class_name(clf), category
    )
    log_file = os.path.join(log_dir, str(datetime.now()) + ".log")

    vcd = VisualConceptDetection(None, datamanager, log_file=log_file)
    clf = vcd.load_object("Classifier", category, clf)
    vcd.classifier = clf

    # Fall back to the test set unless both X and y were supplied.
    if (data[0] is None) or (data[1] is None):
        return vcd.evaluate_test_set(category)
    return vcd.evaluate(X_test=data[0], y_test=data[1])


if __name__ == "__main__":
    # Imported here because only the script entry point needs it.
    from sklearn.tree import DecisionTreeClassifier

    # Build the depth-4 base tree explicitly rather than assigning to
    # ``classifier.base_estimator.max_depth``: a default-constructed
    # AdaBoostClassifier may have no base estimator instance to mutate.
    # It is passed positionally because the keyword name differs between
    # scikit-learn releases (``base_estimator`` vs ``estimator``).
    # Alternatives tried: RandomForestClassifier(), LinearSVC(C=100).
    classifier = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=4),
        n_estimators=2000,
    )
    category = "airplanes"
    datamanager = CaltechManager()
    evaluate(category, classifier, datamanager)