def trainFinalClassifier(db, random_state=0):
    """Fit an ExtraTrees classifier on a dumped training set and persist it.

    The training matrix is read with ``joblib.load`` from ``db + ".dump"``.
    Every column except the last is passed to ``fit`` as features and the
    last column as the target.  The fitted model is written with
    ``joblib.dump`` to ``"clfs{random_state}/" + db``; the directory is
    created if it is missing.

    Args:
        db: Base name of the dataset.  Used for the input file
            (``db + ".dump"``) and as the output file name.
        random_state: Passed to ``ExtraTreesClassifier`` and also embedded in
            the output directory name.

    Returns:
        None.  The result is the file written to disk.
    """
    clf = ExtraTreesClassifier(
        n_estimators=100,
        random_state=random_state,
        verbose=100,
        n_jobs=-1,
    )

    print("Loading training set...")
    # NOTE(review): joblib.load unpickles its input; only point this at
    # dump files from a trusted source.
    loaded = joblib.load(db + ".dump")

    print("Fitting...")
    clf.fit(loaded[:, 0:-1], loaded[:, -1])
    # Drop our reference to the training matrix before serialising the model.
    del loaded

    print("Saving...")
    path = "clfs{}/".format(random_state)
    # Try to create the directory and tolerate "already exists" instead of
    # testing with os.path.exists() first: that leaves no window in which
    # another process can create the directory between the check and mkdir.
    try:
        os.mkdir(path)
    except OSError:
        if not os.path.isdir(path):
            raise

    # Reset the verbosity that was turned up for fitting, so the persisted
    # estimator carries verbose=0.
    clf.verbose = 0
    joblib.dump(clf, path + db)
def _ensure_dir(path):
    """Create directory *path*, doing nothing if it already exists."""
    # EAFP rather than exists()+mkdir(): no gap between the check and the
    # creation in which another process could create the directory.
    try:
        os.mkdir(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _plot_annotated_curve(x, y, thresholds, title, xlabel, ylabel):
    """Plot *y* against *x*, label each point with its threshold, and show it."""
    plt.plot(x, y)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # zip stops at the shortest sequence, so exactly one annotation is drawn
    # per threshold, the same points the index loop over thresholds visited.
    for threshold, x_i, y_i in zip(thresholds, x, y):
        plt.annotate(
            str(threshold),
            xy=(x_i, y_i),
            xytext=(10, 10),
            textcoords='offset points',
            arrowprops=dict(facecolor='black', shrink=0.025),
        )
    plt.show()


def roc_precision(db, usecols=None, test="unnamed", random_state=0, show_plots=False):
    """Compute ROC and precision/recall curves for *db* and save them as .mat.

    A classifier cached at ``"clfs/" + db`` is loaded if that file exists.
    Otherwise an ``ExtraTreesClassifier`` is fitted on ``db + ".train.csv"``
    and saved to that location.  The classifier then scores ``db + ".csv"``;
    the probability column belonging to class ``1`` is used as the score.

    Two files are written under ``MAT_PATH``:

    * ``test + '.roc.' + db + '.mat'`` with keys ``fpr``, ``tpr``,
      ``thresholds``;
    * ``test + '.precall.' + db + '.mat'`` with keys ``precision``,
      ``recall``, ``thresholds``.

    Args:
        db: Base name of the dataset; used to build every input and output
            file name.
        usecols: Forwarded unchanged to ``loadClassifiedDB``.
        test: Prefix for the output ``.mat`` file names.
        random_state: Converted with ``check_random_state`` and forwarded to
            ``loadClassifiedDB`` for both the training and the test load.
        show_plots: When true, each curve is also displayed with its
            thresholds annotated.

    Returns:
        None.  Results are written to disk (and optionally plotted).
    """
    _ensure_dir(MAT_PATH)
    random_state = check_random_state(random_state)

    model_path = "clfs/" + db
    if not os.path.exists(model_path):
        # NOTE(review): the estimator seed is fixed at 0 here, independent of
        # the random_state argument -- confirm this is intentional.
        clf = ExtraTreesClassifier(n_estimators=100, random_state=0, n_jobs=-1)

        print("Loading training set...")
        loaded = loadClassifiedDB(
            db + ".train.csv", random_state=random_state, usecols=usecols
        )

        print("Fitting...")
        # Last column is the label, everything before it the features.
        clf.fit(loaded[:, 0:-1], loaded[:, -1])
        del loaded

        print("Saving...")
        _ensure_dir("clfs")
        clf.verbose = 0
        joblib.dump(clf, model_path)
    else:
        print("Loading {}...".format(db))
        # NOTE(review): joblib.load unpickles its input; the cache directory
        # must only contain trusted files.
        clf = joblib.load(model_path)

    # Keep the class labels: they are needed after the classifier is dropped.
    classes = clf.classes_

    print("Loading test set...")
    loaded = loadClassifiedDB(
        db + ".csv", random_state=random_state, usecols=usecols
    )
    y_true = loaded[:, -1]

    print("Predict proba...")
    y_score = clf.predict_proba(loaded[:, 0:-1])
    # Drop references to the test matrix and the model once scoring is done.
    del loaded
    del clf
    # Keep only the probability column that corresponds to class label 1.
    y_score = y_score[:, classes == 1]

    print("ROC...")
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    sio.savemat(
        MAT_PATH + test + '.roc.' + db + '.mat',
        {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds},
    )
    if show_plots:
        _plot_annotated_curve(
            fpr,
            tpr,
            thresholds,
            "ROC curve",
            "False Positive Rate",
            "True Positive Rate",
        )

    print("Precision/Recall...")
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    sio.savemat(
        MAT_PATH + test + '.precall.' + db + '.mat',
        {'precision': precision, 'recall': recall, 'thresholds': thresholds},
    )
    if show_plots:
        _plot_annotated_curve(
            recall,
            precision,
            thresholds,
            "Precision/Recall",
            "Recall (TP / (TP+FN))",
            "Precision (TP / (TP + FP))",
        )