def plotPrc(clfName, folds, outdir):
    y_tests = []
    y_scores = []
    plt.clf()
    for i, (clf, X_test, y_test, _, _, _, _, _, _, _) in enumerate(folds):
        try:
            y_score = clf.decision_function(X_test)
        except AttributeError:
            y_score = clf.predict_proba(X_test)[:, 0]
        precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=POSTIVE_LABEL)
        y_tests.extend(y_test)
        y_scores.extend(y_score)
        try:
            area = average_precision_score(y_test, y_score)
        except ValueError:
            area = 0.0
        clf.prc_auc = area
        plt.plot(recall, precision, label='Fold %d, AUC = %0.2f' % (i, area), lw=1)
    # Aggregate curve over all folds
    precision, recall, _ = precision_recall_curve(y_tests, y_scores, pos_label=POSTIVE_LABEL)
    try:
        area = average_precision_score(y_tests, y_scores)
    except ValueError:
        area = 0.0
    plt.plot(recall, precision, 'k--', label='Mean, AUC = %0.2f' % (area), lw=2)
    plt.title('Precision-Recall: %s\n%s' % (clfName, outdir.name.replace("_", " ")))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend(loc="lower center", prop=legendprop)
    plt.savefig(str(outdir / (clfName.replace(" ", "_") + '_precision-recall.png')))
def create_all_eval_results(y_true, y_pred, key, system_features, sampling, replacement, num_of_samples):
    name = data_names[key]
    # Restrict to the instances whose true label is "buggy" (label 1)
    y_true_bugs, y_pred_bugs = zip(*[[y_true[i], y_pred[i]] for i in range(len(y_true)) if y_true[i] == 1])
    precision_bug = metrics.precision_score(y_true_bugs, y_pred_bugs, average='micro')
    recall_bug = metrics.recall_score(y_true_bugs, y_pred_bugs, average='micro')
    F2_bug = calculateF2(precision_bug, recall_bug)
    precision_bug_all, recall_bug_all, _ = metrics.precision_recall_curve(y_true_bugs, y_pred_bugs)
    prc_area_bug = metrics.auc(recall_bug_all, precision_bug_all)

    precision = metrics.precision_score(y_true, y_pred, average='micro')
    recall = metrics.recall_score(y_true, y_pred, average='micro')
    F2 = calculateF2(precision, recall)
    precision_all, recall_all, _ = metrics.precision_recall_curve(y_true, y_pred)
    prc_area = metrics.auc(recall_all, precision_all)

    global results
    results.loc[len(results)] = [name, precision_bug, recall_bug, F2_bug, prc_area_bug,
                                 precision, recall, F2, prc_area,
                                 str(system_features), str(sampling), str(replacement), str(num_of_samples)]
def RunExp(StrModel: str, Param: str, FeaUsed: list, DataPath: str, Label: str, std: bool = False, N: int = 0):
    Data = np.genfromtxt(DataPath + Label, delimiter=',', dtype=int)
    Data = Data[:, np.newaxis]
    for f in FeaUsed:
        T = np.genfromtxt(DataPath + Features[f], delimiter=',', dtype=float)
        if len(T.shape) < 2:
            T = T[:, np.newaxis]
        Data = np.concatenate((Data, T), axis=1)
    if N > 0:
        Data = Data[:N, :]

    Lbl = Data[:, 0]
    Fea = Data[:, 1:]
    if std:
        scaler = preprocessing.StandardScaler()
        Fea = scaler.fit_transform(Fea)

    # Fit on the full data set (training curve)
    Model = base.clone(Models[StrModel])
    SetParam(Model, Param)
    Model.fit(Fea, Lbl)
    Pred = Model.predict_proba(Fea)[:, 1]
    st = metrics.precision_recall_curve(Lbl, Pred)

    # 5-fold cross-validated predictions (validation curve);
    # sklearn.cross_validation was removed, model_selection.KFold is its replacement
    Folds = model_selection.KFold(n_splits=5).split(Fea)
    for train, valid in Folds:
        Model = base.clone(Models[StrModel])
        SetParam(Model, Param)
        Model.fit(Fea[train], Lbl[train])
        Pred[valid] = Model.predict_proba(Fea[valid])[:, 1]
    sv = metrics.precision_recall_curve(Lbl, Pred)
    return st, sv
def multiclass_pr(y_true, y_pred, class_names=[]):
    """Computes the precision and recall for multiclass predictions.

    Args:
        y_true: True class labels, shape (examples, classes).
        y_pred: Predicted class scores, shape (examples, classes).
        class_names: List of class name strings, with None to indicate the
            no-connection class - e.g. ['inh', None, 'xct'].

    Returns:
        precision: Dict of precision for each class, plus the micro-average.
        recall: Dict of recall for each class, plus the micro-average.
    """
    assert len(class_names) == y_true.shape[1]
    classes = y_true.shape[1]
    precision = dict()
    recall = dict()
    for i, class_name in enumerate(class_names):
        if class_name is not None:
            precision[class_name], recall[class_name], _ = precision_recall_curve(
                y_true[:, i], y_pred[:, i])
    # Don't include no-connection class in micro-average
    mask = np.ones(classes, dtype=bool)
    mask[mask.size // 2 - 1 + mask.size % 2] = 0
    precision['micro-avg'], recall['micro-avg'], _ = precision_recall_curve(
        y_true[:, mask].ravel(), y_pred[:, mask].ravel())
    return precision, recall
def evaluate_multiple(ground_truths, prediction_scores, compute_micro_macro_avg=False):
    """
    :param ground_truths: 1-d array annotated with class labels starting from 0, e.g. gt: [0, 0, 1, 3, 2, 1, 0]
    :param prediction_scores: 2-d array recording the corresponding probability scores for each class
    :param compute_micro_macro_avg: switch if the micro and macro average curves are needed
    :return: dictionaries keyed by class label: precisions, recalls, thresholds, avg_precisions
    """
    # Check dimension
    if len(prediction_scores.shape) != 2:
        print('The dimension of \'prediction_scores\' should be 2.')
        return
    N = prediction_scores.shape[0]
    M = prediction_scores.shape[1]

    precisions = {}
    recalls = {}
    thresholds = {}
    avg_precisions = {}
    if compute_micro_macro_avg:
        gt_label_array = []
        prediction_score_array = []

    for class_label in range(0, M):
        # Generate binary labels for this class (one-vs-rest)
        ground_truth_label = np.zeros(N, dtype=int)
        idx = (ground_truths == class_label)
        ground_truth_label[idx] = 1
        # Extract positive scores
        prediction_score = prediction_scores[:, class_label]
        # Compute precision-recall curve
        precision, recall, threshold = precision_recall_curve(ground_truth_label, prediction_score)
        avg_precision = average_precision_score(ground_truth_label, prediction_score)
        precisions[class_label] = precision
        recalls[class_label] = recall
        thresholds[class_label] = threshold
        avg_precisions[class_label] = avg_precision
        if compute_micro_macro_avg:
            gt_label_array.append(ground_truth_label)
            prediction_score_array.append(prediction_score)

    if compute_micro_macro_avg:
        gt_label_array = np.asarray(gt_label_array)
        prediction_score_array = np.asarray(prediction_score_array)
        # Compute micro average
        precisions["micro"], recalls["micro"], _ = precision_recall_curve(gt_label_array.ravel(),
                                                                          prediction_score_array.ravel())
        avg_precisions["micro"] = average_precision_score(gt_label_array, prediction_score_array, average="micro")
    return precisions, recalls, thresholds, avg_precisions
def calc_auc(model, y_test, y_score, auctype="ROC"):
    y_score = 1 / (1 + np.exp(-y_score))  # squash raw scores through a sigmoid
    n_classes = y_test.shape[1]
    if auctype == "ROC":
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        return roc_auc["micro"]
    elif auctype == "PR":
        prec = dict()
        rec = dict()
        pr_auc = dict()
        for i in range(n_classes):
            prec[i], rec[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
            pr_auc[i] = auc(rec[i], prec[i])
        # Compute micro-average precision-recall curve and its AUC
        prec["micro"], rec["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
        pr_auc["micro"] = auc(rec["micro"], prec["micro"])
        return pr_auc["micro"]
def prc_curve(targets_ts, scores_ts, targets_tr, scores_tr, model_no):
    plt.clf()
    colors = ['r', 'g', 'b', 'y', 'k', 'm']
    classes = ['lunge', 'wing_threat', 'charge', 'hold', 'tussle', 'other']
    for i in range(NUM_CLASSES):
        i = 5  # only plot the 'other' class; the loop breaks after one iteration
        precision_ts, recall_ts, thresholds_ts = precision_recall_curve(targets_ts[:, i], scores_ts[:, i], pos_label=1)
        precision_tr, recall_tr, thresholds = precision_recall_curve(targets_tr[:, i], scores_tr[:, i], pos_label=1)
        area_ts = auc(recall_ts, precision_ts)
        area_tr = auc(recall_tr, precision_tr)
        test_i, f1_ts = compute_f1(precision_ts, recall_ts)
        train_i, f1_tr = compute_f1(precision_tr, recall_tr)
        print(thresholds_ts[train_i])
        plt.plot(recall_ts, precision_ts, '--',
                 label="%s test AUC: %0.3f f1: %0.3f" % (classes[i], area_ts, f1_ts), color=colors[i])
        plt.plot(recall_tr, precision_tr,
                 label="%s train AUC: %0.3f f1: %0.3f" % (classes[i], area_tr, f1_tr), color=colors[i])
        break
    plt.title('Precision Recall of MC Model ' + model_no)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.legend(loc="lower left", prop={'size': 8})
    plt.grid(b=True, which='major')
    figure = plt.gcf()
    figure.set_size_inches(8, 6)
    plt.savefig('PRC_mc_model' + model_no + '.png')
def plotPrecisionRecall(learner, learner_name, testFeatures, testAnswers):
    print('Plotting Precision and Recall for ' + learner_name)
    precDown, recDown, thrDown = precision_recall_curve(testAnswers == 0, learner.predict_proba(testFeatures)[:, 0])
    precUp, recUp, thrUp = precision_recall_curve(testAnswers == 1, learner.predict_proba(testFeatures)[:, 1])
    plotLines([[recDown], [precDown]], learner_name + ': Precision vs Recall (Down)', 'Recall', 'Precision')
    plotLines([[recUp], [precUp]], learner_name + ': Precision vs Recall (Up)', 'Recall', 'Precision')
    return (precDown, recDown, thrDown, precUp, recUp, thrUp)
def fscore(y_test, y_score):
    """
    :param y_test: true labels of the test set
    :param y_score: predicted scores/probabilities, one column per class
    :return: plot object
    """
    # binarize output vector
    y_test = binarize(y_test)
    print('y_test binarized shape = ', np.shape(y_test))
    n_classes = np.shape(y_test)[1]

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'.format(i, average_precision[i]))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return plt
def plotCurve(arr):
    X = arr[:, :-1]
    y = arr[:, -1]

    # Binarize the output
    y = label_binarize(y, classes=[0, 1])
    n_classes = y.shape[1]

    # Add noisy features
    random_state = np.random.RandomState(0)
    n_samples, n_features = X.shape
    X = np.c_[X, random_state.randn(n_samples, 150 * n_features)]

    # Split into training and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=random_state)

    # Run classifier
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    print(recall)
    print(precision)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.05])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'.format(i, average_precision[i]))
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
def fscore_plot(classifier, X_test, y_test):
    # Binarize the output
    n_classes = max(y_test) - min(y_test) + 1
    y_test = label_binarize(y_test, classes=list(range(0, n_classes)))
    y_score = classifier.predict_proba(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'.format(i, average_precision[i]))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return plt
def _test_precision_recall_curve(y_true, probas_pred):
    """Test Precision-Recall and area under PR curve"""
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
def test_precision_recall_curve_pos_label():
    y_true, _, probas_pred = make_prediction(binary=False)
    pos_label = 2
    p, r, thresholds = precision_recall_curve(y_true, probas_pred[:, pos_label],
                                              pos_label=pos_label)
    p2, r2, thresholds2 = precision_recall_curve(y_true == pos_label,
                                                 probas_pred[:, pos_label])
    assert_array_almost_equal(p, p2)
    assert_array_almost_equal(r, r2)
    assert_array_almost_equal(thresholds, thresholds2)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def precision_recall(predictions):
    prc_logreg = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 1]])
    prc_svm = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 2]])
    prc_knn = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 3]])
    prc_tree = precision_recall_curve([int(y) for y in predictions[:, 0]], [float(w) for w in predictions[:, 4]])
    # Best precision achievable while keeping recall >= 0.7
    print('logreg => %s' % round(max(prc_logreg[0][prc_logreg[1] >= 0.7]), 2))
    print('svm => %s' % round(max(prc_svm[0][prc_svm[1] >= 0.7]), 2))
    print('knn => %s' % round(max(prc_knn[0][prc_knn[1] >= 0.7]), 2))
    print('tree => %s' % round(max(prc_tree[0][prc_tree[1] >= 0.7]), 2))
def compute_auc_prc( A_true, W_xcorr=None, bfgs_model=None, sgd_model=None, gibbs_samples=None, vb_models=None, svi_models=None, average="macro", ): """ Compute the AUC of the precision recall curve :return: """ A_flat = A_true.ravel() aucs = {} precs = {} recalls = {} if W_xcorr is not None: aucs["xcorr"] = average_precision_score(A_flat, W_xcorr.ravel(), average=average) precs["xcorr"], recalls["xcorr"], _ = precision_recall_curve(A_flat, W_xcorr.ravel()) if bfgs_model is not None: assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel) W_bfgs = bfgs_model.W.copy() W_bfgs -= np.diag(np.diag(W_bfgs)) aucs["bfgs"] = average_precision_score(A_flat, W_bfgs.ravel(), average=average) precs["bfgs"], recalls["bfgs"], _ = precision_recall_curve(A_flat, W_bfgs.ravel()) if sgd_model is not None: assert isinstance(sgd_model, DiscreteTimeStandardHawkesModel) aucs["sgd"] = average_precision_score(A_flat, sgd_model.W.ravel(), average=average) # precs['sgd'], recalls['sgd'], _ = precision_recall_curve(A_flat, W_sgd.ravel()) if gibbs_samples is not None: # Compute ROC based on mean value of W_effective in second half of samples Weff_samples = np.array([s.weight_model.W_effective for s in gibbs_samples]) N_samples = Weff_samples.shape[0] offset = N_samples // 2 Weff_mean = Weff_samples[offset:, :, :].mean(axis=0) aucs["gibbs"] = average_precision_score(A_flat, Weff_mean.ravel(), average=average) if vb_models is not None: # Compute ROC based on E[A] under variational posterior aucs["vb"] = average_precision_score(A_flat, vb_models[-1].weight_model.expected_A().ravel(), average=average) if svi_models is not None: # Compute ROC based on E[A] under variational posterior W_svi = svi_models[-1].weight_model.expected_W() aucs["svi"] = average_precision_score(A_flat, W_svi.ravel(), average=average) precs["svi"], recalls["svi"], _ = precision_recall_curve(A_flat, W_svi.ravel()) return aucs, precs, recalls
def wiggle_room_precision_recall(pred, boundary, margin=2, connectivity=1):
    struct = nd.generate_binary_structure(boundary.ndim, connectivity)
    gtd = nd.binary_dilation(boundary, struct, margin)
    struct_m = nd.iterate_structure(struct, margin)
    pred_dil = nd.grey_dilation(pred, footprint=struct_m)
    missing = np.setdiff1d(np.unique(pred), np.unique(pred_dil))
    for m in missing:
        pred_dil.ravel()[np.flatnonzero(pred == m)[0]] = m
    # Precision against the dilated ground truth, recall against the dilated prediction
    prec, _, ts = precision_recall_curve(gtd.ravel(), pred.ravel())
    _, rec, _ = precision_recall_curve(boundary.ravel(), pred_dil.ravel())
    return zip(ts, prec, rec)
def create_all_eval_results(y_true, y_pred, key, result_type, features_type, num_of_bugs,
                            num_of_all_instances, bugs_Precent, from_model, system_features):
    precision_bugged = metrics.precision_score(y_true, y_pred, pos_label=1, average='binary')
    recall_bugged = metrics.recall_score(y_true, y_pred, pos_label=1, average='binary')
    f_measure_bugged = metrics.f1_score(y_true, y_pred, pos_label=1, average='binary')
    f2_measure_bugged = calculateF2(precision_bugged, recall_bugged)

    un_true, _ = np.unique(y_true, return_counts=True)
    un_pred, _ = np.unique(y_pred, return_counts=True)
    if len(un_true) == 1 or len(un_pred) == 1:
        roc_bugged = '?'
        prc_bugged = '?'
        print("zero")
    else:
        try:
            roc_bugged = metrics.roc_auc_score(y_true, y_pred, average=None)
        except:
            print("exception_roc")
            roc_bugged = '?'
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(y_true, y_pred, pos_label=1)
            # auc expects the x values (recall) first, then the y values (precision)
            prc_bugged = metrics.auc(recall, precision)
        except:
            print("exception_prc")
            prc_bugged = '?'

    precision_all = metrics.precision_score(y_true, y_pred, average='weighted')
    recall_all = metrics.recall_score(y_true, y_pred, average='weighted')
    f_measure_all = metrics.f1_score(y_true, y_pred, average='weighted')
    f2_measure_all = calculateF2(precision_all, recall_all)
    if len(un_true) == 1 or len(un_pred) == 1:
        roc_all = 0
        prc_all = 1
        print("zero")
    else:
        try:
            roc_all = metrics.roc_auc_score(y_true, y_pred, average='weighted')
        except:
            print("exception_roc")
            roc_all = 0
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(y_true, y_pred)
            prc_all = metrics.auc(recall, precision)
        except:
            print("exception_prc")
            prc_all = 1

    global results_all_projects
    results_all_projects.loc[len(results_all_projects)] = [key, from_model, result_type, features_type,
                                                           "record-sensitive", str(system_features),
                                                           precision_bugged, recall_bugged, f_measure_bugged,
                                                           f2_measure_bugged, roc_bugged, prc_bugged,
                                                           precision_all, recall_all, f_measure_all,
                                                           f2_measure_all, roc_all, prc_all,
                                                           num_of_bugs, num_of_all_instances, bugs_Precent]
def evaluate_classifier(X_train, X_test, y_train, y_test): ''' Run multiple times with different classifiers to get an idea of the relative performance of each configuration. Returns a sequence of tuples containing: (title, precision, recall) for each learner. ''' # Import some classifiers to test from sklearn.svm import LinearSVC, NuSVC from sklearn.ensemble import AdaBoostClassifier # We will calculate the P-R curve for each classifier from sklearn.metrics import precision_recall_curve, f1_score # Here we create classifiers with default parameters. These need # to be adjusted to obtain optimal performance on your data set. # Test the linear support vector classifier classifier = LinearSVC(C=1) # Fit the classifier classifier.fit(X_train, y_train) score = f1_score(y_test, classifier.predict(X_test)) # Generate the P-R curve y_prob = classifier.decision_function(X_test) precision, recall, _ = precision_recall_curve(y_test, y_prob) # Include the score in the title yield 'Linear SVC (F1 score={:.3f})'.format(score), precision, recall # Test the Nu support vector classifier classifier = NuSVC(kernel='rbf', nu=0.5, gamma=1e-3) # Fit the classifier classifier.fit(X_train, y_train) score = f1_score(y_test, classifier.predict(X_test)) # Generate the P-R curve y_prob = classifier.decision_function(X_test) precision, recall, _ = precision_recall_curve(y_test, y_prob) # Include the score in the title yield 'NuSVC (F1 score={:.3f})'.format(score), precision, recall # Test the Ada boost classifier classifier = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, algorithm='SAMME.R') # Fit the classifier classifier.fit(X_train, y_train) score = f1_score(y_test, classifier.predict(X_test)) # Generate the P-R curve y_prob = classifier.decision_function(X_test) precision, recall, _ = precision_recall_curve(y_test, y_prob) # Include the score in the title yield 'Ada Boost (F1 score={:.3f})'.format(score), precision, recall
def eval_measures(self, x, fwd, ninputs, prefix, logging, rng=42):
    xp, yp, xn, yn = [], [], [], []
    xp_raw, yp_raw, xn_raw, yn_raw = [], [], [], []
    print("Evaluation on %i mini-batches" % len(x))
    for i in range(len(x)):
        xp.append(fwd(*x[i][:ninputs]))
        yp.append(fwd(*x[i][ninputs:2 * ninputs]))
        xn.append(fwd(*x[i][2 * ninputs:3 * ninputs]))
        yn.append(fwd(*x[i][3 * ninputs:]))
        xp_raw.append(x[i][0])
        yp_raw.append(x[i][ninputs])
        xn_raw.append(x[i][2 * ninputs])
        yn_raw.append(x[i][3 * ninputs])
    xp = np.concatenate(xp, axis=0)
    yp = np.concatenate(yp, axis=0)
    xn = np.concatenate(xn, axis=0)
    yn = np.concatenate(yn, axis=0)
    xp_raw = np.concatenate(xp_raw, axis=0)
    yp_raw = np.concatenate(yp_raw, axis=0)
    xn_raw = np.concatenate(xn_raw, axis=0)
    yn_raw = np.concatenate(yn_raw, axis=0)

    np.random.seed(rng)
    # Euclidean distances between positive pairs and negative pairs
    dp_raw = np.sqrt(np.sum((xp_raw - yp_raw) ** 2., axis=1))
    dn_raw = np.sqrt(np.sum((xn_raw - yn_raw) ** 2., axis=1))
    D_raw = np.concatenate([dp_raw, dn_raw], axis=0)
    dp = np.sqrt(np.sum((xp - yp) ** 2., axis=1))
    dn = np.sqrt(np.sum((xn - yn) ** 2., axis=1))
    D = np.concatenate([dp, dn], axis=0)
    dp_bin = np.sqrt(np.sum((np.float32(xp > 0.) - np.float32(yp > 0.)) ** 2., axis=1))
    dn_bin = np.sqrt(np.sum((np.float32(xn > 0.) - np.float32(yn > 0.)) ** 2., axis=1))
    D_bin = np.concatenate([dp_bin, dn_bin], axis=0)

    # Positive pairs get label +1, negative pairs -1; smaller distance means a higher score
    y_true = np.zeros((D.shape[0],)) - 1.
    y_true[:dp.shape[0]] = 1.
    AUC = roc_auc_score(y_true, -D)
    AUC_raw = roc_auc_score(y_true, -D_raw)
    AUC_bin = roc_auc_score(y_true, -D_bin)
    precision, recall, thresholds = precision_recall_curve(y_true, -D)
    precision_raw, recall_raw, thresholds_raw = precision_recall_curve(y_true, -D_raw)
    precision_bin, recall_bin, thresholds_bin = precision_recall_curve(y_true, -D_bin)
    logging.info("...NN AUC (bin) %f (%f), RAW AUC %f", AUC, AUC_bin, AUC_raw)
    return AUC, precision, recall
def prPlot(ytest, yprob, yprob2=None, method1="", method2=""):
    pl.clf()
    auc = prAUC(ytest, yprob)
    precision, recall, th = metrics.precision_recall_curve(ytest, yprob)
    pl.plot(recall, precision, label='Precision-Recall: %s (%0.3f)' % (method1, auc))
    if yprob2 is not None:
        pr2, rc2, t2 = metrics.precision_recall_curve(ytest, yprob2)
        pl.plot(rc2, pr2, 'r', label='Precision-Recall: %s (%0.3f)' % (method2, prAUC(ytest, yprob2)))
    pl.xlabel('Recall')
    pl.ylabel('Precision')
    pl.ylim([0.0, 1.05])
    pl.xlim([0.0, 1.0])
    pl.title('Precision-Recall curve (AUC = %0.3f)' % auc)
    pl.legend(loc="lower left")
    pl.show()
def plot_precision_recall_charts(df_result, classifiers, feature_sets, grouping, graph_files_dir, save=False): if grouping == 'fset': for fset in feature_sets: fig, axes = plt.subplots(1, 1, figsize=(10, 7)) for clf in classifiers: try: df = df_result[(df_result.model_name == clf) & (df_result.fset_name == fset)] precision, recall, thresholds = precision_recall_curve(df.target_ind, df.pred_1_prob) axes.plot(recall, precision, label=clf) except: pass plt.title('Feature Set: ' + fset + '\nPrecision Recall Chart') plt.grid(True) plt.yticks(np.arange(0, 1.1, 0.1)) plt.xticks(np.arange(0, 1.1, 0.1)) plt.xlabel('Recall') plt.ylabel('Precision') plt.legend(loc='lower left') leg = plt.gca().get_legend() leg.set_title('Classification Model') ltext = leg.get_texts() plt.setp(ltext, fontsize='small') if save: plt.savefig(graph_files_dir + 'pr chart - ' + fset + '.png') if grouping == 'clf': for clf in classifiers: fig, axes = plt.subplots(1, 1, figsize=(10, 7)) for fset in feature_sets: try: df = df_result[(df_result.model_name == clf) & (df_result.fset_name == fset)] precision, recall, thresholds = precision_recall_curve(df.target_ind, df.pred_1_prob) axes.plot(recall, precision, label=fset) except: pass plt.title('Classification Model: ' + clf + '\nPrecision Recall Chart') plt.grid(True) plt.yticks(np.arange(0, 1.1, 0.1)) plt.xticks(np.arange(0, 1.1, 0.1)) plt.xlabel('Recall') plt.ylabel('Precision') plt.legend(loc='lower left') leg = plt.gca().get_legend() leg.set_title('Feature Set') ltext = leg.get_texts() plt.setp(ltext, fontsize='small') if save: plt.savefig(graph_files_dir + 'pr chart - ' + clf + '.png')
def PR_multi_class(data_train, data_test, data_test_vectors): # Binarize the output y_train_label = label_binarize(data_train.target, classes=[0, 1, 2]) n_classes = y_train_label.shape[1] random_state = np.random.RandomState(0) # shuffle and split training and test sets X_train, X_test, y_train, y_test = train_test_split(data_train_vectors, y_train_label, test_size=.5, random_state=random_state) # Learn to predict each class against the other classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state)) classifier.fit(X_train, y_train) y_pred_score = classifier.decision_function(data_test_vectors) y_test_label = label_binarize(data_test.target, classes=[0, 1, 2]) # Compute Precision-Recall and plot curve precision = dict() recall = dict() average_precision = dict() for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(y_test_label[:, i], y_pred_score[:, i]) average_precision[i] = average_precision_score(y_test_label[:, i], y_pred_score[:, i]) # Compute micro-average ROC curve and ROC area precision["micro"], recall["micro"], _ = precision_recall_curve(y_test_label.ravel(), y_pred_score.ravel()) average_precision["micro"] = average_precision_score(y_test_label, y_pred_score, average="micro") # Plot Precision-Recall curve for each class plt.clf() # plt.plot(recall["micro"], precision["micro"], # label='micro-average PR curve (area = {0:0.2f})' # ''.format(average_precision["micro"])) for i in range(n_classes): plt.plot(recall[i], precision[i], label='PR curve of class {0} (area = {1:0.2f})' ''.format(i, average_precision[i])) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Precision-Recall curve of multi-class') plt.legend(loc="lower right") plt.show() return 0
def calc_precision_recall_fmeasure(self):
    """ Computes Precision, Recall, F-measure and Support """
    # precision, recall, F-measure and support for each class at the given thresholds
    for threshold in [10, 30, 50]:
        result = precision_recall_fscore_support(self.y_true,
                                                 prediction_to_binary(self.y_pred, threshold))
        self.scores['Precision ' + str(threshold) + '%'] = result[0]
        self.scores['Recall ' + str(threshold) + '%'] = result[1]
        self.scores['F-score ' + str(threshold) + '%'] = result[2]
        self.scores['Support'] = result[3]

    # Computes precision-recall pairs for different probability thresholds
    self.precision, self.recall, self.thresholds = precision_recall_curve(self.y_true, self.y_pred)

    # Compute the area under the precision-recall curve (average precision from prediction scores)
    self.scores['Precision-Recall AUC'] = average_precision_score(self.y_true, self.y_pred)
    # Average precision weighted by support (the number of true instances for each label)
    self.scores['Weighted Precision'] = average_precision_score(self.y_true, self.y_pred, average='weighted')
    self.scores['Average Recall'] = np.average(self.recall)
    self.scores['Average Threshold'] = np.average(self.thresholds)
    return
def _test_precision_recall_curve(y_true, probas_pred):
    # Test Precision-Recall and area under PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = _average_precision_slow(y_true, probas_pred)
    assert_array_almost_equal(precision_recall_auc, 0.859, 3)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, decimal=3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(probas_pred))
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def plot_precision_recall_n(y_true, y_prob, model_name):
    from sklearn.metrics import precision_recall_curve
    y_score = y_prob
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_true, y_score)
    # Drop the final point, which has no associated threshold
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)
    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        pct_above_per_thresh.append(pct_above_thresh)
    pct_above_per_thresh = np.array(pct_above_per_thresh)

    plt.clf()
    fig, ax1 = plt.subplots()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')
    # fig.show()
    name = model_name + " Precision Recall vs Population for " + ".png"
    plt.title(name)
    plt.savefig(name)
    return fig
def model(x_train, x_test, y_train, y_test, clf):
    print(clf)
    clf.fit(x_train, y_train)
    f = open(outpath, 'a+')
    f.write(str(clf))
    f.write("\n")

    # The feature importances reflect how much each feature contributes to the classification.
    # print(clf.feature_importances_)

    # Print the test results
    answer = clf.predict(x_test)
    avrage = np.mean(answer == y_test)
    f.write(str(avrage))
    f.write("\n")

    # Precision and recall
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))
    report = classification_report(y_test, answer, target_names=['女', '男'])
    print(report)
    f.write(str(report))
    f.write("\n\n")
    f.close()
def plot_precision_recall_n(y_true, y_prob, model_name):
    '''
    Takes the model, plots precision and recall curves
    '''
    y_score = y_prob
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_true, y_score)
    # Drop the final point, which has no associated threshold
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)
    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        pct_above_per_thresh.append(pct_above_thresh)
    pct_above_per_thresh = np.array(pct_above_per_thresh)

    plt.clf()
    fig, ax1 = plt.subplots()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')

    name = str(model_name)
    try:
        plt.title(name)
        plt.savefig("Output/Images/{}.png".format(name))
    except:
        # Fall back to a truncated file name if the full one cannot be used
        name = name[:75]
        plt.title(name)
        plt.savefig("Output/Images/{}.png".format(name))
    plt.close()
def evaluation(self, test_data, test_label): dinx = np.array(list(self.train_drugs)) DS = self.dsMat[:, dinx] tinx = np.array(list(self.train_targets)) TS = self.tsMat[:, tinx] scores = [] if self.K2 > 0: for d, t in test_data: if d in self.train_drugs: if t in self.train_targets: val = np.sum(self.U[d, :]*self.V[t, :]) else: jj = np.argsort(TS[t, :])[::-1][:self.K2] val = np.sum(self.U[d, :]*np.dot(TS[t, jj], self.V[tinx[jj], :]))/np.sum(TS[t, jj]) else: if t in self.train_targets: ii = np.argsort(DS[d, :])[::-1][:self.K2] val = np.sum(np.dot(DS[d, ii], self.U[dinx[ii], :])*self.V[t, :])/np.sum(DS[d, ii]) else: ii = np.argsort(DS[d, :])[::-1][:self.K2] jj = np.argsort(TS[t, :])[::-1][:self.K2] v1 = DS[d, ii].dot(self.U[dinx[ii], :])/np.sum(DS[d, ii]) v2 = TS[t, jj].dot(self.V[tinx[jj], :])/np.sum(TS[t, jj]) val = np.sum(v1*v2) scores.append(np.exp(val)/(1+np.exp(val))) elif self.K2 == 0: for d, t in test_data: val = np.sum(self.U[d, :]*self.V[t, :]) scores.append(np.exp(val)/(1+np.exp(val))) prec, rec, thr = precision_recall_curve(test_label, np.array(scores)) aupr_val = auc(rec, prec) fpr, tpr, thr = roc_curve(test_label, np.array(scores)) auc_val = auc(fpr, tpr) return aupr_val, auc_val
def two_class_combo_plotter(y_test, y_score): plt.figure() y_test = one_hot(y_test) fpr, tpr, thresholds = roc_curve(y_test[:, 1], y_score[:, 1]) for i, t in enumerate(thresholds): if t < 0.425: print(t, fpr[i], tpr[i]) break roc_auc = auc(fpr, tpr) plt.plot(thresholds, fpr, label="FPR") plt.plot(thresholds, tpr, label="TPR / Recall") precision, recall, thresholds = precision_recall_curve(y_test[:, 1], y_score[:, 1]) for i, t in enumerate(thresholds): if t > 0.425: print(t, precision[i], recall[i]) break plt.plot(thresholds, precision[1:], label="Precision") plt.plot(thresholds, [f1(precision[i], recall[i]) for i in range(len(thresholds))], label="F1") plt.plot([0.408] * 100, np.arange(0, 1, 0.01), "k--") plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel("Threshold") plt.legend(loc="lower right")
def train_and_evaluate_tree_model(X_train, X_test, y_train, y_test, show_plots=False):
    # Train model
    print('training model...')
    a = time.clock()
    model = ensemble.GradientBoostingClassifier(n_estimators=100, max_depth=15, max_features='sqrt')
    model.fit(X_train, y_train)
    b = time.clock()
    print('training took', (b - a) / 60, 'minutes')

    # Evaluate model
    print('Model score (accuracy):', model.score(X_test, y_test))
    predicted = model.predict(X_test)
    predicted_probs = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, predicted_probs[:, 1])
    if show_plots:
        plot_auc(fpr, tpr)
    print('AUC', roc_auc_score(y_test, predicted_probs[:, 1]))
    precision, recall, thresholds = precision_recall_curve(y_test, predicted_probs[:, 1])
    both = list(zip(precision, recall))
    print('Max min of P/Se', max([min(r) for r in both]))
    if show_plots:
        plot_precision_recall(precision, recall)
# Area under the ROC curve
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
# test_integral = np.trapz(tpr, fpr)  # trapz is numpy integration
print("\nArea under the ROC curve: " + str(AUC_ROC))
roc_fig = plt.figure()
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "ROC.png")

# Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
precision = np.fliplr([precision])[0]  # so the array is increasing (you won't get negative AUC)
recall = np.fliplr([recall])[0]  # so the array is increasing (you won't get negative AUC)
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_fig = plt.figure()
plt.plot(recall, precision, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
plt.title('Precision - Recall curve')
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(loc="lower right")
def plot_precision_recall_curve(y_true, y_probas, title='Precision-Recall Curve', curves=('micro', 'each_class'), ax=None, figsize=None, title_fontsize="large", text_fontsize="medium"): """Generates the Precision Recall Curve for a set of ground truth labels and classifier probability predictions. Args: y_true (array-like, shape (n_samples)): Ground truth (correct) target values. y_probas (array-like, shape (n_samples, n_classes)): Prediction probabilities for each class returned by a classifier. curves (array-like): A listing of which curves should be plotted on the resulting plot. Defaults to `("micro", "each_class")` i.e. "micro" for micro-averaged curve ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot the learning curve. If None, the plot is drawn on a new set of axes. figsize (2-tuple, optional): Tuple denoting figure size of the plot e.g. (6, 6). Defaults to ``None``. title_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "large". text_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "medium". Returns: ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn. Example: >>> import scikitplot.plotters as skplt >>> nb = GaussianNB() >>> nb = nb.fit(X_train, y_train) >>> y_probas = nb.predict_proba(X_test) >>> skplt.plot_precision_recall_curve(y_test, y_probas) <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490> >>> plt.show() .. image:: _static/examples/plot_precision_recall_curve.png :align: center :alt: Precision Recall Curve """ classes = np.unique(y_true) probas = y_probas if 'micro' not in curves and 'each_class' not in curves: raise ValueError( 'Invalid argument for curves as it only takes "micro" or "each_class"' ) # Compute Precision-Recall curve and area for each class precision = dict() recall = dict() average_precision = dict() for i in range(len(classes)): precision[i], recall[i], _ = precision_recall_curve( y_true, probas[:, i], pos_label=classes[i]) y_true = label_binarize(y_true, classes=classes) if len(classes) == 2: y_true = np.hstack((1 - y_true, y_true)) for i in range(len(classes)): average_precision[i] = average_precision_score(y_true[:, i], probas[:, i]) # Compute micro-average ROC curve and ROC area micro_key = 'micro' i = 0 while micro_key in precision: i += 1 micro_key += str(i) precision[micro_key], recall[micro_key], _ = precision_recall_curve( y_true.ravel(), probas.ravel()) average_precision[micro_key] = average_precision_score(y_true, probas, average='micro') if ax is None: fig, ax = plt.subplots(1, 1, figsize=figsize) ax.set_title(title, fontsize=title_fontsize) if 'each_class' in curves: for i in range(len(classes)): ax.plot(recall[i], precision[i], lw=2, label='Precision-recall curve of class {0} ' '(area = {1:0.3f})'.format(classes[i], average_precision[i])) if 'micro' in curves: ax.plot(recall[micro_key], precision[micro_key], label='micro-average Precision-recall curve ' '(area = {0:0.3f})'.format(average_precision[micro_key]), color='navy', linestyle=':', linewidth=4) ax.set_xlim([0.0, 1.0]) ax.set_ylim([0.0, 1.05]) ax.set_xlabel('Recall') ax.set_ylabel('Precision') ax.tick_params(labelsize=text_fontsize) ax.legend(loc='best', fontsize=text_fontsize) return ax
plt.title('Some extension of Receiver operating characteristic to multi-class') plt.legend(loc="lower right") plt.savefig('roc.png') # ------------------------------------------------------------- # # ----------------------- PR CURVE --------------------------- # # ------------------------------------------------------------- # plt.figure(3) # precision recall curve precision = dict() recall = dict() pr_auc = dict() for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(dummy_y_Test[:, i], predictions[:, i]) pr_auc[i] = auc(recall[i], precision[i]) colors = cycle(['aqua', 'darkorange', 'cornflowerblue']) for i, color in zip(range(n_classes), colors): plt.plot(recall[i], precision[i], color=color, lw=lw, label='PR curve of class {0} (area = {1:0.2f})' ''.format(class_names[i], pr_auc[i])) # plt.plot([1, 0], [0, 1], 'k--', lw=lw) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel("recall") plt.ylabel("precision") plt.legend(loc="best") plt.title("precision vs. recall curve")
print(confusion_matrix(label__test, predictions))
print(classification_report(label__test, predictions))
accuracy = (accuracy_score(label__test, predictions, normalize=True, sample_weight=None) * 100)
print("Accuracy:" + str(accuracy) + "%")

# Tree Visualization
iris = load_iris()
clf = DecisionTreeClassifier()
clf.fit(iris.data, iris.target)
dot_data = tree.export_graphviz(clf, out_file=None,
                                feature_names=iris.feature_names,
                                class_names=iris.target_names,
                                filled=True, rounded=True,
                                special_characters=True)
graph = graphviz.Source(dot_data)

# Precision Recall Curve
average_precision = average_precision_score(label__test, predictions)
precision, recall, _ = precision_recall_curve(label__test, predictions)
# In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
step_kwargs = ({'step': 'post'}
               if 'step' in signature(plt.fill_between).parameters
               else {})
plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
plt.show()
def train_model(clf_factory, X, Y, name, plot=False):
    labels = np.unique(Y)

    cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, random_state=0)

    train_errors = []
    test_errors = []
    scores = []
    pr_scores = defaultdict(list)
    precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list)
    roc_scores = defaultdict(list)
    tprs = defaultdict(list)
    fprs = defaultdict(list)
    clfs = []  # just to later get the median
    cms = []

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]
        clf = clf_factory()
        clf.fit(X_train, y_train)  # TODO: dimensions are inconsistent
        clfs.append(clf)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)
        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)

        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]
            precision, recall, pr_thresholds = precision_recall_curve(y_label_test, proba_label)
            pr_scores[label].append(auc(recall, precision))
            precisions[label].append(precision)
            recalls[label].append(recall)
            thresholds[label].append(pr_thresholds)
            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)

    if plot:
        for label in labels:
            print("Plotting", genre_list[label])
            scores_to_sort = roc_scores[label]
            # integer division so the index is valid in Python 3
            median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]
            desc = "%s %s" % (name, genre_list[label])
            plot_pr(pr_scores[label][median], desc, precisions[label][median],
                    recalls[label][median], label='%s vs rest' % genre_list[label])
            plot_roc(roc_scores[label][median], desc, tprs[label][median],
                     fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(list(pr_scores.values())).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
def pr_auc_score(y_true, y_score):
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    return auc(recall, precision)
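A helper like `pr_auc_score` can also drive model selection when wrapped in a scorer. A minimal sketch, assuming scikit-learn's standard `make_scorer`/`cross_val_score` API; the synthetic data and logistic-regression estimator are illustrative only, not part of the original code:

```python
# Hypothetical usage sketch: plug pr_auc_score into cross-validation as a scorer.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

X_demo, y_demo = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)
# needs_proba=True makes sklearn pass predicted probabilities to pr_auc_score
pr_auc_scorer = make_scorer(pr_auc_score, needs_proba=True)
cv_scores = cross_val_score(LogisticRegression(max_iter=1000), X_demo, y_demo,
                            scoring=pr_auc_scorer, cv=5)
print(cv_scores.mean())
```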
# to avoid exploring the regime in which very noisy variables enter # the model alphas = np.linspace(lars_cv.alphas_[0], .1 * lars_cv.alphas_[0], 6) clf = RandomizedLasso(alpha=alphas, random_state=42).fit(X, y) trees = ExtraTreesRegressor(100).fit(X, y) # Compare with F-score F, _ = f_regression(X, y) plt.figure() for name, score in [ ('F-test', F), ('Stability selection', clf.scores_), ('Lasso coefs', np.abs(lars_cv.coef_)), ('Trees', trees.feature_importances_), ]: precision, recall, thresholds = precision_recall_curve( coef != 0, score) plt.semilogy(np.maximum(score / np.max(score), 1e-4), label="%s. AUC: %.3f" % (name, auc(recall, precision))) plt.plot(np.where(coef != 0)[0], [2e-4] * n_relevant_features, 'mo', label="Ground truth") plt.xlabel("Features") plt.ylabel("Score") # Plot only the 100 first coefficients plt.xlim(0, 100) plt.legend(loc='best') plt.title('Feature selection scores - Mutual incoherence: %.1f' % mi) plt.show()
    random_state=0)

y_pred = y_test.value_counts().idxmax()
total = len(y_test.index)
correct = y_test.value_counts().max()
# Predict the majority class everywhere, plus a single opposite label so both classes appear
y_pred = pd.Series([1] + [y_pred] * (len(y_test) - 1))
#y_pred = pd.Series([y_pred] * (len(y_test) - 1))
df_confusion = pd.crosstab(y_test, y_pred)
df_norm = df_confusion.values / df_confusion.sum(axis=1)[:, None]
ax = sn.heatmap(df_confusion, annot=True, annot_kws={"size": 20}, cmap="YlGnBu")
plt.xlabel('Predicted label', fontsize=20)
plt.ylabel('True label', fontsize=20)
plt.title('Confusion Matrix w/o Normalization (baseline)', fontsize=20)
plt.show()

precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred)
score = correct / total
average_precision = metrics.average_precision_score(y_test, y_pred)
auc = metrics.auc(recall, precision)
print("Majority predicts {}, score: {}".format(y_pred[0], score))
print("Average PR score: {0:0.2f}".format(average_precision))
print("AUC: {0:0.2f}".format(auc))

plt.plot([0, 1], [0.5, 0.5], linestyle='--')
plt.plot(recall, precision, marker='.')
plt.xlabel('Recall', fontsize=20)
plt.ylabel('Precision', fontsize=20)
plt.title('Precision-Recall Curve Baseline', fontsize=20)
#plt.show()
decoder = Dense(hidden_sizes[2], activation='relu')(encoder) decoder = Dense(input_size)(decoder) deep_ae = Model(inputs=input_layer, outputs=decoder) print(deep_ae.summary()) optimizer = optimizers.Adam(lr=0.00005) deep_ae.compile(optimizer=optimizer, loss='mean_squared_error') tensorboard = TensorBoard(log_dir='./logs/run2/', write_graph=True, write_images=False) model_file = "model_deep_ae.h5" checkpoint = ModelCheckpoint(model_file, monitor='loss', verbose=1, save_best_only=True, mode='min') num_epoch = 50 batch_size = 64 deep_ae.fit(X_train, X_train, epochs=num_epoch, batch_size=batch_size, shuffle=True, validation_data=(X_test, X_test), verbose=1, callbacks=[checkpoint, tensorboard]) recon = deep_ae.predict(X_test) recon_error = np.mean(np.power(X_test - recon, 2), axis=1) from sklearn.metrics import (precision_recall_curve, auc) precision, recall, th = precision_recall_curve(Y_test, recon_error) area = auc(recall, precision) print('Area under precision-recall curve:', area)
y_pred_task[i].extend(pred_lable) # y_pred_task_score[i].extend(y_pred) except: y_true_task[i] = [] y_pred_task[i] = [] # y_pred_task_score[i] = [] y_true_task[i].extend(y_label.cpu().numpy()) y_pred_task[i].extend(pred_lable) # y_pred_task_score[i].extend(y_pred.cpu().detach().numpy()) val_sum_loss.append(loss.cpu().detach().numpy()) val_avg_loss = np.array(val_sum_loss).mean() trn_roc = [metrics.roc_auc_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)] trn_prc = [metrics.auc(precision_recall_curve(y_true_task[i], y_pred_task[i])[1], precision_recall_curve(y_true_task[i], y_pred_task[i])[0]) for i in range(tasks_num)] # acc = [metrics.accuracy_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)] # recall = [metrics.recall_score(y_true_task[i], y_pred_task[i]) for i in range(tasks_num)] # specificity = [cm[i][0, 0] / (cm[i][0, 0] + cm[i][0, 1]) for i in range(tasks_num)] print("epoch:", epoch, " val " "avg_loss:", val_avg_loss, # "acc: ", np.array(acc).mean(), # "recall: ", np.array(recall).mean(), # "specificity: ", np.array(specificity).mean(), # " val_auc: ", trn_roc, " val_auc: ", np.array(trn_roc).mean(), # " val_pr: ", trn_prc, " val_pr: ", np.array(trn_prc).mean())
auc = roc_auc_score(y_test, probs) print('AUC: %.3f' % auc) # calculate roc curve fpr, tpr, thresholds = roc_curve(y_test, probs) # plot no skill pyplot.plot([0, 1], [0, 1], linestyle='--') # plot the roc curve for the model pyplot.plot(fpr, tpr, marker='.') pyplot.xlabel('FP RATE') pyplot.ylabel('TP RATE') # show the plot pyplot.show()""" average_precision = average_precision_score(y_test, rounded) precision, recall, _ = precision_recall_curve(y_test, rounded) """ # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {}) plt.step(recall, precision, color='b', alpha=0.2, where='post') plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs) plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision)) plt.show()"""
clf = CV_rfc.best_estimator_
n_trials = 50
test_size_percent = 0.1

signals = X
labels = y

plot_data = []
train_signals, test_signals, train_labels, test_labels = train_test_split(
    signals, labels, test_size=test_size_percent)
clf.fit(train_signals, train_labels)
predictions = clf.predict_proba(test_signals)[:, 1]

precision, recall, thresholds = precision_recall_curve(test_labels, predictions)
thresholds = np.append(thresholds, 1)

# Fraction of samples that would be flagged at each threshold
queue_rate = []
for threshold in thresholds:
    queue_rate.append((predictions >= threshold).mean())

plt.plot(thresholds, precision, color=sns.color_palette()[0])
plt.plot(thresholds, recall, color=sns.color_palette()[1])
plt.plot(thresholds, queue_rate, color=sns.color_palette()[2])
leg = plt.legend(('precision', 'recall', 'queue_rate'), frameon=True)
leg.get_frame().set_edgecolor('k')
plt.xlabel('threshold')
plt.ylabel('%')
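Instead of reading a cut-off from the threshold plot above, the threshold that maximizes F1 can also be picked programmatically from the same curve. A minimal sketch under that assumption; `best_f1_threshold` is a hypothetical helper, not part of the original code:

```python
# Hypothetical helper: choose the probability cut-off that maximizes F1.
import numpy as np
from sklearn.metrics import precision_recall_curve

def best_f1_threshold(y_true, y_prob):
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    # precision/recall have one more entry than thresholds, so drop the last pair
    f1 = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + 1e-12)
    return thresholds[np.argmax(f1)]

# e.g. with the variables computed above:
# print(best_f1_threshold(test_labels, predictions))
```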
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                    random_state=random_state)

# Run classifier
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute Precision-Recall and plot curve
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

# Compute micro-average precision-recall curve and average precision
precision["micro"], recall["micro"], _ = precision_recall_curve(
    y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

# Plot Precision-Recall curve
plt.clf()
plt.plot(recall[0], precision[0], lw=lw, color='navy',
def AUPR(correct_label, predict_score):
    precision, recall, _ = precision_recall_curve(correct_label, predict_score)
    aupr = auc(recall, precision)
    return aupr
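For reference, the trapezoidal PR area returned by `AUPR` is closely related to, but not identical to, `average_precision_score`, which uses a step-wise summary of the same curve. A small illustrative comparison on synthetic data, not from the original code:

```python
# Illustrative comparison (synthetic data): trapezoidal PR AUC vs. average precision.
import numpy as np
from sklearn.metrics import average_precision_score, auc, precision_recall_curve

rng = np.random.RandomState(0)
labels = rng.randint(0, 2, size=200)
scores = rng.rand(200) + 0.3 * labels  # noisy scores correlated with the labels

precision, recall, _ = precision_recall_curve(labels, scores)
print("trapezoidal PR AUC :", auc(recall, precision))
print("average precision  :", average_precision_score(labels, scores))
```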
ax1.set_ylabel('Precision')
ax1.set_title('PR Curve')

ax2 = fig.add_subplot(1, 2, 2)
ax2.set_xlim([-0.05, 1.05])
ax2.set_ylim([-0.05, 1.05])
ax2.set_xlabel('False Positive Rate')
ax2.set_ylabel('True Positive Rate')
ax2.set_title('ROC Curve')

for c, k in zip([0.0001, 0.001, 0.1, 1, 10, 25, 50, 100], 'bgrcmywk'):
    lsvm_ = svm.LinearSVC(C=c, dual=False, class_weight={1: 1, 0: 1})
    lsvm_.fit(dataset['X_train_'], dataset['y_train_'])
    y_pred = lsvm_.predict(dataset['X_test_'])
    p, r, _ = precision_recall_curve(dataset['y_test_'], y_pred)
    # roc_curve returns (fpr, tpr, thresholds) in that order
    fpr, tpr, _ = roc_curve(dataset['y_test_'], y_pred)
    ax1.plot(r, p, c=k, label=c)
    ax2.plot(fpr, tpr, c=k, label=c)
ax1.legend(loc='lower left')
ax2.legend(loc='lower left')
plt.show()


# In[ ]:

fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(1, 2, 1)
args.prediction_window = win_size print(args) X_train, X_test, y_train, y_test, pos_rate = prepare_data(df_static, df_dynamic, dynamic_feature, args=args) model = train_gbtree(X_train, y_train, pos_rate, args=args) # Testing y_prob = model.predict_proba(X_test)[:, 1] # Evaluation fpr, tpr, _ = metrics.roc_curve(y_test, y_prob) prec, rec, _ = metrics.precision_recall_curve(y_test, y_prob) (sensitivity, specificity, PPV, NPV, f1, acc), _ = line_search_best_metric(y_test, y_prob, spec_thresh=0.95) result_table = result_table.append( { 'window': win_size, 'fpr': fpr, 'tpr': tpr, 'roc': metrics.auc(fpr, tpr), 'prec': prec, 'rec': rec, 'prc': metrics.auc(rec, prec), 'y_test': y_test, 'y_prob': y_prob, 'pos_rate': pos_rate
testing_samples = pos + neg
shuffle(testing_samples)
features = [x[0] for x in testing_samples]
gold = [x[1] for x in testing_samples]

sia = SentimentIntensityAnalyzer()
labels = [get_prob(sia.polarity_scores(x)) for x in features]

# k fold
accuracy = []
import matplotlib.pyplot as plt
for i in range(10):
    testing_fold = testing_samples[i * 100:(i + 1) * 100]
    testing_labels = [x[1] for x in testing_fold]
    predicted_labels = labels[i * 100:(i + 1) * 100]
    precision, recall, _ = precision_recall_curve(testing_labels, predicted_labels)
    print(len(predicted_labels))
    lab = 'Fold %d AUC=%.4f' % (i + 1, auc(recall, precision))
    plt.step(recall, precision, label=lab)
    accuracy.append(eval(predicted_labels, testing_fold))

plt.legend(loc='lower left', fontsize='small')
# recall is plotted on the x-axis and precision on the y-axis
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('10 Fold Cross-Validation P-R Curve')
# plt.show()
plt.savefig('pr.png')

# fig, ax = plt.subplots()
# ax.plot([1,2,3],'ko-',label='line1')
# ax.plot([2,4,3],'ro-',label='lin2')
# ax.plot([1,5,9],'bo-',label='lin3')
# ax.set_xticklabels(['','A','B','C',''])
print(len(x_train_undersampled)) print(len(y_train_undersampled)) modxtr = np.array(x_train_undersampled) modytr = np.array(y_train_undersampled) clf = RFC(n_estimators=100) svm_best_clf = clf.fit(modxtr, modytr) test_predictions_svm = svm_best_clf.predict(X1) test_predictions_svm_proba = svm_best_clf.predict_proba(X1) accuracy = accuracy_score(y1,test_predictions_svm) true_n, false_p, false_n, true_p = confusion_matrix( y1, test_predictions_svm).ravel() print(true_n, false_p, false_n, true_p) prec = precision_score(y1, test_predictions_svm) f1 = f1_score(y1, test_predictions_svm) sensitivity, specificity = compute_measures(true_p, false_p, false_n, true_n) print(sensitivity, specificity) average = (sensitivity + specificity) / 2 fpr, tpr, thresholds = roc_curve(y1, test_predictions_svm_proba[:, 1]) roc_auc1 = auc(fpr, tpr) precision, recall, thresholds = precision_recall_curve( y1, test_predictions_svm_proba[:, 1]) area = auc(recall, precision) print(sensitivity, specificity, average, roc_auc1, accuracy, precision, f1, area)
def evaluate_model(modelname, testX, testY, i, type):
    cnn = models.load_model(modelname)
    # cnn = models.load_model('%d-merge.h5' % i, {'isru': isru, 'pearson_r': pearson_r})

    # ############### test ##########################
    pre_score = cnn.evaluate(testX, testY, batch_size=2048, verbose=0)
    # fileX = open('./fig1/%s' % type + '/without/pre_score%d.pickle' % i, 'wb')
    # pickle.dump(pre_score, fileX, protocol=4)
    # fileX.close()
    # written out later to draw the comparison figure

    # ######### Print Precision and Recall ##########
    pred_proba = cnn.predict(testX, batch_size=2048)
    fileX = open('./fig1/%s' % type + '/without/pred_proba%d.pickle' % i, 'wb')
    pickle.dump(pred_proba, fileX, protocol=4)
    fileX.close()

    pred_score = pred_proba[:, 1]
    true_class = testY[:, 1]
    precision, recall, _ = precision_recall_curve(true_class, pred_score)
    average_precision = average_precision_score(true_class, pred_score)
    fpr, tpr, thresholds = roc_curve(true_class, pred_score)
    roc_auc = auc(fpr, tpr)

    # Binarize the scores at the configured threshold
    for index in range(len(pred_score)):
        if pred_score[index] > config.getfloat('others', 'threshold'):
            pred_score[index] = 1
        else:
            pred_score[index] = 0
    mcc = matthews_corrcoef(true_class, pred_score)

    plt.figure()
    plt.step(recall, precision, color='navy', where='post')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
    plt.savefig('./fig1/%s' % type + '/without/curve/' + str(type) + 'Precision-Recall%d.png' % i)

    # ################# Print ROC ###################
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange', lw=lw, label='Inception ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig('./fig1/%s' % type + '/without/curve/' + str(type) + 'ROC %d.png' % i)

    SN, SP = performance(true_class, pred_score)
    pre = precision_score(y_true=true_class, y_pred=pred_score)
    rec = recall_score(y_true=true_class, y_pred=pred_score)
    f1 = f1_score(y_true=true_class, y_pred=pred_score)  # Sn and recall are the same value
    return pre_score, pre, rec, SN, SP, f1, mcc, roc_auc
confusion_train[1, 1] / (confusion_train[1, 1] + confusion_train[0, 1])

# In[100]:

# calculate recall
# TP/(TP+FN)
confusion_train[1, 1] / (confusion_train[1, 1] + confusion_train[1, 0])

# Precision is less when compared to Recall

# #### Precision and Recall tradeoff

# In[101]:

p, r, thres = precision_recall_curve(y_train_pred_final.Converted, y_train_pred_final.Converted_Prob)

# In[102]:

plt.plot(thres, p[:-1], "g-")
plt.plot(thres, r[:-1], "r-")
plt.show()

# Based on the precision-recall trade-off plot, 0.4 is chosen as the threshold for the final prediction

# **Making Final Predictions using `0.4` as the cut-off value**

# In[103]:

# Creating new column 'Final_Pred_PR' with 1 if Converted_Prob > 0.4 else 0 and evaluating the model
y_train_pred_final['Final_Pred_PR'] = y_train_pred_final.Converted_Prob.map(
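The 0.4 cut-off is read off the trade-off plot. A short sketch of picking the precision-recall crossover threshold programmatically instead, assuming the p, r, thres arrays computed above:

import numpy as np

# Index where precision and recall are closest; thres aligns with p[:-1] / r[:-1].
crossover_idx = np.argmin(np.abs(p[:-1] - r[:-1]))
crossover_threshold = thres[crossover_idx]
print(round(crossover_threshold, 2))  # expected to be near the 0.4 read off the plot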
    metrics.roc_auc_score(scores['true'], scores[c_name])
    for c_name in column_names
]
auc_rocs_with_names = list(zip(auc_rocs, column_names))
auc_rocs_with_names.sort(key=lambda x: x[0])
best_classifier = auc_rocs_with_names[-1][1]

# Save the answer
submission_file = open('submissions/metrics/auc_roc.txt', 'w+')
submission_file.write(best_classifier)
submission_file.close()
print(best_classifier)

# Find the best classifier, based on precision, when recall is more than 70%
pr_curves = [
    metrics.precision_recall_curve(scores['true'], scores[c_name])
    for c_name in column_names
]
precisions = list(range(len(pr_curves)))
for index, pr_curve in enumerate(pr_curves):
    # loop through recall thresholds
    i = 0
    while i < len(pr_curve[1]):
        if pr_curve[1][i] <= 0.7:
            break
        else:
            i += 1
    # find the best precision for such recall
    precisions[index] = max(pr_curve[0][:i])
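Because precision_recall_curve returns recall in decreasing order, the loop above is equivalent to masking on recall > 0.7. A vectorised sketch under that assumption, using the same pr_curves list (precision is pr_curve[0], recall is pr_curve[1]):

import numpy as np

# Should match the `precisions` list built by the loop above.
precisions_vectorised = [
    float(np.max(np.asarray(pr_curve[0])[np.asarray(pr_curve[1]) > 0.7]))
    for pr_curve in pr_curves
]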
print(recall)
print("FPR : ")
FPR = conf[0, 1] / (conf[0, 1] + conf[0, 0])
print(conf[0, 1] / (conf[0, 1] + conf[0, 0]))

# Cross-validation
from sklearn.model_selection import cross_val_score
score = cross_val_score(clf, TestData, TestLabel, cv=3, scoring="accuracy")
print("cross_val_score : {}".format(score))

# Precision-recall curve
label_score = clf.decision_function(TestData)
from sklearn.metrics import precision_recall_curve
precisions, recalls, thresholds = precision_recall_curve(TestLabel, label_score)
prname = "pr" + str(gamma) + "vs" + str(nu)
plt.figure("{}".format(prname))
plot_precision_recall_threshold(precisions, recalls, thresholds)
prpath = pr_path + os.sep + str(gamma) + "n" + str(nu) + r".png"
plt.savefig(prpath)
plt.close()

# ROC curve
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(TestLabel, label_score)
rocname = "roc" + str(gamma) + "vs" + str(nu)
plt.figure("{}".format(rocname))
plot_roc_curve(fpr, tpr)
rocpath = roc_path + os.sep + str(gamma) + "n" + str(nu) + r".png"
            temp['MSE'], alpha=0.7, marker=markers[flag], c=colors[flag], label=labels[flag])
plt.legend(loc=[1, 0], fontsize=12)
plt.title('Reconstruction MSE')
plt.ylabel('Reconstruction MSE')
plt.xlabel('Index')
plt.show()

# Plot the Precision-Recall curves
plt.figure(figsize=(14, 6))
for i, metric in enumerate(['MAE', 'MSE']):
    plt.subplot(1, 2, i + 1)
    precision, recall, _ = precision_recall_curve(mse_df['Class'], mse_df[metric])
    pr_auc = auc(recall, precision)
    plt.title('Precision-Recall curve based on %s\nAUC = %0.2f' % (metric, pr_auc))
    plt.plot(recall[:-2], precision[:-2], c='coral', lw=4)
    plt.xlabel('Recall'); plt.ylabel('Precision')
plt.show()

# Plot the ROC curves
plt.figure(figsize=(14, 6))
for i, metric in enumerate(['MAE', 'MSE']):
    plt.subplot(1, 2, i + 1)
    fpr, tpr, _ = roc_curve(mse_df['Class'], mse_df[metric])
    roc_auc = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic based on %s\nAUC = %0.2f' % (metric, roc_auc))
    plt.plot(fpr, tpr, c='coral', lw=4)
    plt.plot([0, 1], [0, 1], c='dodgerblue', ls='--')
y_real = get_y_real()
Y_pred = model.predict_generator(test_gen, test_samples // batch_size + 1)

# y_pred = np.argmax(Y_pred, axis=1)
# Binarise the positive-class probability at a 0.3 cut-off
y_pred = []
for el in Y_pred:
    y_pred += [1 if el[1] >= 0.3 else 0]

print('Classification Report')
target_names = ['Authentic', 'Tampered']
print(classification_report(y_real, y_pred, target_names=target_names))

cm = confusion_matrix(
    list(map(lambda x: int(x[0]), y_real)), y_pred, normalize='true')
print(cm)

prec, rec, ll = precision_recall_curve(y_real, list(map(lambda x: x[1], Y_pred)))
plt.plot(rec, prec)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.show()
print(auc(rec, prec))

# plt.matshow(cm, cmap='binary')
# plt.show()
def rec(y_true, y_pred):
    # Index 1 of precision_recall_curve's (precision, recall, thresholds) output
    return precision_recall_curve(y_true, y_pred)[1]
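For context, a hypothetical companion helper, not part of the original snippet: index 0 of the same tuple is the precision array (and index 2 the thresholds), so a matching precision helper would look like this.

# Hypothetical helper, mirroring rec() above.
def prec(y_true, y_pred):
    return precision_recall_curve(y_true, y_pred)[0]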
Res = [a_res]
Res = get_CI(AUC, Res)
Res = get_CI(ACC, Res)
Res = get_CI(TPR, Res)
Res = get_CI(TNR, Res)
Res = get_CI(PPV, Res)
Res = get_CI(NPV, Res)
Res = get_CI(F1, Res)
Res = get_CI(YOUDEN, Res)
f.writerow(Res)

plt.figure(1)
fpr, tpr, threshold = metric.roc_curve(gt, pre)
plt.plot(fpr, tpr, label=a_res.split('/')[-1].split('.npy')[0])

plt.figure(2)
precision, recall, t = metric.precision_recall_curve(gt, pre)
plt.plot(recall, precision, label=a_res.split('/')[-1].split('.npy')[0])

plt.figure(1)
# plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic Curve')
plt.legend(loc="lower right")
# plt.show()
plt.savefig('jpgs/roc_ab_detect.jpg')
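The snippet labels and saves only the ROC figure (figure 1). A sketch of the matching finishing step for the precision-recall figure (figure 2); the output filename is an assumption, chosen to mirror 'jpgs/roc_ab_detect.jpg'.

plt.figure(2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.savefig('jpgs/prc_ab_detect.jpg')  # hypothetical path, mirroring the ROC output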
y = np.array(data.Class.tolist())
data = data.drop(['Time', 'Amount', 'Class'], axis=1)
X = np.array(data.values)  # .as_matrix() was removed in newer pandas; .values is equivalent

# Prepare the training and test sets
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.1, random_state=33)

# Linear SVM classification (the original comment said "logistic regression",
# but the model used is svm.LinearSVC)
model = svm.LinearSVC()
model.fit(train_x, train_y)
predict_y = model.predict(test_x)

# Confidence scores for the test samples
score_y = model.decision_function(test_x)

# Compute the confusion matrix and display it
cm = confusion_matrix(test_y, predict_y)
class_names = [0, 1]

# Show the confusion matrix
plot_confusion_matrix(cm, classes=class_names, title='LinearSVC Confusion Matrix')

# Show the model evaluation scores
show_metrics()

# Compute precision, recall and thresholds for visualisation
precision, recall, thresholds = precision_recall_curve(test_y, score_y)
plot_precision_recall()
'''
Precision: 0.846
Recall: 0.733
F1: 0.786
'''
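plot_precision_recall() is called above but not defined in this snippet. A minimal sketch of what such a helper might do, assuming it reads the precision / recall arrays computed just before the call (hypothetical implementation, not from the original source):

def plot_precision_recall():
    # Step plot of the PR curve with a shaded area under it.
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title('Precision-Recall curve')
    plt.show()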
                                                    test_size=0.5, random_state=2)
# generate a no skill prediction (majority class)
ns_probs = [0 for _ in range(len(testy))]
# fit models
model = LogisticRegression(solver='lbfgs')
model.fit(trainX, trainy)
# predict probabilities
lr_probs = model.predict_proba(testX)
# keep probabilities for the positive outcome only
lr_probs = lr_probs[:, 1]
# predict class values
yhat = model.predict(testX)
lr_precision, lr_recall, _ = precision_recall_curve(testy, lr_probs)
# keep the PR AUC in its own variable so the ROC AUC below does not overwrite it
lr_f1, lr_pr_auc = f1_score(testy, yhat), auc(lr_recall, lr_precision)
# calculate scores
ns_auc = roc_auc_score(testy, ns_probs)
lr_auc = roc_auc_score(testy, lr_probs)
# summarize scores
print('No Skill: ROC AUC=%.3f' % (ns_auc))
print('Logistic: ROC AUC=%.3f' % (lr_auc))
print('Logistic: f1=%.3f precision-recall AUC=%.3f' % (lr_f1, lr_pr_auc))
# calculate roc curves
ns_fpr, ns_tpr, _ = roc_curve(testy, ns_probs)
lr_fpr, lr_tpr, _ = roc_curve(testy, lr_probs)
# plot the roc curve for the model
plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
# axis labels
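A short sketch of the analogous no-skill baseline for the precision-recall curve, assuming the same testy / lr_recall / lr_precision arrays as above: a no-skill classifier sits at a constant precision equal to the positive-class rate.

no_skill = len(testy[testy == 1]) / len(testy)
plt.figure()
plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
plt.plot(lr_recall, lr_precision, marker='.', label='Logistic')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()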
# Set up classifier using A projects
gb_clf = GradientBoostingClassifier(n_estimators=250, learning_rate=0.75, max_features=50,
                                    max_depth=6, random_state=1, verbose=0)
gb_clf.fit(A_X, A_y)
# print("Classifier ready. Performing predictions...")

# Perform predictions on B
B_pred = gb_clf.predict(B_X)
# print("Complete! Results: \n")

# Evaluate
prec = precision_score(y_true=B_y, y_pred=B_pred, average='binary')
rec = recall_score(y_true=B_y, y_pred=B_pred, average='binary')
# Note: the PR curve here is computed from hard 0/1 predictions, so it has only
# a few points; probability scores would give a fuller curve.
p, r, _ = precision_recall_curve(B_y, B_pred, pos_label=1)
aucval = auc(r, p)
fm = 2 * (prec * rec) / (prec + rec)
print("PREC: ", prec)
print("REC: ", rec)
print("AUC: ", aucval)
print("F1: ", fm)

# Save the model
modelName = retrieve_name(B) + "_model.pkl"
with open(modelName, 'wb') as f:
    pickle.dump(gb_clf, f)
# print("\n \nModel saved as " + modelName)
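A sketch of the score-based variant mentioned in the comment above, assuming the same gb_clf / B_X / B_y objects: using the positive-class probability lets precision_recall_curve sweep over thresholds instead of collapsing to a few points.

B_scores = gb_clf.predict_proba(B_X)[:, 1]
p, r, _ = precision_recall_curve(B_y, B_scores, pos_label=1)
aucval = auc(r, p)
print("AUC (from scores): ", aucval)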
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score

# Use a distinct name so the imported confusion_matrix function is not shadowed
conf_matrix = confusion_matrix(y_train, y_train_pred)
precision = precision_score(y_train, y_train_pred)
recall = recall_score(y_train, y_train_pred)
f1 = f1_score(y_train, y_train_pred)

#%%
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_recall_curve

y_scores = cross_val_predict(logreg_model, X_train, y_train,
                             cv=kfolds, method="decision_function")
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)

def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    plt.figure()
    plt.plot(thresholds, precisions[:-1], "b--", label="Precision")
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall")
    plt.xlabel("Threshold")
    plt.legend(loc="upper right")
    plt.ylim([0, 1])

plot_precision_recall_vs_threshold(precisions, recalls, thresholds)

#%%
from sklearn.metrics import roc_curve
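A short sketch of turning the precision/recall-vs-threshold curve above into a concrete operating point, assuming the same precisions / thresholds arrays: pick the first threshold that reaches a target precision. The 0.90 target is purely illustrative.

import numpy as np

target_precision = 0.90
# precisions[:-1] aligns with thresholds; argmax returns the first index where
# the target is met (or 0 if it never is, so check the result before relying on it).
threshold_for_target = thresholds[np.argmax(precisions[:-1] >= target_precision)]
print(threshold_for_target)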