def evaluate(df, modelv="gnb", race="W", census=False, report=True, roc=True, pr=True):
    """Run model evaluations for a specified model and race class.

    Parameters
    ----------
    df : pandas.DataFrame
        Input records; must contain the name columns and ``race_code``.
    modelv : str
        Model-variant key used to locate the saved model joblib file.
    race : str
        Race class whose binary model and transformers are evaluated.
    census : bool
        Unused in this function; kept for interface compatibility.
    report, roc, pr : bool
        Toggle the corresponding yellowbrick visualizers.
    """
    # Load the fitted binary model for the requested variant/race.
    models = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
    model = models[race]

    # Prepare the data and target-encode the name columns with the saved
    # per-race transformers; unseen categories transform to NaN, hence fillna.
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")
    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    # Binary target: True where the record matches the requested race class.
    # (The original wrapped this in np.where(cond, True, False), a no-op.)
    tmpa = (df.race_code == race).to_numpy()
    df = df.fillna(0)

    # Run the requested evaluation visualizers.
    if report:
        visualizer = ClassificationReport(model, classes=model.classes_, support=True)
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()
    if roc:
        visualizer = ROCAUC(model, classes=["W", "not-W"])
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()
    if pr:
        viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
        viz.score(df[MODEL_COLS], tmpa)
        viz.show()
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Fit an RFE-wrapped LinearSVC and save a suite of evaluation plots.

    Parameters
    ----------
    c : float
        Regularization strength C for the LinearSVC.
    n : int
        Number of features RFE should retain.
    X, y : training features and labels.
    X_test, y_test : held-out features and labels.
    class_names : sequence of class labels for plot legends.
    outdir : str
        Directory where all PNG plots and the feature CSV are written.

    Returns
    -------
    float
        Weighted F1 score on the test set.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    # BUG FIX: score on the held-out test set (the original scored on X, y,
    # i.e. the training data, unlike every other visualizer in this function).
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    # Final fit for the reported F1 and the retained-feature export.
    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)
    return f1
def draw_binary(outpath=None):
    """Draw a precision-recall curve for RidgeClassifier on the binary dataset.

    The figure is optionally written to *outpath*.
    """
    _, ax = plt.subplots(figsize=(9, 6))
    X_train, X_test, y_train, y_test = load_binary(split=True)
    visualizer = PrecisionRecallCurve(RidgeClassifier(), ax=ax)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
def draw_binary(outpath=None):
    """Fit, score, and render a binary precision-recall curve.

    Uses a RidgeClassifier on the binary train/test split; saves to *outpath*
    when given.
    """
    _, axes = plt.subplots(figsize=(9, 6))
    X_train, X_test, y_train, y_test = load_binary(split=True)
    curve = PrecisionRecallCurve(RidgeClassifier(), ax=axes)
    curve.fit(X_train, y_train)
    curve.score(X_test, y_test)
    curve.poof(outpath=outpath)
def precision_recall_f1(model, classes, X_train, Y_train, X_test, Y_test):
    """Draw a per-class precision-recall curve with iso-F1 contours for *model*.

    NOTE(review): ``classes`` is accepted but never used here; it is kept so
    the signature stays compatible with existing callers.
    """
    from yellowbrick.classifier import PrecisionRecallCurve

    curve = PrecisionRecallCurve(
        model,
        per_class=True,
        iso_f1_curves=True,
        fill_area=False,
        micro=False,
    )
    curve.fit(X_train, Y_train)
    curve.score(X_test, Y_test)
    curve.poof()
def precision_recall(model, classes, X_train, Y_train, X_test, Y_test):
    """Render a basic precision-recall curve for an already-configured model.

    Fits on the training split, scores on the test split, then displays the
    figure. ``classes`` is unused but retained for signature compatibility.
    """
    from yellowbrick.classifier import PrecisionRecallCurve

    # Create the visualizer, fit, score, and poof it.
    curve = PrecisionRecallCurve(model)
    curve.fit(X_train, Y_train)
    curve.score(X_test, Y_test)
    curve.poof()
def draw_multiclass(outpath=None, simple=True):
    """Draw a multiclass precision-recall curve.

    With ``simple=True`` a RandomForestClassifier with default PR rendering is
    used; otherwise a MultinomialNB with per-class curves and iso-F1 contours.
    """
    _, ax = plt.subplots(figsize=(9, 6))
    X_train, X_test, y_train, y_test = load_multiclass()
    if simple:
        curve = PrecisionRecallCurve(RandomForestClassifier(), ax=ax)
    else:
        curve = PrecisionRecallCurve(
            MultinomialNB(), ax=ax, per_class=True,
            iso_f1_curves=True, fill_area=False, micro=False,
        )
    curve.fit(X_train, y_train)
    curve.score(X_test, y_test)
    curve.poof(outpath=outpath)
def eval_models(df, race="W", models=("gnb", "rf", "xgb"), census=False, report=False, roc=False, pr=False, cpe=False):
    """Run evaluation on a set of models and a single race class.

    Parameters
    ----------
    df : pandas.DataFrame
        Input records; must contain the name columns and ``race_code``.
    race : str
        Race class whose binary models and transformers are evaluated.
    models : iterable of str
        Model-variant keys to evaluate (default changed from a mutable list
        to an equivalent tuple; read-only here, so callers are unaffected).
    census : bool
        Unused in this function; kept for interface compatibility.
    report, roc, pr, cpe : bool
        Toggle the corresponding yellowbrick visualizers.
    """
    # Prepare the data and target-encode the name columns with the saved
    # per-race transformers.
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")
    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    # Binary target: True where the record matches the requested race class.
    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:
        # BUG FIX: the original rebound `models` -- the very parameter being
        # iterated -- to the loaded model dict; use a distinct name so the
        # loop source is never shadowed.
        fitted = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
        model = fitted[race]
        model.target_type_ = "binary"
        if report:
            visualizer = ClassificationReport(model, classes=model.classes_, support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()
        if pr:
            viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
def plot_precision_recall_curve_1(X_train, y_train, X_test, y_test, model):
    """Plot a precision-recall curve for *model*.

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model to analyze performance for
    :return: precision recall curve plot
    """
    curve = PrecisionRecallCurve(model)
    curve.fit(X_train, y_train)
    curve.score(X_test, y_test)
    curve.show()
def draw_multiclass(outpath=None, simple=True):
    """Render a multiclass PR curve and optionally save it to *outpath*.

    ``simple`` selects a plain RandomForestClassifier curve; otherwise a
    per-class MultinomialNB curve with iso-F1 contours is drawn.
    """
    _, axes = plt.subplots(figsize=(9, 6))
    X_train, X_test, y_train, y_test = load_multiclass()
    if simple:
        oz = PrecisionRecallCurve(RandomForestClassifier(), ax=axes)
    else:
        oz = PrecisionRecallCurve(
            MultinomialNB(),
            ax=axes,
            per_class=True,
            iso_f1_curves=True,
            fill_area=False,
            micro=False,
        )
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    oz.poof(outpath=outpath)
def PRCurve(model, X_train, X_test, y_train, y_test):
    """Visualize a binary Precision-Recall curve and return the AP score.

    Data to fit must be training, i.e. X_train, y_train; the score is
    estimated on X_test, y_test.

    Returns
    -------
    float
        Average precision score on the test data.
    """
    viz = PrecisionRecallCurve(model)
    viz.fit(X_train, y_train)
    avg_prec = viz.score(X_test, y_test)
    plt.legend(labels=['Binary PR Curve', "AP=%.3f" % avg_prec],
               loc='lower right', prop={'size': 14})
    plt.xlabel(xlabel='Recall', size=14)
    plt.ylabel(ylabel='Precision', size=14)
    plt.title(label='Precision Recall Curve', size=16)
    # BUG FIX: the original docstring promised the average precision score
    # but the function never returned it.
    return avg_prec
# Classify with a custom probability threshold instead of the default 0.5.
y_pred_thresh = rf.predict_proba(X_test)[:, 1] > 0.85  # 0.85 as threshold
print(classification_report(y_test, y_pred_thresh))

### Precision-Recall curve
from scikitplot.metrics import plot_precision_recall

rf_probas = rf.predict_proba(X_test)[:, 1]
plot_precision_recall(y_test, rf_probas)

from yellowbrick.classifier import PrecisionRecallCurve

viz = PrecisionRecallCurve(rf)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.poof()

# Discrimination Threshold - probability or score at which the positive
# class is chosen over the negative class
from yellowbrick.classifier import DiscriminationThreshold

viz = DiscriminationThreshold(rf)
viz.fit(X_train, y_train)
viz.poof()

# Average Precision
from sklearn.metrics import average_precision_score

# FIX: the original statement was truncated mid-call; score the positive-class
# probabilities already computed above against the true labels.
average_precision_score(y_test, rf_probas)
# Name the abalone columns and explore the class balance.
df_abalone.columns = ['sex', 'length', 'diameter', 'height', 'whole weight',
                      'sucked weight', 'viscera weight', 'shell weight', 'rings']
# print(df_abalone.head())
sns.countplot(data=df_abalone, x='sex', hue='rings', palette='gist_heat')
# plt.show()
# print(df_abalone.describe())
# df_abalone.info()

# Encode the categorical 'sex' column as integers.
le = preprocessing.LabelEncoder()
df_abalone['sex'] = le.fit_transform(df_abalone['sex'])
# print(df_abalone.head())

# BUG FIX: the original used `col is not "rings"` -- identity comparison with
# a string literal is unreliable (and a SyntaxWarning since Python 3.8);
# compare by value instead.
cols = [col for col in df_abalone.columns if col != "rings"]
# print(cols)
data = df_abalone[cols]
target = df_abalone['rings']

data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.20, random_state=10)
data_train.info()

# Baseline logistic regression and its accuracy on the held-out split.
logReg = LogisticRegression()
pred = logReg.fit(data_train, target_train).predict(data_test)
# print(pred)
print("Logistic Regression accuracy: ", accuracy_score(target_test, pred, normalize=True))

# Precision-recall curve for the fitted classifier.
visualizer = PrecisionRecallCurve(logReg)
visualizer.fit(data_train, target_train)
visualizer.score(data_test, target_test)
visualizer.show()
# Class prediction error for the current model on f_classif-selected features.
visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_class_errorf_classIF.png")

# Discrimination threshold plot (binary probability cutoff diagnostics).
visualizer = DiscriminationThreshold(model)
visualizer.fit(X_fclass_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="bag_descrimination_thresholdf_classIF.png")

# Create the visualizer, fit, score, and poof it
viz = PrecisionRecallCurve(model)
viz.fit(X_fclass_train, y_train)
viz.score(X_fclass_test, y_test)
viz.poof(outpath="bag_precision_recall_curvef_classIF.png")

#KNeighborsClassifier with f_classif features
model = KNeighborsClassifier()
model.fit(X_fclass_train, y_train)

# Per-class precision/recall/F1 report for the KNN model.
visualizer = ClassificationReport(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)  # Fit the visualizer and the model
visualizer.score(X_fclass_test, y_test)  # Evaluate the model on the test data
visualizer.poof(outpath="kneear_classification_report_fclassIF.png")

# Class prediction error for the KNN model.
visualizer = ClassPredictionError(model, classes=classes)
visualizer.fit(X_fclass_train, y_train)
visualizer.score(X_fclass_test, y_test)
visualizer.poof(outpath="kneear_class_error_fclassIF.png")
# Run model with 4-fold cross validation. Report mean accuracy.
scores = cross_val_score(mlp, X_train, y_train, cv=4)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

# Plot ROC, AUC.
classes = ["Normal", "Pre-Ictal", "Seizure"]
visualizer = ROCAUC(mlp, classes=classes)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
ROC_title = "ROCAUC_{}.png".format(animal_id)
g = visualizer.poof(outpath=ROC_title)  # Save plot w unique title

# Plot the precision-recall curve.
viz = PrecisionRecallCurve(mlp)
viz.fit(X_train, y_train)  # Fit the training data to the visualizer
viz.score(X_test, y_test)  # Evaluate the model on the test data
PR_title = "PR_{}.png".format(animal_id)
viz.poof(outpath=PR_title)  # Save plot w unique title

# Plot loss curve aka cost function.
loss_values = mlp.loss_curve_
plt.plot(loss_values)
Loss_title = "Loss_{}.png".format(animal_id)
# BUG FIX: savefig must come before plt.show() -- show() clears the active
# figure, so the original order saved an empty image.
plt.savefig(Loss_title)
plt.show()

sys.stdout.close()

# In[ ]:
def plot_pr(model, X_train, y_train, X_valid, y_valid):
    """Fit a PR-curve visualizer on the training data and score it on the
    validation split, then display the plot."""
    pr_curve = PrecisionRecallCurve(model)
    pr_curve.fit(X_train, y_train)
    pr_curve.score(X_valid, y_valid)
    pr_curve.poof()