def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Fit an RFE-wrapped linear SVM, save diagnostic plots, return test F1.

    A ``LinearSVC`` (balanced class weights, ``dual=False``,
    ``max_iter=10000``, regularisation ``C=c``) is wrapped in ``RFE`` keeping
    ``n`` features. Yellowbrick plots are written into ``outdir`` as
    ``LC.png``, ``CR.png``, ``CM.png``, ``CPE.png``, ``PRC.png`` and
    ``RA.png``; the names of the retained features go to ``features.csv``
    (tab-separated, no index column).

    Args:
        c: Value passed as ``C`` to ``LinearSVC``.
        n: Number of features ``RFE`` should keep.
        X: Training features; must expose ``.columns`` (e.g. a DataFrame).
        y: Training labels.
        X_test: Held-out features used for every scored plot and for F1.
        y_test: Held-out labels.
        class_names: Class labels handed to the visualizers.
        outdir: Existing directory that receives the plots and the CSV.

    Returns:
        Weighted F1 score of the fitted selector on ``(X_test, y_test)``.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False,
                          max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    def save_scored(visualizer, filename):
        # Every scored plot is trained on (X, y) and evaluated on the
        # held-out split; keeping that in one place stops a plot from being
        # scored on the training data by accident.
        visualizer.fit(X, y)
        visualizer.score(X_test, y_test)
        visualizer.show(outpath=f"{outdir}/{filename}")

    # learning curve (cross-validates internally, so no separate score step)
    plt.clf()
    viz_lc = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_lc.fit(X, y)
    viz_lc.show(outpath=f"{outdir}/LC.png")

    # classification report
    plt.clf()
    save_scored(
        ClassificationReport(rfe, classes=class_names, support=True),
        'CR.png',
    )

    # confusion matrix
    plt.clf()
    save_scored(ConfusionMatrix(rfe, classes=class_names), 'CM.png')

    # class prediction error
    # Drawn before the two explicitly sized plots below. NOTE(review): a
    # visualizer's `size` appears to resize the shared pyplot figure, which
    # plt.clf() does not undo - confirm against the installed yellowbrick.
    plt.clf()
    save_scored(ClassPredictionError(rfe, classes=class_names), 'CPE.png')

    # precision recall curve
    # `size` is a visualizer option; passing it to show() forwards it to
    # savefig(), which does not accept it.
    plt.clf()
    save_scored(
        PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                             fill_area=False, micro=False,
                             classes=class_names, size=(1080, 720)),
        'PRC.png',
    )

    # ROCAUC - scored on the held-out split like every other plot
    plt.clf()
    save_scored(ROCAUC(rfe, classes=class_names, size=(1080, 720)), 'RA.png')

    # Explicit final fit so the F1 score and the support mask do not depend
    # on whichever visualizer happened to fit the selector last.
    rfe.fit(X, y)
    y_predict = rfe.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    retained = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(retained.tolist())
    feature_df.to_csv(f"{outdir}/features.csv", sep='\t', index=False)
    return f1
def class_prediction_error(self) -> None:
    """Save (and optionally upload) the class prediction error chart.

    A ``ClassPredictionError`` visualizer wraps ``self.trained_model``, is
    fitted on the training split and scored on the test split. The resulting
    stacked bar chart shows, per actual class, how the predictions were
    distributed across classes, which makes it easy to see which classes the
    model confuses and what it predicts instead.

    The figure is written to
    ``{plots_dir}/class_prediction_error_{model_id}.png``. When the
    module-level ``LOCAL`` flag is falsy the file is also passed to
    ``upload_to_s3`` under the ``plots/`` prefix. The current pyplot figure
    is cleared afterwards.
    """
    error_viz = ClassPredictionError(self.trained_model)
    error_viz.fit(self.X_train, self.y_train)
    error_viz.score(self.X_test, self.y_test)

    plot_path = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
    error_viz.show(outpath=plot_path)

    if not LOCAL:
        s3_key = f'plots/class_prediction_error_{self.model_id}.png'
        upload_to_s3(plot_path, s3_key, bucket=S3_BUCKET_NAME)

    # Clear the shared pyplot figure so later plots start clean.
    plt.clf()
def draw_plots():
    """Show ROC, class-prediction-error and confusion-matrix plots per technique.

    For each of the techniques ``base``, ``SMOTE``, ``ADASYN`` and
    ``text-aug`` a ``MultinomialNB(alpha=0.01)`` classifier is fitted on that
    technique's training data and evaluated on its test data. Four figures
    are shown per technique: ROC without per-class curves, ROC with
    per-class curves, class prediction error, and the confusion matrix.

    Raises:
        ValueError: If a technique name has no data-preparation branch.
    """
    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")

        if technique == "base":
            X_plot_train, X_plot_test = X_train, X_test
            y_plot_train, y_plot_test = y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(), representation="bow")
        else:
            raise ValueError(f"unknown technique: {technique!r}")

        # A fresh, unfitted estimator per technique. Yellowbrick's default
        # is_fitted="auto" skips fitting an estimator that is already
        # fitted, so one shared instance would keep the model trained on the
        # first technique's data for every later technique.
        classifier = MultinomialNB(alpha=0.01)
        genres = get_selected_genres()

        # ROC without per-class curves (micro average requested)
        viz_roc = ROCAUC(classifier, classes=genres, micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)   # fit on the training data
        viz_roc.score(X_plot_test, y_plot_test)   # evaluate on the test data
        viz_roc.show()                            # finalize and show the figure

        # ROC - per class
        viz_roc = ROCAUC(classifier, classes=genres, micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)
        viz_roc.score(X_plot_test, y_plot_test)
        viz_roc.show()

        # Class prediction error
        viz_pred_err = ClassPredictionError(classifier, classes=genres)
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # Confusion matrix
        # NOTE(review): uses integer labels 0-8 while the other plots use
        # get_selected_genres() - confirm this difference is intended.
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
def class_prediction_errors(xx, yy, estimatorss, **kwargs):
    """Show a class prediction error chart for ``estimatorss`` on ``(xx, yy)``.

    The visualizer is fitted and scored on the same data, so the chart
    reflects in-sample prediction errors.

    Args:
        xx: Feature matrix used for both fitting and scoring.
        yy: Target labels matching ``xx``.
        estimatorss: Classifier to wrap in ``ClassPredictionError``.
        **kwargs: Extra options forwarded to ``ClassPredictionError``.
            ``classes``, ``cmap`` and ``size`` fall back to the defaults
            below when not supplied; previously supplying any of them raised
            a duplicate-keyword ``TypeError``.
    """
    # setdefault lets caller-supplied values win over the built-in defaults.
    kwargs.setdefault(
        'classes',
        ['Reach, 1 Reach, or L/R Reach',
         'Null, Multiple Reaches, Or Multiple Arms'],
    )
    kwargs.setdefault('cmap', "YlGn")
    kwargs.setdefault('size', (600, 360))

    vz2 = ClassPredictionError(estimatorss, **kwargs)
    vz2.fit(xx, yy)
    vz2.score(xx, yy)
    vz2.show()
# In[34]: from yellowbrick.classifier import ClassPredictionError # In[35]: classes = ['Exited', 'Not Exited'] clf = RandomForestClassifier(n_estimators = 200, random_state=200) visualizer = ClassPredictionError(clf) visualizer.fit(X_train, y_train) visualizer.score(X_test,y_test) visualizer.show() # In[36]: svclassifier = SVC(kernel='rbf') visualizer = ClassPredictionError(svclassifier) visualizer.fit(X_train, y_train) visualizer.score(X_test,y_test) visualizer.show() # In[10]: