def pred_error(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False)
    ]
    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)

        visualizer = ClassPredictionError(model, classes=classes)
        visualizer.fit(X_train, y_train)    # Fit the visualizer and the model
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data

        # For classifiers, model.score() returns mean accuracy on the test data
        print("Mean accuracy: %0.6f" % model.score(X_test, y_test))
        g = visualizer.poof()
        print('')
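# A minimal, hypothetical usage sketch for pred_error: the synthetic dataset and split
# size below are illustrative assumptions. It assumes the names used inside pred_error
# (the sklearn estimators, train_test_split, and yellowbrick's ClassPredictionError)
# are already imported, as in the function above.
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=500, n_features=10, n_informative=4,
                                     n_classes=2, random_state=42)
pred_error(X_demo, y_demo, test_size=0.2)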
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X, y)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
def log_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log class prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Class Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))
def create_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test):
    """Create class prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/class_prediction_error'] = \
                npt_utils.create_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Class Prediction Error chart. Error {}'.format(e))

    return chart
def class_predict_error(model, classes, X_train, y_train, X_test, y_test):
    from yellowbrick.classifier import ClassPredictionError

    # Instantiate the classification model and visualizer using the passed-in estimator
    visualizer = ClassPredictionError(model, classes=classes)

    # Fit the training data to the visualizer
    visualizer.fit(X_train, y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, y_test)

    # Draw visualization
    g = visualizer.poof()
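# A self-contained, hypothetical example of calling class_predict_error: the synthetic
# split, the class names, and the RandomForestClassifier below are illustrative assumptions.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X_s, y_s = make_classification(n_samples=300, n_classes=2, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X_s, y_s, test_size=0.2, random_state=0)
class_predict_error(RandomForestClassifier(), ["class_0", "class_1"], X_tr, y_tr, X_te, y_te)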
def classprede():
    X, y = make_classification(
        n_samples=1000, n_classes=5, n_informative=3, n_clusters_per_class=1
    )

    classes = ["apple", "kiwi", "pear", "banana", "orange"]

    # Perform 80/20 training/test split
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)

    oz = ClassPredictionError(RandomForestClassifier(), classes=classes, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "class_prediction_error")
def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    clf = clf or RandomForestClassifier()
    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]

    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz = ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    return viz.poof(outpath=path)
def class_prediction_error(self) -> None:
    """Plot the support (number of training samples) for each class in the fitted
    classification model as a stacked bar chart. Each bar is segmented to show the
    proportion of predictions (including false negatives and false positives, like
    a Confusion Matrix) for each class.

    You can use a ClassPredictionError to visualize which classes your classifier
    is having a particularly difficult time with, and more importantly, what
    incorrect answers it is giving on a per-class basis.
    """
    visualizer = ClassPredictionError(self.trained_model)
    visualizer.fit(self.X_train, self.y_train)
    visualizer.score(self.X_test, self.y_test)

    save_dir = f"{self.plots_dir}/class_prediction_error_{self.model_id}.png"
    visualizer.show(outpath=save_dir)

    if not LOCAL:
        upload_to_s3(save_dir,
                     f'plots/class_prediction_error_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)
    plt.clf()
def draw_plots():
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")

        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(), representation="bow")
        else:
            raise Exception()

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)    # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)    # Evaluate the model on the test data
        viz_roc.show()                             # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)    # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)    # Evaluate the model on the test data
        viz_roc.show()                             # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # The ConfusionMatrix
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfid_vect = TfidfVectorizer(analyzer='word',
                                    tokenizer=nltk.tokenize.word_tokenize,
                                    stop_words='english', min_df=5)
        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0, 1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        a = visualizer.poof(outpath="image/classification_report.png")
        print(' ')
        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0, 1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest)
        b = cm.poof(outpath="image/confusionmatrix.png")
        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')

        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0, 1])
        vis.fit(xtrain, ytrain)                    # Fit the training data to the visualizer
        vis.score(xtest, ytest)                    # Evaluate the model on the test data
        c = vis.poof(outpath="image/rocauc.png")   # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")

        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0, 1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        g = visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")

        return run.info.run_uuid
def get_plots():
    all_plots = []

    # FEATURE Visualization
    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")

    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)

    # save to html
    fig = plt.gcf()
    some_htmL = mpld3.fig_to_html(fig)
    all_plots.append("<h4 align='center'>Manifold Visualization</h4>" + some_htmL)

    # clear plot
    plt.clf()

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(), classes=classes)

            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)

            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Class Prediction Error</h4>" + some_htmL)

            # clear plot
            plt.clf()

            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)

            # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
            cm.fit(X_train, y_train)

            # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
            # and then creates the confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Confusion Matrix</h4>" + some_htmL)

            # clear plot
            plt.clf()

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Prediction Error Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Residuals Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        return all_plots
def class_prediction_errors(xx, yy, estimatorss, **kwargs):
    vz2 = ClassPredictionError(
        estimatorss,
        classes=['Reach, 1 Reach, or L/R Reach', 'Null, Multiple Reaches, Or Multiple Arms'],
        cmap="YlGn", size=(600, 360), **kwargs
    )
    vz2.fit(xx, yy)
    vz2.score(xx, yy)
    vz2.show()
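# A hypothetical invocation of class_prediction_errors (the data and estimator below are
# illustrative assumptions). Note the function fits and scores on the same (xx, yy), so the
# chart shows prediction errors on the training data rather than on a held-out split.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo2, y_demo2 = make_classification(n_samples=400, n_classes=2, random_state=1)
class_prediction_errors(X_demo2, y_demo2, LogisticRegression(max_iter=1000))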
print(confusion_matrix(y_test, y_pred))


# In[34]:


from yellowbrick.classifier import ClassPredictionError


# In[35]:


classes = ['Exited', 'Not Exited']

clf = RandomForestClassifier(n_estimators=200, random_state=200)
visualizer = ClassPredictionError(clf)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[36]:


svclassifier = SVC(kernel='rbf')
visualizer = ClassPredictionError(svclassifier)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


# In[10]:
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()


### Confusion Matrix
from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names
conf_matrix = ConfusionMatrix(rf, classes=classes,
                              label_encoder={0: 'benign', 1: 'malignant'})
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()


### Class Prediction Error
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
# plot no skill
plt.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
plt.plot(fpr, tpr, marker='.')
# show the plot
plt.show()

from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, model.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Visually/Log_ROC')
plt.show()

from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(model=LogisticRegression())
visualizer.fit(X=X_train, y=y_train)
visualizer.score(X=X_test, y=y_test)
visualizer.poof()
print('Mean cross-validation score: {:.2f}'.format(result3.mean()))

cm = ConfusionMatrix(foret, classes=[0, 1, 2, 3, 4, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()

size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
lc = LearningCurve(RandomForestClassifier(), train_sizes=size, scoring='r2')
lc.fit(x_train, y_train)
lc.poof()

viz = ClassPredictionError(RandomForestClassifier(),
                           classes=["0", "1", "2", "3", "4", "5", "6"])
viz.fit(x_train, y_train)
viz.score(x_test, y_test)
viz.poof()

fig = plt.figure()
ax = fig.add_subplot()
feat = FeatureImportances(RandomForestClassifier(), ax=ax)
feat.fit(x_train, y_train)
feat.poof()

'''--------------------- Neural network --------------------- '''
neurone = MLPClassifier()
neurone.fit(x_train, y_train)
print(neurone.score(x_test, y_test))
# #### Load a model in... if starting this notebook from scratch, just load pre-trained models to visualise


# In[64]:


# insert the trained classifier from above in here
fitted_classifier_for_visualization = XG_clf_finetuned


# In[65]:


# seems to be predicting non-loyal pretty well, however loyal is kind of hit or miss
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(fitted_classifier_for_visualization, classes=class_names)
visualizer_entropy.fit(X_train, y_train)
visualizer_entropy.score(X_test, y_test)
g = visualizer_entropy.poof()


# #### To get the visualization of ROC and AUC curves, plug in the CLF object from Section 2.3 to visualize these curves for the specific model that was trained

# In[66]:


from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(fitted_classifier_for_visualization, classes=class_names)
visualizer_entropy.fit(X_train, y_train)     # Fit the training data to the visualizer
visualizer_entropy.score(X_test, y_test)     # Evaluate the model on the test data
def draw_prediction_error(self):
    visualizer = ClassPredictionError(self.model, classes=self.le.classes_)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.score(self.test_data, self.test_labels)
    visualizer.poof()