from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassPredictionError


def pred_error(X, y, test_size=0.10, random_state=42):
    models = [
        GaussianNB(),
        KNeighborsClassifier(),
        SGDClassifier(),
        BaggingClassifier(KNeighborsClassifier()),
        DecisionTreeClassifier(),
        LinearSVC(penalty="l1", dual=False),
    ]
    classes = ["not_passed", "passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    for model in models:
        model.fit(X_train, y_train)
        visualizer = ClassPredictionError(model, classes=classes)
        visualizer.fit(X_train, y_train)    # Fit the visualizer and the model
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data
        # For classifiers, .score() returns mean accuracy, not R^2
        print("Accuracy: %0.6f" % model.score(X_test, y_test))
        g = visualizer.poof()
        print('')
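# A minimal driver sketch for pred_error(), using a synthetic binary dataset;
# the make_classification parameters below are illustrative, not from the
# original snippet.
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=10, random_state=42)
pred_error(X, y, test_size=0.2)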
def store_experiment_data(self, X_test, y_test):
    # self.model is assumed to be already fitted; the visualizers only score here
    class_report = ClassificationReport(self.model)
    score = class_report.score(X_test, y_test)
    class_report.poof('metrics/classification_report.png', clear_figure=True)
    self.ex.add_artifact('metrics/classification_report.png')

    confusion_matrix = ConfusionMatrix(self.model)
    confusion_matrix.score(X_test, y_test)
    confusion_matrix.poof('metrics/confusion_matrix.png', clear_figure=True)
    self.ex.add_artifact('metrics/confusion_matrix.png')

    cpd = ClassPredictionError(self.model)
    cpd.score(X_test, y_test)
    cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
    self.ex.add_artifact('metrics/class_prediction_error.png')

    print('score=', score)
    self.ex.log_scalar('score', score)
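# A minimal sketch of the surrounding class this method assumes, using the
# Sacred experiment framework (which provides add_artifact and log_scalar);
# ExperimentRunner and fitted_model are illustrative stand-ins, not part of
# the original snippet.
import os
from sacred import Experiment

class ExperimentRunner:
    def __init__(self, ex: Experiment, fitted_model):
        self.ex = ex               # Sacred Experiment: provides add_artifact / log_scalar
        self.model = fitted_model  # must already be fitted; store_experiment_data only scores
        os.makedirs('metrics', exist_ok=True)  # poof() writes into metrics/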
def class_predict_error(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ClassPredictionError

    # Instantiate the classification model and visualizer
    # (use the model passed in rather than hard-coding a classifier)
    visualizer = ClassPredictionError(model, classes=classes)

    # Fit the training data to the visualizer
    visualizer.fit(X_train, Y_train)

    # Evaluate the model on the test data
    visualizer.score(X_test, Y_test)

    # Draw visualization
    g = visualizer.poof()
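# Hedged usage sketch: the iris split and RandomForestClassifier below are
# illustrative stand-ins for whatever model and data the caller supplies.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, random_state=0)
class_predict_error(RandomForestClassifier(), ['setosa', 'versicolor', 'virginica'],
                    X_train, Y_train, X_test, Y_test)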
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError


def make_cb_pred_error(dataset="fruit", path=None, clf=None):
    clf = clf or RandomForestClassifier()
    # make_fruit_dataset / load_credit_dataset are project-local loaders
    loader = {
        'fruit': make_fruit_dataset,
        'credit': load_credit_dataset,
    }[dataset]
    (X_train, X_test, y_train, y_test), classes = loader()

    _, ax = plt.subplots()
    viz = ClassPredictionError(clf, ax=ax, classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    return viz.poof(outpath=path)
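# Hypothetical invocation; the output path below is illustrative. Note that
# unknown dataset names raise KeyError from the dict-based dispatch above.
make_cb_pred_error(dataset="credit", path="credit_class_prediction_error.png")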
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    np.random.seed(100)
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfidf_vect = TfidfVectorizer(analyzer='word',
                                     tokenizer=nltk.tokenize.word_tokenize,
                                     stop_words='english', min_df=5)
        my_pipeline = Pipeline(steps=[('vectorizer', tfidf_vect),
                                      ('lr', LogisticRegression(random_state=42))])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)
        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0, 1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        visualizer.poof(outpath="image/classification_report.png")
        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0, 1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest)
        cm.poof(outpath="image/confusionmatrix.png")
        mlflow.log_artifact("image/confusionmatrix.png")

        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0, 1])
        vis.fit(xtrain, ytrain)    # Fit the training data to the visualizer
        vis.score(xtest, ytest)    # Evaluate the model on the test data
        vis.poof(outpath="image/rocauc.png")    # Draw/show/poof the data
        mlflow.log_artifact("image/rocauc.png")

        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0, 1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        visual.poof(outpath="image/ClassificationError.png")
        mlflow.log_artifact("image/ClassificationError.png")

        return run.info.run_uuid
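# Hedged driver sketch for train(); `df`, its 'text'/'label' columns, and the
# experiment name are illustrative stand-ins, not part of the original code.
import os
import mlflow
from sklearn.model_selection import train_test_split

os.makedirs('image', exist_ok=True)  # poof(outpath=...) needs the directory to exist
xtrain, xtest, ytrain, ytest = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42)
experiment_id = mlflow.create_experiment('tfidf_logreg')  # assumed experiment name
run_id = train(experiment_id, 'baseline_run', xtrain, xtest, ytrain, ytest)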
# In[64]:

# Insert the trained classifier from above in here
fitted_classifier_for_visualization = XG_clf_finetuned


# In[65]:

# Seems to predict the non-loyal class well; the loyal class is more hit or miss
from yellowbrick.classifier import ClassPredictionError

visualizer_entropy = ClassPredictionError(fitted_classifier_for_visualization,
                                          classes=class_names)
visualizer_entropy.fit(X_train, y_train)
visualizer_entropy.score(X_test, y_test)
g = visualizer_entropy.poof()


# #### To visualize the ROC and AUC curves, plug in the clf object from Section 2.3 for the specific model that was trained

# In[66]:

from yellowbrick.classifier import ROCAUC

visualizer_entropy = ROCAUC(fitted_classifier_for_visualization, classes=class_names)
visualizer_entropy.fit(X_train, y_train)     # Fit the training data to the visualizer
visualizer_entropy.score(X_test, y_test)     # Evaluate the model on the test data
g = visualizer_entropy.poof()                # Draw/show/poof the data
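# For reference, a hedged sketch of the names the cells above assume;
# XGBClassifier and the label order are illustrative stand-ins for the
# fine-tuned model and class names produced earlier in the notebook.
from xgboost import XGBClassifier

class_names = ['non_loyal', 'loyal']                       # assumed label order
XG_clf_finetuned = XGBClassifier().fit(X_train, y_train)   # stand-in for the tuned model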
from yellowbrick.classifier import ROCAUC

roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names
# In sklearn's breast cancer dataset, target 0 is malignant and 1 is benign
conf_matrix = ConfusionMatrix(rf, classes=classes,
                              label_encoder={0: 'malignant', 1: 'benign'})
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
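# For completeness, a sketch of the setup the fragments above assume; the
# split and forest hyperparameters are illustrative, not from the original.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=42)
rf = RandomForestClassifier(random_state=42)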
def draw_prediction_error(self):
    visualizer = ClassPredictionError(self.model, classes=self.le.classes_)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.score(self.test_data, self.test_labels)
    visualizer.poof()
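# Self-contained sketch of the same pattern outside a class; the wine dataset
# and LabelEncoder wiring below are illustrative stand-ins for self.le and the
# instance attributes used above.
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from yellowbrick.classifier import ClassPredictionError

data = load_wine()
le = LabelEncoder().fit(data.target_names[data.target])  # mimics self.le
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0)
visualizer = ClassPredictionError(RandomForestClassifier(), classes=le.classes_)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()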