def classification_sanity_check(
    model, X_train, X_test, y_train, y_test, classes=None
):
    """Fit a ROCAUC visualizer around ``model`` and display the result.

    The visualizer is created with both the micro- and macro-average
    curves switched off (``micro=False, macro=False``). It is fitted on the
    training split, scored on the test split and then rendered.

    Parameters
    ----------
    model : estimator handed to ``ROCAUC``.
    X_train, y_train : data passed to ``fit``.
    X_test, y_test : data passed to ``score``.
    classes : optional class labels forwarded to ``ROCAUC``.
    """
    roc_viz = ROCAUC(model, micro=False, macro=False, classes=classes)
    roc_viz.fit(X_train, y_train)
    roc_viz.score(X_test, y_test)
    roc_viz.poof()
def get_roc(self, on="test"):
    """Score ``self.pipe`` with a ROCAUC visualizer and display the plot.

    Parameters
    ----------
    on : str, default "test"
        Which data to score on: ``"test"`` uses ``self._X_test`` /
        ``self._y_test``, ``"train"`` uses ``self._X_train`` /
        ``self._y_train`` and ``"all"`` uses ``self.X`` / ``self.y``.

    Raises
    ------
    ValueError
        If ``on`` is not one of the three supported values. Previously an
        unknown value skipped scoring and displayed an unscored plot.
    """
    # Validate before building the visualizer so a typo fails fast.
    if on not in ("test", "train", "all"):
        raise ValueError(
            "on must be 'test', 'train' or 'all', got {!r}".format(on)
        )

    visualizer = ROCAUC(self.pipe)
    if on == "test":
        visualizer.score(self._X_test, self._y_test)
    elif on == "train":
        visualizer.score(self._X_train, self._y_train)
    else:
        visualizer.score(self.X, self.y)
    visualizer.poof()
def rocauc(X, y, model, outpath, **kwargs):
    """Render a ROCAUC plot for ``model`` and write it to ``outpath``.

    A new figure is created, the data is split 80/20 with
    ``train_test_split``, the visualizer is fitted on the training part,
    scored on the held-out part and saved to disk. Extra keyword arguments
    are forwarded to the ``ROCAUC`` constructor.
    """
    # Only the axes of the fresh figure are needed.
    axes = plt.subplots()[1]
    roc_viz = ROCAUC(model, ax=axes, **kwargs)

    split = train_test_split(X, y, test_size=0.2)
    X_train, X_test, y_train, y_test = split

    roc_viz.fit(X_train, y_train)
    roc_viz.score(X_test, y_test)

    # Passing an outpath writes the figure to disk.
    roc_viz.poof(outpath=outpath)
def evaluation(estimator, X, Y, x, y):
    """Draw a learning curve, a confusion matrix and a ROC/AUC plot in a row.

    ``X``/``Y`` are the data passed to ``fit`` and ``x``/``y`` the data
    passed to ``score`` (presumably the training and held-out sets; verify
    against the caller). The three plots share one 1x3 figure, which is
    shown at the end.
    """
    # NOTE(review): the class labels are taken from the first two entries
    # of Y, in reverse order; confirm this matches the intended labelling.
    class_labels = [Y[1], Y[0]]
    _, axes = plt.subplots(1, 3, figsize=(18, 6))
    learning_ax, confusion_ax, roc_ax = axes

    # Confusion matrix (middle panel); scored without a prior fit call.
    confusion = ConfusionMatrix(
        model=estimator,
        ax=confusion_ax,
        classes=class_labels,
        label_encoder={0.0: 'Negativo', 1.0: 'Positivo'},
    )
    confusion.score(x, y)

    # ROC/AUC (right panel).
    roc_viz = ROCAUC(model=estimator, ax=roc_ax)
    roc_viz.fit(X, Y)
    roc_viz.score(x, y)

    # Learning curve (left panel), 3-fold stratified CV scored by ROC AUC.
    folds = StratifiedKFold(n_splits=3)
    train_fractions = np.linspace(0.3, 1.0, 10)
    learning = LearningCurve(
        estimator,
        ax=learning_ax,
        cv=folds,
        scoring='roc_auc',
        train_sizes=train_fractions,
        n_jobs=4,
    )
    learning.fit(X, Y)

    confusion.poof()
    roc_viz.poof()
    learning.poof()
    plt.show()
def ROC_AUC(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit, score and display a yellowbrick ROCAUC plot for ``model``.

    The visualizer is fitted on ``X_train``/``Y_train`` and scored on
    ``X_test``/``Y_test``; ``classes`` is forwarded as the class labels.
    """
    from yellowbrick.classifier import ROCAUC

    roc_viz = ROCAUC(model, classes=classes)
    roc_viz.fit(X_train, Y_train)
    roc_viz.score(X_test, Y_test)
    roc_viz.poof()
def get_roc(self, on="test"):
    """
    Produces an AUC/ROC curve graph made through the yellowbrick package.

    Parameters
    ----------
    on : string (default="test")
        Determines which set of data to score and create a ROC graph on.
        "test" scores ``self._X_test`` / ``self._y_test``, "train" scores
        ``self._X_train`` / ``self._y_train`` and "all" scores
        ``self._X`` / ``self._y``.

    Raises
    ------
    ValueError
        If ``on`` is not "test", "train" or "all". Previously an unknown
        value skipped scoring and displayed an unscored plot.
    """
    # Validate before building the visualizer so a typo fails fast.
    if on not in ("test", "train", "all"):
        raise ValueError(
            "on must be 'test', 'train' or 'all', got {!r}".format(on)
        )

    visualizer = ROCAUC(self.pipe)
    if on == "test":
        visualizer.score(self._X_test, self._y_test)
    elif on == "train":
        visualizer.score(self._X_train, self._y_train)
    else:
        visualizer.score(self._X, self._y)
    visualizer.poof()
def roc(model, data_type="music", features_nr=705):
    """Score ``model`` on freshly loaded data and display its ROCAUC plot.

    Data is obtained from ``load_data`` with ``data_type``; the first
    ``features_nr`` columns are used as features and the ``class`` column
    (cast to ``str``) as the target. The visualizer is only scored, not
    fitted, so ``model`` is presumably already trained (verify against
    the caller).
    """
    # Class labels are "<data_type>" and "no_<data_type>".
    class_names = [pattern.format(data_type) for pattern in ("{}", "no_{}")]

    from yellowbrick.classifier import ROCAUC

    frame = load_data(how_many=4, last=True, data_type=data_type)
    frame = frame.astype({'class': str})
    feature_columns = frame.columns[:features_nr]
    inputs = frame[feature_columns]
    target = frame["class"]

    roc_viz = ROCAUC(model, classes=class_names)
    roc_viz.score(inputs, target)
    roc_viz.poof()
def showROC():
    """Train a logistic regression on the occupancy data and show its ROC plot.

    The data set is loaded through ``load_data('occupancy')``, split 80/20,
    and a ``ROCAUC`` visualizer wrapping ``LogisticRegression`` is fitted on
    the training part and scored on the held-out part before being shown.
    """
    data = load_data('occupancy')

    # Feature columns used as model inputs.
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # returns the same numpy array and exists in every pandas version.
    X = data[features].values
    y = data.occupancy.values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)   # fit on the training split
    visualizer.score(X_test, y_test)   # evaluate on the held-out split
    visualizer.poof()                  # draw/show the plot
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ROCAUC

# Load the data and force the target column to string labels.
data = load_data(how_many=14)
data = data.astype({'class': str})

# Report how many rows belong to each class.
print("Number of music entries")
print(data[data['class'] == "music"].shape)
print("Number of no_music entries")
print(data[data['class'] == "no_music"].shape)

# The first 705 columns are the features; the target has two labels.
features = data.columns[:705]
classes = ["music", "no_music"]

# Split instances and target into a 75/25 train/test partition.
X = data[features]
y = data['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Alternative: start from a previously saved model instead of a new one.
# model = load_model("models\\random_forest\\rf-10-music-nestimators25.joblib")
model = RandomForestClassifier(
    n_estimators=25, n_jobs=4, random_state=0, verbose=1
)

# Wrap the classifier in the visualizer, train, evaluate and display.
visualizer = ROCAUC(model, classes=classes)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
g = visualizer.poof()
# Load the credit data set as a structured array and inspect its fields.
datasets = DatasetMixin()
credit = datasets.load_data('credit')
credit_keys = credit.dtype.names
datatype = credit.dtype[0]
ncols = len(credit_keys)

categorical_names = ['edu', 'married']
y_name = 'default'

# Assemble the feature matrix field by field: categorical fields go through
# add_categorical, the target field becomes y, everything else is appended
# as a plain column.
credit_data = None
for key in credit_keys:
    if key in categorical_names:
        credit_data = add_categorical(credit_data, credit[key], datatype)
    elif key == y_name:
        y = credit[y_name].astype(int)
    else:
        credit_data = add_column(credit_data, credit[key])

# Standardize every column (subtract the mean, divide by the standard
# deviation). Broadcasting applies the per-column vectors to every row, so
# the row-tiled copies built with numpy.matlib.repmat are not needed; the
# arithmetic is identical.
# NOTE(review): a constant column has zero standard deviation and will
# produce inf/nan here, exactly as it did before.
cmeans = np.mean(credit_data, 0)
mydata = credit_data - cmeans
sstds = np.std(mydata, 0)
mydata = np.divide(mydata, sstds)

# NOTE(review): the visualizer is fitted and scored on the same data, so the
# curve reflects training performance.
visualizer = ROCAUC(LinearSVC())
visualizer.fit(mydata, y)
visualizer.score(mydata, y)
visualizer.poof()
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ROCAUC

if __name__ == '__main__':
    # Load the occupancy classification data set.
    data = pd.read_csv("../../../examples/data/occupancy/occupancy.csv")

    # Feature columns used as model inputs.
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # returns the same numpy array and exists in every pandas version.
    X = data[features].values
    y = data.occupancy.values

    # Create the train and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the classification model and visualizer.
    logistic = LogisticRegression()
    visualizer = ROCAUC(logistic)

    visualizer.fit(X_train, y_train)   # fit on the training split
    visualizer.score(X_test, y_test)   # evaluate on the held-out split
    visualizer.poof(outpath="images/rocauc.png")   # save the plot to disk
def _render_and_log(visualizer, xtrain, ytrain, xtest, ytest, outpath):
    """Fit and score one visualizer, save its plot and log it as an artifact."""
    visualizer.fit(xtrain, ytrain)
    visualizer.score(xtest, ytest)
    visualizer.poof(outpath=outpath)
    print(' ')
    mlflow.log_artifact(outpath)


def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Train a TF-IDF + logistic regression pipeline inside an MLflow run.

    The pipeline is fitted on ``xtrain``/``ytrain``, dumped to
    ``pipeline_lr.pkl`` and evaluated on ``xtest``/``ytest``. Accuracy, F1
    and AUC are printed and logged as metrics (rounded to two decimals).
    Four yellowbrick plots (classification report, confusion matrix,
    ROC/AUC and class prediction error) are saved under ``image/`` and
    logged as artifacts.

    Parameters
    ----------
    experiment_id : MLflow experiment the run is created in.
    run_name : name given to the MLflow run.
    xtrain, ytrain : training documents and labels.
    xtest, ytest : evaluation documents and labels.

    Returns
    -------
    The ``run_uuid`` of the MLflow run.
    """
    import os

    np.random.seed(100)
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfid_vect = TfidfVectorizer(
            analyzer='word',
            tokenizer=nltk.tokenize.word_tokenize,
            stop_words='english',
            min_df=5,
        )
        my_pipeline = Pipeline(steps=[
            ('vectorizer', tfid_vect),
            ('lr', LogisticRegression(random_state=42)),
        ])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        # NOTE(review): AUC is computed from hard class predictions rather
        # than probabilities; kept as-is so logged values stay comparable
        # with earlier runs.
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        # Saving fails if the target directory is missing, so create it.
        os.makedirs("image", exist_ok=True)

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)
        plots = (
            (ClassificationReport, ax1, "image/classification_report.png"),
            (ConfusionMatrix, ax2, "image/confusionmatrix.png"),
            (ROCAUC, ax3, "image/rocauc.png"),
            (ClassPredictionError, ax4, "image/ClassificationError.png"),
        )
        try:
            for visualizer_cls, axis, outpath in plots:
                visualizer = visualizer_cls(my_pipeline, ax=axis, classes=[0, 1])
                _render_and_log(visualizer, xtrain, ytrain, xtest, ytest, outpath)
        finally:
            # Release the figure so repeated training runs do not
            # accumulate open matplotlib figures.
            plt.close(fig)

        return run.info.run_uuid
solver='adam', batch_size=512, activation='tanh') # Run model with 4-fold cross validation. Report mean accuracy. scores = cross_val_score(mlp, X_train, y_train, cv=4) print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)) # Plot ROC, AUC. classes = ["Normal", "Pre-Ictal", "Seizure"] visualizer = ROCAUC(mlp, classes=classes) visualizer.fit(X_train, y_train) # Fit the training data to the visualizer visualizer.score(X_test, y_test) # Evaluate the model on the test data ROC_title = "ROCAUC_{}.png".format(animal_id) g = visualizer.poof(outpath=ROC_title) # Save plot w unique title # Plot the precision-recall curve. viz = PrecisionRecallCurve(mlp) viz.fit(X_train, y_train) # Fit the training data to the visualizer viz.score(X_test, y_test) # Evaluate the model on the test data PR_title = "PR_{}.png".format(animal_id) viz.poof(outpath=PR_title) # Save plot w unique title # Plot loss curve aka cost function. loss_values = mlp.loss_curve_ plt.plot(loss_values) plt.show() Loss_title = "Loss_{}.png".format(animal_id) plt.savefig(Loss_title) sys.stdout.close()
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import ROCAUC

### Feature importances, drawn on a new subplot of the existing figure
ax = fig.add_subplot()
viz = FeatureImportances(
    rf,
    ax=ax,
    labels=cancer.feature_names,
    relative=False,  # if True, puts all on scale, max = 100
)
viz.fit(X, y)
viz.poof()

# Target names are shared by the ROC-AUC plot and the confusion matrix.
classes = cancer.target_names

### ROC-AUC
roc = ROCAUC(rf, classes=classes)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix
conf_matrix = ConfusionMatrix(
    rf,
    classes=classes,
    label_encoder={0: 'benign', 1: 'malignant'},
)
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
# Bare name: looks like a notebook cell echo (or the tail of a statement
# that starts outside this fragment); it has no effect in a script.
test_mean
# Standard deviation of the test scores along axis 1.
test_std = np.std(test_scores, axis=1)
test_std

# Learning curve: mean training and cross-validation score per training size.
plt.plot(train_sizes, train_mean, label='Training Score')
plt.plot(train_sizes, test_mean, label='Cross-Validation Score')

# Shaded bands one standard deviation either side of each mean.
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, color='#DDDDDD')
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, color='#DDDDDD')

plt.title("Learning Curve")
plt.xlabel("Training Size")
plt.ylabel("Accuracy Score")
plt.legend(loc='best')

# Section heading carried over as a bare string ("ROC and AUC").
"""**ROC ve AUC**"""

from yellowbrick.classifier import ROCAUC

# New 12x8 figure so the ROC plot does not draw onto the learning curve.
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
roc_auc = ROCAUC(clf, ax=ax)
roc_auc.fit(pc_train, y_train)   # fit on the training split
roc_auc.score(pc_test, y_test)   # evaluate on the test split
roc_auc.poof()
def plot(X, Y):
    """Show the ROCAUC plot of a Gaussian naive Bayes model on ``X``/``Y``.

    The data is split 80/20; the visualizer is fitted on the training part
    and scored on the held-out part.
    """
    split = train_test_split(X, Y, test_size=0.2)
    X_train, X_test, y_train, y_test = split

    roc_viz = ROCAUC(GaussianNB())
    roc_viz.fit(X_train, y_train)
    roc_viz.score(X_test, y_test)
    roc_viz.poof()
# Train the classifier through the ROCAUC visualizer, which fits the wrapped
# model in place.
model = GradientBoostingClassifier()
visualizer = ROCAUC(model)
visualizer.fit(X_train, y_train)

# Evaluate on the test split and log the metrics on the run.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
run.log('Accuracy', acc)
run.log('F1', f1)

# Both the plot and the model are written below ./outputs/, so make sure
# the directory exists first.
os.makedirs('./outputs/', exist_ok=True)

visualizer.score(X_test, y_test)   # Evaluate the model on the test data
visualizer.poof('./outputs/AUC.png')

model_file_name = 'breast-cancer-model.pkl'
model_file_path = os.path.join('./outputs/', model_file_name)

# save model in the outputs folder so it automatically get uploaded.
# joblib.dump opens the target path itself; the previous extra
# open(model_file_name, "wb") only left an empty file in the working
# directory.
joblib.dump(value=model, filename=model_file_path)

# when running in offline mode, model cannot be registered
register_model = getattr(run, "register_model", None)
if callable(register_model):
    # supply a model name, and the full path to the serialized model file.
    # NOTE: ``model`` is rebound to the registered model object here.
    model = run.register_model(model_name='breast-cancer-model',
                               model_path=model_file_path)
    model.add_tags({"run_id": run.id})
def plot_rocauc(model, X_valid, y_valid):
    """Display the ROCAUC plot of an already fitted ``model``.

    The visualizer is created with ``is_fitted=True`` and is only scored,
    on ``X_valid``/``y_valid``, before being shown.
    """
    roc_viz = ROCAUC(model, is_fitted=True)
    roc_viz.score(X_valid, y_valid)
    roc_viz.poof()
#Split the data into training and testing data set X_train, X_test, y_train, y_test = tts(X, y, test_size=0.3) clf = GradientBoostingClassifier(random_state=1, max_depth=10, n_estimators=100, learning_rate=0.1) # clf = AdaBoostClassifier(random_state=1,base_estimator=tree.DecisionTreeClassifier(max_depth=10), n_estimators=100, # learning_rate=0.1) clf.fit(X_train, y_train) #Generate RUC-AUC curve for classifier rocauc = ROCAUC(clf, size=(1080, 720), classes=classes) rocauc.score(X_test, y_test) r = rocauc.poof() # Generate classification report for the given classifier # report = ClassificationReport(clf, size=(1080, 720), classes=classes) # # report.score(X_test, y_test) # c = report.poof() #Generate Prediction error for each class # error = ClassPredictionError(clf, size=(1080, 720), classes=classes) # # error.score(X_test, y_test) # e = error.poof()