def base_model(X_train, y_train, X_test, y_test):
    """Fit a logistic-regression baseline and display its ROC/AUC curves."""
    clf = LogisticRegression(multi_class='auto', solver='lbfgs')
    viz = ROCAUC(clf, classes=['dancehall', 'reggae', 'soca', 'pop'])
    viz.fit(X_train, y_train)      # fit on the training split
    viz.score(X_test, y_test)      # evaluate on the held-out split
    viz.show()
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Train a LinearSVC wrapped in RFE, save diagnostic plots, return weighted F1.

    Parameters
    ----------
    c : float
        LinearSVC regularization strength.
    n : int
        Number of features RFE keeps.
    X, y
        Training data (X must be a DataFrame; ``X.columns`` is used below).
    X_test, y_test
        Held-out evaluation data.
    class_names : list
        Class labels for the plots.
    outdir : str
        Directory where the PNGs and features.csv are written.

    Returns
    -------
    float
        Weighted F1 score on the test set.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    # Learning curve (fit only; no held-out scoring involved).
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    # Classification report.
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    # Confusion matrix.
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    # Precision-recall curve.
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    # Class prediction error.
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    # ROC/AUC. BUG FIX: the original scored this visualizer on the *training*
    # data (X, y), unlike every other visualizer in this function; it now
    # scores on the held-out set for a consistent, honest evaluation.
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    # Final fit/predict for the returned metric. NOTE: the visualizers above
    # already fit `rfe`; this explicit refit preserves the original behavior.
    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    # Persist the names of the features RFE retained.
    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)
    return f1
def ROC_Curve(Model, X, y):
    """Hold out 15% of (X, y), fit *Model*, and display its ROC/AUC curves."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15)
    # BUG FIX: the original bound `model = Model` but never used the alias;
    # the dead local has been removed.
    visualizer = ROCAUC(Model)
    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    visualizer.show()
def plotting_ROC_curve_yellowbrick(model, labels, X_train, y_train, X_test, y_test):
    """Render a ROC/AUC curve for *model* inside a Streamlit app."""
    st.subheader('ROC Curve')
    roc_viz = ROCAUC(model, classes=labels)
    roc_viz.fit(X_train, y_train)     # fit on the training split
    roc_viz.score(X_test, y_test)     # evaluate on the held-out split
    roc_viz.show()
    st.pyplot()
    return
def roc_curve(self, classes) -> None:
    """Plot the trained model's ROC curve, save it to disk, and push to S3 unless local."""
    viz = ROCAUC(self.trained_model, classes=classes)
    viz.fit(self.X_train, self.y_train)      # fit on the training split
    viz.score(self.X_test, self.y_test)      # evaluate on the held-out split
    save_dir = f"{self.plots_dir}/roc_curve_{self.model_id}.png"
    viz.show(outpath=save_dir)
    if not LOCAL:
        upload_to_s3(save_dir, f'plots/roc_curve_{self.model_id}.png', bucket=S3_BUCKET_NAME)
    plt.clf()
def evaluate_model(model, x_test, y_test, coef_):
    """Print accuracy/ROC-AUC/classification metrics and plot diagnostics.

    WARNING(review): this function relies on module-level globals `X_train`,
    `y_train`, and `X_test`. In particular the ROC visualizer scores on the
    *global* `X_test`, not the `x_test` parameter -- confirm these globals
    exist and match the data passed in.

    Parameters
    ----------
    model
        Fitted binary classifier with ``predict()`` (and ``coef_`` when
        ``coef_`` is True).
    x_test, y_test
        Test features/labels used for the printed metrics.
    coef_ : bool
        When True, plot ``model.coef_`` as per-feature importances.
    """
    prediction = model.predict(x_test)
    acc_test = accuracy_score(y_test, prediction)
    # NOTE: train accuracy is computed from globals, not parameters.
    acc_train = accuracy_score(y_train, model.predict(X_train))
    print("\n")
    print("Accuracy Score(Test): " + str(acc_test))
    print("Accuracy Score(Train): " + str(acc_train))
    print("Difference between train and test accuracy = {0}".format(abs(acc_test - acc_train)))
    print("Roc Auc Score: " + str(roc_auc_score(y_test, prediction)))
    print("\n")
    print("Classification Report:")
    print(classification_report(y_test, prediction))

    # Confusion matrix heatmap.
    plt.figure()
    cm = confusion_matrix(y_test, prediction)
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt="d")
    plt.title("Confusion Matrix(1:Churned, 0:Not Churned)")
    plt.show()

    # ROC curve (fit/scored on the global splits, preserving original behavior).
    plt.figure()
    visualizer = ROCAUC(model, classes=["Not Churn", "Churn"])
    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    visualizer.show()                   # Finalize and show the figure
    plt.show()

    if coef_:
        # Map each training column to its logistic coefficient.
        feature_imp = {}
        for idx, col_name in enumerate(X_train.columns):
            feature_imp[col_name] = model.coef_[0][idx]
        feature_imp = pd.DataFrame(feature_imp.items(), columns=["Feature", "Feature Importance"])
        feature_imp.set_index("Feature", inplace=True)
        ax = feature_imp.plot(kind="bar", fontsize=10, color="red")
        # BUG FIX: chart title previously read "Future Importance".
        ax.set_title("Feature Importance", fontdict={"fontsize": 12, "fontweight": "bold"})
        ax.set_ylabel("Coef_")
        plt.show()
y_pred = model.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt

cmET = confusion_matrix(y_test, y_pred)
sn.heatmap(cmET, cmap='Blues_r', annot=True, xticklabels='1234', yticklabels='1234')
plt.xlabel('Predicted Label')
plt.ylabel('Actual Label')
plt.show()

# ROCAUC Plot
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from yellowbrick.classifier import ROCAUC

# Encode the non-numeric columns.
# NOTE(review): X/y are re-encoded here *after* X_train/X_test were created,
# so the splits used below are unaffected by this encoding -- verify the
# intended order of operations.
X = OrdinalEncoder().fit_transform(X)
y = LabelEncoder().fit_transform(y)

# Instantiate the classification model and visualizer.
# BUG FIX: the original passed `Damage=[1, 2, 3, 4]`, which is not a ROCAUC
# keyword argument; the class labels must be supplied via `classes=`.
visualizer = ROCAUC(model, classes=[1, 2, 3, 4])
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.show()                   # Finalize and render the figure
# STEP 5: Accuracy check
#########################################################
from sklearn import metrics

prediction_test = model.predict(X_test)
# Check accuracy on test dataset.
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

from yellowbrick.classifier import ROCAUC

print("Classes in the image are: ", np.unique(Y))

# ROC curve for RF
roc_auc = ROCAUC(model, classes=[0, 1, 2, 3])  # Create object
roc_auc.fit(X_train, y_train)
roc_auc.score(X_test, y_test)
roc_auc.show()

##########################################################
# STEP 6: SAVE MODEL FOR FUTURE USE
###########################################################
# You can store the model for future use. In fact, this is how you do machine
# learning: train on training images, validate on test images, and deploy the
# model on unknown images.

# Save the trained model as a pickle to disk for future use.
# BUG FIX: the original `pickle.dump(model, open(model_name, 'wb'))` never
# closed the file handle; a context manager guarantees the file is flushed
# and closed.
model_name = "sandstone_model"
with open(model_name, 'wb') as model_file:
    pickle.dump(model, model_file)

# To test the model on future datasets:
# loaded_model = pickle.load(open(model_name, 'rb'))
# 5-fold cross-validation of the pre-configured classifier `clasificador`.
from sklearn.model_selection import cross_val_score

# NOTE(review): 'neg_root_mean_squared_error' is a regression metric; for a
# classifier an accuracy/F1 scorer is more typical -- confirm this is intended.
cv_scores = cross_val_score(
    estimator=clasificador,
    X=X_train,
    y=y_train,
    scoring='neg_root_mean_squared_error',
    cv=5
)
print(f"Métricas validación cruzada: {cv_scores}")
print(f"Média métricas de validación cruzada: {cv_scores.mean()}")
cv_scores = pd.DataFrame(cv_scores)

prediccion = clasificador.predict(X_test)

from yellowbrick.classifier import ROCAUC

# Per-class ROC/AUC curves for the ten digit classes (0-9).
visualizer = ROCAUC(clasificador, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.show()

# Multi-class ROC-AUC (one-vs-rest) from predicted class probabilities.
y_pred_proba = clasificador.predict_proba(X_test)
from sklearn.metrics import roc_auc_score
score = roc_auc_score(y_test, y_pred_proba, multi_class="ovr")
# NOTE(review): the stray ''' below appears to open a commented-out region
# that continues past this chunk -- confirm it is matched later in the file.
'''
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()

# Yellowbrick visualization
viz = ClassificationReport(model)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()

roc = ROCAUC(model)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.show()

# Perform grid search over regularization / solver / class-weight settings.
# BUG FIX: the original crossed penalty=['l1', 'l2'] with
# solver=['newton-cg', 'lbfgs', 'liblinear'], but newton-cg and lbfgs do not
# support the l1 penalty, so GridSearchCV raised a ValueError during fit.
# A list of param grids keeps every *valid* combination from the original.
C = [0.1, 0.25, 0.5]
class_weight = [{0: 15, 1: 85}, {0: 0.14, 1: 0.86}]
miter = [5000]
hyperparams = [
    dict(C=C, penalty=['l2'], solver=['newton-cg', 'lbfgs', 'liblinear'],
         class_weight=class_weight, max_iter=miter),
    dict(C=C, penalty=['l1'], solver=['liblinear'],
         class_weight=class_weight, max_iter=miter),
]
RegLog = LogisticRegression()
finalmodel = GridSearchCV(RegLog, hyperparams, verbose=0, cv=5)
finalmodel.fit(X_train, y_train)
y_predict = finalmodel.predict(X_test)
def best_model(model):
    """Display ROC/AUC curves for *model* using the module-level train/test split."""
    roc = ROCAUC(model, classes=['reggae', 'soca', 'dancehall', 'pop'])
    roc.fit(X_train, y_train)      # fit on the training split
    roc.score(X_test, y_test)      # evaluate on the held-out split
    roc.show()