def make_confusion_matrix(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` and show a yellowbrick confusion matrix for the test split.

    Labels are integer-encoded with one ``LabelEncoder`` fitted on the raw
    training labels, and that same encoder is applied to the test labels so
    both splits share an identical label -> integer mapping.

    Args:
        model: Estimator handed to ``ConfusionMatrix``.
        X_train: Training features.
        y_train: Training labels (any values ``LabelEncoder`` can fit).
        X_test: Test features.
        y_test: Test labels; every value must also occur in ``y_train``,
            otherwise ``LabelEncoder.transform`` raises ``ValueError``.

    Returns:
        None. The plot is displayed through ``cm.show()``.
    """
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(y_train)
    # Re-fitting the encoder on already-encoded labels (as was done before)
    # loses the original mapping; the test labels must go through the encoder
    # that saw the raw training labels.
    y_test_encoded = encoder.transform(y_test)

    # One plain Python int per encoded class, so the tick labels always match
    # the number of classes actually present instead of a hard-coded list.
    classes = list(range(len(encoder.classes_)))

    cm = ConfusionMatrix(model, classes=classes)
    cm.fit(X_train, y_train_encoded)
    cm.score(X_test, y_test_encoded)
    cm.show()
    return
def draw_confusion_matrix(self):
    """Fit, score and display a confusion matrix for ``self.model``.

    The class names and the label encoder both come from ``self.le``; the
    visualizer is fitted on the training data/labels and scored on the test
    data/labels stored on the instance.
    """
    encoder = self.le
    matrix = ConfusionMatrix(
        self.model,
        classes=encoder.classes_,
        label_encoder=encoder,
    )
    matrix.fit(self.training_data, self.training_labels)
    matrix.score(self.test_data, self.test_labels)
    matrix.poof()
def showConfusionMatrix():
    """Demo: confusion matrix of a logistic regression on the digits dataset.

    Loads scikit-learn's handwritten digits data, holds out 20% of it for
    testing (``random_state=11``), fits a ``LogisticRegression`` through the
    yellowbrick ``ConfusionMatrix`` visualizer and displays the result.
    """
    # Imports are kept local to the demo.
    from sklearn.datasets import load_digits
    from yellowbrick.classifier import ConfusionMatrix

    # ``data`` holds the per-image feature vectors, ``target`` the digit labels.
    bunch = load_digits()
    features, targets = bunch.data, bunch.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=11)

    # The visualizer takes the (unfitted) model plus the ten digit classes.
    matrix = ConfusionMatrix(LogisticRegression(), classes=list(range(10)))

    # fit() trains the wrapped model; score() predicts on the held-out data
    # and builds the confusion matrix from those predictions.
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)

    matrix.poof()
def evaluation(estimator, X, Y, x, y):
    """Draw a learning curve, a confusion matrix and a ROC/AUC plot side by side.

    Args:
        estimator: Classifier to evaluate. The confusion matrix is only
            scored (never fitted) here, so the estimator is presumably
            already fitted -- confirm with callers.
        X: Training features (used by ROCAUC and the learning curve).
        Y: Training labels.
        x: Evaluation features.
        y: Evaluation labels.

    Returns:
        None. One figure with three axes is shown via ``plt.show()``.
    """
    # NOTE(review): the class list is taken from the labels of the second and
    # first training samples, which only yields the intended pair when those
    # two samples belong to different classes -- confirm this is intended.
    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion matrix (middle axes)
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.score(x, y)

    # ROC/AUC (right axes)
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning curve (left axes)
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    # The three visualizers share one figure, so each is only finalized
    # (titles, legends, ticks); the figure is then shown once. Calling the
    # per-visualizer show/poof would try to display the figure three times.
    cmm.finalize()
    viz.finalize()
    visualizer.finalize()
    plt.show()
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Evaluate an RFE-wrapped LinearSVC and save diagnostic plots.

    Saves learning-curve, classification-report, confusion-matrix,
    precision-recall, class-prediction-error and ROC/AUC plots as PNG files
    under ``outdir``, writes the retained feature names to
    ``outdir/features.csv`` and returns the weighted F1 on the test split.

    Args:
        c: Regularisation parameter ``C`` for ``LinearSVC``.
        n: Number of features RFE should keep.
        X: Training features; ``X.columns`` is read, so a DataFrame-like
            object is required.
        y: Training labels.
        X_test: Test features.
        y_test: Test labels.
        class_names: Class labels passed to the visualizers.
        outdir: Existing directory that receives the output files.

    Returns:
        Weighted F1 score of the fitted RFE model on the test split.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False,
                          max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    # ``size`` is a visualizer option; passing it to show() would forward it
    # to matplotlib's savefig, which does not know that keyword.
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False,
                                   classes=class_names, size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(rfe, classes=class_names)
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    # Score on the held-out split like every other plot above; scoring on the
    # training data would give an over-optimistic curve.
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    # Final fit to compute the returned score and the retained features.
    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
def nice_confusion(model, X_train, X_test, y_train, y_test):
    """Plot a 10x10-inch confusion matrix with the 'PuBu' colour map.

    The visualizer is fitted on the training split, scored on the test split
    and then shown.
    """
    label_size = 18

    plt.figure(figsize=(10, 10))
    plt.xlabel('Predicted Class', fontsize=label_size)
    plt.ylabel('True Class', fontsize=label_size)

    matrix = ConfusionMatrix(model, cmap='PuBu', fontsize=label_size)
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)
    matrix.show()
def log_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log confusion matrix.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``. Any failure while building or logging the chart is printed
        and swallowed, so logging never interrupts the caller.

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    exp = _validate_experiment(experiment)

    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Confusion Matrix')
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure on the failure path too, otherwise every failed
        # call leaves an open matplotlib figure behind.
        if fig is not None:
            plt.close(fig)
def get_confusion_matrix(self, on="test"):
    """Score ``self.pipe`` on the chosen data and show a confusion matrix.

    Args:
        on: Which data to score: ``"test"`` (default) uses
            ``self._X_test``/``self._y_test``, ``"train"`` uses
            ``self._X_train``/``self._y_train`` and ``"all"`` uses
            ``self.X``/``self.y``.

    Raises:
        ValueError: If ``on`` is not one of the three accepted values.
            Without this check an unscored matrix would be drawn.
    """
    if on not in ("test", "train", "all"):
        raise ValueError(
            "on must be 'test', 'train' or 'all', got {!r}".format(on))

    cm = ConfusionMatrix(self.pipe)
    if on == "test":
        cm.score(self._X_test, self._y_test)
    elif on == "train":
        cm.score(self._X_train, self._y_train)
    else:  # on == "all"
        cm.score(self.X, self.y)
    # graph the confusion matrix with yellowbrick
    cm.poof()
def create_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test):
    """Create confusion matrix.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created (the error is printed).

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/confusion_matrix'] = (
                npt_utils.create_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test))
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    chart = None
    fig = None

    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure on the failure path too, otherwise every failed
        # call leaves an open matplotlib figure behind.
        if fig is not None:
            plt.close(fig)

    return chart
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit ``model`` and display its confusion matrix with named classes.

    Args:
        model: Classifier handed to the yellowbrick ``ConfusionMatrix``.
        classes: Sequence of class names; position ``i`` names encoded
            label ``i``.
        X_train: Training features.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.
    """
    from yellowbrick.classifier import ConfusionMatrix

    # Map every encoded label to its name. Mapping only the first two entries
    # would leave any further class (e.g. a third iris species) without a name.
    label_names = dict(enumerate(classes))

    iris_cm = ConfusionMatrix(model,
                              classes=classes,
                              label_encoder=label_names)
    iris_cm.fit(X_train, Y_train)
    iris_cm.score(X_test, Y_test)
    iris_cm.poof()
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    """Show a confusion matrix, a classification report and a ROC/AUC plot.

    Each visualizer is built for ``model`` with the given ``classes``, fitted
    on the training split, scored on the test split and shown, in that order.
    """
    recipes = (
        (ConfusionMatrix, {}),
        (ClassificationReport, {"support": True}),
        (ROCAUC, {}),
    )
    for viz_cls, extra in recipes:
        viz = viz_cls(model, classes=classes, **extra)
        viz.fit(X_tr, y_tr)
        viz.score(X_te, y_te)
        viz.show()
def confusion_matrix(self, class_name_dict=None) -> None:
    """Plot a confusion matrix for ``self.trained_model`` and save it.

    The image is written to
    ``{self.plots_dir}/confusion_matrix_{self.model_id}.png`` and, unless the
    module-level ``LOCAL`` flag is set, uploaded to S3.

    Args:
        class_name_dict: Optional mapping passed to the visualizer as
            ``label_encoder``; its keys are passed as ``classes``. When
            ``None`` (the default) the visualizer is built without either.
    """
    if class_name_dict is None:
        # The default used to crash on ``None.keys()``; fall back to the
        # visualizer's own class handling instead.
        cm = ConfusionMatrix(self.trained_model)
    else:
        cm = ConfusionMatrix(self.trained_model,
                             classes=list(class_name_dict.keys()),
                             label_encoder=class_name_dict)
    cm.fit(self.X_train, self.y_train)
    cm.score(self.X_test, self.y_test)

    save_dir = f"{self.plots_dir}/confusion_matrix_{self.model_id}.png"
    cm.show(outpath=save_dir)

    if not LOCAL:
        upload_to_s3(save_dir,
                     f'plots/confusion_matrix_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)

    # Reset the current figure so later plots start from a clean canvas.
    plt.clf()
def confusion(dataset):
    """Save a logistic-regression confusion matrix for a named dataset.

    ``dataset`` must be ``"iris"`` or ``"digits"``; anything else raises
    ``ValueError``. The data is split 80/20, the visualizer is fitted and
    scored, and the figure is saved as ``confusion_matrix_<dataset>``.
    """
    if dataset not in ("iris", "digits"):
        raise ValueError("uknown dataset")

    data = load_iris() if dataset == "iris" else load_digits()

    X_train, X_test, y_train, y_test = tts(data.data, data.target, test_size=0.2)

    visualizer = ConfusionMatrix(LogisticRegression(), ax=newfig())
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    savefig(visualizer, "confusion_matrix_{}".format(dataset))
def find_best_k(X_train, y_train, X_test, y_test, min_k=1, max_k=25):
    '''Find the k with the best macro F1 for a K-Nearest Neighbors classifier.

    Trains one classifier for every second k from min_k to max_k inclusive
    (the odd values when min_k is odd), keeps the one with the highest macro
    F1 on the test subset, prints its metrics and shows its confusion matrix.

    Args:
        X_train (ndarray): Train subset X (data)
        y_train (Series): Train subset y (labels); ``.unique()`` is called on it
        X_test (ndarray): Test subset X (data)
        y_test (Series): Test subset y (labels)
        min_k (int): Smallest k to try
        max_k (int): Largest k to try

    Returns:
        None. Prints "Best Value for k", "Accuracy", "Precision", "Recall"
        and "F1-Score" for the best classifier and shows a confusion matrix.

    Raises:
        ValueError: If the range of k values is empty (min_k > max_k).
    '''
    best = None
    for k in range(min_k, max_k + 1, 2):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        preds = knn.predict(X_test)

        f1 = f1_score(y_test, preds, average='macro')
        # Strict '>' keeps the smallest k on ties.
        if best is None or f1 > best['f1']:
            # Keep the metrics and the fitted model together so the report
            # and the plot describe the winning k, not the last one tried.
            best = {
                'k': k,
                'f1': f1,
                'accuracy': accuracy_score(y_test, preds),
                'precision': precision_score(y_test, preds, average='macro'),
                'recall': recall_score(y_test, preds, average='macro'),
                'model': knn,
            }

    if best is None:
        raise ValueError(
            "no k values to try: min_k={} is greater than max_k={}".format(
                min_k, max_k))

    # Class names are matched positionally to the sorted labels, so the list
    # must be sorted rather than in order of first appearance.
    cm = ConfusionMatrix(best['model'], classes=sorted(y_train.unique()))
    cm.score(X_test, y_test)

    print("Best Value for k: {}".format(best['k']))
    print("Accuracy: {}".format(round(best['accuracy'], 3)))
    print("Precision: {}".format(round(best['precision'], 3)))
    print("Recall: {}".format(round(best['recall'], 3)))
    print("F1-Score: {}".format(round(best['f1'], 3)))
    plt.tight_layout()
    cm.show()
def plot_classifier_metrics(self):
    """Draw confusion matrix, classification report and ROC/AUC in one figure.

    A 2x2 grid is created; three cells hold the visualizers for ``self.clf``
    and the fourth (bottom-right) is removed. The figure is saved to
    '../docs/metrics_classifier.png' and then shown.
    """
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))

    panels = [
        ConfusionMatrix(self.clf, ax=axes[0][0]),
        ClassificationReport(self.clf, ax=axes[0][1]),
        ROCAUC(self.clf, ax=axes[1][0]),
    ]
    # The bottom-right cell has no plot.
    fig.delaxes(axes[1, 1])

    for panel in panels:
        panel.fit(self.X_train, self.y_train)
        panel.score(self.X_test, self.y_test)
        # finalize() only decorates the axes; the figure is shown once below.
        panel.finalize()

    plt.savefig('../docs/metrics_classifier.png')
    plt.show()
def CM(pipe, X, y):
    """Show a percentage confusion matrix and print a classification report.

    ``pipe`` is passed with ``is_fitted=True`` and is only scored on
    ``X``/``y``. After the plot, the raw confusion matrix and a 4-digit
    classification report for the classes 'Current' and 'Delinquent' are
    printed.
    """
    names = ['Current', 'Delinquent']

    delinq_cm = ConfusionMatrix(
        pipe,
        classes=list(names),
        label_encoder=dict(enumerate(names)),
        is_fitted=True,
        percent=True,
    )
    delinq_cm.score(X, y)
    delinq_cm.show()

    print(delinq_cm.confusion_matrix_)

    predictions = pipe.predict(X)
    report = classification_report(y,
                                   predictions,
                                   target_names=list(names),
                                   digits=4)
    print(report)
def __init__(self, X_train, X_test, y_train, y_test, labels, model, viz_selection, upsampled=False):
    """
    Class for yellowbrick classifier visualizer

    Args:
        X_train: numpy ndarray of model features training data values
        X_test: numpy ndarray of model features test data values
        y_train: numpy ndarray of model target variable training data values
        y_test: numpy ndarray of model target variable test data values
        labels: list of class labels for binary classification
        model: sklearn estimator for classification
        viz_selection: string value used to reference yellowbrick classification visualizer;
            one of 'ClassificationReport', 'ROCAUC', 'PrecisionRecallCurve', 'ConfusionMatrix'
        upsampled: binary value to determine to which subdirectory output image should be saved
    """
    self.labels = labels
    self.model = model
    self.viz_selection = viz_selection
    self.upsampled = upsampled
    self.X_train, self.X_test = X_train, X_test
    self.y_train, self.y_test = y_train, y_test

    # Each entry builds its visualizer lazily, so only the selected one is
    # ever constructed.
    builders = (
        ('ClassificationReport',
         lambda: ClassificationReport(self.model, classes=self.labels, support=True)),
        ('ROCAUC',
         lambda: ROCAUC(self.model, classes=self.labels, support=True)),
        ('PrecisionRecallCurve',
         lambda: PrecisionRecallCurve(self.model)),
        ('ConfusionMatrix',
         lambda: ConfusionMatrix(model, classes=self.labels)),
    )
    for name, build in builders:
        if self.viz_selection == name:
            self.visualizer = build()
            break
    else:
        # Unknown selection: report it and leave ``self.visualizer`` unset.
        print(
            "Error: viz_selection does not match accepted values. View Visualizer Class for accepted values."
        )
        return
def draw_plots():
    """Show ROC, class-prediction-error and confusion-matrix plots per technique.

    For each of "base", "SMOTE", "ADASYN" and "text-aug" the bag-of-words
    baseline split is loaded, the training data is replaced according to the
    technique, and four yellowbrick plots are fitted, scored and shown with a
    shared ``MultinomialNB(alpha=0.01)`` classifier.
    """

    def _render(viz, X_fit, y_fit, X_eval, y_eval):
        # Fit on the training data, evaluate on the test data, then show.
        viz.fit(X_fit, y_fit)
        viz.score(X_eval, y_eval)
        viz.show()

    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")

        # Every technique except text augmentation keeps the baseline test split.
        X_plot_test, y_plot_test = X_test, y_test
        if technique == "base":
            X_plot_train, y_plot_train = X_train, y_train
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(), representation="bow")
        else:
            raise Exception()

        plot_data = (X_plot_train, y_plot_train, X_plot_test, y_plot_test)

        # ROC micro average
        _render(ROCAUC(classifier, classes=get_selected_genres(),
                       micro=True, per_class=False), *plot_data)

        # ROC - Per Class
        _render(ROCAUC(classifier, classes=get_selected_genres(),
                       micro=True, per_class=True), *plot_data)

        # Class Prediction Error
        _render(ClassPredictionError(classifier, classes=get_selected_genres()),
                *plot_data)

        # The ConfusionMatrix
        _render(ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8]),
                *plot_data)
def plot_confusion_matrix(model:sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """
    Display a yellowbrick confusion matrix for a classifier.

    Inputs:
        model: an sklearn classifier
        X_train: training examples
        X_test: test examples
        y_train: training labels corresponding to examples in X_train
        y_test: test labels corresponding to examples in X_test

    Returns: None
    """
    visualizer = ConfusionMatrix(model)
    # Fit on the training split, evaluate on the test split.
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()
def classifier_report(classifier, X_test, y_test):
    """Save a confusion matrix and a classification report as experiment artifacts.

    Both plots are written as PNG files in the working directory and
    registered with the module-level experiment ``ex``.

    Args:
        classifier: Classifier to report on; its class name prefixes the
            confusion-matrix file name.
        X_test: Features used to fit and score the visualizers.
        y_test: Labels used to fit and score the visualizers.
    """
    classes = np.unique(y_test)

    # These are savefig options and must be expanded as keyword arguments;
    # matplotlib's name for tight cropping is ``bbox_inches``.
    savefig_options = dict(transparent=False, dpi=80, bbox_inches='tight')

    cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    # NOTE(review): fit() receives the test data; depending on the yellowbrick
    # version this may refit the classifier on it -- confirm this is intended.
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    cm.poof(outpath=filename, clear_figure=True, **savefig_options)
    ex.add_artifact(filename)

    visualizer = ClassificationReport(classifier, classes=classes, support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png",
                    clear_figure=True,
                    **savefig_options)
    ex.add_artifact('classification_report.png')
def store_experiment_data(self, X_test, y_test):
    """Save three evaluation plots as artifacts and log the model score.

    A classification report, a confusion matrix and a class-prediction-error
    plot are scored on the test data, written under ``metrics/`` and added to
    ``self.ex``. The value returned by the classification report's
    ``score()`` is printed and logged as the scalar ``'score'``.
    """
    report_path = 'metrics/classification_report.png'
    report = ClassificationReport(self.model)
    score = report.score(X_test, y_test)
    report.poof(report_path, clear_figure=True)
    self.ex.add_artifact(report_path)

    matrix_path = 'metrics/confusion_matrix.png'
    matrix = ConfusionMatrix(self.model)
    matrix.score(X_test, y_test)
    matrix.poof(matrix_path, clear_figure=True)
    self.ex.add_artifact(matrix_path)

    error_path = 'metrics/class_prediction_error.png'
    prediction_error = ClassPredictionError(self.model)
    prediction_error.score(X_test, y_test)
    prediction_error.poof(error_path, clear_figure=True)
    self.ex.add_artifact(error_path)

    print('score=', score)
    self.ex.log_scalar('score', score)
def get_confusion_matrix(self, on="test"):
    """
    Produces a confusion matrix made through the yellowbrick package.

    Input
    -----
    on : string (default=test)
        Determines which set of data to score and create a confusion matrix on.
        Default is 'test', meaning it will make a confusion matrix of the test
        results. 'train' and 'all' are alternative values.

    Raises
    ------
    ValueError
        If ``on`` is not 'test', 'train' or 'all'. Without this check an
        unscored matrix would be drawn.
    """
    if on not in ("test", "train", "all"):
        raise ValueError(
            "on must be 'test', 'train' or 'all', got {!r}".format(on))

    cm = ConfusionMatrix(self.pipe)
    if on == "test":
        cm.score(self._X_test, self._y_test)
    elif on == "train":
        cm.score(self._X_train, self._y_train)
    else:  # on == "all"
        cm.score(self._X, self._y)
    # graph the confusion matrix with yellowbrick
    cm.poof()
def plot_confusion_matrix(X_train, y_train, X_test, y_test, model, encoder):
    """
    Plot a confusion matrix for a model.

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model to test performance for
    :param encoder: passed to the visualizer as its ``encoder`` argument
    :return: None; the confusion matrix plot is shown
    """
    # The visualizer wraps the model together with the label encoder.
    matrix = ConfusionMatrix(model, encoder=encoder)

    # fit() trains the wrapped model on the training data.
    matrix.fit(X_train, y_train)

    # score() predicts on the test data and builds the confusion matrix.
    matrix.score(X_test, y_test)

    matrix.show()
'12 Choices')) # make dummy variable for allergy column df = pd.get_dummies(df, columns=['allergy']) # define feature matrix and target variable X = df[['choice_confidence', 'allergy_No']] y = df['num_choices'] # split and train model X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) model = LogisticRegression() # produce confusion matrix cm = ConfusionMatrix(model) cm.fit(X_train, y_train) cm.score(X_test, y_test) cm.poof() # calculate accuracy of model model.fit(X_train, y_train) y_pred = model.predict(X_test) print("Accuracy:", metrics.accuracy_score(y_test, y_pred)) # (Drew) # dummy variables for categorical food data for prediction models to make numerical variables one_hot = pd.get_dummies(df['dinner_choice']) df = df.drop('Timestamp', axis= 1) df = df.drop('age', axis = 1) df = df.drop('date', axis = 1)
vetor[:, i] = labelencoder.fit_transform(vetor[:, i]) labelEncoder(previsores) X_treino, X_teste, y_treino, y_teste = train_test_split(previsores, classe, test_size=0.3, random_state=0) naive_bayes = GaussianNB() naive_bayes.fit(X_treino, y_treino) previsoes = naive_bayes.predict(X_teste) confusao = confusion_matrix(y_teste, previsoes) taxa_acerto = accuracy_score(y_teste, previsoes) v = ConfusionMatrix(GaussianNB()) v.fit(X_treino, y_treino) v.score(X_teste, y_teste) v.poof() novo_credito = pd.read_csv('NovoCredit.csv') novo_credito = novo_credito.iloc[:, 0:20].values labelEncoder(novo_credito) nova_previsao = naive_bayes.predict(novo_credito) print() print('Seu novo cliente e: {} pagador'.format(nova_previsao[0])) print()
# Report the shapes of the feature matrix and the target vector.
print('X matrix dimensionality:', X.shape)
print('Y vector dimensionality:', Y.shape)

# Split the data into a training set and a validation set (20% held out);
# random_state is fixed so the split is the same on every run.
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=0.20, random_state=10)

# print("X_train: ", X_train.shape)
# print("X_validation: ", X_validation.shape))
# print("Y_train: ", Y_train.shape))
# print("Y_validation: ", Y_validation.shape))

# Gaussian Naive Bayes wrapped in a confusion-matrix visualizer whose class
# names are the 26 upper-case letters.
gaussianNB = GaussianNB()
cm = ConfusionMatrix(
    gaussianNB,
    classes="A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z".split(','))

# Train the model (through the visualizer) using the training set, then
# score it on the validation set.
cm.fit(X_train, Y_train)
cm.score(X_validation, Y_validation)

# Predict the responses for the validation set.
# NOTE(review): predict() is called on the visualizer, which presumably
# forwards it to the wrapped GaussianNB -- confirm.
predictions = cm.predict(X_validation)

# Accuracy classification score
print("Accuracy: ", accuracy_score(Y_validation, predictions))

# Compute and print the confusion matrix
print(confusion_matrix(Y_validation, predictions))
#Criação de uma variável com variável de resposta(y) classe = dados.iloc[:, 1].values #Aqui iremos transformar as colunas categóricas em colunas numéricas labelencoder = LabelEncoder() previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0]) #Aqui hávera a divisão dos dados para treinamento e teste X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size = 0.3, random_state = 0) #Criação do algoritmo da floresta randômica (Random Forest), juntamente com o treinamento do algoritmo floresta = RandomForestClassifier(criterion = 'gini', random_state = 0) floresta.fit(X_treinamento, y_treinamento) #Faz as previsões da variável teste. previsoes = floresta.predict(X_teste) #Gera uma variável com a matriz de confusão confusao = confusion_matrix(y_teste, previsoes) #Gera duas variáveis com as taxas de acertos e erros da floresta randômica (Random Forest) taxa_acerto = accuracy_score(y_teste, previsoes) taxa_erro = 1 - taxa_acerto #Gera o imagem da matriz de confusão v = ConfusionMatrix(floresta) v.fit(X_treinamento, y_treinamento) v.score(X_teste, y_teste) v.poof() '''Obs: Como constatado, a taxa de acerto foi 80%, aproximadamente, com 30% de dados para teste.'''
def score_model_outcome(X_train, y_train, X_test, y_test, model, **kwargs):
    """
    Fit a prognosis model, print its metrics and show diagnostic plots.

    Parameters
    ----------
    X_train: matrix of training features
    y_train: vector of training labels
    X_test: matrix of test features
    y_test: vector of test labels
    model: classifier to fit and evaluate
    **kwargs: extra keyword arguments forwarded to ``model.fit``

    Returns
    -------
    None. The function prints and plots:
    - Accuracy, AUC and F1 score for the train and test set
    - Confusion matrix
    - ClassificationReport
    - PrecisionRecallCurve
    - ClassPredictionError
    - ROC curve and feature importances, when the model supports them
    """
    class_names = ['Death', 'Survival']

    # Train the model
    model.fit(X_train, y_train, **kwargs)

    # Metrics on the train set
    prediction_train = model.predict(X_train)
    accuracy_train = accuracy_score(y_train, prediction_train)
    # roc_curve returns false positive rate, true positive rate, thresholds
    fpr_train, tpr_train, _ = roc_curve(y_train, prediction_train)
    auc_train = auc(fpr_train, tpr_train)
    f1_score_train = f1_score(y_train, prediction_train)

    # Metrics on the test set
    prediction_test = model.predict(X_test)
    accuracy_test = accuracy_score(y_test, prediction_test)
    fpr_test, tpr_test, _ = roc_curve(y_test, prediction_test)
    auc_test = auc(fpr_test, tpr_test)
    f1_score_test = f1_score(y_test, prediction_test)

    print("{}:".format(model.__class__.__name__))
    print(
        "On training we get an Accuracy {}, an AUC {} and F1 score {} ".format(
            accuracy_train, auc_train, f1_score_train))
    print("For test we get an Accuracy {}, an AUC {} and F1 score {}".format(
        accuracy_test, auc_test, f1_score_test))

    fig, axes = plt.subplots(3, 2, figsize=(20, 20))

    visualgrid = [
        ConfusionMatrix(model,
                        ax=axes[0][0],
                        classes=list(class_names),
                        cmap="YlGnBu"),
        ClassificationReport(
            model,
            ax=axes[0][1],
            classes=list(class_names),
            cmap="YlGn",
        ),
        PrecisionRecallCurve(model, ax=axes[1][0]),
        ClassPredictionError(model,
                             classes=list(class_names),
                             ax=axes[1][1]),
    ]

    for viz in visualgrid:
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()

    # The two plots below are best-effort: not every model supports them.
    # ``Exception`` (rather than a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        roc_auc(model,
                X_train,
                y_train,
                X_test=X_test,
                y_test=y_test,
                classes=list(class_names),
                ax=axes[2][0])
    except Exception:
        print("Can't plot ROC curve for this model")

    try:
        viz = FeatureImportances(model,
                                 ax=axes[2][1],
                                 stack=True,
                                 relative=False)
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()
    except Exception:
        print('Don\'t have feature importance')

    plt.show()
    print('\n')
previsores = dados.iloc[:, [0, 3]].values #Criação de uma variável com variável de resposta(y) classe = dados.iloc[:, 1].values #Aqui iremos transformar as colunas categóricas em colunas numéricas labelencoder = LabelEncoder() previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0]) #Fazendo a padronização dos atributos previsores scaler = StandardScaler() previsores = scaler.fit_transform(previsores) #Aqui hávera a divisão dos dados para treinamento e teste X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size = 0.3, random_state = 0) #Criação do classificador SVM e treinamento do algoritmo svm = SVC(random_state = 1, C = 2.0, gamma = 'auto') svm.fit(X_treinamento, y_treinamento) previsoes = svm.predict(X_teste) #Visalização da acurácia e matriz de confusão precisao = accuracy_score(y_teste, previsoes) matriz = confusion_matrix(y_teste, previsoes) v = ConfusionMatrix(svm) v.fit(X_treinamento, y_treinamento) v.score(X_teste, y_teste) v.poof() '''Obs: Como constatado, a taxa de acerto do modelo foi 62.2%, aproximadamente, com 30% de dados para teste.'''
classes = ["non-seizure", "seizure"] #viz_classification_report= ClassificationReport(gb_classifier , classes=classes) #viz_classification_report.fit(X_train, y_train) # Fit the visualizer and the model #viz_classification_report.score(X_test, y_test) # Evaluate the model on the test data #c = viz_classification_report.poof() # Instantiate the visualizer with the classification model #viz_ROC = ROCAUC(gb_classifier, classes=classes) #viz_ROC.fit(X_train, y_train) # Fit the training data to the visualizer #viz_ROC.score(X_test, y_test) # Evaluate the model on the test data #g = viz_ROC.poof() # Draw/show/poof the data # The ConfusionMatrix visualizer taxes a model cm = ConfusionMatrix(gb_classifier, classes=classes, label_encoder={ 0: 'non-seizure', 1: 'seizure' }) cm.fit(X_train, y_train) cm.score(X_test, y_test) c = cm.poof() plt.tight_layout() #Latency X_test_latency = df_seizures_power_22_states[ df_seizures_power_22_states.columns[:-2]] latency = gb_classifier.predict(X_test_latency) latency_test = pd.concat( [df_seizures_power_22_states['State'], pd.Series(latency)], axis=1)