def make_confusion_matrix(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` and display a yellowbrick confusion matrix for it.

    The labels are integer-encoded with a single ``LabelEncoder`` fitted on
    the raw training labels. The same fitted encoder is applied to the test
    labels, so both splits share one label-to-index mapping. (The previous
    version re-fitted the encoder on the already-encoded training labels
    before transforming ``y_test``, which shifts or rejects test labels
    whenever the raw labels are not exactly ``0..k-1``.)

    Args:
        model: Estimator handed to ``ConfusionMatrix``.
        X_train: Training features.
        y_train: Raw training labels.
        X_test: Test features.
        y_test: Raw test labels. A label that never occurs in ``y_train``
            makes ``LabelEncoder.transform`` raise ``ValueError``.

    Returns:
        None. The chart is rendered through ``cm.show()``.
    """
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(y_train)
    y_test_encoded = encoder.transform(y_test)

    # One display name per encoded class, in encoder order. ``tolist()``
    # yields native Python values instead of numpy scalars.
    # NOTE(review): the original used hard-coded lists (0..9 or 1..40) with a
    # remark that anything else "gives an error"; presumably that was caused
    # by numpy scalars in the list - confirm against the yellowbrick version
    # in use.
    classes = encoder.classes_.tolist()

    cm = ConfusionMatrix(model, classes=classes)
    cm.fit(X_train, y_train_encoded)
    cm.score(X_test, y_test_encoded)
    cm.show()
def showConfusionMatrix():
    """Plot a confusion matrix for logistic regression on the digits data set."""
    # Function-scope imports, as in the original.
    from sklearn.datasets import load_digits
    from yellowbrick.classifier import ConfusionMatrix

    # scikit-learn's handwritten digits: every sample is an 8x8 pixel image,
    # which ``digits.data`` exposes as a flat array of 64 features.
    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=11
    )

    # The visualizer wraps the (not yet fitted) model.
    digit_labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    visualizer = ConfusionMatrix(LogisticRegression(), classes=digit_labels)

    # fit() trains the wrapped model; score() predicts on the test data and
    # builds the confusion matrix from those predictions.
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    # Render the result.
    visualizer.poof()
def draw_confusion_matrix(self):
    """Fit, score and display a confusion matrix for ``self.model``.

    Uses ``self.le`` both for the class names (``classes_``) and as the
    label encoder handed to the visualizer.
    """
    matrix = ConfusionMatrix(
        self.model,
        classes=self.le.classes_,
        label_encoder=self.le,
    )
    matrix.fit(self.training_data, self.training_labels)
    matrix.score(self.test_data, self.test_labels)
    matrix.poof()
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    """Evaluate an RFE-wrapped LinearSVC and save diagnostic charts.

    Builds ``RFE(LinearSVC(class_weight='balanced', dual=False,
    max_iter=10000, C=c), n_features_to_select=n)``, writes six yellowbrick
    charts plus the list of retained features into ``outdir``, and returns
    the weighted F1 score on the held-out data.

    Args:
        c: ``C`` regularisation parameter for ``LinearSVC``.
        n: Number of features RFE keeps.
        X: Training features; must expose ``.columns`` (the retained feature
            names are read from it).
        y: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.
        class_names: Class labels passed to every visualizer.
        outdir: Existing directory that receives ``LC.png``, ``CR.png``,
            ``CM.png``, ``PRC.png``, ``CPE.png``, ``RA.png`` and
            ``features.csv``.

    Returns:
        Weighted F1 score of the fitted RFE model on ``(X_test, y_test)``.
    """
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    def _fit_score_save(viz, filename, **show_kwargs):
        """Fit on the training data, score on the held-out data, save."""
        viz.fit(X, y)
        viz.score(X_test, y_test)
        viz.show(outpath=f"{outdir}/{filename}", **show_kwargs)

    # Learning curve: only fitted on the training data, no score step.
    plt.clf()
    viz_lc = LearningCurve(rfe, scoring='f1_weighted', n_jobs=4)
    viz_lc.fit(X, y)
    viz_lc.show(outpath=f"{outdir}/LC.png")

    # Classification report
    plt.clf()
    _fit_score_save(
        ClassificationReport(rfe, classes=class_names, support=True), 'CR.png'
    )

    # Confusion matrix
    plt.clf()
    _fit_score_save(ConfusionMatrix(rfe, classes=class_names), 'CM.png')

    # Precision-recall curve
    plt.clf()
    _fit_score_save(
        PrecisionRecallCurve(
            rfe,
            per_class=True,
            iso_f1_curves=True,
            fill_area=False,
            micro=False,
            classes=class_names,
        ),
        'PRC.png',
        size=(1080, 720),
    )

    # Class prediction error
    plt.clf()
    _fit_score_save(ClassPredictionError(rfe, classes=class_names), 'CPE.png')

    # ROC/AUC. Previously scored on the training data (X, y), unlike every
    # other chart; it is now evaluated on the held-out split as well.
    plt.clf()
    _fit_score_save(ROCAUC(rfe, classes=class_names, size=(1080, 720)), 'RA.png')

    # Final fit used for the returned metric and the feature list.
    rfe.fit(X, y)
    y_predict = rfe.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    retained = X.columns[rfe.get_support()].values
    pd.DataFrame(retained.tolist()).to_csv(
        f"{outdir}/features.csv", sep='\t', index=False
    )
    return f1
def nice_confusion(model, X_train, X_test, y_train, y_test):
    """Draw a 10x10 inch confusion matrix with the 'PuBu' colormap.

    The model is fitted on the training split and scored on the test split
    through the yellowbrick visualizer, then the chart is shown.
    """
    label_size = 18

    # Prepare the pyplot figure and axis labels before the visualizer draws.
    plt.figure(figsize=(10, 10))
    plt.xlabel('Predicted Class', fontsize=label_size)
    plt.ylabel('True Class', fontsize=label_size)

    matrix = ConfusionMatrix(model, cmap='PuBu', fontsize=label_size)
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)
    matrix.show()
def log_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log confusion matrix.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``. Chart creation is best-effort: any exception raised while
        building or logging the chart is printed, not propagated.

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    exp = _validate_experiment(experiment)

    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Confusion Matrix')
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure on the failure path too; previously an exception
        # left it registered with pyplot.
        if fig is not None:
            plt.close(fig)
def create_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test):
    """Create confusion matrix.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when chart creation failed (the error is printed).

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            chart = npt_utils.create_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
            run['visuals/confusion_matrix'] = chart
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'

    chart = None
    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
    finally:
        # Close the figure on the failure path too; previously an exception
        # left it registered with pyplot.
        if fig is not None:
            plt.close(fig)

    return chart
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit ``model`` and show a yellowbrick confusion matrix with named classes.

    Args:
        model: Estimator wrapped by the visualizer.
        classes: Sequence of class display names, ordered by encoded index
            (``classes[i]`` is the name of encoded label ``i``).
        X_train: Training features.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.

    Returns:
        None. The chart is rendered with ``poof()``.
    """
    from yellowbrick.classifier import ConfusionMatrix

    # Map every encoded index to its display name. The previous version only
    # mapped indices 0 and 1, so a third class had no name and a
    # single-class input raised IndexError.
    index_to_name = dict(enumerate(classes))

    iris_cm = ConfusionMatrix(model, classes=classes, label_encoder=index_to_name)
    iris_cm.fit(X_train, Y_train)
    iris_cm.score(X_test, Y_test)
    iris_cm.poof()
def confusion_matrix(self, class_name_dict=None) -> None:
    """Plot a confusion matrix, save it as PNG and optionally upload it.

    Args:
        class_name_dict: Optional mapping used as the visualizer's label
            encoder; its keys are passed as ``classes``. When omitted the
            visualizer is built without explicit classes or encoder
            (previously the ``None`` default crashed on ``.keys()``).

    The image is written to
    ``{self.plots_dir}/confusion_matrix_{self.model_id}.png``; unless
    ``LOCAL`` is truthy it is also uploaded via ``upload_to_s3``.
    """
    if class_name_dict is None:
        cm = ConfusionMatrix(self.trained_model)
    else:
        # NOTE(review): ``label_encoder`` is the keyword used here and in
        # other callers; confirm it matches the installed yellowbrick
        # version's parameter name.
        cm = ConfusionMatrix(
            self.trained_model,
            classes=list(class_name_dict),
            label_encoder=class_name_dict,
        )

    cm.fit(self.X_train, self.y_train)
    cm.score(self.X_test, self.y_test)

    out_path = f"{self.plots_dir}/confusion_matrix_{self.model_id}.png"
    cm.show(outpath=out_path)

    if not LOCAL:
        upload_to_s3(
            out_path,
            f'plots/confusion_matrix_{self.model_id}.png',
            bucket=S3_BUCKET_NAME,
        )

    # Reset the current pyplot figure for the next chart.
    plt.clf()
def confusion(dataset):
    """Save a logistic-regression confusion matrix for a bundled data set.

    Args:
        dataset: Either ``"iris"`` or ``"digits"``.

    Raises:
        ValueError: If ``dataset`` is any other value.
    """
    # Guard clause: reject unsupported names before loading anything.
    if dataset not in ("iris", "digits"):
        raise ValueError("uknown dataset")

    data = load_iris() if dataset == "iris" else load_digits()

    X_train, X_test, y_train, y_test = tts(
        data.data, data.target, test_size=0.2
    )

    visualizer = ConfusionMatrix(LogisticRegression(), ax=newfig())
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    savefig(visualizer, "confusion_matrix_{}".format(dataset))
def plot_confusion_matrix(model:sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """Display a yellowbrick confusion matrix for ``model``.

    Inputs:
        model: an sklearn classifier
        X_train: training examples
        X_test: test examples
        y_train: labels belonging to ``X_train``
        y_test: labels belonging to ``X_test``

    Returns: None
    """
    visualizer = ConfusionMatrix(model)

    # Fit on the training split, evaluate on the test split, then render.
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()
def draw_plots():
    """Show ROC, class-prediction-error and confusion-matrix charts per technique.

    For each of "base", "SMOTE", "ADASYN" and "text-aug" a bag-of-words
    baseline split is loaded, the training (and for "text-aug" also the test)
    data is replaced by the technique's output, and four yellowbrick charts
    are fitted, scored and shown with a ``MultinomialNB(alpha=0.01)`` model.
    """
    classifier = MultinomialNB(alpha=0.01)

    def _fit_score_show(viz, X_fit, y_fit, X_eval, y_eval):
        # Fit on the training data, evaluate on the test data, render.
        viz.fit(X_fit, y_fit)
        viz.score(X_eval, y_eval)
        viz.show()

    for technique in ("base", "SMOTE", "ADASYN", "text-aug"):
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")

        # Start from the untouched baseline split; techniques override below.
        X_plot_train, y_plot_train = X_train, y_train
        X_plot_test, y_plot_test = X_test, y_test

        if technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(), representation="bow")
        elif technique != "base":
            raise Exception()

        split = (X_plot_train, y_plot_train, X_plot_test, y_plot_test)

        # ROC - micro average only
        _fit_score_show(
            ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False),
            *split,
        )

        # ROC - per class
        _fit_score_show(
            ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True),
            *split,
        )

        # Class prediction error
        _fit_score_show(
            ClassPredictionError(classifier, classes=get_selected_genres()),
            *split,
        )

        # Confusion matrix (numeric class labels 0..8)
        _fit_score_show(
            ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8]),
            *split,
        )
def classifier_report(classifier, X_test, y_test):
    """Save a confusion matrix and a classification report as artifacts.

    Both visualizers are fitted and scored on the same ``(X_test, y_test)``
    data. Each chart is written to a PNG file which is then registered with
    ``ex.add_artifact``.
    """
    observed_classes = np.unique(y_test)

    # Confusion matrix -> "<ClassifierName>_confusion_matrix.png"
    matrix = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    matrix.fit(X_test, y_test)
    matrix.score(X_test, y_test)
    matrix_png = f"{classifier.__class__.__name__}_confusion_matrix.png"
    matrix.poof(
        outpath=matrix_png,
        clear_figure=True,
        kwargs={'transparent': False, 'dpi': 80, 'inches': 'tight'},
    )
    ex.add_artifact(matrix_png)

    # Classification report -> "classification_report.png"
    report = ClassificationReport(classifier, classes=observed_classes, support=True)
    report.fit(X_test, y_test)
    report.score(X_test, y_test)
    report.poof(
        outpath="classification_report.png",
        clear_figure=True,
        kwargs={'transparent': False, 'dpi': 80, 'inches': 'tight'},
    )
    ex.add_artifact('classification_report.png')
def plot_confusion_matrix(X_train, y_train, X_test, y_test, model, encoder):
    """Fit ``model`` and show its confusion matrix.

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model whose performance is visualised
    :param encoder: passed to ``ConfusionMatrix`` as ``encoder``
    :return: None; the plot is displayed via ``show()``
    """
    visualizer = ConfusionMatrix(model, encoder=encoder)

    # fit() trains the wrapped model on the training data.
    visualizer.fit(X_train, y_train)

    # score() predicts on the test data and builds the confusion matrix
    # from those predictions.
    visualizer.score(X_test, y_test)

    visualizer.show()
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    """Show a confusion matrix, a classification report and a ROC/AUC chart.

    Each visualizer wraps ``model``, is fitted on ``(X_tr, y_tr)``, scored on
    ``(X_te, y_te)`` and shown, in that order.
    """

    def _render(chart):
        chart.fit(X_tr, y_tr)
        chart.score(X_te, y_te)
        chart.show()

    _render(ConfusionMatrix(model, classes=classes))
    _render(ClassificationReport(model, classes=classes, support=True))
    _render(ROCAUC(model, classes=classes))
def nbml(dados):
    """Predict the credit class of one applicant with Gaussian Naive Bayes.

    The applicant record is written to ``teste_head.csv`` (header row plus
    one data row) and read back with pandas. The training data comes from
    ``creditfiltrado.csv`` (``;``-separated, 20 feature columns followed by
    the class column). The model is trained on a 50 % split and the
    prediction for the applicant is stored through ``UpdateMongoPrevisao``.

    Args:
        dados: Mapping holding the 20 feature keys listed in
            ``feature_names`` plus ``'cpf'``.

    Returns:
        The array returned by ``GaussianNB.predict`` for the applicant row.

    Raises:
        ValueError: From ``LabelEncoder.transform`` when the applicant has a
            categorical value that does not occur in the training data.
    """
    feature_names = [
        'checking_status', 'duration', 'credit_history', 'purpose',
        'credit_amount', 'savings_status', 'employment',
        'installment_commitment', 'personal_status', 'other_parties',
        'residence_since', 'property_magnitude', 'age',
        'other_payment_plans', 'housing', 'existing_credits', 'job',
        'num_dependents', 'own_telephone', 'foreign_worker',
    ]

    # Header row followed by the applicant's values, in the same order.
    rows = [feature_names, [dados[name] for name in feature_names]]
    # newline='' is what the csv module expects for files it writes.
    with open('teste_head.csv', 'w', newline='') as handle:
        csv.writer(handle).writerows(rows)

    base = pd.read_csv('creditfiltrado.csv', sep=';')
    print(base)
    base2 = pd.read_csv('teste_head.csv', sep=",")
    print(base2)

    X = base.iloc[:, 0:20].values
    y = base.iloc[:, 20].values
    z = base2.iloc[:, 0:20].values

    # Categorical columns are label-encoded. Each encoder is fitted on the
    # training column and then reused for the applicant row so both share
    # the same value-to-code mapping. (Previously the applicant row was
    # encoded with encoders fitted on that single row, which maps every
    # categorical value to 0 regardless of its content.)
    categorical_columns = (0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19)
    for column in categorical_columns:
        encoder = LabelEncoder()
        X[:, column] = encoder.fit_transform(X[:, column])
        z[:, column] = encoder.transform(z[:, column])

    X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
        X, y, test_size=0.5, random_state=0)

    modelo = GaussianNB()
    modelo.fit(X_treinamento, y_treinamento)

    previsoes = modelo.predict(X_teste)
    previsoes2 = modelo.predict(z)
    print(previsoes2)
    accuracy_score(y_teste, previsoes)

    # NOTE(review): the confusion matrix is fitted and scored but never
    # shown or saved; kept as in the original.
    confusao = ConfusionMatrix(modelo, classes=['good', 'bad'])
    confusao.fit(X_treinamento, y_treinamento)
    confusao.score(X_teste, y_teste)

    UpdateMongoPrevisao(dados['cpf'], str(previsoes2))
    return previsoes2
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from yellowbrick.classifier import ConfusionMatrix


if __name__ == '__main__':
    # Load the handwritten digits classification data set.
    digits = load_digits()
    X, y = digits.data, digits.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11
    )

    model = LogisticRegression()

    # The ConfusionMatrix visualizer wraps the model.
    cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    # Fit on the training data, then evaluate on the test data.
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)

    # Draw the chart and write it to disk.
    g = cm.poof(outpath="images/confusion_matrix.png")
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Train a TF-IDF + logistic-regression pipeline inside an MLflow run.

    The fitted pipeline is dumped to ``pipeline_lr.pkl``. Accuracy, F1 and
    AUC (all computed from the predicted labels) are printed and logged,
    rounded to two decimals. Four yellowbrick charts are saved under
    ``image/`` and logged as artifacts.

    Args:
        experiment_id: MLflow experiment id for ``mlflow.start_run``.
        run_name: MLflow run name.
        xtrain: Training documents.
        xtest: Test documents.
        ytrain: Training labels.
        ytest: Test labels.

    Returns:
        ``run.info.run_uuid`` of the MLflow run.
    """
    import os

    np.random.seed(100)
    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        tfid_vect = TfidfVectorizer(
            analyzer='word',
            tokenizer=nltk.tokenize.word_tokenize,
            stop_words='english',
            min_df=5,
        )
        my_pipeline = Pipeline(steps=[
            ('vectorizer', tfid_vect),
            ('lr', LogisticRegression(random_state=42)),
        ])
        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        # The charts are written below; make sure the target folder exists.
        os.makedirs("image", exist_ok=True)

        # One axes per chart, all on one shared figure (as before).
        charts = (
            (ClassificationReport, "image/classification_report.png"),
            (ConfusionMatrix, "image/confusionmatrix.png"),
            (ROCAUC, "image/rocauc.png"),
            (ClassPredictionError, "image/ClassificationError.png"),
        )
        fig, axes = plt.subplots(nrows=4)
        try:
            for (visualizer_cls, outpath), ax in zip(charts, axes):
                visualizer = visualizer_cls(my_pipeline, ax=ax, classes=[0, 1])
                visualizer.fit(xtrain, ytrain)   # fit on the training data
                visualizer.score(xtest, ytest)   # evaluate on the test data
                visualizer.poof(outpath=outpath)
                print(' ')
                mlflow.log_artifact(outpath)
        finally:
            # Previously the figure was never closed and accumulated across
            # repeated calls.
            plt.close(fig)

        return run.info.run_uuid
# Label-encode columns 6..18 in place. The original unrolled code encoded
# column 17 a second time and stored the result in column 18
# (``x[:, 18] = labelencoder.fit_transform(x[:, 17])``), discarding the real
# contents of column 18; every column is now encoded from its own values.
for col in range(6, 19):
    x[:, col] = labelencoder.fit_transform(x[:, col])

x_treinamento, x_teste, y_treinamento, y_teste = train_test_split(
    x, y, test_size=0.3, random_state=0
)  # random_state=0 always yields the same records, as in the video example

modelo = GaussianNB()
modelo.fit(x_treinamento, y_treinamento)  # builds the Naive Bayes probability table

previsoes = modelo.predict(x_teste)
# The original author reported ~70% accuracy / 30% error here; that figure
# was measured before the column-18 fix above.
accuracy_score(y_teste, previsoes)

# Confusion matrix
confusao = ConfusionMatrix(modelo, classes=['ruim', 'bom'])
confusao.fit(x_treinamento, y_treinamento)
confusao.score(x_teste, y_teste)
confusao.poof()
random_state=0) # Treinamento do modelo naive_bayes = GaussianNB() naive_bayes.fit(X_train, y_train) # Teste do modelo previsoes = naive_bayes.predict(X_test) confusao = confusion_matrix(y_test, previsoes) taxaAcerto = accuracy_score(y_test, previsoes) taxaErro = 1 - taxaAcerto # Visualização de modelos de Machine Learning from yellowbrick.classifier import ConfusionMatrix visualizador = ConfusionMatrix(GaussianNB()) visualizador.fit(X_train, y_train) visualizador.score(X_test, y_test) visualizador.poof """ Simulando modelo em produção """ # Carregando no dado para previsão novoCredito = pd.read_csv('NovoCredit.csv') # Identificação dos atributos categóricos (tipo 'Object') atributosParaEncoderEmProducao = [] for i in list(novoCredito.columns): if (novoCredito[i].dtype == 'O'): atributosParaEncoderEmProducao.append(i) del i # Encoder dos atributos do tipo 'Object' para usar o modelo
def get_plots():
    """Build HTML snippets for the diagnostic plots of the current model.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``,
    ``ML_ALG_nr``, ``model_def``, ``Enc`` and ``app``. Every plot is drawn on
    the current pyplot figure, converted with ``mpld3.fig_to_html`` and
    appended to the result with an ``<h4>`` title, after which the figure is
    cleared.

    Plots produced:
        * always: t-SNE manifold of the training data;
        * ``ML_ALG_nr == 1`` (classification): class prediction error and
          confusion matrix, only when the class names can be decoded;
        * ``ML_ALG_nr == 0`` (regression): prediction error and residuals.

    Returns:
        List of HTML strings. For any other ``ML_ALG_nr`` the list holds
        only the manifold plot (the previous version implicitly returned
        ``None`` in that case).
    """
    all_plots = []

    def _capture(title):
        """Store the current figure as HTML under ``title`` and clear it."""
        html = mpld3.fig_to_html(plt.gcf())
        all_plots.append(f"<h4 align='center'>{title}</h4>{html}")
        plt.clf()

    # Feature visualization
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    viz.fit_transform(X_train, y_train)
    _capture("Manifold Visualization")

    if ML_ALG_nr == 1:  # classification
        # Check whether the class names can be recovered from the encoder.
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        # NOTE(review): the original indentation was lost; this assumes both
        # classification charts were guarded by the class-name check.
        if classes is not None:
            # Uses a fresh DecisionTreeClassifier rather than ``model_def``,
            # as in the original.
            visualizer = ClassPredictionError(DecisionTreeClassifier(), classes=classes)
            visualizer.fit(X_train, y_train)
            visualizer.score(X_test, y_test)
            _capture("Class Prediction Error")

            # The visualizer was previously constructed twice in a row; one
            # instance is enough.
            cm = ConfusionMatrix(model_def, classes=classes)
            cm.fit(X_train, y_train)
            # score() predicts on the test data and builds the matrix.
            cm.score(X_test, y_test)
            _capture("Confusion Matrix")

    elif ML_ALG_nr == 0:  # regression
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _capture("Prediction Error Plot")

        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        _capture("Residuals Plot")

    return all_plots
# Try several maximum depths for a decision tree and remember the depth
# with the best score on the test data.
meilleur_score = 0
profondeur = [5, 7, 10, 12, 15]
for k in profondeur:
    arbre = DecisionTreeClassifier(max_depth=k)
    arbre.fit(x_train, y_train)
    score = arbre.score(x_test, y_test)
    if score > meilleur_score:
        meilleur_score = score
        meilleur_param = {'profondeur': k}
# NOTE(review): the original indentation was lost; this layout assumes the
# confusion matrix is drawn once after the loop. In that case ``arbre`` is
# the last tree tried (max_depth=15), not necessarily the best one - confirm.
cm = ConfusionMatrix(arbre, classes=[0, 1, 2, 3, 4, 5, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()
# Prints "best score" / "best parameter" (messages are in French).
print('meilleur score : {:.2f}'.format(meilleur_score))
print('meilleur paramètre : {}'.format(meilleur_param))
# Section marker kept as a bare string: "Random Forest".
''' ----------- Forêt Aléatoire ----------------------'''
# Search grid for the random forest: number of trees, max_features options
# and split criteria.
meilleur_score = 0
nbr_arbre = [20, 40, 60, 80, 100, 120, 140]
features = ['sqrt', 'log2']
critere = ['gini', 'entropy']
for arbre_choix in nbr_arbre:
#matriz,cl1=create_matriz('m1.txt') #matriz2,cl2=create_matriz('m2.txt') #matriz3,cl3=create_matriz('m3.txt') #matriz4,cl4=create_matriz('m4.txt') #matriz5,cl5=create_matriz('m5.txt') #zero,cl0=create_matriz('zero.txt') teste, cl_teste = create_matriz('m7.txt') #data=matriz+matriz2+matriz3+matriz4+matriz5+zero #cl_data=cl1+cl2+cl3+cl4+cl5+cl0 data, cl_data = create_matriz('Matriz total.txt') modelo = GaussianNB(var_smoothing=1e-10) modelo.fit(data, cl_data) previsoes = modelo.predict(teste) accuracy_score(cl_teste, previsoes) confusao = ConfusionMatrix(modelo, classes=[0, 1, 2, 3, 4]) confusao.fit(data, cl_data) confusao.score(teste, cl_teste) confusao.poof() with open( 'C:\\Users\\RENÊ MICHEL\\Desktop\\Codigos\\Python\\Batalha Naval\\nv.pickle', 'wb') as f: pickle.dump((modelo), f)
# NOTE(review): this first statement appears to be the tail of the
# ``labelEncoder`` helper whose header lies outside this excerpt; its
# original indentation is unknown.
vetor[:, i] = labelencoder.fit_transform(vetor[:, i])

# Encode the predictor matrix in place, then split 70/30.
labelEncoder(previsores)
X_treino, X_teste, y_treino, y_teste = train_test_split(previsores, classe, test_size=0.3, random_state=0)

# Train Gaussian Naive Bayes and evaluate it on the test split.
naive_bayes = GaussianNB()
naive_bayes.fit(X_treino, y_treino)
previsoes = naive_bayes.predict(X_teste)
confusao = confusion_matrix(y_teste, previsoes)
taxa_acerto = accuracy_score(y_teste, previsoes)

# Visual confusion matrix; note it wraps a fresh GaussianNB instance, not
# ``naive_bayes``.
v = ConfusionMatrix(GaussianNB())
v.fit(X_treino, y_treino)
v.score(X_teste, y_teste)
v.poof()

# Predict a new record read from NovoCredit.csv (first 20 columns).
novo_credito = pd.read_csv('NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
# NOTE(review): the new record is encoded by a separate ``labelEncoder``
# call; presumably this re-fits the encoder on the new data, so the codes
# may not match those used for training - verify against the helper.
labelEncoder(novo_credito)
nova_previsao = naive_bayes.predict(novo_credito)
print()
# Message (Portuguese): "Your new client is a: {} payer".
print('Seu novo cliente e: {} pagador'.format(nova_previsao[0]))
print()
#Criação de uma variável com variável de resposta(y) classe = dados.iloc[:, 1].values #Aqui iremos transformar as colunas categóricas em colunas numéricas labelencoder = LabelEncoder() previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0]) #Aqui hávera a divisão dos dados para treinamento e teste X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size = 0.3, random_state = 0) #Criação do algoritmo da floresta randômica (Random Forest), juntamente com o treinamento do algoritmo floresta = RandomForestClassifier(criterion = 'gini', random_state = 0) floresta.fit(X_treinamento, y_treinamento) #Faz as previsões da variável teste. previsoes = floresta.predict(X_teste) #Gera uma variável com a matriz de confusão confusao = confusion_matrix(y_teste, previsoes) #Gera duas variáveis com as taxas de acertos e erros da floresta randômica (Random Forest) taxa_acerto = accuracy_score(y_teste, previsoes) taxa_erro = 1 - taxa_acerto #Gera o imagem da matriz de confusão v = ConfusionMatrix(floresta) v.fit(X_treinamento, y_treinamento) v.score(X_teste, y_teste) v.poof() '''Obs: Como constatado, a taxa de acerto foi 80%, aproximadamente, com 30% de dados para teste.'''
Com as (previsoes) usando os atributos de Testes geramos previsoes que usando nossa I.A. Podemos comparar as (previsoes) com as (classesTestes), pois ela vai ter as respostas corretas assim podemos já observar a porcentagem de acerto da nossa I.A '''
# The line above closes a bare explanatory string opened outside this
# excerpt. It says the test predictions can be compared with the true test
# classes to obtain the accuracy.

# Fraction of test samples predicted correctly.
acuracidade = accuracy_score(classeTeste, previsoes)
# Author's note (bare string): accuracy_score yields the hit rate; in the
# author's run it was 0.8658 (86%).
''' Usando a função (accuracy_score) passando como párametro a classeTeste e a nossá váriavel de previsões podemos gerar o valor de porcentagem de acertos da nossa I.A Neste exemplo nossa I.A acertou 0.8658 (86%) '''
############################### CONFUSION MATRIX ##################################
# Author's note (bare string): ConfusionMatrix visualises how well the model
# performed.
''' Por meio da nossa biblioteca (ConfusionMatrix) podemos gerar a matriz de confusão em Python mostrando assim de forma mais clara como foi o percentual de acerto da nossa I.A '''
# First chart: class names in Portuguese.
confusao = ConfusionMatrix(modelo, classes=["Nenhum", "Severo", "Leve", "Moderado"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()

# Second chart: same model and data, class names in English.
confusao = ConfusionMatrix(modelo, classes=["None", "Severe", "Mild", "Moderate"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()
def confusion_matrix(xx, yy, estimatorss, **kwargs):
    """Show a two-class confusion matrix for ``estimatorss``.

    The visualizer is fitted and scored on the same data ``(xx, yy)``.
    Extra keyword arguments are forwarded to ``ConfusionMatrix``.
    """
    class_labels = ['Reach, 1 Reach, or L/R Reach', 'Discard']
    chart = ConfusionMatrix(
        estimatorss,
        classes=class_labels,
        cmap="YlGn",
        size=(600, 360),
        **kwargs
    )
    chart.fit(xx, yy)
    chart.score(xx, yy)
    chart.show()
# split the data into a training set and a test set X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split( X, Y, test_size=0.20, random_state=10) # print("X_train: ", X_train.shape) # print("X_validation: ", X_validation.shape)) # print("Y_train: ", Y_train.shape)) # print("Y_validation: ", Y_validation.shape)) gaussianNB = GaussianNB() cm = ConfusionMatrix( gaussianNB, classes="A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z".split(',')) # train the model using the training sets cm.fit(X_train, Y_train) cm.score(X_validation, Y_validation) # predict the responses for test dataset predictions = cm.predict(X_validation) # accuracy classification score print("Accuracy: ", accuracy_score(Y_validation, predictions)) # compute confusion matrix print(confusion_matrix(Y_validation, predictions)) # text report showing the main classification metrics print(classification_report(Y_validation, predictions, digits=5))
# ``count`` is defined outside this excerpt; its first two entries are
# printed as fractions of the number of test rows.
print(count)
print(count[0] / Ytest.shape[0], count[1] / Ytest.shape[0])

# ---- Decision tree ----
tr = DecisionTreeClassifier()
tr.fit(Xtrain, Ytrain)
# print("\nimportances=",tr.feature_importances_)
y_predict = tr.predict(Xtest)
print(
    f"Accuracy score for Decision Tree Classifier is: {accuracy_score(Ytest, y_predict)}"
)
print("DT importances=", tr.feature_importances_)

# Confusion matrix for the decision tree (binary classes 0/1).
cm = ConfusionMatrix(tr, classes=[0, 1])
cm.fit(Xtrain, Ytrain)
cm.score(Xtest, Ytest)
cm.show()

# ---- Logistic regression ----
tr2 = LogisticRegression()
tr2.fit(Xtrain, Ytrain)
# print("importances =", tr2.feature_importances_)
y_predict = tr2.predict(Xtest)
print(
    f"\nAccuracy score for Logistic Regression Classifier is: {accuracy_score(Ytest, y_predict)}"
)
# For logistic regression the fitted coefficients are printed.
print("RL importances=", tr2.coef_)
# print("Ypred=", y_predict)
# make dummy variable for allergy column df = pd.get_dummies(df, columns=['allergy']) # define feature matrix and target variable X = df[['choice_confidence', 'allergy_No']] y = df['num_choices'] # split and train model X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) model = LogisticRegression() # produce confusion matrix cm = ConfusionMatrix(model) cm.fit(X_train, y_train) cm.score(X_test, y_test) cm.poof() # calculate accuracy of model model.fit(X_train, y_train) y_pred = model.predict(X_test) print("Accuracy:", metrics.accuracy_score(y_test, y_pred)) # (Drew) # dummy variables for categorical food data for prediction models to make numerical variables one_hot = pd.get_dummies(df['dinner_choice']) df = df.drop('Timestamp', axis= 1) df = df.drop('age', axis = 1) df = df.drop('date', axis = 1) df = df.drop('time', axis = 1)
from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split as tts from yellowbrick.classifier import ConfusionMatrix if __name__ == '__main__': digits = load_digits() digit_X = digits.data digit_y = digits.target d_X_train, d_X_test, d_y_train, d_y_test = tts( digit_X, digit_y, test_size=0.2 ) model = LogisticRegression() digit_cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9]) digit_cm.fit(d_X_train, d_y_train) digit_cm.score(d_X_test, d_y_test) d = digit_cm.poof(outpath="images/confusion_matrix_digits.png") iris = load_iris() iris_X = iris.data iris_y = iris.target iris_classes = iris.target_names i_X_train, i_X_test, i_y_train, i_y_test = tts( iris_X, iris_y, test_size=0.2 ) model = LogisticRegression() iris_cm = ConfusionMatrix( model, classes=iris_classes, label_encoder={0: 'setosa', 1: 'versicolor', 2: 'virginica'}