Exemplo n.º 1
0
def showConfusionMatrix():
    """Train a LogisticRegression on the scikit-learn digits data set and
    render a yellowbrick ConfusionMatrix for the held-out test split.

    Returns None; the matrix is displayed via ``cm.poof()``.
    """
    # All imports are function-local so the example is self-contained.
    from sklearn.datasets import load_digits
    # BUG FIX: the original body used train_test_split and LogisticRegression
    # without ever importing them, so calling it raised NameError.
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    from yellowbrick.classifier import ConfusionMatrix

    # Each sample of the digits data set is an 8x8 pixel image of a
    # handwritten number; digits.data flattens it into 64 features.
    digits = load_digits()
    X = digits.data
    y = digits.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer takes a model plus the class labels.
    cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    # fit() fits the passed model; unnecessary if the model is pre-fitted.
    cm.fit(X_train, y_train)

    # score() runs predict() on the test data and then builds the
    # confusion matrix with scikit-learn.
    cm.score(X_test, y_test)

    # Render the visualization.
    cm.poof()
Exemplo n.º 2
0
 def draw_confusion_matrix(self):
     """Fit a ConfusionMatrix visualizer on the stored training split,
     score it on the stored test split, and render it."""
     # The label encoder maps the encoded labels back to readable names.
     cm = ConfusionMatrix(self.model,
                          classes=self.le.classes_,
                          label_encoder=self.le)
     cm.fit(self.training_data, self.training_labels)
     cm.score(self.test_data, self.test_labels)
     cm.poof()
Exemplo n.º 3
0
def evaluation(estimator, X, Y, x, y):
    """Render three side-by-side evaluation plots for *estimator*:
    a learning curve, a confusion matrix, and a ROC/AUC curve.

    X, Y are the training features/labels; x, y are the test
    features/labels.  Returns None; shows the figure via plt.show().
    """
    # Presumably Y[1]/Y[0] are the two class label values — TODO confirm
    # this is intentional (it reads element 1 and 0 of the label array).
    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    #Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    # NOTE(review): cmm is scored without fit(), which assumes *estimator*
    # arrives pre-fitted — confirm against the callers.
    cmm.score(x, y)

    #ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    #Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    # Finalize all three visualizers, then show the shared figure.
    cmm.poof(), viz.poof(), visualizer.poof()
    plt.show()
Exemplo n.º 4
0
def nice_confusion(model):
    """Creates a nice looking confusion matrix.

    NOTE(review): reads X_train/y_train/X_test/y_test from module globals —
    confirm they are defined before this is called.
    """
    plt.figure(figsize=(10, 10))
    plt.xlabel('Predicted Class', fontsize=18)
    plt.ylabel('True Class', fontsize=18)
    #     plt.xticks(labels=[''])
    viz = ConfusionMatrix(model, cmap='PuBu', fontsize=18)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof()
    def get_confusion_matrix(self, on="test"):
        """Score the (assumed pre-fitted) pipeline and render a yellowbrick
        confusion matrix.

        Parameters
        ----------
        on : str, default "test"
            Which split to score: "test", "train", or "all".

        Raises
        ------
        ValueError
            If ``on`` is not one of the three recognised split names.
            (Previously an unknown value fell through silently and an
            unscored matrix was rendered.)
        """
        cm = ConfusionMatrix(self.pipe)
        if on == "test":
            cm.score(self._X_test, self._y_test)
        elif on == "train":
            cm.score(self._X_train, self._y_train)
        elif on == "all":
            cm.score(self.X, self.y)
        else:
            raise ValueError(
                "unknown value for 'on': %r (expected 'test', 'train' or 'all')" % (on,))

        # graph the confusion matrix with yellowbrick
        cm.poof()
Exemplo n.º 6
0
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    """Fit *model* on the training split and render a yellowbrick
    confusion matrix scored on the test split.

    *classes* supplies the two display names; labels 0/1 are mapped onto
    them via the visualizer's label encoder.
    """
    from yellowbrick.classifier import ConfusionMatrix

    # Map the integer labels back to their human-readable names.
    label_names = {0: classes[0], 1: classes[1]}
    viz = ConfusionMatrix(model, classes=classes, label_encoder=label_names)

    viz.fit(X_train, Y_train)
    viz.score(X_test, Y_test)
    viz.poof()
Exemplo n.º 7
0
def classifier_report(classifier, X_test, y_test):
    """Render a confusion matrix and a classification report for
    *classifier* on the given data, save both as PNGs, and attach them to
    the sacred experiment ``ex``.
    """
    classes = np.unique(y_test)
    # Use the classes actually present in the labels instead of the
    # hard-coded digit list 0-9 (``classes`` was computed but unused).
    cm = ConfusionMatrix(classifier, classes=classes)
    # NOTE(review): fits and scores on the same (test) data, mirroring the
    # original example — confirm a separate training split is not expected.
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    # BUG FIX: the savefig options were wrapped as kwargs=dict(...), which
    # reaches matplotlib as a single bogus 'kwargs' keyword; also 'inches'
    # is not a savefig option — the correct name is 'bbox_inches'.
    cm.poof(outpath=filename,
            clear_figure=True,
            transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact(filename)
    visualizer = ClassificationReport(classifier,
                                      classes=classes,
                                      support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png",
                    clear_figure=True,
                    transparent=False, dpi=80, bbox_inches='tight')
    ex.add_artifact('classification_report.png')
Exemplo n.º 8
0
def plot_confusion_matrix(model:sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """Fit *model* on the training split, score it on the test split, and
    display the resulting confusion matrix.

    Args:
        model: an sklearn classifier.
        X_train, y_train: training examples and their labels.
        X_test, y_test: test examples and their labels.

    Returns:
        None — the matrix is rendered to the current matplotlib figure.
    """
    visualizer = ConfusionMatrix(model)
    visualizer.fit(X_train, y_train)    # fits the wrapped classifier
    visualizer.score(X_test, y_test)    # predicts and tallies the matrix
    visualizer.poof()                   # draw the figure
Exemplo n.º 9
0
    def store_experiment_data(self, X_test, y_test):
        """Render three evaluation visualizations for self.model, attach
        them as experiment artifacts, and log the report score."""
        report = ClassificationReport(self.model)
        score = report.score(X_test, y_test)
        report.poof(
            'metrics/classification_report.png', clear_figure=True)
        self.ex.add_artifact('metrics/classification_report.png')

        # (local name fixes the original 'confustion_matrix' typo)
        conf_matrix = ConfusionMatrix(self.model)
        conf_matrix.score(X_test, y_test)
        conf_matrix.poof(
            'metrics/confusion_matrix.png', clear_figure=True)
        self.ex.add_artifact('metrics/confusion_matrix.png')

        pred_error = ClassPredictionError(self.model)
        pred_error.score(X_test, y_test)
        pred_error.poof('metrics/class_prediction_error.png', clear_figure=True)
        self.ex.add_artifact('metrics/class_prediction_error.png')

        print('score=', score)
        self.ex.log_scalar('score', score)
Exemplo n.º 10
0
    def get_confusion_matrix(self, on="test"):
        """
        Produces a confusion matrix made through the yellowbrick package.

        Input
        -----
        on : string (default=test)
            Determines which set of data to score and create a confusion matrix on.
            Default is 'test', meaning it will make a confusion matrix of the test results.
            'train' and 'all' are alternative values.

        Note
        ----
        Any other value of ``on`` falls through silently, so poof() renders
        an unscored matrix; pass only the three values above.
        """

        # self.pipe is presumably an already-fitted pipeline (score() only
        # predicts, it never fits) — TODO confirm against the class __init__.
        cm = ConfusionMatrix(self.pipe)
        if on == "test":
            cm.score(self._X_test, self._y_test)
        elif on == "train":
            cm.score(self._X_train, self._y_train)
        elif on == "all":
            cm.score(self._X, self._y)

        # graph the confusion matrix with yellowbrick
        cm.poof()
Exemplo n.º 11
0
Com as (previsoes) usando os atributos de Testes geramos previsoes que usando nossa I.A. 
Podemos comparar as (previsoes) com as (classesTestes), pois ela vai ter as respostas corretas assim podemos
já observar a porcentagem de acerto da nossa I.A 
'''

# Overall accuracy: fraction of test labels predicted correctly.
acuracidade = accuracy_score(classeTeste, previsoes)
'''
Usando a função (accuracy_score) passando como párametro a classeTeste e a nossá váriavel de previsões podemos
gerar o valor de porcentagem de acertos da nossa I.A

Neste exemplo nossa I.A acertou 0.8658 (86%)
'''

############################### CONFUSION MATRIX ##################################
'''
Por meio da nossa biblioteca (ConfusionMatrix) podemos gerar a matriz de confusão em Python mostrando assim
de forma mais clara como foi o percentual de acerto da nossa I.A
'''

# Confusion matrix with Portuguese class names.
confusao = ConfusionMatrix(modelo,
                           classes=["Nenhum", "Severo", "Leve", "Moderado"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()

# Same matrix rendered again with English class names.
confusao = ConfusionMatrix(modelo,
                           classes=["None", "Severe", "Mild", "Moderate"])
confusao.fit(atributosTreinamentos, classeTreinamento)
confusao.score(atributosTestes, classeTeste)
confusao.poof()
def train(experiment_id, run_name, xtrain, xtest, ytrain, ytest):
    """Fit a TF-IDF + LogisticRegression pipeline, log metrics and four
    yellowbrick visualizations to MLflow, and return the run id.

    Parameters
    ----------
    experiment_id, run_name : MLflow experiment identifiers.
    xtrain, xtest : iterables of raw text documents.
    ytrain, ytest : binary labels (0/1).

    Returns
    -------
    str : the MLflow run uuid.
    """
    import os

    np.random.seed(100)

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:

        tfid_vect = TfidfVectorizer(analyzer='word', tokenizer=nltk.tokenize.word_tokenize, stop_words='english', min_df=5)

        my_pipeline = Pipeline(steps=[('vectorizer', tfid_vect),
                                      ('lr', LogisticRegression(random_state=42))])

        my_pipeline.fit(xtrain, ytrain)
        predictions = my_pipeline.predict(xtest)

        # Persist the fitted pipeline alongside the run.
        joblib.dump(my_pipeline, 'pipeline_lr.pkl')

        accuracy = accuracy_score(ytest, predictions)
        f1score = f1_score(ytest, predictions)
        auc_score = roc_auc_score(ytest, predictions)
        class_report = classification_report(ytest, predictions)

        print(f'Accuracy : {round(accuracy, 2)}')
        print(f'f1_score : {round(f1score, 2)}')
        print(f'auc_score : {round(auc_score, 2)}')
        print(f'class_report : \n {class_report}')

        mlflow.log_metric('Accuracy', round(accuracy, 2))
        mlflow.log_metric('f1_score', round(f1score, 2))
        mlflow.log_metric('auc_score', round(auc_score, 2))

        # BUG FIX: poof(outpath=...) fails if the output directory is absent.
        os.makedirs('image', exist_ok=True)

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4)

        visualizer = ClassificationReport(my_pipeline, ax=ax1, classes=[0,1])
        visualizer.fit(xtrain, ytrain)
        visualizer.score(xtest, ytest)
        # (return values of poof() were bound to unused names a/b/c/g; dropped)
        visualizer.poof(outpath="image/classification_report.png")
        print(' ')

        mlflow.log_artifact("image/classification_report.png")

        # The ConfusionMatrix visualizer takes a model.
        cm = ConfusionMatrix(my_pipeline, ax=ax2, classes=[0,1])
        cm.fit(xtrain, ytrain)
        cm.score(xtest, ytest)
        cm.poof(outpath="image/confusionmatrix.png")

        mlflow.log_artifact("image/confusionmatrix.png")
        print(' ')

        vis = ROCAUC(my_pipeline, ax=ax3, classes=[0,1])
        vis.fit(xtrain, ytrain)   # Fit the training data to the visualizer
        vis.score(xtest, ytest)   # Evaluate the model on the test data
        vis.poof(outpath="image/rocauc.png")  # Draw/show/poof the data
        print(' ')
        mlflow.log_artifact("image/rocauc.png")

        visual = ClassPredictionError(my_pipeline, ax=ax4, classes=[0,1])
        visual.fit(xtrain, ytrain)
        visual.score(xtest, ytest)
        visual.poof(outpath="image/ClassificationError.png")
        print(' ')
        mlflow.log_artifact("image/ClassificationError.png")

        return run.info.run_uuid
Exemplo n.º 13
0
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from yellowbrick.classifier import ConfusionMatrix


if __name__ == '__main__':
    # Load the handwritten-digits classification data set.
    dataset = load_digits()
    X, y = dataset.data, dataset.target

    # Hold out 20% of the samples for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer wraps the model plus the ten digit labels.
    cm = ConfusionMatrix(model, classes=list(range(10)))

    cm.fit(X_train, y_train)    # fit the wrapped model on the training split
    cm.score(X_test, y_test)    # predict on the test split and tally the matrix
    g = cm.poof(outpath="images/confusion_matrix.png")  # render and save
# Binary classification: confusion matrix + classification report.
classes = ['Not_BendCurve', 'BendCurve']
cm = ConfusionMatrix(model, fontsize=13, classes=classes, percent=False)

#Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
cm.fit(X_train, y_train)

#To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
#and then creates the confusion_matrix from scikit learn.
cm.score(X_val, y_val)

# change fontsize of the labels in the figure
for label in cm.ax.texts:
    label.set_size(20)

#How did we do?
cm.poof(bbox_inches='tight')

# Precision, Recall, and F1 Score
# set the size of the figure and the font size
# NOTE(review): rect is [left, bottom, right, top]; left == right == 0.5 and
# top < bottom gives a zero-width, inverted layout box — looks wrong, confirm.
plt.tight_layout(rect=[.5, 0.5, .5, 0.05])
plt.rcParams['figure.figsize'] = (15, 7)
plt.rcParams['font.size'] = 20

# Instantiate the visualizer
visualizer = ClassificationReport(model, classes=classes)

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_val, y_val)  # Evaluate the model on the test data
g = visualizer.poof()

# ROC and AUC

# ROC and AUC
Exemplo n.º 15
0
def plot_confusion_matrix(model, X_valid, y_valid):
    """Render a confusion matrix for an already-fitted *model* on the
    validation split."""
    # is_fitted=True tells yellowbrick not to re-fit the estimator.
    cm = ConfusionMatrix(model, is_fitted=True)
    cm.score(X_valid, y_valid)
    cm.poof()
Exemplo n.º 16
0
# One-hot encode the categorical 'allergy' column.
df = pd.get_dummies(df, columns=['allergy'])

# define feature matrix and target variable
X = df[['choice_confidence', 'allergy_No']]
y = df['num_choices']

# split and train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

model = LogisticRegression()

# produce confusion matrix
cm = ConfusionMatrix(model)
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
cm.poof()

# calculate accuracy of model
# NOTE(review): this refits the same LogisticRegression instance that the
# visualizer already fitted above — harmless but redundant.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# (Drew)
# dummy variables for categorical food data for prediction models to make numerical variables
one_hot = pd.get_dummies(df['dinner_choice'])
# Drop columns that are not useful as model features.
df = df.drop('Timestamp', axis= 1)
df = df.drop('age', axis = 1)
df = df.drop('date', axis = 1)
df = df.drop('time', axis = 1)
df = df.drop('hour', axis = 1)
df = df.drop('allergy_No', axis = 1)
# Predictions of the first decision tree on the hold-out set.
predicciones1 = modelo1.predict(X_probar)

# Compute the accuracy of the first model.
accuracy_score(y_probar, predicciones1)
print('salida: ',accuracy_score(y_probar, predicciones1))
# y_probar      = ground-truth labels for the hold-out set
# predicciones1 = predicted labels

# Build the confusion matrix for the first model.
confusion1= ConfusionMatrix(modelo1)
confusion1.fit(X_entrenar, y_entrenar)
confusion1.score(X_probar, y_probar)
confusion1.poof()


# Improved model: same entropy tree, pre-pruned via min_samples_split.
modelo2 = DecisionTreeClassifier(criterion = 'entropy', min_samples_split = 100)
modelo2.fit(X_entrenar, y_entrenar)
# BUG FIX: the original exported modelo1 into 'modelo2.dot'.
export_graphviz(modelo2, out_file = 'modelo2.dot')

# BUG FIX: the original predicted with modelo1 here, so the "improved"
# second model was never actually evaluated.
predicciones2 = modelo2.predict(X_probar)
# Accuracy of the second model.
accuracy_score(y_probar, predicciones2)
print('salida: ',accuracy_score(y_probar, predicciones2))
# y_probar      = ground-truth labels
# predicciones2 = predictions of the pruned model
# Confusion matrix for the second model.
confusion2= ConfusionMatrix(modelo2)
Exemplo n.º 18
0
# ROC/AUC curve for the (presumably pre-built) random forest `rf` on the
# breast-cancer data — TODO confirm rf/cancer/X_train come from earlier cells.
roc = ROCAUC(rf, classes=cancer.target_names)
roc.fit(X_train, y_train)
roc.score(X_test, y_test)
roc.poof()

### Confusion Matrix

from yellowbrick.classifier import ConfusionMatrix

classes = cancer.target_names

# label_encoder maps the 0/1 labels onto readable class names.
conf_matrix = ConfusionMatrix(rf,
                              classes=classes,
                              label_encoder={
                                  0: 'benign',
                                  1: 'malignant'
                              })
conf_matrix.fit(X_train, y_train)
conf_matrix.score(X_test, y_test)
conf_matrix.poof()

### Class Prediction Error

from yellowbrick.classifier import ClassPredictionError

visualizer = ClassPredictionError(rf, classes=classes)

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Exemplo n.º 19
0
# BUG FIX: the original used one shared LabelEncoder, refitted by every
# column and refitted *again* on the production data, so the integer codes
# of the production rows did not match the ones the model was trained on.
# Use one encoder per column and only transform() in production.
le = LabelEncoder()  # kept for backward compatibility with later snippets
encoders = {col: LabelEncoder() for col in df_data.columns}
df_data_encoder = df_data.apply(lambda col: encoders[col.name].fit_transform(col))

X_train, X_test, y_train, y_test = \
    train_test_split(df_data_encoder, df_class, test_size = 0.3, random_state = 0)

modelo = GaussianNB()
# modelo = MultinomialNB()
modelo.fit(X_train, y_train)

y_pred = modelo.predict(X_test)

matrix = confusion_matrix(y_test, y_pred)

accuracy_score(y_test, y_pred)
# np.sum(matrix.diagonal()) / np.sum(matrix)

# yellowbrick confusion-matrix visualization
from yellowbrick.classifier import ConfusionMatrix
confusion = ConfusionMatrix(GaussianNB())
# confusion = ConfusionMatrix(MultinomialNB())
confusion.fit(X_train, y_train)
confusion.score(X_test, y_test)
confusion.poof()

### In production

df_novo_credit = pd.read_csv('NovoCredit.csv', sep=',')
# transform (not fit_transform): reuse the encodings learned on the training data.
df_novo_data = df_novo_credit.apply(lambda col: encoders[col.name].transform(col))
modelo.predict(df_novo_data)
Exemplo n.º 20
0
#Learning: fit a Gaussian Naive Bayes classifier.
naive_bayes = GaussianNB()  #naive_bayes object
naive_bayes.fit(x_treinamento, y_treinamento)

#Prediction / model test
previsoes = naive_bayes.predict(x_teste)
confusao = confusion_matrix(y_teste, previsoes)
#parameters: Ground truth (correct) labels; Predicted labels, as returned by a classifier
indice_acerto = accuracy_score(y_teste, previsoes)
indice_erro = 1 - indice_acerto
print(indice_acerto)

visualizador = ConfusionMatrix(GaussianNB())  #create the visualizer object
visualizador.fit(x_treinamento, y_treinamento)
visualizador.score(x_teste, y_teste)
visualizador.poof()  #render the visualization

#--------------------------------------------------------------------------
#Simulating the model in production
novo_credito = pd.read_csv(
    r'/home/larag/Desktop/Data Science/Machine Learning/NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
# NOTE(review): fit_transform here REFITS the encoder on the new data, so the
# codes may not match the ones learned at training time — should probably be
# transform() with per-column encoders; confirm upstream.
novo_credito[:, 0] = labelencoder.fit_transform(novo_credito[:, 0])
novo_credito[:, 2] = labelencoder.fit_transform(novo_credito[:, 2])
novo_credito[:, 3] = labelencoder.fit_transform(novo_credito[:, 3])
novo_credito[:, 5] = labelencoder.fit_transform(novo_credito[:, 5])
novo_credito[:, 6] = labelencoder.fit_transform(novo_credito[:, 6])
novo_credito[:, 8] = labelencoder.fit_transform(novo_credito[:, 8])
novo_credito[:, 9] = labelencoder.fit_transform(novo_credito[:, 9])
novo_credito[:, 11] = labelencoder.fit_transform(novo_credito[:, 11])
novo_credito[:, 13] = labelencoder.fit_transform(novo_credito[:, 13])
Exemplo n.º 21
0
previsoes = naive_bayes.predict(X_teste)  #predict on the test data

confusao = confusion_matrix(
    y_teste, previsoes)  #compare the true labels with the predictions

taxa_acerto = accuracy_score(y_teste, previsoes)

taxa_erro = 1 - taxa_acerto

#Visualizations
from yellowbrick.classifier import ConfusionMatrix  #visualization for ML models
v = ConfusionMatrix(GaussianNB())  #confusion-matrix visualizer
#training
v.fit(X_treinamento, y_treinamento)  #training
v.score(X_teste, y_teste)  #error computation
v.poof()  #render

#Simulation with new records
novo_cliente = pd.read_csv('NovoCredit.csv')
novo_cliente = novo_cliente.iloc[:,
                                 0:20].values  #convert to a numpy array

# NOTE(review): fit_transform refits the encoder on production data, so the
# resulting codes may differ from those used at training time — confirm.
novo_cliente[:, 0] = labelEncoder.fit_transform(novo_cliente[:, 0])
novo_cliente[:, 2] = labelEncoder.fit_transform(novo_cliente[:, 2])
novo_cliente[:, 3] = labelEncoder.fit_transform(novo_cliente[:, 3])
novo_cliente[:, 5] = labelEncoder.fit_transform(novo_cliente[:, 5])
novo_cliente[:, 6] = labelEncoder.fit_transform(novo_cliente[:, 6])
novo_cliente[:, 8] = labelEncoder.fit_transform(novo_cliente[:, 8])
novo_cliente[:, 9] = labelEncoder.fit_transform(novo_cliente[:, 9])
novo_cliente[:, 11] = labelEncoder.fit_transform(novo_cliente[:, 11])
novo_cliente[:, 13] = labelEncoder.fit_transform(novo_cliente[:, 13])
Exemplo n.º 22
0
from sklearn.model_selection import train_test_split as tts

from yellowbrick.classifier import ConfusionMatrix

if __name__ == '__main__':
    # Digits example: fit logistic regression, save the confusion matrix.
    digits = load_digits()
    digit_X = digits.data
    digit_y = digits.target
    d_X_train, d_X_test, d_y_train, d_y_test = tts(digit_X,
                                                   digit_y,
                                                   test_size=0.2)
    model = LogisticRegression()
    digit_cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    digit_cm.fit(d_X_train, d_y_train)
    digit_cm.score(d_X_test, d_y_test)
    # poof() renders and writes the figure to the given path.
    d = digit_cm.poof(outpath="images/confusion_matrix_digits.png")

    iris = load_iris()
    iris_X = iris.data
    iris_y = iris.target
    iris_classes = iris.target_names
    i_X_train, i_X_test, i_y_train, i_y_test = tts(iris_X,
                                                   iris_y,
                                                   test_size=0.2)
    model = LogisticRegression()
    iris_cm = ConfusionMatrix(model,
                              classes=iris_classes,
                              label_encoder={
                                  0: 'setosa',
                                  1: 'versicolor',
                                  2: 'virginica'
# NOTE(review): column 3 appears in BOTH the predictors (slice 0:4 = columns
# 0-3) and the class below — looks like target leakage; confirm whether the
# class should be column 4.
previsores = Arquivo.iloc[:, 0:4].values  #select the predictor attributes
classe = Arquivo.iloc[:, 3].values  #select the class attribute

labelencoder = LabelEncoder(
)  #converts categorical (non-numeric) attributes into numeric codes
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
    previsores, classe, test_size=0.3, random_state=0
)  #split into train/test; random_state=0 makes the split reproducible

naive_bayes = GaussianNB()  #create the classifier
naive_bayes.fit(X_treinamento, y_treinamento)  #fit on the training data

previsoes = naive_bayes.predict(X_teste)  #make predictions
print(previsoes)
confusao = confusion_matrix(y_teste, previsoes)  #build confusion matrix
print(confusao)
taxa_acerto = accuracy_score(y_teste, previsoes)  #accuracy
print(taxa_acerto)
taxa_erro = 1 - taxa_acerto
print(taxa_erro)
from yellowbrick.classifier import ConfusionMatrix  #library for confusion-matrix visualization
v = ConfusionMatrix(GaussianNB())  #create the visualizer
v.fit(X_treinamento, y_treinamento)  #training
v.score(X_teste, y_teste)  #scoring
v.poof()  #render
Exemplo n.º 24
0
def execute_classification_code(code, session):
    """Execute user-submitted classification code, render three yellowbrick
    visualizations (classification report, decision boundaries, confusion
    matrix) into ./plots/, pickle the trained model, and return the image
    and pickle path stubs as a JSON response.

    *code* is a URL-encoded, newline-separated payload whose lines are:
    problem class, ordering, data-frame setup code, model setup code.
    """
    global df, model, problem_class, order
    code_str = urllib.parse.unquote(code)
    code_arr = code_str.split("\n")
    print(code_arr)
    problem_class = code_arr[0]
    print(problem_class)
    order = code_arr[1]
    print(order)
    # SECURITY: exec() on request-supplied code runs arbitrary Python with the
    # server's privileges — this is only safe behind a trusted/sandboxed
    # boundary; do not expose it to untrusted clients.
    exec(code_arr[2])
    print(df)
    exec(code_arr[3], globals())

    cmap_pink_green = sns.diverging_palette(352, 136, s=96, l=51, n=7)
    viz = ClassificationReport(model, cmap=cmap_pink_green)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.poof(outpath="./plots/classificationmatrix" + session + ".png")
    image_path_class = "classificationmatrix"

    plt.clf()
    plt.cla()
    plt.close()

    le = LabelEncoder()
    # BUG FIX: list(...).sort() sorts in place and returns None, so the
    # visualizers below used to receive classes=None; sorted() returns the list.
    class_names = sorted(map(str, y.iloc[:, 0].unique()))
    dec_viz = DecisionViz(model,
                          title="Decision Boundaries",
                          features=np.where(cols == True)[0].tolist(),
                          classes=class_names)
    dec_viz.fit(X_train.to_numpy(), le.fit_transform(y_train))
    dec_viz.draw(X_test.to_numpy(), le.fit_transform(y_test))
    dec_viz.poof(outpath="./plots/decviz" + session + ".png")
    image_path_dec = "decviz"

    plt.clf()
    plt.cla()
    plt.close()

    print(list(map(str, y.iloc[:, 0].unique())))
    cmap_salmon_dijon = sns.diverging_palette(28, 65, s=98, l=78, n=7)
    cm = ConfusionMatrix(model,
                         classes=class_names,
                         cmap=cmap_salmon_dijon)
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)
    plt.tight_layout()
    cm.poof(outpath="./plots/cm" + session + ".png")
    image_path_cm = "cm"

    plt.clf()
    plt.cla()
    plt.close()

    model.fit(X_train, y_train)

    file = 'pickled_models/trained_model' + session + '.sav'
    pickle_path = 'trained_model'
    # BUG FIX: the file handle from open() was never closed; use a context
    # manager so the pickle is flushed and the descriptor released.
    with open(file, 'wb') as fh:
        pickle.dump(model, fh)

    return jsonify(image_path_class, image_path_dec, image_path_cm,
                   pickle_path)
Exemplo n.º 25
0
# Alternative visualizations, kept disabled in the original example:
#viz_classification_report= ClassificationReport(gb_classifier , classes=classes)
#viz_classification_report.fit(X_train, y_train)  # Fit the visualizer and the model
#viz_classification_report.score(X_test, y_test)  # Evaluate the model on the test data
#c = viz_classification_report.poof()

# Instantiate the visualizer with the classification model
#viz_ROC = ROCAUC(gb_classifier, classes=classes)
#viz_ROC.fit(X_train, y_train)  # Fit the training data to the visualizer
#viz_ROC.score(X_test, y_test)  # Evaluate the model on the test data
#g = viz_ROC.poof()             # Draw/show/poof the data

# The ConfusionMatrix visualizer takes a model; label_encoder maps the
# 0/1 labels onto readable names.
cm = ConfusionMatrix(gb_classifier,
                     classes=classes,
                     label_encoder={
                         0: 'non-seizure',
                         1: 'seizure'
                     })
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
c = cm.poof()
# NOTE(review): tight_layout after poof() has no effect on the already-drawn
# figure — confirm whether it was meant to run before poof().
plt.tight_layout()

#Latency: predict over the per-state power features (all columns but the
#last two, which presumably hold metadata — TODO confirm).
X_test_latency = df_seizures_power_22_states[
    df_seizures_power_22_states.columns[:-2]]
latency = gb_classifier.predict(X_test_latency)

# Align the per-row predictions with their state labels.
latency_test = pd.concat(
    [df_seizures_power_22_states['State'],
     pd.Series(latency)], axis=1)
Exemplo n.º 26
0
from yellowbrick.classifier import ConfusionMatrix

# Iris k-nearest-neighbours example.
iris = datasets.load_iris()
#iris
stats.describe(iris.data)

previsores = iris.data
classe = iris.target

x_treinamento, x_teste, y_treinamento, y_teste = train_test_split(
    previsores, classe, test_size=0.3, random_state=0)
#n_neighbors=3 considers the 3 nearest neighbours
knn = KNeighborsClassifier(n_neighbors=3)
#fit the model
knn.fit(x_treinamento, y_treinamento)
#classification is done by comparing distances against the stored
#training records
previsoes = knn.predict(x_teste)
confusao = confusion_matrix(y_teste, previsoes)
#nicely rendered confusion-matrix visualization
visualizador = ConfusionMatrix(KNeighborsClassifier(n_neighbors=3))
visualizador.fit(x_treinamento, y_treinamento)
visualizador.score(x_teste, y_teste)
visualizador.poof()

indice_acertos = accuracy_score(y_teste, previsoes)
print(indice_acertos)
indice_erros = 1 - indice_acertos

# In[ ]:
Exemplo n.º 27
0
            vetor[:, i] = labelencoder.fit_transform(vetor[:, i])


labelEncoder(previsores)

X_treino, X_teste, y_treino, y_teste = train_test_split(previsores,
                                                        classe,
                                                        test_size=0.3,
                                                        random_state=0)

naive_bayes = GaussianNB()
naive_bayes.fit(X_treino, y_treino)

previsoes = naive_bayes.predict(X_teste)
confusao = confusion_matrix(y_teste, previsoes)
taxa_acerto = accuracy_score(y_teste, previsoes)

v = ConfusionMatrix(GaussianNB())
v.fit(X_treino, y_treino)
v.score(X_teste, y_teste)
v.poof()

novo_credito = pd.read_csv('NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
labelEncoder(novo_credito)

nova_previsao = naive_bayes.predict(novo_credito)

print()
print('Seu novo cliente e: {} pagador'.format(nova_previsao[0]))
print()
from yellowbrick.classifier import ConfusionMatrix


if __name__ == '__main__':
    # Digits: fit logistic regression and save its confusion matrix.
    digits = load_digits()
    digit_X = digits.data
    digit_y = digits.target
    d_X_train, d_X_test, d_y_train, d_y_test = tts(
        digit_X, digit_y, test_size=0.2
    )
    model = LogisticRegression()
    digit_cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9])
    digit_cm.fit(d_X_train, d_y_train)
    digit_cm.score(d_X_test, d_y_test)
    d = digit_cm.poof(outpath="images/confusion_matrix_digits.png")


    # Iris: same flow with named classes via the label encoder.
    iris = load_iris()
    iris_X = iris.data
    iris_y = iris.target
    iris_classes = iris.target_names
    i_X_train, i_X_test, i_y_train, i_y_test = tts(
        iris_X, iris_y, test_size=0.2
    )
    model = LogisticRegression()
    iris_cm = ConfusionMatrix(
        model, classes=iris_classes,
        label_encoder={0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    )
    iris_cm.fit(i_X_train, i_y_train)
)  # Usando a entropy  ganho de informação para o calculo dos atributos mais importantes
modelo1.fit(X_treinamento,
            y_treinamento)  # fit the decision tree on the 700 training records
export_graphviz(modelo1, out_file='modelo1.dot'
                )  # export the tree for visualization (file: modelo1.dot)

# Make predictions with model 1
previsoes1 = modelo1.predict(X_teste)  # test on the 300 hold-out records
accuracy_score(y_teste,
               previsoes1)  # fraction of correct predictions

# Build the confusion matrix for model 1
confusao1 = ConfusionMatrix(modelo1)
confusao1.fit(X_treinamento, y_treinamento)
confusao1.score(X_teste, y_teste)
confusao1.poof()

# Model 2
modelo2 = DecisionTreeClassifier(
    criterion='entropy',
    min_samples_split=20)  # same entropy criterion, pre-pruned via min_samples_split
modelo2.fit(X_treinamento,
            y_treinamento)  # fit on the 700 training records
export_graphviz(modelo2, out_file='modelo2.dot'
                )  # export the tree for visualization (file: modelo2.dot)

# Make predictions with model 2
previsoes2 = modelo2.predict(X_teste)  # test on the 300 hold-out records
accuracy_score(y_teste,
               previsoes2)  # fraction of correct predictions
"""
como foi constatado na comparação entre previsões e y_teste, há algumas previsões que vieram com erro (isso é comum)
precisamos agora contabilizar essa taxa de erros
"""

confusao = confusion_matrix(y_teste, previsoes)

#obtivemos 71% de acerto com esse algoritmo
taxa_acerto = accuracy_score(y_teste, previsoes)
taxa_erro = 1 - taxa_acerto
"""
como na matrix de confusão gerada pela confusion_matrix da sklearn não conseguimos distinguir os valores pra good e bad,
vamos importar a ConfusionMatrix da yellowbrick e gerar uma nova matriz de confusão

no resultado lê-se:

    ------------------------------------------------------------
    | bad classificado como bad  | bad classificado como good  |
    ------------------------------------------------------------
    | good classificado como bad | good classificado como good |
    ------------------------------------------------------------

"""

from yellowbrick.classifier import ConfusionMatrix

visualizador = ConfusionMatrix(GaussianNB())
visualizador.fit(X_treinamento, y_treinamento)
visualizador.score(X_teste, y_teste)
visualizador.poof()  #para visualizar