Example #1
def make_confusion_matrix(model, X_train, y_train, X_test, y_test):

    # Encode the labels once and reuse the same encoder for the test set
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train)
    y_test = encoder.transform(y_test)

    # Derive the class labels from the fitted encoder instead of hardcoding them
    classes = list(encoder.classes_)

    cm = ConfusionMatrix(model, classes=classes)
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)

    cm.show()
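A minimal usage sketch for the function above (not part of the original example). It assumes LabelEncoder and yellowbrick's ConfusionMatrix are already imported as in the snippet; the digits data and estimator are placeholder choices.

# Hypothetical usage; dataset and estimator are illustrative, not from the source.
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.2, random_state=0)

make_confusion_matrix(LogisticRegression(max_iter=1000),
                      X_train, y_train, X_test, y_test)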
Example #2
 def draw_confusion_matrix(self):
     visualizer = ConfusionMatrix(self.model,
                                  classes=self.le.classes_,
                                  label_encoder=self.le)
     visualizer.fit(self.training_data, self.training_labels)
     visualizer.score(self.test_data, self.test_labels)
     visualizer.poof()
Example #3
def showConfusionMatrix():
    # First do our imports
    from sklearn.datasets import load_digits
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    from yellowbrick.classifier import ConfusionMatrix
    # We'll use the handwritten digits data set from scikit-learn.
    # Each feature of this dataset is an 8x8 pixel image of a handwritten number.
    # Digits.data converts these 64 pixels into a single array of features
    digits = load_digits()
    X = digits.data
    y = digits.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=11)

    model = LogisticRegression()

    #The ConfusionMatrix visualizer takes a model
    cm = ConfusionMatrix(model, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    #Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
    cm.fit(X_train, y_train)

    #To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
    #and then creates the confusion_matrix from scikit learn.
    cm.score(X_test, y_test)

    #How did we do?
    cm.poof()
Example #4
def evaluation(estimator, X, Y, x, y):

    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    #Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.fit(X, Y)
    cmm.score(x, y)

    #ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    #Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
Example #5
def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names)
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png', size=(1080, 720))

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080, 720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
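A hedged call sketch for eva_model (not from the original source). X must be a DataFrame because the function reads X.columns, so load_wine(as_frame=True) stands in as a dataset; the parameter values are assumptions.

# Hypothetical usage; dataset and parameters are placeholders.
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

wine = load_wine(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=0)

f1 = eva_model(c=1.0, n=5, X=X_train, y=y_train,
               X_test=X_test, y_test=y_test,
               class_names=list(wine.target_names), outdir='.')
print('weighted F1:', f1)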
Example #6
def nice_confusion(model, X_train, X_test, y_train, y_test):
    """Creates a nice looking confusion matrix"""
    plt.figure(figsize=(10, 10))
    plt.xlabel('Predicted Class', fontsize=18)
    plt.ylabel('True Class', fontsize=18)
    viz = ConfusionMatrix(model, cmap='PuBu', fontsize=18)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.show()
Example #7
def log_confusion_matrix_chart(classifier,
                               X_train,
                               X_test,
                               y_train,
                               y_test,
                               experiment=None):
    """Log confusion matrix.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active, most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Confusion Matrix')
        plt.close(fig)
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))
Example #8
    def get_confusion_matrix(self, on="test"):
        cm = ConfusionMatrix(self.pipe)
        if on == "test":
            cm.score(self._X_test, self._y_test)
        elif on == "train":
            cm.score(self._X_train, self._y_train)
        elif on == "all":
            cm.score(self.X, self.y)

        # graph the confusion matrix with yellowbrick
        cm.poof()
Example #9
def create_confusion_matrix_chart(classifier, X_train, X_test, y_train,
                                  y_test):
    """Create confusion matrix.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/confusion_matrix'] = \
                npt_utils.create_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Confusion Matrix chart. Error: {}'.format(e))

    return chart
Example #10
def confusion_matrix(model, classes, X_train, Y_train, X_test, Y_test):
    from yellowbrick.classifier import ConfusionMatrix
    iris_cm = ConfusionMatrix(model,
                              classes=classes,
                              label_encoder={
                                  0: classes[0],
                                  1: classes[1]
                              })

    iris_cm.fit(X_train, Y_train)
    iris_cm.score(X_test, Y_test)

    iris_cm.poof()
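One possible invocation of the helper above (an illustration, not from the original). The label_encoder dict inside maps 0 and 1, so a two-class dataset is assumed.

# Hypothetical usage on a binary dataset; imports are assumptions.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
confusion_matrix(GaussianNB(), ['malignant', 'benign'],
                 X_train, y_train, X_test, y_test)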
Example #11
def yellowbrick_visualizations(model, classes, X_tr, y_tr, X_te, y_te):
    visualizer = ConfusionMatrix(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ClassificationReport(model, classes=classes, support=True)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()

    visualizer = ROCAUC(model, classes=classes)
    visualizer.fit(X_tr, y_tr)
    visualizer.score(X_te, y_te)
    visualizer.show()
Example #12
 def confusion_matrix(self, class_name_dict) -> None:
     """Plot a confusion matrix."""
     cm = ConfusionMatrix(self.trained_model,
                          classes=list(class_name_dict.keys()),
                          label_encoder=class_name_dict)
     cm.fit(self.X_train, self.y_train)
     cm.score(self.X_test, self.y_test)
     save_dir = f"{self.plots_dir}/confusion_matrix_{self.model_id}.png"
     cm.show(outpath=save_dir)
     if not LOCAL:
         upload_to_s3(save_dir,
                      f'plots/confusion_matrix_{self.model_id}.png',
                      bucket=S3_BUCKET_NAME)
     plt.clf()
Example #13
def confusion(dataset):
    if dataset == "iris":
        data = load_iris()
    elif dataset == "digits":
        data = load_digits()
    else:
        raise ValueError("uknown dataset")

    X_train, X_test, y_train, y_test = tts(data.data,
                                           data.target,
                                           test_size=0.2)
    oz = ConfusionMatrix(LogisticRegression(), ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "confusion_matrix_{}".format(dataset))
Example #14
def find_best_k(X_train, y_train, X_test, y_test, min_k=1, max_k=25):
    '''Trains a K-Nearest Neighbors classifier on the passed training
    and testing subsets for every odd k value between min_k and max_k,
    then reports the evaluation metrics of the classifier with the
    optimal (highest F1) k value and plots its confusion matrix.

    Args:
        X_train (ndarray): Train subset X (data)
        y_train (Series): Train subset y (target labels)
        X_test (ndarray): Test subset X (data)
        y_test (Series): Test subset y (target labels)
        min_k (int): Minimum k value to try
        max_k (int): Maximum k value to try

    Prints:
        "Best Value for k: {}"
        "Accuracy: {}"
        "Precision: {}"
        "Recall: {}"
        "F1-Score: {}"

    Plots:
        Confusion Matrix
    '''
    best_k = 0
    best_score = 0.0
    best_metrics = (0.0, 0.0, 0.0)
    for k in range(min_k, max_k + 1, 2):
        # Instantiate and fit the classifier for this k
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        # Predict on the test set and compute metrics
        preds = knn.predict(X_test)
        accuracy = accuracy_score(y_test, preds)
        precision = precision_score(y_test, preds, average='macro')
        recall = recall_score(y_test, preds, average='macro')
        f1 = f1_score(y_test, preds, average='macro')
        if f1 > best_score:
            best_k = k
            best_score = f1
            best_metrics = (accuracy, precision, recall)
    # Refit with the best k so the confusion matrix reflects it
    knn = KNeighborsClassifier(n_neighbors=best_k)
    cm = ConfusionMatrix(knn, classes=y_train.unique())
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)
    accuracy, precision, recall = best_metrics
    print("Best Value for k: {}".format(best_k))
    print("Accuracy: {}".format(round(accuracy, 3)))
    print("Precision: {}".format(round(precision, 3)))
    print("Recall: {}".format(round(recall, 3)))
    print("F1-Score: {}".format(round(best_score, 3)))
    plt.tight_layout()
    cm.show()
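A sketch of how find_best_k might be called (assumed, not in the original). The labels are wrapped in a pandas Series because the function calls y_train.unique().

# Hypothetical usage; dataset choice and split parameters are assumptions.
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.25, random_state=42)
find_best_k(X_train, pd.Series(y_train), X_test, pd.Series(y_test))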
Example #15
    def plot_classifier_metrics(self):

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))

        visualgrid = [
            ConfusionMatrix(self.clf, ax=axes[0][0]),
            ClassificationReport(self.clf, ax=axes[0][1]),
            ROCAUC(self.clf, ax=axes[1][0]),
        ]
        fig.delaxes(axes[1, 1])
        for viz in visualgrid:
            viz.fit(self.X_train, self.y_train)
            viz.score(self.X_test, self.y_test)
            viz.finalize()
        plt.savefig('../docs/metrics_classifier.png')
        plt.show()
Example #16
def CM(pipe, X, y):
    delinq_cm = ConfusionMatrix(pipe,
                                classes=['Current', 'Delinquent'],
                                label_encoder={
                                    0: 'Current',
                                    1: 'Delinquent'
                                },
                                is_fitted=True,
                                percent=True)
    delinq_cm.score(X, y)
    delinq_cm.show()
    print(delinq_cm.confusion_matrix_)
    print(
        classification_report(y,
                              pipe.predict(X),
                              target_names=['Current', 'Delinquent'],
                              digits=4))
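Because CM passes is_fitted=True, it expects an already-trained pipeline; a minimal setup sketch (with assumed synthetic data, and assuming CM's own imports are in scope) might look like:

# Hypothetical setup; the pipeline and synthetic data are placeholders.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=500, n_classes=2, random_state=0)
pipe = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
CM(pipe, X, y)  # scores the pre-fitted pipeline without refitting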
Example #17
    def __init__(self,
                 X_train,
                 X_test,
                 y_train,
                 y_test,
                 labels,
                 model,
                 viz_selection,
                 upsampled=False):
        """
        Class for yellowbrick classifier visualizer

        Args:
            X_train: numpy ndarray of model features training data values
            X_test: numpy ndarray of model features test data values
            y_train: numpy ndarray of model target variable training data values
            y_test: numpy ndarray of model target variable test data values
            labels: list of class labels for binary classification
            model: sklearn estimator for classification
            viz_selection: string value used to reference yellowbrick classification visualizer
            upsampled: binary value to determine to which subdirectory output image should be saved

        """

        self.labels = labels
        self.model = model
        self.viz_selection = viz_selection
        self.upsampled = upsampled
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test

        if self.viz_selection == 'ClassificationReport':
            self.visualizer = ClassificationReport(self.model,
                                                   classes=self.labels,
                                                   support=True)
        elif self.viz_selection == 'ROCAUC':
            self.visualizer = ROCAUC(self.model,
                                     classes=self.labels)
        elif self.viz_selection == 'PrecisionRecallCurve':
            self.visualizer = PrecisionRecallCurve(self.model)
        elif self.viz_selection == 'ConfusionMatrix':
            self.visualizer = ConfusionMatrix(self.model, classes=self.labels)
        else:
            raise ValueError(
                "viz_selection does not match accepted values. View Visualizer Class for accepted values."
            )
Example #18
def draw_plots():
    classifier = MultinomialNB(alpha=0.01)

    for technique in ["base", "SMOTE", "ADASYN", "text-aug"]:
        X_train, X_test, y_train, y_test = get_baseline_split(representation="bow")
        if technique == "base":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = X_train, X_test, y_train, y_test
        elif technique == "SMOTE":
            X_plot_train, y_plot_train = smote.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "ADASYN":
            X_plot_train, y_plot_train = adasyn.run(X_train, y_train)
            X_plot_test, y_plot_test = X_test, y_test
        elif technique == "text-aug":
            X_plot_train, X_plot_test, y_plot_train, y_plot_test = text_augmentation.run(
                books_df=get_fully_processed_books_df(),
                representation="bow")
        else:
            raise ValueError("unknown technique: {}".format(technique))

        # ROC micro average
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=False)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # ROC - Per Class
        viz_roc = ROCAUC(classifier, classes=get_selected_genres(), micro=True, per_class=True)
        viz_roc.fit(X_plot_train, y_plot_train)  # Fit the training data to the viz_roc
        viz_roc.score(X_plot_test, y_plot_test)  # Evaluate the model on the test data
        viz_roc.show()  # Finalize and show the figure

        # Class Prediction Error
        viz_pred_err = ClassPredictionError(classifier, classes=get_selected_genres())
        viz_pred_err.fit(X_plot_train, y_plot_train)
        viz_pred_err.score(X_plot_test, y_plot_test)
        viz_pred_err.show()

        # The ConfusionMatrix
        cm = ConfusionMatrix(classifier, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        cm.fit(X_plot_train, y_plot_train)
        cm.score(X_plot_test, y_plot_test)
        cm.show()
Example #19
def plot_confusion_matrix(model: sklearn.base.BaseEstimator,
                          X_train: np.ndarray,
                          X_test: np.ndarray,
                          y_train: np.ndarray,
                          y_test: np.ndarray):
    """
    Plots confusion matrix for given model and train/test data.
    Inputs:
        model: an sklearn classifier
        X_train: training examples
        X_test: test examples
        y_train: training labels corresponding to examples in X_train
        y_test: test labels corresponding to examples in X_test
    Returns: None
    """
    model_cm = ConfusionMatrix(model)
    model_cm.fit(X_train, y_train)
    model_cm.score(X_test, y_test)
    model_cm.poof()
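An illustrative call (not part of the original snippet), assuming ConfusionMatrix is imported:

# Hypothetical usage; iris and the random forest are placeholder choices.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
plot_confusion_matrix(RandomForestClassifier(),
                      X_train, X_test, y_train, y_test)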
Example #20
def classifier_report(classifier, X_test, y_test):
    classes = np.unique(y_test)
    cm = ConfusionMatrix(classifier, classes=classes)
    cm.fit(X_test, y_test)
    cm.score(X_test, y_test)
    filename = classifier.__class__.__name__ + '_confusion_matrix.png'
    cm.poof(outpath=filename,
            clear_figure=True,
            transparent=False,
            dpi=80,
            bbox_inches='tight')
    ex.add_artifact(filename)
    visualizer = ClassificationReport(classifier,
                                      classes=classes,
                                      support=True)
    visualizer.fit(X_test, y_test)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath="classification_report.png",
                    clear_figure=True,
                    transparent=False,
                    dpi=80,
                    bbox_inches='tight')
    ex.add_artifact('classification_report.png')
Example #21
    def store_experiment_data(self, X_test, y_test):
        class_report = ClassificationReport(self.model)
        score = class_report.score(X_test, y_test)
        class_report.poof(
            'metrics/classification_report.png', clear_figure=True)
        self.ex.add_artifact('metrics/classification_report.png')

        conf_matrix = ConfusionMatrix(self.model)
        conf_matrix.score(X_test, y_test)
        conf_matrix.poof(
            'metrics/confusion_matrix.png', clear_figure=True)
        self.ex.add_artifact('metrics/confusion_matrix.png')

        cpd = ClassPredictionError(self.model)
        cpd.score(X_test, y_test)
        cpd.poof('metrics/class_prediction_error.png', clear_figure=True)
        self.ex.add_artifact('metrics/class_prediction_error.png')

        print('score=', score)
        self.ex.log_scalar('score', score)
Example #22
    def get_confusion_matrix(self, on="test"):
        """
        Produces a confusion matrix made through the yellowbrick package.

        Input
        -----
        on : string (default='test')
            Determines which set of data to score and create a confusion matrix on.
            Default is 'test', meaning it will make a confusion matrix of the test results.
            'train' and 'all' are alternative values.
        """

        cm = ConfusionMatrix(self.pipe)
        if on == "test":
            cm.score(self._X_test, self._y_test)
        elif on == "train":
            cm.score(self._X_train, self._y_train)
        elif on == "all":
            cm.score(self._X, self._y)

        # graph the confusion matrix with yellowbrick
        cm.poof()
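The method assumes a host object exposing self.pipe and the cached splits; the container below is purely an assumption, sketched to show which attributes must exist.

# Hypothetical host class; attribute names mirror what the method reads.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

class Evaluator:
    def __init__(self, pipe, X_train, X_test, y_train, y_test):
        self.pipe = pipe.fit(X_train, y_train)
        self._X_train, self._y_train = X_train, y_train
        self._X_test, self._y_test = X_test, y_test
        # 'all' scores on the combined data (assumes array inputs)
        self._X = np.vstack([X_train, X_test])
        self._y = np.concatenate([y_train, y_test])

    # get_confusion_matrix (defined above) would be a method here

X, y = load_iris(return_X_y=True)
ev = Evaluator(LogisticRegression(max_iter=500),
               *train_test_split(X, y, random_state=0))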
Example #23
def plot_confusion_matrix(X_train, y_train, X_test, y_test, model, encoder):
    """
    Function to plot a confusion matrix
    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model to test performance for
    :param encoder: label encoder (or dict) used to decode class labels
    :return: Confusion matrix plot
    """
    # The ConfusionMatrix visualizer takes a model
    cm = ConfusionMatrix(model, encoder=encoder)

    # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
    cm.fit(X_train, y_train)

    # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
    # and then creates the confusion_matrix from scikit-learn.
    cm.score(X_test, y_test)

    cm.show()
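One way the encoder argument might be supplied (an assumed example; yellowbrick's ConfusionMatrix accepts a dict mapping class values to display labels):

# Hypothetical usage; the data, model, and label mapping are placeholders.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
plot_confusion_matrix(X_train, y_train, X_test, y_test,
                      RandomForestClassifier(),
                      encoder={0: 'malignant', 1: 'benign'})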
Example #24
                                      '12 Choices'))

# make dummy variable for allergy column
df = pd.get_dummies(df, columns=['allergy'])

# define feature matrix and target variable
X = df[['choice_confidence', 'allergy_No']]
y = df['num_choices']

# split and train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

model = LogisticRegression()

# produce confusion matrix
cm = ConfusionMatrix(model)
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
cm.poof()

# calculate accuracy of model
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# (Drew)
# dummy variables for categorical food data for prediction models to make numerical variables
one_hot = pd.get_dummies(df['dinner_choice'])
df = df.drop('Timestamp', axis= 1)
df = df.drop('age', axis = 1)
df = df.drop('date', axis = 1)
Example #25
            vetor[:, i] = labelencoder.fit_transform(vetor[:, i])


labelEncoder(previsores)

X_treino, X_teste, y_treino, y_teste = train_test_split(previsores,
                                                        classe,
                                                        test_size=0.3,
                                                        random_state=0)

naive_bayes = GaussianNB()
naive_bayes.fit(X_treino, y_treino)

previsoes = naive_bayes.predict(X_teste)
confusao = confusion_matrix(y_teste, previsoes)
taxa_acerto = accuracy_score(y_teste, previsoes)

v = ConfusionMatrix(GaussianNB())
v.fit(X_treino, y_treino)
v.score(X_teste, y_teste)
v.poof()

novo_credito = pd.read_csv('NovoCredit.csv')
novo_credito = novo_credito.iloc[:, 0:20].values
labelEncoder(novo_credito)

nova_previsao = naive_bayes.predict(novo_credito)

print()
print('Your new client is a {} payer'.format(nova_previsao[0]))
print()
Example #26
print('X matrix dimensionality:', X.shape)
print('Y vector dimensionality:', Y.shape)

# split the data into a training set and a test set
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=0.20, random_state=10)
# print("X_train: ", X_train.shape)
# print("X_validation: ", X_validation.shape))
# print("Y_train: ", Y_train.shape))
# print("Y_validation: ", Y_validation.shape))

gaussianNB = GaussianNB()

cm = ConfusionMatrix(
    gaussianNB,
    classes="A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z".split(','))

# train the model using the training sets
cm.fit(X_train, Y_train)

cm.score(X_validation, Y_validation)

# predict the responses for test dataset
predictions = cm.predict(X_validation)

# accuracy classification score
print("Accuracy: ", accuracy_score(Y_validation, predictions))

# compute confusion matrix
print(confusion_matrix(Y_validation, predictions))
Example #27
# Create a variable holding the response variable (y)
classe = dados.iloc[:, 1].values

# Convert the categorical columns into numeric columns
labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])

# Split the data into training and test sets
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size=0.3, random_state=0)

# Create the Random Forest algorithm and train it
floresta = RandomForestClassifier(criterion='gini', random_state=0)
floresta.fit(X_treinamento, y_treinamento)

# Make predictions on the test set
previsoes = floresta.predict(X_teste)

# Store the confusion matrix in a variable
confusao = confusion_matrix(y_teste, previsoes)

# Compute the Random Forest's hit and error rates
taxa_acerto = accuracy_score(y_teste, previsoes)
taxa_erro = 1 - taxa_acerto

# Render the confusion matrix image
v = ConfusionMatrix(floresta)
v.fit(X_treinamento, y_treinamento)
v.score(X_teste, y_teste)
v.poof()

'''Note: as observed, the accuracy was approximately 80%, with 30% of the data held out for testing.'''
Example #28
def score_model_outcome(X_train, y_train, X_test, y_test, model, **kwargs):
    """ A function that returns the different metrics of accuracy, confusion matrix and other model reports depending on the type of model that is asked.
    
    This function is for prognosis

    Parameters
    ----------
    X_train: matrix of training features
    
    y_train: vector of training labels
    
    X_test: matrix of test features
    
    y_test: vector of test labels

    Returns
    -------
    
    - Accuracy, F1 score and ROC_AUC for the train and test set
    
    - Confusion matrix
    
    - ClassificationReport
    
    - PrecisionRecallCurve
    
    - ClassPredictionError
    
    """

    # Train the model
    model.fit(X_train, y_train, **kwargs)

    # Predict on the train set
    prediction_train = model.predict(X_train)

    # Compute metrics for the train set
    accuracy_train = accuracy_score(y_train, prediction_train)

    # False Positive Rate, True Positive Rate, Threshold
    fpr_train, tpr_train, thresholds_train = roc_curve(y_train,
                                                       prediction_train)
    auc_train = auc(fpr_train, tpr_train)

    f1_score_train = f1_score(y_train, prediction_train)

    # Predict on the test set
    prediction_test = model.predict(X_test)

    accuracy_test = accuracy_score(y_test, prediction_test)

    fpr_test, tpr_test, thresholds_test = roc_curve(y_test, prediction_test)
    auc_test = auc(fpr_test, tpr_test)

    f1_score_test = f1_score(y_test, prediction_test)

    print("{}:".format(model.__class__.__name__))
    # Compute and return F1 (harmonic mean of precision and recall)
    print(
        "On training we get an Accuracy {}, an AUC {} and F1 score {} ".format(
            accuracy_train, auc_train, f1_score_train))

    print("For test we get an Accuracy {}, an AUC {} and F1 score {}".format(
        accuracy_test, auc_test, f1_score_test))

    fig, axes = plt.subplots(3, 2, figsize=(20, 20))

    visualgrid = [
        ConfusionMatrix(model,
                        ax=axes[0][0],
                        classes=['Death', 'Survival'],
                        cmap="YlGnBu"),
        ClassificationReport(
            model,
            ax=axes[0][1],
            classes=['Death', 'Survival'],
            cmap="YlGn",
        ),
        PrecisionRecallCurve(model, ax=axes[1][0]),
        ClassPredictionError(model,
                             classes=['Death', 'Survival'],
                             ax=axes[1][1]),
    ]

    for viz in visualgrid:
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()

    try:
        roc_auc(model,
                X_train,
                y_train,
                X_test=X_test,
                y_test=y_test,
                classes=['Death', 'Survival'],
                ax=axes[2][0])
    except Exception:
        print('Cannot plot ROC curve for this model')

    try:
        viz = FeatureImportances(model,
                                 ax=axes[2][1],
                                 stack=True,
                                 relative=False)
        viz.fit(X_train, y_train)
        viz.score(X_test, y_test)
        viz.finalize()
    except Exception:
        print('Model does not expose feature importances')

    plt.show()
    print('\n')
Example #29
previsores = dados.iloc[:, [0, 3]].values

# Create a variable holding the response variable (y)
classe = dados.iloc[:, 1].values

# Convert the categorical columns into numeric columns
labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])

# Standardize the predictor attributes
scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

# Split the data into training and test sets
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(previsores, classe, test_size=0.3, random_state=0)

# Create the SVM classifier and train the algorithm
svm = SVC(random_state=1, C=2.0, gamma='auto')
svm.fit(X_treinamento, y_treinamento)
previsoes = svm.predict(X_teste)

# View the accuracy and the confusion matrix
precisao = accuracy_score(y_teste, previsoes)
matriz = confusion_matrix(y_teste, previsoes)

v = ConfusionMatrix(svm)
v.fit(X_treinamento, y_treinamento)
v.score(X_teste, y_teste)
v.poof()

'''Note: as observed, the model's accuracy was approximately 62.2%, with 30% of the data held out for testing.'''
Example #30
classes = ["non-seizure", "seizure"]
#viz_classification_report= ClassificationReport(gb_classifier , classes=classes)
#viz_classification_report.fit(X_train, y_train)  # Fit the visualizer and the model
#viz_classification_report.score(X_test, y_test)  # Evaluate the model on the test data
#c = viz_classification_report.poof()

# Instantiate the visualizer with the classification model
#viz_ROC = ROCAUC(gb_classifier, classes=classes)
#viz_ROC.fit(X_train, y_train)  # Fit the training data to the visualizer
#viz_ROC.score(X_test, y_test)  # Evaluate the model on the test data
#g = viz_ROC.poof()             # Draw/show/poof the data

# The ConfusionMatrix visualizer takes a model
cm = ConfusionMatrix(gb_classifier,
                     classes=classes,
                     label_encoder={
                         0: 'non-seizure',
                         1: 'seizure'
                     })
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
c = cm.poof()
plt.tight_layout()

#Latency
X_test_latency = df_seizures_power_22_states[
    df_seizures_power_22_states.columns[:-2]]
latency = gb_classifier.predict(X_test_latency)

latency_test = pd.concat(
    [df_seizures_power_22_states['State'],
     pd.Series(latency)], axis=1)