def test_pandas_confusion_normalized():
    """Check the row totals of a normalized confusion matrix.

    The row totals of the normalized frame must add up to the number of
    rows of the frame, i.e. every row is expected to sum to 1.
    """
    import math  # local import keeps the test self-contained

    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)
    # Normalized cells are floats: compare with a tolerance instead of `==`
    # so that rounding in the divisions cannot make the test flaky.
    assert math.isclose(df_norm.sum(axis=1).sum(), len(df))
def test_pandas_confusion_normalized_issue1():
    """Regression test for issue 1: normalizing with a class that never occurs in ``y_true``.

    Every true label is 'business', while the predictions also contain
    'health'.  Row totals that come out as NaN are counted as 1 before the
    totals are compared with the number of rows.
    """
    # should ensure issue 1 is fixed
    # see http://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054#31720054
    import math  # local import keeps the test self-contained

    y_true = ['business', 'business', 'business', 'business', 'business',
              'business', 'business', 'business', 'business', 'business',
              'business', 'business', 'business', 'business', 'business',
              'business', 'business', 'business', 'business', 'business']

    y_pred = ['health', 'business', 'business', 'business', 'business',
              'business', 'health', 'health', 'business', 'business', 'business',
              'business', 'business', 'business', 'business', 'business',
              'health', 'health', 'business', 'health']

    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)
    # skipna=False keeps NaN row totals visible so that fillna(1) can replace
    # them explicitly.
    row_totals = df_norm.sum(axis=1, skipna=False).fillna(1)
    # Float totals: compare with a tolerance rather than exact equality.
    assert math.isclose(row_totals.sum(), len(df))
# Example #3
0
def train_test_and_evaluate(pipeline, X_train, y_train, X_test, y_test):
    """Fit ``pipeline``, predict on the test split and print evaluation metrics.

    Prints the confusion matrix followed by the macro-averaged F1,
    precision and recall scores.

    Args:
        pipeline: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: training features passed to ``pipeline.fit``.
        y_train: training targets passed to ``pipeline.fit``.
        X_test: features to predict on.
        y_test: ground-truth targets for ``X_test``.

    Returns:
        tuple: ``(pipeline, matrix.to_dataframe(), y_pred_class)`` - the
        fitted pipeline, the confusion matrix as a DataFrame and the
        predictions for ``X_test``.
    """
    pipeline.fit(X_train, y_train)
    y_pred_class = pipeline.predict(X_test)
    # NOTE(review): these strings look like axis titles rather than class
    # labels - confirm that `labels=` is the intended keyword here.
    matrix = ConfusionMatrix(y_test,
                             y_pred_class,
                             labels=['True Value', 'Predicted Value'])
    print('-' * 75 + '\nConfusion Matrix\n')
    print(matrix)
    print('f1_score', f1_score(y_test, y_pred_class, average="macro"))
    print('precision', precision_score(y_test, y_pred_class, average="macro"))
    print('recall', recall_score(y_test, y_pred_class, average="macro"))

    return pipeline, matrix.to_dataframe(), y_pred_class
def get_confusion_matrix(_results_file):
    """Plot, save and print the confusion matrix stored in a results file.

    The file is read as tab-separated text without a header row; column 1
    holds the true labels and column 2 the predicted labels.  The plot is
    saved next to the input, with '.txt' replaced by '_cm.jpg' in the path.
    Per-class recall/precision and the overall accuracy are printed as
    percentages.

    Args:
        _results_file: path of the tab-separated results file.
    """
    # Read from the parameter: the previous code referenced an undefined
    # `results_file` name and raised NameError unless such a global existed.
    df = pd.read_csv(_results_file, sep='\t', header=None)
    true_lbls = df[1]
    pred_lbls = df[2]
    # Named `conf_mat` so it does not shadow a module-level
    # `confusion_matrix` function.
    conf_mat = ConfusionMatrix(true_lbls, pred_lbls)
    conf_mat.plot()
    cm_file = _results_file.replace('.txt', '_cm.jpg')
    plt.savefig(cm_file)

    print()
    print(conf_mat)
    print()
    cm = conf_mat.to_dataframe()
    # pandas_ml lays the frame out with actual classes on the rows and
    # predicted classes on the columns, so column totals count predictions
    # per class and row totals count true samples per class.
    predicted_totals = cm.sum(axis=0)
    actual_totals = cm.sum(axis=1)
    correct = 0
    for i in range(cm.shape[0]):
        # Positional access throughout: integer class labels must not be
        # mistaken for positions (or the other way round).
        hits = cm.iloc[i, i]
        correct += hits
        recall = hits * 100 / actual_totals.iloc[i]
        prec = hits * 100 / predicted_totals.iloc[i]
        print('Class %s recall = %.4f precision = %.4f' %
              (cm.columns[i], recall, prec))
    print('Overall accuracy = %.4f' %
          float(correct * 100 / predicted_totals.sum()))
def main(save, show):
    """Demonstrate printing, plotting and binarizing a ``ConfusionMatrix``.

    Args:
        save: when truthy, each figure is written to the ``screenshots``
            directory one level above this file's directory.
        show: when truthy, each figure is displayed with ``plt.show()``.
    """
    here = os.path.dirname(__file__)

    def _finish_figure(name):
        # Persist and/or display the figure that was just plotted.
        if save:
            plt.savefig(os.path.join(here, '..', 'screenshots', name))
        if show:
            plt.show()

    actual = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
              'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    predicted = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
                 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(actual, predicted)

    print("Confusion matrix:\n%s" % cm)
    frame = cm.to_dataframe()
    print(frame)
    print(frame.dtypes)

    # First the raw plot, then the normalized one.
    for plot_kwargs, name in (({}, 'cm.png'),
                              ({'normalized': True}, 'cm_norm.png')):
        cm.plot(**plot_kwargs)
        _finish_figure(name)

    cm.print_stats()
    print(cm.classification_report)

    # Same data through scikit-learn, for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(actual, predicted))
    print(classification_report(actual, predicted))

    print("Binarize a confusion matrix")
    cm = ConfusionMatrix(
        ["cat", "ant", "cat", "cat", "ant", "bird"],
        ["ant", "ant", "cat", "cat", "ant", "cat"],
    )
    print(cm)
    # Select 'ant' and 'cat'; 'bird' is the only class left out.
    print(cm.binarize(['ant', 'cat']))
# Example #6
0
def main(save, show):
    """Run the ``ConfusionMatrix`` demo: print, plot, report and binarize.

    Args:
        save: when truthy, save each figure into the ``screenshots``
            directory one level above this file's directory.
        show: when truthy, display each figure with ``plt.show()``.
    """
    basepath = os.path.dirname(__file__)

    def _plot_and_emit(matrix, filename, **plot_kwargs):
        # Draw the matrix, then save and/or show the figure as requested.
        matrix.plot(**plot_kwargs)
        if save:
            target = os.path.join(basepath, '..', 'screenshots', filename)
            plt.savefig(target)
        if show:
            plt.show()

    truth = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
        'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit',
    ]
    guesses = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    cm = ConfusionMatrix(truth, guesses)

    print("Confusion matrix:\n%s" % cm)
    table = cm.to_dataframe()
    print(table)
    print(table.dtypes)

    _plot_and_emit(cm, 'cm.png')
    _plot_and_emit(cm, 'cm_norm.png', normalized=True)

    cm.print_stats()
    print(cm.classification_report)

    # Cross-check against scikit-learn's own implementations.
    sk_matrix = confusion_matrix(truth, guesses)
    print("sklearn confusion_matrix:\n%s" % sk_matrix)
    print(classification_report(truth, guesses))

    print("Binarize a confusion matrix")
    truth = ["cat", "ant", "cat", "cat", "ant", "bird"]
    guesses = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(truth, guesses)
    print(cm)
    # 'ant' and 'cat' are selected; 'bird' is the remaining class.
    binarized = cm.binarize(['ant', 'cat'])
    print(binarized)
# Plot cv_scores against c as a line with circle markers, using a
# logarithmic x axis.
plt.plot(c, cv_scores, '-o')
plt.xscale('log')


# In[ ]:

# Predict on the test features and print the accuracy against y_test.
predicted = clf.predict(X_test)
expected = y_test
print(accuracy_score(expected, predicted))


# In[ ]:

# Print the log loss of the predicted probabilities against y_test.
predicted_probs = clf.predict_proba(X_test)
print(log_loss(y_test, predicted_probs))


# In[ ]:

# Build the confusion matrix, divide every row by its own total
# (axis=1 hands each row to the lambda) and write the result to CSV.
cm = ConfusionMatrix(expected, predicted)
cm_stats = cm.to_dataframe().apply(lambda x: x/sum(x), axis=1)
cm_stats.to_csv('data/confusion_matrix_stats.csv')


# In[ ]:

# Set the default figure size to 10 x 5, then plot the normalized matrix.
mpl.rcParams['figure.figsize'] = (10.0, 5.0)
cm.plot(normalized=True)