Пример #1
0
def number_pred(save, show):
    """Demonstrate ``ConfusionMatrix`` on integer class labels.

    Prints the matrix, plots it, optionally saves and/or shows the figure,
    and finally prints the matrix statistics.

    Parameters
    ----------
    save : bool
        When truthy, write the current figure to
        ``<basepath>/../screenshots/numbers.png``.
    show : bool
        When truthy, display the figure with ``plt.show()``.
    """
    actual = [
        600, 200, 200, 200, 200, 200, 200, 200, 500, 500,
        500, 200, 200, 200, 200, 200, 200, 200, 200, 200,
    ]
    predicted = [
        100, 200, 200, 100, 100, 200, 200, 200, 100, 200,
        500, 100, 100, 100, 100, 100, 100, 100, 500, 200,
    ]
    matrix = ConfusionMatrix(actual, predicted)
    print(matrix)

    matrix.plot()
    if save:
        # NOTE(review): ``basepath`` is not bound inside this function; it
        # has to come from an enclosing scope - confirm it is defined there.
        target = os.path.join(basepath, '..', 'screenshots', 'numbers.png')
        plt.savefig(target)
    if show:
        plt.show()

    matrix.print_stats()
def test_pandas_confusion_cm_stats_integers():
    """Check that ``stats()`` returns an ``OrderedDict`` for integer labels.

    The labels are echoed first and the statistics are printed at the end.
    """
    actual = [
        600, 200, 200, 200, 200, 200, 200, 200, 500, 500,
        500, 200, 200, 200, 200, 200, 200, 200, 200, 200,
    ]
    predicted = [
        100, 200, 200, 100, 100, 200, 200, 200, 100, 200,
        500, 100, 100, 100, 100, 100, 100, 100, 500, 200,
    ]
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    summary = matrix.stats()
    assert isinstance(summary, OrderedDict)
    matrix.print_stats()
def test_value_counts():
    """Compare the matrix margins with pandas ``value_counts``.

    ``cm.true`` and ``cm.pred`` are each subtracted from the value counts of
    the corresponding column, and the differences must sum to zero.
    """
    columns = {
        'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
        'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
        'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
        'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
    }
    frame = pd.DataFrame(columns)
    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])

    # True-class totals must agree with the raw label counts.
    true_delta = matrix.true - frame["Size"].value_counts()
    assert true_delta.sum() == 0

    # Same check for the predicted-class totals.
    pred_delta = matrix.pred - frame["SizePred"].value_counts()
    assert pred_delta.sum() == 0

    matrix.print_stats()
def test_pandas_confusion_cm_stats_animals():
    """Check stats, population and per-class true positives on string labels.

    For each class the ``TP`` of the binarized matrix must equal the value
    returned by ``cm.get(label)``; for "cat" it must additionally equal 3.
    """
    actual = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
        'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit',
    ]
    predicted = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    print("y_true: %s" % actual)
    print("y_pred: %s" % predicted)

    matrix = ConfusionMatrix(actual, predicted)
    assert isinstance(matrix.stats(), OrderedDict)
    assert matrix.population == len(actual)  # 12 samples
    matrix.print_stats()
    matrix.stats()  # called again as in the original; the result is not used

    assert matrix.binarize("cat").TP == matrix.get("cat")
    assert matrix.binarize("cat").TP == 3
    for label in ("dog", "rabbit"):
        assert matrix.binarize(label).TP == matrix.get(label)
Пример #5
0
def size_pred(save, show):
    """Demonstrate ``ConfusionMatrix`` on clothing-size labels held in a DataFrame.

    Prints the matrix and its statistics, plots it, then optionally saves
    and/or shows the figure.

    Parameters
    ----------
    save : bool
        When truthy, write the current figure to
        ``<basepath>/../screenshots/size.png``.
    show : bool
        When truthy, display the figure with ``plt.show()``.
    """
    columns = {
        'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
        'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
        'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
        'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
    }
    frame = pd.DataFrame(columns)
    matrix = ConfusionMatrix(frame["Size"], frame["SizePred"])
    print(matrix)

    matrix.print_stats()

    matrix.plot()
    if save:
        # NOTE(review): ``basepath`` is not bound inside this function; it
        # has to come from an enclosing scope - confirm it is defined there.
        target = os.path.join(basepath, '..', 'screenshots', 'size.png')
        plt.savefig(target)
    if show:
        plt.show()
Пример #6
0
def _roi(counts):
    """Return the payoff of a 2x2 confusion matrix ``counts``.

    Cell weights used by this script: +100 for ``[1, 1]``, +15 for
    ``[0, 0]``, -15 for ``[0, 1]`` and -30 for ``[1, 0]``.
    NOTE(review): with scikit-learn's ``confusion_matrix`` rows are the true
    class and columns the predicted class - confirm that is the function
    imported by this file.
    """
    return (counts[1, 1] * 100 + counts[0, 0] * 15
            + counts[0, 1] * (-15) + counts[1, 0] * (-30))


neigh = neigh.fit(X, y)
# Class probabilities for the training set; column 1 is used below.
y_predicted_train = neigh.predict_proba(X)

# Scan the thresholds 0.01 .. 0.99 and keep the one with the highest payoff
# on the training set. ``maxrev`` starts at 0, so a threshold is only adopted
# when its payoff is strictly positive; otherwise the default 0.5 is kept.
maxrev = 0
final_threshold = 0.5
for step in range(1, 100):
    thresh = 0.01 * step
    # Vectorized "1 if p > thresh else 0"; the loop variable is no longer
    # shadowed by a comprehension variable of the same name.
    predicted_y_train = (y_predicted_train[:, 1] > thresh).astype(int)
    cmatrix = confusion_matrix(y, predicted_y_train)
    newROI = _roi(cmatrix)
    if newROI > maxrev:
        maxrev = newROI
        final_threshold = thresh

# Class probabilities for the test set, classified with the chosen threshold.
y_predicted_test = neigh.predict_proba(X_test)
predicted_y_test = (y_predicted_test[:, 1] > final_threshold).astype(int)

# Statistics on the test set.
cm = ConfusionMatrix(y_test, predicted_y_test)
cm.print_stats()
acc = accuracy_score(y_test, predicted_y_test)
cmatrix = confusion_matrix(y_test, predicted_y_test)
ROI = _roi(cmatrix)
def main(save, show):
    """Walk through the main ``ConfusionMatrix`` features on toy animal labels.

    Prints the matrix and its DataFrame form, plots the raw and the
    normalized matrix, prints the statistics next to the ``confusion_matrix``
    / ``classification_report`` output, and finally prints a binarized matrix.

    Parameters
    ----------
    save : bool
        When truthy, write each figure into ``../screenshots`` relative to
        this file (``cm.png`` and ``cm_norm.png``).
    show : bool
        When truthy, display each figure with ``plt.show()``.
    """
    basepath = os.path.dirname(__file__)
    screenshots = os.path.join(basepath, '..', 'screenshots')

    actual = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
        'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit',
    ]
    predicted = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    cm = ConfusionMatrix(actual, predicted)

    print("Confusion matrix:\n%s" % cm)
    frame = cm.to_dataframe()
    print(frame)
    print(frame.dtypes)

    # Raw counts first, then the normalized variant; each has its own file.
    plot_variants = (
        ({}, 'cm.png'),
        ({'normalized': True}, 'cm_norm.png'),
    )
    for plot_options, filename in plot_variants:
        cm.plot(**plot_options)
        if save:
            plt.savefig(os.path.join(screenshots, filename))
        if show:
            plt.show()

    cm.print_stats()
    print(cm.classification_report)

    # Reference output from the separately imported helpers, for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(actual, predicted))
    print(classification_report(actual, predicted))

    print("Binarize a confusion matrix")
    actual = ["cat", "ant", "cat", "cat", "ant", "bird"]
    predicted = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(actual, predicted)
    print(cm)
    # 'ant' and 'cat' are passed to binarize(); per the original note, a bird
    # is not a "land_animal".
    binary_cm = cm.binarize(['ant', 'cat'])
    print(binary_cm)
Пример #8
0
    votes = dict.fromkeys(alpha, 0) # reset dictionary for next test case
    y.append(count) # add classfication to array for confusion matrix
    if count == tcf.index[i]: #if the vote matches the known value of the target incremement correct
        t_correct += 1

# ***************************************************************************************************
# **************************** Creates and Displays Confusion Matrix ********************************
# ***************************************************************************************************

# Report section: prints the accuracy, then three views of the confusion
# matrix (ConfusionMatrix statistics, a pandas crosstab, and the output of
# confusion_matrix). Per the original note, ConfusionMatrix comes from the
# pandas_confusion library.
print '\n\nConfusion Matrix:\n\n'

# Accuracy in percent, rounded up with m.ceil.
# NOTE(review): the divisor 10000 is hard-coded - presumably the number of
# test cases; confirm it matches the data set. `m` is presumably the math
# module.
print '\tAccuracy is: ', m.ceil(float(t_correct) / 10000 * 100), '\n\n'


# Wrap the known labels (y_true) and the collected classifications (y) in
# named Series.
y_actul = pd.Series(y_true, name='Actual')
y_pred = pd.Series(y, name='Predicted')

confusion1 = ConfusionMatrix(y_actul, y_pred)
# Print the statistics computed by ConfusionMatrix.
confusion1.print_stats()

# Cross-tabulation of actual vs. predicted labels; margins=True adds the
# row/column totals.
confusion2 = pd.crosstab(y_actul, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)

print confusion2

# Matrix from confusion_matrix (presumably scikit-learn's - confirm import).
print confusion_matrix(y_actul, y_pred)



Пример #9
0
    y.append(count)  # add classfication to array for confusion matrix
    if count == tcf.index[
            i]:  #if the vote matches the known value of the target incremement correct
        t_correct += 1

# ***************************************************************************************************
# **************************** Creates and Displays Confusion Matrix ********************************
# ***************************************************************************************************

# Report section: prints the accuracy, then three views of the confusion
# matrix (ConfusionMatrix statistics, a pandas crosstab, and the output of
# confusion_matrix). Per the original note, ConfusionMatrix comes from the
# pandas_confusion library.
print '\n\nConfusion Matrix:\n\n'

# Accuracy in percent, rounded up with m.ceil.
# NOTE(review): the divisor 10000 is hard-coded - presumably the number of
# test cases; confirm it matches the data set. `m` is presumably the math
# module.
print '\tAccuracy is: ', m.ceil(float(t_correct) / 10000 * 100), '\n\n'

# Wrap the known labels (y_true) and the collected classifications (y) in
# named Series.
y_actul = pd.Series(y_true, name='Actual')
y_pred = pd.Series(y, name='Predicted')

confusion1 = ConfusionMatrix(y_actul, y_pred)
# Print the statistics computed by ConfusionMatrix.
confusion1.print_stats()

# Cross-tabulation of actual vs. predicted labels; margins=True adds the
# row/column totals.
confusion2 = pd.crosstab(y_actul,
                         y_pred,
                         rownames=['Actual'],
                         colnames=['Predicted'],
                         margins=True)

print confusion2

# Matrix from confusion_matrix (presumably scikit-learn's - confirm import).
print confusion_matrix(y_actul, y_pred)
Пример #10
0
def main(save, show):
    """Run the ``ConfusionMatrix`` demo: print, plot, report and binarize.

    Parameters
    ----------
    save : bool
        When truthy, each figure is written into ``../screenshots`` relative
        to this file (``cm.png``, then ``cm_norm.png``).
    show : bool
        When truthy, each figure is displayed with ``plt.show()``.
    """
    basepath = os.path.dirname(__file__)

    def _finish_figure(filename):
        # Save and/or show the figure that was just plotted.
        if save:
            plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
        if show:
            plt.show()

    truth = [
        'rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog',
        'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit',
    ]
    guesses = [
        'cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit',
        'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit',
    ]
    cm = ConfusionMatrix(truth, guesses)

    print("Confusion matrix:\n%s" % cm)
    table = cm.to_dataframe()
    print(table)
    print(table.dtypes)

    # Plot the raw counts, then the normalized matrix.
    cm.plot()
    _finish_figure('cm.png')

    cm.plot(normalized=True)
    _finish_figure('cm_norm.png')

    cm.print_stats()
    print(cm.classification_report)

    # Same data through the separately imported helpers, for comparison.
    print("sklearn confusion_matrix:\n%s" % confusion_matrix(truth, guesses))
    print(classification_report(truth, guesses))

    print("Binarize a confusion matrix")
    truth = ["cat", "ant", "cat", "cat", "ant", "bird"]
    guesses = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(truth, guesses)
    print(cm)
    # 'ant' and 'cat' are passed to binarize(); per the original note, a bird
    # is not a "land_animal".
    binary_cm = cm.binarize(['ant', 'cat'])
    print(binary_cm)
Пример #11
0
# Run every testing letter through collect_votes() and record the winning
# letter next to the known one.
#
# The one-element Series are gathered in lists and concatenated once after
# the loop: calling Series.append() per iteration copies the whole growing
# Series every time, and the method no longer exists in pandas 2.0.
pred_parts = [y_pred]
actu_parts = [y_actu]
for letter in letters_list_testing:
    # collect_votes() returns the letter chosen for this testing instance.
    predicted = collect_votes(letter)

    # Pieces for the pandas crosstab, indexed by the running instance number.
    pred_parts.append(pd.Series(predicted, index=[letter_increment]))
    actu_parts.append(pd.Series(letter.value[0], index=[letter_increment]))

    # Plain lists for pandas_confusion.
    y_pred_stats.append(predicted)
    y_actu_stats.append(letter.value[0])

    # Advance the counter for the next letter.
    letter_increment += 1

y_pred = pd.concat(pred_parts)
y_actu = pd.concat(actu_parts)

# Confusion matrix as a pandas crosstab; margins=True adds the totals.
df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(df_confusion)

# Confusion matrix and statistics via pandas_confusion.
cm = ConfusionMatrix(y_actu_stats, y_pred_stats)
cm.print_stats()
Пример #12
0
    def benchmark(self, clf):
        """Train ``clf``, evaluate it on the test split and print a report.

        Parameters
        ----------
        clf : estimator
            The classifier to benchmark. It must provide ``fit`` and
            ``predict``; when it exposes ``coef_``, the dimensionality, the
            density and the top 10 keywords per category are printed too.

        Returns
        -------
        tuple
            ``(clf_descr, score, train_time, test_time)``: the classifier
            description (the text of ``str(clf)`` before the first ``(``),
            the accuracy on the test split, and the fit / predict durations
            as measured with ``time()``.

        Notes
        -----
        When ``self.split_data`` is truthy, one process is started for each
        of ``self.kfoldCV``, ``self.shuffleCV`` and ``self.recurCV``. This
        method does not join them, and ``self.process`` ends up referring to
        the last one started.
        """
        print('_' * 80)
        print("Training: ")
        print(clf)

        # Fit the classifier on the training features and time it.
        t0 = time()
        clf.fit(self.X_train, self.y_train)
        train_time = time() - t0
        print("train time: %0.3fs" % train_time)

        # Time only the prediction itself; the duration is taken before the
        # predictions are printed so console output is not counted.
        t0 = time()
        pred = clf.predict(self.X_test)
        test_time = time() - t0
        print("Predictions: ", pred)
        print("Test time:  %0.3fs" % test_time)

        # Accuracy on the test split.
        score = metrics.accuracy_score(self.y_test, pred)
        print("Accuracy:   %0.3f" % score)

        if hasattr(clf, 'coef_'):
            print("dimensionality: %d" % clf.coef_.shape[1])
            print("density: %f" % density(clf.coef_))

            # The 10 features with the largest coefficients in each category.
            print("top 10 keywords per category:")
            for i, category in enumerate(self.categories):
                top10 = np.argsort(clf.coef_[i])[-10:]
                print("%s: %s" %
                      (category, " ".join(self.feature_names[top10])))
            print()

        # Classification report for the regular train/test split.
        print("Classification report:")
        print(
            metrics.classification_report(self.y_test,
                                          pred,
                                          target_names=self.categories))

        # Confusion-matrix statistics for the regular train/test split.
        cm = ConfusionMatrix(self.y_test, pred)
        cm.print_stats()

        print()
        clf_descr = str(clf).split('(')[0]

        # With split_data enabled, run every cross-validation routine in its
        # own process: k-fold, shuffle-split (learning curve) and recursive
        # feature elimination.
        if self.split_data:
            processes = [self.kfoldCV, self.shuffleCV, self.recurCV]
            for p in processes:
                self.process = multiprocessing.Process(target=p, args=(clf, ))
                self.process.start()

        # Results of the regular train/test split.
        return clf_descr, score, train_time, test_time