Example #1
def estimateAccuracy(model, limit):
    asTrain, asTest = split("../data/train.csv", limit)
     
    model.fit(asTrain)
   
    testY = [ x.Y for x in asTest ]
    testPredictions = model.predict(asTest)
  
    print("%f" % (accuracy_score(testY, testPredictions)))
    
    print(confusion_matrix(testY, testPredictions))
Example #2
def main():

    X, y = get_data()

    X_train_std, X_test_std, y_train, y_test = stand_train_test(X, y)
    '''
    ---hard margin svm ---
    '''

    svc = LinearSVC(C=1e9, multi_class='ovr')  # behaves almost like a hard-margin SVM when C is this large

    svc.fit(X_train_std, y_train)

    y_predict = svc.predict(X_train_std)

    cm = confusion_matrix(y_train, y_predict)

    print("confusion_,matrix=", cm, "pre_score=",
          precision_score(y_train, y_predict), "recall_score=",
          recall_score(y_train, y_predict), "f1_score=",
          f1_score(y_train, y_predict))

    plot_decision_boundary(svc, axis=[-3, 3, -3, 3])

    plt.scatter(X_train_std[y_train == 0, 0], X_train_std[y_train == 0, 1])

    plt.scatter(X_train_std[y_train == 1, 0], X_train_std[y_train == 1, 1])

    plt.show()
    '''
    ---soft margin svm ---
    '''
    svc1 = LinearSVC(C=1e-1)  # now a soft-margin SVM: one blue outlier gets misclassified

    svc1.fit(X_train_std, y_train)

    y_predict1 = svc1.predict(X_train_std)

    cm = confusion_matrix(y_train, y_predict1)

    print("confusion_,matrix=", cm, "pre_score=",
          precision_score(y_train, y_predict1), "recall_score=",
          recall_score(y_train, y_predict1), "f1_score=",
          f1_score(y_train, y_predict1))

    plot_decision_boundary(svc1, axis=[-3, 3, -3, 3])

    plt.scatter(X_train_std[y_train == 0, 0], X_train_std[y_train == 0, 1])

    plt.scatter(X_train_std[y_train == 1, 0], X_train_std[y_train == 1, 1])

    plt.show()
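The comments above describe the effect of C; a minimal sketch of that hard/soft margin trade-off on synthetic data (not from the original source; LinearSVC and make_blobs from scikit-learn):

from sklearn.svm import LinearSVC
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=100, centers=2, cluster_std=2.0, random_state=0)
for C in (1e9, 1e-1):  # very large C approximates a hard margin; small C allows more margin violations
    clf = LinearSVC(C=C, max_iter=100000).fit(X, y)
    print(C, (clf.predict(X) != y).sum())  # number of misclassified training points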
Example #3
        def eval_data(data_dev_all, gold_labels):
            dev_batches = batch_iter(data_dev_all,
                                     batch_size_train,
                                     1,
                                     shuffle=False)

            predictions_all = []
            for batch in dev_batches:
                batch_stories, batch_endings1, batch_endings2, batch_labels, _ = zip(
                    *batch)
                batch_stories_padded, batch_stories_seqlen = pad_data_and_return_seqlens(
                    batch_stories)
                batch_endings1_padded, batch_endings1_seqlen = pad_data_and_return_seqlens(
                    batch_endings1)
                batch_endings2_padded, batch_endings2_seqlen = pad_data_and_return_seqlens(
                    batch_endings2)

                res_cost, res_acc, res_pred_y = dev_step(
                    zip(batch_stories_padded, batch_stories_seqlen,
                        batch_endings1_padded, batch_endings1_seqlen,
                        batch_endings2_padded, batch_endings2_seqlen),
                    batch_labels)

                predictions_all.extend(res_pred_y)

            logging.info("Confusion matrix:")
            conf_matrix = confusion_matrix(gold_labels, predictions_all)
            logging.info("\n" + str(conf_matrix))
            res = precision_recall_fscore_support(gold_labels, predictions_all)
            logging.info("precision_recall_fscore_support:%s" % str(res))

            logging.info("accuracy_score:%s" %
                         accuracy_score(gold_labels, predictions_all))
            return res
Example #4
def qwkappa(y, ypred):
    """Calcula el Quadratic Wweighted Kappa para la clasificación realizada por la red.
    
    :param y: Vector de n elementos con los valores de clase reales de cada patrón.
    :param ypred: Matriz de nxk con las probabilidades de pertenencia o clases predichas de cada patrón a cada clase.
    :return: Valor de QWK para la clasificación realizada.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        costes = np.reshape(np.tile(range(n_class), n_class),
                            (n_class, n_class))
        costes = (costes - costes.T)**2
        f = 1 - costes

        n = cm.sum()
        x = cm / n

        r = x.sum(axis=1)  # Row sum
        s = x.sum(axis=0)  # Col sum
        Ex = r.reshape(-1, 1) * s
        po = (x * f).sum()
        pe = (Ex * f).sum()
        return (po - pe) / (1 - pe)
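A minimal usage sketch for qwkappa (synthetic ordinal labels, not from the original source; it assumes numpy, warnings and sklearn.metrics.confusion_matrix are imported as the snippet requires):

y_true = [0, 0, 1, 1, 2, 2]                  # hypothetical ordinal ground truth
y_hat = [0, 1, 1, 1, 2, 2]                   # a single one-category error
print(qwkappa(y_true, y_hat))                # about 0.86 for this near-perfect prediction
print(qwkappa(y_true, [0, 2, 1, 1, 2, 2]))   # exactly 0.5: the same single error, two categories away, is penalized more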
Example #5
    def binary_class_measures(cls, y_true: list,
                              y_predicted: list) -> OrderedDict:
        """Assessment measures of a classification task with binary
        classes i.e. Fantasy/Non-Fantasy

        Parameters
        ----------
        y_true : list
            Expected class labels in binary form
        y_predicted : list
            Predicted class labels in binary form

        Returns
        -------
        OrderedDict
            An ordered dictionary of assessment measures
        """
        cm = confusion_matrix(y_true, y_predicted)
        # sklearn lays out the binary confusion matrix as [[tn, fp], [fn, tp]],
        # so unpack in that order (positive class assumed to be label 1)
        tn, fp, fn, tp = cm.ravel()
        measures = OrderedDict()
        measures['accuracy'] = (tp + tn) / (tp + fp + fn + tn)
        measures['specificity'] = tn / (tn + fp)
        measures['sensitivity'] = tp / (tp + fn)
        measures['precision'] = tp / (tp + fp)
        measures['f1score'] = 2 * tp / (2 * tp + fp + fn)
        return measures
Example #6
    def matthews_corrcoef(y_true, y_pred, sample_weight=None):
        from sklearn.metrics.classification import (
            _check_targets,
            LabelEncoder,
            confusion_matrix,
        )

        y_type, y_true, y_pred = _check_targets(y_true, y_pred)
        if y_type not in {'binary', 'multiclass'}:
            raise ValueError('%s is not supported' % y_type)
        lb = LabelEncoder()
        lb.fit(np.hstack([y_true, y_pred]))
        y_true = lb.transform(y_true)
        y_pred = lb.transform(y_pred)
        C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
        t_sum = C.sum(axis=1)
        p_sum = C.sum(axis=0)
        n_correct = np.trace(C)
        n_samples = p_sum.sum()
        cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
        cov_ypyp = n_samples**2 - np.dot(p_sum, p_sum)
        cov_ytyt = n_samples**2 - np.dot(t_sum, t_sum)
        mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
        if np.isnan(mcc):
            return 0.0
        else:
            return mcc
Example #7
def plot_cm(y_trues, y_preds, normalize=True, cmap=plt.cm.Blues):
    classes = ['SNR', 'AF', 'IAVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE']
    for i, label in enumerate(classes):
        y_true = y_trues[:, i]
        y_pred = y_preds[:, i]
        cm = confusion_matrix(y_true, y_pred)
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        fig, ax = plt.subplots(figsize=(4, 4))
        im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        ax.figure.colorbar(im, ax=ax)
        ax.set(xticks=np.arange(cm.shape[1]),
               yticks=np.arange(cm.shape[0]),
               xticklabels=[0, 1],
               yticklabels=[0, 1],
               title=label,
               ylabel='True label',
               xlabel='Predicted label')
        plt.setp(ax.get_xticklabels(), ha="center")

        fmt = '.3f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j,
                        i,
                        format(cm[i, j], fmt),
                        ha="center",
                        va="center",
                        color="white" if cm[i, j] > thresh else "black")
        np.set_printoptions(precision=3)
        fig.tight_layout()
        plt.savefig(f'results/{label}.png')
        plt.close(fig)
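A minimal usage sketch with synthetic multi-label data (not from the original source); plot_cm writes one results/<label>.png per class, so that directory must already exist:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

rng = np.random.default_rng(0)
y_trues = rng.integers(0, 2, size=(200, 9))  # 200 samples, one 0/1 column per class listed above
y_preds = rng.integers(0, 2, size=(200, 9))
plot_cm(y_trues, y_preds, normalize=True)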
Example #8
def test(model, cross_idx, cross_path, inner_epoch=0, outer_epoch=0, ckp_pth=None, device=None):
	# print()
	model.eval()
	print('============================Meta-test Testing Start============================')

	all_conf_mat = np.zeros((len(lab_m_list), len(lab_m_list)))

	with torch.no_grad():

		with open(cross_path + 'test_set.pkl', 'rb') as f:
			data = pickle.load(f)

		ses_names = list(data.keys())
		for wav_file in ses_names:
			mat, true_y = data.get(wav_file)
			true_y = np.array([true_y])
			mat = torch.tensor(mat).type(torch.FloatTensor).to(device)
			pred_y = model(mat, mode='query').sum(dim=0)
			pred_y = ((pred_y.topk(1))[1]).data.cpu().flatten().tolist()
			confusion_mat = confusion_matrix(true_y, pred_y, labels=[0, 1, 2, 3])
			all_conf_mat += confusion_mat

	all_conf_mat = all_conf_mat.T
	UA_metric = get_UA(all_conf_mat)
	WA_metric = get_WA(all_conf_mat)
	npy_name = ckp_pth + '/' + 'Leave_' + cross_idx + '_Outer_' + str(outer_epoch) + '_Inner_' + str(inner_epoch) + \
			   '_test.npy'
	np.save(npy_name, all_conf_mat)

	print('============================Meta-test Testing Finish============================')
	print()

	return UA_metric, WA_metric
Example #9
def printConfusionMatrix(y_true, y_pred, class_names=None):
    """ Print a confusion matrix similar to R's confusionMatrix """
    confMatrix = classification.confusion_matrix(y_true, y_pred)
    accuracy = classification.accuracy_score(y_true, y_pred)

    print('Confusion Matrix (Accuracy {:.4f})\n'.format(accuracy))
    _printConfusionMatrix(confMatrix, class_names)
Example #10
def test_model(classifier, X, y):
    y_pred = classifier.predict(X)
    conf_matrix = confusion_matrix(y, y_pred)
    accuracy = accuracy_score(y, y_pred)
    report = classification_report(y, y_pred)
    print(conf_matrix)
    print(report)
    print(accuracy)
Example #11
def do_classification(x_train, y_train, x_test, y_test, gamma_val, c_val):

    classifier = svm.SVC(kernel='rbf', gamma=gamma_val, C=c_val)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    accuracy = np.mean(y_test == predicted)
    cfm = confusion_matrix(y_test, predicted)
    return accuracy, gamma_val, c_val, cfm
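A minimal usage sketch on synthetic data (not from the original source; it assumes numpy as np, sklearn.svm as svm and confusion_matrix are imported as the snippet requires):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
acc, gamma_val, c_val, cfm = do_classification(x_tr, y_tr, x_te, y_te, gamma_val=0.1, c_val=1.0)
print(acc)  # mean accuracy on the held-out split
print(cfm)  # 2x2 confusion matrix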
Example #12
def balanced_accuracy_score(y_true, y_pred, balance=0.5):
    """Balanced accuracy classification score.
    The formula for the balanced accuracy score ::
        balanced accuracy = balance * TP/(TP + FN) + (1 - balance) * TN/(TN + FP)
    Because it relies on the notion of true/false positives and negatives, it only
    supports binary classification.
    The `balance` parameter determines the weight of sensitivity in the combined
    score. ``balance -> 1`` lends more weight to sensitivity, while ``balance -> 0``
    favors specificity (``balance = 1`` considers only sensitivity, ``balance = 0``
    only specificity).
    Read more in the :ref:`User Guide <balanced_accuracy_score>`.
    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.
    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.
    balance : float between 0 and 1. Weight associated with the sensitivity
        (or recall) against specificity in the final score.
    Returns
    -------
    score : float
    See also
    --------
    accuracy_score
    References
    ----------
    .. [1] `Wikipedia entry for the accuracy and precision
           <http://en.wikipedia.org/wiki/Accuracy_and_precision>`_
    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import balanced_accuracy_score
    >>> y_pred = [0, 0, 1]
    >>> y_true = [0, 1, 1]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.75
    >>> y_pred = ["cat", "cat", "ant"]
    >>> y_true = ["cat", "ant", "ant"]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.75
    """

    if balance < 0. or 1. < balance:
        raise ValueError("balance has to be between 0 and 1")

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type is not "binary":
        raise ValueError("%s is not supported" % y_type)

    cm = confusion_matrix(y_true, y_pred)
    neg, pos = cm.sum(axis=1, dtype='float')
    tn, tp = np.diag(cm)

    sensitivity = tp / pos
    specificity = tn / neg

    return balance * sensitivity + (1 - balance) * specificity
Example #14
def gm(y, ypred):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        sum_byclass = np.sum(cm, axis=1)
        sensitivities = np.diag(cm) / sum_byclass.astype('double')
        sensitivities[sum_byclass == 0] = 1
        gm_result = pow(np.prod(sensitivities), 1.0 / cm.shape[0])
        return gm_result
Example #15
def ms(y, ypred):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        sum_byclass = np.sum(cm, axis=1)
        sensitivities = np.diag(cm) / sum_byclass.astype('double')
        sensitivities[sum_byclass == 0] = 1
        ms = np.min(sensitivities)

        return ms
Example #16
def evalRes(Y_test, pred, test_labels):
    y_pred = np.argmax(pred, axis=1)
    y_test = np.argmax(Y_test, axis=1)
    print('Classification Report')
    target_names = ['Reading', 'Speaking', 'Watching']
    cnf_matrix = confusion_matrix(y_pred, test_labels)
    df_class_report = pandas_classification_report(y_true=y_test, y_pred=y_pred)
    df_class_report.to_csv('classification_report.csv', sep=',')
    plot_confusion_matrix(cnf_matrix, classes=target_names, normalize=True,
                          title='Normalized confusion matrix')
Example #17
def mmae(y, ypred):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class))
        costes = np.abs(costes - np.transpose(costes))
        errores = costes * cm
        amaes = np.sum(errores, axis=1) / np.sum(cm, axis=1).astype('double')
        amaes = amaes[~np.isnan(amaes)]
        return amaes.max()
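A minimal sketch (synthetic ordinal labels, not from the original sources) exercising the per-class sensitivity metrics gm (Example #14), ms (Example #15) and mmae (Example #17); it assumes the same numpy/warnings/confusion_matrix imports those snippets rely on:

y_true = [0, 0, 1, 1, 2, 2]
y_hat = [0, 0, 1, 2, 2, 2]     # one class-1 pattern predicted as class 2
print(gm(y_true, y_hat))       # geometric mean of per-class sensitivities, about 0.79
print(ms(y_true, y_hat))       # minimum per-class sensitivity, 0.5 here
print(mmae(y_true, y_hat))     # maximum per-class mean absolute error, 0.5 here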
Example #18
    def test_classification(self, test, testlabel, bestmodel):
#        bestmodel=bestmodel
        outputtest = bestmodel.predict(test)
        accuracytest = accuracy_score(testlabel, outputtest)
        print ("The accuracy for the test set is %r" %accuracytest, "and the confusion matrix is")
        print (confusion_matrix(outputtest,testlabel))
        print( classification_report(testlabel, outputtest))
#        probaout=bestmodel.predict_prob(test)
#       probaout= DataFrame(probaout)
#        print probaout
        return outputtest
Example #19
    def calculate(self) -> None:
        """
        Calculates all of the metrics
        (precision, recall, F score and support)
        and stores them
        in the results dictionary.
        Note: This function may eat up a lot of memory
        if it's used on a large file.

        :return:
        """
        print('\nCalculating metrics...')
        ftr_all = []
        fpr_all = []

        gen = generate_tuples_from_file(self.fpath,
                                        encodings=self.encodings,
                                        first_layer=self.first_layer,
                                        batch_size=self.batch_size)

        if tqdm:
            for _ in tqdm(range(self.steps)):
                x, y = next(gen)

                y_pred = self.model.predict_classes(x, verbose=0)
                y_true = y.argmax(2)

                ftr, fpr = self._score(y_true, y_pred)
                ftr_all.extend(ftr)
                fpr_all.extend(fpr)
        else:
            print('[!] For progress logging during metrics calculation '
                  'install tqdm.')
            for _ in range(self.steps):
                x, y = next(gen)

                y_pred = self.model.predict_classes(x, verbose=0)
                y_true = y.argmax(2)

                ftr, fpr = self._score(y_true, y_pred)
                ftr_all.extend(ftr)
                fpr_all.extend(fpr)

        confusion = confusion_matrix(ftr_all, fpr_all)
        p, r, f, s = precision_recall_fscore_support(ftr_all, fpr_all)

        self.results = {
            'confusion_matrix': confusion,
            'precision': p,
            'recall': r,
            'fscore': f,
            'f1mean': np.mean(f),
            'support': s
        }
Example #20
    def test_classification(self, test, testlabel, bestmodel):
        # bestmodel = bestmodel
        outputtest = bestmodel.predict(test)
        accuracytest = accuracy_score(testlabel, outputtest)
        print("The accuracy for the test set is %r" % accuracytest,
              "and the confusion matrix is")
        print(confusion_matrix(outputtest, testlabel))
        print(classification_report(testlabel, outputtest))
        # probaout = bestmodel.predict_prob(test)
        # probaout = DataFrame(probaout)
        # print(probaout)
        return outputtest
Example #21
        def eval_data(sess,
                      data_dev_all,
                      gold_labels,
                      save_features=False,
                      save_features_file="file.features.pickle"):
            dev_batches = batch_iter(data_dev_all,
                                     batch_size_train,
                                     1,
                                     shuffle=False)

            overal_loss = 0
            steps_cnt = 0
            ids_all = []
            predictions_all = []
            res_feats_all = []
            for batch in dev_batches:
                batch_stories, batch_endings1, batch_endings2, batch_labels, batch_ids = zip(
                    *batch)
                batch_stories_padded, batch_stories_seqlen = pad_data_and_return_seqlens(
                    batch_stories)
                batch_endings1_padded, batch_endings1_seqlen = pad_data_and_return_seqlens(
                    batch_endings1)
                batch_endings2_padded, batch_endings2_seqlen = pad_data_and_return_seqlens(
                    batch_endings2)

                res_cost, res_acc, res_pred_y, res_feats = dnn_model.dev_step(
                    sess,
                    zip(batch_stories_padded, batch_stories_seqlen,
                        batch_endings1_padded, batch_endings1_seqlen,
                        batch_endings2_padded, batch_endings2_seqlen),
                    batch_labels)

                steps_cnt += 1
                overal_loss += res_cost
                predictions_all.extend(res_pred_y)
                res_feats_all.extend(res_feats)
                ids_all.extend(batch_ids)

            prec_rec_f_supp = precision_recall_fscore_support(
                gold_labels, predictions_all)
            conf_matrix = confusion_matrix(gold_labels, predictions_all)
            overall_accuracy = accuracy_score(gold_labels, predictions_all)
            overal_loss = overal_loss / steps_cnt

            if save_features:
                write_feats = open(save_features_file, "wb")
                pickle.dump(res_feats_all, write_feats)
                write_feats.close()
                # DataUtilities_ROCStories.save_data_to_json_file(res_feats_all, output_json_file=save_features_file)
                logging.info("Features saved to file: %s" % save_features_file)

            return prec_rec_f_supp, overal_loss, overall_accuracy, predictions_all, ids_all, conf_matrix
Example #22
def main():
    load_dataset_mnist("../libs")
    mndata = MNIST('../libs/data_mnist', gz=True)
    weight_path = "nn_weights.pkl"
    training_phase = weight_path not in os.listdir(".")
    if training_phase:
        images, labels = mndata.load_training()
        images, labels = preprocess_data(images, labels)
        epochs = 10
        batch_size = 64
        learning_rate = 0.01

        optimizer = Adam(learning_rate)
        loss_func = CrossEntropy()
        archs = [
            InputLayer(),
            FCLayer(num_neurons=100, weight_init="he_normal"),
            ActivationLayer(activation="relu"),
            DropoutLayer(keep_prob=0.8),
            FCLayer(num_neurons=125, weight_init="he_normal"),
            ActivationLayer(activation="relu"),
            DropoutLayer(keep_prob=0.8),
            FCLayer(num_neurons=50, weight_init="he_normal"),
            BatchNormLayer(),
            ActivationLayer(activation="relu"),
            FCLayer(num_neurons=labels.shape[1], weight_init="he_normal"),
            ActivationLayer(activation="softmax"),
        ]
        nn = NeuralNetwork(optimizer=optimizer,
                           layers=archs,
                           loss_func=loss_func)

        trainer = Trainer(nn, batch_size, epochs)
        trainer.train(images, labels)
        trainer.save_model("nn_weights.pkl")
    else:
        import pickle
        images_test, labels_test = mndata.load_testing()
        images_test, labels_test = preprocess_data(images_test,
                                                   labels_test,
                                                   test=True)
        with open(weight_path, "rb") as f:
            nn = pickle.load(f)
        pred = nn.predict(images_test)

        print("Accuracy:", len(pred[labels_test == pred]) / len(pred))
        from sklearn.metrics import confusion_matrix  # the old sklearn.metrics.classification path has been removed

        print("Confusion matrix: ")
        print(confusion_matrix(labels_test, pred))
Example #23
def run_grid_search(grid_search, show_evaluation=True):
    """ Run the GridSearch algorithm and compute evaluation metrics """
    X_train, X_test, y_train, y_test = split_dataset()

    grid_search.fit(X_train, y_train)
    # for key, value in grid_search.cv_results_.items():
    #     print key, value

    predictions = grid_search.predict(X_test)

    if show_evaluation:
        logger.debug("macro_recall: %s", recall_score(y_test, predictions, average="macro"))
        logger.debug(precision_recall_fscore_support(y_test, predictions))
        logger.debug(confusion_matrix(y_test, predictions))
Example #24
def classificationSummary(y_true, y_pred, class_names=None):
    """ Provide a comprehensive summary of classification performance similar to R's confusionMatrix """
    confMatrix = classification.confusion_matrix(y_true, y_pred)
    TP = confMatrix[0, 0]
    FP = confMatrix[1, 0]
    TN = confMatrix[1, 1]
    FN = confMatrix[0, 1]
    N = TN + TP + FN + FP
    sensitivity = TP / (TP + FN)
    specificity = TN / (TN + FP)
    prevalence = (TP + FN) / N 
    PPV = TP / (TP + FP)
    NPV = TN / (TN + FN)
    BAC = (sensitivity + specificity) / 2
    
    metrics = [
        ('Accuracy', classification.accuracy_score(y_true, y_pred)),
        ('95% CI', None),
        ('No Information Rate', None),
        ('P-Value [Acc > NIR]', None),
        (None, None),
        ('Kappa', classification.cohen_kappa_score(y_true, y_pred)),
        ("Mcnemar's Test P-Value", None),
        (None, None),
        ('Sensitivity', sensitivity),
        ('Specificity', specificity),
        ('Pos Pred Value', PPV),
        ('Neg Pred Value', NPV),
        ('Prevalence', prevalence),
        ('Detection Rate', None),
        ('Detection Prevalence', None),
        ('Balanced Accuracy', BAC),
        ]

    print('Confusion Matrix and Statistics\n')
    _printConfusionMatrix(confMatrix, class_names)
    if len(set(y_true)) < 5:
        print(classification_report(y_true, y_pred, digits=4))
    
    fmt1 = '{{:>{}}} : {{:.3f}}'.format(max(len(m[0]) for m in metrics if m[0] is not None))
    fmt2 = '{{:>{}}} : {{}}'.format(max(len(m[0]) for m in metrics if m[0] is not None))
    for metric, value in metrics:
        if metric is None:
            print()
        elif value is None:
            pass
            # print(fmt2.format(metric, 'missing'))
        else:
            print(fmt1.format(metric, value))
Example #25
def KNN(X_train, X_test, y_train, y_test):
    print("training data shape: ", X_train.shape)
    print("################# KNN #################")
    model = KNeighborsClassifier(n_neighbors=9)
    
    scores = sklearn.model_selection.cross_val_score(model, X_train, y_train, cv=KFold(n_splits=10, shuffle=True), scoring='accuracy')
    print("KNN cross-validation Accuracy: %0.2f" % scores.mean())
    
    model.fit(X_train, y_train)
    test_predict = model.predict(X_test)
    print("report for KNN: ")
    report = sklearn.metrics.classification_report(y_test, test_predict, digits=4)
    print(report)
    print("KNN overall accuracy: " + str(sklearn.metrics.accuracy_score(y_test, test_predict)))
    print(confusion_matrix(y_test, test_predict))
Example #26
def evalRes(pred, test_labels, y_testMultiClass, name):
    y_pred = np.argmax(pred, axis=1)
    y_test = test_labels
    target_names = ['Reading', 'Speaking', 'Watching']
    cnf_matrix = confusion_matrix(y_pred, test_labels)
    df_class_report = pandas_classification_report(y_true=y_test,
                                                   y_pred=y_pred)
    df_class_report.to_csv(folder + name + 'classification_report.csv',
                           sep=',')
    plot_confusion_matrix(cnf_matrix,
                          classes=target_names,
                          normalize=True,
                          title='',
                          name=name)
    plot_ROC(pred, y_testMultiClass, name)
Example #27
def plot_confusion_matrix2(y_true,
                           y_pred,
                           labels,
                           ymap=None,
                           figsize=(10, 10)):
    """
    Generate a matrix plot of the confusion matrix with pretty annotations.
    The plot is shown on screen (saving to a file is commented out below).
    args:
      y_true:    true label of the input, with shape (nsamples,)
      y_pred:    prediction of the input, with shape (nsamples,)
      labels:    string array, name the order of class labels in the confusion matrix.
                 use `clf.classes_` if using scikit-learn models.
                 with shape (nclass,).
      ymap:      dict: any -> string, length == nclass.
                 if not None, map the labels & ys to more understandable strings.
                 Caution: original y_true, y_pred and labels must align.
      figsize:   the size of the figure plotted.
    """
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    cm_perc = cm / cm_sum.astype(float) * 100
    annot = np.empty_like(cm).astype(str)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                s = cm_sum[i]
                annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
            elif c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (p, c)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm, annot=annot, fmt='', ax=ax)
    # plt.savefig(filename)
    plt.show()
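A minimal usage sketch (synthetic labels, not from the original source); it assumes numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt and sklearn.metrics.confusion_matrix are imported as the snippet requires:

y_true = ['cat', 'dog', 'dog', 'cat', 'bird', 'dog']
y_pred = ['cat', 'dog', 'cat', 'cat', 'bird', 'dog']
plot_confusion_matrix2(y_true, y_pred, labels=['bird', 'cat', 'dog'], figsize=(6, 6))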
Example #28
def evalRes(X_test, Y_test, pred, test_labels):
    y_pred = np.argmax(pred, axis=1)
    y_test = np.argmax(Y_test, axis=1)
    print('Classification Report')
    target_names = ['Reading', 'Speaking', 'Watching']
    print(classification_report(y_pred, y_test, target_names=target_names))
    print('Confusion Matrix')
    cnf_matrix = confusion_matrix(y_pred, test_labels)
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=target_names,
                          title='Confusion matrix, without normalization')
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=target_names,
                          normalize=True,
                          title='Normalized confusion matrix')
    plt.show()
Example #29
def wkappa(y, ypred):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        cm = confusion_matrix(y, ypred)
        n_class = cm.shape[0]
        costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class))
        costes = np.abs(costes - np.transpose(costes))
        f = 1 - costes

        n = cm.sum()
        x = cm / n

        r = x.sum(axis=1)  # Row sum
        s = x.sum(axis=0)  # Col sum
        Ex = r.reshape(-1, 1) * s
        po = (x * f).sum()
        pe = (Ex * f).sum()
        return (po - pe) / (1 - pe)
Example #30
def classificationSummary(y_true, y_pred, class_names=None):
    """ Print a summary of classification performance
    
    Input:
        y_true: actual values
        y_pred: predicted values
        class_names (optional): list of class names
    """
    confusionMatrix = classification.confusion_matrix(y_true, y_pred)
    accuracy = classification.accuracy_score(y_true, y_pred)

    print('Confusion Matrix (Accuracy {:.4f})\n'.format(accuracy))

    # Pretty-print confusion matrix
    cm = confusionMatrix

    labels = class_names
    if labels is None:
        labels = [str(i) for i in range(len(cm))]

    # Convert the confusion matrix and labels to strings
    cm = [[str(i) for i in row] for row in cm]
    labels = [str(i) for i in labels]

    # Determine the width for the first label column and the individual cells
    prediction = 'Prediction'
    actual = 'Actual'
    labelWidth = max(len(s) for s in labels)
    cmWidth = max(max(len(s) for row in cm for s in row), labelWidth) + 1
    labelWidth = max(labelWidth, len(actual))

    # Construct the format statements
    fmt1 = '{{:>{}}}'.format(labelWidth)
    fmt2 = '{{:>{}}}'.format(cmWidth) * len(labels)

    # And print the confusion matrix
    print(fmt1.format(' ') + ' ' + prediction)
    print(fmt1.format(actual), end='')
    print(fmt2.format(*labels))

    for cls, row in zip(labels, cm):
        print(fmt1.format(cls), end='')
        print(fmt2.format(*row))
Example #31
def scores(y_test, predictions, pp, clf):
    print()
    if pp == 'Y':
        print('Scores After Preprocessing :')
    else:
        print('Scores Before Preprocessing :')
    print('Classifier = {clf}'.format(clf=clf))
    print('Accuracy score = {accuracy}'.format(
        accuracy=accuracy_score(y_test, predictions)))
    print('Precision score = {precision}'.format(
        precision=precision_score(y_test, predictions)))
    print('Recall score = {recall}'.format(
        recall=recall_score(y_test, predictions)))
    print('F1 Score = {f1score}'.format(f1score=f1_score(y_test, predictions)))
    print('ROC AUC = {roc_auc}'.format(
        roc_auc=roc_auc_score(y_test, predictions)))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
Example #32
def evaluate(y_pred, y_test):
    perf= matthews_corrcoef(y_test, y_pred)

    print("Prediction score:%s" % perf)
    tn,fp,fn,tp=confusion_matrix(y_test, y_pred).ravel()
    print("True negatives:%s" % tn)
    print("True positives: %s" % tp)
    print("False negatives: %s" % fn)
    print("False positives: %s" % fp)
    
    n_neg = tn + fp  # renamed from nn/np so numpy is not shadowed
    n_pos = tp + fn

    ratio_tp = float(tp) / float(n_pos)          # close to 1 if we correctly predicted the test failures
    ratio_tn = float(tn) / float(n_neg)          # close to 1 if we correctly predicted the test passes
    ratio_fp = float(fp) / float(n_pos + n_neg)  # close to 0 if we rarely miss
    ratio_fn = float(fn) / float(n_pos + n_neg)  # close to 0 if we rarely miss (the original divided by nn + nn, likely a typo)
    print("TP ratio over positives:%s, TN ratio over negatives:%s, FP ratio over total:%s, FN ratio over total:%s" % (ratio_tp, ratio_tn, ratio_fp, ratio_fn))
    
    return perf
Example #33
def print_metrics(y_true, y_pred):
    print('auc:', roc_auc_score(y_true, y_pred))
    print('accuracy:', classification.accuracy_score(y_true, y_pred))

    confusion_matrix = classification.confusion_matrix(y_true, y_pred)
    # print('confusion matrix:')
    # print('report:', classification.classification_report(y_true, y_pred))
    tn, fp, fn, tp = confusion_matrix.ravel()
    sensitivity = tp / (tp + fn)
    print('sensitivity: {}'.format(sensitivity))
    specificity = tn / (tn + fp)
    print('specificity: {}'.format(specificity))
    print('precision: {}'.format(tp / (tp + fp)))
    total_acc = (tp + tn) / (tp + tn + fp + fn)
    random_acc = (((tn + fp) * (tn + fn) + (fn + tp) * (fp + tp)) /
                  (tp + tn + fp + fn)**2)
    kappa = (total_acc - random_acc) / (1 - random_acc)
    print('Cohen\'s kappa: {}'.format(kappa))
    youdens = sensitivity - (1 - specificity)
    print('Youden\'s index: {}'.format(youdens))
    print('log loss:', classification.log_loss(y_true, y_pred))
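A quick consistency sketch for the hand-rolled kappa above (synthetic labels, not from the original source): it should agree with sklearn's cohen_kappa_score.

from sklearn.metrics import cohen_kappa_score, confusion_matrix

y_true = [1, 0, 1, 1, 0, 0, 1, 0]
y_pred = [1, 0, 0, 1, 0, 1, 1, 0]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
total_acc = (tp + tn) / (tp + tn + fp + fn)
random_acc = ((tn + fp) * (tn + fn) + (fn + tp) * (fp + tp)) / (tp + tn + fp + fn) ** 2
print((total_acc - random_acc) / (1 - random_acc))  # hand-rolled Cohen's kappa
print(cohen_kappa_score(y_true, y_pred))            # should print the same value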
Example #34
                # model = LogisticRegression(C=subsample, verbose=0, penalty='l1', max_iter=100)
                # model = KNeighborsClassifier(n_neighbors=learning_rate)
                # model = xgb.XGBRegressor(max_depth=depth, n_estimators=n_estimators, learning_rate=learning_rate,
                #                          nthread=1, subsample=subsample, silent=True, colsample_bytree=0.8)
                # model = LinearSVC(C=0.9, penalty='l2', dual=False, verbose=1, max_iter=100000)

                model.fit(trtrfe, trtrtrue)
                # mean accuracy on the given test data and labels
                predicted = [math.floor(x) for x in model.predict(trtefe)]

                score = model.score(trtefe, trtetrue)
                print("score =", score)

                print(classification_report(trtetrue, predicted))
                print(confusion_matrix(trtetrue, predicted))

                if score > best_score or True:
                    best_model = model
                    best_score = score

                    best_model.fit(train_features, train_true)
                    predicted = [math.floor(x) for x in best_model.predict(test_features)]
                    fname = "data/net_result/sol_" + str(score) + "_" + str(time.time()) + ".csv"
                    write_sol(predicted, fname)
                    print("this model", depth, "\t", subsample, "\t", score)
                    print("best model", best_score)

best_model.fit(trtefe, trtetrue)
predicted = [math.floor(x) for x in best_model.predict(test_features)]
write_sol(predicted, "data/net_result/sol.csv")