Example No. 1
def get_results(results, instance_of_datasets, classifier_name, y_true, y_pred, file_dump):
    tmp_ = {"y_pred": y_pred,
            "y_true": y_true,
            "accuracy": accuracy_score(y_true, y_pred),
            "precision_micro": precision_score(y_true, y_pred, average="micro"),
            "precision_macro": precision_score(y_true, y_pred, average="macro"),
            "recall_micro": recall_score(y_true, y_pred, average="micro"),
            "recall_macro": recall_score(y_true, y_pred, average="macro"),
            "f1_micro": f1_score(y_true, y_pred, average="micro"),
            "f1_macro": f1_score(y_true, y_pred, average="macro")}

    cPickle.dump(tmp_, gzip.open("%s/single_%s_%s_%s.zcp" % (dir_results, file_dump, instance_of_datasets, classifier_name), "wb+"))
    results[instance_of_datasets][classifier_name]=tmp_
    print(classifier_name,
          "accuracy", results[instance_of_datasets][classifier_name]["accuracy"],
          "f1 score_micro", results[instance_of_datasets][classifier_name]["f1_micro"],
          "precision_micro", results[instance_of_datasets][classifier_name]["precision_micro"],
          "recall_micro", results[instance_of_datasets][classifier_name]["recall_micro"],
          "f1 score_macro", results[instance_of_datasets][classifier_name]["f1_macro"],
          "precision_macro", results[instance_of_datasets][classifier_name]["precision_macro"],
          "recall_macro", results[instance_of_datasets][classifier_name]["recall_macro"]
          )
    cPickle.dump(results, gzip.open(dir_results+"/"+file_dump, "wb+"))
    return results
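The function above records both micro- and macro-averaged scores side by side. A minimal toy sketch (labels invented here for illustration) of why the two can diverge on imbalanced data:

from sklearn.metrics import precision_score

y_true = [0, 0, 0, 0, 0, 0, 0, 0, 1, 2]  # toy, heavily imbalanced labels
y_pred = [0, 0, 0, 0, 0, 0, 0, 0, 2, 1]  # majority class perfect, both minority classes wrong

print(precision_score(y_true, y_pred, average="micro"))  # 0.8   -- dominated by class 0
print(precision_score(y_true, y_pred, average="macro"))  # ~0.33 -- every class weighted equally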
Example No. 2
def calc_fit(model, metric, train_x, train_y, test_x, test_y, p):
    # keep only the feature columns selected by the boolean mask p (itertools.compress)
    train_x = [list(compress(x, p)) for x in train_x]
    test_x = [list(compress(x, p)) for x in test_x]
    clf = model.fit(train_x, train_y)
    predictions = clf.predict(test_x)
    if metric == 'precision':
        return precision_score(test_y, predictions, labels=[0, 1])
    elif metric == 'recall':
        return recall_score(test_y, predictions, labels=[0, 1])
    elif metric == 'accuracy':
        return accuracy_score(test_y, predictions)
    return (precision_score(test_y, predictions, labels=[0, 1])
            + recall_score(test_y, predictions, labels=[0, 1])
            + accuracy_score(test_y, predictions))
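A hypothetical call of calc_fit, assuming the snippet's own imports (itertools.compress and the sklearn metric functions) are in scope; the mask p selects which feature columns to keep, and the data below is made up:

from sklearn.linear_model import LogisticRegression

train_x = [[0.1, 1.2, 3.0], [0.4, 0.9, 2.1], [1.1, 0.2, 0.3], [0.9, 0.1, 0.5]]
train_y = [0, 0, 1, 1]
test_x = [[0.2, 1.0, 2.5], [1.0, 0.3, 0.4]]
test_y = [0, 1]

mask = [1, 0, 1]  # keep features 0 and 2, drop feature 1
print(calc_fit(LogisticRegression(), 'accuracy', train_x, train_y, test_x, test_y, mask))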
Example No. 3
def get_score(a, b_max):
    a_max = np.argmax(a, axis=-1)
    acc = accuracy_score(a_max, b_max)
    p = precision_score(a_max, b_max, average='macro')
    r = recall_score(a_max, b_max, average='macro')
    f1 = f1_score(a_max, b_max, average='macro')
    return acc, p, r, f1
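A small usage sketch, assuming a holds per-class scores (e.g. softmax outputs) and b_max already holds integer class labels; the values below are invented:

import numpy as np

a = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])  # per-class scores for 3 samples
b_max = np.array([0, 1, 1])                          # reference labels

acc, p, r, f1 = get_score(a, b_max)
print(acc, p, r, f1)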
Example No. 4
def metric_permission_based_outlier(scores, marks, target_labels, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    for i in range(len(target_labels)):
        label_i = target_labels[i]

        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))

        n = sum(y_true) - 1
        if 0 <= n < len(pk):
            # print(y_true)
            # print(scores_i)
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), pk[n], rk[n], roc_auc_score(y_true, scores_i))
        else:
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), 0.0, 0.0, 0.0)

        if title is not None:
            fp_save = os.path.join('results_weighted', title)
            plot_curve('{}_{}_precision'.format(title, label_i), 'precision', list(range(1, len(y_true))), pk,
                       path_save=fp_save + '_{}_precision.pdf'.format(label_i))
            plot_curve('{}_{}_recall'.format(title, label_i), 'recall', list(range(1, len(y_true))), rk,
                       path_save=fp_save + '_{}_recall.pdf'.format(label_i))
Example No. 5
def metric_overall_outlier(scores, weights, marks, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    y_true = []
    weighted_scores = []
    for i in range(len(scores)):
        score = 0.0
        for w, s, m in zip(weights[i], scores[i], marks[i]):
            score += w * s

        # print(1 if 'n' in marks[i] else 0, score, scores[i], weights[i], marks[i])
        weighted_scores.append(score)
        y_true.append(1 if 1 in marks[i] else 0)

    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, weighted_scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    n = sum(y_true)
    print('overall@{}'.format(n), len(y_true), pk[n], rk[n], roc_auc_score(y_true, weighted_scores))

    if title is not None:
        fp_save = os.path.join('results', 'overall_' + title)
        # plot_curve('overall_{}_precision'.format(title), 'precision', list(range(1, len(y_true))), pk,
        #            fp_save=fp_save + '_precision.pdf')
        # plot_curve('overall_{}_recall'.format(title), 'recall', list(range(1, len(y_true))), rk,
        #            fp_save=fp_save + '_recall.pdf')
        plot_precision_recall(
            '', list(range(1, len(y_true))), pk, rk, path_save=fp_save + '.pdf'
        )
Example No. 6
 def Predict(self, inp, labels, classifier, folds, name, paramdesc):
     X= inp
     y = labels
     X, y = X[y != 2], y[y != 2]
     n_samples, n_features = X.shape
     
     ###############################################################################
     # Classification and ROC analysis
     
     # Run classifier with cross-validation and plot ROC curves
     cv = StratifiedKFold(y, n_folds=folds)
     
     mean_tpr = 0.0
     mean_fpr = np.linspace(0, 1, 100)
     all_tpr = []
     
     _precision = 0.0
     _recall = 0.0
     _accuracy = 0.0
     _f1 = 0.0
     
     for i, (train, test) in enumerate(cv):
         probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
         pred_ = classifier.predict(X[test])
         _precision += precision_score(y[test], pred_)
         _recall += recall_score(y[test], pred_)
         _accuracy += accuracy_score(y[test], pred_)
         _f1 += f1_score(y[test], pred_)
         # Compute ROC curve and area the curve
         fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
         mean_tpr += interp(mean_fpr, fpr, tpr)
         mean_tpr[0] = 0.0
         roc_auc = auc(fpr, tpr)
         plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
     
     _precision /= folds
     _recall /= folds
     _accuracy /= folds
     _f1 /= folds
     
     
     plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
     
     mean_tpr /= len(cv)
     mean_tpr[-1] = 1.0
     mean_auc = auc(mean_fpr, mean_tpr)
     plt.plot(mean_fpr, mean_tpr, 'k--',
              label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
     
     plt.xlim([-0.05, 1.05])
     plt.ylim([-0.05, 1.05])
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver operating characteristic - {0}'.format(name))
     plt.legend(loc="lower right")
     plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
     plt.close()
     
     result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds), _precision, _recall, _accuracy, _f1) 
     Announce(result)
Example No. 7
def run():
    paras = create_dataset()

    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))

    skf = StratifiedKFold(Y, n_folds=10)

    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')

    accs = []
    precs = []
    recs = []
    f1s = []

    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        cv = CountVectorizer()
        X_train_counts = cv.fit_transform(X_train)

        tf_transformer = TfidfTransformer(use_idf=True).fit(X_train_counts)
        X_train_tfidf = tf_transformer.transform(X_train_counts)

        clf = DummyClassifier(strategy="most_frequent").fit(
            X_train_counts, y_train)

        X_test_counts = cv.transform(X_test)
        X_test_tfidf = tf_transformer.transform(X_test_counts)

        y_pred = clf.predict(X_test_counts)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)

        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1

        for para, (y_t, y_p) in zip(X_test, zip(y_test, y_pred)):
            if y_t == y_p:
                f.write('%s\n' % para)
            else:
                f2.write('%s\n' % para)

    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
Example No. 8
 def metrics(y_true, y_predict):
     logger.info("Computing classification metrics...")
     F_value = f1_score(y_true, y_predict, average="weighted")
     Recall_value = recall_score(y_true, y_predict, average="weighted")
     Precision_value = precision_score(y_true,
                                       y_predict,
                                       average="weighted")
     return F_value, Recall_value, Precision_value
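A hypothetical call, assuming `logger` and the sklearn imports of the surrounding module are available and that the helper is reachable as a plain function; weighted averaging weights each class by its support, so toy multi-class labels are enough to exercise it:

y_true = [0, 0, 1, 2, 2, 2]
y_pred = [0, 1, 1, 2, 2, 1]

f_value, recall_value, precision_value = metrics(y_true, y_pred)
print(f_value, recall_value, precision_value)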
Example No. 9
def metric_permission_based_outlier(scores, marks, target_permissions, title=None):
    """Metric and print permission based outlier scores, i.e., precision/recall and AUC value.

    :param scores:
        List, scores(i, j) of each widget(i) in each permission(j).
    :param marks:
        List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
    :param target_permissions:
        List of string, the `j`th permission name.
    :param title:
        String, file name used to save the plot, `None` means not to save.

    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    for i in range(len(target_permissions)):
        permission_i = target_permissions[i]

        # sort scores in each permission
        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        # no positive or negative labels
        if sum(y_true) == len(scores_i) or sum(y_true) == 0:
            print('{}({}/{}), error'.format(
                permission_i, sum(y_true), len(scores_i)
            ))
            continue

        # compute precision, recall curve and auc value
        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))
        auc = roc_auc_score(y_true, scores_i)

        # print top-k precision, recall, and AUC value
        k = sum(y_true)
        print('{}({}/{}), p/r: {}, AUC: {}'.format(
            permission_i, k, len(scores_i), round(pk[k - 1], 4), round(auc, 4)
        ))

        # save plot
        if title is not None:
            path_save = os.path.join('{}-{}.pdf'.format(title, permission_i))
            plot_precision_recall(
                permission_i, list(range(1, len(y_true))), pk, rk, path_save
            )
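For context, pyod's get_label_n used above turns raw outlier scores into binary labels by flagging the top-n scores as outliers; a toy illustration with invented values:

from pyod.utils.utility import get_label_n

y_true = [0, 1, 1, 0, 0]            # toy outlier marks (only the length is used when n is given)
scores = [0.1, 0.9, 0.6, 0.2, 0.3]  # toy outlier scores

print(get_label_n(y_true, scores, n=2))  # expected: [0 1 1 0 0], the two highest scores flagged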
Example No. 10
    def by_class_evaluation(attack_test_y,
                            target_y,
                            p,
                            attack_test_x,
                            labels=None):
        if labels is None:
            labels = np.unique(target_y)

        precisions = [
            precision_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        accuracies = [
            accuracy_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        f1_scores = [
            f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        recalls = [
            recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        c_train_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 1)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 1)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]
        c_test_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 0)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 0)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]

        x = PrettyTable()
        x.float_format = '.2'
        x.add_column("Class", labels)
        x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
        x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
        x.add_column("Attack Precision", np.round(precisions, 2))
        x.add_column("Attack Accuracy", np.round(accuracies, 2))
        x.add_column("Attack Recall", np.round(recalls, 2))
        x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
        x.add_column(
            "Percentage of Data",
            np.round(
                np.array([
                    len(target_y[target_y == c]) / len(target_y) * 100
                    for c in np.unique(target_y)
                ]), 2))
        print(x.get_string(title='Per Class Evaluation'))
Example No. 11
def classifier_evaluation(ytrue, ypred):
    """function compute key performance metrics
    """
    from sklearn.metrics.classification import (accuracy_score,
                                                precision_score, recall_score)

    return {
        "accuracy_score": accuracy_score(ytrue, ypred),
        "precision_score": precision_score(ytrue, ypred),
        "recall_score": recall_score(ytrue, ypred)
    }
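A quick sanity check of the helper above with hypothetical binary labels, just to show the shape of the returned dictionary:

y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]

print(classifier_evaluation(y_true, y_pred))
# {'accuracy_score': 0.666..., 'precision_score': 0.666..., 'recall_score': 0.666...}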
Example No. 12
    def __evaluate(self, modelFactory, x, y):
        """
        Perform the cross validation
        :param modelFactory: a factory that builds a model
        :param x: the evaluation data
        :param y: the evaluation classes
        """

        #Creating KFold
        kf = KFold(self.folds, shuffle=True, random_state=None)
        print(
            "=============================" + str(self.folds) +
            "-fold Cross-Validation training and testing ============================= \n"
        )
        i = 1
        # If the number of classes is not given, use the classes that we have
        if not self.numClasses:
            self.numClasses = len(set(y))
        # A list of results to be used to see how well the model is doing over the folds
        tableResults = []
        #Loop through the folds separation of data
        for trainIndex, testIndex in kf.split(x):
            # print(type(trainIndex))
            # Build a model adapter using a factory
            model = modelFactory.create()
            # A print to see if it is ok
            print(" ============== Fold ", i, "============")
            trainDocs, testDocs = x[trainIndex], x[testIndex]
            trainCats, testCats = y[trainIndex], y[testIndex]
            # If we want the categories to be represented as a binary array, here is where we do that
            # TODO: categorical class error representation when evaluating the classes returned by the model
            # Using the adapter to fit our model
            model.fit(trainDocs,
                      trainCats,
                      epochs=self.epochs,
                      batch_size=len(trainIndex))
            # Predicting it
            pred = model.predict(testDocs, testCats)
            print(pred)
            # Getting the scores
            accuracy = accuracy_score(testCats, pred)
            recall = recall_score(testCats, pred, average='weighted')
            precision = precision_score(testCats, pred, average='weighted')
            f1 = f1_score(testCats, pred, average='weighted')
            #Appending it to the result table
            tableResults.append({
                'result': 'result',
                'accuracy': accuracy,
                'recall': recall,
                'precision': precision,
                'f1': f1
            })
            i += 1
        self.tableResults = tableResults
Example No. 13
def show_metrics(model, X_enc, y_enc, show_confusion=False):
    pr = model.predict_classes(X_enc)
    yh = y_enc.argmax(2)
    fyh, fpr = decode_results(yh, pr)
    print('Accuracy:', accuracy_score(fyh, fpr))
    print('F1:', f1_score(fyh, fpr, average='weighted'))
    print('Precision (per class: %s)' % labels)
    print(precision_score(fyh, fpr, average=None))
    print('Recall (per class: %s)' % labels)
    print(recall_score(fyh, fpr, average=None))

    if show_confusion:
        print('Confusion matrix:')
        print(confusion_matrix(fyh, fpr))
Example No. 14
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test):
    train_start = datetime.now()
    model.fit(X_train, Y_train)
    train_duration_sec = (datetime.now() - train_start).seconds

    test_start = datetime.now()
    Y_pred = model.predict(X_test)
    test_duration_sec = (datetime.now() - test_start).seconds

    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average="weighted")
    recall = recall_score(Y_test, Y_pred, average="weighted")
    return dict(accuracy=float(accuracy),
                precision=float(precision),
                recall=float(recall),
                train_duration_sec=train_duration_sec,
                test_duration_sec=test_duration_sec)
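A usage sketch on a public toy dataset, assuming the snippet's own imports (datetime and the sklearn metric functions) are in scope; the model choice here is arbitrary:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=0)

report = train_and_evaluate_model(LogisticRegression(max_iter=1000), X_train, Y_train, X_test, Y_test)
print(report)  # accuracy, precision, recall plus train/test durations in seconds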
Example No. 15
def myaccuracy(raw_file, result_file):
    df = pd.read_csv(result_file,
                     sep='\t',
                     header=None,
                     names=['pred_0', 'pred_1'])
    test_df = pd.read_csv(raw_file,
                          sep='\t',
                          header=None,
                          names=['idx', 'question', 'relation', 'label'])

    df["pred"] = df.apply(lambda row: func(row["pred_1"], row["pred_0"]),
                          axis=1)
    f1 = f1_score(y_true=test_df.label, y_pred=df.pred)
    acc = accuracy_score(y_true=test_df.label, y_pred=df.pred)
    p = precision_score(y_true=test_df.label, y_pred=df.pred)
    r = recall_score(y_true=test_df.label, y_pred=df.pred)
    # print("accuracy: ", acc)
    # print("precision: ", p)
    # print("recall: ", r)
    # print("f1: ", f1)

    # df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df["idx"] = test_df.idx
    df["group_sort"] = df["pred_1"].groupby(df["idx"]).rank(ascending=0,
                                                            method="dense")
    df["candidate"] = test_df.relation

    # test_df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])

    df.drop_duplicates(subset=['idx', 'group_sort'],
                       keep='first',
                       inplace=True)
    true_relation = test_df.loc[test_df["label"] == 1]
    pred_relation = df.loc[(df["group_sort"] == 1.0)]

    # print(pred_relation.tail())
    # print(true_relation.tail())
    new_df = pd.merge(true_relation, pred_relation, how="inner")
    new_df["correct"] = new_df.apply(
        lambda row: row["relation"] == row["candidate"], axis=1)
    c = new_df.loc[new_df["correct"] == True]
    correct = c.idx.count()
    total = new_df.idx.count()
    print("my_accuracy: {}, {}/{}".format(correct / total, correct, total))
Example No. 16
def scores(y_test, predictions, pp, clf):
    print()
    if pp == 'Y':
        print('Scores After Preprocessing :')
    else:
        print('Scores Before Preprocessing :')
    print('Classifier = {clf}'.format(clf=clf))
    print('Accuracy score = {accuracy}'.format(
        accuracy=accuracy_score(y_test, predictions)))
    print('Precision score = {precision}'.format(
        precision=precision_score(y_test, predictions)))
    print('Recall score = {recall}'.format(
        recall=recall_score(y_test, predictions)))
    print('F1 Score = {f1score}'.format(f1score=f1_score(y_test, predictions)))
    print('ROC AUC = {roc_auc}'.format(
        roc_auc=roc_auc_score(y_test, predictions)))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
Example No. 17
def metric_overall_outlier(scores, marks, title=None):
    """Metric global outlier results, i.e., precision/recall and AUC value.

    :param scores:
        List, summed scores of each widget(i).
    :param marks:
        List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
        If there is one outlier in the related permission, then the widget is outlier.
    :param title:
        String, file name used to save the plot, `None` means not to save.

    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    # get global outlier mark
    y_true = [1 if 1 in marks[i] else 0 for i in range(len(scores))]

    # compute precision, recall curve and auc value
    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    auc = roc_auc_score(y_true, scores)

    # print top-k precision, recall, and AUC value
    k = sum(y_true)
    print('overall({}/{}), p/r: {}, AUC: {}'.format(
        k, len(y_true), round(pk[k - 1], 4), round(auc, 4)
    ))

    # save plot
    if title is not None:
        path_save = os.path.join('{}.pdf'.format(title))
        plot_precision_recall(
            'Overall', list(range(1, len(y_true))), pk, rk, path_save
        )
Example No. 18
def get_classification_metrics(ground_truth_labels, predicted_labels):
    classification_metric_dict = dict({})
    classification_metric_dict['accuracy'] = accuracy_score(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['precision'] = precision_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['recall'] = recall_score(ground_truth_labels,
                                                        predicted_labels,
                                                        average='weighted')
    classification_metric_dict['f1_score'] = f1_score(ground_truth_labels,
                                                      predicted_labels,
                                                      average='weighted')
    classification_metric_dict['brier_score_loss'] = brier_score_loss(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['matthews_corr_coef'] = matthews_corrcoef(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['jaccard_score'] = jaccard_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['cohen_kappa_score'] = cohen_kappa_score(
        ground_truth_labels, predicted_labels)

    return classification_metric_dict
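A hypothetical call with made-up binary labels, assuming the sklearn imports used above (brier_score_loss, matthews_corrcoef, jaccard_score, cohen_kappa_score, etc.) are in scope; note that with hard 0/1 labels brier_score_loss reduces to the misclassification rate:

ground_truth = [1, 0, 1, 1, 0, 0, 1, 0]
predicted = [1, 0, 0, 1, 0, 1, 1, 0]

for name, value in get_classification_metrics(ground_truth, predicted).items():
    print(name, round(value, 4))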
Example No. 19
    test_documents, "section", class_map, len(ipc_sections))

print(
    "=============================== Predicting test data ==============================="
)
# Predicting the class for each word vector in the database
real = []
pred = []
for doc, ipc in test_embedding_generator:
    result = model.predict_one(doc)
    pred.append(class_map[result])  # adding the result to the predicted vector
    real.append(class_map[numpy.argmax(ipc)])  # adding the real value to the real class vector

# Calculating the metrics F1, precision, accuracy and recall
accuracy = accuracy_score(real, pred)
recall = recall_score(real, pred, average='weighted')
precision = precision_score(real, pred, average='weighted')
f1 = f1_score(real, pred, average='weighted')
print("Accuracy " + str(accuracy), "Recall " + str(recall),
      "Precision " + str(precision), "F1 " + str(f1))
result_string += "Accuracy " + str(accuracy) + " Recall " + str(
    recall) + " Precision " + str(precision) + " F1 " + str(f1) + "\n"
f = open(result_file_name, "w")
f.write("Database: " + training_documents_collection)
f.write("embedding matrix: " + str(maxWords) + " " + str(embeddingSize))
f.write("epochs: " + str(epochs))
f.write("layers : " + str(layers))
f.write(result_string)
f.close()
Example No. 20
    sklearn_mnb = sklearn_mnb.fit(X_train, y_train)
    sklearn_y_pred = sklearn_mnb.predict(X_test)

    assert (my_y_pred == sklearn_y_pred).all()

    ###### my defined fasttext
    train_data, test_data = train_test_split(processed_data[['label', 'item']],
                                             test_size=0.1,
                                             random_state=2020)

    fasttext = FastText(class_num=3, class_type='multi-class', ngram_range=2)
    fasttext.fit(train_data['item'], train_data['label'], epochs=5)
    y_pred = fasttext.predict(test_data['item'])
    y_true = fasttext.y_encoder.transform(test_data['label'])
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    macro_precision = precision_score(y_true, y_pred, average='macro')
    macro_recall = recall_score(y_true, y_pred, average='macro')

    ##### textCNN
    ## multi-class test
    train_data, test_data = train_test_split(
        processed_data[['subject', 'processed_item']],
        test_size=0.1,
        random_state=2020)

    text_cnn = TextCNN(class_num=4, class_type='multi-class')
    text_cnn.fit(train_data['processed_item'],
                 train_data['subject'],
                 validation_data=(test_data['processed_item'],
                                  test_data['subject']),
                 epochs=2)
Example No. 21
        # exit()
        modelCheckpoint = ModelCheckpoint(parameters['modelCheckpointPath'] +
                                          'fold_' + str(i))
        kerasAdapter.fit(dataTrainGenerator,
                         epochs=epochs,
                         batch_size=len(dataTrainGenerator),
                         validationDataGenerator=dataTestGenerator,
                         validationSteps=len(dataTestGenerator),
                         callbacks=[modelCheckpoint, configSaver])
        result = kerasAdapter.predict(dataTestGenerator,
                                      batch_size=parameters['batchSize'])
        testClasses = classes[testIndex]
        metrics = dict()
        metrics['fscore'] = f1_score(testClasses, result, average='weighted')
        metrics['precision'] = precision_score(testClasses,
                                               result,
                                               average='weighted')
        metrics['recall'] = recall_score(testClasses,
                                         result,
                                         average='weighted')
        metrics['auc'] = roc_auc_score(testClasses, result, average='weighted')

        metrics['fscore_b'] = f1_score(testClasses, result)
        metrics['precision_b'] = precision_score(testClasses, result)
        metrics['recall_b'] = recall_score(testClasses, result)
        metrics['auc_b'] = roc_auc_score(testClasses, result)

        metrics['kappa'] = cohen_kappa_score(testClasses, result)
        metrics['accuracy'] = accuracy_score(testClasses, result)
        tn, fp, fn, metrics['tp_rate'] = confusion_matrix(testClasses,
                                                          result).ravel()
Example No. 22
    img_dir = 'C:/Users/Administrator/Desktop/Normal'
    for _ in os.listdir(img_dir):
        res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
        print(res)
        if res['desc'] == 'Not Blurry':
            preds.append(0)
        else:
            preds.append(1)

        gts.append(0)

    img_dir = 'C:/Users/Administrator/Desktop/Blur'
    for _ in os.listdir(img_dir):
        res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
        print(res)
        if res['desc'] == 'Not Blurry':
            preds.append(0)
        else:
            preds.append(1)

        gts.append(1)

    tok = time.time()
    print('FPS={}'.format(len(os.listdir(img_dir)) / (tok - tik)))

    print(confusion_matrix(gts, preds))
    print('Precision = %f' % precision_score(gts, preds))
    print('Recall = %f' % recall_score(gts, preds))
    print('Accuracy = %f' % accuracy_score(gts, preds))
Example No. 23
#     real.append(class_map[numpy.argmax(ipc)]) #Adding the real value to de real class vector

for doc in test_documents:
    result = model.predict_one(pickle.loads(doc['embedding']))
    pred.append(class_map[result]) #adding the result to the predicted vector
    real.append(doc['ipc_classes'][0][0])
    all_class.append(doc['ipc_classes'])

print(pred)
print(real)

# Calculating the metrics F1, precision, accuracy and recall
accuracy = accuracy_score(real, pred)
recall = recall_score(real, pred, average='weighted')
recall_per_class = recall_score(real, pred, average=None)
precision = precision_score(real, pred, average='weighted')
precision_per_class = precision_score(real, pred, average=None)
f1 = f1_score(real, pred, average='weighted')
f1_per_class = f1_score(real, pred, average=None)
results_per_class = dict()
for i in range(0, len(recall_per_class)):
    if not class_map[i] in results_per_class.keys():
        results_per_class[class_map[i]] = []
    results_per_class[class_map[i]].append(recall_per_class[i])
    results_per_class[class_map[i]].append(precision_per_class[i])
    results_per_class[class_map[i]].append(f1_per_class[i])


matrix = confusion_matrix(real, pred, labels=sorted(ipc_sections))

# plotting
Example No. 24
    y_train = []
    x_train = []
    for i in train:
        y_train.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_train.append(tmp)
        
    y_test = []
    x_test = []  
    for i in test:
        y_test.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_test.append(tmp)
        
    lr.fit(x_train, y_train)
    lrPredTest = lr.predict(x_test)
    lrPrecisionTest = precision_score(y_test, lrPredTest)
    lrRecallTest = recall_score(y_test, lrPredTest)
    lrF1Test = f1_score(y_test, lrPredTest)
    lrAvgPrecision += lrPrecisionTest
    lrAvgRecall += lrRecallTest
    lrAvgF1 += lrF1Test

print "log reg completed in ", time.time() - start, " s"
print "lr:\n Precision {}\n Recall {}\n F1 {}\n".format(lrAvgPrecision / 5, lrAvgRecall / 5, lrAvgF1 / 5)
  

start = time.time()
"""RANDOM FOREST"""
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=5)

rfAvgPrecision = 0.0
Example No. 25
def evaluate_results(result, target_def, n_shadow):
    attack_test_y = result['attack_test_y']
    attack_test_x = result['attack_test_x']
    preds = result['preds']
    target_y = result['target_y']
    target_x = result['target_x']
    labels = result['labels']

    #    INFORMATION ABOUT THE MODEL UNDER ATTACK
    x = PrettyTable(
        ['Model Definition', 'Training Accuracy', 'Testing Accuracy'])
    x.float_format = ".2"

    target_preds = np.argmax(attack_test_x, axis=1)
    train_acc = accuracy_score(target_y[attack_test_y == 1],
                               target_preds[attack_test_y == 1])
    test_acc = accuracy_score(target_y[attack_test_y == 0],
                              target_preds[attack_test_y == 0])

    x.add_row([
        target_def.split(os.path.dirname(os.getcwd()))[-1], train_acc * 100,
        test_acc * 100
    ])

    print(x.get_string(title='Target Model'))

    #    INFORMATION ABOUT THE OVERALL ATTACK EFFECTIVENESS
    cols = ['Num Shadow', 'Accuracy', 'Precision', 'Recall', 'F-1']
    x = PrettyTable(cols)
    x.float_format = ".2"

    p = np.argmax(preds, axis=1)
    x.add_row([
        n_shadow,
        accuracy_score(attack_test_y, p) * 100,
        precision_score(attack_test_y, p) * 100,
        recall_score(attack_test_y, p) * 100,
        f1_score(attack_test_y, p) * 100
    ])
    print(x.get_string(title='Attack Aggregate'))

    # noinspection PyShadowingNames
    def by_class_evaluation(attack_test_y,
                            target_y,
                            p,
                            attack_test_x,
                            labels=None):
        if labels is None:
            labels = np.unique(target_y)

        precisions = [
            precision_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        accuracies = [
            accuracy_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        f1_scores = [
            f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        recalls = [
            recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        c_train_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 1)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 1)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]
        c_test_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 0)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 0)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]

        x = PrettyTable()
        x.float_format = '.2'
        x.add_column("Class", labels)
        x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
        x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
        x.add_column("Attack Precision", np.round(precisions, 2))
        x.add_column("Attack Accuracy", np.round(accuracies, 2))
        x.add_column("Attack Recall", np.round(recalls, 2))
        x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
        x.add_column(
            "Percentage of Data",
            np.round(
                np.array([
                    len(target_y[target_y == c]) / len(target_y) * 100
                    for c in np.unique(target_y)
                ]), 2))
        print(x.get_string(title='Per Class Evaluation'))

    by_class_evaluation(attack_test_y,
                        target_y,
                        p,
                        attack_test_x,
                        labels=labels)

    return {
        'attack_test_y': attack_test_y,
        'attack_test_x': attack_test_x,
        'preds': preds,
        'target_y': target_y,
        'target_x': target_x,
        'target_def': target_def,
        'n_shadow': n_shadow,
        'labels': labels
    }
Example No. 26
dataframe = pd.read_csv("train.csv", sep=',', header=0, names=column_names, index_col=0,
                        usecols=[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11], nrows=set_sizes[i])

Y = dataframe["Short_or_long"]
X = dataframe[["vendor_id","passenger_count","pickup_longitude","pickup_latitude","dropoff_longitude","dropoff_latitude"]]

X_train = X.head(int(set_sizes[i]*0.7))
X_test = X.tail(int(set_sizes[i]*0.3))

Y_train = Y.head(int(set_sizes[i]*0.7))
Y_test = Y.tail(int(set_sizes[i]*0.3))

h = .02  # step size in the mesh

logreg = linear_model.LogisticRegression(C=1e5)

# create an instance of the logistic regression classifier and fit the data
logreg.fit(X_train, Y_train)


pred = logreg.predict(X_test)

# The coefficients
#print('Coefficients: \n', clf.coef_)
# The mean squared error
t=accuracy_score(Y_test, pred)
tt=precision_score(Y_test, pred, average='weighted')
#print(Y_tester_targets.size)
#print(pred_test.size)
print('Accuracy score: %.2f' % t)
print('Precision: %.2f'% tt)
Example No. 27
classifier = SupervisedDBNClassification(hidden_layers_structure=[10, 8],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.01,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=5000,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)

classifier.fit(X_train, Y_train)

# Test
Y_pred = classifier.predict(X_test)
a = accuracy_score(Y_test, Y_pred)
print('Done.\nAccuracy: %f' % a)
print('Done.\nPrecision: %f' % precision_score(Y_test, Y_pred))
print('Done.\nRecall: %f' % recall_score(Y_test, Y_pred))
print('Done.\nf1 score: %f' % f1_score(Y_test, Y_pred))
#print('Done.\nf1 score: %f' % classification_report(Y_test, Y_pred))
#print('Done.\nf1 score: %f' % confusion_matrix(Y_test, Y_pred))
cm1 = confusion_matrix(Y_test, Y_pred)
print(cm1)

total1 = sum(sum(cm1))
accuracy1 = (cm1[0, 0] + cm1[1, 1]) / total1
print('Accuracy : ', accuracy1)

sensitivity1 = cm1[0, 0] / (cm1[0, 0] + cm1[0, 1])
print('Sensitivity : ', sensitivity1)

specificity1 = cm1[1, 1] / (cm1[1, 0] + cm1[1, 1])
Example No. 28
#test_encode = []
#cont = 0
#while cont < 10:
#    test_encode.append(dataset.letra[cont].split())
#    cont += 1

#teste = label_encoder.transform(test_encode[0])

# Routines to feed the OneHotEncoder
onehot = OneHotEncoder()

int_encoded_fit = int_encoded_fit.reshape(len(int_encoded_fit), 1)
int_encoded_pred = int_encoded_pred.reshape(len(int_encoded_pred), 1)

letra_fit = onehot.fit_transform(int_encoded_fit)
letra_pred = onehot.transform(int_encoded_pred)

# Using the SVM
clf.fit(letra_fit, label_train)

prediction = clf.predict(letra_pred)

print()
print("Recall {}".format(
    recall_score(label_test, prediction, average='weighted')))
print("Precision {}".format(
    precision_score(label_test, prediction, average='weighted')))
print("F1 {}".format(f1_score(label_test, prediction, average='weighted')))
print("Accuracy {}".format(accuracy_score(label_test, prediction)))
Example No. 29
def run():
    paras, sents = create_dataset()
    
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    
    
    print len(X[0])
    
    sents = np.array(sents)
    
    skf = StratifiedKFold(Y, n_folds=10)
    
    f = open('results/correct.txt','w')
    f2 = open('results/wrong.txt','w')
    
    accs = []
    precs = []
    recs = []
    f1s = []
    
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
    
        sent_train = sents[train_index]
        sent_test = sents[test_index]
        
#         cv = CountVectorizer(stop_words="english", ngram_range=(1,1), min_df = 5)
#         sent_train_counts = cv.fit_transform(sent_train)
#         
#         tf_transformer = TfidfTransformer(use_idf=True).fit(sent_train_counts)
#         sent_train_counts = tf_transformer.transform(sent_train_counts)
#         
#         sent_train_counts = sent_train_counts.toarray()
#         
#         print sent_train_counts.shape
#         print X_train.shape
# 
#         new_train = []
#         for i,j in zip(X_train, sent_train_counts):
#             new_train.append(np.append(i,j))
        
        #fs = SelectKBest(chi2, k=24)
        #X_train = fs.fit_transform(X_train, y_train)
        
        clf = LogisticRegression()
        
        clf.fit(X_train, y_train)
        
        print clf.coef_
        
#         
#         sent_test_counts = cv.transform(sent_test)
#         sent_test_counts = tf_transformer.transform(sent_test_counts)
#         
#         sent_test_counts = sent_test_counts.toarray()
#         
#         new_test = []
#         for i,j in zip(X_test, sent_test_counts):
#             new_test.append(np.append(i,j))
        
        #X_test = fs.transform(X_test)
        
        y_pred = clf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)
        
        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1
        
        for (index,test),(y_t, y_p) in zip(zip(test_index, X_test), zip(y_test, y_pred)):
            if y_t == y_p:
#                 if paras[index]['prev_para']:
#                     f.write('%s\n' % paras[index]['prev_para']['sents'])
                f.write('%s\n' % sents[index])
                f.write('%s\n' % (y_t))
            else:
#                 if paras[index]['prev_para']:
#                     f2.write('%s\n' % paras[index]['prev_para']['sents'])
                f2.write('%s\n' % sents[index])
                f2.write('%s\n' % (y_t))
        
    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
Example No. 30
#data = data[np.isfinite(data['pred'])]
print(data)

#first
print("first:")
cnm = confusion_matrix(data['true'], data['pred'])
mat = np.matrix( [[cnm[1][1], cnm[0][1]] , [cnm[0][0], cnm[1][0]]])
print(mat, "\n")

# second 

print("second:")
acc = accuracy_score(data['true'], data['pred'])
print("accuracy: ",round(acc,2))

per = precision_score(data['true'], data['pred'])
print("percision: ",round(per,2))

rec = recall_score(data['true'], data['pred'])
print("recall: ", round(rec,2))

f_m = f1_score(data['true'], data['pred'])
print("f-metr: ", round(f_m,2), "\n")


#third
print("\nthird:")
from sklearn.metrics import roc_auc_score

data = pd.read_csv("D:/Sai/JavaDoc/Cousera/3/3_4/scores.csv")
Example No. 31
    y_train, y_test = classes[train_index], classes[test_index]

    # model training
    print(f'Generating model {i}...')
    classifier = RandomForestClassifier(n_estimators=10,
                                        criterion='gini',
                                        random_state=iteracao).fit(
                                            x_train, y_train)

    # classifying the test set
    y_pred = classifier.predict(x_test)

    # performance metrics
    aux_accuracy += accuracy_score(y_test, y_pred)
    aux_f1_score += f1_score(y_test, y_pred)
    aux_precision += precision_score(y_test, y_pred)
    aux_recall += recall_score(y_test, y_pred)
    conf_matrices += np.asarray(confusion_matrix(y_test, y_pred))

    print(f'Model {i} finished and evaluated.')
    i += 1

# results
print(f'\nITERATION #{iteracao} -----------------------')
print(f'Accuracy = {aux_accuracy / k_fold.n_splits}')
print(f'F1 Score = {aux_f1_score / k_fold.n_splits}')
print(f'Precision = {aux_precision / k_fold.n_splits}')
print(f'Recall = {aux_recall / k_fold.n_splits}')
print(f'Examples x Attributes = {tf_idf.shape}')
print(f'Confusion Matrix = \n{np.array(list(conf_matrices))}')
Example No. 32
classifier_models = [LogisticRegression(random_state=0),
                     KNeighborsClassifier(n_neighbors=10, metric='minkowski', p=2), 
                     SVC(kernel='linear', random_state=0), SVC(kernel='rbf', random_state=0), 
                     GaussianNB(), DecisionTreeClassifier(criterion="entropy", random_state=0),
                     RandomForestClassifier(n_estimators=40, criterion="entropy", random_state=0)]

y_prdc_results = [model.fit(x_train, y_train).predict(x_test) for model in classifier_models]

# importing the metrics
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

cm_results = [confusion_matrix(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]

acc_results = [accuracy_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
prec_results = [precision_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
recal_results = [recall_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
f1_results = [f1_score(y_test, y_prdc_results[i]) for i in range(0, len(classifier_models))]
x_label = ["LR", "KNN", "SVC(L)", "SVC(NL)", "NB", "DT", "RF"]

#Visualization of Results
plt.ylim(0, max(acc_results))
plt.bar(x_label, acc_results)
plt.title("Histogram view of accuracy")
plt.xlabel("Classification Models")
plt.ylabel("Accuracy")
plt.show()

plt.ylim(0, max(prec_results))
plt.bar(x_label, prec_results)
plt.title("Histogram view of precision")
Example No. 33
labels, features = targetFeatureSplit(data)

### it's all yours from here forward!

features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(features_train, labels_train)

numpos = 0
pred = clf.predict(features_test)
for i, item in enumerate(labels_test):
    print(i, item, pred[i])
    if item == 1 and item == pred[i]:
        numpos = numpos + 1

print("Number of true positives: ", numpos)
print("Precision: ", precision_score(labels_test, pred))
print("Recall: ", recall_score(labels_test, pred))

#print(len(labels_test))
#print(clf.score(features_test, labels_test))

predictions = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1]
true_labels = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]

print("Number of true positives: ", numpos)
print("Precision: ", precision_score(true_labels, predictions))
print("Recall: ", recall_score(true_labels, predictions))