Пример #1
0
def compare_abod_to_svm():
    emb1 = load_file("embeddings_matthias.pkl")
    emb2 = load_file("embeddings_matthias_big.pkl")
    emb3 = load_file("embeddings_laia.pkl")
    emb4 = load_file("embeddings_christian.pkl")
    emb_lfw = load_file("embeddings_lfw.pkl")

    clf = SVC(kernel='linear', probability=True)
    clf2 = ABOD()

    # train user and unknown class
    label_class = np.repeat(1, np.shape(emb1[0:100])[0])
    label_unknown = np.repeat(0, np.shape(emb_lfw)[0])
    training_embeddings = np.concatenate((emb1[0:100], emb_lfw))
    training_labels = np.concatenate((label_class, label_unknown))
    # train svm
    clf.fit(training_embeddings, training_labels)
    # train abod
    clf2.fit(emb1[0:100])

    # test on class
    prediction = clf.predict(emb2[0:100])
    errors = len(emb2[0:100]) - np.sum(prediction)
    print "Error rate: {}%".format(float(errors) / len(emb2[0:100]) * 100.0)

    # test on similar class
    prediction = clf.predict(emb4)
    errors = np.sum(prediction)
    print "Error rate: {}%".format(float(errors) / len(emb4) * 100.0)
Пример #2
0
def test_ABOD():

    clf = ABOD()

    emb1 = load_embeddings("embeddings_elias.pkl")
    emb2 = load_embeddings("embeddings_matthias.pkl")
    emb3 = load_embeddings("embeddings_matthias_big.pkl")
    emb4 = load_embeddings("embeddings_laia.pkl")
    emb5 = load_embeddings("embeddings_christian.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")

    clf.fit(emb2)

    # class_sample = emb3[100,:]
    # outlier_sample = emb1[30,:]

    # print class_sample

    start = time.time()
    abod_class = clf.predict_approx(emb3)
    print "time: ".format(time.time() - start)

    return
    abod_outliers = clf.predict(emb5)
    step = 0.0001
    start = 0.005
    stop = 0.6
    il = []
    ul = []
    x = np.arange(start, stop, step)
    for thresh in x:
        il.append(
            float(len(abod_class[abod_class < thresh])) / len(abod_class) *
            100.0)
        ul.append(
            float(len(abod_outliers[abod_outliers > thresh])) /
            len(abod_outliers) * 100.0)

    plt.plot(x, il, color='green', label="Inliers")
    plt.plot(x, ul, color='red', label="Outliers")
    plt.title("Classification Error")
    plt.xlabel("Threshold")
    plt.ylabel("Error [%]")
    plt.legend()
    plt.show()

    #
    thresh = 0.2
    print "error il: {}/{} : {}%".format(
        len(abod_class[abod_class < 0.2]), len(abod_class),
        float(len(abod_class[abod_class < 0.2])) / len(abod_class) * 100.0)
    print "error ul: {}/{} : {}%".format(
        len(abod_outliers[abod_outliers > 0.2]), len(abod_outliers),
        float(len(abod_outliers[abod_outliers > 0.2])) / len(abod_outliers) *
        100.0)
Пример #3
0
def test_against_threshold():
    emb1 = load_file("embeddings_matthias.pkl")
    emb2 = load_file("embeddings_matthias_big.pkl")
    emb3 = load_file("embeddings_laia.pkl")
    # emb4 = load_file("embeddings_christian.pkl")
    emb4 = load_file("embeddings_christian_clean.pkl")
    emb_lfw = load_file("embeddings_lfw.pkl")

    # random.shuffle(emb1)
    random.shuffle(emb2)
    random.shuffle(emb4)
    # random.shuffle(emb4)

    train = emb1[0:50]
    test = emb2[0:50]
    ul = emb4[0:50]

    # ------ ABOD
    if True:
        print "----------------ABOD-----------------"
        clf = ABOD()
        clf.fit(train)
        pred_abod = clf.predict(ul)
        error_rate = float(len(pred_abod[pred_abod > 0])) / float(
            len(ul)) * 100
        print "Misdetections ABOD (ul): {} - {}%".format(
            len(pred_abod[pred_abod > 0]), error_rate)

        pred_abod = clf.predict(test)
        error_rate = float(len(pred_abod[pred_abod > 0])) / float(
            len(ul)) * 100
        print "Misdetections ABOD (test): {} - {}%".format(
            len(pred_abod[pred_abod < 0]), error_rate)

    # ------ THRESHOLDING
    print "--------------THRESHOLDING-------------------"
    t = BinaryThreshold()
    t.partial_fit(train)

    # test on outliers
    pred_thresh = t.predict(ul, True)
    error_rate = float(len(pred_thresh[pred_thresh > 0])) / float(
        len(pred_thresh)) * 100
    print "Misdetections Thresholding (ul): {}/{} - {}%".format(
        len(pred_thresh[pred_thresh > 0]), len(pred_thresh), error_rate)

    # print np.where(pred_thresh == False)[0]
    # print np.nonzero(pred_thresh == 0)[0]
    # pred_thresh = t.predict(test, True)

    # test on inliers
    pred_thresh = t.predict(test, True)
    print "Misdetections Thresholding (test): {}/{}".format(
        len(np.where(pred_thresh == False)[0]), len(pred_thresh))
Пример #4
0
def cascaded_classifiers():
    """Cascade a linear SVM (user vs. LFW) with ABOD and print stage-wise errors.

    Stage 1: the SVM separates the trained user (label 1) from LFW "unknown"
    samples (label 0).  Stage 2: the samples the SVM got wrong are re-scored
    with ABOD to see how many it can recover.
    """
    emb1 = load_file("embeddings_matthias.pkl")
    emb2 = load_file("embeddings_matthias_big.pkl")
    emb3 = load_file("embeddings_laia.pkl")
    emb4 = load_file("embeddings_christian.pkl")
    emb_lfw = load_file("embeddings_lfw.pkl")

    clf = SVC(kernel='linear', probability=True, C=1)
    clf2 = ABOD()

    # random.shuffle(emb1)

    train = emb1[0:50]
    test = emb2  # inliers: same person, different dataset
    ul = emb4    # outliers: a different person

    # train user and unknown class
    label_class = np.repeat(1, np.shape(train)[0])
    label_unknown = np.repeat(0, np.shape(emb_lfw)[0])
    training_embeddings = np.concatenate((train, emb_lfw))
    training_labels = np.concatenate((label_class, label_unknown))
    clf.fit(training_embeddings, training_labels)
    clf2.fit(train)

    # --------------------- test on class
    prediction = clf.predict(test)
    errors = len(test) - np.sum(prediction)
    print "SVM Error rate: {}%".format(float(errors) / len(test) * 100.0)
    # filter samples classified as 'unknown' and re-score them with ABOD
    filtered = test[prediction == 0]
    abod_values = clf2.predict(filtered)
    errors = abod_values[abod_values < 0]
    # BUG FIX: the message says "%" but the original printed the raw
    # fraction (the * 100.0 scaling was missing)
    print "Total error (inliers classified as outliers): {}%".format(
        float(len(errors)) / float(len(test)) * 100.0)
    print "{}/{} additional inliers have been detected".format(
        len(abod_values[abod_values > 0]), len(filtered))

    # --------------------- test on outlier
    print "-------------testing on outliers----------------"
    prediction = clf.predict(ul)
    errors = np.sum(prediction)
    print "SVM Error rate: {}%".format(float(errors) / len(ul) * 100.0)
    # filter samples classified as 'inliers' and re-score them with ABOD
    filtered = ul[prediction == 1]
    abod_values = clf2.predict(filtered)
    errors = abod_values[abod_values > 0]
    # BUG FIX: same missing * 100.0 scaling as above
    print "Total error (outliers not detected): {}%".format(
        float(len(errors)) / float(len(ul)) * 100.0)
    print "{}/{} additional outliers have been detected".format(
        len(abod_values[abod_values < 0]), len(filtered))
Пример #5
0
def eval_on_subspace():
    emb1 = load_embeddings("embeddings_matthias.pkl")
    emb2 = load_embeddings("embeddings_matthias_big.pkl")
    emb3 = load_embeddings("embeddings_laia.pkl")
    emb4 = load_embeddings("embeddings_christian.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")

    ref = emb1[0:40, :]
    test = emb1[40:60, :]
    ul = emb4[0:10, :]

    clf = ABOD()
    metric = 'euclidean'

    # extract 99.9% subspace
    # basis, mean = ExtractSubspace(ref, 0.9)
    basis, mean = ExtractInverseSubspace(ref, 0.7)

    print "--- reduced dimension to: {}".format(np.size(basis, 1))

    # before
    sep1 = pairwise_distances(ref, test, metric=metric)
    sep2 = pairwise_distances(ref, ul, metric=metric)

    m1 = np.mean(sep1, axis=0)
    m2 = np.mean(sep2, axis=0)

    print "Original Space:"
    print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format(
        sep1.max(), sep2.max())

    clf.fit(ref)
    clf.predict(test)
    clf.predict(ul)

    # ----------------------------------------------

    # project data onto subspace
    ref = ProjectOntoSubspace(ref, mean, basis)
    ul = ProjectOntoSubspace(ul, mean, basis)
    test = ProjectOntoSubspace(test, mean, basis)

    # compare
    sep1 = pairwise_distances(ref, test, metric=metric)
    sep2 = pairwise_distances(ref, ul, metric=metric)

    # meandist inliers
    print "------------------meandist to inliers-----------------------"
    print m1
    print np.mean(sep1, axis=0)
    print "Mean decrease (pos): ", m1 - np.mean(sep1, axis=0)
    print "-----------------------------------------"

    # meandist outliers
    print "------------------meandist to outliers-----------------------"
    print m2
    print np.mean(sep2, axis=0)
    print "Mean decrease (neg): ", m2 - np.mean(sep2, axis=0)

    clf.fit(ref)
    clf.predict(test)
    clf.predict(ul)

    print "Inlier Space:"
    print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format(
        sep1.max(), sep2.max())
Пример #6
0
def ROC(clf):
    """Evaluate a one-class face classifier with ROC/AUC stats and a PR curve.

    Trains on a few samples of one person, scores a mixed test set
    (same person vs. another person), prints ROC statistics and shows a
    precision-recall plot.

    NOTE(review): the ``clf`` parameter is shadowed by ``clf = ABOD()``
    below, so whatever the caller passes in is ignored -- confirm whether
    that override is intentional (it follows a commented-out OneClassSVM).
    """

    # PARAMETERS
    nr_training_samples = 5
    nr_test_samples = 400

    # NOTE(review): save_csv is never used inside this function
    save_csv = True
    combine_scenes = False

    # ---------------------------------------------

    emb0 = load_embeddings("embeddings_matthias.pkl")
    emb1 = load_embeddings("matthias_test.pkl")
    emb2 = load_embeddings("matthias_test2.pkl")
    emb3 = load_embeddings("embeddings_christian_clean.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")

    # two scenes of the target person; LFW kept as a generic outlier pool
    class_ds1 = emb1
    class_ds2 = emb2
    outlier_ds = emb_lfw

    # combine the two scene datasets
    if combine_scenes:
        # NOTE(review): np.max of the two lengths can exceed the shorter
        # dataset; the slice then simply takes all of it
        num_samples_each = np.max([len(class_ds1), len(class_ds2)])
        class_ds_combined = np.concatenate(
            (class_ds1[0:num_samples_each], class_ds2[0:num_samples_each]))
    else:
        class_ds_combined = class_ds1
    # shuffle
    random.shuffle(class_ds_combined)

    # ---------------------------------------------

    # fit
    # clf = svm.OneClassSVM(kernel='linear')
    clf = ABOD()
    clf.fit(class_ds_combined[0:nr_training_samples])

    # true labels
    # first half: same person (positive class 1), second half: other person (2)
    labels = np.concatenate(
        (np.repeat(1, nr_test_samples / 2), np.repeat(2, nr_test_samples / 2)))

    test_samples = np.concatenate(
        (emb2[0:nr_test_samples / 2], emb3[0:nr_test_samples / 2]))

    # scores which are thresholded
    scores = clf.decision_function(test_samples)

    fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1)
    auc_val = auc(fpr, tpr)

    # ---------------------------------------------

    print "AUC: {}".format(auc_val)
    print "tpr: ", tpr
    print "fpr: ", fpr
    print "thresholds: ", thresholds

    precision, recall, _ = precision_recall_curve(labels, scores, pos_label=1)
    # print "Precision: ", precision
    # print "Recall: ", recall

    plt.plot(recall, precision)

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
Пример #7
0
def test_lmnn0():
    """Compare cosine distances and ABOD scores before/after LMNN metric learning.

    Fits LMNN on two people's embeddings, then reports mean pairwise cosine
    distances and ABOD predictions both in the original and in the learned
    space (smaller intra-class distance = better separation).
    """

    d0 = load_data('embeddings_matthias.pkl')
    d1 = load_data('embeddings_christian.pkl')

    # 30 training and 10 test samples per identity
    d0_train = d0[0:30,:]
    d1_train = d1[0:30,:]
    d0_test = d0[30:40,:]
    d1_test = d1[30:40,:]

    # ---------- train
    # binary labels: 0 for the first identity, 1 for the second
    labels_train = np.concatenate((np.repeat(0, len(d0_train)), np.repeat(1, len(d1_train))))
    data_train = np.concatenate((d0_train, d1_train))

    lmnn = LMNN(k=3, learn_rate=1e-6)
    start = time.time()
    lmnn.fit(data_train, labels_train)
    print "Fitting took {} seconds".format(time.time()-start)

    # ---------- test

    # mean pairwise cosine distances in the original embedding space:
    # digits encode (test class, train class)
    print "---- Evaluation in original space: Metric against Class 0"
    print "     Smaller valuer = better choice"
    cos_dist_orig00 = np.mean(pairwise_distances(d0_test, d0_train, metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(d0_test, d1_train, metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(d1_test, d0_train, metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(d1_test, d1_train, metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)

    # same comparison after transforming everything with the learned metric
    print "---- Evaluation in learned space:"
    print "     Smaller valuer = better choice"
    cos_dist_orig00 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d1_train), metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d1_train), metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)


    print "===========================ABOD===================================="

    # one ABOD per identity, in each of the two spaces
    clf0_orig = ABOD()
    clf1_orig = ABOD()
    clf0_opt = ABOD()
    clf1_opt = ABOD()

    # fit classifiers
    clf0_orig.fit(d0_train)
    clf1_orig.fit(d1_train)
    clf0_opt.fit(lmnn.transform(d0_train))
    clf1_opt.fit(lmnn.transform(d1_train))

    # predict
    # NOTE(review): return values are discarded -- presumably ABOD.predict
    # prints its scores as a side effect; confirm in the ABOD implementation
    print "\n-----------ABOD values in original space:------------------\n\n"
    clf0_orig.predict(d0_test)
    clf0_orig.predict(d1_test)
    clf1_orig.predict(d0_test)
    clf1_orig.predict(d1_test)

    # predict
    print "\n-----------ABOD values in custom space:------------------\n\n"
    clf0_opt.predict(lmnn.transform(d0_test))
    clf0_opt.predict(lmnn.transform(d1_test))
    clf1_opt.predict(lmnn.transform(d0_test))
    clf1_opt.predict(lmnn.transform(d1_test))