示例#1
0
def FashionMNIST(base_dir):
    """Build (or reuse) the Fashion MNIST artifacts and index them in ``fm.json``.

    The steps are, in order: load and shuffle the dataset, compute the t-SNE
    and UMAP 2D projections, fit the inverse projections (ILAMP, RBFInv and
    NNInv) against both projections, train the CNN classifier, and write a
    JSON file listing the path of every artifact. Each step is skipped when
    its output file already exists.

    Args:
        base_dir: Output directory prefix. File names are appended to it by
            plain string concatenation, so it is expected to end with a path
            separator.
    """
    os.makedirs(base_dir, exist_ok=True)

    print("Reading Fashion MNIST dataset...")
    start = time()

    train_X_path = base_dir + "X_train.npy"
    train_y_path = base_dir + "y_train.npy"
    test_X_path = base_dir + "X_test.npy"
    test_y_path = base_dir + "y_test.npy"

    if not os.path.exists(train_X_path):
        print("\tSaving train data")
        X_train, y_train = data.LoadFashionMNIST('train', base_dir + 'orig')
        # Shuffle once; the shuffled order is what gets cached on disk.
        new_idx = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[new_idx], y_train[new_idx]

        X_test, y_test = data.LoadFashionMNIST('test', base_dir + 'orig')

        np.save(train_X_path, X_train)
        np.save(train_y_path, y_train)
        np.save(test_X_path, X_test)
        np.save(test_y_path, y_test)
    else:
        print("\tReading train data")
        X_train = np.load(train_X_path)
        y_train = np.load(train_y_path)
        X_test = np.load(test_X_path)
        y_test = np.load(test_y_path)
    print("\ttime ", time() - start)

    # Flatten each image into a single feature vector for the projections.
    projection_size = 60000
    X_proj = np.copy(X_train[:projection_size])
    new_shape = (X_proj.shape[0], X_proj.shape[1] * X_proj.shape[2])
    X_proj = np.reshape(X_proj, new_shape)

    tsne_proj_path = base_dir + "tsne_proj.npy"
    if not os.path.exists(tsne_proj_path):
        # Uses t-SNE to project projection_size points from the dataset
        print("\tt-SNE projecting Fashion MNIST dataset...")
        start = time()
        tsne = TSNE(n_components=2,
                    random_state=420,
                    perplexity=10.0,
                    n_iter=1000,
                    n_iter_without_progress=300,
                    n_jobs=4)
        proj_tsne = tsne.fit_transform(X_proj)
        print("\ttime ", time() - start)
        # Rescale the 2D coordinates to the unit square before caching.
        proj_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        proj_tsne = proj_scaler.fit_transform(proj_tsne)
        np.save(tsne_proj_path, proj_tsne)
    else:
        print("\tReading projection")
        proj_tsne = np.load(tsne_proj_path)

    umap_proj_path = base_dir + "umap_proj.npy"
    if not os.path.exists(umap_proj_path):
        print("\tUMAP projecting Fashion MNIST dataset...")
        start = time()
        proj_umap = UMAP(n_components=2,
                         n_neighbors=10,
                         min_dist=0.5,
                         random_state=420).fit_transform(X_proj)

        print("\ttime ", time() - start)
        proj_scaler2 = preprocessing.MinMaxScaler(feature_range=(0, 1))
        proj_umap = proj_scaler2.fit_transform(proj_umap)
        np.save(umap_proj_path, proj_umap)
    else:
        proj_umap = np.load(umap_proj_path)

    train_y_proj_path = base_dir + "y_proj_true.npy"
    if not os.path.exists(train_y_proj_path):
        # Use y_train (bound in both dataset branches above). The former
        # y_train_base only existed when the dataset had just been generated,
        # so a cached dataset with a missing label file raised NameError.
        np.save(train_y_proj_path, y_train[:projection_size])

    subset_size = 20000

    print("ILAMP - tSNE")
    start = time()
    ilamp_tsne_path = base_dir + "ilamp_tsne.joblib"
    if not os.path.exists(ilamp_tsne_path):
        k_ilamp = 20
        ilamp_tsne = ILAMP(n_neighbors=k_ilamp)
        ilamp_tsne.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        ilamp_tsne.save(ilamp_tsne_path)
    else:
        # The loaded model is not used further in this function.
        ilamp_tsne = ILAMP()
        ilamp_tsne.load(ilamp_tsne_path)
    print("\ttime ", time() - start)

    print("ILAMP - UMAP")
    start = time()
    ilamp_umap_path = base_dir + "ilamp_umap.joblib"
    if not os.path.exists(ilamp_umap_path):
        k_ilamp = 25
        ilamp_umap = ILAMP(n_neighbors=k_ilamp)
        ilamp_umap.fit(X_proj[:subset_size], proj_umap[:subset_size])
        ilamp_umap.save(ilamp_umap_path)
    print("\ttime ", time() - start)

    print("RBFInv - Control Points tSNE")
    start = time()
    irbfcp_tsne_path = base_dir + "irbfcp_tsne.joblib"
    if not os.path.exists(irbfcp_tsne_path):
        EPS = 50000
        irbfcp_tsne = RBFInv(num_ctrl=200,
                             mode='rols',
                             kernel='gaussian',
                             eps=EPS,
                             normalize_c=True,
                             normalize_d=True)
        irbfcp_tsne.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        irbfcp_tsne.save(irbfcp_tsne_path)
    print("\ttime ", time() - start)

    print("RBFInv - Control Points UMAP")
    start = time()
    irbfcp_umap_path = base_dir + "irbfcp_umap.joblib"
    if not os.path.exists(irbfcp_umap_path):
        EPS = 50000
        irbfcp_umap = RBFInv(num_ctrl=200,
                             mode='rols',
                             kernel='gaussian',
                             eps=EPS,
                             normalize_c=True,
                             normalize_d=True)
        irbfcp_umap.fit(X_proj[:subset_size], proj_umap[:subset_size])
        irbfcp_umap.save(irbfcp_umap_path)
    print("\ttime ", time() - start)

    print("RBFInv - Cluster tSNE")
    start = time()
    irbfc_tsne_path = base_dir + "irbfc_tsne.joblib"
    if not os.path.exists(irbfc_tsne_path):
        EPS = 50000
        irbfc_tsne = RBFInv(num_ctrl=50,
                            mode='cluster',
                            eps=EPS,
                            kernel='gaussian',
                            normalize_c=True,
                            normalize_d=True)
        irbfc_tsne.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        irbfc_tsne.save(irbfc_tsne_path)
    print("\ttime ", time() - start)

    print("RBFInv - Cluster UMAP")
    start = time()
    irbfc_umap_path = base_dir + "irbfc_umap.joblib"
    if not os.path.exists(irbfc_umap_path):
        EPS = 50000
        irbfc_umap = RBFInv(num_ctrl=50,
                            mode='cluster',
                            eps=EPS,
                            kernel='gaussian',
                            normalize_c=True,
                            normalize_d=True)
        irbfc_umap.fit(X_proj[:subset_size], proj_umap[:subset_size])
        irbfc_umap.save(irbfc_umap_path)
    print("\ttime ", time() - start)

    print("NNInv tSNE")
    start = time()
    nninv_tsne_path = base_dir + "nninv_tsne.joblib"
    if not os.path.exists(nninv_tsne_path):
        nninv_tsne = NNInv()
        nninv_tsne.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        nninv_tsne.save(nninv_tsne_path, base_dir + 'nninv_tsne_keras.hdf5')
    print("\ttime ", time() - start)

    print("NNInv UMAP")
    start = time()
    nninv_umap_path = base_dir + "nninv_umap.joblib"
    if not os.path.exists(nninv_umap_path):
        nninv_umap = NNInv()
        nninv_umap.fit(X_proj[:subset_size], proj_umap[:subset_size])
        nninv_umap.save(nninv_umap_path, base_dir + 'nninv_umap_keras.hdf5')
    print("\ttime ", time() - start)

    print("Training classifier CNN...")
    start = time()
    clf_path = base_dir + "fm_cnn.hdf5"
    pred_path = base_dir + "y_pred_clf.npy"
    if not os.path.exists(clf_path):
        # Add the trailing channel axis and one-hot encode the labels.
        input_shape = (X_train.shape[1], X_train.shape[2], 1)
        X_train = X_train.reshape((X_train.shape[0], ) + input_shape)
        X_test = X_test.reshape((X_test.shape[0], ) + input_shape)
        y_train = keras.utils.to_categorical(y_train, 10)
        y_test = keras.utils.to_categorical(y_test, 10)

        X_proj = X_proj.reshape((X_proj.shape[0], ) + input_shape)

        clf1 = CNN_FM()
        clf1.fit(X_train,
                 y_train,
                 batch_size=64,
                 epochs=10,
                 verbose=1,
                 validation_data=(X_test, y_test))
        accuracy = clf1.evaluate(X_test, y_test, verbose=0)
        print("\tAccuracy on test data: ", accuracy)
        clf1.save(clf_path)
        # Predicted class of every projected sample.
        y_proj_pred = np.argmax(clf1.predict(X_proj), axis=1)
        np.save(pred_path, y_proj_pred)
    print("\ttime ", time() - start)

    print("Saving data for Fashion MNIST...")
    start = time()

    clfs = [clf_path]
    inv_projs = [
        ilamp_tsne_path, irbfcp_tsne_path, irbfc_tsne_path, nninv_tsne_path,
        ilamp_umap_path, irbfcp_umap_path, irbfc_umap_path, nninv_umap_path
    ]
    preds = [pred_path]

    data_json = {
        'X_train': train_X_path,
        'y_train': train_y_path,
        'X_test': test_X_path,
        'y_test': test_y_path,
        'projs': [tsne_proj_path, umap_proj_path],
        'inv_projs': inv_projs,
        'y_preds': preds,
        'y_true': train_y_proj_path,
        'clfs': clfs
    }

    with open(base_dir + "fm.json", 'w') as outfile:
        json.dump(data_json, outfile)
    print("\tFinished saving data...", time() - start)
示例#2
0
def Cifar10(base_dir='data/cifar10/'):
    """Prepare the Cifar-10 artifacts under ``base_dir`` and list them in ``cifar.json``.

    Runs, in order: dataset loading/standardisation/shuffling, the t-SNE
    projection, the inverse projections (ILAMP, three RBFInv variants and
    NNInv), and the CNN classifier. A step is only executed when the file it
    produces is not on disk yet. The JSON index is always rewritten.

    Args:
        base_dir: Directory prefix; file names are concatenated directly onto
            it, so it is expected to end with a path separator.
    """
    def in_base(filename):
        # Plain concatenation on purpose: paths follow base_dir verbatim.
        return base_dir + filename

    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    print("Reading Cifar-10 dataset...")
    tic = time()

    train_X_path = in_base("X_train.npy")
    train_y_path = in_base("y_train.npy")
    test_X_path = in_base("X_test.npy")
    test_y_path = in_base("y_test.npy")

    if os.path.exists(train_X_path):
        X_train = np.load(train_X_path)
        y_train = np.load(train_y_path)
        X_test = np.load(test_X_path)
        y_test = np.load(test_y_path)
    else:
        X_train, y_train, X_test, y_test = data.LoadCifar10(base_dir)
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        # TODO: normalize to [0, 1] - simpler to normalize inv_proj and to
        # adversarial attacks
        # Standardise both splits with the training-set statistics.
        all_axes = (0, 1, 2, 3)
        mu = np.mean(X_train, axis=all_axes)
        sigma = np.std(X_train, axis=all_axes)
        X_train = (X_train - mu) / (sigma + 1e-7)
        X_test = (X_test - mu) / (sigma + 1e-7)

        shuffled = np.random.permutation(X_train.shape[0])
        X_train = X_train[shuffled]
        y_train = y_train[shuffled]

        to_cache = (
            (train_X_path, X_train),
            (train_y_path, y_train),
            (test_X_path, X_test),
            (test_y_path, y_test),
        )
        for target, array in to_cache:
            np.save(target, array)
    print("\ttime ", time() - tic)

    # One flat feature vector per image for the projection methods.
    projection_size = X_train.shape[0]
    X_flat = np.copy(X_train[:projection_size])
    N, H, W, C = X_flat.shape
    X_flat = np.reshape(X_flat, (N, H * W * C))

    print("t-SNE projecting Cifar-10 dataset...")
    tic = time()

    proj_path = in_base("tsne_proj.npy")
    if os.path.exists(proj_path):
        proj_tsne = np.load(proj_path)
    else:
        embedded = TSNE(n_components=2, perplexity=40.0,
                        n_jobs=8).fit_transform(X_flat)
        unit_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        proj_tsne = unit_scaler.fit_transform(embedded)
        np.save(proj_path, proj_tsne)
    print("\ttime ", time() - tic)

    subset_size = 15000
    X_fit = X_flat[:subset_size]
    proj_fit = proj_tsne[:subset_size]

    def fit_unless_cached(title, target, build, *extra_save_args):
        # Announce the step, fit and save the model only if `target` is
        # missing, then report the elapsed time either way.
        print(title)
        began = time()
        if not os.path.exists(target):
            inverse = build()
            inverse.fit(X_fit, proj_fit)
            inverse.save(target, *extra_save_args)
        print("\ttime ", time() - began)

    ilamp_path = in_base("ilamp.joblib")
    fit_unless_cached("ILAMP", ilamp_path, lambda: ILAMP(n_neighbors=30))

    irbfcp_path = in_base("irbf_cp.joblib")
    fit_unless_cached(
        "RBFInv - Control Points", irbfcp_path,
        lambda: RBFInv(num_ctrl=200,
                       mode='rols',
                       kernel='gaussian',
                       eps=50000,
                       normalize_c=True,
                       normalize_d=True))

    irbfn_path = in_base("irbf_neighbors.joblib")
    fit_unless_cached(
        "RBFInv - Neighbors", irbfn_path,
        lambda: RBFInv(num_ctrl=20,
                       mode='neighbors',
                       eps=5000000,
                       kernel='gaussian',
                       normalize_c=True,
                       normalize_d=True))

    irbfc_path = in_base("irbf_cluster.joblib")
    fit_unless_cached(
        "RBFInv - Cluster", irbfc_path,
        lambda: RBFInv(num_ctrl=50,
                       mode='cluster',
                       eps=50000,
                       kernel='gaussian',
                       normalize_c=True,
                       normalize_d=True))

    nninv_path = in_base("nninv.joblib")
    fit_unless_cached("NNInv", nninv_path, NNInv,
                      in_base('nninv_keras.hdf5'))

    print("Training classifier CNN...")
    tic = time()
    clf_path = in_base("cifar_cnn.hdf5")
    pred_path = in_base("y_pred_clf.npy")

    # Restore the image layout and one-hot encode the labels.
    input_shape = (H, W, C)
    X_images = X_flat.reshape((X_flat.shape[0], ) + input_shape)
    X_train = X_train.reshape((X_train.shape[0], ) + input_shape)
    X_test = X_test.reshape((X_test.shape[0], ) + input_shape)
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    if not os.path.exists(clf_path):
        augmenter = ImageDataGenerator(rotation_range=15,
                                       width_shift_range=0.1,
                                       height_shift_range=0.1,
                                       horizontal_flip=True)
        augmenter.fit(X_train)
        clf = CNN_Cifar()
        batch_size = 64
        batches = augmenter.flow(X_train, y_train, batch_size=batch_size)
        clf.fit_generator(batches,
                          steps_per_epoch=X_train.shape[0] // batch_size,
                          epochs=125,
                          verbose=1,
                          validation_data=(X_test, y_test),
                          callbacks=[LearningRateScheduler(lr_schedule)])
        accuracy = clf.evaluate(X_test, y_test, verbose=0)
        # Predicted class of every projected sample.
        np.save(pred_path, np.argmax(clf.predict(X_images), axis=1))
        print("\tAccuracy on test data: ", accuracy)
        clf.save(clf_path)
    print("\ttime ", time() - tic)

    print("Saving data for Cifar-10...")
    tic = time()

    data_json = {
        'X_train': train_X_path,
        'y_train': train_y_path,
        'X_test': test_X_path,
        'y_test': test_y_path,
        'projs': [proj_path],
        'inv_projs': [
            ilamp_path, irbfcp_path, irbfn_path, irbfc_path, nninv_path
        ],
        'y_preds': [pred_path],
        'clfs': [clf_path]
    }

    with open(in_base("cifar.json"), 'w') as handle:
        json.dump(data_json, handle)
    print("\tFinished saving data...", time() - tic)
示例#3
0
# NOTE: this fragment relies on names bound earlier in the script (not shown
# here): `tsne`, `scaler`, `X_proj`, `base_dir` and the timer start `s`.

# Project X_proj with the pre-built t-SNE object, then pass the result
# through the shared scaler.
tsne_proj = tsne.fit_transform(X_proj)
tsne_proj = scaler.fit_transform(tsne_proj)
print("\ttime: ", time.time() - s)

print("UMAP Projection")
s = time.time()
umap_proj = UMAP(n_components=2, random_state=420, n_neighbors=5,
                 min_dist=0.3).fit_transform(X_proj)
# The same scaler object is re-fitted here, on the UMAP output.
umap_proj = scaler.fit_transform(umap_proj)
print("\ttime: ", time.time() - s)

# Only the first `subset_size` samples are used to fit the inverse projections.
subset_size = 4659
print("\n\nILAMP tSNE")
s = time.time()
k_ilamp = 20
ilamp_tsne = ILAMP(n_neighbors=k_ilamp)
ilamp_tsne.fit(X_proj[:subset_size], tsne_proj[:subset_size])
ilamp_tsne_path = base_dir + "ilamp_tsne.joblib"
ilamp_tsne.save(ilamp_tsne_path)
print("\ttime: ", time.time() - s)

print("\n\nILAMP UMAP")
s = time.time()
# Same neighbourhood size as the t-SNE variant (k_ilamp is reused).
ilamp_umap = ILAMP(n_neighbors=k_ilamp)
ilamp_umap.fit(X_proj[:subset_size], umap_proj[:subset_size])
ilamp_umap_path = base_dir + "ilamp_umap.joblib"
ilamp_umap.save(ilamp_umap_path)
print("\ttime: ", time.time() - s)

# print("\n\nNNInv TSNE")
# s = time.time()
示例#4
0
def MNIST(base_dir):
    """Build (or reuse) the MNIST artifacts and index them in ``mnist.json``.

    The steps are, in order: load and shuffle the dataset, compute the t-SNE
    2D projection, fit the inverse projections (ILAMP, three RBFInv variants
    and NNInv), train the CNN classifier, and write a JSON file listing the
    path of every artifact. Each step is skipped when its output file already
    exists.

    Args:
        base_dir: Directory prefix. File names are appended to it by plain
            string concatenation, so it is expected to end with a path
            separator. The raw data is read from ``base_dir + 'orig/'``.
    """
    print("Reading MNIST dataset...")
    start = time()

    train_X_path = base_dir + "X_train.npy"
    train_y_path = base_dir + "y_train.npy"
    test_X_path = base_dir + "X_test.npy"
    test_y_path = base_dir + "y_test.npy"
    if not os.path.exists(train_X_path):
        X_train, y_train = data.LoadMNISTData('train', base_dir + 'orig/')
        # Shuffle once; the shuffled order is what gets cached on disk.
        new_idx = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[new_idx], y_train[new_idx]

        X_test, y_test = data.LoadMNISTData('test', base_dir + 'orig/')

        np.save(train_X_path, X_train)
        np.save(train_y_path, y_train)
        np.save(test_X_path, X_test)
        np.save(test_y_path, y_test)
    else:
        X_train = np.load(train_X_path)
        y_train = np.load(train_y_path)
        X_test = np.load(test_X_path)
        y_test = np.load(test_y_path)

    print("\tFinished reading dataset...", time() - start)

    # Flatten each image into a single feature vector for the projection.
    projection_size = 60000
    X_proj = np.copy(X_train[:projection_size])
    new_shape = (X_proj.shape[0], X_proj.shape[1] * X_proj.shape[2])
    X_proj = np.reshape(X_proj, new_shape)

    proj_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

    # Uses t-SNE to project projection_size points from the dataset
    print("tSNE projection")
    start = time()
    proj_path2 = base_dir + "tsne_proj.npy"
    # Bound unconditionally: the JSON index below always references it, and it
    # used to be defined only when the projection was recomputed (NameError
    # on a cached projection).
    train_y_proj_path = base_dir + "y_proj_true.npy"
    if not os.path.exists(proj_path2):
        tsne = TSNE(n_components=2, perplexity=20.0, n_jobs=8)
        proj_tsne = tsne.fit_transform(X_proj)
        proj_tsne = proj_scaler.fit_transform(proj_tsne)

        # y_train is bound in both dataset branches; the former y_train_base
        # only existed when the dataset had just been generated.
        np.save(train_y_proj_path, y_train[:projection_size])
        np.save(proj_path2, proj_tsne)
    else:
        proj_tsne = np.load(proj_path2)
        if not os.path.exists(train_y_proj_path):
            # Keep the index consistent when only the label file is missing.
            np.save(train_y_proj_path, y_train[:projection_size])
    print("\tProjection finished: ", time() - start)

    subset_size = 15000
    print("ILAMP")
    start = time()
    ilamp_path = base_dir + "ilamp.joblib"
    if not os.path.exists(ilamp_path):
        k_ilamp = 20
        ilamp = ILAMP(n_neighbors=k_ilamp)
        ilamp.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        ilamp.save(ilamp_path)
    print("\ttime ", time() - start)

    print("RBFInv - Control Points")
    start = time()
    irbfcp_path = base_dir + "irbf_cp.joblib"
    if not os.path.exists(irbfcp_path):
        EPS = 50000
        irbf_cp = RBFInv(num_ctrl=400,
                         mode='rols',
                         kernel='gaussian',
                         eps=EPS,
                         normalize_c=True,
                         normalize_d=True)
        irbf_cp.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        irbf_cp.save(irbfcp_path)
    print("\ttime ", time() - start)

    print("RBFInv - Neighbors")
    start = time()
    irbfn_path = base_dir + "irbf_neighbors.joblib"
    if not os.path.exists(irbfn_path):
        EPS = 5000000
        irbf_neighbors = RBFInv(num_ctrl=20,
                                mode='neighbors',
                                kernel='gaussian',
                                eps=EPS,
                                normalize_c=True,
                                normalize_d=True)
        irbf_neighbors.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        irbf_neighbors.save(irbfn_path)
    print("\ttime ", time() - start)

    print("RBFInv - Cluster")
    start = time()
    irbfc_path = base_dir + "irbf_cluster.joblib"
    if not os.path.exists(irbfc_path):
        EPS = 50000
        irbf_cluster = RBFInv(num_ctrl=50,
                              mode='cluster',
                              kernel='gaussian',
                              eps=EPS,
                              normalize_c=True,
                              normalize_d=True)
        irbf_cluster.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        irbf_cluster.save(irbfc_path)
    print("\ttime ", time() - start)

    print("NNInv")
    start = time()
    nninv_path = base_dir + "nninv.joblib"
    if not os.path.exists(nninv_path):
        nninv = NNInv()
        nninv.fit(X_proj[:subset_size], proj_tsne[:subset_size])
        nninv.save(nninv_path, base_dir + 'nninv_keras.hdf5')
    print("\ttime ", time() - start)

    # Add the trailing channel axis and one-hot encode the labels.
    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    X_train = X_train.reshape((X_train.shape[0], ) + input_shape)
    X_test = X_test.reshape((X_test.shape[0], ) + input_shape)
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    X_proj = X_proj.reshape((X_proj.shape[0], ) + input_shape)

    clf1_path = base_dir + "mnist_cnn1.hdf5"
    pred_path1 = base_dir + "y_pred_clf1.npy"
    if not os.path.exists(clf1_path):
        print("Training classifier CNN...")
        start = time()
        clf1 = CNNModel(input_shape, 10)
        clf1.fit(X_train,
                 y_train,
                 batch_size=128,
                 epochs=14,
                 verbose=1,
                 validation_data=(X_test, y_test))
        print("\tAccuracy on test data: ",
              clf1.evaluate(X_test, y_test, verbose=0))
        print("\tFinished training classifier...", time() - start)
        clf1.save(clf1_path)
        # Predicted class of every projected sample.
        y_proj_pred1 = np.argmax(clf1.predict(X_proj), axis=1)
        np.save(pred_path1, y_proj_pred1)

        print("\tFinished training classifier...", time() - start)
    # TODO: when the classifier is cached, load it, predict and save y.

    print("Saving data for MNIST...")
    # Restart the timer so the final report measures only the save step
    # instead of reusing the start time of an earlier stage.
    start = time()
    clfs = [clf1_path]

    inv_projs = [ilamp_path, irbfcp_path, irbfn_path, irbfc_path, nninv_path]
    preds = [pred_path1]

    data_json = {
        'X_train': train_X_path,
        'y_train': train_y_path,
        'X_test': test_X_path,
        'y_test': test_y_path,
        'projs': [proj_path2],
        'inv_projs': inv_projs,
        'y_preds': preds,
        'y_true': train_y_proj_path,
        'clfs': clfs
    }

    with open(base_dir + "mnist.json", 'w') as outfile:
        json.dump(data_json, outfile)

    print("\tFinished saving data...", time() - start)
示例#5
0
def distnd_adv(X, X_proj, clf, grid_size, inv_proj=None):
    """Compute a grid of nD distances to the decision boundary via adversarial examples.

    For every cell of a ``grid_size x grid_size`` grid one sample is picked
    (a synthetic one, generated by the inverse projection, is added for each
    cell that holds no original sample). An FGSM attack is run on that sample
    and the norm of the difference between the sample and the adversarial
    example is stored. When the attack returns ``None`` the function falls
    back to the closest sample with a different predicted label and
    ``dist_nd_bisection``.

    Args:
        X: 2D array of nD samples (``X.shape[1]`` features).
        X_proj: 2D projection of ``X``; cell centres are generated in
            ``[0, 1]``, so it is presumably scaled to the unit square —
            TODO confirm against ``build_grid``.
        clf: Classifier wrapper exposing ``Predict``, ``clf`` (the Keras
            model) and ``shape`` (the per-sample input shape).
        grid_size: Number of cells per grid side.
        inv_proj: Fitted inverse projection; when ``None`` an ``ILAMP`` is
            fitted on ``(X, X_proj)``.

    Returns:
        ``(grid_size, grid_size)`` array of distances. Cells where no
        distance could be computed are left at 0.
    """
    import time

    if inv_proj is None:
        inv_proj = ILAMP()
        inv_proj.fit(X, X_proj)

    cells_orig = build_grid(X_proj, grid_size)

    num_features = X.shape[1]
    # list of samples generated by inverse projection
    invproj_samples = []
    # 2D points used to create back projection
    syn_proj = []

    print("generating inverse projection samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            if len(cells_orig[row][col]) > 0:
                continue

            # Centre of the empty cell in projection coordinates.
            coords = np.array([(col + 0.5) / grid_size,
                               (row + 0.5) / grid_size])
            sample = inv_proj.transform([coords], normalize=True)[0]
            invproj_samples.append(sample)
            syn_proj.append(coords)
    print("\ttime: ", time.time() - s)

    num_syn = len(invproj_samples)
    # Explicit 2D shapes: with no empty cell np.array([]) has shape (0,),
    # which cannot be assigned into the (0, num_features) slices below.
    invproj_samples = np.array(invproj_samples).reshape(
        (num_syn, num_features))
    syn_proj = np.array(syn_proj).reshape((num_syn, 2))

    num_orig = X.shape[0]
    num_total = num_orig + num_syn
    X_all = np.zeros((num_total, num_features))
    X_all[:num_orig] = X
    X_all[num_orig:] = invproj_samples

    X_proj_all = np.zeros((num_total, 2))
    X_proj_all[:num_orig] = X_proj
    X_proj_all[num_orig:] = syn_proj
    cells = build_grid(X_proj_all, grid_size)

    print("predicting all samples")
    s = time.time()
    y_all = clf.Predict(X_all)
    print("\ttime: ", time.time() - s)

    # foolbox model
    # TODO: compute bounds from X_min and X_max
    # TODO: make CLF class compute the adversarial model
    model = foolbox.models.KerasModel(clf.clf, bounds=(0.0, 1.0))
    attack = foolbox.attacks.FGSM(model)

    print("constructing annoy structure")
    s = time.time()
    t = annoy.AnnoyIndex(num_features)
    for i in range(num_total):
        t.add_item(i, X_all[i])
    # Request at least one tree so small inputs (< 1000 samples) still get
    # a usable index.
    t.build(max(1, num_total // 1000))
    print("\ttime: ", time.time() - s)

    dist_nd_adv = np.zeros((grid_size, grid_size))

    print("computing distance to boundary nd by adversarial examples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            # First sample registered in the cell represents the cell.
            sample_idx = cells[row][col][0]
            sample = X_all[sample_idx]
            sample_label = y_all[sample_idx]

            sample = sample.reshape(clf.shape)
            adversarial = attack(sample, sample_label)
            if adversarial is None:
                print("adversarial is None: ", row, col, "bisection")
                adv_idx = closest_diff_label(X_all, y_all, sample_idx,
                                             sample_label, t)
                if adv_idx == -1:
                    # No differently-labelled neighbour: skip the cell rather
                    # than index X_all[-1], which would silently measure the
                    # distance to the last sample.
                    print("problem on: ", row, col)
                    continue
                dist = dist_nd_bisection(X_all[sample_idx], X_all[adv_idx],
                                         clf)
                dist_nd_adv[row, col] = dist
                continue

            adversarial_label = np.argmax(model.predictions(adversarial))
            if sample_label == adversarial_label:
                # The attack result did not change the label; leave 0.
                print("error on: ", row, col)
                continue
            dist_nd_adv[row, col] = np.linalg.norm(sample - adversarial)

    print("\ttime: ", time.time() - s)
    return dist_nd_adv
示例#6
0
def distnd2(X, X_proj, clf, grid_size, inv_proj=None):
    """Compute a per-cell nD distance map using nearest differently-labelled samples.

    Every cell of a ``grid_size`` x ``grid_size`` grid gets a representative
    sample: the first data point that ``build_grid`` placed in the cell, or,
    for cells without data points, a synthetic sample obtained by sending the
    cell centre through ``inv_proj``.  For each representative an annoy index
    is searched for the closest sample with a different predicted label, and
    ``dist_nd_bisection`` is called on the pair to produce the cell value.

    Args:
        X: 2D array of samples, one row per sample.
        X_proj: 2D coordinates of ``X``.
            NOTE(review): cell centres are generated in [0, 1], so the
            projection is presumably normalised to that range -- confirm.
        clf: classifier wrapper exposing ``Predict``.
        grid_size: number of rows/columns of the grid.
        inv_proj: fitted inverse projection; when ``None`` an ``ILAMP`` is
            fitted on ``(X, X_proj)``.

    Returns:
        ``(grid_size, grid_size)`` array of distances.  Cells for which no
        sample with a different label could be found keep the value 0.0.
    """
    import time

    if inv_proj is None:
        inv_proj = ILAMP()
        inv_proj.fit(X, X_proj)

    cells_orig = build_grid(X_proj, grid_size)
    num_features = X.shape[1]

    # Create one synthetic sample at the centre of every empty cell so that
    # each cell ends up with at least one representative.
    ilamp_samples = []
    ilamp_proj = []
    print("generating ilamp samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            if len(cells_orig[row][col]) == 0:
                coords = np.array([(col + 0.5) / grid_size,
                                   (row + 0.5) / grid_size])
                sample = inv_proj.transform([coords], normalize=True)[0]
                ilamp_samples.append(sample)
                ilamp_proj.append(coords)
    print("\ttime: ", time.time() - s)

    num_syn = len(ilamp_samples)
    num_orig = X.shape[0]
    num_total = num_orig + num_syn

    X_all = np.zeros((num_total, num_features))
    X_all[:num_orig] = X
    X_proj_all = np.zeros((num_total, 2))
    X_proj_all[:num_orig] = X_proj
    # Guarded: with no empty cell the lists convert to a shape-(0,) array,
    # which cannot be broadcast into the (0, num_features) target slice.
    if num_syn > 0:
        X_all[num_orig:] = np.array(ilamp_samples)
        X_proj_all[num_orig:] = np.array(ilamp_proj)

    print("predicting all samples")
    s = time.time()
    y_all = clf.Predict(X_all)
    print("\ttime: ", time.time() - s)

    dist_nd_2 = np.zeros((grid_size, grid_size))
    cells = build_grid(X_proj_all, grid_size)

    print("constructing annoy structure")
    s = time.time()
    # NOTE(review): no metric is passed, so annoy's default metric is used;
    # confirm that this is the intended notion of "closest".
    t = annoy.AnnoyIndex(num_features)
    for i in range(num_total):
        t.add_item(i, X_all[i])
    # Always build at least one tree: for fewer than 1000 samples the integer
    # division yields 0 and an index without trees cannot answer queries.
    t.build(max(1, num_total // 1000))
    print("\ttime: ", time.time() - s)

    print("computing distances between nd samples")
    s = time.time()
    for row in range(grid_size):
        for col in range(grid_size):
            sample_idx = cells[row][col][0]
            sample = X_all[sample_idx]
            label_sample = y_all[sample_idx]

            # Widen the neighbourhood in steps of 1000 until a sample with a
            # different label shows up.  The search is bounded by the number
            # of indexed items so it terminates even when every sample shares
            # the same label.
            target_idx = -1
            num_n_prev = 0
            while target_idx == -1 and num_n_prev < num_total:
                num_n = num_n_prev + 1000
                nns = t.get_nns_by_item(sample_idx, num_n)
                for nn in nns[num_n_prev:num_n]:
                    if y_all[nn] != label_sample:
                        target_idx = nn
                        break
                num_n_prev = num_n

            if target_idx == -1:
                # Nothing to bisect against; indexing X_all[-1] would quietly
                # pick an unrelated sample, so leave the cell at 0.0 instead.
                print("error on pixel ", row, col)
                continue
            dist_nd_2[row, col] = dist_nd_bisection(sample, X_all[target_idx],
                                                    clf)

    print("\ttime: ", time.time() - s)
    return dist_nd_2
示例#7
0
def distnd(dmap, X, proj, clf=None, inv_proj=None):
    """Build a grid of nD distances between each cell and its boundary cell.

    The grid resolution is taken from ``dmap.shape[0]``.  Boundary cells are
    obtained from ``boundary_cells(dmap)``; a Euclidean distance transform
    then gives, for every cell, the indices of its closest boundary cell.
    After ``refine_boundaries`` has adjusted that mapping, ``distance_nd`` is
    evaluated for every (cell, boundary cell) pair.

    Args:
        dmap: dense map whose first dimension defines the grid size.
        X: nD samples.
        proj: 2D projection of ``X``.
        clf: classifier forwarded to ``distance_nd``.
        inv_proj: inverse projection forwarded to ``distance_nd``; when
            ``None`` an ``ILAMP`` is fitted on ``(X, proj)``.

    Returns:
        ``(grid_size, grid_size)`` array with one distance per cell.
    """
    import time

    def report(started):
        # Print the time elapsed since ``started``.
        print("\ttime: ", time.time() - started)

    if inv_proj is None:
        inv_proj = ILAMP()
        inv_proj.fit(X, proj)

    grid_size = dmap.shape[0]

    print("computing boundary cells")
    tic = time.time()
    on_db = boundary_cells(dmap)
    report(tic)

    print("computing distance transform")
    tic = time.time()
    # Only the index arrays are needed: for each cell they hold the row and
    # column of the closest cell selected by the distance transform.
    nearest = ndimage.distance_transform_edt(on_db, return_indices=True)[1]
    db_map = np.dstack((nearest[0], nearest[1]))
    report(tic)

    # Because of the way the boundary map is constructed, a cell and its
    # closest boundary cell are not guaranteed to have different labels;
    # refine_boundaries adjusts db_map for that.
    print("refining boundaries")
    tic = time.time()
    refine_boundaries(dmap, db_map)
    report(tic)

    cells = build_grid(proj, grid_size)
    dist_nd = np.zeros((grid_size, grid_size))

    print("computing nd distances")
    tic = time.time()
    for row, col in np.ndindex(grid_size, grid_size):
        db_r, db_c = db_map[row, col]
        dist_nd[row, col] = distance_nd(row, col, db_r, db_c, grid_size, X,
                                        cells, proj,
                                        inv_proj=inv_proj, clf=clf)
    report(tic)
    return dist_nd
示例#8
0
def main():
    """Generate all MNIST artifacts: projections, inverse projections, CNN, grids.

    The base directory is read from ``sys.argv[1]``.  The steps, in order:

    1. Load the MNIST train/test splits with ``data.LoadMNISTData`` and save
       the training labels.
    2. Project the first ``projection_size`` training images with t-SNE and
       UMAP, each rescaled to [0, 1] with a ``MinMaxScaler``.
    3. Fit and save one inverse projection of each kind (ILAMP, RBFInv with
       control points, RBFInv with clusters, NNInv) per projection, using
       the first ``subset_size`` samples.
    4. Train and save the CNN classifier, its predictions on the projected
       samples and its ``CLF`` JSON description.
    5. For every inverse projection, write a grid with ``save_grid`` and the
       matching UI description with ``save_json_ui``.

    Exits with status 1 when the base directory argument is missing.
    """
    import os  # local import: only needed here for building output paths

    if len(sys.argv) < 2:
        print("Usage: ./fashion_mnist.py <base_dir>")
        # A missing argument is an error, so report a non-zero status.
        sys.exit(1)
    base_dir = sys.argv[1]

    def out_path(name):
        # Join instead of concatenating so base_dir works with or without a
        # trailing separator.
        return os.path.join(base_dir, name)

    print("Load dataset\n\n")
    s = time.time()
    orig_dir = out_path('orig/')
    X_train, y_train = data.LoadMNISTData('train', orig_dir)

    train_y_path = out_path("y_train.npy")
    np.save(train_y_path, y_train)

    X_test, y_test = data.LoadMNISTData('test', orig_dir)

    # Flattened copy of the training images (one row per image).
    X_nd = np.copy(X_train)
    X_nd = X_nd.reshape((X_nd.shape[0], X_nd.shape[1] * X_nd.shape[2]))

    projection_size = 60000
    X_proj = np.copy(X_train[:projection_size])
    new_shape = (X_proj.shape[0], X_proj.shape[1] * X_proj.shape[2])
    X_proj = np.reshape(X_proj, new_shape)
    print("\ttime: ", time.time() - s)

    scaler = MinMaxScaler(feature_range=(0, 1))

    print("TSNE Projection")
    s = time.time()
    tsne = TSNE(n_components=2,
                random_state=420,
                perplexity=25.0,
                n_iter=3000,
                n_iter_without_progress=300,
                n_jobs=4)
    tsne_proj = tsne.fit_transform(X_proj)
    tsne_proj = scaler.fit_transform(tsne_proj)
    print("\ttime: ", time.time() - s)

    print("UMAP Projection")
    s = time.time()
    umap_proj = UMAP(n_components=2,
                     random_state=420,
                     n_neighbors=5,
                     min_dist=0.3).fit_transform(X_proj)
    umap_proj = scaler.fit_transform(umap_proj)
    print("\ttime: ", time.time() - s)

    # ------------------------------------------------------------------
    # Inverse projections, all fitted on the same leading subset.
    # ------------------------------------------------------------------
    subset_size = 15000
    k_ilamp = 20
    EPS = 50000
    X_fit = X_proj[:subset_size]
    tsne_fit = tsne_proj[:subset_size]
    umap_fit = umap_proj[:subset_size]

    def make_ilamp():
        return ILAMP(n_neighbors=k_ilamp)

    def make_rbf_ctrl_pts():
        return RBFInv(num_ctrl=400,
                      mode='rols',
                      kernel='gaussian',
                      eps=EPS,
                      normalize_c=True,
                      normalize_d=True)

    def make_rbf_cluster():
        return RBFInv(num_ctrl=50,
                      mode='cluster',
                      kernel='gaussian',
                      eps=EPS,
                      normalize_c=True,
                      normalize_d=True)

    # (title, factory, key, 2D points to fit on, also save a keras file?)
    # Each model is paired with its own projection; the original fitted the
    # "RBFInv CLUSTER UMAP" model on the t-SNE points by mistake.
    inverse_jobs = [
        ("\n\nILAMP tSNE", make_ilamp, "ilamp_tsne", tsne_fit, False),
        ("\n\nILAMP UMAP", make_ilamp, "ilamp_umap", umap_fit, False),
        ("\n\nRBFInv CTRL PTS TSNE", make_rbf_ctrl_pts, "irbfcp_tsne",
         tsne_fit, False),
        ("\n\nRBFInv CTRL PTS UMAP", make_rbf_ctrl_pts, "irbfcp_umap",
         umap_fit, False),
        ("\n\nRBFInv CLUSTER TSNE", make_rbf_cluster, "irbfc_tsne",
         tsne_fit, False),
        ("\n\nRBFInv CLUSTER UMAP", make_rbf_cluster, "irbfc_umap",
         umap_fit, False),
        ("\n\nNNInv TSNE", NNInv, "nninv_tsne", tsne_fit, True),
        ("\n\nNNInv UMAP", NNInv, "nninv_umap", umap_fit, True),
    ]

    inv_projs = {}
    inv_proj_paths = {}
    for title, factory, key, proj_fit, with_keras in inverse_jobs:
        print(title)
        s = time.time()
        model = factory()
        model.fit(X_fit, proj_fit)
        model_path = out_path(key + ".joblib")
        if with_keras:
            # NNInv.save takes a second path for its keras model.
            model.save(model_path, out_path(key + '_keras.hdf5'))
        else:
            model.save(model_path)
        print("\ttime: ", time.time() - s)
        inv_projs[key] = model
        inv_proj_paths[key] = model_path

    # ------------------------------------------------------------------
    # Classifier.
    # ------------------------------------------------------------------
    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    X_train = X_train.reshape((X_train.shape[0], ) + input_shape)
    X_test = X_test.reshape((X_test.shape[0], ) + input_shape)
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    X_proj = X_proj.reshape((X_proj.shape[0], ) + input_shape)

    print("\n\nTraining classifier")
    s = time.time()
    clf_keras = CNNModel(input_shape, 10)
    clf_keras.fit(X_train,
                  y_train,
                  batch_size=128,
                  epochs=14,
                  verbose=1,
                  validation_data=(X_test, y_test))
    print("\tAccuracy on test data: ",
          clf_keras.evaluate(X_test, y_test, verbose=0))
    clf_keras_path = out_path("mnist_cnn.hdf5")
    clf_keras.save(clf_keras_path)
    y_proj_pred = np.argmax(clf_keras.predict(X_proj), axis=1)
    pred_path = out_path("y_pred_clf.npy")
    np.save(pred_path, y_proj_pred)

    clf = CLF(clf=clf_keras,
              clf_type='keras_cnn',
              clf_path=clf_keras_path,
              shape=input_shape)
    clf_path = out_path('mnist_cnn.json')
    clf.save_json(clf_path)
    print("\ttime: ", time.time() - s)

    # ------------------------------------------------------------------
    # Grids and UI descriptions, one per inverse projection.
    # ------------------------------------------------------------------
    N = 1
    R = 500

    # (title, inverse projection key, type tag for the UI, full projection,
    #  UI file suffix)
    # NOTE(review): the NNInv tSNE suffix starts with an extra underscore,
    # giving "mnist_500__ui_nninv_tsne.json".  It is kept byte-for-byte so
    # the artifact name does not change; confirm whether it is a typo.
    grid_jobs = [
        ("\n\nGRID ILAMP tSNE", "ilamp_tsne", "ilamp", tsne_proj,
         'ui_ilamp_tsne.json'),
        ("\n\nGRID ILAMP UMAP", "ilamp_umap", "ilamp", umap_proj,
         'ui_ilamp_umap.json'),
        ("\n\nGRID NNInv tSNE", "nninv_tsne", "nninv", tsne_proj,
         '_ui_nninv_tsne.json'),
        ("\n\nGRID NNInv UMAP", "nninv_umap", "nninv", umap_proj,
         'ui_nninv_umap.json'),
        ("\n\nGRID RBFInv CTRL PTS tSNE", "irbfcp_tsne", "rbf", tsne_proj,
         'ui_irbfcp_tsne.json'),
        ("\n\nGRID RBFInv CTRL PTS UMAP", "irbfcp_umap", "rbf", umap_proj,
         'ui_irbfcp_umap.json'),
        ("\n\nGRID RBFInv CLUSTER tSNE", "irbfc_tsne", "rbf", tsne_proj,
         'ui_irbfc_tsne.json'),
        ("\n\nGRID RBFInv CLUSTER UMAP", "irbfc_umap", "rbf", umap_proj,
         'ui_irbfc_umap.json'),
    ]

    for title, key, inv_proj_type, proj, ui_suffix in grid_jobs:
        print(title)
        s = time.time()
        grid_path = out_path('grid_' + key + '.joblib')
        save_grid(grid_path, R, N, clf, inv_projs[key], X_nd, proj,
                  clf_path, inv_proj_paths[key])
        ui_path = out_path('mnist_500_' + ui_suffix)
        save_json_ui(ui_path, grid_path, clf_path, inv_proj_type,
                     inv_proj_paths[key], train_y_path, pred_path)
        print("\ttime: ", time.time() - s)
示例#9
0
    def FromJSON(self, filepath):
        """Populate this object from a UI description stored as JSON.

        The JSON file names the saved grid, classifier, inverse projection
        and label arrays.  Loading it fills in the data (``X``, ``y``,
        ``y_pred``, ``proj``), the dense map in HSV and RGB, the distance
        maps when the grid provides them, and the nearest-neighbour search
        structures.  It also updates the module globals ``IMG_SIZE`` and
        ``GRID_SIZE``.

        Args:
            filepath: path of the JSON file to read.

        Returns:
            True when the JSON document contains a ``"features"`` key.

        Raises:
            ValueError: if ``inv_proj_type`` is not ``'ilamp'``, ``'rbf'`` or
                ``'nninv'``.
        """
        global IMG_SIZE, GRID_SIZE

        with open(filepath) as f:
            data_json = json.load(f)

        # Reject an unknown type before loading the grid.  Previously
        # self.inv_proj was left unset (or kept a value from an earlier load)
        # and the failure only surfaced later as an unrelated error.
        inv_proj_type = data_json['inv_proj_type']
        if inv_proj_type not in ('ilamp', 'rbf', 'nninv'):
            raise ValueError("unknown inv_proj_type: %r" % (inv_proj_type,))

        grid = Grid()
        grid.load(data_json['grid'], data_json['clf'], data_json['inv_proj'])

        # FIXME: store the type of the inverse projection (ilamp, rbf or nn)
        if inv_proj_type == 'ilamp':
            self.inv_proj = ILAMP()
        elif inv_proj_type == 'rbf':
            self.inv_proj = RBFInv()
        else:
            self.inv_proj = NNInv()
        self.inv_proj.load(data_json['inv_proj'])

        self.X = np.copy(grid.X_nd)
        # Side length computed as the integer square root of the number of
        # features per sample.
        IMG_SIZE = int(np.sqrt(self.X.shape[1]))

        self.y = np.load(data_json['y_true'])
        self.y_pred = np.load(data_json['y_pred'])

        self.proj = np.copy(grid.X_2d)

        self.dmap_hsv = grid.dmap
        self.dmap = hsv2rgb(self.dmap_hsv)

        # NOTE(review): when the grid has no distance maps these attributes
        # are left as they were, so they can be missing or stale after a
        # reload -- confirm how the consumers handle that.
        if grid.dist_2d is not None:
            self.dist_2d = grid.dist_2d
            self.dist_nd = grid.dist_nd
            self.dist_nd2 = grid.dist_nd2
            self.dist_nd3 = grid.dist_nd3

        GRID_SIZE = grid.R
        print('grid_size: ', GRID_SIZE)

        # Keep the projection as loaded in proj_norm; self.proj is scaled by
        # the grid size.
        self.proj_norm = np.copy(self.proj)
        self.proj *= GRID_SIZE
        self.nbrs = NearestNeighbors(n_neighbors=NUM_NEIGHBORS + 1,
                                     algorithm='kd_tree')
        self.nbrs.fit(self.X)

        # nearest neighbors for the projected points to do the "brushing" thing
        self.proj_nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree')
        self.proj_nbrs.fit(self.proj)

        # FIXME: make it work with 0 misclassifications -- in that case
        # miss_proj and miss_proj_nbrs are not (re)assigned below.
        self.miss_idx = np.arange(self.proj.shape[0], dtype=int)
        self.miss_idx = self.miss_idx[self.y != self.y_pred]
        if len(self.miss_idx) > 0:
            self.miss_proj = self.proj[self.miss_idx]
            self.miss_proj_nbrs = NearestNeighbors(n_neighbors=1,
                                                   algorithm='kd_tree')
            self.miss_proj_nbrs.fit(self.miss_proj)

        return "features" in data_json