n_clusters = 6
    # sub = []
    # kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X[0])
    # X[0]['label'] = kmeans.labels_
    # for i in range(n_clusters):
    #     data_sub = data_sc[data_sc['label'] == i].drop(['label'], axis=1)
    #     sub.append(npc(data_sub))
    # sub = np.asarray(sub)
    # print(sub[:,0].mean())

    plot_kmeans(n_clusters)

    # %%
    from keras_dec import DeepEmbeddingClustering

    c = DeepEmbeddingClustering(n_clusters=6, input_dim=X[0].shape[1])
    #c.initialize(X, finetune_iters=100000, layerwise_pretrain_iters=50000)
    c.initialize(X[0], finetune_iters=10000, layerwise_pretrain_iters=5000)
    # c.cluster(X[0], y=np.random.randint(10,size=X[0].shape[0]))
    c.cluster(X[0])
    labels = c.DEC.predict_classes(X[0])
    plot_cluster(labels, n_clusters=6)
    X[0]['label'] = labels
    sub = []
    for i in range(n_clusters):
        data_sub = data_sc[data_sc['label'] == i].drop(['label'], axis=1)
        sub.append(npc(data_sub))
    sub = np.asarray(sub)
    print(sub[:, 0].mean())
    # %%
示例#2
0
    parser.add_argument('--n_clusters', default=5, type=int)
    parser.add_argument('--input_dim', type=int)

    parser.add_argument('--finetune_iters', default=500, type=int)
    parser.add_argument('--layerwise_pretrain_iters', default=1000, type=int)
    parser.add_argument('--iter_max', default=1000, type=int)
    parser.add_argument('--tol', default=0.001, type=float)

    parser.add_argument('--output_file', default='../data/cluster.csv')

    args = parser.parse_args()

    df_X = pd.read_csv(args.input_file, sep='\t', header=None)
    X = df_X.values
    print(len(X))
    dec = DeepEmbeddingClustering(n_clusters=args.n_clusters,
                                  input_dim=len(df_X.columns))
    dec.initialize(X,
                   finetune_iters=args.finetune_iters,
                   layerwise_pretrain_iters=args.layerwise_pretrain_iters)
    pred_y = dec.cluster(X, y=None, tol=args.tol, iter_max=args.iter_max)

    print(pred_y)

    d = {
        'pred_y': pred_y,
    }

    df_pred_clusters = pd.DataFrame(d)
    df_pred_clusters.to_csv(args.output_file, index=False, header=False)
示例#3
0
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np


# mnist data loading & reshape function -------------------------------------------
def get_mnist():
    """Load all of MNIST as one shuffled, flattened, rescaled matrix.

    The train and test splits are concatenated, every image is flattened
    into a single row, the rows are shuffled, and the pixel values are
    converted to float32 and multiplied by 0.02.

    Side effect: reseeds NumPy's global random generator with 1234.

    Returns:
        An ``(X, Y)`` tuple: the float32 sample matrix and the labels,
        both in the same shuffled order.
    """
    # Fixed seed so the permutation below is the same on every run.
    np.random.seed(1234)

    train_split, test_split = mnist.load_data()
    images = np.concatenate((train_split[0], test_split[0]), axis=0)
    labels = np.concatenate((train_split[1], test_split[1]), axis=0)

    # Collapse the two image axes into one so each sample is a flat row.
    pixels_per_image = images.shape[1] * images.shape[2]
    flat = images.reshape(-1, pixels_per_image)

    order = np.random.permutation(flat.shape[0])
    shuffled = flat[order].astype(np.float32) * 0.02
    return shuffled, labels[order]


# call -----------------------------------------------------------------------------
# Shuffled, flattened MNIST samples and their digit labels.
X, Y = get_mnist()

# Build the DEC model (10 clusters over 784-dimensional inputs), run the
# layer-wise pretraining and finetuning stages, then cluster.
# NOTE(review): the labels are handed to cluster(); presumably they are only
# used for progress/accuracy reporting -- confirm against keras_dec.
c = DeepEmbeddingClustering(input_dim=784, n_clusters=10)
c.initialize(X, layerwise_pretrain_iters=50000, finetune_iters=100000)
c.cluster(X, y=Y)
示例#4
0
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np
import pandas as pd


def get_mnist():
    """Return the full MNIST set as ``(X, Y)``, flattened and shuffled.

    Train and test images are stacked together, reshaped to one row per
    image, permuted with a fixed seed, cast to float32 and scaled by 0.02.
    The labels are permuted with the same ordering.

    Side effect: reseeds NumPy's global random generator with 1234.
    """
    np.random.seed(1234)  # reproducible shuffle

    (img_train, lab_train), (img_test, lab_test) = mnist.load_data()
    all_images = np.concatenate((img_train, img_test), axis=0)
    all_labels = np.concatenate((lab_train, lab_test), axis=0)

    # One flat row per image.
    row_length = all_images.shape[1] * all_images.shape[2]
    samples = all_images.reshape(-1, row_length)

    shuffle = np.random.permutation(samples.shape[0])
    samples = samples[shuffle].astype(np.float32) * 0.02
    all_labels = all_labels[shuffle]
    return samples, all_labels


import os

# Shuffled, flattened MNIST samples and their digit labels.
X, Y = get_mnist()

# Create the output directory *before* the long pretraining/finetuning run:
# otherwise a missing 'data/' directory only surfaces at the final to_csv()
# call, after all the training work has already been done.
output_path = 'data/example_cluster.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# One cluster per distinct label; the input width is taken from the data
# instead of being hard-coded.
c = DeepEmbeddingClustering(n_clusters=len(np.unique(Y)),
                            input_dim=X.shape[1])
c.initialize(X, finetune_iters=100000, layerwise_pretrain_iters=50000)
pred_y = c.cluster(X, y=None, iter_max=1)
# Sanity check: exactly one predicted cluster per sample.
assert len(pred_y) == len(Y)

# Store predictions next to the true labels for later comparison.
d = {'pred_y': pred_y, 'actual_y': Y}
df = pd.DataFrame(d)
df.to_csv(output_path, index=False)

print(pred_y)
示例#5
0
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np


def preproc(X):
    """Divide every row of ``X`` by that row's mean.

    Args:
        X: array with one sample per row (expected to be 2-D).

    Returns:
        An array of the same shape in which each row has been scaled by
        the reciprocal of its own mean.

    NOTE(review): this is a mean-based scaling; it does not in general make
    (1/d) * ||x_i||_2**2 equal to 1.0 -- confirm which normalisation is
    intended.
    """
    row_means = X.mean(1)
    # Work on the transpose so the per-row means broadcast along the last
    # axis, then flip back to the original orientation.
    scaled_transposed = X.T / row_means
    return scaled_transposed.T


# Standard MNIST train/test split.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each image into one float32 row. A single vectorised reshape
# replaces the per-image Python loop and yields the same (n_samples, n_pixels)
# float32 matrix.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

# Scale every sample by its own mean (see preproc).
X_train = preproc(X_train)
X_test = preproc(X_test)

# Cluster the training images only; X_test is prepared but not used below.
# NOTE(review): the labels are handed to cluster(); presumably they are only
# used for progress/accuracy reporting -- confirm against keras_dec.
c = DeepEmbeddingClustering(n_clusters=10, input_dim=784)
c.initialize(X_train, nb_epoch=200)
c.cluster(X_train, y=y_train)
示例#6
0
# Work from the dataset directory so the relative file name below resolves.
basepath = "/home/ubuntu/efs/SLAV_Data/SML/"
os.chdir(basepath)
X2 = np.load('SML_Data.npzdBcMZU-numpy.npy')

# Replace two exact colours -- (250, 250, 250) and then (255, 255, 255) --
# with black, in place, in the first three channels of every image.
r2, g2, b2 = 0, 0, 0  # Value that we want to replace it with
for r1, g1, b1 in ((250, 250, 250), (255, 255, 255)):
    red, green, blue = X2[:, :, :, 0], X2[:, :, :, 1], X2[:, :, :, 2]
    mask = (red == r1) & (green == g1) & (blue == b1)
    X2[:, :, :, :3][mask] = [r2, g2, b2]

# Flatten every sample to one row, convert to float32 and divide by 255
# (presumably mapping 8-bit pixel values into [0, 1] -- confirm the dtype of
# the stored array).
X3 = X2.reshape(X2.shape[0], -1)
X4 = X3.astype(np.float32) / 255

# Despite its name, rbst_scale is a MaxAbsScaler fitted on the data.
rbst_scale = preprocessing.MaxAbsScaler().fit(X4)
X5 = rbst_scale.transform(X4)

# Drop the references to the intermediates so their memory can be reclaimed
# before training.
X2 = X3 = X4 = None

# NOTE(review): input_dim is hard-coded; 196608 == 256 * 256 * 3, so it
# presumably matches X5.shape[1] for 256x256 RGB images -- confirm.
c = DeepEmbeddingClustering(n_clusters=10, input_dim=196608)
c.initialize(X5, finetune_iters=1000, layerwise_pretrain_iters=500)
c.cluster(X5)