# Number of clusters shared by the k-means baseline plot and the DEC run below.
# (A commented-out k-means version of the per-cluster summary was removed; the
# live DEC version of the same summary appears at the end of this cell.)
n_clusters = 6

plot_kmeans(n_clusters)

# %%
from keras_dec import DeepEmbeddingClustering

# Deep Embedded Clustering on the first dataset in X.
# NOTE(review): assumes X[0] is a 2-D table (samples x features) that also
# accepts a 'label' column assignment below — confirm it is a DataFrame.
c = DeepEmbeddingClustering(n_clusters=6, input_dim=X[0].shape[1])
# Shortened schedule for iteration speed; the paper-scale schedule
# (finetune_iters=100000, layerwise_pretrain_iters=50000) is much slower.
c.initialize(X[0], finetune_iters=10000, layerwise_pretrain_iters=5000)
c.cluster(X[0])

labels = c.DEC.predict_classes(X[0])
plot_cluster(labels, n_clusters=6)

X[0]['label'] = labels

# Per-cluster summary: run npc() on each cluster's rows (label column dropped)
# and report the mean of the first summary column across clusters.
sub = []
for i in range(n_clusters):
    data_sub = data_sc[data_sc['label'] == i].drop(['label'], axis=1)
    sub.append(npc(data_sub))
sub = np.asarray(sub)
print(sub[:, 0].mean())
# %%
# Command-line options for a DEC clustering run over a TSV feature matrix.
# NOTE(review): `parser` — and presumably an '--input_file' option, since
# args.input_file is read below — must be defined earlier in this file; confirm.
parser.add_argument('--n_clusters', default=5, type=int)
parser.add_argument('--input_dim', type=int)
parser.add_argument('--finetune_iters', default=500, type=int)
parser.add_argument('--layerwise_pretrain_iters', default=1000, type=int)
parser.add_argument('--iter_max', default=1000, type=int)
parser.add_argument('--tol', default=0.001, type=float)
parser.add_argument('--output_file', default='../data/cluster.csv')
args = parser.parse_args()

# Load the tab-separated feature matrix (no header row) and report its size.
df_X = pd.read_csv(args.input_file, sep='\t', header=None)
X = df_X.values
print(len(X))

# Build the DEC model, pretrain/finetune the autoencoder, then cluster.
dec = DeepEmbeddingClustering(n_clusters=args.n_clusters,
                              input_dim=len(df_X.columns))
dec.initialize(X,
               finetune_iters=args.finetune_iters,
               layerwise_pretrain_iters=args.layerwise_pretrain_iters)
pred_y = dec.cluster(X, y=None, tol=args.tol, iter_max=args.iter_max)
print(pred_y)

# Persist one predicted cluster id per row, without header or index.
df_pred_clusters = pd.DataFrame({'pred_y': pred_y})
df_pred_clusters.to_csv(args.output_file, index=False, header=False)
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np


# mnist data loading & reshape function -------------------------------------------
def get_mnist():
    """Load all 70k MNIST digits, flatten, shuffle, and scale pixels by 0.02."""
    np.random.seed(1234)  # fixed seed -> deterministic shuffle order
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    images = np.concatenate((x_train, x_test), axis=0)
    digits = np.concatenate((y_train, y_test), axis=0)
    flat = images.reshape(-1, images.shape[1] * images.shape[2])
    order = np.random.permutation(flat.shape[0])
    return flat[order].astype(np.float32) * 0.02, digits[order]


# call -----------------------------------------------------------------------------
X, Y = get_mnist()

# DEC clustering call --------------------------------------------------------------
# 10 clusters over the 784-dim flattened digits, paper-scale training schedule.
c = DeepEmbeddingClustering(n_clusters=10, input_dim=784)
c.initialize(X, finetune_iters=100000, layerwise_pretrain_iters=50000)
c.cluster(X, y=Y)
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np
import pandas as pd


def get_mnist():
    """Return the full shuffled MNIST set as (flattened pixels * 0.02, labels)."""
    np.random.seed(1234)  # fixed seed -> deterministic ordering
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    pixels = np.concatenate((x_train, x_test), axis=0)
    targets = np.concatenate((y_train, y_test), axis=0)
    flat = pixels.reshape(pixels.shape[0], -1)
    shuffle = np.random.permutation(flat.shape[0])
    return flat[shuffle].astype(np.float32) * 0.02, targets[shuffle]


X, Y = get_mnist()

# One cluster per distinct digit label; a single hard-clustering iteration.
c = DeepEmbeddingClustering(n_clusters=len(np.unique(Y)), input_dim=784)
c.initialize(X, finetune_iters=100000, layerwise_pretrain_iters=50000)
pred_y = c.cluster(X, y=None, iter_max=1)
assert len(pred_y) == len(Y)

# Save predicted vs. actual labels side by side for inspection.
df = pd.DataFrame({'pred_y': pred_y, 'actual_y': Y})
df.to_csv('data/example_cluster.csv', index=False)
print(pred_y)
from keras_dec import DeepEmbeddingClustering
from keras.datasets import mnist
import numpy as np


def preproc(X):
    """Row-normalize: divide every sample by its own mean value.

    The original comment framed this as making 1/d * ||x_i||^2 equal 1
    (the DEC paper's normalization); the code actually fixes each row's
    mean to 1 — NOTE(review): confirm which was intended.
    """
    return (X.T / X.mean(1)).T


(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten 28x28 images into 784-dim float32 vectors, then normalize rows.
X_train = np.asarray([img.flatten() for img in X_train], dtype='float32')
X_test = np.asarray([img.flatten() for img in X_test], dtype='float32')
X_train = preproc(X_train)
X_test = preproc(X_test)

c = DeepEmbeddingClustering(n_clusters=10, input_dim=784)
c.initialize(X_train, nb_epoch=200)
c.cluster(X_train, y=y_train)
# Preprocess the SLAV/SML image stack and cluster it with DEC.
basepath = "/home/ubuntu/efs/SLAV_Data/SML/"
os.chdir(os.path.join(basepath))

# NOTE(review): assumes a 4-D array (n_images, H, W, channels) with at least
# 3 channels — confirm against how SML_Data was produced.
X2 = np.load('SML_Data.npzdBcMZU-numpy.npy')


def _replace_rgb(images, old_rgb, new_rgb):
    """In place, set the first 3 channels of every pixel matching old_rgb to new_rgb.

    Factored out of two verbatim copies of the same mask-and-assign logic
    (one for (250,250,250), one for (255,255,255)).
    """
    red, green, blue = images[:, :, :, 0], images[:, :, :, 1], images[:, :, :, 2]
    mask = (red == old_rgb[0]) & (green == old_rgb[1]) & (blue == old_rgb[2])
    images[:, :, :, :3][mask] = list(new_rgb)


# Map near-white (250,250,250) and pure white (255,255,255) pixels to black.
_replace_rgb(X2, (250, 250, 250), (0, 0, 0))
_replace_rgb(X2, (255, 255, 255), (0, 0, 0))

# Flatten each image, scale pixel values to [0, 1], then max-abs scale per feature.
X3 = X2.reshape(X2.shape[0], -1)
X4 = X3.astype(np.float32)
X4 = X4 / 255
rbst_scale = preprocessing.MaxAbsScaler().fit(X4)
X5 = rbst_scale.transform(X4)

# Drop references to the large intermediates before training to lower peak memory.
X2 = None
X3 = None
X4 = None

c = DeepEmbeddingClustering(n_clusters=10, input_dim=196608)
c.initialize(X5, finetune_iters=1000, layerwise_pretrain_iters=500)
c.cluster(X5)