metavar='path', type=str, help='the path to save the image results ') args = my_parser.parse_args() image_directory = args.image_directory feature_directory = args.feature_directory results_directory = args.results_directory files_path = image_utils.loadFilePaths(image_directory) for file in list(os.listdir(feature_directory)): if file.endswith('.npy'): print(f'{feature_directory}/{file}') features = image_utils.loadFeatures( os.path.join(feature_directory, file)) print("tsne....") tsne_transformed = TSNE(n_components=3, n_jobs=-1).fit_transform(features) output_df = runHierarchicalClustering( tsne_transformed, f'{results_directory}/{file}-hierarchical-tsne') output_df.to_csv( f'{results_directory}/{file}-hierarchical-tsne.csv') print("pca....") pca_dims = PCA().fit(features) cumsum = np.cumsum(pca_dims.explained_variance_ratio_) d = np.argmax(cumsum >= 0.95) + 1 print(d) if (d == 1):
if __name__ == "__main__":
    # Train a Deep Embedded Clustering (DEC) model on precomputed grayscale
    # image features, then save the predicted cluster labels to disk.
    # (Removed: dead `init`/`pretrain_optimizer` string assignments that were
    # immediately overwritten below, and commented-out MNIST loading code.)

    # --- Hyper-parameters ---
    update_interval = 140   # how often target distribution is refreshed during fit
    pretrain_epochs = 300
    # Initializer from the DEC paper: uniform in [-limit, limit],
    # limit = sqrt(1. / fan_in).
    init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
    # SGD(lr=1, momentum=0.9) matches the DEC pretraining recipe.
    pretrain_optimizer = SGD(lr=1, momentum=0.9)

    # --- Data ---
    # Load the grayscale autoencoder features; y stays None (unsupervised).
    features = image_utils.loadFeatures('image_features_autoencoder_helper/ideology__grayscale_.npy')
    # Scale pixel-derived values into [0, 1] as float32.
    data = features.astype('float32') / 255.
    x = data
    y = None
    print(x.shape)
    n_clusters = 500

    # --- Model ---
    # Encoder layer sizes follow the DEC paper: d-500-500-2000-10.
    dec = DEC(dims=[x.shape[-1], 500, 500, 2000, 10],
              n_clusters=n_clusters, init=init)
    # NOTE(review): save_dir='' — depending on how DEC joins paths internally
    # this may write checkpoints to the CWD or to an absolute '/...' path;
    # confirm against the DEC implementation.
    dec.pretrain(x=x, y=y, optimizer=pretrain_optimizer,
                 epochs=pretrain_epochs, batch_size=256, save_dir='')
    print("DONE pretraining")

    # --- Clustering fine-tuning (KL-divergence objective) ---
    dec.compile(optimizer=SGD(0.01, 0.9), loss='kld')
    y_pred = dec.fit(x, y=y, tol=0.001, maxiter=2e4, batch_size=256,
                     update_interval=update_interval, save_dir='')

    # Persist the predicted cluster labels for downstream reporting.
    np.save("dec_predictions.npy", y_pred)
    print("Saved")