def train_final_vae(self, model_config):
    model_config["name"] = model_config["name"] + "_FULL"
    model_dir = self.get_model_dir(model_config["name"])
    create_dir(model_dir)
    model_config["model_dir"] = model_dir

    n_epochs = 2 if self.debug else 200

    full_dataset = Dataset.concatenate(*self.datasets)

    final_vae = VAE(model_config)
    final_vae.train(full_dataset,
                    epochs=n_epochs,
                    batch_size=50,
                    validation_dataset=full_dataset)

    latent_reps = final_vae.encode(full_dataset.features)

    results = np.hstack((
        np.expand_dims(full_dataset.sample_data[0], axis=1),
        latent_reps,
        np.expand_dims(full_dataset.sample_data[1], axis=1),
        np.expand_dims(full_dataset.sample_data[2], axis=1)
    ))

    header = ["cell_ids"]
    for l in range(1, model_config["latent_size"] + 1):
        header.append("dim{}".format(l))
    header.append("cell_type")
    header.append("cell_subtype")
    header = np.array(header)

    results = np.vstack((header, results))

    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")
def train_vae(self, case_config):
    model_config = self.get_model_config(case_config)
    create_dir(model_config["model_dir"])

    avg_valid_loss = 0.0
    for k in range(0, 10):
        train_dataset = Dataset.concatenate(
            *(self.datasets[:k] + self.datasets[(k + 1):]))
        valid_dataset = self.datasets[k]

        # Start training!
        vae = VAE(model_config)
        if self.debug:
            epochs = 2
        else:
            epochs = 100
        vae.train(train_dataset,
                  epochs=epochs,
                  batch_size=50,
                  validation_dataset=valid_dataset)

        fold_valid_loss = vae.evaluate(valid_dataset)
        self.logger.info("{}|Fold #{} Loss = {:f}".format(
            model_config["name"], k + 1, fold_valid_loss))
        avg_valid_loss += fold_valid_loss

        if self.debug:
            break

    avg_valid_loss /= 10
    self.logger.info("{}|Avg Validation Loss = {:f}".format(
        model_config["name"], avg_valid_loss))

    self.case_counter += 1

    return {
        "status": STATUS_OK,
        "loss": avg_valid_loss,
        "name": model_config["name"],
        "model_config": model_config
    }
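# Usage sketch (not part of the original code): train_vae returns a
# hyperopt-style result dict ("status"/"loss"), so it can serve directly as an
# fmin objective. The `experiment` argument and the search-space keys below are
# illustrative assumptions only; substitute the real configuration space.
def _tune_vae_with_hyperopt(experiment, max_evals=25):
    from hyperopt import Trials, fmin, hp, tpe

    space = {
        "latent_size": hp.choice("latent_size", [2, 8, 16]),
        "optimizer": hp.choice("optimizer", ["adam", "rmsprop"])
    }
    trials = Trials()
    best = fmin(fn=experiment.train_vae,
                space=space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials)
    return best, trials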
def train_final_ae(self, model_config):
    model_config["name"] = model_config["name"] + "_FULL"
    model_dir = self.get_model_dir(model_config["name"])
    create_dir(model_dir)
    model_config["model_dir"] = model_dir

    n_epochs = 2 if self.debug else 100

    full_dataset = Dataset.concatenate(*self.datasets)

    self.logger.info("Training Final AE: " + model_config["name"])
    final_ae = AE(model_config)
    final_ae.train(full_dataset,
                   epochs=n_epochs,
                   batch_size=50,
                   validation_dataset=full_dataset)

    loss = final_ae.evaluate(full_dataset)
    self.logger.info("{}|Loss = {:f}".format(model_config["name"], loss))

    self.logger.info("Creating latent representations...")
    latent_reps = final_ae.encode(full_dataset.features)

    results = np.hstack((
        np.expand_dims(full_dataset.sample_data[0], axis=1),
        latent_reps,
        np.expand_dims(full_dataset.sample_data[1], axis=1),
        np.expand_dims(full_dataset.sample_data[2], axis=1)
    ))

    header = ["cell_ids"]
    # The latent width is the size of the last encoder layer,
    # e.g. "Dense:32:..." -> columns dim1..dim32.
    for l in range(
            1, int(model_config["encoder_layers"][-1].split(":")[1]) + 1):
        header.append("dim{}".format(l))
    header.append("cell_type")
    header.append("cell_subtype")
    header = np.array(header)

    results = np.vstack((header, results))

    self.logger.info("Saving results")
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")
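# Usage sketch (not original code): reload the latent_representations.txt table
# written by train_final_vae / train_final_ae above and colour points by cell
# type. Assumes a 2-dimensional latent space and a tab-delimited file; adjust
# sep= if save_data_table uses a different delimiter.
def _plot_latent_representations(model_dir):
    import pandas as pd
    import matplotlib.pyplot as plt

    table = pd.read_csv(model_dir + "/latent_representations.txt", sep="\t")
    for cell_type, group in table.groupby("cell_type"):
        plt.scatter(group["dim1"], group["dim2"], s=5, label=str(cell_type))
    plt.legend()
    plt.show()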
def train_final_model(self, model_config, batch_size=None):
    model_dir = self.get_model_dir(model_config["name"])
    create_dir(model_dir)
    model_config["model_dir"] = model_dir

    if batch_size is None:
        if "batch_size" in model_config:
            batch_size = model_config["batch_size"]
        else:
            raise Exception("No batch size specified "
                            "for model training")

    full_dataset = Dataset.concatenate(*self.datasets)

    if self.logger is not None:
        self.logger.info("Training Final Model: {}".format(
            model_config["name"]))

    model = self.model_class(model_config)
    if self.debug:
        self.epochs = 2
    train_history = model.train(full_dataset,
                                epochs=self.epochs,
                                batch_size=batch_size)

    metrics = model.evaluate(full_dataset)
    if self.logger is not None:
        for k, v in metrics.items():
            self.logger.info("{}|{} = {:f}".format(
                model_config["name"], k, v))

    return {
        "model": model,
        "train_history": train_history,
        "dataset": full_dataset,
        "metrics": metrics
    }
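# Usage sketch (not original code): after a hyperparameter search, the winning
# model_config can be passed straight to train_final_model. `experiment` and
# `best_model_config` are illustrative names; the returned dict is exactly the
# one built above ("model", "train_history", "dataset", "metrics").
def _train_best_model(experiment, best_model_config):
    result = experiment.train_final_model(best_model_config, batch_size=50)
    for metric_name, value in result["metrics"].items():
        print("{} = {:f}".format(metric_name, value))
    return result["model"]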
def run(self):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = (x_train.astype("float32") - 127.5) / 127.5
    x_train = np.expand_dims(x_train, axis=3)
    x_test = (x_test.astype("float32") - 127.5) / 127.5
    x_test = np.expand_dims(x_test, axis=3)

    train_dataset = Dataset(x_train, y_train,
                            flatten=False, to_one_hot=False)
    # test_dataset = Dataset(x_test, y_test,
    #                        flatten=False, to_one_hot=False)

    model_name = "MNIST_GAN"
    model_dir = self.get_model_dir(model_name)
    create_dir(model_dir)

    model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (28, 28, 1),
        "generator_layers": [
            "Dense:1024:activation='tanh'",
            "Dense:128*7*7",
            "BatchNormalization",
            "Activation:'tanh'",
            "Reshape:(7, 7, -1)",
            "UpSampling2D:size=(2,2)",
            "Conv2D:64:5:padding='same':activation='tanh'",
            "UpSampling2D:size=(2,2)",
            "Conv2D:1:5:padding='same'",
            "Activation:'tanh'"
        ],
        "discriminator_layers": [
            "Conv2D:64:5:padding='same':activation='tanh'",
            "MaxPooling2D:pool_size=(2,2)",
            "Conv2D:128:5:padding='same':activation='tanh'",
            "MaxPooling2D:pool_size=(2,2)",
            "Flatten",
            "Dense:1024:activation='tanh'",
            "Dense:1:activation='sigmoid'"
        ],
        "prior_size": 64,
        "discriminator_loss": "binary_crossentropy",
        "gan_optimizer": "adam:lr=1e-4",
        "discriminator_optimizer": "adam:lr=1e-3"
    }

    if self.debug:
        iterations = 3
    else:
        iterations = 5000

    gan = GAN(model_config)
    g_loss, d_loss_real, d_loss_gen = gan.train(train_dataset,
                                                iterations,
                                                batch_size=64)

    print("Generator Loss: ", g_loss)
    print("Discriminator Loss (Real): ", d_loss_real)
    print("Discriminator Loss (Generated): ", d_loss_gen)
    print("Finished training GAN")
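# Usage sketch (not original code): visualise generator output after training.
# `gan.generate` is a hypothetical sampling helper not confirmed by the source;
# substitute whatever call the GAN class actually exposes for drawing from the
# prior. Assumes samples come back with shape (n, 28, 28, 1) in [-1, 1],
# matching the tanh output layer above.
def _show_gan_samples(gan, n_samples=16):
    import matplotlib.pyplot as plt

    images = gan.generate(n_samples)  # hypothetical sampling helper
    fig, axes = plt.subplots(4, 4, figsize=(4, 4))
    for img, ax in zip(images, axes.ravel()):
        ax.imshow((img[:, :, 0] + 1.0) / 2.0, cmap="gray")
        ax.axis("off")
    plt.show()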
def train_case_model(self, case_config, batch_size=None, loss_metric="loss"):
    model_config = self.get_model_config(case_config)
    create_dir(model_config["model_dir"])

    if self.logger is not None:
        self.logger.info("Training %s..." % model_config["name"])

    status = STATUS_OK
    avg_valid_metrics = {}
    for k in range(0, self.n_folds):
        train_dataset = Dataset.concatenate(
            *(self.datasets[:k] + self.datasets[(k + 1):]))
        valid_dataset = self.datasets[k]

        model = self.model_class(model_config)

        if batch_size is None:
            if "batch_size" in model_config:
                batch_size = model_config["batch_size"]
            elif "batch_size" in case_config:
                batch_size = case_config["batch_size"]
            else:
                raise Exception("No batch size specified "
                                "for model training")

        if self.debug:
            self.epochs = 2

        model.train(train_dataset,
                    epochs=self.epochs,
                    batch_size=batch_size,
                    validation_dataset=valid_dataset)

        fold_valid_metrics = model.evaluate(valid_dataset)
        if not isinstance(fold_valid_metrics, dict):
            raise TypeError("Evaluate method of model must return a "
                            "dictionary of metric names and values")

        # Abort the whole case if any fold produces a non-finite metric.
        if np.any(np.isnan(list(fold_valid_metrics.values()))) or \
                np.any(np.isinf(list(fold_valid_metrics.values()))):
            for key in fold_valid_metrics.keys():
                avg_valid_metrics[key] = None
            status = STATUS_FAIL
            break
        else:
            for name, value in fold_valid_metrics.items():
                if name in avg_valid_metrics:
                    avg_valid_metrics[name] += value
                else:
                    avg_valid_metrics[name] = value
                if self.logger is not None:
                    self.logger.info("{}|Fold #{}|{} = {:f}".format(
                        model_config["name"], k + 1, name, value))

        if self.debug:
            break

    if status != STATUS_FAIL:
        for name, metric in avg_valid_metrics.items():
            metric /= self.n_folds
            avg_valid_metrics[name] = metric
            if self.logger is not None:
                self.logger.info("{}|Avg {} = {:f}".format(
                    model_config["name"], name, metric))

    self.case_counter += 1

    return {
        "status": status,
        "model_config": model_config,
        "loss": avg_valid_metrics[loss_metric],
        "avg_valid_metrics": avg_valid_metrics
    }
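# Interface sketch (not original code): train_case_model only assumes that
# self.model_class takes a model_config, exposes train(), and returns a dict of
# metric names to floats from evaluate() (anything else raises the TypeError
# above). The stub below is an illustrative skeleton of that contract.
class _ExampleCaseModel(object):
    def __init__(self, model_config):
        self.model_config = model_config

    def train(self, train_dataset, epochs, batch_size,
              validation_dataset=None):
        pass  # fit the underlying model here

    def evaluate(self, dataset):
        # Must return finite values; NaN/inf marks the whole case as failed.
        return {"loss": 0.0}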
def run(self):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = (x_train.astype("float32") - 127.5) / 255
    x_test = (x_test.astype("float32") - 127.5) / 255

    train_dataset = Dataset(x_train, y_train,
                            flatten=True, to_one_hot=False)
    test_dataset = Dataset(x_test, y_test,
                           flatten=True, to_one_hot=False)

    model_name = "MNIST_AAE"
    model_type = UnsupervisedClusteringAdversarialAutoencoder
    model_dir = self.get_model_dir(model_name)
    create_dir(model_dir)

    # Only unsupervised_clustering_aae_config is used below; the other two
    # configs are kept for reference.
    aae_model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784,),
        "encoder_layers": [
            "Dense:256:activation='relu'",
        ],
        "z_latent_distribution": "gaussian:2",
        "z_prior_distribution": "gaussian:2:mean=0.0:stddev=5.0",
        "output_distribution": "mean_gaussian:784",
        "z_discriminator_layers": [
            "Dense:50:activation='relu'",
            "Dense:25:activation='relu'"
        ],
        "autoencoder_optimizer": "adam:lr=0.001",
        "z_discriminator_optimizer": "adam:lr=0.001",
        "autoencoder_callbacks": {
            "file_logger": {"file": "autoencoder_model.training.log"}
        },
        "z_discriminator_callbacks": {
            "file_logger": {"file": "discriminator_model.training.log"}
        }
    }

    kadurin_aae_model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784,),
        "encoder_layers": [
            "Dense:256:activation='relu'",
        ],
        "z_latent_distribution": "gaussian:2",
        "z_prior_distribution": "gaussian:2:mean=0.0:stddev=5.0",
        "output_distribution": "mean_gaussian:784",
        "z_discriminator_layers": [
            "Dense:50:activation='relu'",
            "Dense:25:activation='relu'"
        ],
        "discriminative_power": 0.6,
        "autoencoder_optimizer": "adam:lr=0.0001",
        "z_discriminator_optimizer": "adam:lr=0.0001",
        "z_combined_callbacks": {
            "file_logger": {"file": "combined_model.training.log"}
        },
        "z_discriminator_callbacks": {
            "file_logger": {"file": "discriminator_model.training.log"}
        }
    }

    unsupervised_clustering_aae_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784,),
        "encoder_layers": [
            "Dense:256:activation='relu'"
        ],
        "z_latent_distribution": "gaussian:2",
        "z_prior_distribution": "gaussian:2:mean=0.0:stddev=5.0",
        "n_clusters": 10,
        "output_distribution": "mean_gaussian:784",
        "z_discriminator_layers": [
            "Dense:50:activation='relu'",
            "Dense:25:activation='relu'"
        ],
        "y_discriminator_layers": [
            "Dense:50:activation='relu'",
            "Dense:25:activation='relu'"
        ],
        "autoencoder_optimizer": "adam:lr=0.0001",
        "z_discriminator_optimizer": "adam:lr=0.0001",
        "y_discriminator_optimizer": "adam:lr=0.0001",
        "autoencoder_callbacks": {
            "file_logger": {"file": "autoencoder_model.training.log"}
        }
        # "z_discriminator_callbacks": {},
        # "y_discriminator_callbacks": {},
        # "z_adversarial_callbacks": {},
        # "y_adversarial_callbacks": {}
    }

    if self.debug:
        epochs = 5
    else:
        epochs = 50

    aae = model_type(unsupervised_clustering_aae_config)
    aae.train(train_dataset,
              epochs=epochs,
              batch_size=100,
              validation_dataset=test_dataset,
              verbose=2)

    latent_space = aae.encode(test_dataset.features)
    style, clusters = latent_space[0], latent_space[1]
    clusters = aae.cluster(test_dataset.features)

    # results = np.hstack((
    #     latent_space,
    #     np.expand_dims(test_dataset.labels, axis=1)
    # ))
    #
    # header = []
    # for l in range(1, 3):
    #     header.append("dim{}".format(l))
    # header.append("digit")
    # header = np.array(header)
    #
    # results = np.vstack((header, results))
    #
    # self.logger.info("Saving results")
    # save_data_table(
    #     results,
    #     model_config["model_dir"] + "/latent_representations.txt")

    print("ARI: ", adjusted_rand_score(test_dataset.labels, clusters))
    print("Clusters:", np.unique(clusters, return_counts=True))

    plt.figure(figsize=(6, 6))
    plt.scatter(style[:, 0], style[:, 1], c=y_test, cmap="rainbow")
    plt.colorbar()
    plt.show()
# The def line below is reconstructed from the body; the name matches the
# save_data_table calls above, and the tab default delimiter is an assumption.
def save_data_table(data, filepath, delimiter="\t"):
    delimiter = str(delimiter) if six.PY2 else delimiter
    with open(filepath, "w") as f:
        writer = csv.writer(
            f, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
        for r in data:
            writer.writerow(r)


cell_ids, features, cell_types, cell_subtypes = load_data()
datasets = stratified_kfold(
    features, cell_subtypes, [cell_ids, cell_types, cell_subtypes],
    n_folds=5, convert_labels_to_int=True)
full_dataset = Dataset.concatenate(*datasets)

n_epochs = 200
final_vae = VAE(model_config)
final_vae.train(full_dataset,
                epochs=n_epochs,
                batch_size=model_config["batch_size"])

loss = final_vae.evaluate(full_dataset)
print(loss)

latent_reps = final_vae.encode(full_dataset.features)
results = np.hstack((
    np.expand_dims(full_dataset.sample_data[0], axis=1),
    latent_reps,
    np.expand_dims(full_dataset.sample_data[1], axis=1),
    np.expand_dims(full_dataset.sample_data[2], axis=1)
))
def run(self):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = (x_train.astype("float32") - 127.5) / 255
    x_test = (x_test.astype("float32") - 127.5) / 255

    train_dataset = Dataset(x_train, y_train,
                            flatten=True, to_one_hot=False)
    test_dataset = Dataset(x_test, y_test,
                           flatten=True, to_one_hot=False)

    model_name = "MNIST_VAE"
    model_dir = self.get_model_dir(model_name)
    create_dir(model_dir)

    model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784,),
        "continuous": True,
        "encoder_layers": [
            "Dense:256:activation='elu'",
            "BatchNormalization"
        ],
        "latent_size": 2,
        "optimizer": "adam"
    }

    if self.debug:
        epochs = 3
    else:
        epochs = 50

    vae = VAE(model_config)
    vae.train(train_dataset,
              epochs=epochs,
              batch_size=100,
              validation_dataset=test_dataset)

    latent_reps = vae.encode(test_dataset.features)

    results = np.hstack(
        (latent_reps, np.expand_dims(test_dataset.labels, axis=1)))

    header = []
    for l in range(1, model_config["latent_size"] + 1):
        header.append("dim{}".format(l))
    header.append("digit")
    header = np.array(header)

    results = np.vstack((header, results))

    self.logger.info("Saving results")
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")

    plt.figure(figsize=(6, 6))
    plt.scatter(latent_reps[:, 0], latent_reps[:, 1],
                c=y_test, cmap="rainbow")
    plt.colorbar()
    plt.show()