def train_final_vae(self, model_config):
    """Train the final VAE for *model_config* and save its latent space.

    Forces non-Bernoulli output, TensorBoard logging, checkpointing, and
    loss-based early stopping, then delegates to ``train_final_model``.
    The encoded latent representations, bracketed by the dataset's sample
    metadata columns, are written to
    ``<model_dir>/latent_representations.txt``.

    Args:
        model_config (dict): model configuration; mutated in place to set
            the final-training flags listed above.
    """
    # Final training always runs fully instrumented, selecting on total loss.
    model_config["bernoulli"] = False
    model_config["tensorboard"] = True
    model_config["checkpoint"] = True
    model_config["early_stopping_metric"] = "loss"
    model_config["checkpoint_metric"] = "loss"

    results = self.train_final_model(model_config)
    final_vae = results["model"]
    full_dataset = results["dataset"]

    # Fixed typo in log message: "represenations" -> "representations".
    self.logger.info("Encoding latent representations...")
    latent_reps = final_vae.encode(full_dataset.features)

    # Columns: id | latent dims | two metadata columns.
    # sample_data[0..2] appear to be (cell_ids, cell_type, cell_subtype)
    # based on the header below — TODO confirm against the Dataset class.
    results = np.hstack((
        np.expand_dims(full_dataset.sample_data[0], axis=1),
        latent_reps,
        np.expand_dims(full_dataset.sample_data[1], axis=1),
        np.expand_dims(full_dataset.sample_data[2], axis=1)
    ))

    # Header row matching the column layout assembled above.
    header = ["cell_ids"]
    header += ["dim{}".format(i)
               for i in range(1, model_config["latent_size"] + 1)]
    header += ["cell_type", "cell_subtype"]
    results = np.vstack((np.array(header), results))

    self.logger.info("Saving results")
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")
def run(self, debug=False):
    """Execute the VAE hyperparameter experiment end to end.

    Runs hyperopt over case models, exports a table of cross-validation
    metrics for every completed trial, then retrains final VAEs for the
    configurations with the best total, reconstruction, and KL-divergence
    losses.
    """
    self.logger.info("EXPERIMENT START")
    trials, _, best_loss_case_config = self.run_hyperopt(
        self.train_case_model)
    self.logger.info("Finished hyperopt optimization")

    # Completed trials as (config, recon, kl, total) tuples, plus a
    # row-per-trial table for export.
    completed = []
    table = [[
        "model_name", "encoder_layers", "latent_size", "optimizer",
        "batch_size", "cv_reconstruction_loss", "cv_kl_divergence_loss",
        "cv_total_loss"
    ]]
    for trial in trials.results:
        metrics = trial["avg_valid_metrics"]
        if None in metrics.values():
            # Skip trials whose cross-validation did not finish.
            continue
        config = trial["model_config"]
        completed.append((
            config,
            metrics["reconstruction_loss"],
            metrics["kl_divergence_loss"],
            metrics["loss"]))
        table.append([
            config["name"],
            config["encoder_layers"],
            config["latent_size"],
            config["optimizer"],
            config["batch_size"],
            metrics["reconstruction_loss"],
            metrics["kl_divergence_loss"],
            metrics["loss"]
        ])

    save_data_table(
        table,
        self.experiment_dir + "/experiment_results.txt")
    self.logger.info("Saved experiment results")

    # Retrain a final model for each selection criterion.
    best_loss_model_config = self.get_model_config(best_loss_case_config)
    best_loss_model_config["name"] = "UsokinVAE_BestTotalLoss"

    best_recon_loss_model_config = min(completed, key=lambda t: t[1])[0]
    best_recon_loss_model_config["name"] = "UsokinVAE_BestReconLoss"

    best_kl_loss_model_config = min(completed, key=lambda t: t[2])[0]
    best_kl_loss_model_config["name"] = "UsokinVAE_BestKLDivergenceLoss"

    self.train_final_vae(best_loss_model_config)
    self.train_final_vae(best_recon_loss_model_config)
    self.train_final_vae(best_kl_loss_model_config)
    self.logger.info("EXPERIMENT END")
def run(self):
    """Execute the AAE hyperparameter experiment end to end.

    Runs hyperopt over case models, exports a table of cross-validation
    metrics for every completed trial, then retrains one final AAE using
    the configuration with the best total loss.
    """
    self.logger.info("EXPERIMENT START")
    trials, _, best_loss_case_config = self.run_hyperopt(
        self.train_case_model)
    self.logger.info("Finished hyperopt optimization")

    # NOTE(review): `losses` is collected but never read afterwards in
    # this method; kept for parity with the other experiment runners.
    losses = []
    table = [[
        "model_name", "encoder_layers", "latent_size",
        "discriminator_layers", "ae_optimizer", "disc_optimizer",
        "batch_size", "cv_ae_loss", "cv_disc_loss_prior",
        "cv_disc_loss_posterior", "cv_disc_loss", "cv_adv_loss",
        "cv_total_loss"
    ]]
    for trial in trials.results:
        metrics = trial["avg_valid_metrics"]
        if None in metrics.values():
            # Skip trials whose cross-validation did not finish.
            continue
        config = trial["model_config"]
        losses.append((config, metrics["loss"]))
        table.append([
            config["name"],
            # TODO: Format the encoder layers better
            config["encoder_layers"],
            config["latent_size"],
            config["discriminator_layers"],
            config["autoencoder_optimizer"],
            config["discriminator_optimizer"],
            config["batch_size"],
            metrics["ae_loss"],
            metrics["disc_loss_prior"],
            metrics["disc_loss_posterior"],
            metrics["disc_loss"],
            metrics["adv_loss"],
            metrics["loss"]
        ])

    save_data_table(
        table,
        self.experiment_dir + "/experiment_results.txt")
    self.logger.info("Saved experiment results")

    # Retrain the final AAE using the best total-loss configuration.
    best_loss_model_config = self.get_model_config(best_loss_case_config)
    best_loss_model_config["name"] = "PollenAAE_Final"
    self.train_final_aae(best_loss_model_config)
    self.logger.info("EXPERIMENT END")
def run(self):
    """Execute the melanoma VAE hyperparameter experiment end to end.

    Runs hyperopt over case models, exports a table of cross-validation
    metrics for every completed trial, then retrains one final VAE using
    the configuration with the best total loss.
    """
    self.logger.info("EXPERIMENT START")
    trials, _, best_loss_case_config = self.run_hyperopt(
        self.train_case_model)
    self.logger.info("Finished hyperopt optimization")

    # NOTE(review): `losses` is collected but never read afterwards in
    # this method; kept for parity with the other experiment runners.
    losses = []
    table = [[
        "model_name", "n_layers", "encoder_layers", "latent_size",
        "optimizer", "batch_size", "cv_reconstruction_loss",
        "cv_kl_divergence_loss", "cv_total_loss"
    ]]
    for trial in trials.results:
        metrics = trial["avg_valid_metrics"]
        if None in metrics.values():
            # Skip trials whose cross-validation did not finish.
            continue
        config = trial["model_config"]
        losses.append((
            config,
            metrics["reconstruction_loss"],
            metrics["kl_divergence_loss"],
            metrics["loss"]))
        table.append([
            config["name"],
            # NOTE(review): true division — yields a float (e.g. 1.5 for an
            # odd layer count); confirm whether `// 2` was intended.
            len(config["encoder_layers"]) / 2,
            "|".join(config["encoder_layers"]),
            config["latent_size"],
            config["optimizer"],
            config["batch_size"],
            metrics["reconstruction_loss"],
            metrics["kl_divergence_loss"],
            metrics["loss"]
        ])

    save_data_table(
        table,
        self.experiment_dir + "/experiment_results.txt")
    self.logger.info("Saved experiment results")

    # Retrain the final VAE using the best total-loss configuration.
    best_loss_model_config = self.get_model_config(best_loss_case_config)
    best_loss_model_config["name"] = "MelanomaVAE_Final"
    self.train_final_vae(best_loss_model_config)
    self.logger.info("EXPERIMENT END")
def train_final_vae(self, model_config):
    """Train the final VAE and persist its latent space and final losses.

    Enables TensorBoard, loss-based checkpointing, and loss-based early
    stopping on the autoencoder callbacks, trains via
    ``train_final_model``, then writes ``latent_representations.txt`` and
    ``final_losses.txt`` to the model directory.

    Args:
        model_config (dict): model configuration; its
            ``autoencoder_callbacks`` entry is mutated in place.
    """
    # Final training always runs fully instrumented, selecting on total loss.
    callbacks = model_config["autoencoder_callbacks"]
    callbacks["tensorboard"] = True
    callbacks["checkpoint"] = {
        "metric": "loss",
        "file": "autoencoder_model.weights.h5"
    }
    callbacks["early_stopping"]["metric"] = "loss"

    results = self.train_final_model(model_config)
    final_vae = results["model"]
    full_dataset = results["dataset"]

    # Fixed typo in log message: "represenations" -> "representations".
    self.logger.info("Encoding latent representations...")
    latent_reps = final_vae.encode(full_dataset.features)

    # Columns: id | latent dims | two metadata columns.
    # sample_data[0..2] appear to be (cell_ids, cell_type, cell_subtype)
    # based on the header below — TODO confirm against the Dataset class.
    results = np.hstack((
        np.expand_dims(full_dataset.sample_data[0], axis=1),
        latent_reps,
        np.expand_dims(full_dataset.sample_data[1], axis=1),
        np.expand_dims(full_dataset.sample_data[2], axis=1)
    ))

    # Header row matching the column layout assembled above.
    header = ["cell_ids"]
    header += ["dim{}".format(i)
               for i in range(1, model_config["latent_size"] + 1)]
    header += ["cell_type", "cell_subtype"]
    results = np.vstack((np.array(header), results))

    self.logger.info("Saving results")
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")

    # Evaluate once on the full dataset and record the final loss breakdown.
    self.logger.info("Saving losses")
    metrics = final_vae.evaluate(full_dataset)
    save_data_table(
        [["metric", "value"],
         ["total_loss", metrics["loss"]],
         ["reconstruction_loss", metrics["reconstruction_loss"]],
         ["kl_divergence_loss", metrics["kl_divergence_loss"]]],
        model_config["model_dir"] + "/final_losses.txt"
    )
def run(self):
    """Train a 2-D latent VAE on MNIST, save latent codes, and plot them."""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # NOTE(review): this maps pixels to roughly [-0.5, 0.5], not the usual
    # [-1, 1] (which would divide by 127.5) — confirm intended scaling.
    x_train = (x_train.astype("float32") - 127.5) / 255
    x_test = (x_test.astype("float32") - 127.5) / 255

    train_dataset = Dataset(x_train, y_train,
                            flatten=True, to_one_hot=False)
    test_dataset = Dataset(x_test, y_test,
                           flatten=True, to_one_hot=False)

    model_name = "MNIST_VAE"
    model_dir = self.get_model_dir(model_name)
    create_dir(model_dir)

    model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784, ),
        "continuous": True,
        "encoder_layers": [
            "Dense:256:activation='elu'",
            "BatchNormalization"
        ],
        "latent_size": 2,
        "optimizer": "adam"
    }

    # Short run in debug mode, full run otherwise.
    epochs = 3 if self.debug else 50

    vae = VAE(model_config)
    vae.train(train_dataset,
              epochs=epochs,
              batch_size=100,
              validation_dataset=test_dataset)

    # Encode the test split and pair each code with its digit label.
    latent_reps = vae.encode(test_dataset.features)
    table = np.hstack(
        (latent_reps, np.expand_dims(test_dataset.labels, axis=1)))

    columns = ["dim{}".format(i)
               for i in range(1, model_config["latent_size"] + 1)]
    columns.append("digit")
    table = np.vstack((np.array(columns), table))

    self.logger.info("Saving results")
    save_data_table(
        table,
        model_config["model_dir"] + "/latent_representations.txt")

    # 2-D scatter of the latent space coloured by digit label.
    plt.figure(figsize=(6, 6))
    plt.scatter(latent_reps[:, 0], latent_reps[:, 1],
                c=y_test, cmap="rainbow")
    plt.colorbar()
    plt.show()