def run(self, debug=False):
    """Run the hyperparameter search, train the final VAE, and save results.

    Steps, in order:
      1. Load the data and split it into 10 folds, stored on
         ``self.datasets``.
      2. Run ``fmin`` with ``tpe.suggest`` over the search space returned by
         ``self.generate_search_space()``, using ``self.train_vae`` as the
         objective.
      3. Convert the best point into a model config and pass it to
         ``self.train_final_vae``.
      4. Write one row per trial (name, loss) to
         ``<experiment_dir>/experiment_results.txt``.

    Args:
        debug: When true, or when ``self.debug`` is true, only a single
            hyperopt evaluation is performed instead of 30.
    """
    cell_ids, features, cell_types, cell_subtypes = self.load_data()
    # cell_subtypes is passed as the label argument (presumably the
    # stratification key -- confirm against stratified_kfold).
    self.datasets = stratified_kfold(
        features,
        cell_subtypes,
        [cell_ids, cell_types, cell_subtypes],
        n_folds=10,
        convert_labels_to_int=True)

    trials = Trials()
    search_space = self.generate_search_space()

    # Honour the explicit ``debug`` argument as well as the instance flag;
    # previously the argument was accepted but silently ignored.
    n_evals = 1 if (debug or self.debug) else 30
    best = fmin(
        self.train_vae,
        space=search_space,
        algo=tpe.suggest,
        max_evals=n_evals,
        trials=trials)
    self.logger.info("Finished hyperopt optimization")

    # ``best`` is resolved through space_eval before building the config.
    best_model_config = self.get_model_config(
        space_eval(search_space, best))
    self.train_final_vae(best_model_config)

    # Header row followed by one [name, loss] row per recorded trial.
    experiment_results = [["model_name", "10foldcv_loss"]]
    experiment_results.extend(
        [result["name"], result["loss"]] for result in trials.results)
    save_data_table(
        experiment_results,
        self.experiment_dir + "/experiment_results.txt")
def __init__(self, n_genes, debug=False):
    """Set up the experiment: directories, logging, hyperopt, data, and CV.

    Args:
        n_genes: Stored as ``self.input_size`` and reported in the
            "Loaded ..." log message.
        debug: Forwarded unchanged to the parent constructor.
    """
    super(TrainUsoskinVAE, self).__init__(debug=debug)

    # input_size is assigned before the setup helpers run; keep this order
    # in case any of them read it.
    self.input_size = n_genes

    self.setup_dir()
    self.setup_logger()
    self.setup_hyperopt(n_evals=N_EVALS)

    ids, feats, types, subtypes = self.load_data()
    # Per-sample columns handed to stratified_kfold alongside the features.
    sample_columns = [ids, types, subtypes]
    self.datasets = stratified_kfold(
        feats,
        subtypes,
        sample_columns,
        n_folds=N_FOLDS,
        convert_labels_to_int=True)

    loaded_message = "Loaded {}g Usoskin dataset".format(self.input_size)
    self.logger.info(loaded_message)

    self.setup_cross_validation(
        n_folds=N_FOLDS,
        datasets=self.datasets,
        model_class=VAE,
        epochs=MAX_EPOCHS)
def __init__(self, debug=False):
    """Configure the 100-gene, 2-layer VAE experiment.

    Sets the experiment name (prefixed with ``DEBUG_`` when ``self.debug``
    is true), runs the directory/logger/hyperopt setup helpers, loads the
    data into 5 folds on ``self.datasets``, and configures cross-validation
    with the ``VAE`` model class.

    Args:
        debug: Forwarded positionally to the parent constructor.
    """
    super(Experiment, self).__init__(debug)

    base_name = "train_usokin-100g-2layer-vae"
    self.experiment_name = ("DEBUG_" + base_name) if self.debug else base_name

    self.setup_dir()
    self.setup_logger()
    self.setup_hyperopt(n_evals=50)

    self.input_size = 100

    # Same fold count is used for the split and for cross-validation setup.
    fold_count = 5
    ids, feats, types, subtypes = self.load_data()
    sample_columns = [ids, types, subtypes]
    self.datasets = stratified_kfold(
        feats,
        subtypes,
        sample_columns,
        n_folds=fold_count,
        convert_labels_to_int=True)
    self.logger.info("Loaded 100g, standardized Usokin dataset")

    self.setup_cross_validation(
        n_folds=fold_count,
        datasets=self.datasets,
        model_class=VAE)
if root is not None: filepath = root + "/" + filepath delimiter = str(delimiter) if six.PY2 else delimiter with open(filepath, "w") as f: writer = csv.writer( f, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL) for r in data: writer.writerow(r) cell_ids, features, cell_types, cell_subtypes = load_data() datasets = stratified_kfold( features, cell_subtypes, [cell_ids, cell_types, cell_subtypes], n_folds=5, convert_labels_to_int=True) full_dataset = Dataset.concatenate(*datasets) n_epochs = 200 final_vae = VAE(model_config) final_vae.train(full_dataset, epochs=n_epochs, batch_size=model_config["batch_size"]) loss = final_vae.evaluate(full_dataset) print(loss) latent_reps = final_vae.encode(full_dataset.features) results = np.hstack(( np.expand_dims(full_dataset.sample_data[0], axis=1), latent_reps, np.expand_dims(full_dataset.sample_data[1], axis=1),