def load_sample_data(path, samples_selected=None, batch_names=None):
    """
    Load the training data of several samples and merge it into one frame.

    For every entry of ``samples_selected`` the function calls
    ``load_data_basic(path, sample=..., batch_names=batch_names, panel=None)``,
    stacks the two returned training frames (the two test frames are ignored)
    and finally concatenates the per-sample frames row-wise.

    Each row label is split on ``'_'``: the first field is stored in
    ``metadata_batch``, the second in ``metadata_sample`` and the last in
    ``metadata_celltype``.  Afterwards every column that contains a missing
    value is dropped and the index is replaced by a fresh integer index.

    :param path: forwarded unchanged to ``load_data_basic``.
    :param samples_selected: iterable of sample names; ``None`` (the default)
        means ``['sample1']``.
    :param batch_names: batch names forwarded to ``load_data_basic``; ``None``
        (the default) means ``['batch1', 'batch2']``.
    :return: the merged ``pandas.DataFrame`` including the metadata columns.
    :raises ValueError: if ``samples_selected`` is empty.
    """
    # None sentinels instead of list literals: default lists are created once
    # at definition time and would be shared between calls.
    if samples_selected is None:
        samples_selected = ['sample1']
    if batch_names is None:
        batch_names = ['batch1', 'batch2']

    samples = list(samples_selected)
    if not samples:
        # Previously this case fell through with ``df_full = None`` and failed
        # later with an unrelated AttributeError.
        raise ValueError('samples_selected must contain at least one sample name')

    # Collect the per-sample frames and concatenate once; concatenating inside
    # the loop copies the accumulated frame again on every iteration.
    per_sample_frames = []
    for sample in samples:
        x1_train, _x1_test, x2_train, _x2_test = load_data_basic(
            path, sample=sample, batch_names=batch_names, panel=None)
        per_sample_frames.append(pd.concat([x1_train, x2_train]))
    df_full = pd.concat(per_sample_frames, axis=0)

    # Split every row label only once and reuse the pieces for all three
    # metadata columns (labels are assumed to be '_'-separated strings with at
    # least two fields -- TODO confirm against load_data_basic).
    label_fields = [label.split('_') for label in df_full.index]
    df_full['metadata_batch'] = [fields[0] for fields in label_fields]
    df_full['metadata_celltype'] = [fields[-1] for fields in label_fields]
    df_full['metadata_sample'] = [fields[1] for fields in label_fields]

    df_full = df_full.dropna(axis=1)
    return df_full.reset_index(drop=True)
# NOTE(review): the statements above the `__main__` guard are the tail of a
# method whose header lies outside this chunk. They are laid out at column 0
# here and need the enclosing body's indentation when spliced back.

# At every `sample_interval`-th epoch: run the generator on x1, save the
# generator and write the dataframes and scores.
if epoch % sample_interval == 0:
    print('generating plots and saving outputs')
    gx1 = self.generator.predict(x1_train_df)
    self.generator.save(os.path.join('models_' + self.modelname, fname, 'generator' + str(epoch)))
    save_info.save_dataframes(epoch, x1_train_df, x2_train_df, gx1, fname,
                              dir_name='output_'+self.modelname)
    save_info.save_scores(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname,
                          dir_name='output_'+self.modelname)
    #save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, umap=False,
    #                         dir_name='figures_'+self.modelname, autoencoder=False, modelname=self.modelname)


if __name__ == '__main__':
    # NOTE(review): `os` is also used by the method tail above; confirm it is
    # imported at module level as well, otherwise saving the generator raises
    # NameError when this module is imported instead of run as a script.
    import os
    import argparse
    from loading_and_preprocessing.data_loader import load_data_basic

    parser = argparse.ArgumentParser(
        description='Train the residual GAN on one sample (batch1 vs. batch3).')
    # The value is prefixed with 'sample' below, so pass e.g. 5, 65 or 75.
    parser.add_argument('sample', type=str,
                        help="sample id without the 'sample' prefix, e.g. 5")
    parser.add_argument('path', type=str,
                        help='path of the parquet file handed to load_data_basic')
    args = parser.parse_args()
    sample_name = 'sample' + args.sample

    x1_train, x1_test, x2_train, x2_test = load_data_basic(
        args.path, sample=sample_name, batch_names=['batch1', 'batch3'],
        seed=42, panel=None, upsample=True)
    gan = GAN('residual_gan_full_panels', x1_train.shape[1])
    gan.train(x1_train, x2_train, epochs=600, batch_size=64, sample_interval=25)
# NOTE(review): the statements above the `__main__` guard are the tail of a
# method whose header lies outside this chunk. They are laid out at column 0
# here and need the enclosing body's indentation when spliced back.

# Record this epoch's discriminator loss/accuracy and generator loss.
training_metrics["epoch"].append(epoch)
training_metrics["d_loss"].append(d_loss[0])
training_metrics["d_accuracy"].append(d_loss[1])
training_metrics["g_loss"].append(g_loss)

# At every `sample_interval`-th epoch: run the generator on x1, save the
# generator and write the dataframes and scores.
if epoch % sample_interval == 0:
    print('generating plots and saving outputs')
    gx1 = self.generator.predict(x1_train_df)
    self.generator.save(os.path.join('models_' + self.modelname, fname, 'generator' + str(epoch)))
    save_info.save_dataframes(epoch, x1_train_df, x2_train_df, gx1, fname,
                              dir_name='output_'+self.modelname)
    save_info.save_scores(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname,
                          dir_name='output_'+self.modelname)
    #save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, umap=False,
    #                         dir_name='figures_'+self.modelname, autoencoder=False, modelname=self.modelname)


if __name__ == '__main__':
    # NOTE(review): `os` is also used by the method tail above; confirm it is
    # imported at module level as well, not only under this guard.
    import os
    from loading_and_preprocessing.data_loader import load_data_basic

    path = '/cluster/home/hthrainsson/chevrier_data_pooled_full_panels.parquet'

    # One freshly constructed model per sample, trained one after the other:
    # first 'sample5', then 'sample10', both on batch1 vs. batch3.
    for sample_name in ('sample5', 'sample10'):
        x1_train, x1_test, x2_train, x2_test = load_data_basic(
            path, sample=sample_name, batch_names=['batch1', 'batch3'],
            seed=42, panel=None, upsample=True)
        gan = GAN('residual_gan_wo_residuals_full_panels', x1_train.shape[1])
        gan.train(x1_train, x2_train, epochs=600, batch_size=64, sample_interval=25)
gx1, plot_model, fname, umap=False) return plot_model if __name__ == '__main__': import os from loading_and_preprocessing.data_loader import load_data_basic, load_data_cytof # path = r'C:\Users\heida\Documents\ETH\Deep Learning\2019_DL_Class_old\code_ADAE_\chevrier_data_pooled_panels.parquet' path = r'C:\Users\Public\PycharmProjects\deep\Legacy_2019_DL_Class\data\chevrier_data_pooled_panels.parquet' # x1_train, x1_test, x2_train, x2_test = load_data_cytof(path, patient_id='rcc7', n=10000) x1_train, x1_test, x2_train, x2_test = load_data_basic( path, sample='sample5', batch_names=['batch1', 'batch2'], seed=42, panel='tcell') # path = r'C:\Users\Public\PycharmProjects\deep\2019_DL_Class\loading_and_preprocessing' # path = path + '/toy_data_gamma_small.parquet' # '/toy_data_gamma_large.parquet' #x1_train, x1_test, x2_train, x2_test = load_data_basic(path, patient='sample1', batch_names=['batch1', 'batch2'], # seed=42, n_cells_to_select=0) gan = GAN(x1_train.shape[1]) gan.train(x1_train, x2_train, epochs=3000, batch_size=64, sample_interval=50)
gx1, training_metrics, fname, dir_name='output_' + self.modelname, model_description=model_description) # save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, plot_model, fname, # dir_name='output_' + self.modelname) return plot_model if __name__ == '__main__': import os from loading_and_preprocessing.data_loader import load_data_basic, load_data_cytof path = '..\data\chevrier_samples_5_65_75.parquet' samples = ['sample5', 'sample65', 'sample75'] modelname = 'gan_autoencoder_diamond_narrower_full' for s in samples: x1_train, x1_test, x2_train, x2_test = load_data_basic( path, sample=s, batch_names=['batch1', 'batch3'], seed=42, panel=None) gan = GAN(x1_train.shape[1], modelname) gan.train(x1_train, x2_train, epochs=3000, batch_size=64, sample_interval=50)
#!/usr/bin/env python
# Trains the vanilla GAN ('gan_vanilla_full') once per sample on batch1 vs.
# batch3 of the chevrier_samples_5_65_75 parquet file.
import sys
import os

# Put the parent of the script directory on the import path *before* the
# project imports below, so that the `vanilla_gan` and
# `loading_and_preprocessing` packages can be resolved.
sys.path.append(os.path.dirname(sys.path[0]))
from vanilla_gan.gan_batches_optimized import GAN
from loading_and_preprocessing.data_loader import load_data_basic

# Built with os.path.join instead of the literal 'data\chevrier...': '\c' is an
# invalid escape sequence (SyntaxWarning on Python >= 3.12) and the backslash
# only works as a separator on Windows. On Windows the resulting string is
# identical to the old one. The path stays relative to the working directory.
path = os.path.join('data', 'chevrier_samples_5_65_75.parquet')
sample_names = ['sample5', 'sample65', 'sample75']
batch_names = ['batch1', 'batch3']

# One freshly constructed model per sample, trained one after the other.
for sample_name in sample_names:
    x1_train, x1_test, x2_train, x2_test = load_data_basic(
        path, sample=sample_name, batch_names=batch_names,
        seed=42, panel=None, upsample=True)
    gan = GAN(x1_train.shape[1], modelname='gan_vanilla_full')
    gan.train(x1_train, x2_train, epochs=1000, batch_size=64, sample_interval=50)