示例#1
0
def load_sample_data(path,
                     samples_selected=None,
                     batch_names=None):
    """
    Load and merge training data for the requested samples.

    Parameters
    ----------
    path : str
        Path to the data file, passed through to ``load_data_basic``.
    samples_selected : list of str, optional
        Sample identifiers to load. Defaults to ``['sample1']``.
    batch_names : list of str, optional
        Batch identifiers passed to ``load_data_basic``.
        Defaults to ``['batch1', 'batch2']``.

    Returns
    -------
    pandas.DataFrame
        Training frames (x1 and x2) of all samples concatenated, with
        ``metadata_batch`` / ``metadata_celltype`` / ``metadata_sample``
        columns derived from the index, NaN-containing columns dropped,
        and a fresh RangeIndex.
    """
    # Avoid mutable default arguments: list defaults are shared across
    # calls and would leak mutations between callers.
    if samples_selected is None:
        samples_selected = ['sample1']
    if batch_names is None:
        batch_names = ['batch1', 'batch2']

    # Collect per-sample frames and concatenate once at the end;
    # repeated pd.concat inside the loop is quadratic in total rows.
    frames = []
    for sample in samples_selected:
        x1_train, x1_test, x2_train, x2_test = load_data_basic(
            path, sample=sample, batch_names=batch_names, panel=None)
        frames.append(pd.concat([x1_train, x2_train]))
    df_full = pd.concat(frames, axis=0)

    # Index entries look like '<batch>_<sample>_..._<celltype>';
    # split them into metadata columns. TODO confirm index format upstream.
    df_full['metadata_batch'] = [i.split('_')[0] for i in df_full.index]
    df_full['metadata_celltype'] = [i.split('_')[-1] for i in df_full.index]
    df_full['metadata_sample'] = [i.split('_')[1] for i in df_full.index]
    df_full = df_full.dropna(axis=1)
    df_full = df_full.reset_index(drop=True)
    return df_full
示例#2
0
            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                print('generating plots and saving outputs')
                gx1 = self.generator.predict(x1_train_df)
                self.generator.save(os.path.join('models_' + self.modelname, fname, 'generator' + str(epoch)))
                save_info.save_dataframes(epoch, x1_train_df, x2_train_df, gx1, fname, dir_name='output_'+self.modelname)
                save_info.save_scores(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, dir_name='output_'+self.modelname)
                #save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, umap=False,
                #                         dir_name='figures_'+self.modelname, autoencoder=False, modelname=self.modelname)


if __name__ == '__main__':
    import os
    import argparse
    from loading_and_preprocessing.data_loader import load_data_basic

    # CLI: the numeric sample suffix (e.g. 5, 65, 75) and the data path.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('sample', type=str)
    arg_parser.add_argument('path', type=str)
    cli_args = arg_parser.parse_args()

    selected_sample = f'sample{cli_args.sample}'

    # Train/test splits for batches 1 and 3 of the selected sample.
    x1_train, x1_test, x2_train, x2_test = load_data_basic(
        cli_args.path,
        sample=selected_sample,
        batch_names=['batch1', 'batch3'],
        seed=42,
        panel=None,
        upsample=True)

    gan = GAN('residual_gan_full_panels', x1_train.shape[1])
    gan.train(x1_train, x2_train, epochs=600, batch_size=64, sample_interval=25)
            training_metrics["epoch"].append(epoch)
            training_metrics["d_loss"].append(d_loss[0])
            training_metrics["d_accuracy"].append(d_loss[1])
            training_metrics["g_loss"].append(g_loss)

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                print('generating plots and saving outputs')
                gx1 = self.generator.predict(x1_train_df)
                self.generator.save(os.path.join('models_' + self.modelname, fname, 'generator' + str(epoch)))
                save_info.save_dataframes(epoch, x1_train_df, x2_train_df, gx1, fname, dir_name='output_'+self.modelname)
                save_info.save_scores(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, dir_name='output_'+self.modelname)
                #save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, training_metrics, fname, umap=False,
                #                         dir_name='figures_'+self.modelname, autoencoder=False, modelname=self.modelname)


if __name__ == '__main__':
    import os
    from loading_and_preprocessing.data_loader import load_data_basic

    # Cluster path; Windows path kept as a commented alternative.
    path = '/cluster/home/hthrainsson/chevrier_data_pooled_full_panels.parquet'
    # path = r'C:\Users\heida\Documents\ETH\Deep Learning\chevrier_data_pooled_full_panels.parquet'

    # The two original invocations were identical except for the sample
    # name, so train one model per sample in a loop instead of
    # duplicating the load/construct/train code.
    for sample_name in ('sample5', 'sample10'):
        x1_train, x1_test, x2_train, x2_test = load_data_basic(
            path, sample=sample_name, batch_names=['batch1', 'batch3'],
            seed=42, panel=None, upsample=True)
        gan = GAN('residual_gan_wo_residuals_full_panels', x1_train.shape[1])
        gan.train(x1_train, x2_train, epochs=600, batch_size=64, sample_interval=25)
                                         gx1,
                                         plot_model,
                                         fname,
                                         umap=False)
        return plot_model


if __name__ == '__main__':
    import os
    from loading_and_preprocessing.data_loader import load_data_basic, load_data_cytof

    # Local Windows copy of the pooled-panel data.
    data_path = r'C:\Users\Public\PycharmProjects\deep\Legacy_2019_DL_Class\data\chevrier_data_pooled_panels.parquet'

    # Train/test splits for batches 1 and 2 of sample5 (T-cell panel).
    x1_train, x1_test, x2_train, x2_test = load_data_basic(data_path,
                                                           sample='sample5',
                                                           batch_names=['batch1', 'batch2'],
                                                           seed=42,
                                                           panel='tcell')

    gan = GAN(x1_train.shape[1])
    gan.train(x1_train,
              x2_train,
              epochs=3000,
              batch_size=64,
              sample_interval=50)
                                      gx1,
                                      training_metrics,
                                      fname,
                                      dir_name='output_' + self.modelname,
                                      model_description=model_description)
                # save_plots.plot_progress(epoch, x1_train_df, x2_train_df, gx1, plot_model, fname,
                # dir_name='output_' + self.modelname)

        return plot_model


if __name__ == '__main__':
    import os
    from loading_and_preprocessing.data_loader import load_data_basic, load_data_cytof

    # Raw string: '\d' and '\c' are invalid escape sequences and only
    # worked by accident in a plain literal (SyntaxWarning on modern
    # Python). The r-prefix keeps the path bytes identical.
    path = r'..\data\chevrier_samples_5_65_75.parquet'
    samples = ['sample5', 'sample65', 'sample75']
    modelname = 'gan_autoencoder_diamond_narrower_full'

    # Train one GAN autoencoder per sample on batches 1 and 3.
    for s in samples:
        x1_train, x1_test, x2_train, x2_test = load_data_basic(
            path,
            sample=s,
            batch_names=['batch1', 'batch3'],
            seed=42,
            panel=None)
        gan = GAN(x1_train.shape[1], modelname)
        gan.train(x1_train,
                  x2_train,
                  epochs=3000,
                  batch_size=64,
                  sample_interval=50)
示例#6
0
#!/usr/bin/env python
"""Train a vanilla GAN per sample on the Chevrier data (batches 1 and 3)."""
import sys
import os

# Make the repository root importable when the script runs from its own dir.
sys.path.append(os.path.dirname(sys.path[0]))
from vanilla_gan.gan_batches_optimized import GAN
from loading_and_preprocessing.data_loader import load_data_basic

# Raw string: '\c' is an invalid escape sequence in a plain literal
# (SyntaxWarning on modern Python); the r-prefix keeps the bytes identical.
path = r'data\chevrier_samples_5_65_75.parquet'
sample_names = ['sample5', 'sample65', 'sample75']
batch_names = ['batch1', 'batch3']

if __name__ == '__main__':
    # Guard so importing this module does not kick off training.
    for sample_name in sample_names:
        x1_train, x1_test, x2_train, x2_test = load_data_basic(path, sample=sample_name,
                                                               batch_names=batch_names, seed=42, panel=None,
                                                               upsample=True)
        gan = GAN(x1_train.shape[1], modelname='gan_vanilla_full')
        gan.train(x1_train, x2_train, epochs=1000, batch_size=64, sample_interval=50)