header=0,
                            sep="\t",
                            index_col=0)
# Sanity check: compendium and template matrices must cover the same genes
# (i.e. have an identical number of columns).
n_compendium_genes = compendium_data.shape[1]
n_template_genes = template_data.shape[1]
assert n_compendium_genes == n_template_genes
# -

# ## Train: VAE training and reproducibility

# +
# Create the per-architecture VAE output directories (model weights and
# training logs) so downstream training has somewhere to write.
NN_architecture = params["NN_architecture"]

output_dirs = [
    os.path.join(base_dir, dataset_name, subdir)
    for subdir in ("models", "logs")
]

for parent_dir in output_dirs:
    # exist_ok=True makes re-running this cell a no-op
    os.makedirs(os.path.join(parent_dir, NN_architecture), exist_ok=True)
# -

# Train VAE on new compendium data
# Side-effecting call: training is configured by config_filename and run on
# normalized_compendium_filename. Presumably writes model weights and the
# training-stats log read by the reproducibility check below — confirm
# against train_vae_modules.train_vae.
train_vae_modules.train_vae(config_filename, normalized_compendium_filename)

# Test reproducibility
# NOTE(review): expected_log is assigned but never referenced in this cell —
# it looks like a leftover from a file-comparison check that was replaced by
# the threshold assert below; confirm before removing.
expected_log = "data/test_vae_logs.tsv"
actual_log = "logs/NN_2500_30/tybalt_2layer_30latent_stats.tsv"

# Read the training stats once (the original read the file twice: once for
# the assert condition and once for its message) and check that the final
# validation loss landed below the expected ceiling. The observed loss is
# used as the assert message so failures report the actual value.
final_val_loss = pd.read_csv(actual_log, sep="\t")["val_loss"].values[-1]
assert final_val_loss < 15000, final_val_loss

# Create the experiment-id file only if it does not already exist
# (idiomatic truthiness check instead of `== False`).
if not os.path.exists(experiment_id_file):
    utils.create_experiment_id_file(metadata_file, normalized_data_file,
                                    experiment_id_file, config_file)

# ## Train VAE

# In[9]:

# Directory containing log information from VAE training
vae_log_dir = os.path.join(base_dir, dataset_name, "logs", train_architecture)

# In[10]:

# Train VAE only when no training logs exist yet — an empty log directory is
# taken to mean this architecture has not been trained, so re-running the
# notebook skips the expensive retraining step.
# (Idiomatic emptiness test: `not os.listdir(...)` instead of `len(...) == 0`.
# Note os.listdir raises FileNotFoundError if vae_log_dir is missing; the
# directory is presumably created by an earlier setup cell — confirm.)
if not os.listdir(vae_log_dir):
    train_vae_modules.train_vae(config_file, normalized_data_file)

# ## Run simulation experiment without noise correction

# In[11]:

# Run the simulation experiment with noise correction disabled.
corrected = False
pipeline.run_simulation(
    config_file,
    normalized_data_file,
    corrected,
    experiment_id_file,
)

# ## Run simulation with correction applied

# In[12]:

# Run simulation with correction applied