def train_autoencoder_all(methylation_array, latent_dimension):
    """Fit a Giskard autoencoder on ``methylation_array`` and return it.

    The leading 10% of the rows (in their existing order, no shuffling) are
    wrapped as the validation generator; the remaining rows form the
    training generator.

    Args:
        methylation_array: Table exposing ``.shape`` and ``.iloc`` with
            two-axis slicing -- presumably a pandas DataFrame with one
            sample per row (TODO confirm against callers).
        latent_dimension: Forwarded to ``Giskard`` as ``latent_dimension``.

    Returns:
        The ``Giskard`` instance after ``fit`` has been called on it.
    """
    # Row count of the hold-out split, truncated towards zero.
    holdout_rows = int(methylation_array.shape[0] * 0.1)

    validation_generator = AutoencoderGenerator(
        methylation_array.iloc[:holdout_rows, :]
    )
    training_generator = AutoencoderGenerator(
        methylation_array.iloc[holdout_rows:, :]
    )

    # Autoencoder training
    n_features = methylation_array.shape[1]
    autoencoder = Giskard(
        n_features,
        latent_dimension=latent_dimension,
        model_serialization_path="../data/models/",
    )

    # Stop once ``val_loss`` has not improved by at least 0.05 for 10 rounds.
    early_stop = EarlyStopping(monitor="val_loss", min_delta=0.05, patience=10)

    # The third positional argument (500) is presumably the maximum number
    # of epochs -- confirm against ``Giskard.fit``.
    autoencoder.fit(
        training_generator,
        validation_generator,
        500,
        callbacks=[early_stop],
    )
    return autoencoder
# Collect the first field of every record in mirnas[m] (for each PAM50
# miRNA) whose third field is above 0.8.
# NOTE(review): the record layout (gene id at [0], score at [2]) is inferred
# from the indexing only -- confirm where `mirnas` is built.
over_rate_genes.extend(
    gene[0]
    for m in pam50_mirnas
    for gene in mirnas[m]
    if gene[2] > 0.8
)

# dataset = pickle.load(open("../data/mrna_exp_all.pkl", "rb"))[over_rate_mrna]
dataset = dataset[over_rate_genes]

# Generation of training and validation set
# The leading 10% of the rows (existing order) are held out for validation.
val_size = int(dataset.shape[0] * 0.1)
validation_set = AutoencoderGenerator(dataset.iloc[:val_size, :])
training_set = AutoencoderGenerator(dataset.iloc[val_size:, :])

# Autoencoder training
mrna_encoder = Giskard(
    dataset.shape[1],
    latent_dimension=ld,
    model_serialization_path="../data/models/",
)
mrna_encoder.fit(
    training_set,
    validation_set,
    2000,
    callbacks=[EarlyStopping(monitor="val_loss", min_delta=0.05, patience=10)],
)

# Creating an embedded representation of the mRNA methylation array
# NOTE: pickle executes arbitrary code on load; only use trusted data files.
# The file is opened in a `with` block so the handle is closed right away.
with open("../data/mrna_exp_ma.pkl", "rb") as source_file:
    mrna_to_encode = pickle.load(source_file)

# Strip everything after the first '.' in each column name, then keep only
# the selected genes so the columns line up with the training data.
mrna_to_encode["beta"] = mrna_to_encode["beta"].rename(
    columns=lambda g: g.split('.')[0])[over_rate_genes]
mrna_dataset = mrna_encoder.encode_methylation_array(mrna_to_encode)

# Closing the handle explicitly guarantees the pickle is flushed to disk
# before any later step reads it back.
with open("../data/mrna_embedded_pam.pkl", "wb") as target_file:
    pickle.dump(mrna_dataset, target_file)

# Just a check on ground truth