def objective(params):
    """Hyperopt objective: train a VAE + predictor head with the given
    hyper-parameters and return the negated test accuracy (hyperopt
    minimizes, so lower is better).

    params keys used: kmer_scale, mean_scale, std_scale, len_scale,
    signal_scale, latent_dim, rc_scale, vae_lr, kmer_loss_scale.

    Relies on module-level globals: x_train, x_test, y_train, y_test,
    args, and the model-loading / metric / plotting helpers.
    Returns 0 for a failed trial so the search can continue.
    """
    print(params)
    # Re-scale each feature group of the feature vector before training.
    # Boundaries assumed from the slicing: [0:68] k-mer one-hot,
    # [68:85] means, [85:102] stds, [102:119] lengths, [119:] raw
    # signal -- TODO confirm against the data loader.
    x_train_scaled = np.concatenate(
        (x_train[:, 0:68] * params['kmer_scale'],
         x_train[:, 68:85] * params['mean_scale'],
         x_train[:, 85:102] * params['std_scale'],
         x_train[:, 102:119] * params['len_scale'],
         x_train[:, 119:] * params['signal_scale']), axis=1)
    x_test_scaled = np.concatenate(
        (x_test[:, 0:68] * params['kmer_scale'],
         x_test[:, 68:85] * params['mean_scale'],
         x_test[:, 85:102] * params['std_scale'],
         x_test[:, 102:119] * params['len_scale'],
         x_test[:, 119:] * params['signal_scale']), axis=1)
    encoder, decoder, vae = load_vae_dna_model_deepsignal(
        int(params['latent_dim']), params['rc_scale'], params['vae_lr'],
        params['kmer_loss_scale'])
    try:
        supervised_size = 10000
        es = EarlyStopping(monitor='val_loss', mode='min', patience=20)
        # Unsupervised VAE training on the first 80% of the training
        # data, validating on the remaining 20%.
        vae.fit(x_train_scaled[0:int(len(x_train) * 0.8)],
                validation_data=(x_train_scaled[int(len(x_train) * 0.8):], None),
                epochs=args.vae_epochs, batch_size=args.vae_batch_size,
                verbose=0, callbacks=[es])
        predictor = load_vae_predictor(int(params['latent_dim']))
        # Prepared predictor training input: latent encodings of a small
        # labelled subset.
        x_train_predictor = encoder.predict(x_train_scaled[0:supervised_size])
        es = EarlyStopping(monitor='val_loss', mode='min', patience=10)
        predictor.fit(x_train_predictor, y_train[0:supervised_size],
                      epochs=args.predictor_epochs, validation_split=0.2,
                      batch_size=args.predictor_batch_size, callbacks=[es],
                      verbose=0)
        # Test model
        x_test_predictor = encoder.predict(x_test_scaled)
        predictions = predictor.predict(x_test_predictor)
        test_results = compute_metrics_standardized(predictions, y_test)
        # NOTE(review): results are only saved when accuracy < 0.51 --
        # this looks inverted (saving near-chance runs); confirm intent.
        if test_results[0] < 0.51:
            encoding_cluster_plt = plot_label_clusters(encoder, x_train, y_train)
            save_results(args.output_filename, test_results,
                         encoding_cluster_plt, encoder, predictor)
        print_results(test_results)
        return test_results[0] * -1
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit and hid the cause of failed trials.  Catch Exception
        # only, and surface the traceback so bad trials are diagnosable;
        # still return 0 so the hyperopt search continues.
        import traceback
        traceback.print_exc()
        return 0
# Per-read evaluation of a per-k-mer VAE ensemble: encode each validation
# read with the model matching its k-mer label, then call a read
# "modified" (1) when its mean squared distance from that k-mer's
# unmodified-encoding centroid exceeds the per-k-mer threshold.
# NOTE(review): relies on module-level state defined elsewhere in the
# file (x_val_encoding / predictions lists, models, model_names,
# non_mod_encoding_map, threshold) -- confirm they are initialised
# before this runs.
for idx, row in enumerate(x_val_five_mer):
    label = val_five_mer_label[idx]
    # Pick the encoder trained for this read's k-mer.
    model_idx = model_names.index(label)
    encoding = models[model_idx].predict(np.expand_dims(row, axis=0))
    x_val_encoding.append(encoding[0])
x_val_encoding = np.array(x_val_encoding)
x_val_encoding_diff = np.zeros(x_val_encoding.shape)
for i, five_mer in enumerate(y_val_five_mer):
    # Index of the one-hot k-mer label for this read.
    five_mer_idx = np.where(five_mer==1)[0][0]
    # Element-wise squared deviation from the unmodified centroid
    # (kept per-dimension for the KMeans experiment below).
    x_val_encoding_diff[i] = (non_mod_encoding_map[five_mer_idx] - x_val_encoding[i]) ** 2
    diff = np.mean((non_mod_encoding_map[five_mer_idx] - x_val_encoding[i]) ** 2)
    # Threshold the mean squared distance: 1 = modified, 0 = unmodified.
    if diff > threshold[five_mer_idx]:
        predictions.append(1)
    else:
        predictions.append(0)
test_results = compute_metrics_standardized(predictions, y_val)
print_results(test_results)
# Unsupervised alternative: 2-means clustering on the per-dimension
# squared deviations.
kmeans = KMeans(n_clusters=2, random_state=0).fit(x_val_encoding_diff)
predictions = kmeans.labels_
# Cluster labels are arbitrary, so report accuracy for both assignments.
print(accuracy_score(y_val, predictions))
print(accuracy_score(y_val, 1-predictions))
# Dead code kept for reference: supervised predictor head trained on
# encodings (string delimiter closed here -- it was left unterminated).
''' predictor = load_vae_predictor(32) x_test_five_mer, _ = extract_five_mer_data(x_test) x_test_predictor = model_t.predict(x_test_five_mer[0:10000]) es = EarlyStopping(monitor='val_loss', mode='min', patience=10) predictor.fit(x_test_predictor, y_test[0:10000], epochs=args.predictor_epochs, validation_split=0.2, batch_size=args.predictor_batch_size, callbacks=[es]) # Evaluate x_val_five_mer, _ = extract_five_mer_data(x_val) x_val_predictor = model_t.predict(x_val_five_mer[0:10000]) '''
# DNA VAE evaluation script body: sanity-check reconstructions, plot the
# latent space, train the supervised predictor head on encodings, test
# it, and report standard metrics.
# Sanity check: print one feature slice of the reconstruction for the
# first 5 training reads.
print(vae.decoder.predict(vae.encoder.predict(x_train[0:5, :]))[:, 24:44])
predictor = vae.predictor
encoder = vae.encoder
plot_label_clusters(args.output_filename, encoder, x_train, y_train)
# Train predictor -- on the first 10% of the training reads only.
predictor_size = int(len(x_train)/10)
# The encoder's three outputs are unpacked as (mean, sd, _); the
# predictor input is [mean, sd] concatenated.
x_train_mean, x_train_sd, _ = encoder.predict(x_train[0:predictor_size])
# NOTE(review): x_train / x_test are rebound to encodings here, so the
# raw feature arrays are no longer available after this point.
x_train = np.concatenate((x_train_mean, x_train_sd), axis=1)
predictor.fit(x_train, y_train[0:predictor_size], epochs=30, batch_size=128)
# Test model
x_test_mean, x_test_sd, _ = encoder.predict(x_test)
x_test = np.concatenate((x_test_mean, x_test_sd), axis=1)
pred_out = predictor.predict(x_test)
accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
    pred_out, y_test)
# Save model -- currently disabled.
#save_vae_model_dna(encoder, predictor, min_values, max_values)
# Print results
print(f"\tAccuracy : {accuracy_val:.3f}")
print(f"\tSensitivity : {sensitivity_val:.3f}")
print(f"\tSpecificity : {specificity_val:.3f}")
print(f"\tPrecision : {precision_val:.3f}")
print(f"\tAUC : {au_roc_val:.3f}")
print(f"{cm_val}")
# Latent-space plot over multi-read samples.
test_x_10, test_y_10 = load_multiple_reads_data(args)
plot_label_clusters_10(args.output_filename, encoder, test_x_10, test_y_10)
# RNA VAE training script body: fit the VAE unsupervised, visualize the
# latent space, train the predictor head on encodings, evaluate on the
# test set, save the model, and report standard metrics.
vae.compile(optimizer=keras.optimizers.Adam())
# Unsupervised fit: no labels passed, loss comes from the VAE itself.
vae.fit(x_train, epochs=30, batch_size=128)
# Visualize cluster
encoder = vae.encoder
predictor = vae.predictor
plot_label_clusters(args.output_filename, encoder, x_train, y_train)
# Train predictor -- on the first 5000 reads only.
# The encoder's three outputs are unpacked as (mean, sd, _); the
# predictor input is [mean, sd] concatenated.
x_train_mean, x_train_sd, _ = encoder.predict(x_train[0:5000])
# NOTE(review): x_train / x_test are rebound to encodings here, so the
# raw feature arrays are no longer available after this point.
x_train = np.concatenate((x_train_mean, x_train_sd), axis=1)
predictor.fit(x_train, y_train[0:5000], epochs=30, batch_size=128)
# Test model
x_test_mean, x_test_sd, _ = encoder.predict(x_test)
x_test = np.concatenate((x_test_mean, x_test_sd), axis=1)
pred_out = predictor.predict(x_test)
accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
    pred_out, y_test)
# Save model
save_vae_model_rna(args, encoder, predictor)
# Print results
print(f"\tAccuracy : {accuracy_val:.3f}")
print(f"\tSensitivity : {sensitivity_val:.3f}")
print(f"\tSpecificity : {specificity_val:.3f}")
print(f"\tPrecision : {precision_val:.3f}")
print(f"\tAUC : {au_roc_val:.3f}")
print(f"{cm_val}")
# Supervised DeepSignal baseline: train the supervised model on the
# labelled split returned by the GAN data loader and report metrics on
# the validation set.
from utils.arguments import parse_args
from utils.data import load_dna_data_gan
from utils.evaluate import compute_metrics_standardized
from utils.gan_model import load_deep_signal_supervised
import numpy as np
import tensorflow as tf

args = parse_args()
# Seed NumPy and TF for reproducible runs.
np.random.seed(args.seed)
tf.compat.v1.set_random_seed(args.seed)
x_train, x_test, y_test, x_val, y_val = load_dna_data_gan(args)
model = load_deep_signal_supervised(args)
# NOTE(review): the model is fitted on (x_test, y_test) while x_train is
# never used -- presumably x_train is the unlabelled split of the GAN
# loader and (x_test, y_test) is the labelled training set; confirm
# against load_dna_data_gan.
model.fit(x_test, y_test, epochs=150, batch_size=512, validation_data=(x_val, y_val))
y_predicted = np.squeeze(model.predict_on_batch(x_val))
results = (compute_metrics_standardized(y_predicted, y_val))
print(results)
def train(args, generator, discriminator, GAN, x_train, x_test, y_test, x_val, y_val):
    """Run the adversarial (GAN) training loop.

    Every ``args.v_freq`` epochs the discriminator is scored on the
    validation set; whenever validation AUC improves, test-set metrics
    are recorded and the discriminator is saved via ``save_gan_model``.
    Ctrl-C interrupts the current epoch's batches but training proceeds
    to the next epoch (the ``break`` exits only the batch loop's try).

    Returns:
        tuple: (best_accuracy, best_sensitivity, best_specificity,
        best_precision, best_au_roc) measured on the test set at the
        best-validation-AUC checkpoint (all 0 if never evaluated).

    NOTE(review): depends on module-level helpers/state not shown here
    (trange, set_trainability, K, gamma, D_data, noise_data,
    save_gan_model); ``gamma`` appears to be a Keras variable weighting
    the discriminator loss -- confirm in the model definition.
    """
    # Adversarial Training
    epochs = args.epochs
    batch_size = args.batch_size
    v_freq = args.v_freq
    latent_dim = args.latent_dim
    d_loss, g_loss, best_cm = [], [], []
    best_au_roc_val, best_accuracy, best_sensitivity, best_specificity, best_precision, best_au_roc = 0, 0, 0, 0, 0, 0
    print('===== Start of Adversarial Training =====')
    for epoch in range(epochs):
        try:
            with trange(x_train.shape[0] // batch_size, ascii=True, desc='Epoch {}'.format(epoch + 1)) as t:
                for _ in t:
                    # Train Discriminator: one half-batch of real data,
                    # then one full batch of generated data.
                    loss_temp = []
                    set_trainability(discriminator, True)
                    # gamma = 1 for the real-data batch ...
                    K.set_value(gamma, [1])
                    x, y = D_data(int(batch_size / 2), generator, 'normal', x_train, latent_dim)
                    loss_temp.append(discriminator.train_on_batch(x, y))
                    set_trainability(discriminator, True)
                    # ... and args.gamma for the generated batch.
                    K.set_value(gamma, [args.gamma])
                    x, y = D_data(int(batch_size), generator, 'gen', x_train, latent_dim)
                    loss_temp.append(discriminator.train_on_batch(x, y))
                    d_loss.append(sum(loss_temp) / len(loss_temp))
                    # Train Generator (discriminator frozen) against the
                    # soft target args.alpha instead of hard 1s.
                    set_trainability(discriminator, False)
                    x = noise_data(batch_size, latent_dim)
                    y = np.ones(batch_size)
                    y[:] = args.alpha
                    g_loss.append(GAN.train_on_batch(x, y))
                    t.set_postfix(G_loss=g_loss[-1], D_loss=d_loss[-1])
        except KeyboardInterrupt:
            # hit control-C to exit
            break
        if (epoch + 1) % v_freq == 0:
            # Check for the best validation results.
            # 1 - discriminator output is used as the positive-class score.
            y_predicted = 1 - np.squeeze(discriminator.predict_on_batch(x_val))
            x = noise_data(batch_size, latent_dim)
            # Debug: print one feature slice of a generated sample.
            print(generator.predict_on_batch(x)[0, 24:44])
            accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
                y_predicted, y_val)
            if au_roc_val > best_au_roc_val:
                best_au_roc_val = au_roc_val
                # Save the best test results
                y_predicted = 1 - np.squeeze(
                    discriminator.predict_on_batch(x_test))
                best_accuracy, best_sensitivity, best_specificity, best_precision, best_au_roc, best_cm = compute_metrics_standardized(
                    y_predicted, y_test)
                save_gan_model(args, discriminator)
            print(f"\tAccuracy : {accuracy_val:.3f}")
            print(f"\tSensitivity : {sensitivity_val:.3f}")
            print(f"\tSpecificity : {specificity_val:.3f}")
            print(f"\tPrecision : {precision_val:.3f}")
            print(f"\tAUC : {au_roc_val:.3f}")
            print(f"{cm_val}")
            print(f"\tGen. Loss: {g_loss[-1]:.3f}\n\tDisc. Loss: {d_loss[-1]:.3f}")
    print('===== End of Adversarial Training =====')
    print(f"\tBest accuracy : {best_accuracy:.3f}")
    print(f"\tBest sensitivity : {best_sensitivity:.3f}")
    print(f"\tBest specificity : {best_specificity:.3f}")
    print(f"\tBest precision : {best_precision:.3f}")
    print(f"\tBest AUC : {best_au_roc:.3f}")
    print(f"{best_cm}")
    results = (best_accuracy, best_sensitivity, best_specificity, best_precision, best_au_roc)
    return results