Example #1
def objective(params):
    print(params)
    # Re-weight each feature slice by its tuned scale factor before
    # building the VAE inputs.
    x_train_scaled = np.concatenate(
        (x_train[:, 0:68] * params['kmer_scale'],
         x_train[:, 68:85] * params['mean_scale'],
         x_train[:, 85:102] * params['std_scale'],
         x_train[:, 102:119] * params['len_scale'],
         x_train[:, 119:] * params['signal_scale']),
        axis=1)
    x_test_scaled = np.concatenate(
        (x_test[:, 0:68] * params['kmer_scale'],
         x_test[:, 68:85] * params['mean_scale'],
         x_test[:, 85:102] * params['std_scale'],
         x_test[:, 102:119] * params['len_scale'],
         x_test[:, 119:] * params['signal_scale']),
        axis=1)
    encoder, decoder, vae = load_vae_dna_model_deepsignal(int(params['latent_dim']), params['rc_scale'], params['vae_lr'], params['kmer_loss_scale'])
    try:
        supervised_size = 10000
        # Unsupervised VAE training on an 80/20 train/validation split.
        es = EarlyStopping(monitor='val_loss', mode='min', patience=20)
        vae.fit(x_train_scaled[0:int(len(x_train) * 0.8)],
                validation_data=(x_train_scaled[int(len(x_train) * 0.8):], None),
                epochs=args.vae_epochs, batch_size=args.vae_batch_size,
                verbose=0, callbacks=[es])

        # Train a supervised predictor on encoder outputs for the first
        # `supervised_size` labeled reads.
        predictor = load_vae_predictor(int(params['latent_dim']))
        x_train_predictor = encoder.predict(x_train_scaled[0:supervised_size])

        es = EarlyStopping(monitor='val_loss', mode='min', patience=10)
        predictor.fit(x_train_predictor, y_train[0:supervised_size], epochs=args.predictor_epochs, validation_split=0.2,
                      batch_size=args.predictor_batch_size, callbacks=[es], verbose=0)

        # Test model
        x_test_predictor = encoder.predict(x_test_scaled)
        predictions = predictor.predict(x_test_predictor)
        test_results = compute_metrics_standardized(predictions, y_test)
        # Save diagnostics when accuracy is near chance (< 0.51).
        if test_results[0] < 0.51:
            encoding_cluster_plt = plot_label_clusters(encoder, x_train, y_train)
            save_results(args.output_filename, test_results, encoding_cluster_plt, encoder, predictor)
        print_results(test_results)
        # Negate so a minimizing optimizer maximizes accuracy.
        return test_results[0] * -1
    except Exception:
        # Treat a failed trial (e.g. NaN losses) as the worst score rather
        # than crashing the search.
        return 0
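
# A hedged sketch of how this objective might be driven. The negated return
# value and the params dict suggest a hyperopt-style search; the space below
# is illustrative only, and every range here is an assumption, not the
# repo's actual configuration.
from hyperopt import fmin, hp, tpe

space = {
    'kmer_scale': hp.uniform('kmer_scale', 0.1, 10.0),
    'mean_scale': hp.uniform('mean_scale', 0.1, 10.0),
    'std_scale': hp.uniform('std_scale', 0.1, 10.0),
    'len_scale': hp.uniform('len_scale', 0.1, 10.0),
    'signal_scale': hp.uniform('signal_scale', 0.1, 10.0),
    'latent_dim': hp.quniform('latent_dim', 8, 64, 8),
    'rc_scale': hp.uniform('rc_scale', 0.1, 10.0),
    'vae_lr': hp.loguniform('vae_lr', -9, -5),
    'kmer_loss_scale': hp.uniform('kmer_loss_scale', 0.1, 10.0),
}
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)
print(best)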
Example #2
    x_val_encoding = []  # per-read latent encodings
    predictions = []     # per-read modification calls
    # Encode each validation read with the model trained for its k-mer.
    for idx, row in enumerate(x_val_five_mer):
        label = val_five_mer_label[idx]
        model_idx = model_names.index(label)
        encoding = models[model_idx].predict(np.expand_dims(row, axis=0))
        x_val_encoding.append(encoding[0])
    x_val_encoding = np.array(x_val_encoding)
    x_val_encoding_diff = np.zeros(x_val_encoding.shape)
    for i, five_mer in enumerate(y_val_five_mer):
        five_mer_idx = np.where(five_mer == 1)[0][0]
        x_val_encoding_diff[i] = (non_mod_encoding_map[five_mer_idx] - x_val_encoding[i]) ** 2
        # Call a read modified when its mean squared distance from the
        # unmodified reference encoding exceeds that k-mer's threshold.
        diff = np.mean(x_val_encoding_diff[i])
        if diff > threshold[five_mer_idx]:
            predictions.append(1)
        else:
            predictions.append(0)
    test_results = compute_metrics_standardized(predictions, y_val)
    print_results(test_results)

    # Unsupervised alternative: cluster the squared-distance vectors.
    kmeans = KMeans(n_clusters=2, random_state=0).fit(x_val_encoding_diff)
    predictions = kmeans.labels_
    # Cluster indices are arbitrary, so report accuracy for both label
    # orientations.
    print(accuracy_score(y_val, predictions))
    print(accuracy_score(y_val, 1 - predictions))
    # The block below is an alternative supervised evaluation path, left
    # disabled (triple-quoted) for reference.
    '''
    predictor = load_vae_predictor(32)
    x_test_five_mer, _ = extract_five_mer_data(x_test)
    x_test_predictor = model_t.predict(x_test_five_mer[0:10000])
    es = EarlyStopping(monitor='val_loss', mode='min', patience=10)
    predictor.fit(x_test_predictor, y_test[0:10000], epochs=args.predictor_epochs, validation_split=0.2, batch_size=args.predictor_batch_size, callbacks=[es])
    # Evaluate
    x_val_five_mer, _ = extract_five_mer_data(x_val)
    x_val_predictor = model_t.predict(x_val_five_mer[0:10000])
    '''
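
# One plausible way the per-k-mer `threshold` array above could be built
# (an assumption, not the repo's actual code): take a high percentile of
# the same mean-squared distances measured on known-unmodified reads.
import numpy as np

def derive_thresholds(non_mod_encoding_map, encodings, five_mer_ids,
                      percentile=95):
    # encodings: latent vectors of unmodified reads; five_mer_ids: the
    # k-mer index of each read; returns one threshold per k-mer.
    thresholds = np.zeros(len(non_mod_encoding_map))
    for k in range(len(non_mod_encoding_map)):
        mask = five_mer_ids == k
        if not mask.any():
            continue
        dists = np.mean((non_mod_encoding_map[k] - encodings[mask]) ** 2,
                        axis=1)
        thresholds[k] = np.percentile(dists, percentile)
    return thresholds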
Example #3
# Sanity check: reconstruct a few training reads and print the central
# signal window. encoder.predict returns (mean, sd, z); the sampled z
# (index 2) is what the decoder expects, so index into the outputs.
print(vae.decoder.predict(vae.encoder.predict(x_train[0:5, :])[2])[:, 24:44])
predictor = vae.predictor
encoder = vae.encoder
plot_label_clusters(args.output_filename, encoder, x_train, y_train)

# Train predictor
predictor_size = len(x_train) // 10
x_train_mean, x_train_sd, _ = encoder.predict(x_train[0:predictor_size])
# Keep the raw x_train intact; the predictor trains on [mean, sd] latent
# features.
x_train_latent = np.concatenate((x_train_mean, x_train_sd), axis=1)
predictor.fit(x_train_latent, y_train[0:predictor_size], epochs=30, batch_size=128)

# Test model
x_test_mean, x_test_sd, _ = encoder.predict(x_test)
x_test_latent = np.concatenate((x_test_mean, x_test_sd), axis=1)
pred_out = predictor.predict(x_test_latent)
accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
    pred_out, y_test)

# Save model
#save_vae_model_dna(encoder, predictor, min_values, max_values)

# Print results
print(f"\tAccuracy    : {accuracy_val:.3f}")
print(f"\tSensitivity : {sensitivity_val:.3f}")
print(f"\tSpecificity : {specificity_val:.3f}")
print(f"\tPrecision   : {precision_val:.3f}")
print(f"\tAUC         : {au_roc_val:.3f}")
print(f"{cm_val}")

test_x_10, test_y_10 = load_multiple_reads_data(args)
plot_label_clusters_10(args.output_filename, encoder, test_x_10, test_y_10)
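
# A minimal sketch of what a plot_label_clusters helper typically does,
# assuming the encoder's first output is the latent mean and matplotlib is
# available; the repo's actual implementation may differ.
import matplotlib.pyplot as plt

def plot_label_clusters_sketch(output_filename, encoder, data, labels):
    z_mean, _, _ = encoder.predict(data)
    plt.figure(figsize=(8, 6))
    # Project onto the first two latent dimensions, colored by label.
    plt.scatter(z_mean[:, 0], z_mean[:, 1], c=labels, s=2, cmap='coolwarm')
    plt.colorbar()
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.savefig(f'{output_filename}_clusters.png')
    plt.close()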
Example #4
# No loss argument: the VAE presumably defines its loss inside the model
# (e.g. via add_loss or a custom train_step).
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(x_train, epochs=30, batch_size=128)

# Visualize cluster
encoder = vae.encoder
predictor = vae.predictor
plot_label_clusters(args.output_filename, encoder, x_train, y_train)

# Train predictor
x_train_mean, x_train_sd, _ = encoder.predict(x_train[0:5000])
# Train the predictor on [mean, sd] latent features without shadowing the
# raw x_train array.
x_train_latent = np.concatenate((x_train_mean, x_train_sd), axis=1)
predictor.fit(x_train_latent, y_train[0:5000], epochs=30, batch_size=128)

# Test model
x_test_mean, x_test_sd, _ = encoder.predict(x_test)
x_test_latent = np.concatenate((x_test_mean, x_test_sd), axis=1)
pred_out = predictor.predict(x_test_latent)
accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
    pred_out, y_test)

# Save model
save_vae_model_rna(args, encoder, predictor)

# Print results
print(f"\tAccuracy    : {accuracy_val:.3f}")
print(f"\tSensitivity : {sensitivity_val:.3f}")
print(f"\tSpecificity : {specificity_val:.3f}")
print(f"\tPrecision   : {precision_val:.3f}")
print(f"\tAUC         : {au_roc_val:.3f}")
print(f"{cm_val}")
Example #5
from utils.arguments import parse_args
from utils.data import load_dna_data_gan
from utils.evaluate import compute_metrics_standardized
from utils.gan_model import load_deep_signal_supervised

import numpy as np
import tensorflow as tf

args = parse_args()
np.random.seed(args.seed)
tf.compat.v1.set_random_seed(args.seed)

x_train, x_test, y_test, x_val, y_val = load_dna_data_gan(args)
model = load_deep_signal_supervised(args)

# Note: this supervised baseline is fit on the labeled test split; x_train
# is presumably the unlabeled pool reserved for the GAN experiments.
model.fit(x_test,
          y_test,
          epochs=150,
          batch_size=512,
          validation_data=(x_val, y_val))
y_predicted = np.squeeze(model.predict_on_batch(x_val))
results = compute_metrics_standardized(y_predicted, y_val)

print(results)
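
# compute_metrics_standardized is used throughout these examples; callers
# unpack (accuracy, sensitivity, specificity, precision, auc,
# confusion_matrix). A minimal sketch consistent with that signature,
# assuming a 0.5 decision threshold on predicted probabilities:
import numpy as np
from sklearn.metrics import confusion_matrix, roc_auc_score

def compute_metrics_standardized_sketch(y_prob, y_true):
    y_pred = (np.asarray(y_prob) >= 0.5).astype(int)
    cm = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = cm.ravel()
    accuracy = (tp + tn) / cm.sum()
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    auc = roc_auc_score(y_true, y_prob)
    return accuracy, sensitivity, specificity, precision, auc, cm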
Example #6
def train(args, generator, discriminator, GAN, x_train, x_test, y_test, x_val,
          y_val):
    # Adversarial Training
    epochs = args.epochs
    batch_size = args.batch_size
    v_freq = args.v_freq
    latent_dim = args.latent_dim

    d_loss, g_loss, best_cm = [], [], []
    best_au_roc_val, best_accuracy, best_sensitivity, best_specificity, best_precision, best_au_roc = 0, 0, 0, 0, 0, 0

    print('===== Start of Adversarial Training =====')
    for epoch in range(epochs):
        try:
            with trange(x_train.shape[0] // batch_size,
                        ascii=True,
                        desc='Epoch {}'.format(epoch + 1)) as t:
                for _ in t:
                    # Train the discriminator: first on a half batch drawn
                    # in 'normal' mode (gamma = 1), then on a full batch in
                    # 'gen' mode (gamma = args.gamma).
                    loss_temp = []
                    set_trainability(discriminator, True)
                    K.set_value(gamma, [1])
                    x, y = D_data(int(batch_size / 2), generator, 'normal',
                                  x_train, latent_dim)
                    loss_temp.append(discriminator.train_on_batch(x, y))
                    K.set_value(gamma, [args.gamma])
                    x, y = D_data(int(batch_size), generator, 'gen', x_train,
                                  latent_dim)
                    loss_temp.append(discriminator.train_on_batch(x, y))
                    d_loss.append(sum(loss_temp) / len(loss_temp))

                    # Train the generator through the frozen discriminator,
                    # using args.alpha as a soft target label.
                    set_trainability(discriminator, False)
                    x = noise_data(batch_size, latent_dim)
                    y = np.full(batch_size, args.alpha)
                    g_loss.append(GAN.train_on_batch(x, y))
                    t.set_postfix(G_loss=g_loss[-1], D_loss=d_loss[-1])
        except KeyboardInterrupt:
            # hit control-C to exit
            break

        if (epoch + 1) % v_freq == 0:
            # Validation: the discriminator score is flipped (1 - p) before
            # computing the standardized metrics.
            y_predicted = 1 - np.squeeze(discriminator.predict_on_batch(x_val))
            x = noise_data(batch_size, latent_dim)
            # Sanity check: print the central window of one generated read.
            print(generator.predict_on_batch(x)[0, 24:44])
            accuracy_val, sensitivity_val, specificity_val, precision_val, au_roc_val, cm_val = compute_metrics_standardized(
                y_predicted, y_val)

            if au_roc_val > best_au_roc_val:
                best_au_roc_val = au_roc_val
                # Save the best test results
                y_predicted = 1 - np.squeeze(
                    discriminator.predict_on_batch(x_test))
                best_accuracy, best_sensitivity, best_specificity, best_precision, best_au_roc, best_cm = compute_metrics_standardized(
                    y_predicted, y_test)
                save_gan_model(args, discriminator)

            print(f"\tAccuracy    : {accuracy_val:.3f}")
            print(f"\tSensitivity : {sensitivity_val:.3f}")
            print(f"\tSpecificity : {specificity_val:.3f}")
            print(f"\tPrecision   : {precision_val:.3f}")
            print(f"\tAUC         : {au_roc_val:.3f}")
            print(f"{cm_val}")
        print(f"\tGen. Loss: {g_loss[-1]:.3f}\n\tDisc. Loss: {d_loss[-1]:.3f}")

    print('===== End of Adversarial Training =====')
    print(f"\tBest accuracy    : {best_accuracy:.3f}")
    print(f"\tBest sensitivity : {best_sensitivity:.3f}")
    print(f"\tBest specificity : {best_specificity:.3f}")
    print(f"\tBest precision   : {best_precision:.3f}")
    print(f"\tBest AUC         : {best_au_roc:.3f}")
    print(f"{best_cm}")
    results = (best_accuracy, best_sensitivity, best_specificity,
               best_precision, best_au_roc)
    return results
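
# Hypothetical sketches of the two generic helpers used above (noise_data,
# set_trainability); the real implementations live elsewhere in the repo.
import numpy as np

def noise_data(n, latent_dim):
    # Latent vectors fed to the generator; N(0, 1) is an assumption.
    return np.random.normal(0.0, 1.0, size=(n, latent_dim))

def set_trainability(model, trainable):
    # Freeze/unfreeze the discriminator between D and G updates so the
    # combined GAN step only updates the generator.
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable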