Пример #1
0
def run(use_deep_CNN, use_RNN, label, golden_results):
    """Train a SequenceDNN on random sequences and check metrics against goldens.

    Generates 200 random length-100 strings over 'ACGT' with random boolean
    labels, holds out 20% via ``train_test_split``, appends
    ``reverse_complement(X_train)`` to the training split, shuffles it,
    trains for one epoch (using the held-out split as validation data) and
    asserts that the values of ``model.test(...).results[0]`` are close to
    the values of ``golden_results``.

    Args:
        use_deep_CNN: if truthy, the three-element ``num_filters`` /
            ``conv_width`` settings replace the single-element ones.
        use_RNN: forwarded as the ``use_RNN`` hyperparameter; if truthy,
            ``GRU_size`` and ``TDD_size`` are set as well.
        label: text placed at the start of the assertion message.
        golden_results: object with ``.values()``; values are compared in
            iteration order against the model's result values.

    Raises:
        AssertionError: if the two value tuples are not ``np.allclose``.
    """
    n_bases = 100
    n_seqs = 200
    held_out_fraction = 0.2
    n_epochs = 1

    # Sequences are drawn before labels so both RNGs are consumed in the
    # same order on every run.
    raw_sequences = []
    for _ in range(n_seqs):
        raw_sequences.append(''.join(random.choice('ACGT') for _ in range(n_bases)))
    sequences = np.array(raw_sequences)
    flat_labels = np.random.choice((True, False), size=n_seqs)
    labels = flat_labels[:, None]

    encoded = one_hot_encode(sequences)
    X_train, X_test, y_train, y_test = train_test_split(
        encoded, labels, test_size=held_out_fraction)

    # Augment with reverse complements; labels are simply duplicated.
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))

    # Apply one shared permutation so inputs and labels stay aligned.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    params = {
        'seq_length': n_bases,
        'use_RNN': use_RNN,
        'num_filters': (45,),
        'pool_width': 25,
        'conv_width': (10,),
        'L1': 0,
        'dropout': 0.2,
        'num_epochs': n_epochs,
    }
    if use_deep_CNN:
        params['num_filters'] = (45, 50, 50)
        params['conv_width'] = (10, 8, 5)
    if use_RNN:
        params['GRU_size'] = 35
        params['TDD_size'] = 45

    model = SequenceDNN(**params)
    model.train(X_train, y_train, validation_data=(X_test, y_test))
    results = model.test(X_test, y_test).results[0]

    observed = tuple(results.values())
    expected = tuple(golden_results.values())
    assert np.allclose(observed, expected), \
        '{}: result = {}, golden = {}'.format(label, results, golden_results)
Пример #2
0
def run(use_deep_CNN, use_RNN, label, golden_first_sequence, golden_results):
    """Train a SequenceDNN on simulated motif data and check it against goldens.

    Calls ``simulate_single_motif_detection`` for the 'SPI1_disc1' motif
    (100 positives, 100 negatives, length 100, GC fraction 0.4), checks the
    first simulated sequence, holds out 20% via ``train_test_split``,
    appends ``reverse_complement(X_train)`` to the training split, shuffles
    it, trains for one epoch and compares the values of
    ``model.test(...).results[0]`` with those of ``golden_results``.

    Args:
        use_deep_CNN: if truthy, use the three-element ``num_filters`` /
            ``conv_width`` settings.
        use_RNN: forwarded as the ``use_RNN`` hyperparameter; if truthy,
            ``GRU_size`` and ``TDD_size`` are set as well.
        label: text placed at the start of the final assertion message.
        golden_first_sequence: expected value of the first simulated sequence.
        golden_results: object with ``.values()`` holding expected metrics.

    Raises:
        AssertionError: if the first sequence or the metrics do not match.
    """
    n_bases = 100
    n_total = 200
    n_pos = 100
    n_neg = n_total - n_pos
    gc_content = 0.4
    held_out_fraction = 0.2
    n_epochs = 1

    # The third returned value (embeddings) is not used by this check.
    sequences, labels, _embeddings = simulate_single_motif_detection(
        'SPI1_disc1', n_bases, n_pos, n_neg, gc_content)
    first = sequences[0]
    assert first == golden_first_sequence, \
        'first sequence = {}, golden = {}'.format(first, golden_first_sequence)

    encoded = one_hot_encode(sequences)
    X_train, X_test, y_train, y_test = train_test_split(
        encoded, labels, test_size=held_out_fraction)

    # Augment with reverse complements; labels are simply duplicated.
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))

    # Apply one shared permutation so inputs and labels stay aligned.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    params = {
        'seq_length': n_bases,
        'use_RNN': use_RNN,
        'num_filters': (45,),
        'pool_width': 25,
        'conv_width': (10,),
        'L1': 0,
        'dropout': 0.2,
        'num_epochs': n_epochs,
    }
    if use_deep_CNN:
        params['num_filters'] = (45, 50, 50)
        params['conv_width'] = (10, 8, 5)
    if use_RNN:
        params['GRU_size'] = 35
        params['TDD_size'] = 45

    model = SequenceDNN(**params)
    model.train(X_train, y_train, validation_data=(X_test, y_test))
    results = model.test(X_test, y_test).results[0]

    observed = tuple(results.values())
    expected = tuple(golden_results.values())
    assert np.allclose(observed, expected), \
        '{}: result = {}, golden = {}'.format(label, results, golden_results)
Пример #3
0
def run(use_deep_CNN, use_RNN, label, golden_results):
    """Train a SequenceDNN on simulated motif data and check it against goldens.

    Seeds both ``numpy.random`` and ``random`` with 1, simulates 50 positive
    and 50 negative length-50 sequences for the 'SPI1_disc1' motif, splits
    off 20% as a test set and 20% of the remainder as a validation set,
    appends ``reverse_complement(X_train)`` to the training split, shuffles
    it, trains for one epoch and compares the values of
    ``model.test(...).results[0]`` with those of ``golden_results``.

    Args:
        use_deep_CNN: if truthy, use the three-element ``num_filters`` /
            ``conv_width`` settings.
        use_RNN: forwarded as the ``use_RNN`` hyperparameter; if truthy,
            ``GRU_size`` and ``TDD_size`` are set as well.
        label: text placed at the start of the assertion message.
        golden_results: object with ``.values()`` holding expected metrics.

    Raises:
        AssertionError: if the two value tuples are not ``np.allclose``.
    """
    import random
    np.random.seed(1)
    random.seed(1)
    from dragonn.models import SequenceDNN
    from simdna.simulations import simulate_single_motif_detection
    from dragonn.utils import one_hot_encode, reverse_complement
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20. Try the
    # original location first so behaviour is unchanged where it still
    # exists, and fall back to ``sklearn.model_selection`` otherwise.
    try:
        from sklearn.cross_validation import train_test_split
    except ImportError:
        from sklearn.model_selection import train_test_split
    seq_length = 50
    num_sequences = 100
    num_positives = 50
    num_negatives = num_sequences - num_positives
    GC_fraction = 0.4
    test_fraction = 0.2
    validation_fraction = 0.2
    num_epochs = 1

    sequences, labels = simulate_single_motif_detection(
        'SPI1_disc1', seq_length, num_positives, num_negatives, GC_fraction)
    encoded_sequences = one_hot_encode(sequences)
    # First split off the test set, then carve the validation set out of
    # what remains.
    X_train, X_test, y_train, y_test = train_test_split(
        encoded_sequences, labels, test_size=test_fraction)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, test_size=validation_fraction)
    # Augment with reverse complements; labels are simply duplicated.
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))
    # Apply one shared permutation so inputs and labels stay aligned.
    random_order = np.arange(len(X_train))
    np.random.shuffle(random_order)
    X_train = X_train[random_order]
    y_train = y_train[random_order]
    hyperparameters = {
        'seq_length': seq_length,
        'use_RNN': use_RNN,
        'num_filters': (45, ),
        'pool_width': 25,
        'conv_width': (10, ),
        'L1': 0,
        'dropout': 0.2,
        'num_epochs': num_epochs
    }
    if use_deep_CNN:
        hyperparameters.update({
            'num_filters': (45, 50, 50),
            'conv_width': (10, 8, 5)
        })
    if use_RNN:
        hyperparameters.update({'GRU_size': 35, 'TDD_size': 45})
    model = SequenceDNN(**hyperparameters)
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    results = model.test(X_test, y_test).results[0]
    assert np.allclose(tuple(results.values()), tuple(golden_results.values())), \
        '{}: result = {}, golden = {}'.format(label, results, golden_results)
def train_test_dnn_vary_parameter(prefix,
                                  model_parameters,
                                  param_name,
                                  param_values,
                                  X_train=None,
                                  y_train=None,
                                  X_valid=None,
                                  y_valid=None,
                                  X_test=None,
                                  y_test=None):
    """Train (or load) one SequenceDNN per value of a single parameter and test each.

    The training set is first extended with ``reverse_complement(X_train)``
    (labels duplicated). For every value in ``param_values`` the function
    sets ``model_parameters[param_name]``, derives file names from
    ``prefix`` and ``dict2string(model_parameters)``, and tries to load a
    saved model. If loading fails, a new model is trained and saved under
    that prefix.

    Args:
        prefix: leading part of the saved-model file names.
        model_parameters: dict of SequenceDNN keyword arguments. NOTE: it is
            modified in place; after the call ``model_parameters[param_name]``
            holds the last value of ``param_values``.
        param_name: key in ``model_parameters`` to vary.
        param_values: iterable of values to assign to ``param_name``.
        X_train, y_train: training inputs and labels.
        X_valid, y_valid: validation data passed to ``dnn.train``.
        X_test, y_test: data passed to ``dnn.test``.

    Returns:
        List with one ``dnn.test(X_test, y_test)`` result per parameter value.
    """
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))
    dnn_results = []
    for param_value in param_values:
        model_parameters[param_name] = param_value
        ofname_infix = dict2string(model_parameters)
        ofname_prefix = "%s.%s" % (prefix, ofname_infix)
        model_fname = "%s.arch.json" % (ofname_prefix)
        weights_fname = "%s.weights.h5" % (ofname_prefix)
        try:
            logger.debug("Checking for model files {} and {}...".format(
                model_fname, weights_fname))
            dnn = SequenceDNN.load(model_fname, weights_fname)
            logger.debug("Model files found. Loaded model successfully!")
        except Exception:
            # Any ordinary load failure means "train from scratch".
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate instead of silently
            # starting a training run.
            logger.debug("Model files not found. Training model...")
            dnn = SequenceDNN(**model_parameters)
            logger.info("training with %s %s .." % (param_name, param_value))
            dnn.train(X_train, y_train, (X_valid, y_valid))
            dnn.save(ofname_prefix)
        dnn_results.append(dnn.test(X_test, y_test))

    return dnn_results
Пример #5
0
def main_train(pos_sequences=None,
               neg_sequences=None,
               prefix=None,
               model_file=None,
               weights_file=None):
    """Train a SequenceDNN on positive/negative sequences and save it.

    Encodes both inputs with ``encode_fasta_sequences``, labels positives
    ``True`` and negatives ``False``, holds out 20% for validation, then
    either loads a model (when both ``model_file`` and ``weights_file`` are
    given) or builds a fresh one, trains it, prints validation metrics and
    saves it to ``<prefix>.model.json`` / ``<prefix>.weights.hd5``.

    Args:
        pos_sequences: passed to ``encode_fasta_sequences`` (positive class).
        neg_sequences: passed to ``encode_fasta_sequences`` (negative class).
        prefix: leading part of the output file names.
        model_file: architecture file of an existing model, or ``None``.
        weights_file: weights file of an existing model, or ``None``.
    """
    print("loading sequence data...")
    positives = encode_fasta_sequences(pos_sequences)
    positive_labels = np.array([[True]]*len(positives))
    negatives = encode_fasta_sequences(neg_sequences)
    negative_labels = np.array([[False]]*len(negatives))
    features = np.concatenate((positives, negatives))
    targets = np.concatenate((positive_labels, negative_labels))
    X_train, X_valid, y_train, y_valid = train_test_split(
        features, targets, test_size=0.2)

    # A saved model is only used when both of its files were supplied.
    if model_file is None or weights_file is None:
        print("initializing model...")
        model = SequenceDNN(seq_length=X_train.shape[-1])
    else:
        print("loading model...")
        model = SequenceDNN.load(model_file, weights_file)

    print("starting model training...")
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    valid_result = model.test(X_valid, y_valid)
    print("final validation metrics:")
    print(valid_result)

    print("saving model files..")
    arch_path = "%s.model.json" % (prefix)
    weights_path = "%s.weights.hd5" % (prefix)
    model.save(arch_path, weights_path)
    print("Done!")
Пример #6
0
def main_train(pos_sequences=None,
               neg_sequences=None,
               prefix=None,
               arch_file=None,
               weights_file=None,
               **kwargs):
    """Train a SequenceDNN on positive/negative sequences and save it.

    Encodes both inputs with ``encode_fasta_sequences``, labels positives
    ``True`` and negatives ``False``, holds out 20% for validation, then
    either loads a model (when ``arch_file`` is given) or builds a fresh
    one from ``kwargs``, trains it, prints validation metrics and calls
    ``model.save(prefix)``.

    Args:
        pos_sequences: passed to ``encode_fasta_sequences`` (positive class).
        neg_sequences: passed to ``encode_fasta_sequences`` (negative class).
        prefix: passed to ``model.save``.
        arch_file: architecture file of an existing model, or ``None``.
        weights_file: weights file passed to ``SequenceDNN.load``.
        **kwargs: extra SequenceDNN constructor arguments; entries whose
            value is ``None`` are dropped.
    """
    # Drop unset options so the constructor's own defaults apply.
    model_options = {}
    for name, setting in kwargs.items():
        if setting is not None:
            model_options[name] = setting
    kwargs = model_options

    print("loading sequence data...")
    positives = encode_fasta_sequences(pos_sequences)
    positive_labels = np.array([[True]] * len(positives))
    negatives = encode_fasta_sequences(neg_sequences)
    negative_labels = np.array([[False]] * len(negatives))
    features = np.concatenate((positives, negatives))
    targets = np.concatenate((positive_labels, negative_labels))
    X_train, X_valid, y_train, y_valid = train_test_split(
        features, targets, test_size=0.2)

    if arch_file is None:
        print("initializing model...")
        model = SequenceDNN(seq_length=X_train.shape[-1], **kwargs)
    else:
        print("loading model...")
        model = SequenceDNN.load(arch_file, weights_file)

    print("starting model training...")
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    valid_result = model.test(X_valid, y_valid)
    print("final validation metrics:")
    print(valid_result)

    print("saving model files..")
    model.save(prefix)
    print("Done!")
def train_test_dnn_vary_parameter(prefix,
                                  model_parameters,
                                  param_name,
                                  param_values,
                                  X_train=None, y_train=None,
                                  X_valid=None, y_valid=None,
                                  X_test=None, y_test=None):
    """Train (or load) one SequenceDNN per value of a single parameter and test each.

    The training set is first extended with ``reverse_complement(X_train)``
    (labels duplicated). For every value in ``param_values`` the function
    sets ``model_parameters[param_name]``, derives the architecture and
    weights file names from ``prefix`` and ``dict2string(model_parameters)``,
    and tries to load a saved model. If loading fails, a new model is
    trained and saved to those two files.

    Args:
        prefix: leading part of the saved-model file names.
        model_parameters: dict of SequenceDNN keyword arguments. NOTE: it is
            modified in place; after the call ``model_parameters[param_name]``
            holds the last value of ``param_values``.
        param_name: key in ``model_parameters`` to vary.
        param_values: iterable of values to assign to ``param_name``.
        X_train, y_train: training inputs and labels.
        X_valid, y_valid: validation data passed to ``dnn.train``.
        X_test, y_test: data passed to ``dnn.test``.

    Returns:
        List with one ``dnn.test(X_test, y_test)`` result per parameter value.
    """
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))
    dnn_results = []
    for param_value in param_values:
        model_parameters[param_name] = param_value
        ofname_infix = dict2string(model_parameters)
        ofname_prefix = "%s.%s" % (prefix, ofname_infix)
        model_fname = "%s.arch.json" % (ofname_prefix)
        weights_fname = "%s.weights.hd5" % (ofname_prefix)
        try:
            logger.debug("Checking for model files {} and {}...".format(model_fname, weights_fname))
            dnn = SequenceDNN.load(model_fname, weights_fname)
            logger.debug("Model files found. Loaded model successfully!")
        except Exception:
            # Any ordinary load failure means "train from scratch".
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate instead of silently
            # starting a training run.
            logger.debug("Model files not found. Training model...")
            dnn = SequenceDNN(**model_parameters)
            logger.info("training with %s %s .." % (param_name, param_value))
            dnn.train(X_train, y_train, (X_valid, y_valid))
            dnn.save(model_fname, weights_fname)
        dnn_results.append(dnn.test(X_test, y_test))

    return dnn_results
def train_test_dnn_vary_data_size(prefix,
                                  model_parameters=None,
                                  X_train=None,
                                  y_train=None,
                                  X_valid=None,
                                  y_valid=None,
                                  X_test=None,
                                  y_test=None,
                                  train_set_sizes=None):
    """Train (or load) one SequenceDNN per training-set size and test each.

    For every size in ``train_set_sizes`` the function derives file names
    from ``prefix``, ``dict2string(model_parameters)`` and the size, and
    tries to load a saved model. If loading fails, it trains three models
    (random seeds 1, 2, 3) on the first ``train_set_size`` examples plus
    their reverse complements, and keeps and saves the one with the highest
    mean ``auROC`` on ``(X_test, y_test)``.

    Side effect: ``numpy.random`` and ``random`` are re-seeded with 1 before
    returning.

    Args:
        prefix: leading part of the saved-model file names.
        model_parameters: dict of SequenceDNN keyword arguments.
        X_train, y_train: training inputs and labels; only the leading
            ``train_set_size`` rows are used for each size.
        X_valid, y_valid: validation data passed to ``dnn.train``;
            ``y_valid.shape[-1]`` is used as the number of result entries
            to average over.
        X_test, y_test: data passed to ``dnn.test``.
        train_set_sizes: iterable of training-set sizes.

    Returns:
        List with one ``best_dnn.test(X_test, y_test)`` result per size.
    """
    dnn_results = []
    for train_set_size in train_set_sizes:
        ofname_infix = dict2string(model_parameters)
        ofname_infix = "%s.train_set_size_%s" % (ofname_infix,
                                                 str(train_set_size))
        ofname_prefix = "%s.%s" % (prefix, ofname_infix)
        model_fname = "%s.arch.json" % (ofname_prefix)
        weights_fname = "%s.weights.h5" % (ofname_prefix)
        try:
            logger.debug("Checking for model files {} and {}...".format(
                model_fname, weights_fname))
            best_dnn = SequenceDNN.load(model_fname, weights_fname)
            logger.debug("Model files found. Loaded model successfully!")
        except Exception:
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate instead of silently
            # starting three training runs.
            logger.debug("Model files not found. Training model...")
            # try 3 attempts, take best auROC, save that model
            X_train_subset = X_train[:train_set_size]
            X_train_subset = np.concatenate(
                (X_train_subset, reverse_complement(X_train_subset)))
            y_train_subset = np.concatenate(
                (y_train[:train_set_size], y_train[:train_set_size]))
            best_auROC = 0
            best_dnn = None
            for random_seed in [1, 2, 3]:
                np.random.seed(random_seed)
                random.seed(random_seed)
                dnn = SequenceDNN(**model_parameters)
                logger.info("training with %i examples.." % (train_set_size))
                dnn.train(X_train_subset, y_train_subset, (X_valid, y_valid))
                result = dnn.test(X_test, y_test)
                auROCs = [
                    result.results[i]["auROC"]
                    for i in range(y_valid.shape[-1])
                ]
                # get average auROC across tasks
                mean_auROC = sum(auROCs) / len(auROCs)
                if mean_auROC > best_auROC:
                    best_auROC = mean_auROC
                    dnn.save(ofname_prefix)
                    best_dnn = dnn
            if best_dnn is None:
                # No attempt scored above 0 (auROC of 0 or NaN). Use the
                # last trained model so the test call below does not fail
                # on None; it is deliberately not saved.
                logger.info("no attempt reached a mean auROC above 0; "
                            "using the last trained model")
                best_dnn = dnn
        dnn_results.append(best_dnn.test(X_test, y_test))
    # reset to original random seed
    np.random.seed(1)
    random.seed(1)
    return dnn_results
Пример #9
0
def run(use_deep_CNN, use_RNN, label, golden_results):
    """Fit a SequenceDNN to random data and assert its metrics equal the goldens.

    200 random 'ACGT' strings of length 100 get random boolean labels. 20%
    are held out with ``train_test_split``; the rest are extended with
    ``reverse_complement`` and shuffled. The model trains for one epoch
    with the held-out split as validation data, and the values of
    ``model.test(...).results[0]`` must be ``np.allclose`` to the values of
    ``golden_results``.

    Args:
        use_deep_CNN: truthy selects the three-element ``num_filters`` and
            ``conv_width`` tuples.
        use_RNN: stored under ``'use_RNN'``; truthy also adds ``GRU_size``
            and ``TDD_size``.
        label: prefix of the assertion message.
        golden_results: object with ``.values()`` giving expected metrics.

    Raises:
        AssertionError: when the metrics differ from the goldens.
    """
    length, count = 100, 200
    holdout = 0.2
    epochs = 1

    def random_sequence():
        # One string of ``length`` bases, each drawn with ``random.choice``.
        return ''.join(random.choice('ACGT') for _ in range(length))

    # Sequences first, labels second: keeps the RNG call order fixed.
    sequences = np.array([random_sequence() for _ in range(count)])
    labels = np.random.choice((True, False), size=count)[:, None]

    X_train, X_test, y_train, y_test = train_test_split(
        one_hot_encode(sequences), labels, test_size=holdout)

    # Reverse-complement augmentation with duplicated labels.
    X_train = np.concatenate((X_train, reverse_complement(X_train)))
    y_train = np.concatenate((y_train, y_train))

    # One permutation indexes both arrays so pairs stay matched.
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = X_train[permutation]
    y_train = y_train[permutation]

    settings = {'seq_length': length, 'use_RNN': use_RNN,
                'num_filters': (45, ), 'pool_width': 25,
                'conv_width': (10, ), 'L1': 0, 'dropout': 0.2,
                'num_epochs': epochs}
    if use_deep_CNN:
        settings['num_filters'] = (45, 50, 50)
        settings['conv_width'] = (10, 8, 5)
    if use_RNN:
        settings['GRU_size'] = 35
        settings['TDD_size'] = 45

    model = SequenceDNN(**settings)
    model.train(X_train, y_train, validation_data=(X_test, y_test))
    results = model.test(X_test, y_test).results[0]

    got = tuple(results.values())
    want = tuple(golden_results.values())
    assert np.allclose(got, want), \
        '{}: result = {}, golden = {}'.format(label, results, golden_results)
Пример #10
0
def main_train(pos_sequences=None,
               neg_sequences=None,
               prefix=None,
               arch_file=None,
               weights_file=None,
               **kwargs):
    """Fit a SequenceDNN to positive/negative sequences and write it out.

    Both inputs go through ``encode_fasta_sequences``; positives are
    labelled ``True`` and negatives ``False``. 20% of the combined data is
    held out for validation. A model is loaded when ``arch_file`` is
    given, otherwise constructed from ``kwargs``. After training, the
    validation metrics are printed and ``model.save(prefix)`` is called.

    Args:
        pos_sequences: input for ``encode_fasta_sequences`` (positive class).
        neg_sequences: input for ``encode_fasta_sequences`` (negative class).
        prefix: argument for ``model.save``.
        arch_file: architecture file of an existing model, or ``None``.
        weights_file: weights file passed to ``SequenceDNN.load``.
        **kwargs: SequenceDNN constructor options; ``None`` values are
            discarded before use.
    """
    # Keep only options that were actually set.
    kwargs = dict(
        (option, chosen) for option, chosen in kwargs.items()
        if chosen is not None)

    print("loading sequence data...")
    X_pos = encode_fasta_sequences(pos_sequences)
    y_pos = np.array([[True]]*len(X_pos))
    X_neg = encode_fasta_sequences(neg_sequences)
    y_neg = np.array([[False]]*len(X_neg))
    all_X = np.concatenate((X_pos, X_neg))
    all_y = np.concatenate((y_pos, y_neg))
    X_train, X_valid, y_train, y_valid = train_test_split(
        all_X, all_y, test_size=0.2)

    resume = arch_file is not None
    if resume:
        print("loading model...")
        model = SequenceDNN.load(arch_file, weights_file)
    else:
        print("initializing model...")
        model = SequenceDNN(seq_length=X_train.shape[-1], **kwargs)

    print("starting model training...")
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    metrics = model.test(X_valid, y_valid)
    print("final validation metrics:")
    print(metrics)

    print("saving model files..")
    model.save(prefix)
    print("Done!")
Пример #11
0
def main_predict(sequences=None,
                 model_file=None,
                 weights_file=None,
                 output_file=None):
    """Load a saved SequenceDNN, predict on sequences and save the predictions.

    Args:
        sequences: input for ``encode_fasta_sequences``.
        model_file: first argument to ``SequenceDNN.load``.
        weights_file: second argument to ``SequenceDNN.load``.
        output_file: destination passed to ``np.savetxt``.
    """
    print("loading sequence data...")
    encoded = encode_fasta_sequences(sequences)

    print("loading model...")
    dnn = SequenceDNN.load(model_file, weights_file)

    print("getting predictions...")
    scores = dnn.predict(encoded)

    print("saving predictions to output file...")
    np.savetxt(output_file, scores)
    print("Done!")
Пример #12
0
def main_train(pos_sequences=None,
               neg_sequences=None,
               pos_validation_sequences=None,
               neg_validation_sequences=None,
               prefix=None,
               arch_file=None,
               weights_file=None,
               **kwargs):
    """Train a SequenceDNN on positive/negative sequences and save it.

    Encodes the inputs with ``encode_fasta_sequences`` and labels positives
    ``True`` and negatives ``False``. If validation inputs are supplied
    they form the validation set and all of the main data is used for
    training; otherwise 20% of the main data is held out for validation.
    A model is loaded when ``arch_file`` is given, otherwise constructed
    from ``kwargs``. After training, validation metrics are printed and
    ``model.save(prefix)`` is called.

    Args:
        pos_sequences: input for ``encode_fasta_sequences`` (positive class).
        neg_sequences: input for ``encode_fasta_sequences`` (negative class).
        pos_validation_sequences: optional positive validation input.
        neg_validation_sequences: optional negative validation input.
        prefix: argument for ``model.save``.
        arch_file: architecture file of an existing model, or ``None``.
        weights_file: weights file passed to ``SequenceDNN.load``.
        **kwargs: SequenceDNN constructor options; ``None`` values are
            discarded before use.

    Raises:
        ValueError: if only one of the two validation inputs is given.
    """
    kwargs = {key: value for key, value in kwargs.items() if value is not None}
    # encode fastas
    print("loading sequence data...")
    X_pos = encode_fasta_sequences(pos_sequences)
    y_pos = np.array([[True]] * len(X_pos))
    X_neg = encode_fasta_sequences(neg_sequences)
    y_neg = np.array([[False]] * len(X_neg))
    X = np.concatenate((X_pos, X_neg))
    y = np.concatenate((y_pos, y_neg))
    # if a validation set is provided by the user, encode that as well
    if pos_validation_sequences is not None or neg_validation_sequences is not None:
        # both positive and negative validation sequences must be provided.
        if pos_validation_sequences is None or neg_validation_sequences is None:
            raise ValueError(
                "both pos_validation_sequences and neg_validation_sequences "
                "must be provided")
        X_valid_pos = encode_fasta_sequences(pos_validation_sequences)
        X_valid_neg = encode_fasta_sequences(neg_validation_sequences)
        # Repeat the label row once per sequence. Multiplying the *array*
        # by the count (as before) scaled a single 1x1 array instead of
        # producing one label per sequence.
        y_valid_pos = np.array([[True]] * len(X_valid_pos))
        y_valid_neg = np.array([[False]] * len(X_valid_neg))
        X_valid = np.concatenate((X_valid_pos, X_valid_neg))
        y_valid = np.concatenate((y_valid_pos, y_valid_neg))
        # With an explicit validation set, all main data is training data.
        # Previously X_train / y_train were never bound on this path.
        X_train, y_train = X, y
    else:
        X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                              y,
                                                              test_size=0.2)
    if arch_file is not None:  # load  model
        print("loading model...")
        # NOTE(review): ``model_hdf5_file`` is not defined in this function;
        # unless it is a module-level name this line raises NameError.
        # Confirm the intended source and the ``SequenceDNN.load`` signature.
        model = SequenceDNN.load(model_hdf5_file, arch_file, weights_file)
    else:  # initialize model
        print("initializing model...")
        model = SequenceDNN(seq_length=X_train.shape[-1], **kwargs)
    # train
    print("starting model training...")
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    valid_result = model.test(X_valid, y_valid)
    print("final validation metrics:")
    print(valid_result)
    # save
    print("saving model files..")
    model.save(prefix)
    print("Done!")
Пример #13
0
def main_test(pos_sequences=None,
              neg_sequences=None,
              model_file=None,
              weights_file=None):
    """Load a saved SequenceDNN and print its test result on labelled sequences.

    Positives are labelled ``True`` and negatives ``False``; both sets are
    concatenated (positives first) before calling ``model.test``.

    Args:
        pos_sequences: input for ``encode_fasta_sequences`` (positive class).
        neg_sequences: input for ``encode_fasta_sequences`` (negative class).
        model_file: first argument to ``SequenceDNN.load``.
        weights_file: second argument to ``SequenceDNN.load``.
    """
    print("loading sequence data...")
    positives = encode_fasta_sequences(pos_sequences)
    positive_labels = np.array([[True]]*len(positives))
    negatives = encode_fasta_sequences(neg_sequences)
    negative_labels = np.array([[False]]*len(negatives))
    features = np.concatenate((positives, negatives))
    targets = np.concatenate((positive_labels, negative_labels))

    print("loading model...")
    dnn = SequenceDNN.load(model_file, weights_file)

    print("testing model...")
    outcome = dnn.test(features, targets)
    print(outcome)
Пример #14
0
def main(args):
    """Predict on test sequences with a saved SequenceDNN and produce plots.

    Prints a prediction line per positive and negative sequence, then
    plots deeplift scores for the positives, the two named motifs, the
    model architecture and the convolutional filters.

    Args:
        args: parsed arguments providing ``pos_sequences``,
            ``neg_sequences``, ``arch_file`` and ``weights_file``.
    """
    print('Loading sequence data...')
    pos_seq = encode_fasta_sequences(args.pos_sequences)
    print('{} positive test sequences'.format(len(pos_seq)))
    neg_seq = encode_fasta_sequences(args.neg_sequences)
    print('{} negative test sequences\n'.format(len(neg_seq)))

    # Output file names are derived from the architecture file name.
    prefix = args.arch_file.replace('.arch.json', '')
    print('Loading {} model...'.format(prefix))
    model = SequenceDNN.load(args.arch_file, args.weights_file)

    # Positives are predicted and printed before negatives are predicted.
    print('Getting predictions...')
    batches = (
        (pos_seq, 'positive_test_{}\tP(bound)={}'),
        (neg_seq, 'negative_test_{}\tP(bound)={}'),
    )
    for encoded, line_template in batches:
        predictions = model.predict(encoded)
        for index, pred in enumerate(predictions):
            print(line_template.format(index, pred[0]))
        print('')

    print('Plotting deeplift scores on positive sequences...')
    model.plot_deeplift(pos_seq, '{}_deeplift_positive'.format(prefix))

    # Motif images are numbered from 1.
    print('Plotting true motifs...')
    for number, motif in enumerate(['IRF_known1', 'NFKB_known1'], start=1):
        fig = plot_motif(motif, figsize=(10, 4), ylab=motif)
        fig.savefig('motif{}.png'.format(number), bbox_inches='tight')

    print('Plotting architecture...')
    model.plot_architecture('{}_architecture.png'.format(prefix))

    print('Plotting convolutional filters...')
    plot_sequence_filters(model, prefix)
def train_test_dnn_vary_data_size(prefix, model_parameters=None,
                                  X_train=None, y_train=None,
                                  X_valid=None, y_valid=None,
                                  X_test=None, y_test=None,
                                  train_set_sizes=None):
    """Train (or load) one SequenceDNN per training-set size and test each.

    For every size in ``train_set_sizes`` the function derives the
    architecture and weights file names from ``prefix``,
    ``dict2string(model_parameters)`` and the size, and tries to load a
    saved model. If loading fails, it trains three models (random seeds
    1, 2, 3) on the first ``train_set_size`` examples plus their reverse
    complements, and keeps and saves the one with the highest mean
    ``auROC`` on ``(X_test, y_test)``.

    Side effect: ``numpy.random`` and ``random`` are re-seeded with 1 before
    returning.

    Args:
        prefix: leading part of the saved-model file names.
        model_parameters: dict of SequenceDNN keyword arguments.
        X_train, y_train: training inputs and labels; only the leading
            ``train_set_size`` rows are used for each size.
        X_valid, y_valid: validation data passed to ``dnn.train``;
            ``y_valid.shape[-1]`` is used as the number of result entries
            to average over.
        X_test, y_test: data passed to ``dnn.test``.
        train_set_sizes: iterable of training-set sizes.

    Returns:
        List with one ``best_dnn.test(X_test, y_test)`` result per size.
    """
    dnn_results = []
    for train_set_size in train_set_sizes:
        ofname_infix = dict2string(model_parameters)
        ofname_infix = "%s.train_set_size_%s" % (ofname_infix, str(train_set_size))
        ofname_prefix = "%s.%s" % (prefix, ofname_infix)
        model_fname = "%s.arch.json" % (ofname_prefix)
        weights_fname = "%s.weights.hd5" % (ofname_prefix)
        try:
            logger.debug("Checking for model files {} and {}...".format(model_fname, weights_fname))
            best_dnn = SequenceDNN.load(model_fname, weights_fname)
            logger.debug("Model files found. Loaded model successfully!")
        except Exception:
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate instead of silently
            # starting three training runs.
            logger.debug("Model files not found. Training model...")
            # try 3 attempts, take best auROC, save that model
            X_train_subset = X_train[:train_set_size]
            X_train_subset = np.concatenate((X_train_subset, reverse_complement(X_train_subset)))
            y_train_subset = np.concatenate((y_train[:train_set_size], y_train[:train_set_size]))
            best_auROC = 0
            best_dnn = None
            for random_seed in [1, 2, 3]:
                np.random.seed(random_seed)
                random.seed(random_seed)
                dnn = SequenceDNN(**model_parameters)
                logger.info("training with %i examples.." % (train_set_size))
                dnn.train(X_train_subset, y_train_subset, (X_valid, y_valid))
                result = dnn.test(X_test, y_test)
                auROCs = [result.results[i]["auROC"] for i in range(y_valid.shape[-1])]
                # get average auROC across tasks
                mean_auROC = sum(auROCs) / len(auROCs)
                if mean_auROC > best_auROC:
                    best_auROC = mean_auROC
                    dnn.save(model_fname, weights_fname)
                    best_dnn = dnn
            if best_dnn is None:
                # No attempt scored above 0 (auROC of 0 or NaN). Use the
                # last trained model so the test call below does not fail
                # on None; it is deliberately not saved.
                logger.info("no attempt reached a mean auROC above 0; "
                            "using the last trained model")
                best_dnn = dnn
        dnn_results.append(best_dnn.test(X_test, y_test))
    # reset to original random seed
    np.random.seed(1)
    random.seed(1)
    return dnn_results
Пример #16
0
def get_SequenceDNN(SequenceDNN_parameters):
    """Build a SequenceDNN, passing the given mapping as keyword arguments."""
    model = SequenceDNN(**SequenceDNN_parameters)
    return model
Пример #17
0
# Script fragment: shuffle the training data, then either train a single
# SequenceDNN with fixed hyperparameters or set up a hyperparameter search.
# X_train, y_train, X_valid, y_valid, seq_length, num_epochs, use_RNN,
# use_deep_CNN and do_hyperparameter_search are defined outside this fragment.

# Shuffle by building one index permutation and applying it to both arrays,
# so inputs and labels stay aligned.
random_order = np.arange(len(X_train))
np.random.shuffle(random_order)
X_train = X_train[random_order]
y_train = y_train[random_order]

# Build and train model

if not do_hyperparameter_search:
    # Single-layer defaults; the tuples hold one entry per layer.
    hyperparameters = {'seq_length': seq_length, 'use_RNN': use_RNN,
                       'num_filters': (45,), 'pool_width': 25, 'conv_width': (10,),
                       'L1': 0, 'dropout': 0.2, 'num_epochs': num_epochs}
    if use_deep_CNN:
        # Replace the one-element tuples with three-element ones.
        hyperparameters.update({'num_filters': (45, 50, 50), 'conv_width': (10, 8, 5)})
    if use_RNN:
        hyperparameters.update({'GRU_size': 35, 'TDD_size': 45})
    model = SequenceDNN(**hyperparameters)
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))

else:
    print('Starting hyperparameter search...')
    from dragonn.hyperparameter_search import HyperparameterSearcher
    # Values held constant during the search.
    fixed_hyperparameters = {'seq_length': seq_length, 'use_RNN': use_RNN, 'num_epochs': num_epochs}
    # Each entry is a pair of numbers, or a tuple of pairs for per-layer
    # settings -- presumably (low, high) search bounds; confirm against
    # HyperparameterSearcher.
    grid = {'num_filters': ((5, 100),), 'pool_width': (5, 40),
            'conv_width': ((6, 20),), 'dropout': (0, 0.5)}
    if use_deep_CNN:
        grid.update({'num_filters': ((5, 100), (5, 100), (5, 100)),
                     'conv_width': ((6, 20), (6, 20), (6, 20))})
    if use_RNN:
        grid.update({'GRU_size': (10, 50), 'TDD_size': (20, 60)})

    # Backend is RandomSearch; if using Python 2, can also specify MOESearch
Пример #18
0
# Script fragment: shuffle the training data, then either train a single
# SequenceDNN with fixed hyperparameters or set up a hyperparameter search.
# X_train, y_train, X_valid, y_valid, seq_length, num_epochs, use_RNN,
# use_deep_CNN and do_hyperparameter_search are defined outside this fragment.

# Shuffle by building one index permutation and applying it to both arrays,
# so inputs and labels stay aligned.
random_order = np.arange(len(X_train))
np.random.shuffle(random_order)
X_train = X_train[random_order]
y_train = y_train[random_order]

# Build and train model

if not do_hyperparameter_search:
    # Single-layer defaults; the tuples hold one entry per layer.
    hyperparameters = {'seq_length': seq_length, 'use_RNN': use_RNN,
                       'num_filters': (45,), 'pool_width': 25, 'conv_width': (10,),
                       'L1': 0, 'dropout': 0.2, 'num_epochs': num_epochs}
    if use_deep_CNN:
        # Replace the one-element tuples with three-element ones.
        hyperparameters.update({'num_filters': (45, 50, 50), 'conv_width': (10, 8, 5)})
    if use_RNN:
        hyperparameters.update({'GRU_size': 35, 'TDD_size': 45})
    model = SequenceDNN(**hyperparameters)
    # 'best_model' is passed as save_best_model_to_prefix; presumably the
    # best model seen during training is written under that prefix --
    # confirm against SequenceDNN.train.
    model.train(X_train, y_train, validation_data=(X_valid, y_valid),
                save_best_model_to_prefix='best_model')

else:
    print('Starting hyperparameter search...')
    from dragonn.hyperparameter_search import HyperparameterSearcher, RandomSearch
    # Values held constant during the search.
    fixed_hyperparameters = {'seq_length': seq_length, 'use_RNN': use_RNN, 'num_epochs': num_epochs}
    # Each entry is a pair of numbers, or a tuple of pairs for per-layer
    # settings -- presumably (low, high) search bounds; confirm against
    # HyperparameterSearcher.
    grid = {'num_filters': ((5, 100),), 'pool_width': (5, 40),
            'conv_width': ((6, 20),), 'dropout': (0, 0.5)}
    if use_deep_CNN:
        grid.update({'num_filters': ((5, 100), (5, 100), (5, 100)),
                     'conv_width': ((6, 20), (6, 20), (6, 20))})
    if use_RNN:
        grid.update({'GRU_size': (10, 50), 'TDD_size': (20, 60)})
Пример #19
0
# Script fragment: shuffle the training data, then either train and test a
# single SequenceDNN with fixed hyperparameters or set up a hyperparameter
# search. X_train, y_train, X_valid, y_valid, X_test, y_test, seq_length,
# use_RNN, use_deep_CNN and do_hyperparameter_search are defined outside
# this fragment.

# Shuffle by building one index permutation and applying it to both arrays,
# so inputs and labels stay aligned.
random_order = np.arange(len(X_train))
np.random.shuffle(random_order)
X_train = X_train[random_order]
y_train = y_train[random_order]

# Build model, train and test

if not do_hyperparameter_search:
    # Here the first layer uses scalar 'num_filters' / 'conv_width' and
    # further layers use suffixed keys ('_2', '_3').
    hyperparameters = {'seq_length': seq_length, 'use_deep_CNN': use_deep_CNN, 'use_RNN': use_RNN,
                       'num_filters': 45, 'pool_width': 25, 'conv_width': 10, 'L1': 0, 'dropout': 0.2}
    if use_deep_CNN:
        hyperparameters.update({'num_filters_2': 50, 'conv_width_2': 8,
                                'num_filters_3': 50, 'conv_width_3': 5})
    if use_RNN:
        hyperparameters.update({'GRU_size': 35, 'TDD_size': 45})
    model = SequenceDNN(**hyperparameters)
    model.train(X_train, y_train, validation_data=(X_valid, y_valid))
    print('Test results: {}'.format(model.test(X_test, y_test)))

else:
    print('Starting hyperparameter search...')
    from hyperparameter_search import HyperparameterSearcher
    # Values held constant during the search.
    fixed_hyperparameters = {'seq_length': seq_length, 'use_deep_CNN': use_deep_CNN, 'use_RNN': use_RNN}
    # Each entry is a pair of numbers -- presumably (low, high) search
    # bounds; confirm against HyperparameterSearcher.
    grid = {'num_filters': (5, 100), 'pool_width': (5, 40), 'conv_width': (6, 20), 'dropout': (0, 0.5)}
    if use_deep_CNN:
        grid.update({'num_filters_2': (5, 100), 'conv_width_2': (6, 20),
                     'num_filters_3': (5, 100), 'conv_width_3': (6, 20),
        })
    if use_RNN:
        grid.update({'GRU_size': (10, 50), 'TDD_size': (20, 60)})