Exemplo n.º 1
0
def test_janggu_instance_dense(tmpdir):
    """Test Janggu creation by shape and name.

    Builds a DNA input dataset and a label array from the packaged sample
    files, then checks that ``Janggu.create``:

    * raises when the model template indexes ``inputs`` with an unknown
      name or with an unsupported key type,
    * raises when ``name`` is not a string,
    * succeeds when ``inputs`` is accessed via ``inputs()[0]``,
      ``inputs[0]`` or ``inputs['dna']``.

    Finally the last model is serialized (JSON/YAML), compiled, saved to
    disk and looked up again by name.
    """
    # Presumably Janggu reads JANGGU_OUTPUT as its output root; pointing it at
    # the pytest temp dir keeps saved models out of the working tree.
    # NOTE(review): the variable is not restored after the test.
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')

    csvfile = os.path.join(data_path, 'sample.csv')

    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file,
                                       order=1)

    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # '.' is not the name of any input dataset.
        layer = inputs['.']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # due to No input name . defined
        Janggu.create(_cnn_model,
                      modelparams=(2, ),
                      inputs=dna,
                      outputs=ctcf,
                      name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # A list is not a valid key for selecting an input.
        layer = inputs[list()]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # due to Wrong type for indexing
        Janggu.create(_cnn_model,
                      modelparams=(2, ),
                      inputs=dna,
                      outputs=ctcf,
                      name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Calling ``inputs`` and taking the first element is a valid access.
        layer = inputs()[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # the model name must be a string
        Janggu.create(_cnn_model,
                      modelparams=(2, ),
                      inputs=dna,
                      outputs=ctcf,
                      name=12342134)

    # test with given model name
    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')
    # test with auto. generated modelname.
    # NOTE(review): this call passes the same explicit name as the one above,
    # so the auto-generated-name path is not exercised here -- confirm whether
    # ``name`` was meant to be omitted.
    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Positional access to the first input.
        layer = inputs[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Access by the dataset name given to Bioseq above.
        layer = inputs['dna']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    # The underlying keras model must survive a JSON and a YAML round trip;
    # only the absence of an exception is checked.
    model_from_json(bwm.kerasmodel.to_json())
    model_from_yaml(bwm.kerasmodel.to_yaml())

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    bwm.save()
    bwm.summary()

    # save() must have written the model to the expected location.
    assert os.path.exists(storage)

    # The saved model must be retrievable by its name alone.
    Janggu.create_by_name('dna_ctcf_HepG2-cnn')
Exemplo n.º 2
0
def _build_epi_dna_model(params):
    """Combine the stored DNA and epigenetics models into one joint model.

    Loads the models saved as ``'cage_promoters_dna_only'`` and
    ``'cage_promoters_epi_only'``, concatenates the outputs of their
    second-to-last layers and adds a single-unit ``'geneexpr'`` output.
    When ``params['pretrained']`` is falsy, the architecture is rebuilt
    from its JSON description and wrapped under a new name.
    """
    dna_net = Janggu.create_by_name('cage_promoters_dna_only')
    epi_net = Janggu.create_by_name('cage_promoters_epi_only')

    dna_hidden = dna_net.kerasmodel.layers[-2].output
    epi_hidden = epi_net.kerasmodel.layers[-2].output
    joint_out = Dense(1, name='geneexpr')(
        Concatenate()([dna_hidden, epi_hidden]))

    joint_inputs = [dna_net.kerasmodel.input] + epi_net.kerasmodel.input
    joint = Janggu(joint_inputs, joint_out, name='cage_promoters_epi_dna')

    if params['pretrained']:
        return joint

    # Rebuilding from JSON keeps the architecture but randomly
    # reinitializes the network, so it can be trained from scratch.
    fresh = model_from_json(joint.kerasmodel.to_json())
    return Janggu(fresh.inputs,
                  fresh.outputs,
                  name='cage_promoters_epi_dna_randominit')


def objective(params):
    """Build, train and evaluate one model for a given parameter set.

    Parameters
    ----------
    params : dict
        Reads the keys ``'inputs'``, ``'val_chrom'``, ``'opt'``,
        ``'epochs'`` and, for ``inputs == 'epi_dna'``, ``'pretrained'``.
        The dict is also forwarded to ``get_data`` and ``get_model``.

    Returns
    -------
    dict
        ``{'status': 'fail'}`` if data loading or model construction raises
        ``ValueError``. Otherwise a dict with ``'status': 'ok'``, the
        validation loss under ``'loss'``, the training history, the
        train/val/test correlations, the model config and weights, and the
        parameters that were used.
    """
    print(params)
    try:
        fit_data = get_data(params)
        # First hold out the test chromosome, then carve the validation
        # chromosome out of what is left.
        fit_data, test_split = split_train_test(fit_data, [test_chrom])
        train_split, val_split = split_train_test(fit_data,
                                                  [params['val_chrom']])
        # Reset the keras session before a new model is defined.
        K.clear_session()
        if params['inputs'] == 'epi_dna':
            net = _build_epi_dna_model(params)
        else:
            model_name = 'cage_promoters_{}'.format(params['inputs'])
            net = Janggu.create(get_model,
                                params,
                                fit_data[0],
                                fit_data[1],
                                name=model_name)
    except ValueError:
        main_logger.exception('objective:')
        return {'status': 'fail'}

    net.compile(optimizer=get_opt(params['opt']),
                loss='mae',
                metrics=['mse'])
    # NOTE(review): validation_data is a list of chromosome names here;
    # presumably Janggu.fit splits the validation set off itself -- confirm.
    early_stop = EarlyStopping(patience=5, restore_best_weights=True)
    hist = net.fit(fit_data[0],
                   fit_data[1],
                   epochs=params['epochs'],
                   batch_size=64,
                   validation_data=[params['val_chrom']],
                   callbacks=[early_stop])

    # Print the last recorded value of every tracked quantity.
    banner = '#' * 40
    print(banner)
    for metric, values in hist.history.items():
        print('{}: {}'.format(metric, values[-1]))
    print(banner)

    splits = (('train', train_split),
              ('val', val_split),
              ('test', test_split))

    # All predictions are computed before any evaluation is run.
    predictions = {tag: net.predict(split[0]) for tag, split in splits}

    scores = {}
    for tag, split in splits:
        scores[tag] = net.evaluate(
            split[0],
            split[1],
            callbacks=['var_explained', 'mse', 'mae', 'cor'],
            datatags=[tag])
    # The first entry of the evaluation result is the compiled loss ('mae').
    val_loss = scores['val'][0]

    # Pearson correlation between the first target column and the first
    # prediction column of each split.
    correlations = {
        tag: np.corrcoef(split[1][:][:, 0], predictions[tag][:, 0])[0, 1]
        for tag, split in splits
    }

    net.summary()
    main_logger.info('cor [train/val/test]: {:.2f}/{:.2f}/{:.2f}'.format(
        correlations['train'], correlations['val'], correlations['test']))

    return {
        'loss': val_loss,
        'status': 'ok',
        'all_losses': hist.history,
        'cor_train': correlations['train'],
        'cor_val': correlations['val'],
        'cor_test': correlations['test'],
        'model_config': net.kerasmodel.to_json(),
        'model_weights': net.kerasmodel.get_weights(),
        'concrete_params': params
    }
Exemplo n.º 3
0
    hidden_dna = dnamodel.kerasmodel.layers[-2].output
    hidden_dnase = dnasemodel.kerasmodel.layers[-2].output

    joint_hidden = Concatenate(name='concat')([hidden_dna, hidden_dnase])
    output = Dense(1, activation='sigmoid', name='peaks')(joint_hidden)

    # fit the model with preinitialized weights
    jointmodel = Janggu(dnamodel.kerasmodel.inputs +
                        dnasemodel.kerasmodel.inputs,
                        output,
                        name='pretrained_dnase_dna_joint_model_{}_{}'.format(
                            dnasename, dnaname))

    # reload the same model architecture; this
    # randomly reinitializes the weights
    newjointmodel = model_from_json(jointmodel.kerasmodel.to_json())

    newjointmodel = Janggu(
        newjointmodel.inputs,
        newjointmodel.outputs,
        name='randominit_dnase_dna_joint_model_{}_{}'.format(
            dnasename, dnaname))
    newjointmodel.compile(optimizer=get_opt('amsgrad'),
                          loss='binary_crossentropy',
                          metrics=['acc'])

    hist = newjointmodel.fit(
        train_data[0],
        train_data[1],
        epochs=shared_space['epochs'],
        batch_size=64,