示例#1
0
def test_janggu_influence_genomic(tmpdir):
    """Check that input attribution is stable under a shifted query window.

    Attribution is computed for the first region of a DNA dataset loaded
    from the sample reference genome, once for the exact interval and once
    for the same interval widened by one position on each side. After
    trimming the extra positions, both results must be equal.

    Parameters
    ----------
    tmpdir
        pytest temporary-directory fixture; its path is exported through
        the JANGGU_OUTPUT environment variable.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')

    csvfile = os.path.join(data_path, 'sample.csv')

    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=refgenome,
                                       storage='ndarray',
                                       binsize=50,
                                       roi=bed_file,
                                       order=1)

    # Labels are read from a header-less CSV and wrapped as the model output.
    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Minimal network: flatten the 'dna' input and apply one dense layer
        # whose width is given by the first model parameter.
        layer = inputs['dna']
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    model = Janggu.create(_cnn_model,
                          modelparams=(2, ),
                          inputs=dna,
                          outputs=ctcf,
                          name='dna_ctcf_HepG2-cnn')

    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Attribution for exactly the interval of the first region.
    iv = dna.gindexer[0]
    chrom, start, end = iv.chrom, iv.start, iv.end
    influence = input_attribution(model,
                                  dna,
                                  chrom=chrom,
                                  start=start,
                                  end=end)

    # Attribution for the same interval, extended by one position at each end.
    influence2 = input_attribution(model,
                                   dna,
                                   chrom=chrom,
                                   start=start - 1,
                                   end=end + 1)

    # Drop the first and last entry along the second axis of the widened
    # result so that it lines up with the exact-interval result.
    np.testing.assert_equal(influence[0][:], influence2[0][:][:, 1:-1])
示例#2
0
def test_janggu_influence_fasta(tmpdir):
    """Check that attribution by coordinates equals attribution by index.

    A DNA dataset is created from the sample FASTA file and fed to a small
    dense model. Attribution for the first region is requested twice: via
    its chrom/start/end coordinates and via ``idx=0``. Both results must
    be equal.

    Parameters
    ----------
    tmpdir
        pytest temporary-directory fixture; its path is exported through
        the JANGGU_OUTPUT environment variable.
    """
    # The fixture was requested but previously unused. Export it the same
    # way test_janggu_influence_genomic does, so that the output location
    # does not depend on test order or on the ambient environment.
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')

    order = 1
    filename = os.path.join(data_path, 'sample.fa')

    dna = Bioseq.create_from_seq('dna',
                                 fastafile=filename,
                                 order=order,
                                 cache=False)

    @inputlayer
    def _cnn_model(inputs, inp, oup, params):
        # Flatten the 'dna' input, apply a dense layer sized by the first
        # model parameter, then a single sigmoid output unit.
        layer = inputs['dna']
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        output = Dense(1, activation='sigmoid')(output)
        return inputs, output

    model = Janggu.create(_cnn_model,
                          modelparams=(2, ),
                          inputs=dna,
                          name='dna_ctcf_HepG2-cnn')

    # NOTE(review): the model is deliberately not compiled here (the compile
    # call was commented out in the original) -- presumably attribution does
    # not require a compiled model; confirm.

    # Attribution addressed by the genomic coordinates of the first region.
    iv = dna.gindexer[0]
    chrom, start, end = iv.chrom, iv.start, iv.end
    influence = input_attribution(model,
                                  dna,
                                  chrom=chrom,
                                  start=start,
                                  end=end)

    # Attribution addressed by the index of that same region.
    influence2 = input_attribution(model, dna, idx=0)
    np.testing.assert_equal(influence[0][:], influence2[0][:])
示例#3
0
# Print a few prediction scores as a quick sanity check for the user.
print('Oct4 predictions scores should be greater than Mafk scores:')

# The first four entries of cov_pred are reported as Oct4 examples ...
print('Prediction score examples for Oct4')
for i in range(4):
    score = cov_pred[i]
    print('{}.: {}'.format(i, score))

# ... and the last four, counted from the end, as Mafk examples.
print('Prediction score examples for Mafk')
for i in range(1, 5):
    score = cov_pred[-i]
    print('{}.: {}'.format(i, score))

# Input feature importance attribution for the 4th interval (index 3),
# which represents an Oct4 bound region.
gi = DNA.gindexer[3]
chrom, start, end = gi.chrom, gi.start, gi.end
attr_oct = input_attribution(model, DNA, chrom=chrom, start=start, end=end)

# Plot the attribution track for that interval and save it as a PNG file
# under args.path; the file name carries the sequence order in use.
oct4_figure = plotGenomeTrack(SeqTrack(attr_oct[0]), chrom, start, end)
oct4_png = 'influence_oct4_example_order{}.png'.format(args.order)
oct4_figure.savefig(os.path.join(args.path, oct4_png))

# For comparison, run the same attribution on an interval representing a
# Mafk bound region (index 7796).
gi = DNA.gindexer[7796]
chrom, start, end = gi.chrom, gi.start, gi.end
attr_mafk = input_attribution(model, DNA, chrom=chrom, start=start, end=end)