示例#1
0
def test_output_export_clustermap(tmpdir):
    """Check that ExportClustermap writes cluster plots for a hidden layer.

    Predictions of the 'hidden' layer are exported twice: once with a
    default-constructed exporter and once with ``fform='eps'`` plus a
    per-sample annotation. Afterwards both ``cluster.png`` and
    ``cluster.eps`` must exist below the JANGGU_OUTPUT directory.
    """
    # Point the janggu output directory at pytest's temporary directory.
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    # 100 random samples with 10 features and one binary label each.
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y',
                    numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        # A single 3-unit dense layer named 'hidden' on top of input 'x'.
        with inputs.use('x') as x_in:
            hidden = Dense(3, name='hidden')(x_in)
        return inputs, hidden

    model = Janggu.create(_model, inputs=inputs, outputs=outputs,
                          name='nptest')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    # First pass: default exporter settings; second pass: eps output with
    # an annotation that labels the first 50 samples 1 and the rest 0.
    exporter_settings = (
        {},
        {'fform': 'eps', 'annot': {'annot': [1] * 50 + [0] * 50}},
    )
    for settings in exporter_settings:
        scorer = Scorer('cluster', exporter=ExportClustermap(**settings))
        model.predict(inputs, layername='hidden', callbacks=[scorer])

    # check if both plots were produced
    plot_dir = os.path.join(tmpdir.strpath, "evaluation", model.name,
                            'hidden')
    for plot_file in ("cluster.png", "cluster.eps"):
        assert os.path.exists(os.path.join(plot_dir, plot_file))
示例#2
0
SAMPLE_2 = os.path.join(DATA_PATH, 'sample2_test.fa')

# Load both test FASTA files into a single DNA dataset.
DNA_TEST = Bioseq.create_from_seq(
    'dna',
    fastafile=[SAMPLE_1, SAMPLE_2],
    order=args.order,
    cache=True,
)

# One label row per sequence: 1 for every sequence counted in SAMPLE_1,
# followed by 0 for every sequence counted in SAMPLE_2.
Y = np.asarray([[1]] * nseqs(SAMPLE_1) + [[0]] * nseqs(SAMPLE_2))
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])

# Per-sample annotation: label 1 is named 'Oct4', anything else 'Mafk'.
annot_test = (
    pd.DataFrame(Y[:], columns=LABELS_TEST.conditions)
    .applymap(lambda label: 'Oct4' if label == 1 else 'Mafk')
    .to_dict(orient='list')
)

# clustering plots based on hidden features
# NOTE(review): heatmap_eval and pred_tsv are not passed to the evaluate
# call below; presumably they are used further down -- confirm.
heatmap_eval = Scorer(
    'heatmap',
    exporter=ExportClustermap(annot=annot_test, z_score=1.0),
)

# output the predictions as tables or json files
pred_tsv = Scorer(
    'pred',
    exporter=ExportTsv(annot=annot_test, row_names=DNA_TEST.gindexer.chrs),
)

# Evaluate on the independent test data. Once evaluation has run, the
# callbacks post-process the results so that summary statistics or
# figures are generated automatically in the JANGGU_OUTPUT directory.
# NOTE(review): 'auc' and 'auroc' may name the same scorer -- confirm
# against the janggu evaluate documentation.
model.evaluate(
    DNA_TEST,
    LABELS_TEST,
    datatags=['test'],
    callbacks=['auc', 'auprc', 'roc', 'auroc'],
)
示例#3
0
                      inputs=DNA,
                      outputs=ReduceDim(LABELS))

# Compile with the adadelta optimizer and binary cross-entropy loss,
# tracking accuracy under the 'acc' key.
model.compile(
    optimizer='adadelta',
    loss='binary_crossentropy',
    metrics=['acc'],
)

hist = model.fit(DNA, ReduceDim(LABELS), epochs=100, shuffle=False)

# Print the last recorded loss and accuracy between two banner lines.
print('#' * 40)
print('loss: {}, acc: {}'.format(
    *(hist.history[metric][-1] for metric in ('loss', 'acc'))))
print('#' * 40)

# scorers exporting clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.0))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# evaluate on the independent test data
model.evaluate(
    DNA_TEST,
    ReduceDim(LABELS_TEST),
    datatags=['test'],
    callbacks=['auc', 'auprc', 'roc', 'prc'],
)

# Predict on the test data and add two new axes after the first axis
# before wrapping the scores in a Cover object that shares the
# genomic indexer of the test labels.
pred = model.predict(DNA_TEST)[:, None, None, :]
cov_pred = Cover.create_from_array('BindingProba', pred, LABELS_TEST.gindexer)

print('Oct4 predictions scores should be greater than Mafk scores:')
print('Prediction score examples for Oct4')
for i in range(4):