def test_output_export_clustermap(tmpdir):
    """Check that cluster plots are written for the 'hidden' layer.

    A small network is built on random data and ``predict`` is run twice
    on its ``'hidden'`` layer with a ``Scorer`` named ``'cluster'``: once
    with an ``ExportClustermap`` created without arguments, and once with
    ``fform='eps'`` plus a row annotation.  The test then expects both
    ``cluster.png`` and ``cluster.eps`` to exist below
    ``<tmpdir>/evaluation/<model name>/hidden``.
    """
    # Point the output location at the per-test temporary directory.
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    # Model template: one Dense layer named 'hidden' on top of input 'x';
    # the decorators supply the input layer and the sigmoid output layer.
    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        with inputs.use('x') as layer:
            hidden = Dense(3, name='hidden')(layer)
        return inputs, hidden

    bwm = Janggu.create(_model, inputs=inputs, outputs=outputs,
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # First pass: exporter with its default settings.
    default_scorer = Scorer('cluster', exporter=ExportClustermap())
    bwm.predict(inputs, layername='hidden', callbacks=[default_scorer])

    # Second pass: 'eps' file format and a two-group annotation of the rows.
    row_annotation = {'annot': [1] * 50 + [0] * 50}
    eps_scorer = Scorer('cluster',
                        exporter=ExportClustermap(fform='eps',
                                                  annot=row_annotation))
    bwm.predict(inputs, layername='hidden', callbacks=[eps_scorer])

    # check if both plots were produced
    plot_dir = os.path.join(tmpdir.strpath, "evaluation", bwm.name, 'hidden')
    for plot_file in ("cluster.png", "cluster.eps"):
        assert os.path.exists(os.path.join(plot_dir, plot_file))
SAMPLE_2 = os.path.join(DATA_PATH, 'sample2_test.fa')

# Read both FASTA files into a single test dataset.
DNA_TEST = Bioseq.create_from_seq('dna',
                                  fastafile=[SAMPLE_1, SAMPLE_2],
                                  order=args.order,
                                  cache=True)

# One label row per sequence: [1] for every SAMPLE_1 entry followed by
# [0] for every SAMPLE_2 entry.
_sample1_rows = [[1] for _ in range(nseqs(SAMPLE_1))]
_sample2_rows = [[0] for _ in range(nseqs(SAMPLE_2))]
Y = np.asarray(_sample1_rows + _sample2_rows)
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])


def _label_to_name(x):
    # Map the numeric label to the name used in the annotation.
    return 'Oct4' if x == 1 else 'Mafk'


# Annotation dict (condition name -> list of names) derived from the labels.
_label_frame = pd.DataFrame(Y[:], columns=LABELS_TEST.conditions)
annot_test = _label_frame.applymap(_label_to_name).to_dict(orient='list')

# Scorer for clustering plots based on hidden features.
heatmap_eval = Scorer('heatmap',
                      exporter=ExportClustermap(annot=annot_test,
                                                z_score=1.))

# Scorer that writes the predictions out as a table.
pred_tsv = Scorer('pred',
                  exporter=ExportTsv(annot=annot_test,
                                     row_names=DNA_TEST.gindexer.chrs))

# Evaluate on the independent test data. Once evaluation/prediction has
# run, the callbacks post-process the results so that summary statistics
# or figures are generated automatically in the JANGGU_OUTPUT directory.
model.evaluate(DNA_TEST, LABELS_TEST,
               datatags=['test'],
               callbacks=['auc', 'auprc', 'roc', 'auroc'])
inputs=DNA, outputs=ReduceDim(LABELS)) model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['acc']) hist = model.fit(DNA, ReduceDim(LABELS), epochs=100, shuffle=False) print('#' * 40) print('loss: {}, acc: {}'.format(hist.history['loss'][-1], hist.history['acc'][-1])) print('#' * 40) # clustering plots based on hidden features heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.)) tsne_eval = Scorer('tsne', exporter=ExportTsne()) # do the evaluation on the independent test data model.evaluate(DNA_TEST, ReduceDim(LABELS_TEST), datatags=['test'], callbacks=['auc', 'auprc', 'roc', 'prc']) pred = model.predict(DNA_TEST) pred = pred[:, None, None, :] cov_pred = Cover.create_from_array('BindingProba', pred, LABELS_TEST.gindexer) print('Oct4 predictions scores should be greater than Mafk scores:') print('Prediction score examples for Oct4') for i in range(4):