def double_stranded_model_dnaconv(inputs, inp, oup, params):
    """Build a network body that convolves the 'dna' input on both strands.

    A ``Conv2D`` layer is wrapped in ``DnaConv2D`` and applied to the layer
    obtained from ``inputs.use('dna')``.  The result is reduced with a
    ``LocalAveragePooling2D`` layer named ``'motif'`` whose window size is
    taken from axis 1 of the convolution output.

    Parameters
    ----------
    inputs
        Object providing the ``use(name)`` context manager that yields the
        input layer.
    inp, oup
        Not used by this function body.
    params
        Indexable; ``params[0]`` is the first ``Conv2D`` argument,
        ``params[1]`` the kernel extent along the first kernel axis and
        ``params[2]`` the activation.

    Returns
    -------
    tuple
        ``(inputs, output)`` where ``output`` is the pooled tensor.
    """
    with inputs.use('dna') as dna_layer:
        conv = Conv2D(params[0], (params[1], 1), activation=params[2])
        scanned = DnaConv2D(conv)(dna_layer)

    # Pool over the complete extent of axis 1 of the convolution output.
    window = scanned.shape.as_list()[1]
    output = LocalAveragePooling2D(window_size=window, name='motif')(scanned)
    return inputs, output
def _cnn_model1(inputs, inp, oup, params):
    """Return a minimal model body with one double-stranded convolution.

    The 'dna' input layer is passed through a ``Conv2D`` layer (5 filters,
    kernel ``(3, 1)``, named ``'fconv1'``) wrapped in a ``DnaConv2D`` layer
    named ``'bothstrands'`` that uses ``merge_mode='max'``.

    ``inp``, ``oup`` and ``params`` are not used by this function body.

    Returns
    -------
    tuple
        ``(inputs, layer)`` where ``layer`` is the convolution output.
    """
    with inputs.use('dna') as inlayer:
        conv = Conv2D(5, (3, 1), name='fconv1')
        both_strands = DnaConv2D(conv, merge_mode='max', name='bothstrands')
        layer = both_strands(inlayer)
    return inputs, layer
def test_dnaconv():
    """Check DnaConv2D against an explicit two-strand computation.

    The output of a ``DnaConv2D``-wrapped convolution is compared with the
    element-wise maximum of (a) the wrapped forward layer applied to the
    input and (b) the same layer applied to the reversed, complemented input
    with its output reversed again.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, 'sample.bed')
    genome_path = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome_path,
        storage='ndarray',
        roi=roi_path,
        order=1,
    )

    def first_position(model):
        # Prediction for the first sample, restricted to index 0 of axes 1
        # and 2; every entry along the last axis is kept.
        return model.predict(dna[0])[0, 0, 0, :]

    xin = Input(dna.shape[1:])

    # Model 1: the DnaConv2D wrapper itself.
    wrapped_out = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
    wrapped_model = Model(xin, wrapped_out)
    expected = first_position(wrapped_model)

    # Model 2: forward only, reusing the layer held by the wrapper.
    forward_conv = wrapped_model.layers[1].forward_layer
    forward_model = Model(xin, forward_conv(xin))
    forward_scores = first_position(forward_model)

    # Model 3: same layer on the reverse-complemented input; the output is
    # reversed so that it can be compared with the forward scan.
    revcomp_in = Reverse()(Complement()(xin))
    revcomp_out = Reverse()(forward_conv(revcomp_in))
    revcomp_model = Model(xin, revcomp_out)
    revcomp_scores = first_position(revcomp_model)

    merged = np.maximum(revcomp_scores, forward_scores)
    np.testing.assert_allclose(expected, merged, rtol=1e-4)
def test_dnaconv2():
    """Check DnaConv2D when wrapping an already-instantiated Conv2D layer.

    The convolution layer is called on the input before it is handed to
    ``DnaConv2D``.  The test then verifies that the wrapper's forward layer
    carries the same kernel weights and that the original layer still has
    exactly two weight variables.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, 'sample.bed')
    genome_path = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome_path,
        storage='ndarray',
        roi=roi_path,
        order=1,
    )

    xin = Input(dna.shape[1:])

    # Call the plain convolution first so it exists before being wrapped.
    conv = Conv2D(30, (21, 1), activation='relu')
    conv(xin)

    wrapped_model = Model(xin, DnaConv2D(conv)(xin))

    # The prediction is not compared with anything; it only has to run.
    _ = wrapped_model.predict(dna[0])[0, 0, 0, :]

    wrapped_kernel = wrapped_model.layers[1].forward_layer.get_weights()[0]
    np.testing.assert_allclose(conv.get_weights()[0], wrapped_kernel)
    assert len(conv.weights) == 2
def double_stranded_model_dnaconv(inputs, inp, oup, params):
    """Build a network body that convolves the 'dna' input on both strands.

    A ``Conv2D`` layer wrapped in a ``DnaConv2D`` layer named ``'conv1'`` is
    applied to the layer obtained from ``inputs.use('dna')``; the result is
    reduced by a ``GlobalAveragePooling2D`` layer named ``'motif'``.

    Parameters
    ----------
    inputs
        Object providing the ``use(name)`` context manager that yields the
        input layer.
    inp, oup
        Not used by this function body.
    params
        Indexable; ``params[0]`` is the first ``Conv2D`` argument,
        ``params[1]`` the kernel extent along the first kernel axis and
        ``params[2]`` the activation.

    Returns
    -------
    tuple
        ``(inputs, output)`` where ``output`` is the pooled tensor.
    """
    with inputs.use('dna') as dna_layer:
        kernel_size = (params[1], 1)
        inner = Conv2D(params[0], kernel_size, activation=params[2])
        scanned = DnaConv2D(inner, name='conv1')(dna_layer)

    pooling = GlobalAveragePooling2D(name='motif')
    return inputs, pooling(scanned)
def double_stranded_model_dnaconv(inputs, inp, oup, params):
    """Keras model body that scans both DNA strands for motif matches.

    Both strands are handled by the DnaConv2D layer wrapper, which
    internally performs the convolution with the normal kernel weights as
    well as with the reverse-complemented weights.

    Parameters
    ----------
    inputs
        Object providing the ``use(name)`` context manager that yields the
        input layer.
    inp, oup
        Not used by this function body.
    params
        Indexable; ``params[0]`` is the first ``Conv2D`` argument,
        ``params[1]`` the kernel extent along the first kernel axis and
        ``params[2]`` the activation.

    Returns
    -------
    tuple
        ``(inputs, output)`` where ``output`` comes from the
        ``GlobalAveragePooling2D`` layer named ``'motif'``.
    """
    # The name given to inputs.use() should match the dataset name.
    with inputs.use('dna') as dna_layer:
        motif_conv = Conv2D(
            params[0],
            (params[1], 1),
            activation=params[2],
        )
        scanned = DnaConv2D(motif_conv)(dna_layer)

    output = GlobalAveragePooling2D(name='motif')(scanned)
    return inputs, output
def dna_model(inputs, inp, oup, params):
    """Build a two-stage convolutional body on top of the 'dna' input.

    Pipeline: optional ``Dropout`` -> first ``Conv2D`` (wrapped in
    ``DnaConv2D`` when ``params['stranded'] == 'double'``) ->
    ``MaxPooling2D`` -> ``BatchNormalization`` -> second ``Conv2D``.

    Parameters
    ----------
    inputs
        Object providing the ``use(name)`` context manager that yields the
        input layer.
    inp, oup
        Not used by this function body.
    params
        Mapping read with the keys ``'seq_dropout'``, ``'nmotifs1'``,
        ``'motiflen'``, ``'stranded'``, ``'pool1'``, ``'nmotifs2'`` and
        ``'hypermotiflen'``.

    Returns
    -------
    tuple
        ``(inputs, layer)`` where ``layer`` is the output of the second
        convolution.
    """
    with inputs.use('dna') as dna_in:
        tensor = dna_in

        # Dropout is only inserted for a strictly positive rate.
        dropout_rate = params['seq_dropout']
        if dropout_rate > 0.0:
            tensor = Dropout(dropout_rate)(tensor)

        first_conv = Conv2D(
            params['nmotifs1'],
            (params['motiflen'], 1),
            activation='relu',
        )
        # Any value other than 'double' applies the plain convolution.
        if params['stranded'] == 'double':
            first_conv = DnaConv2D(first_conv)
        tensor = first_conv(tensor)

        tensor = MaxPooling2D((params['pool1'], 1))(tensor)
        tensor = BatchNormalization()(tensor)

        second_conv = Conv2D(
            params['nmotifs2'],
            (params['hypermotiflen'], 1),
            activation='relu',
        )
        tensor = second_conv(tensor)
    return inputs, tensor
def dna_model_(inputs, inp, oup, params):
    """Build a named two-stage convolutional body on the 'dna' input.

    Pipeline: ``Dropout`` (always applied) -> first ``Conv2D`` (wrapped in
    ``DnaConv2D`` when ``params['stranded'] == 'double'``) ->
    ``MaxPooling2D`` -> ``BatchNormalization`` -> second ``Conv2D`` ->
    ``GlobalMaxPooling2D`` -> ``BatchNormalization``.  Every layer receives
    an explicit name.

    Parameters
    ----------
    inputs
        Object providing the ``use(name)`` context manager that yields the
        input layer.
    inp, oup
        Not used by this function body.
    params
        Mapping read with the keys ``'seq_dropout'``, ``'nmotifs1'``,
        ``'motiflen'``, ``'stranded'``, ``'pool1'``, ``'nmotifs2'`` and
        ``'hypermotiflen'``.

    Returns
    -------
    tuple
        ``(inputs, layer)`` where ``layer`` is the output of the final
        batch normalization.
    """
    with inputs.use('dna') as dna_in:
        tensor = Dropout(params['seq_dropout'], name='dna_dropout_1')(dna_in)

        first_conv = Conv2D(
            params['nmotifs1'],
            (params['motiflen'], 1),
            activation='relu',
            name='dna_conv2d_1',
        )
        # Any value other than 'double' applies the plain convolution.
        if params['stranded'] == 'double':
            first_conv = DnaConv2D(first_conv, name='dna_dnaconv2d_2')
        tensor = first_conv(tensor)

        tensor = MaxPooling2D(
            (params['pool1'], 1),
            name='dna_maxpooling1',
        )(tensor)
        tensor = BatchNormalization(name='dna_batchnorm_1')(tensor)

        second_conv = Conv2D(
            params['nmotifs2'],
            (params['hypermotiflen'], 1),
            activation='relu',
            name='dna_conv2d_2',
        )
        tensor = second_conv(tensor)

        tensor = GlobalMaxPooling2D(name='global_max_pooling')(tensor)
        tensor = BatchNormalization(name='dna_batchnorm_2')(tensor)
    return inputs, tensor
# Both the training input and the labels are defined purely through
# genomic coordinates (the same region of interest and bin size).
DNA = Bioseq.create_from_refgenome(
    'dna',
    refgenome=REFGENOME,
    roi=ROI_TRAIN,
    binsize=200,
)
LABELS = Cover.create_from_bed(
    'peaks',
    roi=ROI_TRAIN,
    bedfiles=PEAK_FILE,
    binsize=200,
    resolution=None,
)

# Keras model definition: double-stranded convolution, global average
# pooling and a single sigmoid output unit.
xin = Input((200, 1, 4))
layer = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
layer = GlobalAveragePooling2D()(layer)
layer = Dense(1, activation='sigmoid')(layer)

# The final reshape makes the output dimensionality compatible with the
# dimensions of the coverage dataset.  Alternatively, the ReduceDim dataset
# wrapper may be used to transform the output to a 2D dataset object.
output = Reshape((1, 1, 1))(layer)

model = Model(xin, output)
model.compile(
    optimizer='adadelta',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()
roi=ROI_TRAIN, binsize=200) LABELS = Cover.create_from_bed('peaks', roi=ROI_TRAIN, bedfiles=PEAK_FILE, binsize=200, resolution=None) # define a keras model here xin = Input((200, 1, 4), name="dna") convl = Conv2D(30, (21, 1), activation='relu') if args.model == 'double': layer = DnaConv2D(convl)(xin) else: layer = convl(xin) layer = GlobalAveragePooling2D()(layer) layer = Dense(1, activation='sigmoid')(layer) # the last one is used to make the dimensionality compatible with # the coverage dataset dimensions. # Alternatively, the ReduceDim dataset wrapper may be used to transform # the output to a 2D dataset object. output = Reshape((1, 1, 1), name="peaks")(layer) model = Model(xin, output) model.compile(optimizer='adadelta',