# NOTE(review): this chunk is one collapsed line in the source; reformatted for
# readability, code tokens unchanged. Builds a two-branch concatenation model
# (regulator + sequence) for scalar expression regression (Keras 1.x API).
print('INFO - %s' % ('building concatenate model.'))
# Fully-connected head over the concatenated branch outputs.
# `reg_output`/`seq_output` and the matching Input tensors are defined
# upstream of this chunk -- presumably regulator-expression and
# promoter-sequence sub-networks; confirm against the surrounding file.
units = 512
x = Merge(mode='concat', concat_axis=1)([reg_output, seq_output])
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)  # fixed seed -> reproducible dropout masks
x = Dense(units, activation='relu')(x)
x = Dropout(0.5, seed=42)(x)
# Single linear unit: one scalar expression prediction per example.
rgs_output = Dense(1, activation='linear', name='rgs_output')(x)
model = Model(input=[reg_input, seq_input], output=[rgs_output])
model.compile(loss={'rgs_output': 'mean_squared_error'}, optimizer='sgd')
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))
print('INFO - %s' % ('loading data.'))
# 80/10/10 split; mode='whole_experiment' presumably holds out entire
# experiments rather than random rows -- TODO confirm in input_data.
train, val, test = input_data.read_data_sets(train_pct=80, val_pct=10, test_pct=10, mode='whole_experiment')
print('INFO - %s' % ('training model.'))
# Halve the learning rate after 5 stagnant epochs; stop after 10;
# checkpoint every epoch with epoch/val_loss in the filename.
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss')
# Project-local callback: records validation MSE every 1000 batches.
batchhistory = BatchHistory(val_data=val, loss_function='mse', every_n_batch=1000)
# NOTE(review): chunk is truncated mid-call in the source; the remaining
# fit() arguments live in a part of the file not shown here.
history = model.fit(
    {
        'seq_input': train['seq'],
        'reg_input': train['reg']
# NOTE(review): chunk starts mid-expression -- the layer being closed here
# (named 'scaled_rgs_output') is constructed on lines not shown. Collapsed
# source reformatted; code tokens unchanged.
    name='scaled_rgs_output')([rgs_output, label_output])
# Two-output model: scaled regression head + binary label head.
# loss_weights=[1., 0.] -> only the regression loss drives gradients; the
# label output is compiled (and so monitored) but contributes no training
# signal.
model = Model(input=[reg_input, seq_input], output=[scaled_rgs_output, label_output])
model.compile(loss={
    'scaled_rgs_output': 'mean_squared_error',
    'label_output': 'binary_crossentropy'
}, optimizer=SGD(lr=0.01, momentum=0.5), loss_weights=[1., 0.])
plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir))
print('INFO - %s' % ('loading data.'))
# conv1d=True: sequence arrays shaped for Conv1D layers; add_shuffle=True
# presumably appends shuffled sequences as negatives for the label head --
# TODO confirm in input_data.read_data_sets.
train, val, test = input_data.read_data_sets(train_pct=80, val_pct=10, test_pct=10, conv1d=True, add_shuffle=True)
print('INFO - %s' % ('training model.'))
reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5)
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
# save_best_only=True: unlike the sibling scripts, keep only checkpoints
# that improve val_loss.
checkpoint = ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir),
    monitor='val_loss', save_best_only=True)
# NOTE(review): chunk truncated mid-fit() in the source; remaining
# arguments are not shown here.
history = model.fit({
    'seq_input': train['seq'],
    'reg_input': train['reg']
}, {
    'scaled_rgs_output': train['expr'],
x = Merge(mode='concat', concat_axis=1)([reg_output, seq_output]) x = Dense(units, activation='relu')(x) x = Dropout(0.5, seed=42)(x) x = Dense(units, activation='relu')(x) x = Dropout(0.5, seed=42)(x) cls_output = Dense(3, activation='sigmoid', name='cls_output')(x) model = Model(input=[reg_input, seq_input], output=[cls_output]) model.compile(loss={'cls_output': 'categorical_crossentropy'}, optimizer='sgd', metric='accuracy') plot(model, show_shapes=True, to_file='%s/model.eps' % (fig_dir)) print('INFO - %s' % ('loading data.')) train, val, test = input_data.read_data_sets(train_pct=80, val_pct=10, test_pct=10) print('INFO - %s' % ('training model.')) reduce_lr = ReduceLROnPlateau(verbose=1, factor=0.5, patience=5) early_stopping = EarlyStopping(monitor='val_loss', patience=10) checkpoint = ModelCheckpoint( filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5" % (log_dir), monitor='val_loss') history = model.fit({ 'seq_input': train['seq'], 'reg_input': train['reg'] }, {'cls_output': to_categorical(train['class'])}, validation_data=({ 'seq_input': val['seq'], 'reg_input': val['reg']
# NOTE(review): chunk opens on the tail of a clustering/plot helper whose
# `def` line is outside this chunk (it labels and draws a dendrogram over
# `z` / `diff_t`, both defined upstream). Collapsed source reformatted;
# code tokens unchanged.
plt.ylabel('distance')
dendrogram(z,leaf_rotation=90,labels=diff_t.index)
plt.tight_layout()

def plot_orig_vs_perturbed(x,y,ylab):
    """Scatter original predictions (x) against perturbed predictions (y)."""
    plt.figure()
    plt.scatter(x,y)
    plt.xlabel('Original')
    plt.ylabel(ylab)

# wildtype model:
# Load a trained regression checkpoint and predict on the full dataset
# (train_pct=100 -> no val/test split).
model_fn='../logs/concatenation/regression/model.41-0.2300.hdf5'
model=load_model(model_fn)
full_data,_,_=input_data.read_data_sets(train_pct=100,val_pct=0,test_pct=0)
pred=model.predict({'seq_input':full_data['seq'],'reg_input':full_data['reg']},batch_size=100,verbose=1)

# MSN2/4 motif KO:
# Re-predict on promoter sequences with MSN2/4 motifs knocked out and compare
# against the wildtype predictions.
full_data,_,_=input_data.read_data_sets(train_pct=100,val_pct=0,test_pct=0,seq_file='%s/yeast_promoters.msn24_ko.fa'%(seq_dir))
pred_2=model.predict({'seq_input':full_data['seq'],'reg_input':full_data['reg']},batch_size=100,verbose=1)
plot_orig_vs_perturbed(pred, pred_2, 'MSN2/4 motif KO')
plt.savefig('%s/orig_vs_msn24_motif_ko.png'%fig_dir)
# hclust is a project helper defined elsewhere in the file -- presumably
# hierarchical clustering of the original-vs-perturbed differences.
hclust(pred, pred_2, full_data)
plt.savefig('%s/hclust_msn24_motif_ko.pdf'%fig_dir)
# DeepLIFT importance-score setup for a trained classification checkpoint.
# Collapsed source reformatted; code tokens unchanged.
from deeplift.blobs import NonlinearMxtsMode
from deeplift.visualization import viz_sequence
import os
from sklearn.cluster.bicluster import SpectralBiclustering
from sklearn.metrics import consensus_score
from scipy.stats import ranksums
from scipy.stats import ttest_ind
from collections import OrderedDict

# Load model:
model_fn = '../logs/concatenation/classification/model.32-0.5284.hdf5'
model = load_model(model_fn)
# Entire dataset in one split (no val/test) -- scoring everything.
full_data, _, _ = input_data.read_data_sets(train_pct=100, val_pct=0, test_pct=0)

#NonlinearMxtsMode defines the method for computing importance scores.
#NonlinearMxtsMode.DeepLIFT_GenomicsDefault uses the RevealCancel rule on Dense layers
#and the Rescale rule on conv layers (see paper for rationale)
#Other supported values are:
#NonlinearMxtsMode.RevealCancel - DeepLIFT-RevealCancel at all layers (used for the MNIST example)
#NonlinearMxtsMode.Rescale - DeepLIFT-rescale at all layers
#NonlinearMxtsMode.Gradient - the 'multipliers' will be the same as the gradients
#NonlinearMxtsMode.GuidedBackprop - the 'multipliers' will be what you get from guided backprop
#Use deeplift.util.get_integrated_gradients_function to compute integrated gradients
#Feel free to email avanti [dot] [email protected] if anything is unclear
# NOTE(review): chunk truncated mid-attribute-access in the source; the
# chosen NonlinearMxtsMode member is on a line not shown here.
deeplift_model = kc.convert_functional_model(
    model,
    nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.
# NOTE(review): chunk starts mid-network -- earlier layers (and `units`,
# `num_treatment`, `seq_input`) are defined upstream. Sequence-only
# multi-task regression: one linear head per treatment (Keras 1.x API;
# `xrange` indicates Python 2). Collapsed source reformatted; code tokens
# unchanged.
x=Dropout(0.5,seed=42)(x)
x=Dense(units)(x)
x=Activation('relu')(x)
x=Dropout(0.5,seed=42)(x)
# One named scalar regression output per treatment column.
rgs_output=[]
for i in xrange(num_treatment):
    rgs_output.append(Dense(1,name='rgs_output_%s'%i)(x))
model=Model(input=[seq_input],output=rgs_output)
# Single loss string -> mean_squared_error applied to every output head.
model.compile(loss='mean_squared_error',optimizer=SGD(lr=0.001))
plot(model, show_shapes=True, to_file='%s/model.eps'%(fig_dir))
print('INFO - %s'%('loading data.'))
# seq_only=True: no regulator branch; conv1d=True shapes sequences for Conv1D.
train,val,test=input_data.read_data_sets(train_pct=90,val_pct=5,test_pct=5,conv1d=True,seq_only=True)
print('INFO - %s'%('training model.'))
reduce_lr=ReduceLROnPlateau(verbose=1,factor=0.5, patience=5)
early_stopping=EarlyStopping(monitor='val_loss',patience=10)
checkpoint=ModelCheckpoint(filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5"%(log_dir), monitor='val_loss')
# Map each treatment's expression column (pandas .iloc column i) to its
# named output head for both training and validation targets.
train_output_dict=dict()
val_output_dict=dict()
for i in xrange(num_treatment):
    train_output_dict['rgs_output_%s'%i]=train['expr'].iloc[:,i]
    val_output_dict['rgs_output_%s'%i]=val['expr'].iloc[:,i]
# NOTE(review): chunk truncated mid-fit() in the source; remaining
# arguments are not shown here.
history=model.fit({'seq_input':train['seq']},train_output_dict,
    validation_data=({'seq_input':val['seq']},val_output_dict),
    nb_epoch=200,
# --- Concatenation model: expression regression on whole-gene splits ---
# (Keras 1.x API: Merge, input=/output=, nb_epoch.)
print('INFO - %s'%('building concatenate model.'))

units=512

# Fully-connected head over the concatenated regulator/sequence branches.
# reg_output/seq_output and the two Input tensors are built upstream.
h=Merge(mode='concat',concat_axis=1)([reg_output,seq_output])
for _ in range(2):
    h=Dense(units,activation='relu')(h)
    h=Dropout(0.5,seed=42)(h)  # fixed seed -> reproducible dropout masks
rgs_output=Dense(1,activation='linear',name='rgs_output')(h)

model=Model(input=[reg_input,seq_input],output=[rgs_output])
model.compile(loss={'rgs_output':'mean_squared_error'},optimizer='sgd')
plot(model, show_shapes=True,to_file='%s/model.eps'%(fig_dir))

print('INFO - %s'%('loading data.'))
# 80/10/10 split, grouped by whole gene.
train,val,test=input_data.read_data_sets(
    train_pct=80,val_pct=10,test_pct=10,mode='whole_gene')

print('INFO - %s'%('training model.'))
# Stop after 10 stagnant epochs; halve LR after 5; checkpoint each epoch;
# project-local BatchHistory records validation MSE every 1000 batches.
early_stopping=EarlyStopping(monitor='val_loss',patience=10)
reduce_lr=ReduceLROnPlateau(verbose=1,factor=0.5, patience=5)
checkpoint=ModelCheckpoint(
    filepath="%s/model.{epoch:02d}-{val_loss:.4f}.hdf5"%(log_dir),
    monitor='val_loss')
batchhistory=BatchHistory(val_data=val,loss_function='mse',every_n_batch=1000)

train_inputs={'seq_input':train['seq'],'reg_input':train['reg']}
val_inputs={'seq_input':val['seq'],'reg_input':val['reg']}
history=model.fit(train_inputs,{'rgs_output':train['expr']},
    validation_data=(val_inputs,{'rgs_output':val['expr']}),
    nb_epoch=100,
    batch_size=100,
    callbacks=[early_stopping,checkpoint,reduce_lr,batchhistory],
    verbose=1)

# Persist epoch-level history plus per-batch validation loss for later plots.
with open('%s/history.pkl'%(log_dir),'wb') as f:
    pickle.dump([history.history,batchhistory.val_loss],f)