def main_interpret(sequences=None, arch_file=None, weights_file=None,
                   pos_threshold=None, peak_width=10, prefix=None):
    """Score sequences with DeepLIFT and write per-task peak windows to file.

    For each task, every sequence whose model prediction exceeds
    ``pos_threshold`` contributes the ``2 * peak_width``-wide window of
    one-hot sequence centered on its highest basewise DeepLIFT score;
    other sequences get a position of -1 and an all-zero window.

    Parameters
    ----------
    sequences : path to a FASTA file (consumed by encode_fasta_sequences).
    arch_file, weights_file : model architecture / weights files for
        SequenceDNN.load.
    pos_threshold : float; prediction cutoff for "positive" sequences.
    peak_width : int; half-width of the extracted window.
    prefix : str; output files are named
        "<prefix>.task_<i>.important_sequences.txt".

    Side effects: downloads nothing, but writes one text file per task.
    """
    # encode fasta
    print("loading sequence data...")
    X = encode_fasta_sequences(sequences)
    # load model
    print("loading model...")
    model = SequenceDNN.load(arch_file, weights_file)
    # predict
    print("getting predictions...")
    predictions = model.predict(X)
    # deeplift
    print("getting deeplift scores...")
    deeplift_scores = model.deeplift(X)
    # get important sequences and write to file
    print("extracting important sequences and writing to file...")
    # X is assumed (n_sequences, 1, 4, sequence_length) — last axis is position.
    sequence_length = X.shape[-1]
    for task_index, task_scores in enumerate(deeplift_scores):
        peak_positions = []
        peak_sequences = []
        for sequence_index, sequence_scores in enumerate(task_scores):
            if predictions[sequence_index, task_index] > pos_threshold:
                # Collapse channel axes to a per-base score profile.
                basewise_sequence_scores = sequence_scores.max(axis=(0, 1))
                peak_position = int(basewise_sequence_scores.argmax())
                peak_positions.append(peak_position)
                # BUG FIX: clamp the window to the sequence bounds. The
                # original used peak_position - peak_width directly; a
                # negative start index slices from the wrong end (often an
                # empty array) and np.concatenate then fails because the
                # negative branch always appends a 2*peak_width-wide block.
                start = min(max(peak_position - peak_width, 0),
                            max(sequence_length - 2 * peak_width, 0))
                peak_sequences.append(
                    X[sequence_index:sequence_index + 1, :, :,
                      start:start + 2 * peak_width])
            else:
                peak_positions.append(-1)
                peak_sequences.append(np.zeros((1, 1, 4, 2 * peak_width)))
        peak_sequences = np.concatenate(peak_sequences)
        peak_sequence_strings = get_sequence_strings(peak_sequences)
        # write important sequences to file
        ofname = "%s.task_%i.important_sequences.txt" % (prefix, task_index)
        with open(ofname, "w") as wf:
            for i, peak_position in enumerate(peak_positions):
                wf.write("> sequence_%i\n" % (i))
                wf.write("%i: %s\n" % (peak_position, peak_sequence_strings[i]))
    print("Done!")
def main_interpret(sequences=None, arch_file=None, weights_file=None,
                   pos_threshold=None, peak_width=10, prefix=None):
    """Extract DeepLIFT-important subsequences per task and write them out.

    Sequences predicted above ``pos_threshold`` for a task contribute the
    ``2 * peak_width`` window of one-hot input around their top basewise
    DeepLIFT score; the rest are recorded with position -1 and a zero
    window. One output file is written per task:
    "<prefix>.task_<i>.important_sequences.txt".
    """
    # encode fasta
    print("loading sequence data...")
    X = encode_fasta_sequences(sequences)
    # load model
    print("loading model...")
    model = SequenceDNN.load(arch_file, weights_file)
    # predict
    print("getting predictions...")
    predictions = model.predict(X)
    # deeplift
    print("getting deeplift scores...")
    deeplift_scores = model.deeplift(X)
    # get important sequences and write to file
    print("extracting important sequences and writing to file...")
    # Last axis of X is sequence position (assumed layout: n, 1, 4, length).
    seq_len = X.shape[-1]
    window = 2 * peak_width
    for task_index, task_scores in enumerate(deeplift_scores):
        peak_positions = []
        peak_sequences = []
        for sequence_index, sequence_scores in enumerate(task_scores):
            if predictions[sequence_index, task_index] > pos_threshold:
                # Reduce channel axes to one score per base.
                basewise = sequence_scores.max(axis=(0, 1))
                peak = int(basewise.argmax())
                peak_positions.append(peak)
                # BUG FIX: the original slice peak - peak_width could go
                # negative near the left edge, silently slicing from the
                # sequence end and breaking np.concatenate with a ragged
                # width. Clamp so every window is exactly `window` wide.
                lo = min(max(peak - peak_width, 0), max(seq_len - window, 0))
                peak_sequences.append(
                    X[sequence_index:sequence_index + 1, :, :, lo:lo + window])
            else:
                peak_positions.append(-1)
                peak_sequences.append(np.zeros((1, 1, 4, window)))
        peak_sequences = np.concatenate(peak_sequences)
        peak_sequence_strings = get_sequence_strings(peak_sequences)
        # write important sequences to file
        ofname = "%s.task_%i.important_sequences.txt" % (prefix, task_index)
        with open(ofname, "w") as wf:
            for i, peak_position in enumerate(peak_positions):
                wf.write("> sequence_%i\n" % (i))
                wf.write("%i: %s\n" % (peak_position, peak_sequence_strings[i]))
    print("Done!")
# Consistency check: dinuc_shuffle must produce the same shuffle whether it
# is given a sequence as a string or as a one-hot numpy array, provided the
# RNG is seeded identically before each call.
from deeplift.dinuc_shuffle import dinuc_shuffle
from dragonn.utils import get_sequence_strings
import random

import numpy as np
import wget

url = "http://mitra.stanford.edu/kundaje/projects/dragonn/deep_lift_input_classification_spi1.npy"
wget.download(url)
deep_lift_input_classification_spi1 = np.load(
    "deep_lift_input_classification_spi1.npy")
print(deep_lift_input_classification_spi1.shape)
deep_lift_input_classification_spi1_strings = get_sequence_strings(
    deep_lift_input_classification_spi1)

all_passed = True
for i in range(len(deep_lift_input_classification_spi1)):
    # Re-seed before each call so both code paths draw the identical
    # random permutation.
    random.seed(1234)
    shuffled_strings = dinuc_shuffle(
        deep_lift_input_classification_spi1_strings[i])
    random.seed(1234)
    shuffled_array = dinuc_shuffle(
        deep_lift_input_classification_spi1[i].squeeze())
    # decode the array back into a string for comparison
    shuffled_array = ''.join(
        get_sequence_strings(
            np.expand_dims(np.expand_dims(shuffled_array, axis=1), axis=1)))
    # make sure shuffling the string and numpy array gave same shuffle output
    if shuffled_strings != shuffled_array:
        all_passed = False
        print("FAILED!")
# BUG FIX: the original printed "TEST PASSED!" unconditionally, even after
# one or more "FAILED!" lines; only report success when every sequence
# matched.
if all_passed:
    print("TEST PASSED!")
# Demo: compute DeepLIFT scores for SPI1 classification inputs under two
# reference choices — a GC-content reference and a dinucleotide-shuffled
# reference (the latter starting from raw sequence strings, one-hot encoded
# via one_hot_func).
from keras.models import load_model
from dragonn.tutorial_utils import deeplift
from dragonn.utils import get_sequence_strings, one_hot_encode
from deeplift import dinuc_shuffle
import numpy as np
import wget

# Example inputs (one-hot .npy) and a trained SPI1 classification model.
url_data = "http://mitra.stanford.edu/kundaje/projects/dragonn/deep_lift_input_classification_spi1.npy"
url_model = "http://mitra.stanford.edu/kundaje/projects/dragonn/SPI1.classification.model.hdf5"
wget.download(url_data)
wget.download(url_model)
deep_lift_input_classification_spi1 = np.load(
    "deep_lift_input_classification_spi1.npy")
# String form of the same sequences, used for the shuffled-reference run.
deep_lift_input_classification_spi1_strings = get_sequence_strings(
    deep_lift_input_classification_spi1)
#get scores with GC reference
deep_lift_scores_spi1_gc_ref = deeplift("SPI1.classification.model.hdf5",
                                        deep_lift_input_classification_spi1,
                                        reference="gc_ref")
print(deep_lift_scores_spi1_gc_ref.shape)
print(np.max(deep_lift_scores_spi1_gc_ref))
print(np.min(deep_lift_scores_spi1_gc_ref))
#Get scores with shuffled reference (starting with strings )
# NOTE(review): no reference= argument here — presumably the deeplift helper
# defaults to a shuffled reference; confirm against dragonn.tutorial_utils.
deep_lift_scores_spi1_shuffled_ref_strings = deeplift(
    "SPI1.classification.model.hdf5",
    deep_lift_input_classification_spi1_strings,
    one_hot_func=one_hot_encode)
print(deep_lift_scores_spi1_shuffled_ref_strings.shape)