def test_init(self):
    """Motif built from one Module exposes it as [module] and has no Info."""
    # (sequence id, start, end) of each 'guc' occurrence in the module.
    spans = [('a', 3, 5), ('b', 3, 5), ('c', 8, 10)]
    instances = {}
    for seq_id, start, end in spans:
        instances[(seq_id, start)] = \
            ModuleInstance('guc', Location(seq_id, start, end))
    module = Module(instances)

    m = Motif(module)

    self.assertEqual(m.Modules, [module])
    self.assertEqual(m.Info, None)
def __call__(self, word_length):
    """Index every word of length word_length found in self.Alignment.

    Sets two attributes on self:
        - ModuleDict: {word: Module}; each Module maps
          (sequence key, start index) to the ModuleInstance found there.
        - ModuleOrder: list of the words in descending order of
          (len(Module.Names), word).
    """
    self.ModuleDict = {}
    for key, seq in self.Alignment.items():
        # The last window that still fits starts at len(seq) - word_length.
        window_count = len(seq) - word_length + 1
        for start in range(0, window_count):
            stop = start + word_length
            word = seq[start:stop].tostring()
            instance = ModuleInstance(word, Location(key, start, stop))
            if word not in self.ModuleDict:
                # First time this word is seen: start a new Module for it.
                self.ModuleDict[word] = Module({(key, start): instance},
                                               MolType=self.MolType)
                continue
            # Word already known: record one more occurrence.
            self.ModuleDict[word][(key, start)] = instance

    # Rank words by (count, word), largest first.  The pairs are all
    # distinct because each word is a unique dict key.
    ranked = sorted(
        [(len(mod.Names), word) for word, mod in self.ModuleDict.items()],
        reverse=True)
    self.ModuleOrder = [word for _, word in ranked]
def test_init(self):
    """MotifResults keeps its modules, motifs and results, and exposes
    each entry of the parameters dict as an attribute.
    """
    # (sequence id, start, end) of each 'guc' occurrence in the module.
    spans = [('a', 3, 5), ('b', 3, 5), ('c', 8, 10)]
    instances = {}
    for seq_id, start, end in spans:
        instances[(seq_id, start)] = \
            ModuleInstance('guc', Location(seq_id, start, end))
    module = Module(instances)
    motif = Motif([module])

    results = {'key1': 'value1', 'key2': 'value2'}
    parameters = {'parameter1': 1, 'parameter2': 2}
    mr = MotifResults([module], [motif], results, parameters)

    self.assertEqual(mr.Modules, [module])
    self.assertEqual(mr.Motifs, [motif])
    self.assertEqual(mr.Results, results)
    self.assertEqual(mr.parameter1, 1)
    self.assertEqual(mr.parameter2, 2)
def fixModuleSequence(self, module):
    """Rebuild every ModuleInstance in module from self.Alignment.

    str(module) is stored as module.Template and its length is used as
    the width of every instance.  Each (seq_id, start) key is then
    re-pointed at a new ModuleInstance whose sequence is the slice
    self.Alignment[seq_id][start:start + width].

    Returns the same module object, modified in place.
    """
    template = str(module)
    width = len(template)
    module.Template = template

    # Only values of existing keys are replaced; no keys are added or removed.
    for key, _ in module.items():
        seq_id, start = key
        end = start + width
        location = Location(seq_id, start, end)
        original_seq = self.Alignment[seq_id][start:end]
        module[key] = ModuleInstance(original_seq, location)
    return module
def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yield one Module built from a motif_block.

        - motif_block is list of lines resulting from calling
          get_motif_blocks.
        - sequence_map is the mapping between Gibbs sequence numbering and
          sequence id from fasta file.
        - truncate_len, when truthy, cuts each sequence id down to its
          first truncate_len characters.

    The yielded Module has Pvalue set from get_motif_p_value and ID set to
    the motif id followed by "1".
    """
    # The motif id is the last whitespace-separated token of the first line.
    motif_id = motif_block[0].strip().split()[-1]
    motif_list = get_motif_sequences(motif_block)
    motif_p = get_motif_p_value(motif_block)
    # The alphabet is guessed from the motif sequences themselves.
    alphabet = guess_alphabet(motif_list)

    instances = {}
    for motif in motif_list:
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]
        start = motif[1]
        seq = motif[2]
        sig = motif[3]
        location = Location(seq_id, start, start + len(seq))
        key = (seq_id, start)
        instances[key] = ModuleInstance(seq, location, sig)

    gibbs_mod = Module(instances, MolType=alphabet)
    gibbs_mod.Pvalue = motif_p
    gibbs_mod.ID = motif_id + "1"
    yield gibbs_mod
def extractModuleData(module_data, alphabet, remap_dict):
    """Build a Module object from the data of a single module.

        - Only works on 1 module at a time: only pass in data from one
          module.
        - Only the first three elements of module_data are used:
          [0] general information (only its first line is parsed),
          [1] input for getConsensusSequence,
          [2] instance lines; the first four and last two are skipped.
        - remap_dict maps the first field of an instance line to the
          sequence id used in the Module keys.

    Returns the Module with ConsensusSequence, Llr, Evalue and ID set.
    """
    sections = module_data[:3]

    general_dict = getModuleGeneralInfo(sections[0][0])
    width = int(general_dict['width'])

    # Whitespace-split every instance line before building any objects.
    rows = [line.split() for line in sections[2][4:-2]]

    instances = {}
    for fields in rows:
        seqId = remap_dict[fields[0]]
        # The reported start position is shifted down by one.
        start = int(fields[1]) - 1
        Pvalue = float(fields[2])
        sequence = fields[4]
        location = Location(seqId, start, start + width)
        instances[(seqId, start)] = \
            ModuleInstance(sequence, location, Pvalue)

    meme_module = Module(instances, MolType=alphabet)
    meme_module.ConsensusSequence = getConsensusSequence(sections[1])
    meme_module.Llr = int(general_dict['llr'])
    meme_module.Evalue = float(general_dict['E-value'])
    meme_module.ID = general_dict['MOTIF']
    return meme_module
def setUp(self):
    """Setup function for ModuleInstance tests.

    Creates four parallel fixture lists of equal length (sequences,
    locations, Pvalues, Evalues) and derives from them:
        - modules_no_e: ModuleInstance(sequence, location, Pvalue)
        - modules_p_and_e: ModuleInstance(sequence, location, Pvalue,
          Evalue)
        - modules_no_e_sorted / modules_p_and_e_sorted: instances listed
          in the order the tests expect.  Both are built WITHOUT an
          Evalue argument.
    """
    self.sequences = [
        'accucua',
        'caucguu',
        'accucua',
        'cgacucg',
        'cgaucag',
        'cuguacc',
        'cgcauca',
    ]
    self.locations = [
        Location('seq0', 1, 3),
        Location('seq1', 2, 3),
        Location('seq1', 1, 5),
        Location('seq1', 5, 3),
        Location('seq2', 3, 54),
        Location('seq2', 54, 2),
        Location('seq3', 4, 0),
    ]
    self.Pvalues = [
        .1,
        .002,
        .0000000003,
        .6,
        .0094,
        .6,
        .00201,
    ]
    self.Evalues = [
        .006,
        .02,
        .9,
        .0200000001,
        .09,
        .0000003,
        .900001,
    ]

    # Pair the parallel lists up directly rather than indexing with a
    # hard-coded length; zip works the same on Python 2 and Python 3.
    self.modules_no_e = [
        ModuleInstance(seq, loc, p)
        for seq, loc, p in zip(self.sequences, self.locations,
                               self.Pvalues)
    ]
    self.modules_p_and_e = [
        ModuleInstance(seq, loc, p, e)
        for seq, loc, p, e in zip(self.sequences, self.locations,
                                  self.Pvalues, self.Evalues)
    ]

    # Fixture indices in ascending P-value order; the two P = .6 entries
    # (indices 3 and 5) stay in index order.
    no_e_order = (2, 1, 6, 4, 0, 3, 5)
    self.modules_no_e_sorted = [
        ModuleInstance(self.sequences[idx], self.locations[idx],
                       self.Pvalues[idx])
        for idx in no_e_order
    ]

    # Same P-value order, but index 5 precedes index 3: of the two
    # P = .6 entries, index 5 has the smaller value in self.Evalues.
    p_and_e_order = (2, 1, 6, 4, 0, 5, 3)
    self.modules_p_and_e_sorted = [
        ModuleInstance(self.sequences[idx], self.locations[idx],
                       self.Pvalues[idx])
        for idx in p_and_e_order
    ]
def setUp(self):
    """SetUp for MotifFormatter class tests.

    Creates:
        - sequences, locations, Pvalues, Evalues: parallel fixture lists.
        - modules_no_e: ModuleInstance(sequence, location, Pvalue) for
          each fixture row.
        - module_with_template: Module keyed by (SeqId, Start) of every
          instance in modules_no_e, with Template 'accgucg' and ID '1'.
        - modules_with_ids: four three-instance Modules with IDs '1'-'4'.
        - motifs_with_ids: a list holding one Motif per Module in
          modules_with_ids.
        - motif_results: MotifResults over those modules and motifs.
        - color_map / color_map_rgb: style strings keyed by module ID and
          RGB tuples keyed by 'color_<ID>'.
    """
    self.sequences = [
        'accucua',
        'caucguu',
        'accucua',
        'cgacucg',
        'cgaucag',
        'cuguacc',
        'cgcauca',
    ]
    self.locations = [
        Location('seq0', 1, 3),
        Location('seq1', 2, 3),
        Location('seq1', 1, 5),
        Location('seq1', 5, 3),
        Location('seq2', 3, 54),
        Location('seq2', 54, 2),
        Location('seq3', 4, 0),
    ]
    self.Pvalues = [
        .1,
        .002,
        .0000000003,
        .6,
        .0094,
        .6,
        .00201,
    ]
    self.Evalues = [
        .006,
        .02,
        .9,
        .0200000001,
        .09,
        .0000003,
        .900001,
    ]

    # Pair the parallel lists up directly rather than indexing with a
    # hard-coded length; zip works the same on Python 2 and Python 3.
    self.modules_no_e = [
        ModuleInstance(seq, loc, p)
        for seq, loc, p in zip(self.sequences, self.locations,
                               self.Pvalues)
    ]

    # Every instance is keyed by the (SeqId, Start) of its own Location.
    self.module_with_template = Module(
        dict(((inst.Location.SeqId, inst.Location.Start), inst)
             for inst in self.modules_no_e),
        Template='accgucg',
        ID='1'
    )

    # (module ID, pattern, [(sequence id, start, end), ...]).
    # NOTE(review): the ('c', 14, 12) span has end < start; it is kept
    # exactly as in the original fixture.
    module_specs = [
        ('1', 'guc', [('a', 3, 5), ('b', 3, 5), ('c', 8, 10)]),
        ('2', 'cca', [('a', 7, 9), ('b', 7, 9), ('c', 11, 13)]),
        ('3', 'gca', [('a', 10, 12), ('b', 10, 12), ('c', 14, 12)]),
        ('4', 'ggg', [('a', 13, 15), ('b', 13, 15), ('c', 18, 20)]),
    ]
    self.modules_with_ids = [
        Module(
            dict(((seq_id, start),
                  ModuleInstance(pattern, Location(seq_id, start, end)))
                 for seq_id, start, end in spans),
            ID=module_id)
        for module_id, pattern, spans in module_specs
    ]

    # A list comprehension gives a real list on both Python 2 and 3,
    # whereas map() returns a one-shot iterator on Python 3.
    self.motifs_with_ids = [Motif(mod) for mod in self.modules_with_ids]
    self.motif_results = MotifResults(Modules=self.modules_with_ids,
                                      Motifs=self.motifs_with_ids)

    self.color_map = {
        '1': """background-color: #0000FF; ; font-family: 'Courier New', Courier""",
        '2': """background-color: #FFFF00; ; font-family: 'Courier New', Courier""",
        '3': """background-color: #00FFFF; ; font-family: 'Courier New', Courier""",
        '4': """background-color: #FF00FF; ; font-family: 'Courier New', Courier""",
    }

    self.color_map_rgb = {
        'color_1': (0.0, 0.0, 1.0),
        'color_2': (1.0, 1.0, 0.0),
        'color_3': (0.0, 1.0, 1.0),
        'color_4': (1.0, 0.0, 1.0),
    }
def setUp(self):
    """Setup for Location tests.

    Creates:
        - location_no_end: a Location built from a sequence id and start
          only.
        - locations: six Locations in mixed order.
        - locations_sorted: the same six Locations in the order the
          tests expect.
    """
    self.location_no_end = Location('seq1', 1)

    unsorted_args = [
        ('seq1', 1, 5),
        ('seq2', 3, 54),
        ('seq1', 5, 3),
        ('seq1', 2, 3),
        ('seq2', 54, 2),
        ('seq0', 1, 3),
    ]
    self.locations = [Location(*args) for args in unsorted_args]

    sorted_args = [
        ('seq0', 1, 3),
        ('seq1', 1, 5),
        ('seq1', 5, 3),
        ('seq1', 2, 3),
        ('seq2', 3, 54),
        ('seq2', 54, 2),
    ]
    self.locations_sorted = [Location(*args) for args in sorted_args]
def setUp(self):
    """SetUp for Module class tests.

    Creates:
        - sequences, locations, Pvalues, Evalues: parallel fixture lists.
        - modules_no_e: ModuleInstance(sequence, location, Pvalue) for
          each fixture row.
        - modules_p_and_e: the same, with the Evalue passed as well.
        - module_no_template: Module keyed by (SeqId, Start) of every
          instance in modules_no_e.
        - module_with_template: same contents, built with
          Template='accgucg'.
    """
    self.sequences = [
        'accucua',
        'caucguu',
        'accucua',
        'cgacucg',
        'cgaucag',
        'cuguacc',
        'cgcauca',
    ]
    self.locations = [
        Location('seq0', 1, 3),
        Location('seq1', 2, 3),
        Location('seq1', 1, 5),
        Location('seq1', 5, 3),
        Location('seq2', 3, 54),
        Location('seq2', 54, 2),
        Location('seq3', 4, 0),
    ]
    self.Pvalues = [
        .1,
        .002,
        .0000000003,
        .6,
        .0094,
        .6,
        .00201,
    ]
    self.Evalues = [
        .006,
        .02,
        .9,
        .0200000001,
        .09,
        .0000003,
        .900001,
    ]

    # Pair the parallel lists up directly rather than indexing with a
    # hard-coded length; zip works the same on Python 2 and Python 3.
    self.modules_no_e = [
        ModuleInstance(seq, loc, p)
        for seq, loc, p in zip(self.sequences, self.locations,
                               self.Pvalues)
    ]
    self.modules_p_and_e = [
        ModuleInstance(seq, loc, p, e)
        for seq, loc, p, e in zip(self.sequences, self.locations,
                                  self.Pvalues, self.Evalues)
    ]

    # Each Module gets its own freshly built dict, keyed by the
    # (SeqId, Start) of every instance's Location.
    self.module_no_template = Module(
        dict(((inst.Location.SeqId, inst.Location.Start), inst)
             for inst in self.modules_no_e)
    )
    self.module_with_template = Module(
        dict(((inst.Location.SeqId, inst.Location.Start), inst)
             for inst in self.modules_no_e),
        Template='accgucg'
    )