def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yields one Module object built from a motif_block and sequence_map.

    This is a generator: no work is done until it is iterated, and it
    yields exactly one Module.

        - motif_block is list of lines resulting from calling
            get_motif_blocks. The last whitespace-separated token of its
            first line is used as the motif id.
        - sequence_map is the mapping between Gibbs sequence numbering and
            sequence id from fasta file.
        - truncate_len: when truthy, each sequence id is cut down to its
            first truncate_len characters. None or 0 leaves ids untouched.

    The yielded Module has Pvalue set from get_motif_p_value(motif_block)
    and ID set to the motif id with "1" appended.
    """
    #Suffix appended to the motif id to form the Module ID
    module_suffix = "1"
    #Get motif id
    motif_id = motif_block[0].strip().split()[-1]
    #Get motif_list
    motif_list = get_motif_sequences(motif_block)
    #Get motif p-value
    motif_p = get_motif_p_value(motif_block)
    #Guess alphabet from motif sequences
    alphabet = guess_alphabet(motif_list)
    #Collect ModuleInstances keyed by (sequence id, start)
    gibbs_module = {}
    for motif in motif_list:
        #motif[0] is the key into sequence_map
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]
        start = motif[1]
        seq = motif[2]
        #motif[3] is handed to ModuleInstance unchanged
        #NOTE(review): presumably a per-instance significance - confirm
        sig = motif[3]
        #Create Location object; the end is start plus the sequence length
        location = Location(seq_id, start, start + len(seq))
        #Create ModuleInstance
        #Two motifs with the same (seq_id, start) overwrite each other
        gibbs_module[(seq_id, start)] = ModuleInstance(seq, location, sig)
    #Create Module object
    gibbs_mod = Module(gibbs_module, MolType=alphabet)
    gibbs_mod.Pvalue = motif_p
    gibbs_mod.ID = motif_id + module_suffix
    yield gibbs_mod
def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Generates the Module object for a motif_block and sequence_map.

    The function is a generator that yields a single Module; callers must
    iterate over the result to obtain it.

        - motif_block is list of lines resulting from calling
            get_motif_blocks. The motif id is the last whitespace-separated
            token on its first line.
        - sequence_map is the mapping between Gibbs sequence numbering and
            sequence id from fasta file.
        - truncate_len, if truthy, is the number of leading characters of
            each sequence id to keep. A falsy value (None, 0) keeps the
            whole id.

    The Module's Pvalue comes from get_motif_p_value(motif_block); its ID is
    the motif id followed by "1".
    """
    #Get motif id
    motif_id = motif_block[0].strip().split()[-1]
    #Get motif_list
    motif_list = get_motif_sequences(motif_block)
    #Get motif p-value
    motif_p = get_motif_p_value(motif_block)
    #Guess alphabet from motif sequences
    alphabet = guess_alphabet(motif_list)

    #Build one ModuleInstance per motif, keyed by (sequence id, start)
    instances = {}
    for motif in motif_list:
        #Translate the Gibbs sequence number (motif[0]) to the fasta id
        seq_id = str(sequence_map[motif[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]
        start = motif[1]
        seq = motif[2]
        #Create Location object spanning start to start + len(seq)
        location = Location(seq_id, start, start + len(seq))
        #Create ModuleInstance; motif[3] is passed through as-is
        #NOTE(review): motif[3] looks like a significance value - confirm
        instance_key = (seq_id, start)
        #A repeated key replaces the earlier instance
        instances[instance_key] = ModuleInstance(seq, location, motif[3])

    #Create Module object
    gibbs_mod = Module(instances, MolType=alphabet)
    gibbs_mod.Pvalue = motif_p
    #The ID is always the motif id plus the fixed suffix "1"
    gibbs_mod.ID = motif_id + "1"
    yield gibbs_mod
def extractModuleData(module_data, alphabet, remap_dict):
    """Builds a Module object from the module_data list of one module.

        - Handles a single module per call: pass in the data of one
            module only.
        - module_data[0] holds the general information lines, of which
            only the first is read.
        - module_data[1] is passed to getConsensusSequence.
        - module_data[2] holds the instance lines.
        - remap_dict translates the sequence label found in each instance
            line into the sequence id used for the keys and Locations.
    """
    #Anything past the first 3 elements of the list is ignored
    general_part, consensus_part, instance_part = (
        module_data[:3][0], module_data[:3][1], module_data[:3][2])

    #General information comes from the first line only
    general_dict = getModuleGeneralInfo(general_part[0])
    width = int(general_dict['width'])

    #Instance lines: drop the first 4 and last 2 lines, then split each
    #remaining line into whitespace-separated fields
    rows = [line.split() for line in instance_part[4:-2]]

    #One ModuleInstance per row, keyed by (sequence id, start)
    instances = {}
    for fields in rows:
        seq_id = remap_dict[fields[0]]
        #Reported start is shifted down by one
        begin = int(fields[1]) - 1
        p_value = float(fields[2])
        #fields[3] is not used
        site = fields[4]
        #Location runs from begin to begin + module width
        where = Location(seq_id, begin, begin + width)
        instances[(seq_id, begin)] = ModuleInstance(site, where, p_value)

    result = Module(instances, MolType=alphabet)
    #Multilevel Consensus Sequence
    result.ConsensusSequence = getConsensusSequence(consensus_part)
    #Copy the remaining values of interest out of the general info dict
    result.Llr = int(general_dict['llr'])
    result.Evalue = float(general_dict['E-value'])
    result.ID = general_dict['MOTIF']
    return result
def extractModuleData(module_data, alphabet, remap_dict):
    """Creates Module object given module_data list.

        - Only works on 1 module at a time: only pass in data from one module.
        - module_data: list whose first three elements are used:
            [0] general information lines (only the first line is read),
            [1] data passed to getConsensusSequence,
            [2] instance lines; the first 4 and last 2 of them are skipped.
        - alphabet: passed to Module as MolType.
        - remap_dict: maps the sequence label in field 0 of each instance
            line to the sequence id stored in the Module.

    Returns the Module with ConsensusSequence, Llr, Evalue and ID set.
    """
    #Create Module object
    meme_module = {}

    #Only keep first 3 elements of the list
    module_data = module_data[:3]

    #Get Module general information: module_data[0]
    #Only need to keep first line
    general_dict = getModuleGeneralInfo(module_data[0][0])
    module_length = int(general_dict['width'])

    #Get ModuleInstances: module_data[2]
    #Split every instance line into fields. A comprehension replaces the
    #xrange index loop, which is a NameError on Python 3.
    instance_data = [line.split() for line in module_data[2][4:-2]]

    #Create a ModuleInstance object and add it to Module for each instance
    for instance in instance_data:
        seqId = remap_dict[instance[0]]
        #Start position in the input is decremented by one
        start = int(instance[1]) - 1
        Pvalue = float(instance[2])
        #instance[3] is skipped; the sequence is taken from field 4
        sequence = instance[4]
        #Create Location object for ModuleInstance
        location = Location(seqId, start, start + module_length)
        #Create ModuleInstance
        mod_instance = ModuleInstance(sequence, location, Pvalue)
        #Add ModuleInstance to Module
        meme_module[(seqId, start)] = mod_instance

    meme_module = Module(meme_module, MolType=alphabet)

    #Get Multilevel Consensus Sequence
    meme_module.ConsensusSequence = getConsensusSequence(module_data[1])

    #Pull out desired values from dict
    meme_module.Llr = int(general_dict['llr'])
    meme_module.Evalue = float(general_dict['E-value'])
    meme_module.ID = general_dict['MOTIF']

    return meme_module