def build_module_objects(motif_block, sequence_map, truncate_len=None):
    """Yields the Module object(s) built from one motif block.

        - motif_block: list of lines resulting from calling get_motif_blocks.
        - sequence_map: mapping between Gibbs sequence numbering and the
            sequence id from the fasta file.
        - truncate_len: when truthy, each sequence id is cut down to its
            first truncate_len characters before being used as a key.

    Each ModuleInstance is stored in its Module under (seq_id, start).
    """
    # The motif id is the last whitespace-separated token of the first line.
    first_line_tokens = motif_block[0].strip().split()
    motif_id = first_line_tokens[-1]

    instances = get_motif_sequences(motif_block)
    p_value = get_motif_p_value(motif_block)
    # The alphabet is guessed from the motif sequences themselves.
    alphabet = guess_alphabet(instances)

    # Splitting instances over several modules is DISABLED FOR NOW
    # (formerly: one key per distinct x[4] in the motif list), so every
    # instance ends up in the single module stored under key "1".
    single_key = "1"
    modules = {}
    for key in [single_key]:
        module = Module({}, Alphabet=alphabet)
        module.Pvalue = p_value
        module.ID = motif_id + key
        modules[key] = module

    for record in instances:
        seq_id = str(sequence_map[record[0]])
        if truncate_len:
            seq_id = seq_id[:truncate_len]
        start = record[1]
        seq = record[2]
        significance = record[3]

        # The location spans the instance sequence, starting at 'start'.
        end = start + len(seq)
        location = Location(seq_id, start, end)
        instance_key = (seq_id, start)
        modules[single_key][instance_key] = ModuleInstance(
            seq, location, significance)

    for module in modules.values():
        yield module
def extractModuleData(module_data, alphabet):
    """Creates Module object given module_data list.

        - Only works on 1 module at a time: only pass in data from one module.
        - module_data: list of sections, of which only the first three are
            used:
                module_data[0]: general information (only its first line is
                    parsed, via getModuleGeneralInfo).
                module_data[1]: multilevel consensus sequence (parsed via
                    getConsensusSequence).
                module_data[2]: lines listing the module instances.
        - alphabet: passed to Module as its Alphabet.

    Returns the Module, with one ModuleInstance stored per instance line
    under the key (seqId, start).
    """
    # Create Module object
    meme_module = Module({}, Alphabet=alphabet)

    # Only keep first 3 elements of the list
    module_data = module_data[:3]

    # Get Module general information: only the first line is needed
    general_dict = getModuleGeneralInfo(module_data[0][0])
    # Get Multilevel Consensus Sequence
    meme_module.ConsensusSequence = getConsensusSequence(module_data[1])

    # Pull out desired values from dict
    module_length = int(general_dict["width"])
    meme_module.Llr = int(general_dict["llr"])
    meme_module.Evalue = float(general_dict["E-value"])
    meme_module.ID = general_dict["MOTIF"]

    # Get ModuleInstances: module_data[2]. The first four and the last two
    # lines of that section are skipped; every remaining line is split on
    # whitespace. A comprehension is used instead of an xrange index loop so
    # the function runs unchanged on both Python 2 and Python 3.
    instance_data = [line.split() for line in module_data[2][4:-2]]

    # Create a ModuleInstance object and add it to Module for each instance
    for instance in instance_data:
        seqId = instance[0]
        # NOTE(review): the "- 1" presumably converts a 1-based start in the
        # input to a 0-based one -- confirm against the input format.
        start = int(instance[1]) - 1
        Pvalue = float(instance[2])
        sequence = instance[4]
        # Create Location object for ModuleInstance; its length is the
        # module width, not len(sequence).
        location = Location(seqId, start, start + module_length)
        # Create ModuleInstance
        mod_instance = ModuleInstance(sequence, location, Pvalue)
        # Add ModuleInstance to Module
        meme_module[(seqId, start)] = mod_instance

    return meme_module