Пример #1
0
def mutant_locations(self, loc, count=1, max=False):
    '''
    build an iterator over candidate mutation sets that fall inside loc.

    loc is either an interval or a location tuple; an interval is reduced to
    the tuple of its hull, and an empty interval gives an empty iterator.
    every key of self._mutant_locations that overlaps loc contributes its
    (location tuple, mutation) pairs. the pairs are shuffled with
    util.irandomize and grouped count at a time with combinations, so
    count=1 gives single mutations, count=2 gives pairs, and so on.

    a candidate set is dropped when two of its members share a location
    tuple, or when the set is already present in self.mut_sets. if loc
    overlaps the first entry of self.exon_list, candidates are additionally
    filtered through mutate.check_translation on the codon-expanded region.

    max is accepted but is not read anywhere in this function. every shuffle
    is seeded with the module-level random_seed.
    '''
    #build the table of mutable locations the first time it is needed
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)

    #normalise loc: keep an interval for overlap tests, a tuple for slicing
    if not isinstance(loc, interval):
        loc_ivl = interval(loc)
    elif len(loc) == 0:
        return iter([])
    else:
        loc_ivl = loc
        loc = interval.hull([loc_ivl]).to_tuple()

    def expand_site(site_ivl):
        #lazy stream of (location tuple, mutation) pairs for one site; the
        #dict lookup happens right away, the pairs are produced on demand
        return ((site_ivl.to_tuple(), change)
                for change in self._mutant_locations[site_ivl.to_tuple()])

    def is_fresh_and_disjoint(mset):
        #no two mutations on the same site, and not a set recorded in
        #self.mut_sets
        sites = [m[0] for m in mset]
        return (len(sites) == len(set(sites))
                and set(mset) not in self.mut_sets)

    #sites (as intervals) from the table that overlap the requested range
    overlapping = (site for site in
                   (interval(key) for key in self._mutant_locations.keys())
                   if site.overlaps(loc_ivl))

    #one pair-stream per overlapping site
    site_streams = map(expand_site, overlapping)

    #shuffle the individual pairs, then take them count at a time
    shuffled_pairs = util.irandomize(chain.from_iterable(site_streams),
                                     seed=random_seed)
    candidate_sets = imap(frozenset, combinations(shuffled_pairs, count))

    mut_iter = util.irandomize(
        ifilter(is_fresh_and_disjoint,
                util.irandomize(candidate_sets, seed=random_seed)),
        seed=random_seed)

    #outside the first exon entry there is no translation to check
    if not interval(self.exon_list[0].extract_pos()).overlaps(loc_ivl):
        return mut_iter

    #widen the motif to codon boundaries, clipped to the exon
    widened = interval(mutate.expand_motif_to_codons(self, loc))
    codon_loc = (widened
                 & interval(self.exon_list[0].extract_pos())).to_tuple()
    seq_str = str(self.seq)[slice(*codon_loc)]

    def keeps_translation(mut_tups):
        #apply the mutations to the codon window and compare against the
        #unmutated window (presumably a synonymous-change check; see
        #mutate.check_translation)
        mutated = mutate.tups_to_str(seq_str, codon_loc, mut_tups)
        return mutate.check_translation(string.upper(mutated), seq_str)

    return util.irandomize(ifilter(keeps_translation, mut_iter),
                           seed=random_seed)
Пример #2
0
def mutate_all_positions(self, loc, max_repeats=20):
    '''
    generate mutant sets that change every mutable position inside loc.

    loc is either an interval or a location tuple; an interval is reduced to
    the tuple of its hull. mutate.get_motif_boundaries splits loc into exonic
    and intronic coordinate ranges:

    - for every codon in an exonic range, one of the other codons listed for
      the same fwd-table entry is picked at random (codons with no
      alternative are skipped);
    - for every key of self._mutant_locations overlapping an intronic range,
      one of the mutations stored for that key is picked at random. the
      options for a site are read from self._mutant_locations once, when the
      generator starts.

    each position owns an independently seeded random.Random, derived from
    the module-level random_seed.

    yields frozensets of (location tuple, replacement) pairs, one draw per
    position. a draw identical to one already yielded is skipped;
    max_repeats such duplicates are tolerated in total and the next one ends
    the generator.
    '''

    #first, make sure my self._mutant_locations dict is instantiated
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)

    #deal with interval() versus tuple inputs
    if isinstance(loc, interval):
        loc = interval.hull([loc]).to_tuple()

    (e_coords, i_coords) = mutate.get_motif_boundaries(loc, self)

    def make_chooser(options, rgen):
        #bind options and rgen now, so that every position keeps its own
        #option list and its own generator
        return lambda: rgen.choice(options)

    #one zero-argument chooser per codon / intronic site; each call returns
    #a tuple of (location tuple, replacement) pairs
    mutant_choices = []

    #the codon tables do not depend on the position, fetch them once
    bckt = mutate.codon_back_table()
    fwdt = mutate.codon_fwd_table()

    for codon_loc in e_coords:

        #go through every codon in codon_loc
        for c_loc in range(codon_loc[0], codon_loc[1], 3):
            codon = str(self.seq[c_loc:(c_loc + 3)]).upper()
            other_codons = bckt[fwdt[codon]].difference((codon,))

            if not other_codons:
                continue

            #convert these codons into mut tuples (cmut_tuples)
            # (one codon might be two or even three tuples)
            cmut_tuples = []
            for other_cod in other_codons:
                cmut_tuples.append(tuple(
                    ((c_loc + diff, c_loc + diff + 1), other_cod[diff])
                    for diff in util.str_diff(other_cod, codon)))
            #must be a tuple: it is hashed into the seed below
            cmut_tuples = tuple(cmut_tuples)

            rgen = random.Random()
            rgen.seed(random_seed ^ hash(cmut_tuples) ^ hash(loc))
            mutant_choices.append(make_chooser(cmut_tuples, rgen))

    for intron_loc in i_coords:

        intron_ivl = interval(intron_loc)

        for key in self._mutant_locations.keys():
            site_ivl = interval(key)
            hit = site_ivl.overlaps(intron_ivl)
            if not hit:
                continue

            site = site_ivl.to_tuple()
            options = [((site, mut),) for mut in self._mutant_locations[site]]

            #seed value is deliberately the same (random_seed, (interval,
            #overlap result)) tuple as before, so draws stay reproducible.
            #NOTE(review): tuple seeds are hashed by Python 2 but rejected
            #by random.seed on Python 3.11+ -- revisit when porting
            rgen = random.Random()
            rgen.seed((random_seed, (site_ivl, hit)))
            mutant_choices.append(make_chooser(options, rgen))

    #now that we have one chooser for every codon/nt, draw from all of them
    #once per round. the bookkeeping lives outside the loop so that
    #duplicates are really detected and the generator can terminate
    yielded = set()
    seen_count = 0
    while True:
        next_mut = frozenset(
            chain.from_iterable(choose() for choose in mutant_choices))

        if next_mut not in yielded:
            yielded.add(next_mut)
            yield next_mut
        elif seen_count < max_repeats:
            seen_count += 1
        else:
            #plain return ends a generator; raising StopIteration here
            #turns into RuntimeError under PEP 479
            return