示例#1
0
class Numbers(NumbersI, MappedList):
    """Validating list of numbers (the safe counterpart of UnsafeNumbers).

    Each item of the (iterable) data is checked for being a number before
    it is stored in the list.

    Note: to guarantee this, _every_ list method that could introduce new
    data has to be overridden. That makes this class much slower than
    UnsafeNumbers, but it can never end up holding invalid data.
    """
    Mask = FunctionWrapper(float)

    def __init__(self, data=None, Constraint=None, Mask=None):
        """Builds a new Numbers object from an iterable of float-able items.

        Usage: nl = Numbers(data)

        Every item in data is converted with float(); the converted items
        become the contents of the new object. Conversion of any item that
        is not float-able raises immediately.

        Note: a single string of digits is iterated character by character,
        so it is treated as a list of digits, _not_ as one number. This
        might not be what you expected.

        data must be iterable: a 1-element list containing a number is OK,
        a bare number is not. None (the default) yields an empty object.

        Constraint and Mask are passed straight through to MappedList.
        """
        if data is None:
            floats = []
        else:
            # fails on the first item that float() rejects
            floats = [float(item) for item in data]
        MappedList.__init__(self, floats, Constraint, Mask)
示例#2
0
class Freqs(FreqsI, MappedDict):
    """Frequency distribution: a mapping of category -> count.

    Class data:
        ValueMask: function that transforms values before they are entered.
        RequiredKeys: keys that are automatically added with frequency 0
        before frequencies are added.

    Many operations that modify the dictionary are (expensively) validated.
    Use UnsafeFreqs if speed is more important than validation.
    """
    ValueMask = FunctionWrapper(freqwatcher)

    def __init__(self, data=None, Constraint=None, Mask=None, ValueMask=None):
        """Delegates to the superclass, then fills in missing required keys.

        Parameters (for polymorphism with MappedDict superclass):

        data:           data to load into self
        Constraint:     only items that Constraint __contains__ are allowed
        Mask:           function applied to keys before lookup
        ValueMask:      function applied to values before addition
        """
        # The superclass is initialised without data; data is merged in
        # afterwards through the += operator.
        super(Freqs, self).__init__(
            Constraint=Constraint, Mask=Mask, ValueMask=ValueMask)
        self += data
        for required in self.RequiredKeys:
            if required in self:
                continue
            self[required] = 0.0
示例#3
0
class DbRefs(MappedRecord, ConstrainedDict):
    """Holds Database -> [Accessions] mapping.
    
    The accessions for a particular database are always stored as a list.

    DbRefs will ultimately contain methods for actually getting the records
    from known databases.

    NOTE(review): the __init__ below documents and names a ``Parameters``
    class rather than DbRefs. It looks as if it was spliced in from a
    different definition -- confirm against the upstream source before
    relying on it.
    """
    # Applied to values on the way in; presumably wraps a single item in a
    # list (inferred from the helper's name only -- verify).
    ValueMask = FunctionWrapper(_make_list)
    # NOTE(review): one list object shared at class level; confirm that no
    # consumer mutates the default in place.
    DefaultValue = []
    def __init__(self, parameters={}, synonyms={}):
        """Initialize the Parameters object.

        parameters: a dictionary of Parameter objects keyed by their identifier
        synonyms: a dictionary of synonyms. Keys are synonyms, values are
            parameter identifiers.

        The mutable ``{}`` defaults are not modified in this method:
        parameters is deep-copied before use and synonyms is only handed to
        _find_synonym (whether that helper mutates it is not visible here).
        """
        # Key mask built from the synonyms mapping; presumably resolves a
        # synonym to its parameter identifier -- TODO confirm in
        # _find_synonym.
        mask = FunctionWrapper(_find_synonym(synonyms))
        # NOTE(review): super() is given ``Parameters``, not the enclosing
        # class. Unless DbRefs instances are also Parameters instances this
        # call raises TypeError (or NameError if Parameters is undefined).
        super(Parameters, self).__init__(data=deepcopy(parameters), Mask=mask)

        # Point the mutating methods at _raiseNotImplemented on the instance.
        # NOTE(review): for new-style classes, implicit special-method lookup
        # (``d[k] = v``, ``del d[k]``) goes through the type, so the
        # __setitem__/__delitem__ instance attributes only affect explicit
        # attribute calls -- confirm this is the intended level of
        # protection.
        self.__setitem__ = self.setdefault = self.update =\
            self.__delitem__ = self._raiseNotImplemented
示例#5
0
class NumberFreqs(NumberFreqsI, MappedDict):
    """Frequency distribution whose keys and values are both numbers.

    Statistics such as the mean and variance treat the data as frequencies
    of other numbers, not as counts of unrelated categories: each key is
    weighted in proportion to its value (a key of 5 with a value of 3
    contributes 15 'units', not 3, to things like mean() and normalize()).

    Performs (expensive) validation to ensure that keys are floats and
    values are non-negative floats.

    All keys and values are automatically converted to float.
    """
    RequiredKeys = None
    Mask = FunctionWrapper(float)
    ValueMask = FunctionWrapper(freqwatcher)

    def __init__(self, data=None, Constraint=None, Mask=None, ValueMask=None):
        """Delegates to the superclass, then fills in missing required keys.

        Parameters (for polymorphism with MappedDict superclass):

        data:           data to load into self
        Constraint:     only items that Constraint __contains__ are allowed
        Mask:           function applied to keys before lookup
        ValueMask:      function applied to values before addition
        """
        # The superclass is initialised without data; data is merged in
        # afterwards through the += operator.
        super(NumberFreqs, self).__init__(
            Constraint=Constraint, Mask=Mask, ValueMask=ValueMask)
        self += data
        required = self.RequiredKeys
        if not required:
            # RequiredKeys is None (the class default) or empty
            return
        for key in required:
            if key in self:
                continue
            self[key] = 0.0
示例#6
0
文件: usage.py 项目: yatisht/pycogent
class AminoAcidUsage(InfoFreqs):
    """Stores counts of the 20 canonical amino acids."""
    Mask = FunctionWrapper(upper)
    RequiredKeys = dict.fromkeys(AminoAcids)

    def bases(self, genetic_code=SGC, codon_usage=_equal_codons):
        """Predicts the most likely base frequencies, normalized.

        The prediction goes through codons(): genetic_code (default:
        standard genetic code) and codon_usage (default: unbiased codon
        usage) are forwarded unchanged.
        """
        base_freqs = self.codons(genetic_code, codon_usage).bases()
        base_freqs.normalize()
        return base_freqs

    def codons(self, genetic_code=SGC, codon_usage=_equal_codons):
        """Predicts the most likely codon frequencies.

        genetic_code determines which codons belong to each amino acid;
        codon_usage supplies the relative weight of each of those codons.
        Defaults are the standard genetic code and unbiased codon
        frequencies.

        Returns a CodonUsage built from the predicted frequencies, this
        object's info and genetic_code.
        """
        predicted = {}
        aa_freqs = Freqs(self)
        aa_freqs.normalize()
        for aa, aa_freq in aa_freqs.items():
            # codons are looked up in codon_usage as uppercase RNA
            rna_codons = [
                codon.upper().replace('T', 'U') for codon in genetic_code[aa]
            ]
            if rna_codons:  #code might be missing some amino acids?
                weights = Numbers([codon_usage[c] for c in rna_codons])
                weights.normalize()
                # split this amino acid's frequency among its codons
                predicted.update(
                    (codon, weight * aa_freq)
                    for codon, weight in zip(rna_codons, weights))
        return CodonUsage(predicted, self.info, genetic_code)

    def positionalBases(self, genetic_code=SGC, codon_usage=_equal_codons):
        """Predicts the most likely positional base frequencies.

        Delegates to codons() with the given genetic code (default: standard
        genetic code) and codon usage (default: unbiased codon usage).
        """
        codon_freqs = self.codons(genetic_code, codon_usage)
        return codon_freqs.positionalBases()

    def aminoAcids(self):
        """Returns self (bases/positionalBases/aminoAcids/codons interface)."""
        return self
示例#7
0
文件: usage.py 项目: yatisht/pycogent
class CodonUsage(CodonUsageI, InfoFreqs):
    """Stores frequencies of the 64 codons, mapped to RNA.

    This class is convenient but inefficient, since it automatically maps any
    lookups to the uppercase RNA alphabet internally. Use UnsafeBaseUsage for 
    speed when necessary.
    """

    Mask = FunctionWrapper(key_to_rna)
    RequiredKeys = RnaCodons
    BaseUsageClass = BaseUsage

    def __init__(self, data=None, Info=None, GeneticCode=None, \
        Mask=None, ValueMask=None, Constraint=None):
        """Initializes new CodonUsage with Info and frequency data.

        data: either a sequence, which is split into consecutive triplets,
            or anything else, which is handed to the superclass unchanged.
        Info: passed through to the superclass.
        GeneticCode: a GenCodeClass instance, or a key into GeneticCodes;
            when false, self._default_code is used.

        Note: Mask, ValueMask and Constraint are ignored, but must be present
        to support copy() because of the ConstrainedContainer interface.
        """
        #check if we have a sequence: if so, take it 3 bases at a time.
        #The chunking fails on None, on unsliceable data and on lists of
        #tuples or anything else where the items don't behave like strings;
        #in all those cases data is passed on as-is.
        try:
            codons = [''.join(data[i:i + 3]) for i in range(0, len(data), 3)]
        except Exception:
            #ordinary errors only: KeyboardInterrupt and SystemExit must
            #still propagate rather than be mistaken for "not a sequence"
            codons = data
        super(CodonUsage, self).__init__(codons, Info)

        if GeneticCode:
            if isinstance(GeneticCode, GenCodeClass):
                curr_code = GeneticCode
            else:
                curr_code = GeneticCodes[GeneticCode]
        else:
            curr_code = self._default_code
        #written straight into the instance dict rather than via attribute
        #assignment (presumably to bypass custom attribute handling in a
        #base class -- TODO confirm)
        self.__dict__['GeneticCode'] = curr_code

    def __getitem__(self, key):
        """Normalizes key and treats T=U.

        A masked key of length 2 is read as a pair of bases (e.g. GC for GC
        content): the result is the sum of the normalized frequencies of
        those bases. Any other key is looked up in the superclass.
        """
        key = self.Mask(key)
        if len(key) == 2:  #pair of bases, e.g. GC for GC content
            dup = BaseUsage(self)
            dup.normalize()
            return sum([dup.get(i, 0) for i in key], 0)
        else:
            return super(CodonUsage, self).__getitem__(key)
示例#8
0
文件: usage.py 项目: yatisht/pycogent
class BaseUsage(BaseUsageI, InfoFreqs):
    """Stores frequencies of the four bases, mapped to RNA.

    Convenient but inefficient: every lookup is mapped to the uppercase RNA
    alphabet internally. Use UnsafeBaseUsage for speed when necessary.
    """

    Mask = FunctionWrapper(key_to_rna)
    RequiredKeys = dict.fromkeys(Bases)

    def __getitem__(self, key):
        """Normalizes key and treats T=U.

        A masked key of length 2 is read as a pair of bases and yields the
        sum of their normalized frequencies; any other key is looked up in
        the superclass.
        """
        masked = self.Mask(key)
        if len(masked) != 2:
            return super(BaseUsage, self).__getitem__(masked)
        #pair of bases, e.g. GC for GC content: work on a normalized copy so
        #that self is left untouched
        normalized = BaseUsage(self)
        normalized.normalize()
        return sum(normalized.get(base, 0) for base in masked)
示例#9
0
文件: usage.py 项目: yatisht/pycogent
class DinucUsage(DinucI, InfoFreqs):
    """Stores frequencies of the 16 dinucleotides, mapped to RNA.

    This class is convenient but inefficient, since it automatically maps any
    lookups to the uppercase RNA alphabet internally. Use UnsafeBaseUsage for 
    speed when necessary.
    """
    Mask = FunctionWrapper(key_to_rna)
    RequiredKeys = RnaDinucs

    def __init__(self, data=None, Info=None, Overlapping=True, \
        GeneticCode=None, Mask=None, ValueMask=None, Constraint=None):
        """Initializes new DinucUsage with Info and frequency data.

        data: either a sequence, which is masked and split into
            dinucleotides, or anything else, which is handed to the
            superclass.
        Info: passed through to the superclass.
        Overlapping: selects the start positions of the dinucleotides:
            True (default): every position (step 1, starting at 0);
            False: non-overlapping pairs (step 2, starting at 0);
            '3-1': step 3 starting at index 2, i.e. the third position of
                one codon plus the first of the next when data is in frame.
        Mask, ValueMask: when not None, they replace the class-level
            attributes on this instance before data is processed.
        GeneticCode, Constraint: accepted but not used here; Constraint must
            be present to support copy() because of the ConstrainedContainer
            interface.
        """
        if Mask is not None:
            self.Mask = Mask
        if ValueMask is not None:
            self.ValueMask = ValueMask
        #check if we have a sequence: if so, take it 2 bases at a time.
        #Masking or chunking fails on None, on unsliceable data and on
        #anything where the items don't behave like strings.
        try:
            data = self.Mask(data)
            if Overlapping == '3-1':
                range_ = range(2, len(data) - 1, 3)
            elif Overlapping:
                range_ = range(0, len(data) - 1)
            else:
                range_ = range(0, len(data) - 1, 2)
            dinucs = [''.join(data[i:i + 2]) for i in range_]
        except Exception:
            #ordinary errors only: KeyboardInterrupt and SystemExit must
            #still propagate. Note that data is the masked value here if the
            #masking step itself succeeded, and the original value otherwise.
            dinucs = data
        super(DinucUsage, self).__init__(dinucs, Info)

    def __getitem__(self, key):
        """Normalizes key and treats T=U."""
        key = self.Mask(key)
        return super(DinucUsage, self).__getitem__(key)
示例#10
0
class SpansOnly(ConstrainedList):
    """ConstrainedList whose elements are converted to Spans when added."""
    # only Span instances pass the constraint check
    _constraint = ClassChecker(Span)
    # incoming items go through Span() first
    Mask = FunctionWrapper(Span)
示例#11
0
 def __init__(self, motifset, Gap=IUPAC_gap, Missing=IUPAC_missing,\
         Gaps=None,
         Sequence=None, Ambiguities=None,
         label=None, Complements=None, Pairs=None, MWCalculator=None, \
         add_lower=False, preserve_existing_moltypes=False, \
         make_alphabet_group=False, ModelSeq=None):
     """Returns a new MolType object. Note that the parameters are in flux.
     
     Currently:
         motifset: Alphabet or sequence of items in the default
             alphabet. Does not include degenerates.
         
         Gap: default gap symbol
         
         Missing: symbol for missing data
         
         Gaps: any other symbols that should be treated as gaps (doesn't have
               to include Gap or Missing; they will be silently added)
         
         Sequence: Class for constructing sequences. Defaults to ''.join.
             Unless preserve_existing_moltypes is set, its MolType attribute
             is set to self (i.e. the class passed in is modified).
         
         Ambiguities: dict of char:tuple, doesn't include gaps (the entries
             for Gap and Missing are added here).
         
         label: text label, stored as self.label. Not used in this method.
         
         Complements: dict of symbol:symbol showing how the non-degenerate
             single characters complement each other. Used for constructing
             on the fly the complement table, incl. support for mustPair and
             canPair.
         
         Pairs: dict in which keys are pairs of symbols that can pair
             with each other, values are True (must pair) or False (might
             pair). Currently, the meaning of GU pairs as 'weak' is conflated
             with the meaning of degenerate symbol pairs (which might pair
             with each other but don't necessarily, depending on how the
             symbol is resolved). This should be refactored.
         
         MWCalculator: f(seq) -> molecular weight.
         
         add_lower: if True (default: False) adds the lowercase versions of
             everything into the alphabet. Slated for deletion.
         
         preserve_existing_moltypes: if True (default: False), does not
             set the MolType of the supplied Sequence class to self.
         
         make_alphabet_group: if True, makes an AlphabetGroup relating
             the various alphabets to one another.
         
         ModelSeq: sequence type for modeling
     
     Note on "Degenerates" versus "Ambiguities": self.Degenerates contains
     _only_ mappings for degenerate symbols, whereas self.Ambiguities
     contains mappings for both degenerate and non-degenerate symbols.
     Sometimes you want one, sometimes the other, so both are provided.
     """
     self.Gap = Gap
     self.Missing = Missing
     #Gap and Missing are always gaps; caller-supplied Gaps are added on top
     self.Gaps = frozenset([Gap, Missing])
     if Gaps:
         self.Gaps = self.Gaps.union(frozenset(Gaps))
     self.label = label
     #set the sequence constructor
     if Sequence is None:
         Sequence = ''.join     #safe default string constructor
     elif not preserve_existing_moltypes:
         #tags the class that was passed in, not a copy of it
         Sequence.MolType = self
     self.Sequence = Sequence
     
     #set the ambiguities: Missing stands for any motif or the gap, Gap only
     #for itself
     ambigs = {self.Missing:tuple(motifset)+(self.Gap,),self.Gap:(self.Gap,)}
     if Ambiguities:
         ambigs.update(Ambiguities)
     #every plain motif maps to itself; done after the update, so these
     #entries win over same-keyed entries in Ambiguities
     for c in motifset:
         ambigs[c] = (c,)
     self.Ambiguities = ambigs
     
     #set Complements -- must set before we make the alphabet group
     self.Complements = Complements or {}
     
     if make_alphabet_group: #note: must use _original_ ambiguities here
         self.Alphabets = AlphabetGroup(motifset, Ambiguities, \
             MolType=self)
         self.Alphabet = self.Alphabets.Base
     else:
         #an Enumeration is used as-is; otherwise single-character motifs
         #get a CharAlphabet and longer motifs a general Alphabet
         if isinstance(motifset, Enumeration):
             self.Alphabet = motifset
         elif max(len(motif) for motif in motifset) == 1:
             self.Alphabet = CharAlphabet(motifset, MolType=self)
         else:
             self.Alphabet = Alphabet(motifset, MolType=self)
     #set the other properties
     #copy, so that the Missing entry added below does not end up in the
     #caller's Ambiguities dict
     self.Degenerates = Ambiguities and Ambiguities.copy() or {}
     #note: a string here, whereas self.Ambiguities holds the equivalent
     #entry as a tuple
     self.Degenerates[self.Missing] = ''.join(motifset)+self.Gap
     self.Matches = make_matches(motifset, self.Gaps, self.Degenerates)
     #start from a copy of the caller's Pairs, then add the generated ones;
     #make_pairs is given the caller's original Pairs (possibly None)
     self.Pairs = Pairs and Pairs.copy() or {}
     self.Pairs.update(make_pairs(Pairs, motifset, self.Gaps, \
         self.Degenerates))
     self.MWCalculator = MWCalculator
     #add lowercase characters, if we're doing that
     if add_lower:
         self._add_lowercase()
     #cache various other data that make the calculations faster
     #(self.All, used below, is not assigned in this method; presumably it
     #is set by one of these helpers -- TODO confirm)
     self._make_all()
     self._make_comp_table()
     # a gap can be a true gap char or a degenerate character, typically '?'
     # we therefore want to ensure consistent treatment across the definition
     # of characters as either gap or degenerate
     self.GapString = ''.join(self.Gaps)
     #gap characters that are not also degenerate symbols
     strict_gap = "".join(set(self.GapString) - set(self.Degenerates))
     self.stripDegenerate = FunctionWrapper(
         keep_chars(strict_gap+''.join(self.Alphabet)))
     self.stripBad = FunctionWrapper(keep_chars(''.join(self.All)))
     #precedence: '-' binds tighter than '^', so this evaluates as
     #set(Alphabet) ^ (set(Degenerates) - set(Gaps))
     to_keep = set(self.Alphabet) ^ set(self.Degenerates) - set(self.Gaps)
     self.stripBadAndGaps = FunctionWrapper(keep_chars(''.join(to_keep)))
     
     #make inverse degenerates from degenerates
     #ensure that lowercase versions also exist if appropriate
     #(the three loops run in this order, so on a key collision the alphabet
     #entry replaces the degenerate one and the gap entry replaces both)
     inv_degens = {}
     for key, val in self.Degenerates.items():
         #the symbol is stored uppercased; the lowercase variant is only
         #added when add_lower is set
         inv_degens[frozenset(val)] = key.upper()
         if add_lower:
             inv_degens[frozenset(''.join(val).lower())] = key.lower()
     for m in self.Alphabet:
         inv_degens[frozenset(m)] = m
         if add_lower:
             inv_degens[frozenset(''.join(m).lower())] = m.lower()
     for m in self.Gaps:
         inv_degens[frozenset(m)] = m
     self.InverseDegenerates = inv_degens
     
     #set array type for modeling alphabets
     #(None when the alphabet has no ArrayType attribute)
     try:
         self.ArrayType = self.Alphabet.ArrayType
     except AttributeError:
         self.ArrayType = None
     
     #set modeling sequence
     self.ModelSeq = ModelSeq
示例#12
0
 class kp(InfoFreqs):
     # InfoFreqs variant whose required keys are the integers 1, 2 and 3
     RequiredKeys = dict.fromkeys((1, 2, 3))
     # keys go through int()
     Mask = FunctionWrapper(int)