Python recode_sequence示例，p4.code_utils.recode_sequence Python示例

示例#1

0

显示文件

文件： alignment_recoding.py 项目： pgfoster/p4-phylogenetics

def indelizeCodons(self, aas, code="Standard"):
    """
    Return a copy of *self* where the codons corresponding to the
    amino-acids listed in *aas* are replaced by indels ("---").

    *aas* is an iterable of amino-acid letters; their case is ignored.

    *code* is the Biopython name of the genetic code under which the codons
    have to be interpreted, or a dictionary converting from codons to
    amino-acids. Default is to use the standard genetic code. The possible
    names are the keys of ``CodonTable.unambiguous_dna_by_name``.

    An AssertionError is raised (when assertions are enabled) if *code* is
    neither a string nor a dictionary.
    """
    # The docstring is a plain string literal on purpose: a
    # '"""...""" % values' expression is not recognised as a docstring
    # (__doc__ stays None) and would be evaluated again at every call.

    # "str" is the same object as types.StringType under Python 2,
    # and this spelling also works under Python 3.
    if isinstance(code, str):
        code = getBiopythonCode(code)  # defined in code_utils.py
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg
    # Amino-acids are compared in lowercase on both sides, so that neither
    # the case used in *aas* nor the one used in a user-provided *code*
    # dictionary matters (degenerate() does the same).
    targets = set(aa.lower() for aa in aas)
    # Build a conversion dictionary.
    # The keys are the codons, the values their "indelized" replacements:
    # "---" for the targeted amino-acids, the codon itself otherwise.
    cod2indels = {}
    for codon, aa in code.items():
        cod2indels[codon] = "---" if aa.lower() in targets else codon
    # Work on a copy, so that *self* is left untouched.
    ali = self.dupe()
    for seq in ali.sequences:
        # Recode the sequence using the conversion dictionary built previously.
        # recode_sequence is defined in code_utils.py
        seq.sequence = recode_sequence(seq.sequence, cod2indels, code=code)
    return ali

示例#2

0

显示文件

文件： alignment_recoding.py 项目： pgfoster/p4-phylogenetics

def indelizeCodons(self, aas, code="Standard"):
    """
    Return a copy of *self* where the codons corresponding to the
    amino-acids listed in *aas* are replaced by indels ("---").

    *aas* is an iterable of amino-acid letters; their case is ignored.

    *code* is the Biopython name of the genetic code under which the codons
    have to be interpreted, or a dictionary converting from codons to
    amino-acids. Default is to use the standard genetic code. The possible
    names are the keys of ``CodonTable.unambiguous_dna_by_name``.

    An AssertionError is raised (when assertions are enabled) if *code* is
    neither a string nor a dictionary.
    """
    # The docstring is a plain string literal on purpose: a
    # '"""...""" % values' expression is not recognised as a docstring
    # (__doc__ stays None) and would be evaluated again at every call.
    if isinstance(code, str):
        code = getBiopythonCode(code)  # defined in code_utils.py
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg
    # Amino-acids are compared in lowercase on both sides, so that neither
    # the case used in *aas* nor the one used in a user-provided *code*
    # dictionary matters (degenerate() does the same).
    targets = {aa.lower() for aa in aas}
    # Conversion dictionary: each codon maps to "---" when it codes one of
    # the targeted amino-acids, and to itself otherwise.
    cod2indels = {
        codon: ("---" if aa.lower() in targets else codon)
        for codon, aa in code.items()
    }
    # Work on a copy, so that *self* is left untouched.
    ali = self.dupe()
    for seq in ali.sequences:
        # Recode the sequence using the conversion dictionary built previously.
        # recode_sequence is defined in code_utils.py
        seq.sequence = recode_sequence(seq.sequence, cod2indels, code=code)
    return ali

示例#3

0

显示文件

文件： alignment_recoding.py 项目： pgfoster/p4-phylogenetics

def degenerate(self, code="Standard", positions=[1, 2, 3], restrict_to=[], ignore=[], sub_code=None):
    """
    Return a copy of *self* where the codons are replaced with degenerate versions.

    If *restrict_to* is not empty, only those codons that code amino-acids
    listed in *restrict_to* will be degenerated.
    If *ignore* is not empty, those codons that code amino-acids listed in
    *ignore* will not be degenerated.
    Amino-acids in *restrict_to* and *ignore* are compared case-insensitively.

    *positions* determines which codon positions are degenerated. By default,
    the whole codons are degenerated (if there is degeneracy of course).

    *code* is the Biopython name of the genetic code under which degeneracy
    has to be interpreted, or a dictionary converting from codons to
    amino-acids (all in lower case). Default is to use the standard genetic
    code. The possible names are the keys of
    ``CodonTable.unambiguous_dna_by_name``.

    *sub_code*, if provided, should be a dictionary whose keys are codons and
    whose values are the (sub-)amino-acids these codons should be counted as
    (all in lower case). For the purpose of defining degeneracy groups, the
    codons present in *sub_code* will be considered as coding for the
    amino-acid defined there instead of the one defined by *code*. This can
    be used for instance to keep two distinct types of serine codons, with
    degeneracy only within each type. The codons still count as coding their
    original amino-acid with respect to the *restrict_to* and *ignore*
    options. *sub_code* is not modified. A warning is issued if one of its
    values is identical to an amino-acid of *code*.

    An AssertionError is raised (when assertions are enabled) if *code* is
    neither a string nor a dictionary, or if *sub_code* is not a dictionary.
    """
    # The docstring is a plain string literal on purpose: a
    # '"""...""" % values' expression is not recognised as a docstring
    # (__doc__ stays None) and would be evaluated again at every call.
    # NOTE(review): the list defaults are only read in this function;
    # *positions* is handed over to recode_sequence, which is assumed not to
    # modify it -- confirm in code_utils.py.

    # "str" is the same object as types.StringType under Python 2,
    # and this spelling also works under Python 3.
    if isinstance(code, str):
        code = getBiopythonCode(code)  # defined in code_utils.py
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg
    # codons belonging to different sub-groups of codons for one amino-acid
    # can be considered as coding different amino-acids
    # (sub-amino-acids of the "normal" amino-acid, for instance two types of serine)
    if sub_code is None:
        sub_code = {}
    else:
        assert isinstance(sub_code, dict), "sub_code must be a dictionary."
        code_aas = code.values()
        if any(sub_aa in code_aas for sub_aa in sub_code.values()):
            msg = CAT(["Note that at least one sub-aminoacid provided in sub_code ",
                       "is identical to an amino-acid provided by the chosen genetic code.\n",
                       "The sub-amino-acids are:\n%s\n" % ", ".join(
                           [str(aa) for aa in sub_code.values()])])
            warnings.warn(msg)
    # Ensure the amino-acids are in lowercase.
    restricted_aas = set(aa.lower() for aa in restrict_to)
    ignored_aas = set(aa.lower() for aa in ignore)
    # Find the groups of synonymous codons.
    # The keys are (sub-)amino-acids, the values are lists of codons that code them.
    aas_codons = {}
    for codon, aa in code.items():
        # sub_code is only read (codons absent from it keep the amino-acid
        # given by *code*), so the caller's dictionary is never modified and
        # no defensive copy is needed.
        sub_aa = sub_code.get(codon, aa)
        # The restriction rules apply to the original amino-acid,
        # not to the sub-amino-acid.
        aa_lower = aa.lower()
        if restricted_aas and aa_lower not in restricted_aas:
            continue
        if aa_lower in ignored_aas:
            continue
        aas_codons.setdefault(sub_aa, []).append(codon)
    # Build a conversion dictionary.
    # The keys are the codons, the values their degenerate replacements.
    cod2degen = {}
    for codons in aas_codons.values():
        # The degenerate value at a position is the binary union
        # of the values of the nucleotides found at that position.
        # nuc2val, val2nuc and reduce_by_or are defined in code_utils.py
        degenerate_codon = "".join(
            [val2nuc[reduce_by_or([nuc2val[cod[pos]] for cod in codons])]
             for pos in range(3)]).lower()
        # Associate this representation to all the synonymous codons it represents.
        # If some codons were filtered out above, they are absent from
        # cod2degen; according to the original author, recode_sequence
        # keeps such codons as is.
        for cod in codons:
            cod2degen[cod.lower()] = degenerate_codon
    # Work on a copy, so that *self* is left untouched.
    ali = self.dupe()
    for seq in ali.sequences:
        # Recode the sequence using the conversion dictionary built previously.
        # recode_sequence is defined in code_utils.py
        seq.sequence = recode_sequence(seq.sequence, cod2degen, positions, code=code)
    return ali

示例#4

0

显示文件

文件： alignment_recoding.py 项目： pgfoster/p4-phylogenetics

def degenerate(self,
               code="Standard",
               positions=[1, 2, 3],
               restrict_to=[],
               ignore=[],
               sub_code=None):
    """
    Return a copy of *self* where the codons are replaced with degenerate versions.

    If *restrict_to* is not empty, only those codons that code amino-acids
    listed in *restrict_to* will be degenerated.
    If *ignore* is not empty, those codons that code amino-acids listed in
    *ignore* will not be degenerated.
    Amino-acids in *restrict_to* and *ignore* are compared case-insensitively.

    *positions* determines which codon positions are degenerated. By default,
    the whole codons are degenerated (if there is degeneracy of course).

    *code* is the Biopython name of the genetic code under which degeneracy
    has to be interpreted, or a dictionary converting from codons to
    amino-acids (all in lower case). Default is to use the standard genetic
    code. The possible names are the keys of
    ``CodonTable.unambiguous_dna_by_name``.

    *sub_code*, if provided, should be a dictionary whose keys are codons and
    whose values are the (sub-)amino-acids these codons should be counted as
    (all in lower case). For the purpose of defining degeneracy groups, the
    codons present in *sub_code* will be considered as coding for the
    amino-acid defined there instead of the one defined by *code*. This can
    be used for instance to keep two distinct types of serine codons, with
    degeneracy only within each type. The codons still count as coding their
    original amino-acid with respect to the *restrict_to* and *ignore*
    options. *sub_code* is not modified. A warning is issued if one of its
    values is identical to an amino-acid of *code*.

    An AssertionError is raised (when assertions are enabled) if *code* is
    neither a string nor a dictionary, or if *sub_code* is not a dictionary.
    """
    # The docstring is a plain string literal on purpose: a
    # '"""...""" % values' expression is not recognised as a docstring
    # (__doc__ stays None) and would be evaluated again at every call.
    # NOTE(review): the list defaults are only read in this function;
    # *positions* is handed over to recode_sequence, which is assumed not to
    # modify it -- confirm in code_utils.py.
    if isinstance(code, str):
        code = getBiopythonCode(code)  # defined in code_utils.py
    else:
        msg = "code must be a dictionary, or a string naming the code in Biopython."
        assert isinstance(code, dict), msg
    # codons belonging to different sub-groups of codons for one amino-acid
    # can be considered as coding different amino-acids
    # (sub-amino-acids of the "normal" amino-acid, for instance two types of serine)
    if sub_code is None:
        sub_code = {}
    else:
        assert isinstance(sub_code, dict), "sub_code must be a dictionary."
        code_aas = code.values()
        if any(sub_aa in code_aas for sub_aa in sub_code.values()):
            msg = CAT([
                "Note that at least one sub-aminoacid provided in sub_code ",
                "is identical to an amino-acid provided by the chosen genetic code.\n",
                "The sub-amino-acids are:\n%s\n" %
                ", ".join([str(aa) for aa in sub_code.values()])
            ])
            warnings.warn(msg)
    # Ensure the amino-acids are in lowercase.
    restricted_aas = {aa.lower() for aa in restrict_to}
    ignored_aas = {aa.lower() for aa in ignore}
    # Find the groups of synonymous codons.
    # The keys are (sub-)amino-acids, the values are lists of codons that code them.
    aas_codons = {}
    for codon, aa in code.items():
        # sub_code is only read (codons absent from it keep the amino-acid
        # given by *code*), so the caller's dictionary is never modified and
        # no defensive copy is needed.
        sub_aa = sub_code.get(codon, aa)
        # The restriction rules apply to the original amino-acid,
        # not to the sub-amino-acid.
        aa_lower = aa.lower()
        if restricted_aas and aa_lower not in restricted_aas:
            continue
        if aa_lower in ignored_aas:
            continue
        aas_codons.setdefault(sub_aa, []).append(codon)
    # Build a conversion dictionary.
    # The keys are the codons, the values their degenerate replacements.
    cod2degen = {}
    for codons in aas_codons.values():
        # The degenerate value at a position is the binary union
        # of the values of the nucleotides found at that position.
        # nuc2val, val2nuc and reduce_by_or are defined in code_utils.py
        degenerate_codon = "".join(
            [val2nuc[reduce_by_or([nuc2val[cod[pos]] for cod in codons])]
             for pos in range(3)]).lower()
        # Associate this representation to all the synonymous codons it represents.
        # If some codons were filtered out above, they are absent from
        # cod2degen; according to the original author, recode_sequence
        # keeps such codons as is.
        for cod in codons:
            cod2degen[cod.lower()] = degenerate_codon
    # Work on a copy, so that *self* is left untouched.
    ali = self.dupe()
    for seq in ali.sequences:
        # Recode the sequence using the conversion dictionary built previously.
        # recode_sequence is defined in code_utils.py
        seq.sequence = recode_sequence(seq.sequence,
                                       cod2degen,
                                       positions,
                                       code=code)
    return ali