Exemplos de Alphabet em Python, exemplos de Bio.Alphabet.Alphabet em Python

Exemplo n.º 1

0

Exibir arquivo

def createAlphabet(align=False, transSeq=False, extendAlphabet=False, protein=False):
    """Build an alphabet object whose ``letters`` match the requested options.

    A protein alphabet is produced when either ``transSeq`` or ``protein`` is
    truthy; otherwise a DNA alphabet is produced.

    :param align: when truthy, the gap character ``-`` is appended to the letters.
    :param transSeq: when truthy, a protein alphabet is used.
    :param extendAlphabet: when truthy, one extra symbol is included
        (``N`` for DNA, ``X`` for protein).
    :param protein: when truthy, a protein alphabet is used.
    :return: the configured alphabet instance.
    """
    if transSeq or protein:
        alphabet = Alphabet.ProteinAlphabet()
        # IUPAC protein letters plus the '*' symbol (and 'X' when extended).
        if extendAlphabet:
            alphabet.letters = IUPACProtein().letters + '*X'
        else:
            alphabet.letters = IUPACProtein().letters + '*'
    else:
        alphabet = Alphabet.DNAAlphabet()
        if extendAlphabet:
            alphabet.letters = "ACGTN"
        else:
            alphabet.letters = "ACGT"

    if align:
        alphabet.letters += '-'
    return alphabet

Exemplo n.º 2

0

Exibir arquivo

Arquivo: baum_welch_trainer.py Projeto: kuaka/igc_lib

def get_flying_sequence(flight):
    """Wrap the emissions of ``flight`` in a ``TrainingSequence``.

    Every value yielded by ``flight._flying_emissions()`` is encoded as the
    letter "F" when it equals 1 and as "S" otherwise. The state path of the
    returned training sequence is left empty.
    """
    state_alphabet = Alphabet()
    state_alphabet.letters = list("fs")
    emissions_alphabet = Alphabet()
    emissions_alphabet.letters = list("FS")

    encoded = "".join(
        "F" if value == 1 else "S" for value in flight._flying_emissions()
    )
    observed = Seq(encoded, emissions_alphabet)
    no_states = Seq("", state_alphabet)
    return TrainingSequence(observed, no_states)

Exemplo n.º 3

0

Exibir arquivo

def as_biopython_seq(seq):
    """Return ``seq`` as a ``Seq`` object.

    A ``Seq`` instance is returned unchanged; a ``str`` is wrapped in a new
    ``Seq`` with a default ``Alphabet()``. Any other type raises ``Exception``.
    """
    # Already the right type: hand it back untouched.
    if isinstance(seq, Seq):
        return seq
    if isinstance(seq, str):
        return Seq(seq, Alphabet())
    raise Exception('Cannot resolve type %s as Biopython Seq' % type(seq))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: baum_welch_trainer.py Projeto: kuaka/igc_lib

def initial_markov_model_flying():
    """Create the starting Markov model over states "f"/"s" and emissions "F"/"S".

    The builder is given initial probabilities, all transitions are allowed,
    and fixed transition and emission scores are set before the model is
    built and returned.
    """
    state_alphabet = Alphabet()
    state_alphabet.letters = list("fs")
    emissions_alphabet = Alphabet()
    emissions_alphabet.letters = list("FS")

    # (from_state, to_state, score), applied in this order.
    transition_scores = (
        ('f', 'f', 0.99),
        ('f', 's', 0.01),
        ('s', 'f', 0.01),
        ('s', 's', 0.99),
    )
    # (state, emitted_letter, score), applied in this order.
    emission_scores = (
        ('f', 'F', 0.90),
        ('f', 'S', 0.10),
        ('s', 'F', 0.10),
        ('s', 'S', 0.90),
    )

    mmb = MarkovModelBuilder(state_alphabet, emissions_alphabet)
    mmb.set_initial_probabilities({'f': 0.20, 's': 0.80})
    mmb.allow_all_transitions()
    for source, target, score in transition_scores:
        mmb.set_transition_score(source, target, score)
    for state, letter, score in emission_scores:
        mmb.set_emission_score(state, letter, score)
    return mmb.get_markov_model()

Exemplo n.º 5

0

Exibir arquivo

Arquivo: seqs.py Projeto: JoseBlanca/franklin

    def complement(self):
        """Returns the complement sequence. New Seq object.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import IUPAC
        >>> my_dna = Seq("CCCCCGATAG", IUPAC.unambiguous_dna)
        >>> my_dna
        Seq('CCCCCGATAG', IUPACUnambiguousDNA())
        >>> my_dna.complement()
        Seq('GGGGGCTATC', IUPACUnambiguousDNA())

        You can of course use mixed case sequences,

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_dna
        >>> my_dna = Seq("CCCCCgatA-GD", generic_dna)
        >>> my_dna
        Seq('CCCCCgatA-GD', DNAAlphabet())
        >>> my_dna.complement()
        Seq('GGGGGctaT-CH', DNAAlphabet())

        Note in the above example, ambiguous character D denotes
        G, A or T so its complement is H (for C, T or A).

        Trying to complement a protein sequence raises an exception.

        >>> my_protein = Seq("MAIVMGR", IUPAC.protein)
        >>> my_protein.complement()
        Traceback (most recent call last):
           ...
        ValueError: Proteins do not have complements!
        """
        base = Alphabet._get_base_alphabet(self.alphabet)
        if isinstance(base, Alphabet.ProteinAlphabet):
            raise ValueError("Proteins do not have complements!")

        if isinstance(base, Alphabet.DNAAlphabet):
            ttable = _dna_complement_table
        elif isinstance(base, Alphabet.RNAAlphabet):
            ttable = _rna_complement_table
        else:
            # The alphabet does not say DNA or RNA, so decide from the letters:
            # any U/u means RNA, unless T/t is present as well.
            data = self._data
            has_uracil = "U" in data or "u" in data
            if has_uracil and ("T" in data or "t" in data):
                # TODO - Handle this cleanly?
                raise ValueError("Mixed RNA/DNA found")
            ttable = _rna_complement_table if has_uracil else _dna_complement_table

        # A translation table is much faster on really long sequences than a
        # per-letter loop (thx to Michael Palmer, University of Waterloo).
        complemented = str(self).translate(ttable)
        return self.__class__(complemented, self.alphabet)

Exemplo n.º 6

0

Exibir arquivo

def get_flying_sequence(flight):
    """Return a ``TrainingSequence`` holding the emission letters of ``flight``.

    Values from ``flight._flying_emissions()`` equal to 1 become "F"; every
    other value becomes "S". The accompanying state sequence is empty.
    """
    state_alphabet = Alphabet()
    state_alphabet.letters = list("fs")
    emissions_alphabet = Alphabet()
    emissions_alphabet.letters = list("FS")

    letters = [("F" if x == 1 else "S") for x in flight._flying_emissions()]
    return TrainingSequence(
        Seq("".join(letters), emissions_alphabet),
        Seq("", state_alphabet),
    )

Exemplo n.º 7

0

Exibir arquivo

def open_fasta(file_path, ab=Alphabet()):
    """
    Read all records of a fasta file.

    :param file_path: path of the fasta file
    :param ab: optional, alphabet object forwarded to ``SeqIO.parse`` as its
               ``alphabet`` argument; defaults to ``Alphabet()`` (the default
               instance is created once, when the function is defined)
    :type file_path: String
    :type ab: alphabet object

    :return: bioseq, seq_id list of sequence id
    :rtype: list of BioPython sequences, list of String

    :exceptions:
        FileNotFoundError if file could not be opened
        raises FileNotFoundError

        other exception:
        raises general Exception
                (used for tracing in the main function)

    .. note:: requires BioPython library
    """
    bioseq = []
    seq_id = []

    try:
        for seq_record in SeqIO.parse(file_path, "fasta", alphabet=ab):
            bioseq.append(seq_record)
            seq_id.append(seq_record.id)
    except FileNotFoundError as not_found_error:
        raise FileNotFoundError("Error while processing open_fasta function.\
                                 \nFile not found error. {} ".format(
            not_found_error)) from not_found_error
    # The original handler read ``except err:``, which references an undefined
    # name and would itself fail with NameError; bind the exception properly.
    except Exception as err:
        raise Exception("Error while processing open_fasta function.\
                        \nUnable to read fasta file: {}. \nERROR: {} "\
                        .format(file_path, err)) from err

    return bioseq, seq_id

Exemplo n.º 8

0

Exibir arquivo

def initial_markov_model_flying():
    """Build and return the initial two-state Markov model.

    States are "f" and "s"; emitted letters are "F" and "S". Staying in the
    same state scores 0.99 and switching scores 0.01. A state emitting its
    own upper-case letter scores 0.90, the other letter 0.10.
    """
    states = "fs"
    symbols = "FS"

    state_alphabet = Alphabet()
    state_alphabet.letters = list(states)
    emissions_alphabet = Alphabet()
    emissions_alphabet.letters = list(symbols)

    mmb = MarkovModelBuilder(state_alphabet, emissions_alphabet)
    mmb.set_initial_probabilities({'f': 0.20, 's': 0.80})
    mmb.allow_all_transitions()

    # Transitions are set in the order f->f, f->s, s->f, s->s.
    for origin in states:
        for destination in states:
            score = 0.99 if origin == destination else 0.01
            mmb.set_transition_score(origin, destination, score)

    # Emissions are set in the order f/F, f/S, s/F, s/S.
    for state in states:
        for symbol in symbols:
            score = 0.90 if symbol == state.upper() else 0.10
            mmb.set_emission_score(state, symbol, score)

    mm = mmb.get_markov_model()
    return mm

Exemplo n.º 9

0

Exibir arquivo

Arquivo: headers_to_bed.py Projeto: peritob/Myrtaceae_NLR_workflow

# Command-line arguments, in order: header file, extension length,
# fasta file, output bed file.
headerFileName = sys.argv[1]
extend = int(sys.argv[2])
fastaFileName = sys.argv[3]
bedFileName = sys.argv[4]

# Header lines have the form ">id:start-end(strand)". A raw string keeps the
# regex escapes (\d, \() from being treated as invalid string escapes.
headerPattern = re.compile(r">([^:]*):(\d*)-(\d*)\(([+-])\)")

with open(headerFileName) as headerFile:
   # read the header file data into a list of (id, start, end, strand) tuples
   headerList = []
   for line in headerFile:
      if not line.strip():
         # tolerate blank lines instead of failing on a missing match
         continue
      m = headerPattern.search(line)
      if m is None:
         raise ValueError("Unrecognised header line: %r" % line)
      headerList.append((m.group(1), int(m.group(2)), int(m.group(3)), m.group(4)))
   # build a set of all the seq ids
   headerIdSet = {t[0] for t in headerList}
   # group the (start, end, strand) tuples by id in a single pass over the list
   headerGroups = {}
   for t in headerList:
      headerGroups.setdefault(t[0], []).append((t[1], t[2], t[3]))
   # now build a dictionary with the id as the key and the list of related tuples,
   # sorted by start, as the value (the id is left out of the tuple)
   headerDict = {seqId: sorted(headerGroups[seqId], key=lambda t: t[0]) for seqId in headerIdSet}

# the only thing we need from the fasta file is the sequence lengths
with open(fastaFileName, "r") as fastaFile:
   for seqRec in SeqIO.parse(fastaFile, "fasta", Alphabet()):
      if seqRec.id in headerDict:
         seqLen = len(seqRec.seq)
         tupleList = headerDict[seqRec.id]
         for i, t in enumerate(tupleList):
            # special case - only one value in the list
            if len(tupleList) == 1:
               tupleList[0] = (max(t[0] - extend, 0), min(t[1] + extend, seqLen), t[2])
               continue
            # special case - first value but more than one in the list
            if i == 0:
               nextTuple = tupleList[1] 
               tupleList[0] = (max(t[0] - extend, 0), min(t[1] + extend, nextTuple[0] - 1), t[2])
               continue
            # special case - last value but more than one in the list
            if i == len(tupleList) - 1:

Exemplo n.º 10

0

Exibir arquivo

Arquivo: ex30.py Projeto: dushabella/Introduction_to_Bioinformatics

"""**Elia's solution**"""

#INPUT PHASE

# Read the whole input file; the fields below are picked out by line index.
with open('input.txt','r') as handle:
    lines = handle.readlines()

# Line 0: number of iterations; line 2: the observed string.
iterations = int(lines[0])
x = lines[2].strip()

# Line 4: whitespace-separated emission symbols.
alphabet = lines[4].split()
emission_alphabet = Alphabet()
emission_alphabet.size = 1
emission_alphabet.letters = alphabet

# Line 6: whitespace-separated state names.
states = lines[6].split()
states_alphabet = Alphabet()
states_alphabet.size = 1
states_alphabet.letters = states

# Transition matrix: one row per state starting at line 9; the first token of
# each row is skipped and the following tokens are read as probabilities.
n_states = len(states)
transition_probs = np.zeros((n_states, n_states))

for i in range(n_states):
    prob = lines[9 + i].split()
    for j in range(n_states):
        transition_probs[i][j] = float(prob[1 + j])

Exemplo n.º 11

0

Exibir arquivo

Arquivo: primer_design.py Projeto: Jian-Zhan/pydna

def cloning_primers(template,
                    minlength=16,
                    maxlength=29,
                    fp=None,
                    rp=None,
                    fp_tail='',
                    rp_tail='',
                    target_tm=55.0,
                    primerc=1000.0,
                    saltc=50.0,
                    formula=tmbresluc):
    '''This function can design primers for PCR amplification of a given sequence.
    This function accepts a Dseqrecord object containing the template sequence and
    returns a tuple containing two :mod:`Bio.SeqRecord.SeqRecord` objects describing
    the primers.

    Primer tails can optionally be given in the form of strings.

    A predesigned primer can be given, either the forward or reverse primers. In this
    case this function tries to design a primer with a Tm to match the given primer.


    Parameters
    ----------

    template : Dseqrecord
        a Dseqrecord object.

    minlength : int, optional
        Minimum length of the annealing part of the primer

    maxlength : int, optional
        Maximum length (including tail) for designed primers.

    fp, rp : SeqRecord, optional
        optional Biopython SeqRecord objects containing one primer each.

    fp_tail, rp_tail : string, optional
        optional tails to be added to the forward or reverse primers

    target_tm : float, optional
        target tm for the primers

    primerc : float, optional
        Concentration of each primer in nM, set to 1000.0 nM by default

    saltc  : float, optional
        Salt concentration (monovalent cations) :mod:`tmbresluc` set to 50.0 mM by default

    formula : function
        formula used for tm calculation
        this is the name of a function.
        built in options are:

        1. :func:`pydna.amplify.tmbresluc` (default)
        2. :func:`pydna.amplify.basictm`
        3. :func:`pydna.amplify.tmstaluc98`
        4. :func:`pydna.amplify.tmbreslauer86`

        These functions are imported from the :mod:`pydna.amplify` module, but can be
        substituted for some other custom made function.

    Returns
    -------
    fp, rp : tuple
        fp is a :mod:`Bio.SeqRecord` object describing the forward primer
        rp is a :mod:`Bio.SeqRecord` object describing the reverse primer

    Raises
    ------
    Exception
        if both fp and rp are given.



    Examples
    --------

    >>> import pydna
    >>> t=pydna.Dseqrecord("atgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatg")
    >>> t
    Dseqrecord(-64)
    >>> pf,pr = pydna.cloning_primers(t)
    >>> pf
    Primer(seq=Seq('atgactgctaacccttc', IUPACAmbiguousDNA()), id='pfw64', name='pfw64', description='pfw64', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('catcgtaagtttcgaac', IUPACAmbiguousDNA()), id='prv64', name='prv64', description='prv64', dbxrefs=[])
    >>> pcr_prod = pydna.pcr(pf, pr, t)
    >>> pcr_prod
    Amplicon(64)
    >>>
    >>> print pcr_prod.figure()
    5atgactgctaacccttc...gttcgaaacttacgatg3
                         ||||||||||||||||| tm 42.4 (dbd) 52.9
                        3caagctttgaatgctac5
    5atgactgctaacccttc3
     ||||||||||||||||| tm 44.5 (dbd) 54.0
    3tactgacgattgggaag...caagctttgaatgctac5
    >>> pf,pr = pydna.cloning_primers(t, fp_tail="GGATCC", rp_tail="GAATTC")
    >>> pf
    Primer(seq=Seq('GGATCCatgactgctaacccttc', IUPACAmbiguousDNA()), id='pfw64', name='pfw64', description='pfw64', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('GAATTCcatcgtaagtttcgaac', IUPACAmbiguousDNA()), id='prv64', name='prv64', description='prv64', dbxrefs=[])
    >>> pcr_prod = pydna.pcr(pf, pr, t)
    >>> print pcr_prod.figure()
          5atgactgctaacccttc...gttcgaaacttacgatg3
                               ||||||||||||||||| tm 42.4 (dbd) 52.9
                              3caagctttgaatgctacCTTAAG5
    5GGATCCatgactgctaacccttc3
           ||||||||||||||||| tm 44.5 (dbd) 54.0
          3tactgacgattgggaag...caagctttgaatgctac5
    >>> print pcr_prod.seq
    GGATCCatgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatgGAATTC
    >>>
    >>> from Bio.Seq import Seq
    >>> from Bio.SeqRecord import SeqRecord
    >>> pf = SeqRecord(Seq("atgactgctaacccttccttggtgttg"))
    >>> pf,pr = pydna.cloning_primers(t, fp = pf, fp_tail="GGATCC", rp_tail="GAATTC")
    >>> pf
    Primer(seq=Seq('GGATCCatgactgctaacccttccttggtgttg', Alphabet()), id='pfw64', name='pfw64', description='pfw64', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('GAATTCcatcgtaagtttcgaacgaaatgtcgtc', IUPACAmbiguousDNA()), id='prv64', name='prv64', description='prv64', dbxrefs=[])
    >>> ampl = pydna.pcr(pf,pr,t)
    >>> print ampl.figure()
          5atgactgctaacccttccttggtgttg...gacgacatttcgttcgaaacttacgatg3
                                         |||||||||||||||||||||||||||| tm 57.5 (dbd) 72.2
                                        3ctgctgtaaagcaagctttgaatgctacCTTAAG5
    5GGATCCatgactgctaacccttccttggtgttg3
           ||||||||||||||||||||||||||| tm 59.0 (dbd) 72.3
          3tactgacgattgggaaggaaccacaac...ctgctgtaaagcaagctttgaatgctac5
    >>>


    '''

    if fp and not rp:
        # Only the forward primer is given: prepend its tail, anneal it to the
        # template and take footprint and tail from the annealing result. The
        # reverse candidate is cut from the end of the template and the Tm of
        # the given primer's footprint becomes the target.
        fp = Primer(Seq(fp_tail, IUPACAmbiguousDNA())) + fp
        p = Anneal([fp], template).fwd_primers.pop()
        fp = Primer(p.footprint)
        fp_tail = Primer(p.tail)
        rp = Primer(
            Seq(
                str(template[-(maxlength * 3 -
                               len(rp_tail)):].reverse_complement().seq),
                IUPACAmbiguousDNA()))
        target_tm = formula(str(fp.seq).upper(), primerc=primerc, saltc=saltc)
    elif not fp and rp:
        # Mirror image of the branch above, for a given reverse primer.
        rp = Primer(Seq(rp_tail, IUPACAmbiguousDNA())) + rp
        p = Anneal([rp], template).rev_primers.pop()
        rp = Primer(p.footprint)
        rp_tail = Primer(p.tail)
        fp = Primer(
            Seq(str(template[:maxlength * 3 - len(fp_tail)].seq),
                IUPACAmbiguousDNA()))
        target_tm = formula(str(rp.seq).upper(), primerc=primerc, saltc=saltc)
    elif not fp and not rp:
        # No primer given: start both candidates at the longest length that
        # still leaves room for the tail within maxlength.
        fp = Primer(
            Seq(str(template[:maxlength - len(fp_tail)].seq),
                IUPACAmbiguousDNA()))
        rp = Primer(
            Seq(
                str(template[-maxlength +
                             len(rp_tail):].reverse_complement().seq),
                IUPACAmbiguousDNA()))
    else:
        raise Exception("Specify one or none of the primers, not both.")

    # Order the two candidates by Tm only. Sorting the bare tuples would fall
    # through to comparing the Primer objects whenever both Tm values are
    # equal; with a key the sort is stable and never looks at the primers.
    lowtm, hightm = sorted([(formula(str(fp.seq), primerc, saltc), fp, "f"),
                            (formula(str(rp.seq), primerc, saltc), rp, "r")],
                           key=itemgetter(0))

    # Shorten the lower-Tm candidate from its end until it reaches the target
    # Tm or the minimum length.
    while lowtm[0] > target_tm and len(lowtm[1]) > minlength:
        shorter = lowtm[1][:-1]
        tm = formula(str(shorter.seq).upper(), primerc=primerc, saltc=saltc)
        lowtm = (tm, shorter, lowtm[2])

    # Shorten the higher-Tm candidate until it is within 2.0 of the other one
    # or reaches the minimum length.
    while hightm[0] > lowtm[0] + 2.0 and len(hightm[1]) > minlength:
        shorter = hightm[1][:-1]
        tm = formula(str(shorter.seq).upper(), primerc=primerc, saltc=saltc)
        hightm = (tm, shorter, hightm[2])

    # The "f"/"r" tag sorts the forward primer first again.
    fp, rp = sorted((lowtm, hightm), key=itemgetter(2))

    fp = fp_tail + fp[1]
    rp = rp_tail + rp[1]

    fp.description = "pfw{}".format(len(template))
    rp.description = "prv{}".format(len(template))

    fp.name = fp.description[:15]
    rp.name = rp.description[:15]

    fp.id = fp.name
    rp.id = rp.name

    #assert minlength<=len(fp)<=maxlength
    #assert minlength<=len(rp)<=maxlength

    # Replace a generic alphabet with the ambiguous DNA alphabet.
    if fp.seq.alphabet == Alphabet():
        fp.seq.alphabet = IUPACAmbiguousDNA()
    if rp.seq.alphabet == Alphabet():
        rp.seq.alphabet = IUPACAmbiguousDNA()

    return fp, rp

Exemplo n.º 12

0

Exibir arquivo

Arquivo: primer_design.py Projeto: Jian-Zhan/pydna

 def __init__(self, seq, *args, **kwargs):
     """Initialise the primer from ``seq``.

     When ``seq.alphabet`` compares equal to a default ``Alphabet()`` it is
     replaced (on the passed-in object) by ``IUPACAmbiguousDNA()``. ``seq``
     and all remaining arguments are then forwarded to the parent
     initialiser.
     """
     has_generic_alphabet = seq.alphabet == Alphabet()
     if has_generic_alphabet:
         seq.alphabet = IUPACAmbiguousDNA()
     super(Primer, self).__init__(seq, *args, **kwargs)

Exemplo n.º 13

0

Exibir arquivo

def cloning_primers(template,
                    minlength=16,
                    maxlength=29,
                    fp=None,
                    rp=None,
                    fp_tail='',
                    rp_tail='',
                    target_tm=55.0,
                    primerc=1000.0,
                    saltc=50.0,
                    formula=tmbresluc,
                    path=u""):
    '''This function can design primers for PCR amplification of a given sequence.
    This function accepts a Dseqrecord object containing the template sequence and
    returns a tuple containing two :mod:`Bio.SeqRecord.SeqRecord` objects describing
    the primers.

    Primer tails can optionally be given in the form of strings.

    A predesigned primer can be given, either the forward or reverse primers. In this
    case this function tries to design a primer with a Tm to match the given primer.


    Parameters
    ----------

    template : Dseqrecord
        a Dseqrecord object.

    minlength : int, optional
        Minimum length of the annealing part of the primer

    maxlength : int, optional
        Maximum length (including tail) for designed primers.

    fp, rp : SeqRecord, optional
        optional Biopython SeqRecord objects containing one primer each.

    fp_tail, rp_tail : string, optional
        optional tails to be added to the forward or reverse primers

    target_tm : float, optional
        target tm for the primers

    primerc : float, optional
        Concentration of each primer in nM, set to 1000.0 nM by default

    saltc  : float, optional
        Salt concentration (monovalent cations) :mod:`tmbresluc` set to 50.0 mM by default

    formula : function
        formula used for tm calculation
        this is the name of a function.
        built in options are:

        1. :func:`pydna.amplify.tmbresluc` (default)
        2. :func:`pydna.amplify.basictm`
        3. :func:`pydna.amplify.tmstaluc98`
        4. :func:`pydna.amplify.tmbreslauer86`

        These functions are imported from the :mod:`pydna.amplify` module, but can be
        substituted for some other custom made function.

    path : unicode, optional
        This variable can be set to a path to a text file, which will be created
        if it does not exist.
        This file (if it exists) will be parsed for sequences in fasta or
        genbank format and a Biopython SeqRecord object will be created for
        each sequence.

        If a SeqRecord object is found with the same description as any of the
        primers designed, the SeqRecord object parsed from the file will be
        returned by this function instead of the newly designed primer.

        If no sequence with the same description can be found, the primer(s)
        will be appended to the file in fasta format.


    Returns
    -------
    fp, rp : tuple
        fp is a :mod:`Bio.SeqRecord` object describing the forward primer
        rp is a :mod:`Bio.SeqRecord` object describing the reverse primer

    Raises
    ------
    Exception
        if both fp and rp are given.



    Examples
    --------

    >>> import pydna
    >>> t=pydna.Dseqrecord("atgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatg")
    >>> t
    Dseqrecord(-64)
    >>> pf,pr = pydna.cloning_primers(t)
    >>> pf
    Primer(seq=Seq('atgactgctaacccttc', IUPACAmbiguousDNA()), id='fw64', name='fw64', description='fw64 -', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('catcgtaagtttcgaac', IUPACAmbiguousDNA()), id='rv64', name='rv64', description='rv64 -', dbxrefs=[])
    >>> pcr_prod = pydna.pcr(pf, pr, t)
    >>> pcr_prod
    Amplicon(64)
    >>>
    >>> print(pcr_prod.figure())
    5atgactgctaacccttc...gttcgaaacttacgatg3
                         ||||||||||||||||| tm 42.4 (dbd) 52.9
                        3caagctttgaatgctac5
    5atgactgctaacccttc3
     ||||||||||||||||| tm 44.5 (dbd) 54.0
    3tactgacgattgggaag...caagctttgaatgctac5
    >>> pf,pr = pydna.cloning_primers(t, fp_tail="GGATCC", rp_tail="GAATTC")
    >>> pf
    Primer(seq=Seq('GGATCCatgactgctaacccttc', IUPACAmbiguousDNA()), id='fw64', name='fw64', description='fw64 -', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('GAATTCcatcgtaagtttcgaac', IUPACAmbiguousDNA()), id='rv64', name='rv64', description='rv64 -', dbxrefs=[])
    >>> pcr_prod = pydna.pcr(pf, pr, t)
    >>> print(pcr_prod.figure())
          5atgactgctaacccttc...gttcgaaacttacgatg3
                               ||||||||||||||||| tm 42.4 (dbd) 52.9
                              3caagctttgaatgctacCTTAAG5
    5GGATCCatgactgctaacccttc3
           ||||||||||||||||| tm 44.5 (dbd) 54.0
          3tactgacgattgggaag...caagctttgaatgctac5
    >>> print(pcr_prod.seq)
    GGATCCatgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatgGAATTC
    >>>
    >>> from Bio.Seq import Seq
    >>> from Bio.SeqRecord import SeqRecord
    >>> pf = SeqRecord(Seq("atgactgctaacccttccttggtgttg"))
    >>> pf,pr = pydna.cloning_primers(t, fp = pf, fp_tail="GGATCC", rp_tail="GAATTC")
    >>> pf
    Primer(seq=Seq('GGATCCatgactgctaacccttccttggtgttg', Alphabet()), id='fw64', name='fw64', description='fw64 -', dbxrefs=[])
    >>> pr
    Primer(seq=Seq('GAATTCcatcgtaagtttcgaacgaaatgtcgtc', IUPACAmbiguousDNA()), id='rv64', name='rv64', description='rv64 -', dbxrefs=[])
    >>> ampl = pydna.pcr(pf,pr,t)
    >>> print(ampl.figure())
          5atgactgctaacccttccttggtgttg...gacgacatttcgttcgaaacttacgatg3
                                         |||||||||||||||||||||||||||| tm 57.5 (dbd) 72.2
                                        3ctgctgtaaagcaagctttgaatgctacCTTAAG5
    5GGATCCatgactgctaacccttccttggtgttg3
           ||||||||||||||||||||||||||| tm 59.0 (dbd) 72.3
          3tactgacgattgggaaggaaccacaac...ctgctgtaaagcaagctttgaatgctac5
    >>>


    '''

    if fp and not rp:
        # Only the forward primer is given: prepend its tail, anneal it to the
        # template and take footprint and tail from the annealing result. The
        # reverse candidate is cut from the end of the template and the Tm of
        # the given primer's footprint becomes the target.
        fp = Primer(Seq(fp_tail, IUPACAmbiguousDNA())) + fp
        p = Anneal([fp], template).fwd_primers.pop()
        fp = Primer(p.footprint)
        fp_tail = Primer(p.tail)
        rp = Primer(
            Seq(
                str(template[-(maxlength * 3 -
                               len(rp_tail)):].reverse_complement().seq),
                IUPACAmbiguousDNA()))
        target_tm = formula(str(fp.seq).upper(), primerc=primerc, saltc=saltc)
    elif not fp and rp:
        # Mirror image of the branch above, for a given reverse primer.
        rp = Primer(Seq(rp_tail, IUPACAmbiguousDNA())) + rp
        p = Anneal([rp], template).rev_primers.pop()
        rp = Primer(p.footprint)
        rp_tail = Primer(p.tail)
        fp = Primer(
            Seq(str(template[:maxlength * 3 - len(fp_tail)].seq),
                IUPACAmbiguousDNA()))
        target_tm = formula(str(rp.seq).upper(), primerc=primerc, saltc=saltc)
    elif not fp and not rp:
        # No primer given: start both candidates at the longest length that
        # still leaves room for the tail within maxlength.
        fp = Primer(
            Seq(str(template[:maxlength - len(fp_tail)].seq),
                IUPACAmbiguousDNA()))
        rp = Primer(
            Seq(
                str(template[-maxlength +
                             len(rp_tail):].reverse_complement().seq),
                IUPACAmbiguousDNA()))
    else:
        raise Exception("Specify maximum one of the two primers, not both.")

    # Order the two candidates by Tm only. Sorting the bare tuples would fall
    # through to comparing the Primer objects whenever both Tm values are
    # equal; with a key the sort is stable and never looks at the primers.
    lowtm, hightm = sorted([(formula(str(fp.seq), primerc, saltc), fp, "f"),
                            (formula(str(rp.seq), primerc, saltc), rp, "r")],
                           key=itemgetter(0))

    # Shorten the lower-Tm candidate from its end until it reaches the target
    # Tm or the minimum length.
    while lowtm[0] > target_tm and len(lowtm[1]) > minlength:
        shorter = lowtm[1][:-1]
        tm = formula(str(shorter.seq).upper(), primerc=primerc, saltc=saltc)
        lowtm = (tm, shorter, lowtm[2])

    # Shorten the higher-Tm candidate until it is within 2.0 of the other one
    # or reaches the minimum length.
    while hightm[0] > lowtm[0] + 2.0 and len(hightm[1]) > minlength:
        shorter = hightm[1][:-1]
        tm = formula(str(shorter.seq).upper(), primerc=primerc, saltc=saltc)
        hightm = (tm, shorter, hightm[2])

    # The "f"/"r" tag sorts the forward primer first again.
    fp, rp = sorted((lowtm, hightm), key=itemgetter(2))

    fp = fp_tail + fp[1]
    rp = rp_tail + rp[1]

    #fp.description = "fw{}".format(len(template))
    #rp.description = "rv{}".format(len(template))

    #fp.name = "fw{}".format(len(template))[:15]
    #rp.name = "rv{}".format(len(template))[:15]

    fp.description = "fw{}".format(len(template)) + ' ' + template.accession
    rp.description = "rv{}".format(len(template)) + ' ' + template.accession

    fp.id = "fw{}".format(len(template))
    rp.id = "rv{}".format(len(template))

    fp.name = fp.id
    rp.name = rp.id

    # Replace a generic alphabet with the ambiguous DNA alphabet.
    if fp.seq.alphabet == Alphabet():
        fp.seq.alphabet = IUPACAmbiguousDNA()
    if rp.seq.alphabet == Alphabet():
        rp.seq.alphabet = IUPACAmbiguousDNA()


    # If the path argument is supplied, the primers are synchronised with the
    # file at that path. If the file cannot be read, it is (re)written with
    # both primers. Otherwise its content is parsed, a stored primer with the
    # same description replaces the newly designed one, and a primer that is
    # not found is appended to the file in fasta format.

    if path:
        try:
            # Plain 'r' mode: the 'U' flag is rejected with ValueError on
            # Python 3.11+, which the IOError handler below would not catch.
            with open(path, 'r') as f:
                raw = f.read()
        except IOError:
            raw = u""
            with open(path, 'w') as f:
                f.write(fp.format("fasta"))
                f.write(rp.format("fasta"))
        else:
            primer_dict = {x.description: x for x in parse(raw, ds=False)}
            try:
                fp = primer_dict[fp.description]
            except KeyError:
                with open(path, 'a') as f:
                    f.write(u"\n" + fp.format("fasta").strip())
            try:
                rp = primer_dict[rp.description]
            except KeyError:
                with open(path, 'a') as f:
                    f.write(u"\n" + rp.format("fasta").strip())
    return fp, rp

Exemplo n.º 14

0

Exibir arquivo

Arquivo: flag_fasta_target_regions.py Projeto: huddlej/fasta_tools


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("fasta")
    parser.add_argument("slop", type=int)
    parser.add_argument("flagged")
    parser.add_argument("--primer3", action="store_true", help="output in primer3-compatible input format")
    parser.add_argument("--min_primer_size", type=int, default=18)
    parser.add_argument("--opt_primer_size", type=int, default=20)
    parser.add_argument("--max_primer_size", type=int, default=27)
    parser.add_argument("--max_primers_to_return", type=int, default=5)
    args = parser.parse_args()

    slop = args.slop
    alphabet = Alphabet()
    alphabet.letters = ["A", "T", "C", "G", "[", "]"]

    with open(args.flagged, "w") as fh:
        for seq_record in SeqIO.parse(args.fasta, "fasta"):
            if args.primer3:
                sequence = str(seq_record.seq)
                fh.write("SEQUENCE_ID=%s\n" % seq_record.id)
                fh.write("SEQUENCE_TEMPLATE=%s\n" % sequence)
                fh.write("SEQUENCE_TARGET=%i,%i\n" % (slop, len(sequence) - 2 * slop))
                fh.write("PRIMER_OPT_SIZE=%i\n" % args.opt_primer_size)
                fh.write("PRIMER_MIN_SIZE=%i\n" % args.min_primer_size)
                fh.write("PRIMER_MAX_SIZE=%i\n" % args.max_primer_size)
                fh.write("PRIMER_NUM_RETURN=%i\n" % args.max_primers_to_return)
                fh.write("=\n")
            else: