Exemplos de DNA em Python, exemplos de coral.DNA em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: needle.py Projeto: vuthanhcraz/coral

def needle(reference, query, gap_open=-15, gap_extend=0,
           matrix=submat.DNA_SIMPLE):
    '''Globally align a query against a reference (Needleman-Wunsch).

    :param reference: Reference sequence.
    :type reference: coral.DNA
    :param query: Sequence to align against the reference.
    :type query: coral.DNA
    :param gap_open: Penalty for opening a gap.
    :type gap_open: float
    :param gap_extend: Penalty for extending a gap.
    :type gap_extend: float
    :param matrix: Substitution matrix object (default submat.DNA_SIMPLE).
                   Its ``matrix`` and ``alphabet`` attributes are handed to
                   the aligner and to the scoring function.
    :type matrix: object exposing ``matrix`` and ``alphabet``
    :returns: (aligned reference, aligned query, score)
    :rtype: tuple of two coral.DNA instances and the alignment score

    '''
    reference_str = str(reference)
    query_str = str(query)

    # Run the aligner in its 'global_cfe' mode on the plain strings
    aligned_pair = aligner(reference_str,
                           query_str,
                           gap_open=gap_open,
                           gap_extend=gap_extend,
                           method='global_cfe',
                           matrix=matrix.matrix,
                           alphabet=matrix.alphabet)
    gapped_reference, gapped_query = aligned_pair

    # Score the finished alignment with the same penalties and matrix
    alignment_score = score_alignment(gapped_reference, gapped_query,
                                      gap_open, gap_extend,
                                      matrix.matrix, matrix.alphabet)

    return cr.DNA(gapped_reference), cr.DNA(gapped_query), alignment_score

Exemplo n.º 2

0

Exibir arquivo

Arquivo: needle.py Projeto: vuthanhcraz/coral

def needle_msa(reference, results, gap_open=-15, gap_extend=0,
               matrix=submat.DNA_SIMPLE):
    '''Create a multiple sequence alignment anchored on a reference.

    Every result is aligned against the reference with :func:`needle`. Gaps
    are then inserted, column by column, until every aligned copy of the
    reference is identical, which puts all aligned results on a common set of
    columns.

    :param reference: Reference sequence.
    :type reference: coral.DNA
    :param results: Sequences to align against the reference.
    :type results: iterable of coral.DNA
    :param gap_open: Penalty for opening a gap (passed to needle).
    :type gap_open: float
    :param gap_extend: Penalty for extending a gap (passed to needle).
    :type gap_extend: float
    :param matrix: Substitution matrix object (passed to needle).
    :returns: The aligned reference followed by each aligned result, in input
              order. With no results, only the reference is returned.
    :rtype: list of coral.DNA
    :raises: ValueError if the aligned references still differ once every one
             of them has been read to its end.

    '''
    gap = '-'
    # Mutable [aligned reference, aligned result] string pairs
    alignments = []
    for result in results:
        ref_dna, res_dna, _ = needle(reference, result, gap_open=gap_open,
                                     gap_extend=gap_extend,
                                     matrix=matrix)
        alignments.append([str(ref_dna), str(res_dna)])

    if not alignments:
        # Nothing to harmonise against
        return [cr.DNA(str(reference))]

    def insert_gap(sequence, position):
        return sequence[:position] + gap + sequence[position:]

    # Walk the columns until all references agree. There is deliberately no
    # fixed cap on the column count: a cap would return alignments that were
    # only partly harmonised.
    column = 0
    while True:
        refs = [pair[0][column] for pair in alignments]

        # If there's a non-unanimous gap, insert gap into the alignments that
        # lack it at this column
        gaps = [ref == gap for ref in refs]
        if any(gaps) and not all(gaps):
            for pair in alignments:
                if pair[0][column] != gap:
                    pair[0] = insert_gap(pair[0], column)
                    pair[1] = insert_gap(pair[1], column)

        # If all references match, we're all done
        if len(set(pair[0] for pair in alignments)) == 1:
            break

        exhausted = [len(pair[0]) == column + 1 for pair in alignments]
        if all(exhausted):
            # Every reference ended here yet they differ; padding them all
            # would loop forever, so report the inconsistency instead
            raise ValueError('Aligned references could not be reconciled')

        # Pad the references that ended at this column so the next column
        # exists for every alignment
        for pair, done in zip(alignments, exhausted):
            if done:
                pair[0] = pair[0] + gap
                pair[1] = pair[1] + gap

        column += 1

    # Convert into MSA format: shared reference first, then every result
    output_alignment = [cr.DNA(alignments[0][0])]
    for pair in alignments:
        output_alignment.append(cr.DNA(pair[1]))

    return output_alignment

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_pcr.py Projeto: vuthanhcraz/coral

 def test_overhang(self):
     '''Check that both primer overhangs show up on the amplicon.'''
     # Primers are designed on a copy of the template with 30 bases trimmed
     # from each end; the PCR itself runs on the full template
     core = self.template[30:-30]
     overhang_fwd = cr.DNA('AGCGGGGGGGGGCTGGGGCTGAT')
     overhang_rev = cr.DNA('GGGTGGGGGGGGGGGGGGG')
     forward = cr.design.primer(core, overhang=overhang_fwd)
     reverse = cr.design.primer(core.reverse_complement(),
                                overhang=overhang_rev)
     # Forward overhang leads; the reverse overhang trails as its reverse
     # complement
     amplicon = overhang_fwd + core + overhang_rev.reverse_complement()
     self.pcr_equal(amplicon, self.template, forward, reverse)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: test_pcr.py Projeto: vuthanhcraz/coral

 def __init__(self):
     # Part BBa_R0010 (pLac promoter), stored as chunks and joined into one
     # linear template
     chunks = [
         'caatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcag',
         'ctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgca',
         'attaatgtgagttagctcactcattaggcaccccaggctttacacttta',
         'tgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttca',
         'caca',
     ]
     self.template = cr.DNA(''.join(chunks), circular=False)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: random_sequences.py Projeto: vuthanhcraz/coral

def random_dna(n):
    '''Build a DNA sequence of n bases drawn at random from A, T, G and C.

    :param n: Output sequence length.
    :type n: int
    :returns: Random DNA sequence of length n.
    :rtype: coral.DNA

    '''
    picked = []
    for _ in range(n):
        picked.append(random.choice('ATGC'))
    return coral.DNA(''.join(picked))

Exemplo n.º 6

0

Exibir arquivo

Arquivo: _resect.py Projeto: klavinslab/coral

    def count_end_gaps(seq):
        # Count the run of gap characters at the start of seq, stopping at
        # the first element that is not a gap
        gap_base = coral.DNA('-')
        leading = 0
        for base in seq:
            if not base == gap_base:
                break
            leading += 1

        return leading

Exemplo n.º 7

0

Exibir arquivo

def test_primer_larger_than_template():
    '''PCR with a 200-base overhang on a 50-base template.

    The amplicon is expected to be the long overhang followed by the whole
    template; the second primer gets an empty overhang.
    '''
    template = cr.design.random_dna(50)
    long_overhang = cr.design.random_dna(200)
    overhangs = [long_overhang, cr.DNA('')]
    expected = long_overhang + template
    primer_pair = cr.design.primers(template,
                                    overhangs=overhangs,
                                    min_len=14)
    forward, reverse = primer_pair
    amplicon = cr.reaction.pcr(template, forward, reverse)

    assert_true(expected == amplicon)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: mafft.py Projeto: vuthanhcraz/coral

def MAFFT(sequences, gap_open=1.53, gap_extension=0.0, retree=2):
    '''A Coral wrapper for the MAFFT command line multiple sequence aligner.

    The sequences are written to a FASTA file in a temporary directory,
    ``mafft`` is run on that file, and the FASTA text it prints on stdout is
    parsed back into sequences. The temporary directory is always removed.

    :param sequences: A list of sequences to align.
    :type sequences: List of homogeneous sequences (all DNA, or all RNA,
                     etc.)
    :param gap_open: --op (gap open) penalty in MAFFT cli.
    :type gap_open: float
    :param gap_extension: --ep (gap extension) penalty in MAFFT cli.
    :type gap_extension: float
    :param retree: Number of times to build the guide tree.
    :type retree: int
    :returns: One sequence per FASTA record found in MAFFT's stdout.
    :rtype: list of coral.DNA

    '''
    arguments = ['mafft']
    arguments += ['--op', str(gap_open)]
    arguments += ['--ep', str(gap_extension)]
    arguments += ['--retree', str(retree)]
    arguments.append('input.fasta')
    tempdir = tempfile.mkdtemp()
    try:
        with open(os.path.join(tempdir, 'input.fasta'), 'w') as f:
            for i, sequence in enumerate(sequences):
                # Fall back on a positional name for unnamed sequences
                if hasattr(sequence, 'name'):
                    name = sequence.name
                else:
                    name = 'sequence{}'.format(i)
                f.write('>{}\n'.format(name))
                f.write(str(sequence) + '\n')
        # stderr is discarded; the context manager makes sure the devnull
        # handle is closed again instead of leaking
        with open(os.devnull, 'w') as devnull:
            process = subprocess.Popen(arguments,
                                       stdout=subprocess.PIPE,
                                       stderr=devnull,
                                       cwd=tempdir)
            stdout = process.communicate()[0]
    finally:
        shutil.rmtree(tempdir)

    # Process stdout into something downstream process can use. Splitting on
    # '>' leaves whatever preceded the first record in slot 0, so skip it.
    # NOTE(review): records are always wrapped in coral.DNA, even though the
    # input may be another sequence type - confirm this is intended.
    aligned_list = []
    for record in stdout.split('>')[1:]:
        # The first line of a record is its name; the rest is sequence
        sequence_lines = record.split('\n')[1:]
        aligned_list.append(coral.DNA(''.join(sequence_lines)))

    return aligned_list

Exemplo n.º 9

0

Exibir arquivo

def read_dna(path):
    '''Read DNA from file. Uses BioPython and coerces to coral format.

    The file format is chosen from the file extension: genbank (.gb, .ape),
    fasta (.fasta, .fa, .fsa, .seq) or abi (.abi, .ab1).

    :param path: Full path to input file.
    :type path: str
    :returns: DNA sequence.
    :rtype: coral.DNA
    :raises: ValueError if the file extension is not recognized.

    '''
    filename, ext = os.path.splitext(os.path.split(path)[-1])

    genbank_exts = ['.gb', '.ape']
    fasta_exts = ['.fasta', '.fa', '.fsa', '.seq']
    abi_exts = ['.abi', '.ab1']

    if ext in genbank_exts:
        file_format = 'genbank'
    elif ext in fasta_exts:
        file_format = 'fasta'
    elif ext in abi_exts:
        file_format = 'abi'
    else:
        raise ValueError('File format not recognized.')

    seq = SeqIO.read(path, file_format)
    dna = coral.DNA(str(seq.seq))
    # A record name of '.' is replaced by the file's base name
    if seq.name == '.':
        dna.name = filename
    else:
        dna.name = seq.name

    # Features - the ones rejected with FeatureNameError are skipped on
    # purpose so one bad feature does not block reading the sequence
    for feature in seq.features:
        try:
            dna.features.append(_seqfeature_to_coral(feature))
        except FeatureNameError:
            pass
    dna.features = sorted(dna.features, key=lambda feature: feature.start)
    # Used to use data_file_division, but it's inconsistent (not always the
    # molecule type)
    dna.topology = 'linear'
    # Only the first line is inspected: scanning the whole file would flag a
    # sequence as circular whenever the word appears anywhere, e.g. in an
    # annotation
    with open(path) as f:
        first_line = f.readline().split()
    if 'circular' in first_line:
        dna.topology = 'circular'

    return dna

Exemplo n.º 10

0

Exibir arquivo

Arquivo: oligo_assembly.py Projeto: eyu-boltthreads/coral

    def write_map(self, path):
        '''Write a genbank map with one 'misc' feature per overlap.

        Each feature starts at the first entry of the matching element of
        self.overlap_indices and spans the length of the matching overlap.

        :param path: full path to .gb file to write.
        :type path: str

        '''
        features = []
        for number, indices in enumerate(self.overlap_indices, 1):
            begin = indices[0]
            end = begin + len(self.overlaps[number - 1])
            label = 'overlap {}'.format(number)
            features.append(coral.Feature(label, begin, end, 'misc',
                                          strand=0))
        annotated = coral.DNA(self.template, features=features)
        coral.seqio.write_dna(annotated, path)

Exemplo n.º 11

0

Exibir arquivo

def get_yeast_sequence(chromosome, start, end, reverse_complement=False):
    '''Acquire a sequence from SGD http://www.yeastgenome.org

    The sequence is cut out of the HTML page returned by SGD's getSeq
    script. No request is made when start equals end; an empty sequence is
    returned instead.

    :param chromosome: Yeast chromosome.
    :type chromosome: int
    :param start: A biostart.
    :type start: int
    :param end: A bioend.
    :type end: int
    :param reverse_complement: Get the reverse complement.
    :type reverse_complement: bool
    :returns: A DNA sequence.
    :rtype: coral.DNA

    '''
    import requests

    if start != end:
        rev_option = '-REV' if reverse_complement else ''
        url = ''.join(['http://www.yeastgenome.org/cgi-bin/getSeq?map=a2map',
                       '&chr=', str(chromosome),
                       '&beg=', str(start),
                       '&end=', str(end),
                       '&rev=', rev_option])

        page = requests.get(url).text
        # The page is parsed by hand: the raw sequence sits between the
        # first <pre> tag and the first </pre> tag.
        open_tag = '<pre>'
        # index() points at the '<', so skip over the tag itself
        body_start = page.index(open_tag) + len(open_tag)
        body_end = page.index('</pre>')
        raw = page[body_start:body_end]
        sequence = raw.replace('\n', '').replace('\r', '')
    else:
        sequence = ''

    return coral.DNA(sequence)

Exemplo n.º 12

0

Exibir arquivo

def get_yeast_promoter_ypa(gene_name):
    '''Retrieve promoter from Yeast Promoter Atlas
    (http://ypa.csbb.ntu.edu.tw).

    The gene's location and id are looked up first and sent as download
    parameters; the second FASTA record of the response is returned.

    :param gene_name: Common name for yeast gene.
    :type gene_name: str
    :returns: Double-stranded DNA sequence of the promoter.
    :rtype: coral.DNA

    '''
    import requests

    loc = get_yeast_gene_location(gene_name)
    gid = get_gene_id(gene_name)
    query = {
        'act': 'download',
        'nucle': 'InVitro',
        'right': str(loc[2]),
        'left': str(loc[1]),
        'gene': str(gid),
        'chr': str(loc[0])
    }
    fasta_text = requests.get('http://ypa.csbb.ntu.edu.tw/do',
                              params=query).text

    def to_dna(record):
        # First line of a record is its name, the remaining lines its bases
        lines = record.split('\n')
        dna = coral.DNA(''.join(lines[1:]))
        dna.name = lines[0]
        return dna

    # FASTA records are name-sequence pairs introduced by '>'; the text in
    # front of the first '>' is not a record.
    # TODO: most of this is redundant, as we just want the 2nd record
    records = [to_dna(chunk) for chunk in fasta_text.split('>')[1:]]

    return records[1]

Exemplo n.º 13

0

Exibir arquivo

Arquivo: _yeast.py Projeto: eyu-boltthreads/coral

def get_yeast_sequence(chromosome, start, end, reverse_complement=False):
    """Acquire a sequence from SGD http://www.yeastgenome.org

    The sequence is scraped from the HTML page returned by SGD's getSeq
    script. When start equals end nothing is requested and the result is an
    empty sequence.

    :param chromosome: Yeast chromosome.
    :type chromosome: int
    :param start: A biostart.
    :type start: int
    :param end: A bioend.
    :type end: int
    :param reverse_complement: Get the reverse complement.
    :type reverse_complement: bool
    :returns: A DNA sequence.
    :rtype: coral.DNA

    """
    sequence = ""
    if start != end:
        if reverse_complement:
            rev_option = "-REV"
        else:
            rev_option = ""
        url = ("http://www.yeastgenome.org/cgi-bin/getSeq?map=a2map"
               "&chr=%s&beg=%s&end=%s&rev=%s"
               % (str(chromosome), str(start), str(end), rev_option))

        html = requests.get(url).text
        # The raw sequence is whatever lies between the first <pre> and the
        # first </pre>; index() gives the position of the '<', hence the +5
        # to step past the five characters of '<pre>'.
        pre_open = html.index("<pre>")
        pre_close = html.index("</pre>")
        fragment = html[pre_open + 5:pre_close]
        sequence = fragment.replace("\n", "").replace("\r", "")

    return coral.DNA(sequence)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: _yeast.py Projeto: eyu-boltthreads/coral

def get_yeast_promoter_ypa(gene_name):
    """Retrieve promoter from Yeast Promoter Atlas
    (http://ypa.csbb.ntu.edu.tw).

    Looks up the gene's location and id, requests the matching download from
    the atlas and returns the second FASTA record of the response.

    :param gene_name: Common name for yeast gene.
    :type gene_name: str
    :returns: Double-stranded DNA sequence of the promoter.
    :rtype: coral.DNA

    """
    loc = get_yeast_gene_location(gene_name)
    gid = get_gene_id(gene_name)
    request_params = {
        "act": "download",
        "nucle": "InVitro",
        "right": str(loc[2]),
        "left": str(loc[1]),
        "gene": str(gid),
        "chr": str(loc[0])
    }

    reply = requests.get("http://ypa.csbb.ntu.edu.tw/do",
                         params=request_params)
    # FASTA records are just name-sequence pairs split up by > e.g.
    # >my_dna_name
    # GACGATA
    # TODO: most of this is redundant, as we just want the 2nd record
    promoters = []
    for chunk in reply.text.split(">")[1:]:
        # Everything up to the first newline is the name; the rest is the
        # sequence with its line breaks removed
        header, _, body = chunk.partition("\n")
        promoter = coral.DNA(body.replace("\n", ""))
        promoter.name = header
        promoters.append(promoter)

    return promoters[1]

Exemplo n.º 15

0

Exibir arquivo

Arquivo: _rebase.py Projeto: eyu-boltthreads/coral

        except urllib2.HTTPError, e:
            print 'HTTP Error: {} {}'.format(e.code, url)
            print 'Falling back on default enzyme list'
            self._enzyme_dict = coral.constants.fallback_enzymes
        except urllib2.URLError, e:
            print 'URL Error: {} {}'.format(e.reason, url)
            print 'Falling back on default enzyme list'
            self._enzyme_dict = coral.constants.fallback_enzymes
        # Process into RestrictionSite objects? (depends on speed)
        print 'Processing into RestrictionSite instances.'
        self.restriction_sites = {}
        # TODO: make sure all names are unique
        for key, (site, cuts) in self._enzyme_dict.iteritems():
            # Make a site
            try:
                r = coral.RestrictionSite(coral.DNA(site), cuts, name=key)
                # Add it to dict with name as key
                self.restriction_sites[key] = r
            except ValueError:
                # Encountered ambiguous sequence, have to ignore it until
                # coral.DNA can handle ambiguous DNA
                pass

    def get(self, name):
        '''Retrieve enzyme by name.

        :param name: Name of the restriction enzyme, e.g. EcoRV.
        :type name: str
        :returns: Restriction site matching the input name.
        :rtype: coral.RestrictionSite
        :raises: Exception when enzyme is not found in the database.

Exemplo n.º 16

0

Exibir arquivo

 def __init__(self):
     # Fixtures: three short DNA sequences, their transcripts, and a
     # cr.analysis.NUPACK instance
     bases = ('GATACTAGCG', 'TACGATT', 'GATACG')
     self.dnas = [cr.DNA(sequence) for sequence in bases]
     self.rnas = [dna.transcribe() for dna in self.dnas]
     self.nupack = cr.analysis.NUPACK()

Exemplo n.º 17

0

Exibir arquivo

def convert_sequence(seq, to_material):
    '''Convert a sequence from one material to another.

    The following conversions are supported:
        Transcription (seq is DNA, to_material is 'rna')
        Reverse transcription (seq is RNA, to_material is 'dna')
        Translation (seq is RNA, to_material is 'peptide')

    Translation reads codons from the first base and stops at the first stop
    codon or when fewer than three bases remain.

    :param seq: DNA or RNA sequence.
    :type seq: coral.DNA or coral.RNA
    :param to_material: material to which to convert ('rna', 'dna', or
                        'peptide').
    :type to_material: str
    :returns: sequence of type coral.sequence.[material type]
    :raises: ValueError if asked to transcribe gapped DNA.
             ValueError if the requested conversion is not supported.
             KeyError if a codon is missing from the codon table.

    '''
    if isinstance(seq, coral.DNA) and to_material == 'rna':
        # Transcribe - can't transcribe a gap
        if '-' in seq:
            raise ValueError('Cannot transcribe gapped DNA')
        # Map DNA chars to RNA chars position by position. The last DNA
        # symbol is left out of the mapping (presumably the gap character -
        # TODO confirm against ALPHABETS).
        origin = ALPHABETS['dna'][:-1]
        destination = ALPHABETS['rna']
        code = dict(zip(origin, destination))
        converted = ''.join([code.get(str(k), str(k)) for k in seq])
        return coral.RNA(converted)

    if isinstance(seq, coral.RNA) and to_material == 'dna':
        # Reverse transcribe with the inverse mapping
        origin = ALPHABETS['rna']
        destination = ALPHABETS['dna'][:-1]
        code = dict(zip(origin, destination))
        converted = ''.join([code.get(str(k), str(k)) for k in seq])
        return coral.DNA(converted)

    if isinstance(seq, coral.RNA) and to_material == 'peptide':
        # Translate codon by codon; stepping through the string avoids the
        # quadratic cost of popping bases off the front of a list
        rna = str(seq)
        residues = []
        for start in range(0, len(rna) - 2, 3):
            amino_acid = CODONS[rna[start:start + 3].upper()]
            # Stop when stop codon is found
            if amino_acid == '*':
                break
            residues.append(amino_acid)
        return coral.Peptide(''.join(residues))

    # Every other combination is unsupported, including an RNA input with an
    # unknown target, which would otherwise leave nothing to return
    msg1 = 'Conversion from '
    msg2 = '{0} to {1} is not supported.'.format(seq.__class__.__name__,
                                                 to_material)
    raise ValueError(msg1 + msg2)

Exemplo n.º 18

0

Exibir arquivo

def fetch_yeast_locus_sequence(locus_name, flanking_size=0):
    '''Acquire a sequence from SGD http://www.yeastgenome.org.

    A flanking_size of 0 returns the residues of the locus itself. A positive
    flanking_size returns the residues of the first flanking-region row that
    matches. Any other value prints a message and returns an empty sequence.

    :param locus_name: Common name or systematic name for the locus (e.g. ACT1
                       or YFL039C).
    :type locus_name: str
    :param flanking_size: The length of flanking DNA (on each side) to return
    :type flanking_size: int
    :returns: Sequence taken from the first row of the query result.
    :rtype: coral.DNA

    '''
    from intermine.webservice import Service

    yeastmine = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Get a new query on the class (table) being queried
    query = yeastmine.new_query('Gene')

    if flanking_size == 0:
        # The view specifies the output columns
        locus_columns = ('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                         'name', 'sgdAlias', 'organism.shortName',
                         'sequence.length', 'sequence.residues',
                         'description', 'qualifier')
        query.add_view(*locus_columns)

        query.add_constraint('status', 'IS NULL', code='D')
        query.add_constraint('status', '=', 'Active', code='C')
        query.add_constraint('qualifier', 'IS NULL', code='B')
        query.add_constraint('qualifier', '!=', 'Dubious', code='A')
        query.add_constraint('Gene', 'LOOKUP', locus_name, 'S. cerevisiae',
                             code='E')

        # Combine the lettered constraints
        query.set_logic('(A or B) and (C or D) and E')

        row = query.rows().next()
        seq = coral.DNA(row['sequence.residues'])
    elif flanking_size > 0:
        # The view specifies the output columns
        # secondaryIdentifier: the systematic name (e.g. YFL039C)
        # symbol: short name (e.g. ACT1)
        # length: sequence length
        # flankingRegions.direction: Upstream or downstream (or both) of locus
        # flankingRegions.sequence.length: length of the flanking regions
        # flankingRegions.sequence.residues: sequence of the flanking regions
        flank_columns = ('secondaryIdentifier', 'symbol', 'length',
                         'flankingRegions.direction',
                         'flankingRegions.sequence.length',
                         'flankingRegions.sequence.residues')
        query.add_view(*flank_columns)

        query.add_constraint('flankingRegions.direction', '=', 'both',
                             code='A')
        query.add_constraint('Gene', 'LOOKUP', locus_name, 'S. cerevisiae',
                             code='B')
        # The distance is sent in kilobases with one decimal
        distance = '{:.1f}kb'.format(flanking_size / 1000.)
        query.add_constraint('flankingRegions.distance', '=', distance,
                             code='C')
        query.set_logic('A and B and C')

        # TODO: What to do when there's more than one result?
        # FIXME: Use logger module to report the fields of the first row
        row = query.rows().next()

        seq = coral.DNA(row['flankingRegions.sequence.residues'])
        # TODO: add more metadata
    else:
        print('Problem with the flanking region size....')
        seq = coral.DNA('')

    return seq

Exemplo n.º 19

0

Exibir arquivo

def primer(dna, tm=65, min_len=10, tm_undershoot=1, tm_overshoot=3,
           end_gc=False, tm_parameters='cloning', overhang=None,
           structure=False):
    '''Design primer to a nearest-neighbor Tm setpoint.

    Candidate primers are prefixes of the first 90 bases of the input,
    starting at min_len bases and growing one base at a time until the Tm
    exceeds tm + tm_overshoot. The candidate whose Tm is closest to tm is
    returned.

    :param dna: Sequence for which to design a primer.
    :type dna: coral.DNA
    :param tm: Ideal primer Tm in degrees C.
    :type tm: float
    :param min_len: Minimum primer length.
    :type min_len: int
    :param tm_undershoot: Allowed Tm undershoot.
    :type tm_undershoot: float
    :param tm_overshoot: Allowed Tm overshoot.
    :type tm_overshoot: float
    :param end_gc: Obey the 'end on G or C' rule.
    :type end_gc: bool
    :param tm_parameters: Melting temp calculator method to use.
    :type tm_parameters: string
    :param overhang: Append the primer to this overhang sequence. Its ``top``
                     attribute is what gets attached to the primer.
    :type overhang: coral.DNA
    :param structure: Evaluate primer for structure, with warning for high
                      structure.
    :type structure: bool
    :returns: A primer.
    :rtype: coral.Primer
    :raises: ValueError if the input sequence is lower than the Tm settings
             allow.
             ValueError if no candidate satisfies the settings, e.g. a primer
             ending with G or C can't be found, or the input is not longer
             than min_len.

    '''
    # Check Tm of input sequence to see if it's already too low
    seq_tm = coral.analysis.tm(dna, parameters=tm_parameters)
    if seq_tm < (tm - tm_undershoot):
        msg = 'Input sequence Tm is lower than primer Tm setting'
        raise ValueError(msg)
    # Focus on first 90 bases - shouldn't need more than 90bp to anneal
    dna = dna[0:90]

    # Generate primers from min_len to 'tm' + tm_overshoot
    # TODO: this is a good place for optimization. Only calculate as many
    # primers as are needed. Use binary search.
    primers_tms = []
    last_tm = 0
    bases = min_len
    # The length guard is '<' rather than '!=': with min_len larger than the
    # input, '!=' never becomes false and the loop could run forever.
    # NOTE(review): the full-length candidate (bases == len(dna)) is never
    # tried, as before - confirm whether that is intended.
    while last_tm <= tm + tm_overshoot and bases < len(dna):
        next_primer = dna[0:bases]
        last_tm = coral.analysis.tm(next_primer, parameters=tm_parameters)
        primers_tms.append((next_primer, last_tm))
        bases += 1

    # Trim primer list based on tm_undershoot and end_gc
    primers_tms = [(candidate, melt) for candidate, melt in primers_tms if
                   melt >= tm - tm_undershoot]
    if end_gc:
        base_c = coral.DNA('C')
        base_g = coral.DNA('G')
        primers_tms = [pair for pair in primers_tms if
                       pair[0][-1] == base_c or pair[0][-1] == base_g]
    if not primers_tms:
        raise ValueError('No primers could be generated using these settings')

    # Find the primer closest to the set Tm (the first one on ties), make it
    # single stranded
    best_primer, best_tm = min(primers_tms,
                               key=lambda pair: abs(pair[1] - tm))
    best_primer = best_primer.top

    # Apply overhang
    if overhang:
        overhang = overhang.top

    output_primer = coral.Primer(best_primer, best_tm, overhang=overhang)

    def _structure(primer):
        '''Check annealing sequence for structure.

        Averages the trailing values returned by nupack.pairs(0), one per
        annealing base, and warns when that mean is below 0.5.

        :param primer: Primer for which to evaluate structure
        :type primer: sequence.Primer
        :returns: Mean of the values covering the annealing sequence.

        '''
        # Check whole primer for high-probability structure, focus in on
        # annealing sequence, report average
        nupack = coral.analysis.Nupack(primer.primer())
        pairs = nupack.pairs(0)
        anneal_len = len(primer.anneal)
        pairs_mean = sum(pairs[-anneal_len:]) / anneal_len
        if pairs_mean < 0.5:
            warnings.warn('High probability structure', Warning)
        return pairs_mean
    if structure:
        _structure(output_primer)
    return output_primer

Exemplo n.º 20

0

Exibir arquivo

Arquivo: _yeast.py Projeto: eyu-boltthreads/coral

def fetch_yeast_locus_sequence(locus_name, flanking_size=0):
    """Acquire a sequence from SGD http://www.yeastgenome.org.

    With a positive flanking_size the residues of the first matching
    flanking-region row are returned; with 0 the residues of the locus
    itself. Any other value prints a message and gives an empty sequence.

    :param locus_name: Common name or systematic name for the locus (e.g. ACT1
                       or YFL039C).
    :type locus_name: str
    :param flanking_size: The length of flanking DNA (on each side) to return
    :type flanking_size: int
    :returns: Sequence taken from the first row of the query result.
    :rtype: coral.DNA

    """
    yeastmine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Get a new query on the class (table) being queried
    gene_query = yeastmine.new_query("Gene")

    if flanking_size > 0:
        # Output columns:
        # secondaryIdentifier: the systematic name (e.g. YFL039C)
        # symbol: short name (e.g. ACT1)
        # length: sequence length
        # flankingRegions.direction: Upstream or downstream (or both) of locus
        # flankingRegions.sequence.length: length of the flanking regions
        # flankingRegions.sequence.residues: sequence of the flanking regions
        gene_query.add_view("secondaryIdentifier", "symbol", "length",
                            "flankingRegions.direction",
                            "flankingRegions.sequence.length",
                            "flankingRegions.sequence.residues")

        gene_query.add_constraint("flankingRegions.direction", "=", "both",
                                  code="A")
        gene_query.add_constraint("Gene", "LOOKUP", locus_name,
                                  "S. cerevisiae", code="B")
        # The distance is sent in kilobases with one decimal
        flank_distance = "{:.1f}kb".format(flanking_size / 1000.)
        gene_query.add_constraint("flankingRegions.distance", "=",
                                  flank_distance, code="C")
        gene_query.set_logic("A and B and C")

        # TODO: What to do when there's more than one result?
        # FIXME: Use logger module to report the fields of the first row
        first_row = gene_query.rows().next()

        residues = first_row["flankingRegions.sequence.residues"]
        seq = coral.DNA(residues)
        # TODO: add more metadata

    elif flanking_size == 0:
        # The view specifies the output columns
        gene_query.add_view("primaryIdentifier", "secondaryIdentifier",
                            "symbol", "name", "sgdAlias",
                            "organism.shortName", "sequence.length",
                            "sequence.residues", "description", "qualifier")

        gene_query.add_constraint("status", "IS NULL", code="D")
        gene_query.add_constraint("status", "=", "Active", code="C")
        gene_query.add_constraint("qualifier", "IS NULL", code="B")
        gene_query.add_constraint("qualifier", "!=", "Dubious", code="A")
        gene_query.add_constraint("Gene", "LOOKUP", locus_name,
                                  "S. cerevisiae", code="E")

        # Combine the lettered constraints
        gene_query.set_logic("(A or B) and (C or D) and E")

        first_row = gene_query.rows().next()
        seq = coral.DNA(first_row["sequence.residues"])
    else:
        print("Problem with the flanking region size....")
        seq = coral.DNA("")

    return seq