Python DNA示例，cogent.DNA Python示例

示例#1

0

显示文件

文件： test_genome.py 项目： Skylersun/pycogent

 def test_assemble_seq(self):
     """_assemble_seq should pad uncovered positions with N"""
     frags = ["AAAAA", "CCCCC", "GGG"]
     # each case: (fragments, start, end, fragment positions, expected seq)
     cases = [
         # N padding required at both ends, offset coordinates
         (frags, 10, 31, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGGNNN"),
         # same layout, zero-based coordinates
         (frags, 0, 21, [(1, 6), (8, 13), (15, 18)],
          "NAAAAANNCCCCCNNGGGNNN"),
         # start matches first frag start
         (frags, 0, 20, [(0, 5), (7, 12), (14, 17)],
          "AAAAANNCCCCCNNGGGNNN"),
         # end matches last frag end
         (frags, 10, 28, [(11, 16), (18, 23), (25, 28)],
          "NAAAAANNCCCCCNNGGG"),
         # both start and end matched
         (frags, 10, 27, [(10, 15), (17, 22), (24, 27)],
          "AAAAANNCCCCCNNGGG"),
         # one frag
         ([''.join(frags)], 10, 23, [(10, 23)], ''.join(frags)),
     ]
     for pieces, start, end, positions, expected in cases:
         expect = DNA.makeSequence(expected)
         self.assertEqual(_assemble_seq(pieces, start, end, positions),
                          expect)

示例#2

0

显示文件

文件： test_align.py 项目： GavinHuttley/pycogent

 def test_gaps_at_both_ends(self):
     """global alignment of two offset substrings: 6 matches, length 10"""
     full = 'aaaccggttt'
     # A lacks the last two bases, B lacks the first two
     left = DNA.makeSequence(full[:-2], Name="A")
     right = DNA.makeSequence(full[2:], Name="B")
     for aln in self._aligned_both_ways(left, right, local=False):
         self.assertEqual(matchedColumns(aln), 6)
         self.assertEqual(len(aln), 10)

示例#3

0

显示文件

文件： test_align.py 项目： carze/clovr-base

 def test_gaps_at_both_ends(self):
     """offset substrings aligned globally give 6 matched columns of 10"""
     source = 'aaaccggttt'
     trimmed_tail = DNA.makeSequence(source[:-2], Name="A")
     trimmed_head = DNA.makeSequence(source[2:], Name="B")
     alignments = self._aligned_both_ways(trimmed_tail, trimmed_head,
                                          local=False)
     for alignment in alignments:
         self.assertEqual(matchedColumns(alignment), 6)
         self.assertEqual(len(alignment), 10)

示例#4

0

显示文件

文件： test_genome.py 项目： mikerobeson/pycogent

 def test_assemble_seq(self):
     """_assemble_seq should place N wherever no fragment is positioned"""
     frags = ["AAAAA", "CCCCC", "GGG"]

     def check(pieces, start, end, positions, expected):
         # compare the assembled result with the expected DNA sequence
         expect = DNA.makeSequence(expected)
         self.assertEqual(_assemble_seq(pieces, start, end, positions),
                          expect)

     padded = "NAAAAANNCCCCCNNGGGNNN"
     check(frags, 10, 31, [(11, 16), (18, 23), (25, 28)], padded)
     check(frags, 0, 21, [(1, 6), (8, 13), (15, 18)], padded)
     # should work with:
     # start matches first frag start
     check(frags, 0, 20, [(0, 5), (7, 12), (14, 17)],
           "AAAAANNCCCCCNNGGGNNN")
     # end matches last frag_end
     check(frags, 10, 28, [(11, 16), (18, 23), (25, 28)],
           "NAAAAANNCCCCCNNGGG")
     # both start and end matched
     check(frags, 10, 27, [(10, 15), (17, 22), (24, 27)],
           "AAAAANNCCCCCNNGGG")
     # one frag
     whole = ''.join(frags)
     check([whole], 10, 23, [(10, 23)], whole)

示例#5

0

显示文件

文件： test_align.py 项目： GavinHuttley/pycogent

 def test_local_tiebreak(self):
     """Of two equally good local hits, the first one should be reported"""
     # so that the Pyrex and Python versions give the same result.
     scores = make_dna_scoring_dict(match=1, transition=-1,
                                    transversion=-1)
     query = DNA.makeSequence('cwc', Name='pattern')
     # 'cactc' holds two candidate hits for 'cwc': 'cac' then 'ctc'
     subject = DNA.makeSequence('cactc', Name='target')
     alignment = local_pairwise(query, subject, scores, 5, 2)
     matched = str(alignment.NamedSeqs['target']).lower()
     self.assertEqual(matched, 'cac')

示例#6

0

显示文件

文件： test_cigar.py 项目： miklou/pycogent

 def setUp(self):
     """Create two gapped DNA seqs with their maps, cigars and raw seqs"""
     gapped = DNA.makeSequence('---AA---GCTTAG-A--CCT-')
     gapped1 = DNA.makeSequence('CCAAAAAA---TAGT-GGC--G')
     self.cigar_text = '3D2M3D6MDM2D3MD'
     self.aln_seq = gapped
     self.aln_seq1 = gapped1
     # parseOutGaps gives a (map, ungapped sequence) pair
     self.map, self.seq = gapped.parseOutGaps()
     self.map1, self.seq1 = gapped1.parseOutGaps()
     self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
     self.aln = LoadSeqs(data={"FAKE01": gapped, "FAKE02": gapped1})
     self.cigars = {"FAKE01": self.cigar_text,
                    "FAKE02": map_to_cigar(self.map1)}
     self.seqs = {"FAKE01": str(self.seq), "FAKE02": str(self.seq1)}

示例#7

0

显示文件

 def setUp(self):
     """Prepare the aligned seqs, maps, cigar strings and slices fixtures"""
     self.cigar_text = '3D2M3D6MDM2D3MD'
     first = self.aln_seq = DNA.makeSequence('---AA---GCTTAG-A--CCT-')
     second = self.aln_seq1 = DNA.makeSequence('CCAAAAAA---TAGT-GGC--G')
     # split each gapped sequence into its map and its ungapped sequence
     self.map, self.seq = first.parseOutGaps()
     self.map1, self.seq1 = second.parseOutGaps()
     self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
     self.aln = LoadSeqs(data=dict(FAKE01=first, FAKE02=second))
     self.cigars = dict(FAKE01=self.cigar_text,
                        FAKE02=map_to_cigar(self.map1))
     self.seqs = dict(FAKE01=str(self.seq), FAKE02=str(self.seq1))

示例#8

0

显示文件

文件： test_align.py 项目： mikerobeson/pycogent

 def test_local_tiebreak(self):
     """Ties between best local hits resolve to the earliest hit"""
     # so that the Pyrex and Python versions give the same result.
     scoring = dict(match=1, transition=-1, transversion=-1)
     score_matrix = make_dna_scoring_dict(**scoring)
     pattern = DNA.makeSequence('cwc', Name='pattern')
     target = DNA.makeSequence('cactc', Name='target')
     aligned = local_pairwise(pattern, target, score_matrix, 5, 2)
     self.assertEqual(str(aligned.NamedSeqs['target']).lower(), 'cac')

示例#9

0

显示文件

文件： region.py 项目： chungtseng/pycogent

 def _make_utr_seq(self):
     """Cache the 5' and 3' UTR sequences under "Utr5" and "Utr3".

     When both untranslated exon collections are None, both cache entries
     are set to NULL_VALUE. Otherwise each UTR is the concatenation of the
     Seq of its untranslated exons, in order. An end with no untranslated
     exons (None) yields an empty DNA sequence rather than raising
     TypeError from iterating None.
     """
     exons5 = self.UntranslatedExons5
     exons3 = self.UntranslatedExons3
     if exons5 is None and exons3 is None:
         self._cached["Utr5"] = self.NULL_VALUE
         self._cached["Utr3"] = self.NULL_VALUE
         return
     Utr5_seq, Utr3_seq = DNA.makeSequence(""), DNA.makeSequence("")
     # only one end may be missing here; treat it as having no exons
     for exon in exons5 or ():
         Utr5_seq += exon.Seq
     for exon in exons3 or ():
         Utr3_seq += exon.Seq
     self._cached["Utr5"] = Utr5_seq
     self._cached["Utr3"] = Utr3_seq

示例#10

0

显示文件

文件： uclust.py 项目： teravest/qiime

def process_uclust_pw_alignment_results(fasta_pairs_lines, uc_lines):
    """ Process results of uclust search and align

    fasta_pairs_lines: lines of the pairwise-alignment fasta output; records
     are consumed two at a time (query, then target)
    uc_lines: lines of the .uc output; only 'H' records are used

    Yields (query_id, target_id, aligned_query, aligned_target, percent_id)
     for each hit. For reverse-strand hits ('-') both aligned sequences are
     reverse complemented and ' RC' is appended to the query id.

    Raises UclustParseError on an unknown strand symbol, or when the ids in
     the fasta pair do not match the ids of the current uc hit.
    """
    alignments = get_next_two_fasta_records(fasta_pairs_lines)
    for hit in get_next_record_type(uc_lines, 'H'):
        matching_strand = hit[4]
        if matching_strand == '-':
            strand_id = '-'
            target_rev_match = True
        elif matching_strand == '+':
            strand_id = '+'
            target_rev_match = False
        elif matching_strand == '.':
            # protein sequence, so no strand information
            strand_id = ''
            target_rev_match = False
        else:
            raise UclustParseError(
                "Unknown strand type: %s" % matching_strand)
        uc_query_id = hit[8]
        uc_target_id = hit[9]
        percent_id = float(hit[3])

        # next() builtin works on both Python 2.6+ and Python 3 iterators
        fasta_pair = next(alignments)

        fasta_query_id = fasta_pair[0][0]
        aligned_query = fasta_pair[0][1]

        if fasta_query_id != uc_query_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got query %s but expected %s." %
                (fasta_query_id, uc_query_id))

        fasta_target_id = fasta_pair[1][0]
        aligned_target = fasta_pair[1][1]

        # the fasta target id carries the strand symbol as a suffix
        if fasta_target_id != uc_target_id + strand_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got target %s but expected %s." %
                (fasta_target_id, uc_target_id + strand_id))

        query_id = uc_query_id
        target_id = uc_target_id
        if target_rev_match:
            query_id = uc_query_id + ' RC'
            aligned_query = DNA.rc(aligned_query)
            aligned_target = DNA.rc(aligned_target)

        yield (query_id, target_id, aligned_query, aligned_target, percent_id)

示例#11

0

显示文件

文件： test_core_standalone.py 项目： carze/clovr-base

 def test_picklability(self):
     """An alignment holding an annotated sequence survives pickling"""
     # Exercises alignments, sequences, features, maps and spans.
     # Only checks that dumps/loads completes; the round-tripped object
     # is not compared with the original (seqs/alignments are plain
     # python classes without __getstate__ etc.).
     import cPickle as pickle
     plain = DNA.makeSequence("aagaagaagaccccca")
     annotated = DNA.makeSequence("aagaagaagaccccct")
     annotated.addFeature('exon', 'fred', [(10,15)])
     aln = LoadSeqs(data={'a': plain, 'b': annotated})
     pickled = pickle.dumps(aln)
     pickle.loads(pickled)

示例#12

0

显示文件

文件： benchmark_aligning.py 项目： carze/clovr-base

def test(r=1, **kw):
    """Time one global pairwise alignment of two repeated 10-base motifs.

    r: number of times each motif is repeated to build the sequences
    kw: extra keyword arguments passed through to classic_align_pairwise

    Returns len(seq1) * len(seq2) divided by the elapsed wall-clock seconds.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    t0 = time.time()
    # only the run time matters; the alignment itself is discarded
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t

示例#13

0

显示文件

文件： uclust.py 项目： Jorge-C/qiime

def process_uclust_pw_alignment_results(fasta_pairs_lines,uc_lines):
    """ Process results of uclust search and align

    fasta_pairs_lines: lines of the pairwise-alignment fasta output; records
     are read in (query, target) pairs
    uc_lines: lines of the .uc output; only 'H' records are processed

    Yields (query_id, target_id, aligned_query, aligned_target, percent_id)
     per hit. Hits on the '-' strand have both aligned sequences reverse
     complemented and ' RC' appended to the query id.

    Raises UclustParseError for an unrecognised strand symbol, or when the
     fasta pair ids disagree with the ids of the current uc hit.
    """
    # strand symbol -> (suffix expected on the fasta target id, is reverse)
    # '.' marks a protein sequence, so no strand information
    strand_info = {'-': ('-', True), '+': ('+', False), '.': ('', False)}

    alignments = get_next_two_fasta_records(fasta_pairs_lines)
    for hit in get_next_record_type(uc_lines,'H'):
        matching_strand = hit[4]
        if matching_strand not in strand_info:
            raise UclustParseError(
                "Unknown strand type: %s" % matching_strand)
        strand_id, target_rev_match = strand_info[matching_strand]
        uc_query_id = hit[8]
        uc_target_id = hit[9]
        percent_id = float(hit[3])

        # next() builtin works on both Python 2.6+ and Python 3 iterators
        fasta_pair = next(alignments)

        fasta_query_id = fasta_pair[0][0]
        aligned_query = fasta_pair[0][1]

        if fasta_query_id != uc_query_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got query %s but expected %s." %
                (fasta_query_id, uc_query_id))

        fasta_target_id = fasta_pair[1][0]
        aligned_target = fasta_pair[1][1]

        if fasta_target_id != uc_target_id + strand_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match." +
                " Got target %s but expected %s." %
                (fasta_target_id, uc_target_id + strand_id))

        query_id = uc_query_id
        target_id = uc_target_id
        if target_rev_match:
            query_id = uc_query_id + ' RC'
            aligned_query = DNA.rc(aligned_query)
            aligned_target = DNA.rc(aligned_target)

        yield (query_id, target_id, aligned_query, aligned_target, percent_id)

示例#14

0

显示文件

文件： benchmark_aligning.py 项目： pombredanne/pycogent-1

def test(r=1, **kw):
    """Benchmark a single global pairwise alignment.

    r: repeat count applied to each 10-base motif to size the sequences
    kw: forwarded unchanged to classic_align_pairwise

    Returns the product of the two sequence lengths divided by the elapsed
    wall-clock time in seconds.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence("AAAATGCTTA" * r)
    seq1 = DNA.makeSequence("AATTTTGCTG" * r)

    t0 = time.time()
    # the alignment result is not needed, only how long it took
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t

示例#15

0

显示文件

文件： test_core_standalone.py 项目： cxhernandez/pycogent

 def test_picklability(self):
     """Pickling then unpickling an annotated alignment should not fail"""
     # Touches alignments, sequences, features, maps and spans.
     # The restored object is not checked against the original; seqs and
     # alignments are simple python classes without __getstate__ etc.
     import pickle as pickle
     seqs = {
         'a': DNA.makeSequence("aagaagaagaccccca"),
         'b': DNA.makeSequence("aagaagaagaccccct"),
     }
     seqs['b'].addFeature('exon', 'fred', [(10, 15)])
     aln = LoadSeqs(data=seqs)
     pickle.loads(pickle.dumps(aln))

示例#16

0

显示文件

文件： test_align.py 项目： GavinHuttley/pycogent

 def test_codon(self):
     """codon model Viterbi alignment: 6 matched columns, length 9"""
     seq_a = DNA.makeSequence('tacgccgta', Name="A")
     seq_b = DNA.makeSequence('tacgta', Name="B")
     model_args = dict(model_gaps=False, equal_motif_probs=True,
                       mprob_model='conditional')
     codon_model = cogent.evolve.substitution_model.Codon(**model_args)
     tree = cogent.LoadTree(tip_names=['A', 'B'])
     lf = codon_model.makeLikelihoodFunction(tree, aligned=False)
     lf.setSequences(dict(A=seq_a, B=seq_b))
     viterbi_path = lf.getLogLikelihood().edge.getViterbiPath()
     aln = viterbi_path.getAlignment()
     self.assertEqual(matchedColumns(aln), 6)
     self.assertEqual(len(aln), 9)

示例#17

0

显示文件

文件： test_align.py 项目： carze/clovr-base

 def test_codon(self):
     """unaligned seqs under a codon model align to 6 matches over 9"""
     seqs = dict(A=DNA.makeSequence('tacgccgta', Name="A"),
                 B=DNA.makeSequence('tacgta', Name="B"))
     codon_model = cogent.evolve.substitution_model.Codon(
         model_gaps=False, equal_motif_probs=True,
         mprob_model='conditional')
     tree = cogent.LoadTree(tip_names=['A', 'B'])
     lf = codon_model.makeLikelihoodFunction(tree, aligned=False)
     lf.setSequences(seqs)
     edge = lf.getLogLikelihood().edge
     # the score is returned alongside the alignment but is not checked
     _, aln = edge.getViterbiScoreAndAlignment()
     self.assertEqual(matchedColumns(aln), 6)
     self.assertEqual(len(aln), 9)

示例#18

0

显示文件

文件： benchmark_aligning.py 项目： GavinHuttley/pycogent

def test(r=1, **kw):
    """Benchmark one global alignment; return thousands of cells per second.

    r: repeat count for each 10-base motif
    kw: forwarded to classic_align_pairwise

    Returns '*' if the alignment raises ArithmeticError, otherwise
    int(len(seq1) * len(seq2) / elapsed / 1000).
    """
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    started = time.clock()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1, seq2, S, 10, 2, local=False,
                               return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    elapsed = time.clock() - started
    cells = len(seq1) * len(seq2)
    return int(cells / elapsed / 1000)

示例#19

0

显示文件

文件： extract_barcodes.py 项目： rob-knight/qiime

def process_barcode_single_end_data(read1_data,
                                    output_bc_fastq,
                                    output_fastq1,
                                    bc1_len=6,
                                    rev_comp_bc1=False):
    """ Split a read into barcode and remainder, writing each as fastq

    read1_data: list of header, read, quality scores
    output_bc_fastq: open output fastq file for the barcode record
    output_fastq1: open output fastq file for the remaining read
    bc1_len: number of leading positions treated as the barcode
    rev_comp_bc1: reverse complement barcode (and reverse its quality
     scores) before writing.
    """
    read = read1_data[1]
    qual = read1_data[2]
    barcode, barcode_qual = read[:bc1_len], qual[:bc1_len]
    if rev_comp_bc1:
        barcode = DNA.rc(barcode)
        barcode_qual = barcode_qual[::-1]

    header = read1_data[0]
    output_bc_fastq.write(format_fastq_record(header, barcode, barcode_qual))
    output_fastq1.write(
        format_fastq_record(header, read[bc1_len:], qual[bc1_len:]))

示例#20

0

显示文件

文件： test_draw.py 项目： GavinHuttley/pycogent

def makeSampleSequence():
    """Return an 80-base DNA sequence carrying one exon and two repeat units"""
    bases = 'tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc'
    seq = DNA.makeSequence(bases)
    # (feature type, feature name, spans)
    features = (
        ('exon', 'exon', [(20,35)]),
        ('repeat_unit', 'repeat_unit', [(39,49)]),
        ('repeat_unit', 'rep2', [(49,60)]),
    )
    for feature_type, name, spans in features:
        seq.addAnnotation(annotation.Feature, feature_type, name, spans)
    return seq

示例#21

0

显示文件

文件： test_annotation.py 项目： miklou/pycogent

 def test_inherit_feature(self):
     """_Feature subclasses should be preserved by derived features"""
     class NewFeat(_Feature):
         def __init__(self, *args, **kwargs):
             super(NewFeat, self).__init__(*args, **kwargs)

         def newMethod(self):
             if len(self.map.spans) <= 1:
                 return True
             # asOneSpan should create a new instance of NewFeat
             return self.asOneSpan().newMethod()

     seq = DNA.makeSequence('ACGTACGTACGT')
     two_span_map = as_map([(1,3), (5,7)], len(seq))
     feat = seq.addAnnotation(NewFeat, two_span_map,
                              type='gene', Name='abcd')
     self.assertEqual(type(feat.asOneSpan()), NewFeat)
     self.assertEqual(type(feat.getShadow()), NewFeat)
     one_span_map = as_map([(3,5)], len(seq))
     feat2 = seq.addAnnotation(NewFeat, one_span_map,
                               type='gene', Name='def')

     covering = seq.getRegionCoveringAll([feat, feat2],
                                         feature_class=NewFeat)
     self.assertEqual(type(covering), NewFeat)
     # now use the new method
     feat.newMethod()

示例#22

0

显示文件

def parse_illumina_line(l,barcode_length,rev_comp_barcode,
                        barcode_in_sequence=False):
    """Split one colon-delimited Illumina line into a dict of named fields

    l: the line to parse
    barcode_length: number of characters to take as the barcode
    rev_comp_barcode: if True, the barcode is passed through DNA.rc
    barcode_in_sequence: if True, the barcode is the leading part of the
     sequence and is removed from the sequence and quality string;
     otherwise it is read from the text after '#' in the fifth field
    """
    fields = l.strip().split(':')

    y_field_parts = fields[4].split('#')
    y_position = int(y_field_parts[0])
    sequence, qual_string = fields[5], fields[6]

    if not barcode_in_sequence:
        barcode = y_field_parts[1][:barcode_length]
    else:
        barcode = sequence[:barcode_length]
        sequence = sequence[barcode_length:]
        qual_string = qual_string[barcode_length:]

    if rev_comp_barcode:
        barcode = DNA.rc(barcode)

    return {
        'Full description': ':'.join(fields[:5]),
        'Machine Name': fields[0],
        'Channel Number': int(fields[1]),
        'Tile Number': int(fields[2]),
        'X Position': int(fields[3]),
        'Y Position': y_position,
        'Barcode': barcode,
        'Full Y Position Field': fields[4],
        'Sequence': sequence,
        'Quality Score': qual_string,
    }

示例#23

0

显示文件

文件： extract_barcodes.py 项目： rob-knight/qiime

def process_barcode_single_end_data(read1_data, output_bc_fastq, output_fastq1, bc1_len=6, rev_comp_bc1=False):
    """ Write the barcode and the barcode-trimmed read as two fastq records

    read1_data: list of header, read, quality scores
    output_bc_fastq: open output fastq file receiving the barcode record
    output_fastq1: open output fastq file receiving the trimmed read
    bc1_len: length of barcode to remove from beginning of data
    rev_comp_bc1: reverse complement barcode before writing; its quality
     scores are reversed to match.
    """
    sequence = read1_data[1]
    quality = read1_data[2]

    bc_read = sequence[:bc1_len]
    bc_qual = quality[:bc1_len]
    if rev_comp_bc1:
        bc_read, bc_qual = DNA.rc(bc_read), bc_qual[::-1]

    # barcode record first, then the remainder of the read
    bc_record = format_fastq_record(read1_data[0], bc_read, bc_qual)
    output_bc_fastq.write(bc_record)
    seq_record = format_fastq_record(read1_data[0], sequence[bc1_len:],
                                     quality[bc1_len:])
    output_fastq1.write(seq_record)

示例#24

0

显示文件

文件： split_libraries_illumina.py 项目： Ecogenomics/FrankenQIIME

def parse_illumina_single_end_read_file(read_file,barcode_length,\
    max_bad_run_length,quality_threshold,min_per_read_length,
    rev_comp,rev_comp_barcode,barcode_in_seq,barcode_max_N=0,seq_max_N=0):
    """Yield (description, barcode, seq, qual) for each acceptable read

    Each line of read_file is parsed with parse_illumina_line and quality
    filtered with read_qual_score_filter. A read is skipped when its
    barcode holds more than barcode_max_N 'N' characters, or when the
    filtered sequence is shorter than min_per_read_length or holds more
    than seq_max_N 'N' characters. With rev_comp set, the sequence is
    reverse complemented and the quality string reversed.
    """
    for line in read_file:
        parsed = parse_illumina_line(line, barcode_length,
                                     rev_comp_barcode, barcode_in_seq)
        description = illumina_read_description_from_read_data(parsed)

        barcode = parsed['Barcode']
        if barcode.count('N') > barcode_max_N:
            continue

        seq, qual = read_qual_score_filter(
            parsed['Sequence'], parsed['Quality Score'],
            max_bad_run_length, quality_threshold)

        too_short = len(seq) < min_per_read_length
        if too_short or seq.count('N') > seq_max_N:
            continue

        if rev_comp:
            seq, qual = DNA.rc(seq), qual[::-1]

        yield description, barcode, seq, qual

示例#25

0

显示文件

文件： cigar.py 项目： chungtseng/pycogent

def CigarParser(seqs, cigars, sliced = False, ref_seqname = None, start = None, end = None, moltype=DNA):
    """return an alignment from raw sequences and cigar strings
    if sliced, will return an alignment correspondent to ref sequence start to end
    
    Arguments:
        seqs - raw sequences as {seqname: seq}
        cigars - corresponding cigar text as {seqname: cigar_text}
        cigars and seqs should have the same seqnames
        sliced - if True, only the region selected by start/end on the
        reference sequence is returned
        ref_seqname - name of the reference sequence, used when sliced
        start, end - slice bounds passed to slice_cigar for the reference
        sequence, used when sliced
        moltype - optional default to DNA; used for every sequence built
        here, including all-gap filler rows
    """
    data = {}
    if not sliced:
        for seqname in seqs.keys():
            aligned_seq = aligned_from_cigar(cigars[seqname], 
                                            seqs[seqname], moltype=moltype)
            data[seqname] = aligned_seq
    else:
        ref_aln_seq = aligned_from_cigar(cigars[ref_seqname], 
                                        seqs[ref_seqname], moltype=moltype)
        # aln_loc is the (start, end) of the slice in alignment coordinates
        m, aln_loc = slice_cigar(cigars[ref_seqname], start, end, by_align = False)
        data[ref_seqname] = ref_aln_seq[aln_loc[0]:aln_loc[1]]
        for seqname in seqs.keys():
            if seqname == ref_seqname:
                continue
            m, seq_loc = slice_cigar(cigars[seqname], aln_loc[0], aln_loc[1])
            if seq_loc:
                seq = seqs[seqname]
                if isinstance(seq, str):
                    seq = moltype.makeSequence(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gappedByMap(m)
            else:
                # no sequence location for this slice: fill with gaps, using
                # the requested moltype rather than always DNA
                data[seqname] = moltype.makeSequence('-'*(aln_loc[1] - aln_loc[0]))
    aln = LoadSeqs(data = data, aligned = True)
    return aln

示例#26

0

显示文件

文件： parse.py 项目： Gaby1212/qiime

def parse_illumina_line(l, barcode_length, rev_comp_barcode,
                        barcode_in_sequence=False):
    """Parse one colon-delimited Illumina line into a dict

    The barcode is either the first barcode_length characters of the
    sequence (barcode_in_sequence=True, in which case those positions are
    dropped from the sequence and quality string) or the first
    barcode_length characters following '#' in the fifth field. With
    rev_comp_barcode set, the barcode is passed through DNA.rc.
    """
    fields = l.strip().split(':')

    y_field = fields[4]
    y_parts = y_field.split('#')
    y_position = int(y_parts[0])
    sequence = fields[5]
    qual_string = fields[6]

    if barcode_in_sequence:
        barcode, sequence = (sequence[:barcode_length],
                             sequence[barcode_length:])
        qual_string = qual_string[barcode_length:]
    else:
        barcode = y_parts[1][:barcode_length]

    if rev_comp_barcode:
        barcode = DNA.rc(barcode)

    result = {}
    result['Full description'] = ':'.join(fields[:5])
    result['Machine Name'] = fields[0]
    result['Channel Number'] = int(fields[1])
    result['Tile Number'] = int(fields[2])
    result['X Position'] = int(fields[3])
    result['Y Position'] = y_position
    result['Barcode'] = barcode
    result['Full Y Position Field'] = y_field
    result['Sequence'] = sequence
    result['Quality Score'] = qual_string
    return result

示例#27

0

显示文件

文件： translateCogent.py 项目： belandbioinfo/GroundControl

def findBestSeq(seqobject):
    """Pick the frame (0, 1 or 2) with the longest translation before a stop.

    seqobject: object exposing .seq (the DNA) and .id (the sequence name)

    Returns (longestseq, correctdnaseq): the longest translated prefix, up
    to the first '*', among the first three entries returned by
    standard_code.sixframes, and the DNA sequence sliced from that frame's
    offset. Ties go to the lowest frame. If all three prefixes are empty,
    returns '' and the frame-0 DNA instead of raising UnboundLocalError.
    """
    dna_seq = str(seqobject.seq)
    my_seq = DNA.makeSequence(dna_seq,seqobject.id)

    all_six = standard_code.sixframes(my_seq)
    # keep only the part of each translation before its first stop codon
    seqlist = [frame.split('*')[0] for frame in all_six]

    longestseq = ''
    # default to frame 0 so the name is always bound
    correctdnaseq = my_seq[0:]
    # assumes the first three sixframes() entries are frames 0-2 of the
    # given strand, in order -- TODO confirm against GeneticCode.sixframes
    for x in range(3):
        if len(longestseq) < len(seqlist[x]):
            longestseq = seqlist[x]
            correctdnaseq = my_seq[x:]
    return longestseq, correctdnaseq

示例#28

0

显示文件

文件： test_annotation.py 项目： miklou/pycogent

    def test_inherit_feature(self):
        """a _Feature subclass should be kept by features derived from it"""
        class NewFeat(_Feature):
            def __init__(self, *args, **kwargs):
                super(NewFeat, self).__init__(*args, **kwargs)

            def newMethod(self):
                if len(self.map.spans) <= 1:
                    return True
                # asOneSpan should create new instance of NewFeat
                merged = self.asOneSpan()
                return merged.newMethod()

        seq = DNA.makeSequence('ACGTACGTACGT')
        length = len(seq)
        gene_abcd = seq.addAnnotation(
            NewFeat, as_map([(1, 3), (5, 7)], length),
            type='gene', Name='abcd')
        for derived in (gene_abcd.asOneSpan, gene_abcd.getShadow):
            self.assertEqual(type(derived()), NewFeat)
        gene_def = seq.addAnnotation(
            NewFeat, as_map([(3, 5)], len(seq)),
            type='gene', Name='def')

        region = seq.getRegionCoveringAll([gene_abcd, gene_def],
                                          feature_class=NewFeat)
        self.assertEqual(type(region), NewFeat)
        # now use the new method
        gene_abcd.newMethod()

示例#29

0

显示文件

文件： region.py 项目： chungtseng/pycogent

 def _get_flanking_seq_data(self):
     """Cache the (upstream, downstream) flanking sequences as 'FlankingSeq'

     Flanks come from the flanking_sequence table row matching this
     variation's variation_id; a flank missing there is taken from the
     genome instead. At most the last 300 upstream and the first 300
     downstream positions are cached.
     """
     variation_id = self._table_rows['variation_feature']['variation_id']
     table = self.flanking_sequence_table
     query = sql.select([table], table.c.variation_id == variation_id)
     record = asserted_one(query.execute())
     self._table_rows['flanking_sequence'] = record
     up_seq = record['up_seq']
     down_seq = record['down_seq']
     # the following two lines are because -- wait for it -- someone has
     # entered the string 'NULL' instead of NULL in the MySQL tables!!!
     up_seq = None if up_seq == 'NULL' else up_seq
     down_seq = None if down_seq == 'NULL' else down_seq
     seqs = dict(up=up_seq, down=down_seq)
     for name, seq in list(seqs.items()):
         if seq is None:
             # 'down' uses the window after the location and 'up' the
             # window before it; the two swap on the minus strand
             window_after = name == 'down'
             if self.Location.Strand == -1:
                 window_after = not window_after
             resized = (1, 301) if window_after else (-301, -1)
             flank = self.Location.resized(*resized)
             seq = self.genome.getRegion(region=flank).Seq
         else:
             seq = DNA.makeSequence(seq)
         seqs[name] = seq

     upstream = seqs['up'][-300:]
     downstream = seqs['down'][:300]
     self._cached['FlankingSeq'] = (upstream, downstream)

示例#30

0

显示文件

文件： conserved_sequences.py 项目： ctSkennerton/Amplishot

    def _generate_unambiguous_sequences(self):
        """Fill self.conserved_sequences with forward and reverse entries.

        Each entry of _CONSERVED_SEQUENCES (position -> sequence string) is
        passed through _disambiguate, which returns one sequence or a list
        of them; each result is stored under its string form as a
        ConservedSequence at that position. The reverse complement of every
        forward entry is then stored as well, flagged with rc=True.
        """
        for pos, seq in self._CONSERVED_SEQUENCES.items():
            ret = self._disambiguate(DNA.makeSequence(seq))
            if not isinstance(ret, list):
                ret = [ret]
            for dnaseq_r in ret:
                self.conserved_sequences[str(dnaseq_r)] = ConservedSequence(dnaseq_r, pos)

        # iterate over a snapshot: the loop adds keys to the same dict
        for seq, con_seq in list(self.conserved_sequences.items()):
            # rc() returns a new sequence rather than modifying in place,
            # so its result has to be kept
            rc_seq = DNA.makeSequence(seq).rc()
            self.conserved_sequences[str(rc_seq)] = ConservedSequence(
                rc_seq, con_seq.pos, rc=True)

示例#31

0

显示文件

def rc_fasta_lines(fasta_lines, seq_desc_mapper=append_rc):
    """Yield (seq id, reverse-complemented seq) for each fasta record

    fasta_lines: lines handed to parse_fasta
    seq_desc_mapper: function applied to each sequence id before it is
     yielded

    Sequences are upper-cased before being reverse complemented.
    """
    for label, bases in parse_fasta(fasta_lines):
        yield seq_desc_mapper(label), DNA.rc(bases.upper())

示例#32

0

显示文件

文件： test_genetic_code.py 项目： cxhernandez/pycogent

 def test_stop_indexes(self):
     """getStopIndices should report the stop codon indexes of each frame"""
     code = GeneticCode(self.SGC)
     dna = DNA.makeSequence('ATGCTAACATAAA')
     # expected stop indexes for start=0, 1 and 2 respectively
     per_frame = [[9], [4], []]
     for start, stops in enumerate(per_frame):
         self.assertEqual(code.getStopIndices(dna, start=start), stops)

示例#33

0

显示文件

文件： test_draw.py 项目： carze/clovr-base

def makeSampleSequence():
    """Return a 100-base DNA sequence annotated with an exon and two repeats"""
    seq = DNA.makeSequence('aaaccggttt' * 10)
    # (feature type, feature name, spans)
    specs = [
        ('exon', 'exon', [(20, 35)]),
        ('repeat_unit', 'repeat_unit', [(39, 49)]),
        ('repeat_unit', 'rep2', [(49, 60)]),
    ]
    for feature_type, name, spans in specs:
        seq.addAnnotation(annotation.Feature, feature_type, name, spans)
    return seq

示例#34

0

显示文件

文件： test_genetic_code.py 项目： blankenberg/pycogent

 def test_stop_indexes(self):
     """stop codon indexes should be found for each starting frame"""
     genetic_code = GeneticCode(self.SGC)
     seq = DNA.makeSequence("ATGCTAACATAAA")
     # list position is the frame passed as start
     expected_by_frame = [[9], [4], []]
     for frame, expected in enumerate(expected_by_frame):
         observed = genetic_code.getStopIndices(seq, start=frame)
         self.assertEqual(observed, expected)

示例#35

0

显示文件

def adjust_alignment(template, candidate, new_gaps):
    """adjust template/candidate aln to remove gaps added by pairwise alignment
    
        This step adjusts the alignment to reduce the length back to the 
         template alignment length by introducing local misalignments to
         remove gap characters that are present in the pairwise alignment
         but not in the template alignment.

        template: aligned template sequence
        candidate: aligned candidate sequence
        new_gaps: positions in template to delete; for each one, the
         candidate position chosen by nearest_gap is deleted as well.
         The list is not modified.

        Returns (template, candidate) as DNA sequences.
    
    """
    template_l = list(template)
    candidate_l = list(candidate)
    # positions are processed in reverse order; reversed() is used instead
    # of new_gaps.reverse() so the caller's list is left untouched
    for pos in reversed(new_gaps):
        del template_l[pos]
        del candidate_l[nearest_gap(candidate_l, pos)]

    return (DNA.makeSequence(''.join(template_l)),
            DNA.makeSequence(''.join(candidate_l)))

示例#36

0

显示文件

文件： util.py 项目： kylebittinger/pynast

def adjust_alignment(template,candidate,new_gaps):
    """adjust template/candidate aln to remove gaps added by pairwise alignment
    
        This step adjusts the alignment to reduce the length back to the 
         template alignment length by introducing local misalignments to
         remove gap characters that are present in the pairwise alignment
         but not in the template alignment.

        template: aligned template sequence
        candidate: aligned candidate sequence
        new_gaps: template positions to remove; the matching candidate
         position for each is picked by nearest_gap. The caller's list is
         left unchanged.

        Returns a (template, candidate) tuple of DNA sequences.
    
    """
    template_l = list(template)
    candidate_l = list(candidate)
    # iterate over a reversed view rather than reversing new_gaps in place,
    # which would silently reorder the list owned by the caller
    for pos in reversed(new_gaps):
        del template_l[pos]
        del candidate_l[nearest_gap(candidate_l,pos)]
        
    return (DNA.makeSequence(''.join(template_l)),
            DNA.makeSequence(''.join(candidate_l)))

示例#37

0

显示文件

文件： test_draw.py 项目： yatisht/pycogent

def makeSampleSequence():
    """Build the annotated sample DNA sequence: one exon, two repeat units"""
    raw = 'tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc'
    seq = DNA.makeSequence(raw)

    def annotate_feature(feature_type, name, span):
        seq.addAnnotation(annotation.Feature, feature_type, name, [span])

    annotate_feature('exon', 'exon', (20, 35))
    annotate_feature('repeat_unit', 'repeat_unit', (39, 49))
    annotate_feature('repeat_unit', 'rep2', (49, 60))
    return seq

示例#38

0

显示文件

文件： test_annotation.py 项目： miklou/pycogent

def makeSampleSequence(with_gaps=False):
    """Return a sample DNA sequence annotated with a CDS and a 5' UTR

    with_gaps: if True, positions 5-9 and the last two positions of the
     raw sequence are replaced by '-' before the sequence is built
    """
    raw_seq = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    if with_gaps:
        raw_seq = ''.join([raw_seq[:5], '-----', raw_seq[10:-2], '--'])
    seq = DNA.makeSequence(raw_seq)
    # (feature type, feature name, span)
    for feature_type, name, span in (('CDS', 'CDS', (15, 25)),
                                     ("5'UTR", "5' UTR", (12, 15))):
        seq.addAnnotation(Feature, feature_type, name, [span])
    return seq

示例#39

0

显示文件

文件： test_annotation.py 项目： miklou/pycogent

def makeSampleSequence(with_gaps=False):
    """Build the annotated sample sequence used by the annotation tests.

    The sequence gets a 'CDS' feature at (15, 25) and a "5'UTR" feature at
    (12, 15). If with_gaps is true, a five-character gap run replaces
    positions 5-9 and a two-character gap run replaces the final two
    positions of the raw sequence.
    """
    plain = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    gapped = plain[:5] + '-----' + plain[10:-2] + '--'
    annotated = DNA.makeSequence(gapped if with_gaps else plain)

    cds_span, utr_span = (15, 25), (12, 15)
    annotated.addAnnotation(Feature, 'CDS', 'CDS', [cds_span])
    annotated.addAnnotation(Feature, "5'UTR", "5' UTR", [utr_span])
    return annotated

示例#40

0

显示文件

文件： test_maps.py 项目： pombredanne/pycogent-1

    def test_maps_on_maps(self):
        """structure() reports nested features for a sequence and its [5:] slice"""
        base = DNA.makeSequence("ATCGATCGAT" * 5, Name="base")
        outer = annotate(base, 10, 20, "fake")
        # the nested and the left-hand features are only reached through
        # their parents, so the returned objects are not kept
        annotate(outer, 3, 5, "fake2")
        annotate(base, 1, 3, "left")

        tail = base[5:]

        expected_full = (
            "seq",
            50,
            [("fake", "[10:20]/50", [("fake2", "[3:5]/10")]), ("left", "[1:3]/50")],
        )
        expected_tail = ("seq", 45, [("fake", "[5:15]/45", [("fake2", "[3:5]/10")])])

        self.assertEqual(structure(base), expected_full)
        self.assertEqual(structure(tail), expected_tail)

示例#41

0

显示文件

def introduce_terminal_gaps(template, aligned_template, aligned_candidate):
    """ introduce terminal gaps from template into the aligned candidate seq

    template: the original aligned template
    aligned_template: the template as it came out of the pairwise alignment
    aligned_candidate: the pairwise-aligned candidate; must provide .Name

    Returns a new DNA sequence named like aligned_candidate, padded on each
    end with the terminal gaps the original template has beyond those
    already present in aligned_template.
    """

    def count_leading_gaps(symbols):
        # number of consecutive '-' characters at the start of symbols
        gap_count = 0
        for symbol in symbols:
            if symbol == '-':
                gap_count += 1
            else:
                break
        return gap_count

    # 5' end: gaps in the original template minus those the pairwise
    # aligned template already carries (because we don't need to add these)
    five_prime_gaps_to_add = (count_leading_gaps(template) -
                              count_leading_gaps(aligned_template))

    # 3' end: the same bookkeeping, scanning each sequence from its end
    three_prime_gaps_to_add = (
        count_leading_gaps(reversed(template)) -
        count_leading_gaps(reversed(aligned_template)))

    # return the sequence with the 5' and 3' gaps added
    padded = ''.join(['-' * five_prime_gaps_to_add,
                      str(aligned_candidate),
                      '-' * three_prime_gaps_to_add])
    return DNA.makeSequence(padded, Name=aligned_candidate.Name)

示例#42

0

显示文件

文件： util.py 项目： kylebittinger/pynast

def introduce_terminal_gaps(template,aligned_template,aligned_candidate):
    """ introduce terminal gaps from template into the aligned candidate seq

        The candidate is padded at its 5' and 3' ends with as many '-'
         characters as the original template has at that end, less the
         terminal gaps already present in the pairwise aligned template.
         The result is a DNA sequence that keeps aligned_candidate.Name.
    """

    def terminal_gap_run(chars):
        # length of the unbroken run of '-' at the front of chars
        run = 0
        for char in chars:
            if char == '-':
                run += 1
                continue
            break
        return run

    # gaps present at each end of the original aligned template
    original_five_prime_gaps = terminal_gap_run(template)
    original_three_prime_gaps = terminal_gap_run(reversed(template))

    # gaps the pairwise aligned template already has at each end
    # (these do not need to be added again)
    existing_five_prime_gaps = terminal_gap_run(aligned_template)
    existing_three_prime_gaps = terminal_gap_run(reversed(aligned_template))

    # padding needed to get back to the original alignment length
    five_prime_pad = '-' * (original_five_prime_gaps - existing_five_prime_gaps)
    three_prime_pad = \
     '-' * (original_three_prime_gaps - existing_three_prime_gaps)

    return DNA.makeSequence(
     five_prime_pad + str(aligned_candidate) + three_prime_pad,
     Name=aligned_candidate.Name)

示例#43

0

显示文件

def get_reverse_primers(id_map):
    """ Return a dictionary with barcodes and rev-complement of rev primers

    id_map: dict whose values are dicts holding at least the keys
     'BarcodeSequence' and 'ReversePrimer'; the keys of id_map are ignored.
    """
    primers_by_barcode = {}
    for sample_fields in id_map.values():
        # Reverse complement the primer so it is in the same orientation
        # as the input fasta sequences
        oriented_primer = DNA.rc(sample_fields['ReversePrimer'])
        primers_by_barcode[sample_fields['BarcodeSequence']] = oriented_primer

    return primers_by_barcode

示例#44

0

显示文件

文件： benchmark_aligning.py 项目： cxhernandez/pycogent

def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    r: number of times each 10-base seed sequence is repeated.
    **kw: passed through to classic_align_pairwise.

    Returns int(len(seq1) * len(seq2) / seconds / 1000), or the string '*'
    if the alignment raises ArithmeticError.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    # time.clock was removed in Python 3.8; use perf_counter where it exists
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1,
                               seq2,
                               S,
                               10,
                               2,
                               local=False,
                               return_alignment=False,
                               **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)

示例#45

0

显示文件

文件： region.py 项目： chungtseng/pycogent

 def _get_sequence(self):
     if 'Seq' not in self._cached:
         try:
             seq = get_sequence(self.Location)
         except NoItemError:
             try:
                 alt_loc = assembly_exception_coordinate(self.Location)
                 seq = get_sequence(alt_loc)
             except NoItemError:
                 seq = DNA.makeSequence("N"*len(self))
         seq.Name = str(self.Location)
         self._cached['Seq'] = seq
     return self._cached['Seq']

示例#46

0

显示文件

    def test_maps_on_maps(self):
        """structure() output for an annotated sequence and for its [5:] slice"""
        parent = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
        fake = annotate(parent, 10, 20, 'fake')
        annotate(fake, 3, 5, 'fake2')
        annotate(parent, 1, 3, 'left')

        sliced = parent[5:]

        # (sequence under test, expected structure)
        cases = [
            (parent,
             ('seq', 50, [('fake', '[10:20]/50', [('fake2', '[3:5]/10')]),
                          ('left', '[1:3]/50')])),
            (sliced,
             ('seq', 45, [('fake', '[5:15]/45', [('fake2', '[3:5]/10')])])),
        ]
        for subject, expected in cases:
            self.assertEqual(structure(subject), expected)

示例#47

0

显示文件

def makeSampleSequence(mid_gaps=False):
    """Return a DNA sequence annotated with a CDS and a 5' UTR feature.

    mid_gaps: when true, positions 5-9 of the raw sequence are replaced by
    '-' characters and the features are placed at (10, 20) and (5, 8)
    instead of (15, 25) and (12, 15).
    """
    bases = 'AACCCAAAATTTTTTGGGGGGGGGGCCCC'
    if mid_gaps:
        bases = ''.join([bases[:5], '-----', bases[10:]])
        # annotations only make sense when they're on the raw sequence
        cds_span, utr_span = (10, 20), (5, 8)
    else:
        cds_span, utr_span = (15, 25), (12, 15)

    sample = DNA.makeSequence(bases)
    sample.addAnnotation(Feature, 'CDS', 'CDS', [cds_span])
    sample.addAnnotation(Feature, "5'UTR", "5' UTR", [utr_span])
    return sample

示例#48

0

显示文件

文件： test_align.py 项目： GavinHuttley/pycogent

 def _make_aln(self, orig, model=dna_model, param_vals=None, 
         indel_rate=0.1, indel_length=0.5, **kw):
     """Align the sequences in orig with TreeAlign and return the alignment.

     orig: dict mapping sequence name to raw sequence; each value is
      converted with DNA.makeSequence.
     model, param_vals: passed to TreeAlign.
     indel_rate, indel_length, **kw: passed to TreeAlign as keyword
      arguments.

     The guide tree is hard-coded: tips A and B when there are exactly two
     sequences, otherwise tips A, B, C and D.
     """
     kw['indel_rate'] = indel_rate
     kw['indel_length'] = indel_length
     seqs = dict((key, DNA.makeSequence(value)) 
             for (key, value) in orig.items())
     # A tree previously built from tip_names was discarded straight away;
     # only the tree string with explicit branch lengths is ever used.
     if len(seqs) == 2:
         tree = cogent.LoadTree(treestring="(A:.1,B:.1)")
     else:
         tree = cogent.LoadTree(treestring="(((A:.1,B:.1):.1,C:.1):.1,D:.1)")
     aln, tree = cogent.align.progressive.TreeAlign(model, seqs,
             tree=tree, param_vals=param_vals, show_progress=False, **kw)
     return aln

示例#49

0

显示文件

文件： util.py 项目： kylebittinger/pynast

def remove_template_terminal_gaps(candidate,template):
    """Remove template terminal gaps and corresponding bases in candidate 

        candidate, template: aligned sequences of equal length. candidate
         must provide degap().

        Returns (candidate, template) sliced to the span between the
         template's first and last non-gap positions. The candidate's name
         is extended with 'start..end', separated by ':' when the name
         ends with 'RC' and by a space otherwise. Empty input is returned
         unchanged.

        Raises ValueError if the two sequences differ in length.
    """
    if len(template) != len(candidate):
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError(
         "Sequences must be aligned, but their "
         "lengths aren't equal. %d != %d" % (len(candidate),len(template)))
         
    if len(template) == 0:
        return candidate, template
    
    degapped_candidate_len = len(candidate.degap())
    
    candidate = DNA.makeSequence(candidate)
    template = DNA.makeSequence(template)
    
    template_gap_vector = template.gapVector()
    first_non_gap = template_gap_vector.index(False)
    num_three_prime_gaps = template_gap_vector[::-1].index(False)
    last_non_gap = len(template_gap_vector) - num_three_prime_gaps
    
    # Construct the candidate name, which will include the range of bases
    # from the original sequence
    candidate = candidate[first_non_gap:last_non_gap]
    template = template[first_non_gap:last_non_gap]
    candidate_start_pos = first_non_gap + 1
    candidate_end_pos = degapped_candidate_len - num_three_prime_gaps
    candidate_name = candidate.Name
    if candidate_name.endswith('RC'):
        name_delimiter = ':'
    else:
        name_delimiter = ' '
    candidate_name = '%s%s%d..%d' %\
     (candidate_name,name_delimiter,candidate_start_pos,candidate_end_pos)
    
    return DNA.makeSequence(candidate,Name=candidate_name), template

示例#50

0

显示文件

def remove_template_terminal_gaps(candidate, template):
    """Remove template terminal gaps and corresponding bases in candidate 

    candidate, template: aligned sequences of equal length; candidate must
     provide degap().

    Returns a (candidate, template) pair trimmed to the region between the
     template's first and last non-gap positions. The returned candidate is
     renamed to '<name><delimiter><start>..<end>', where the delimiter is
     ':' if the name ends with 'RC' and ' ' otherwise. Zero-length input is
     returned as is.

    Raises ValueError when the sequence lengths differ.
    """
    if len(template) != len(candidate):
        # built as a call so the statement parses on Python 2 and Python 3
        message = "Sequences must be aligned, but their "+\
         "lengths aren't equal. %d != %d" % (len(candidate),len(template))
        raise ValueError(message)

    if len(template) == 0:
        return candidate, template

    degapped_candidate_len = len(candidate.degap())

    candidate = DNA.makeSequence(candidate)
    template = DNA.makeSequence(template)

    template_gap_vector = template.gapVector()
    first_non_gap = template_gap_vector.index(False)
    num_three_prime_gaps = template_gap_vector[::-1].index(False)
    last_non_gap = len(template_gap_vector) - num_three_prime_gaps

    # Construct the candidate name, which will include the range of bases
    # from the original sequence
    candidate = candidate[first_non_gap:last_non_gap]
    template = template[first_non_gap:last_non_gap]
    candidate_start_pos = first_non_gap + 1
    candidate_end_pos = degapped_candidate_len - num_three_prime_gaps
    candidate_name = candidate.Name
    if candidate_name.endswith('RC'):
        name_delimiter = ':'
    else:
        name_delimiter = ' '
    candidate_name = '%s%s%d..%d' %\
     (candidate_name,name_delimiter,candidate_start_pos,candidate_end_pos)

    return DNA.makeSequence(candidate, Name=candidate_name), template

示例#51

0

显示文件

def _assemble_seq(frags, start, end, frag_positions):
    """returns a single DNA sequence in which missing sequence is replaced
    by 'N'

    frags: the fragment strings, in positional order
    start, end: coordinates of the region being assembled
    frag_positions: one (frag_start, frag_end) pair per fragment, in the
        same coordinate system as start and end

    Every stretch between start, the fragments and end that no fragment
    covers is filled with 'N'. With no fragments at all the result is
    'N' * (end - start).

    Raises AssertionError if the number of fragments and positions differ,
    if a fragment starts before the previous one ends, or if end lies
    before the last fragment's end.
    """
    assert len(frag_positions) == len(frags), "Mismatched number of "\
                                                    "fragments and positions"
    prev_end = start
    assembled = []
    for frag, (frag_start, frag_end) in zip(frags, frag_positions):
        diff = frag_start - prev_end
        assert diff >= 0, 'fragment position start < previous end: %s, %s' %\
                                                (frag_start, prev_end)
        assembled += ['N' * diff, frag]
        prev_end = frag_end
    # prev_end is the last fragment's end, or start when there were no
    # fragments; using it avoids referencing an unbound frag_end
    diff = end - prev_end
    assert diff >= 0, 'end[%s] < previous frag_end[%s]' % (end, prev_end)
    assembled += ['N' * diff]
    return DNA.makeSequence(''.join(assembled))

示例#52

0

显示文件

    def test_simulateAlignment_root_sequence(self):
        """provide a root sequence for simulating an alignment"""
        def check_root_is_kept(root_sequence):
            # simulate from a two-sequence dinucleotide model and confirm the
            # simulated 'root' entry matches what was supplied
            observed = LoadSeqs(data={'a': 'ggaatt', 'c': 'cctaat'})
            tree = LoadTree(treestring="(a,c);")
            model = substitution_model.Dinucleotide(mprob_model='tuple')
            controller = model.makeParamController(tree)
            controller.setAlignment(observed)
            simulated = controller.simulateAlignment(
                exclude_internal=False, root_sequence=root_sequence)
            simulated_root = simulated.NamedSeqs['root']
            self.assertEqual(str(simulated_root), str(root_sequence))

        # first as a sequence instance, then as a plain string
        for supplied_root in (DNA.makeSequence('GTAATT'), 'GTAATC'):
            check_root_is_kept(supplied_root)

示例#53

0

显示文件

文件： test_maps.py 项目： miklou/pycogent

 def test_maps_on_maps(self):
     """structure() of a sequence with nested features, before and after [5:]"""
     whole = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
     fake = annotate(whole, 10, 20, 'fake')
     annotate(fake, 3, 5, 'fake2')
     annotate(whole, 1, 3, 'left')

     trimmed = whole[5:]

     whole_features = [
         ('fake', '[10:20]/50', [('fake2', '[3:5]/10')]),
         ('left', '[1:3]/50'),
     ]
     self.assertEqual(structure(whole), ('seq', 50, whole_features))

     trimmed_features = [
         ('fake', '[5:15]/45', [('fake2', '[3:5]/10')]),
     ]
     self.assertEqual(structure(trimmed), ('seq', 45, trimmed_features))

示例#54

0

显示文件

文件： cigar.py 项目： cxhernandez/pycogent

def CigarParser(seqs,
                cigars,
                sliced=False,
                ref_seqname=None,
                start=None,
                end=None,
                moltype=DNA):
    """return an alignment from raw sequences and cigar strings
    if sliced, will return an alignment correspondent to ref sequence start to end
    
    Arguments:
        seqs - raw sequences as {seqname: seq}
        cigars - corresponding cigar text as {seqname: cigar_text}
        cigars and seqs should have the same seqnames
        sliced - if True, restrict the alignment to the region of the
            reference sequence given by start and end
        ref_seqname - name of the reference sequence; used only when sliced
        start, end - passed to slice_cigar for the reference sequence
            (with by_align=False); used only when sliced
        moltype - optional default to DNA; used for every sequence this
            function creates, including all-gap filler sequences
    """
    data = {}
    if not sliced:
        for seqname in seqs:
            data[seqname] = aligned_from_cigar(cigars[seqname],
                                               seqs[seqname],
                                               moltype=moltype)
    else:
        ref_aln_seq = aligned_from_cigar(cigars[ref_seqname],
                                         seqs[ref_seqname],
                                         moltype=moltype)
        # only the alignment location is needed for the reference sequence
        _, aln_loc = slice_cigar(cigars[ref_seqname],
                                 start,
                                 end,
                                 by_align=False)
        aln_start, aln_end = aln_loc[0], aln_loc[1]
        data[ref_seqname] = ref_aln_seq[aln_start:aln_end]
        for seqname in seqs:
            if seqname == ref_seqname:
                continue
            m, seq_loc = slice_cigar(cigars[seqname], aln_start, aln_end)
            if seq_loc:
                seq = seqs[seqname]
                if isinstance(seq, str):
                    seq = moltype.makeSequence(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gappedByMap(m)
            else:
                # no location came back for this sequence in the window:
                # fill with gaps, using the requested moltype rather than
                # a hard-coded DNA
                data[seqname] = moltype.makeSequence('-' *
                                                     (aln_end - aln_start))
    aln = LoadSeqs(data=data, aligned=True)
    return aln

示例#55

0

显示文件

def get_rev_primer_seqs(mapping_fp):
    """ Parses mapping file to get dictionary of SampleID:Rev primer
    mapping_fp:  mapping filepath

    Returns a dict mapping each SampleID (the first column of every mapping
     row) to a list holding the reverse complement of each comma-separated
     primer in that sample's 'ReversePrimer' column.

    Raises ValueError if process_id_map reports an error starting with
     "Duplicate SampleID" or "Invalid DNA sequence detected", and KeyError
     if a sample has no 'ReversePrimer' entry.
    """
    hds, mapping_data, run_description, errors, warnings = \
        process_id_map(mapping_fp, has_barcodes=False,
         disable_primer_check=True)

    for curr_err in errors:
        if curr_err.startswith("Duplicate SampleID"):
            raise ValueError('Errors were found with mapping file, '
             'please run check_id_map.py to identify problems.')

    # create dict of dicts with SampleID:{each header:mapping data}
    id_map = {}
    for curr_data in mapping_data:
        id_map[curr_data[0]] = dict(
         (header, curr_data[col]) for col, header in enumerate(hds))

    reverse_primers = {}
    for curr_id, sample_data in id_map.items():
        # only the column lookup is guarded, so a KeyError raised while
        # reverse complementing is not misreported as a missing column
        try:
            primer_field = sample_data['ReversePrimer']
        except KeyError:
            raise KeyError("Reverse primer not found in mapping file, "
             "please include a 'ReversePrimer' column.")
        reverse_primers[curr_id] = \
         [DNA.rc(curr_rev_primer) for curr_rev_primer in
          primer_field.split(',')]

    # Check for valid reverse primers
    # NOTE(review): an earlier comment said these are reported as warnings,
    # but the code inspects `errors` -- confirm which list is intended
    for curr_err in errors:
        if curr_err.startswith("Invalid DNA sequence detected"):
            raise ValueError("Problems found with reverse primers, please "
             "check mapping file with check_id_map.py")

    return reverse_primers

示例#56

0

显示文件

    def test_other_repeat(self):
        """should apply repeat feature data in a manner consistent with strand"""
        coord = dict(CoordName=13, Start=32890200, End=32890500)
        ps_repeat = self.human.getRegion(Strand=1, **coord)
        ms_repeat = self.human.getRegion(Strand=-1, **coord)
        exp = DNA.makeSequence('CTTACTGTGAGGATGGGAACATTTTACAGCTGTGCTG'\
          'TCCAAACCGGTGCCACTAGCCACATTAAGCACTCGAAACGTGGCTAGTGCGACTAGAGAAGAGGA'\
          'TTTTCATACGATTTAGTTTCAATCACGCTAACCAGTGACGCGTGGCTAGTGG')

        # assertEqual replaces the deprecated assertEquals alias, which
        # newer unittest versions no longer provide
        # the minus-strand region is the reverse complement of the plus one
        self.assertEqual(ms_repeat.Seq, ps_repeat.Seq.rc())

        ps_annot_seq = ps_repeat.getAnnotatedSeq(feature_types='repeat')
        ms_annot_seq = ms_repeat.getAnnotatedSeq(feature_types='repeat')
        ps_seq = ps_annot_seq.getAnnotationsMatching('repeat')[0]
        ms_seq = ms_annot_seq.getAnnotationsMatching('repeat')[0]
        # the first repeat annotation gives the same slice from either strand
        self.assertEqual(ms_seq.getSlice(), ps_seq.getSlice())
        self.assertEqual(ps_seq.getSlice(), exp)

示例#57

0

显示文件

 def test_getByAnnotation(self):
     """getByAnnotation returns the slices covered by matching annotations"""
     annotated = DNA.makeSequence('ATCGATCGAT' * 5, Name='base')
     for label, span in (('test_label', (5, 10)), ('test_label2', (15, 18))):
         annotated.addAnnotation(Feature, 'test_type', label, [span])

     # every annotation of the type
     by_type = list(annotated.getByAnnotation('test_type'))
     self.assertEqual(len(by_type), 2)
     self.assertEqual(str(by_type[0]), 'TCGAT')
     self.assertEqual(str(by_type[1]), 'TCG')

     # narrowed down by label
     by_label = list(annotated.getByAnnotation('test_type', 'test_label'))
     self.assertEqual(len(by_label), 1)
     self.assertEqual(str(by_label[0]), 'TCGAT')

     # test ignoring of a partial annotation
     truncated = annotated[:17]
     complete_only = list(
         truncated.getByAnnotation('test_type', ignore_partial=True))
     self.assertEqual(len(complete_only), 1)
     self.assertEqual(str(complete_only[0]), 'TCGAT')

示例#58

0

显示文件

    def test_other_repeat(self):
        """should apply repeat feature data in a manner consistent with strand"""
        coord = dict(CoordName=13, Start=32316063, End=32316363)
        # 13:32316063 -32316363
        # fetch the same coordinates once per strand
        ps_repeat = self.human.getRegion(Strand=1, **coord)
        ms_repeat = self.human.getRegion(Strand=-1, **coord)
        # note this MER3 repeat is annotated on the -1 strand
        exp = DNA.makeSequence('AGCTTACTGTGAGGATGGGAACATTTTACAGCTGTGCTGTCCAAA'\
                'CCGGTGCCACTAGCCACATTAAGCACTCGAAACGTGGCTAGTGCGACTAGAGAAGAGGAT'\
                'TTTCATACGATTTAGTTTCAATCACGCTAACCAGTGACGCGTGGCTAGTGG')

        # the minus-strand region must be the reverse complement of the
        # plus-strand region
        self.assertEqual(ms_repeat.Seq, ps_repeat.Seq.rc())

        ps_annot_seq = ps_repeat.getAnnotatedSeq(feature_types='repeat')
        ms_annot_seq = ms_repeat.getAnnotatedSeq(feature_types='repeat')
        # take the first 'repeat' annotation found on each strand's sequence
        ps_seq = ps_annot_seq.getAnnotationsMatching('repeat')[0]
        ms_seq = ms_annot_seq.getAnnotationsMatching('repeat')[0]
        # both strands must give the same slice, equal to the expected sequence
        self.assertEqual(ms_seq.getSlice(), ps_seq.getSlice())
        self.assertEqual(ps_seq.getSlice(), exp)