Exemplo n.º 1
0
 def bioseq_to_bwa_seq(self):
   """
   Exercise bwa.bioseq_to_bwa_seq on reads from u.random_reads_generator.

   For each quality encoding, with ``unknown`` both False and True, every
   generated read is wrapped in a SeqRecord, copied into a sequence obtained
   from bwa.alloc_seq and compared field by field: name, reversed sequence,
   reverse complement and quality string.
   """
   read_count = 100
   for qual_fmt in ("fastq-sanger", "fastq-illumina", "fastq-solexa"):
     is_solexa = qual_fmt == "fastq-solexa"
     # solexa scores live under their own letter annotation key
     if is_solexa:
       annotation_key = "solexa_quality"
     else:
       annotation_key = "phred_quality"
     for with_unknown in (False, True):
       reads = u.random_reads_generator(
         read_count, fmt=qual_fmt, unknown=with_unknown
       )
       for idx, (bases, quals) in enumerate(reads):
         read_name = "foo-%d" % idx
         record = SeqRecord(
           Seq("".join(bases), single_letter_alphabet),
           id=read_name,
           name=read_name,
           description=read_name,
         )
         record.letter_annotations[annotation_key] = quals
         seq_len = len(record)
         name_len = len(record.name)
         bw_seq = bwa.alloc_seq(1, seq_len, name_len)[0]
         bwa.bioseq_to_bwa_seq(record, bw_seq, seq_len, name_len, qual_fmt)
         self.assertEqual(record.name, bw_seq.get_name())
         # the converted sequence is expected to hold the bases reversed
         self.assertEqual(record.seq.data[::-1], bw_seq.get_seq())
         self.assertEqual(
           reverse_complement(record.seq.data), bw_seq.get_rseq()
         )
         # qualities must come back sanger-encoded whatever the input format;
         # solexa scores s map to round(s + 10*log10(1 + 10**(-s/10)))
         expected_quals = quals
         if is_solexa:
           expected_quals = [
             int(round(s + 10 * math.log10(1 + 10 ** (-s / 10.))))
             for s in quals
           ]
         sanger_chars = [
           chr(v + sg.Q_OFFSET["fastq-sanger"]) for v in expected_quals
         ]
         self.assertEqual(bw_seq.get_qual(), "".join(sanger_chars))
Exemplo n.º 2
0
 def bioseq_to_bwa_seq(self):
     """
     Exercise bwa.bioseq_to_bwa_seq on reads from u.random_reads_generator.

     For each quality encoding, with ``unknown`` both False and True, every
     generated read is wrapped in a SeqRecord, copied into a sequence
     obtained from bwa.alloc_seq and compared field by field: name, reversed
     sequence, reverse complement and quality string.
     """
     read_count = 100
     for qual_fmt in ("fastq-sanger", "fastq-illumina", "fastq-solexa"):
         is_solexa = qual_fmt == "fastq-solexa"
         # solexa scores live under their own letter annotation key
         if is_solexa:
             annotation_key = "solexa_quality"
         else:
             annotation_key = "phred_quality"
         for with_unknown in (False, True):
             reads = u.random_reads_generator(
                 read_count, fmt=qual_fmt, unknown=with_unknown
             )
             for idx, (bases, quals) in enumerate(reads):
                 read_name = "foo-%d" % idx
                 record = SeqRecord(
                     Seq("".join(bases), single_letter_alphabet),
                     id=read_name,
                     name=read_name,
                     description=read_name,
                 )
                 record.letter_annotations[annotation_key] = quals
                 seq_len = len(record)
                 name_len = len(record.name)
                 bw_seq = bwa.alloc_seq(1, seq_len, name_len)[0]
                 bwa.bioseq_to_bwa_seq(
                     record, bw_seq, seq_len, name_len, qual_fmt
                 )
                 self.assertEqual(record.name, bw_seq.get_name())
                 # the converted sequence is expected to hold the bases
                 # reversed
                 self.assertEqual(record.seq.data[::-1], bw_seq.get_seq())
                 self.assertEqual(
                     reverse_complement(record.seq.data), bw_seq.get_rseq()
                 )
                 # qualities must come back sanger-encoded whatever the
                 # input format; solexa scores s map to
                 # round(s + 10*log10(1 + 10**(-s/10)))
                 expected_quals = quals
                 if is_solexa:
                     expected_quals = [
                         int(round(s + 10 * math.log10(1 + 10 ** (-s / 10.))))
                         for s in quals
                     ]
                 sanger_chars = [
                     chr(v + sg.Q_OFFSET["fastq-sanger"])
                     for v in expected_quals
                 ]
                 self.assertEqual(bw_seq.get_qual(), "".join(sanger_chars))
Exemplo n.º 3
0
 def build_bws_array(self):
     """
     Check bwa.build_bws_array on paired reads from u.random_reads_generator.

     For every quality encoding, with ``unknown`` both False and True, the
     same ``nseq`` read pairs are prepared in two forms:

     * "bioseq": a two-element list of SeqRecord objects, one per mate;
     * "qseq": a flat list ``[base_name, seq1, qual1, seq2, qual2]`` whose
       quality strings are encoded with ``sg.Q_OFFSET[fmt]``.

     Each form is passed to bwa.build_bws_array; for both mates the test
     verifies the type of the returned array, the length of every sequence
     and the length of every read name.
     """
     # FIXME: plenty of UGLY code
     nseq = 100
     # Parenthesised tuples keep the comprehensions below valid on both
     # Python 2 and Python 3 (bare "for j in 0, 1" is Python 2 only there).
     mates = (0, 1)
     for fmt in "fastq-sanger", "fastq-illumina", "fastq-solexa":
         qkey = "solexa_quality" if fmt == "fastq-solexa" else "phred_quality"
         for unknown in False, True:
             g = u.random_reads_generator(nseq,
                                          fmt=fmt,
                                          unknown=unknown,
                                          pe=True)
             bioseq_pairs, qseq_pairs = [], []
             for i, read_pair in enumerate(g):
                 base_name = "foo-%d" % i
                 names = ["%s/%d" % (base_name, j) for j in (1, 2)]
                 seq_strings = ["".join(read_pair[j][0]) for j in mates]
                 q_strings = [
                     "".join(
                         chr(x + sg.Q_OFFSET[fmt]) for x in read_pair[j][1])
                     for j in mates
                 ]
                 bioseq_p = [
                     SeqRecord(Seq(seq_strings[j], single_letter_alphabet),
                               id=names[j],
                               name=names[j],
                               description=names[j]) for j in mates
                 ]
                 for j in mates:
                     bioseq_p[j].letter_annotations[qkey] = read_pair[j][1]
                 bioseq_pairs.append(bioseq_p)
                 # qseq record layout: name, then (sequence, quality) per mate
                 qseq = [base_name]
                 for j in mates:
                     qseq.extend((seq_strings[j], q_strings[j]))
                 qseq_pairs.append(qseq)
             n = len(bioseq_pairs[0][0])
             # assertEqual instead of a bare assert: still runs under -O
             self.assertEqual(len(qseq_pairs[0][1]), n)
             for src in "bioseq", "qseq":
                 seq_pairs = bioseq_pairs if src == "bioseq" else qseq_pairs
                 bwsa = bwa.build_bws_array(seq_pairs, qtype=fmt, src=src)
                 for j in mates:
                     self.assertTrue(type(bwsa[j]) is bwa.bwa_seq_p_t)
                     for i in range(nseq):
                         bwseq = bwsa[j][i]
                         self.assertEqual(bwseq.len, n)
                         if src == "bioseq":
                             # compare against the record of the mate
                             # being checked, not always the first one
                             exp_name = bioseq_pairs[i][j].name
                         else:
                             exp_name = "%s/%d" % (qseq_pairs[i][0], j + 1)
                         # only the name length is compared here
                         self.assertEqual(len(bwseq.get_name()),
                                          len(exp_name))
Exemplo n.º 4
0
 def build_bws_array(self):
   """
   Check bwa.build_bws_array on paired reads from u.random_reads_generator.

   For every quality encoding, with ``unknown`` both False and True, the
   same ``nseq`` read pairs are prepared in two forms:

   * "bioseq": a two-element list of SeqRecord objects, one per mate;
   * "qseq": a flat list ``[base_name, seq1, qual1, seq2, qual2]`` whose
     quality strings are encoded with ``sg.Q_OFFSET[fmt]``.

   Each form is passed to bwa.build_bws_array; for both mates the test
   verifies the type of the returned array, the length of every sequence
   and the length of every read name.
   """
   # FIXME: plenty of UGLY code
   nseq = 100
   # Parenthesised tuples keep the comprehensions below valid on both
   # Python 2 and Python 3 (bare "for j in 0, 1" is Python 2 only there).
   mates = (0, 1)
   for fmt in "fastq-sanger", "fastq-illumina", "fastq-solexa":
     qkey = "solexa_quality" if fmt == "fastq-solexa" else "phred_quality"
     for unknown in False, True:
       g = u.random_reads_generator(nseq, fmt=fmt, unknown=unknown, pe=True)
       bioseq_pairs, qseq_pairs = [], []
       for i, read_pair in enumerate(g):
         base_name = "foo-%d" % i
         names = ["%s/%d" % (base_name, j) for j in (1, 2)]
         seq_strings = ["".join(read_pair[j][0]) for j in mates]
         q_strings = ["".join(chr(x+sg.Q_OFFSET[fmt]) for x in read_pair[j][1])
                      for j in mates]
         bioseq_p = [SeqRecord(Seq(seq_strings[j], single_letter_alphabet),
                               id=names[j],
                               name=names[j],
                               description=names[j]) for j in mates]
         for j in mates:
           bioseq_p[j].letter_annotations[qkey] = read_pair[j][1]
         bioseq_pairs.append(bioseq_p)
         # qseq record layout: name, then (sequence, quality) per mate
         qseq = [base_name]
         for j in mates:
           qseq.extend((seq_strings[j], q_strings[j]))
         qseq_pairs.append(qseq)
       n = len(bioseq_pairs[0][0])
       # assertEqual instead of a bare assert: still runs under -O
       self.assertEqual(len(qseq_pairs[0][1]), n)
       for src in "bioseq", "qseq":
         seq_pairs = bioseq_pairs if src == "bioseq" else qseq_pairs
         bwsa = bwa.build_bws_array(seq_pairs, qtype=fmt, src=src)
         for j in mates:
           self.assertTrue(type(bwsa[j]) is bwa.bwa_seq_p_t)
           for i in range(nseq):
             bwseq = bwsa[j][i]
             self.assertEqual(bwseq.len, n)
             if src == "bioseq":
               # compare against the record of the mate being checked,
               # not always the first one
               exp_name = bioseq_pairs[i][j].name
             else:
               exp_name = "%s/%d" % (qseq_pairs[i][0], j + 1)
             # only the name length is compared here
             self.assertEqual(len(bwseq.get_name()), len(exp_name))