示例#1
0
    def test_illumina18_variant(self):
        # Decode every character accepted by the illumina1.8 variant and
        # expect the scores 0..62 back, in order.
        valid_chars = ("!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL"
                       "MNOPQRSTUVWXYZ[\\]^_")
        decoded = _decode_qual_to_phred(valid_chars, variant="illumina1.8")
        expected = np.arange(63)
        npt.assert_equal(decoded, expected)

        # "a" is not one of the valid characters above, so decoding must
        # fail and the message must cite the accepted score range.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred("AaB", variant="illumina1.8")
        message = str(ctx.exception)
        self.assertIn("[0, 62]", message)
示例#2
0
    def test_illumina13_variant(self):
        # Decode every character accepted by the illumina1.3 variant and
        # expect the scores 0..62 back, in order.
        valid_chars = ("@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk"
                       "lmnopqrstuvwxyz{|}~")
        decoded = _decode_qual_to_phred(valid_chars, variant="illumina1.3")
        expected = np.arange(63)
        npt.assert_equal(decoded, expected)

        # "!" is not one of the valid characters above, so decoding must
        # fail and the message must cite the accepted score range.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred("a!b", variant="illumina1.3")
        message = str(ctx.exception)
        self.assertIn("[0, 62]", message)
示例#3
0
    def test_sanger_variant(self):
        # Decode every character accepted by the sanger variant and expect
        # the scores 0..93 back, in order.
        valid_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP'
                       'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
        decoded = _decode_qual_to_phred(valid_chars, variant='sanger')
        expected = np.arange(94)
        npt.assert_equal(decoded, expected)

        # A space is not one of the valid characters above, so decoding
        # must fail and the message must cite the accepted score range.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred('a b', variant='sanger')
        message = str(ctx.exception)
        self.assertIn('[0, 93]', message)
示例#4
0
    def test_sanger_variant(self):
        # Full set of characters the sanger variant accepts; they should
        # decode to consecutive scores starting at 0.
        sanger_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP'
                        'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
        scores = _decode_qual_to_phred(sanger_chars, variant='sanger')
        npt.assert_equal(scores, np.arange(94))

        # The embedded space is not in the set above: expect a ValueError
        # whose text reports the valid range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('a b', variant='sanger')
        error_text = str(raised.exception)
        self.assertIn('[0, 93]', error_text)
示例#5
0
    def test_illumina13_variant(self):
        # Full set of characters the illumina1.3 variant accepts; they
        # should decode to consecutive scores starting at 0.
        illumina13_chars = ('@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk'
                            'lmnopqrstuvwxyz{|}~')
        scores = _decode_qual_to_phred(illumina13_chars,
                                       variant='illumina1.3')
        npt.assert_equal(scores, np.arange(63))

        # "!" is not in the set above: expect a ValueError whose text
        # reports the valid range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('a!b', variant='illumina1.3')
        error_text = str(raised.exception)
        self.assertIn('[0, 62]', error_text)
示例#6
0
    def test_illumina18_variant(self):
        # Full set of characters the illumina1.8 variant accepts; they
        # should decode to consecutive scores starting at 0.
        illumina18_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL'
                            'MNOPQRSTUVWXYZ[\\]^_')
        scores = _decode_qual_to_phred(illumina18_chars,
                                       variant='illumina1.8')
        npt.assert_equal(scores, np.arange(63))

        # "a" is not in the set above: expect a ValueError whose text
        # reports the valid range.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred('AaB', variant='illumina1.8')
        error_text = str(raised.exception)
        self.assertIn('[0, 62]', error_text)
示例#7
0
    def test_illumina18_variant(self):
        # Decode every character accepted by the illumina1.8 variant and
        # expect the scores 0..62 back, in order.
        valid_chars = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL'
                       'MNOPQRSTUVWXYZ[\\]^_')
        decoded = _decode_qual_to_phred(valid_chars, variant='illumina1.8')
        expected = np.arange(63)
        npt.assert_equal(decoded, expected)

        # "a" is not one of the valid characters above, so decoding must
        # fail and the message must cite the accepted score range.
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred('AaB', variant='illumina1.8')
        message = str(ctx.exception)
        self.assertIn('[0, 62]', message)
示例#8
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset, prev):
    """Read and decode the quality-score lines of one record.

    Lines are pulled from ``_line_generator(fh, skip_blanks=False)``. Each
    non-blank line is decoded with ``_decode_qual_to_phred`` (forwarding
    `variant` and `phred_offset`) until either a line starting with '@'
    arrives after exactly `seq_len` quality characters have been read, or
    the lines run out.

    `prev` is the line seen before this call; it is updated with every line
    read here. When a quality line follows a falsy `prev`, ``_blank_error``
    is called (presumably it raises; it is defined elsewhere).

    Returns a 2-tuple: the decoded chunks joined with ``np.hstack``, and the
    '@' line that ended the record (``None`` if the input was exhausted).

    Raises ``FASTQFormatError`` if more than `seq_len` quality characters
    are found, or if the input ends before `seq_len` characters were read.
    """
    decoded_chunks = []
    n_read = 0
    for line in _line_generator(fh, skip_blanks=False):
        if not line:
            # Blank line: only remember it so the next line can detect it.
            prev = line
            continue

        # A leading '@' only marks the next record once the expected number
        # of quality characters has been read; before that it is treated as
        # quality data.
        if line.startswith('@') and n_read == seq_len:
            return np.hstack(decoded_chunks), line

        if not prev:
            _blank_error("after '+' or within quality scores")

        n_read += len(line)
        if n_read > seq_len:
            n_extra = n_read - seq_len
            raise FASTQFormatError(
                "Found more quality score characters than sequence "
                "characters. Extra quality score characters: %r" %
                line[-n_extra:])

        scores = _decode_qual_to_phred(line, variant=variant,
                                       phred_offset=phred_offset)
        decoded_chunks.append(scores)
        prev = line

    if n_read != seq_len:
        raise FASTQFormatError(
            "Found incomplete/truncated FASTQ record at end of file.")
    return np.hstack(decoded_chunks), None
示例#9
0
def _parse_quality_scores(fh, seq_len, variant, phred_offset, prev):
    """Decode quality lines from `fh` until `seq_len` scores are collected.

    Iterates ``_line_generator(fh, skip_blanks=False)``, decoding each
    non-blank line via ``_decode_qual_to_phred`` with the given `variant`
    and `phred_offset`. Parsing ends when a line beginning with '@' is seen
    after exactly `seq_len` characters were consumed, or when no lines
    remain.

    `prev` holds the previously read line (supplied by the caller, then
    updated here); a quality line that follows a falsy `prev` triggers
    ``_blank_error`` (defined elsewhere; presumably raises).

    Returns ``(scores, header)`` where ``scores`` is ``np.hstack`` of the
    decoded chunks and ``header`` is the terminating '@' line, or ``None``
    when the input ended.

    Raises ``FASTQFormatError`` when too many quality characters are found
    or when the input ends with a count different from `seq_len`.
    """
    collected = []
    consumed = 0
    for current in _line_generator(fh, skip_blanks=False):
        if not current:
            # Nothing to decode; just record that a blank line was seen.
            prev = current
            continue

        # '@' starts the next record only when this record is complete;
        # otherwise the line is handled as quality characters.
        if current.startswith('@') and consumed == seq_len:
            return np.hstack(collected), current

        if not prev:
            _blank_error("after '+' or within quality scores")

        consumed += len(current)
        if consumed > seq_len:
            surplus = current[-(consumed - seq_len):]
            raise FASTQFormatError(
                "Found more quality score characters than sequence "
                "characters. Extra quality score characters: %r" % surplus)

        collected.append(
            _decode_qual_to_phred(current, variant=variant,
                                  phred_offset=phred_offset))
        prev = current

    if consumed != seq_len:
        raise FASTQFormatError(
            "Found incomplete/truncated FASTQ record at end of file.")
    return np.hstack(collected), None
示例#10
0
def _qseq_to_generator(
    fh,
    constructor=Sequence,
    filter=_will_filter,
    phred_offset=_default_phred_offset,
    variant=_default_variant,
    **kwargs
):
    """Yield one `constructor` object for each accepted line of `fh`.

    Every line is split into its fields by ``_record_parser``. When both
    `filter` and the record's ``filtered`` field are truthy the record is
    skipped. Otherwise the raw quality string is decoded with
    ``_decode_qual_to_phred(raw_qual, variant, phred_offset)`` and
    `constructor` is called with the sequence, a ``metadata`` dict (id,
    machine name and the integer run/lane/tile/x/y/index/read fields), the
    decoded scores as ``positional_metadata['quality']``, and any extra
    `kwargs`.
    """
    for record in fh:
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual, filtered) = _record_parser(record)

        # Equivalent to "keep unless filtering is on and the record is
        # flagged".
        if filter and filtered:
            continue

        quality = _decode_qual_to_phred(raw_qual, variant, phred_offset)
        id_fields = (machine_name, run, lane, tile, x, y, index, read)
        metadata = {
            "id": "%s_%s:%s:%s:%s:%s#%s/%s" % id_fields,
            "machine_name": machine_name,
            "run_number": int(run),
            "lane_number": int(lane),
            "tile_number": int(tile),
            "x": int(x),
            "y": int(y),
            "index": int(index),
            "read_number": int(read),
        }
        yield constructor(
            seq,
            metadata=metadata,
            positional_metadata={"quality": quality},
            **kwargs
        )
示例#11
0
def _qseq_to_generator(fh,
                       constructor=Sequence,
                       filter=_will_filter,
                       phred_offset=_default_phred_offset,
                       variant=_default_variant,
                       **kwargs):
    """Generate `constructor` objects from the lines of `fh`.

    Each line is parsed by ``_record_parser``. A record is skipped when
    `filter` is truthy and its ``filtered`` field is truthy. For every
    other record the quality string is decoded with
    ``_decode_qual_to_phred(raw_qual, variant, phred_offset)`` and
    `constructor` is called with the sequence, a ``metadata`` dict built
    from the parsed fields, ``positional_metadata={'quality': ...}`` and
    the extra `kwargs`.
    """
    for entry in fh:
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual,
         filtered) = _record_parser(entry)

        # Skip only when filtering is requested and the record is flagged.
        if filter and filtered:
            continue

        scores = _decode_qual_to_phred(raw_qual, variant, phred_offset)
        record_id = '%s_%s:%s:%s:%s:%s#%s/%s' % (
            machine_name, run, lane, tile, x, y, index, read)
        record_metadata = {
            'id': record_id,
            'machine_name': machine_name,
            'run_number': int(run),
            'lane_number': int(lane),
            'tile_number': int(tile),
            'x': int(x),
            'y': int(y),
            'index': int(index),
            'read_number': int(read)
        }
        yield constructor(seq,
                          metadata=record_metadata,
                          positional_metadata={'quality': scores},
                          **kwargs)
示例#12
0
    def test_custom_phred_offset(self):
        # With offset 42 these 51 characters must decode to 0..50.
        chars = '*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\'
        decoded = _decode_qual_to_phred(chars, phred_offset=42)
        npt.assert_equal(decoded, np.arange(51))

        # With offset 43 the same string must be rejected, and the message
        # must cite the range [0, 83].
        with self.assertRaises(ValueError) as ctx:
            _decode_qual_to_phred(chars, phred_offset=43)
        self.assertIn('[0, 83]', str(ctx.exception))

        # Offsets 0 and 127 must be rejected with a message that names the
        # parameter and mentions "printable".
        for bad_offset in (0, 127):
            with self.assertRaises(ValueError) as ctx:
                _decode_qual_to_phred(chars, phred_offset=bad_offset)
            message = str(ctx.exception)
            self.assertIn('`phred_offset`', message)
            self.assertIn('printable', message)
示例#13
0
    def test_custom_phred_offset(self):
        # 51 consecutive characters that must decode to 0..50 when the
        # offset is 42.
        qual_chars = "*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\"
        scores = _decode_qual_to_phred(qual_chars, phred_offset=42)
        expected = np.arange(51)
        npt.assert_equal(scores, expected)

        # Offset 43 must make the same input fail, with [0, 83] in the
        # error text.
        with self.assertRaises(ValueError) as raised:
            _decode_qual_to_phred(qual_chars, phred_offset=43)
        error_text = str(raised.exception)
        self.assertIn("[0, 83]", error_text)

        # Both 0 and 127 are expected to be refused as offsets; the error
        # must name `phred_offset` and mention "printable".
        for rejected_offset in (0, 127):
            with self.assertRaises(ValueError) as raised:
                _decode_qual_to_phred(qual_chars,
                                      phred_offset=rejected_offset)
            error_text = str(raised.exception)
            for fragment in ("`phred_offset`", "printable"):
                self.assertIn(fragment, error_text)
示例#14
0
文件: qseq.py 项目: hainm/scikit-bio
def _qseq_to_generator(fh, constructor=Sequence, filter=_will_filter,
                       phred_offset=_default_phred_offset,
                       variant=_default_variant, **kwargs):
    """Lazily turn the lines of `fh` into `constructor` objects.

    ``_record_parser`` splits each line into its fields. Records whose
    ``filtered`` field is truthy are dropped when `filter` is truthy. For
    the remaining records the quality string is decoded with
    ``_decode_qual_to_phred(raw_qual, variant, phred_offset)`` and the
    result of ``constructor(seq, metadata=..., positional_metadata=...,
    **kwargs)`` is yielded.
    """
    for raw_line in fh:
        parsed = _record_parser(raw_line)
        (machine_name, run, lane, tile, x, y, index, read, seq, raw_qual,
         filtered) = parsed

        # Same condition as "not filter or not filtered", inverted into a
        # guard.
        if filter and filtered:
            continue

        phred_scores = _decode_qual_to_phred(raw_qual, variant, phred_offset)
        id_parts = (machine_name, run, lane, tile, x, y, index, read)
        info = {'id': '%s_%s:%s:%s:%s:%s#%s/%s' % id_parts,
                'machine_name': machine_name,
                'run_number': int(run),
                'lane_number': int(lane),
                'tile_number': int(tile),
                'x': int(x),
                'y': int(y),
                'index': int(index),
                'read_number': int(read)}
        yield constructor(seq, metadata=info,
                          positional_metadata={'quality': phred_scores},
                          **kwargs)
示例#15
0
 def test_missing_variant_and_phred_offset(self):
     # Calling the decoder with neither `variant` nor `phred_offset` must
     # raise, and the message must mention both parameters and "decode".
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd')
     message = str(ctx.exception)
     for fragment in ('`variant`', '`phred_offset`', 'decode'):
         self.assertIn(fragment, message)
示例#16
0
 def test_solexa_variant(self):
     # The 'solexa' variant must be rejected with a ValueError whose text
     # contains "719" (presumably an issue number -- TODO confirm).
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd', variant='solexa')
     message = str(ctx.exception)
     self.assertIn('719', message)
示例#17
0
 def test_missing_variant_and_phred_offset(self):
     # With neither `variant` nor `phred_offset` given, decoding must fail
     # and the error must name both parameters and mention "decode".
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred("abcd")
     error_text = str(raised.exception)
     for expected_fragment in ("`variant`", "`phred_offset`", "decode"):
         self.assertIn(expected_fragment, error_text)
示例#18
0
 def test_variant_and_phred_offset_provided(self):
     # Passing `variant` and `phred_offset` together must raise, and the
     # message must contain "both" and name the two parameters.
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred("abcd", variant="sanger", phred_offset=64)
     error_text = str(raised.exception)
     for expected_fragment in ("both", "`variant`", "`phred_offset`"):
         self.assertIn(expected_fragment, error_text)
示例#19
0
 def test_empty_qual_str(self):
     # An empty quality string must decode to an empty uint8 array.
     decoded = _decode_qual_to_phred('', variant='sanger')
     expected = np.array([], dtype=np.uint8)
     npt.assert_equal(decoded, expected)
示例#20
0
 def test_unrecognized_variant(self):
     # An unknown variant name must raise, and the message must contain
     # "variant" and the quoted offending name.
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd', variant='illumina')
     message = str(ctx.exception)
     for fragment in ('variant', "'illumina'"):
         self.assertIn(fragment, message)
示例#21
0
 def test_solexa_variant(self):
     # Decoding with variant 'solexa' must fail; the error text must
     # include "719" (presumably an issue reference -- TODO confirm).
     with self.assertRaises(ValueError) as raised:
         _decode_qual_to_phred('abcd', variant='solexa')
     error_text = str(raised.exception)
     self.assertIn('719', error_text)
示例#22
0
 def test_variant_and_phred_offset_provided(self):
     # Supplying `variant` together with `phred_offset` must raise; the
     # message must say "both" and name each parameter.
     with self.assertRaises(ValueError) as ctx:
         _decode_qual_to_phred('abcd', variant='sanger', phred_offset=64)
     message = str(ctx.exception)
     for fragment in ('both', '`variant`', '`phred_offset`'):
         self.assertIn(fragment, message)
示例#23
0
 def test_missing_variant_and_phred_offset(self):
     # Omitting both `variant` and `phred_offset` must raise a ValueError
     # that names the two parameters and mentions "decode".
     with self.assertRaises(ValueError) as caught:
         _decode_qual_to_phred('abcd')
     text = str(caught.exception)
     self.assertIn('`variant`', text)
     self.assertIn('`phred_offset`', text)
     self.assertIn('decode', text)
示例#24
0
 def test_solexa_variant(self):
     # 'solexa' must be refused with a ValueError mentioning "719"
     # (presumably an issue number -- TODO confirm).
     with self.assertRaises(ValueError) as caught:
         _decode_qual_to_phred("abcd", variant="solexa")
     text = str(caught.exception)
     self.assertIn("719", text)
示例#25
0
 def test_variant_and_phred_offset_provided(self):
     # `variant` and `phred_offset` given together must raise a ValueError
     # containing "both" and the names of the two parameters.
     with self.assertRaises(ValueError) as caught:
         _decode_qual_to_phred('abcd', variant='sanger', phred_offset=64)
     text = str(caught.exception)
     self.assertIn('both', text)
     self.assertIn('`variant`', text)
     self.assertIn('`phred_offset`', text)
示例#26
0
 def test_empty_qual_str(self):
     # Decoding "" must give back an empty array of dtype uint8.
     scores = _decode_qual_to_phred("", variant="sanger")
     empty_uint8 = np.array([], dtype=np.uint8)
     npt.assert_equal(scores, empty_uint8)
示例#27
0
 def test_unrecognized_variant(self):
     # A variant name the decoder does not know must raise a ValueError
     # that mentions "variant" and quotes the bad name.
     with self.assertRaises(ValueError) as caught:
         _decode_qual_to_phred("abcd", variant="illumina")
     text = str(caught.exception)
     self.assertIn("variant", text)
     self.assertIn("'illumina'", text)