Пример #1
0
    def test_binaries(self):
        """Test the interleave_pairs and deinterleave_pairs binaries.

        Covers the '-h' usage output, an interleave/deinterleave round
        trip (plain output, and '-Z' output read back through BgzfReader),
        and the mismatching-pairs error together with the '-s' option.
        """

        def read_text(fpath):
            # Close the handle deterministically instead of leaving a bare
            # open().read() for the garbage collector to clean up.
            with open(fpath) as fhand:
                return fhand.read()

        interleave_bin = os.path.join(BIN_DIR, 'interleave_pairs')
        deinterleave_bin = os.path.join(BIN_DIR, 'deinterleave_pairs')
        assert 'usage' in check_output([interleave_bin, '-h'])
        assert 'usage' in check_output([deinterleave_bin, '-h'])

        # interleave two matching files
        in_fpath1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        in_fpath2 = os.path.join(TEST_DATA_DIR, 'pairend1b.sfastq')
        out_fhand = NamedTemporaryFile()
        check_output(
            [interleave_bin, '-o', out_fhand.name, in_fpath1, in_fpath2])

        result = read_text(out_fhand.name)
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result
        assert '@seq5:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result

        # deinterleaving the result must give back both input files
        out_fhand1 = NamedTemporaryFile()
        out_fhand2 = NamedTemporaryFile()
        check_output([
            deinterleave_bin, '-o', out_fhand1.name, out_fhand2.name,
            out_fhand.name
        ])
        assert read_text(in_fpath1) == read_text(out_fhand1.name)
        assert read_text(in_fpath2) == read_text(out_fhand2.name)

        # same round trip with '-Z'; the output is read through BgzfReader
        out_fhand1 = NamedTemporaryFile()
        out_fhand2 = NamedTemporaryFile()
        check_output([
            deinterleave_bin, '-o', out_fhand1.name, out_fhand2.name,
            out_fhand.name, '-Z'
        ])

        assert read_text(in_fpath1) == BgzfReader(out_fhand1.name).read(2000)
        assert read_text(in_fpath2) == BgzfReader(out_fhand2.name).read(2000)

        # skip checks
        in_fpath1 = os.path.join(TEST_DATA_DIR, 'pairend1.sfastq')
        in_fpath2 = os.path.join(TEST_DATA_DIR, 'pairend2.sfastq')
        out_fhand = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        # Without '-s' the binary must exit with a non-zero code and report
        # the mismatch on stderr.
        ret_code = call(
            [interleave_bin, '-o', out_fhand.name, in_fpath1, in_fpath2],
            stderr=stderr)
        assert int(ret_code)
        assert 'from the two files do not match' in read_text(stderr.name)
        check_output(
            [interleave_bin, '-o', out_fhand.name, '-s', in_fpath1, in_fpath2])
        result = read_text(out_fhand.name)
        assert 'seq4:136:FC706VJ:2:2104:15343:197393' in result
        assert 'seq3:136:FC706VJ:2:2104:15343:197393' in result
Пример #2
0
    def test_pair_matcher_bin(self):
        """Test the pair_matcher binary.

        Covers the '-h' usage output, a plain run writing matched pairs
        and orphans to separate files, the '-Z' run whose outputs are read
        back through BgzfReader, and the '-u' run on the unordered file.
        """

        def read_text(fpath):
            # Close the handle deterministically instead of leaving a bare
            # open().read() for the garbage collector to clean up.
            with open(fpath) as fhand:
                return fhand.read()

        pair_matcher_bin = os.path.join(SEQ_BIN_DIR, 'pair_matcher')
        assert 'usage' in check_output([pair_matcher_bin, '-h'])

        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath
        ])

        result = read_text(out_fhand.name)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = read_text(orphan_fhand.name)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # compressed output
        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath, '-Z'
        ])
        result = BgzfReader(out_fhand.name).read(2000)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = BgzfReader(orphan_fhand.name).read(2000)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # unordered file
        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend6.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath, '-u'
        ])

        result = read_text(out_fhand.name)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = read_text(orphan_fhand.name)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
Пример #3
0
 def __init__(self, pbiFilename, chunk_size=10000000):
     """Open the index at ``pbiFilename`` and build its chunks.

     The path is user-expanded and made absolute, ``_get_blocks`` is run,
     and the file is then opened with ``BgzfReader`` to load the header
     and the main index (called with ``zmw_only=True``). The value the
     main-index load returns is passed to ``_make_chunks`` together with
     ``chunk_size``.
     """
     self._chunk_start = None
     expanded_path = expanduser(pbiFilename)
     self._pbiFilename = abspath(expanded_path)
     self._get_blocks()
     with BgzfReader(self._pbiFilename, mode='rb') as index_file:
         self._loadHeader(index_file)
         hole_numbers = self._loadMainIndex(
             index_file, self._to_virtual_offset, zmw_only=True)
         # Chunking happens while the reader is still open, as before.
         self._make_chunks(chunk_size, hole_numbers)
Пример #4
0
 def __init__(self,
              pbiFilename,
              chunk_start=None,
              chunk_size=None,
              to_virtual_offset=None):
     """Load the header, main index and offsets from ``pbiFilename``.

     ``chunk_start`` and ``chunk_size`` are stored on the instance before
     anything is read. The path is user-expanded and made absolute, then
     opened with ``BgzfReader``; ``to_virtual_offset`` is forwarded
     unchanged to ``_loadMainIndex``.
     """
     self._chunk_start, self._chunk_size = chunk_start, chunk_size
     resolved_path = abspath(expanduser(pbiFilename))
     with BgzfReader(resolved_path, mode='rb') as index_file:
         self._loadHeader(index_file)
         self._loadMainIndex(index_file, to_virtual_offset)
         self._loadOffsets(index_file)
Пример #5
0
    def test_pair_matcher_bin(self):
        """Test the pair_matcher binary.

        Covers the '-h' usage output, a plain run writing matched pairs
        and orphans to separate files, the error expected when running
        with '-l 1', the '-Z' run whose outputs are read back through
        BgzfReader, and the '-u' run on the unordered file.
        """

        def read_text(fpath):
            # Close the handle deterministically instead of leaving a bare
            # open().read() for the garbage collector to clean up.
            with open(fpath) as fhand:
                return fhand.read()

        pair_matcher_bin = os.path.join(BIN_DIR, 'pair_matcher')
        assert 'usage' in check_output([pair_matcher_bin, '-h'])

        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath
        ])

        result = read_text(out_fhand.name)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = read_text(orphan_fhand.name)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # with '-l 1' the binary must fail and explain why on stderr
        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        stderr = NamedTemporaryFile()
        try:
            check_output([
                pair_matcher_bin, '-o', out_fhand.name, '-p',
                orphan_fhand.name, in_fpath, '-l', '1'
            ],
                         stderr=stderr)
        except CalledProcessError:
            assert 'There are too many consecutive' in read_text(stderr.name)
        else:
            # Only reached when the binary exited with status 0.
            self.fail('error expected')

        # compressed output
        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend5.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath, '-Z'
        ])
        result = BgzfReader(out_fhand.name).read(2000)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = BgzfReader(orphan_fhand.name).read(2000)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp

        # unordered file
        in_fpath = os.path.join(TEST_DATA_DIR, 'pairend6.sfastq')
        out_fhand = NamedTemporaryFile()
        orphan_fhand = NamedTemporaryFile()
        check_output([
            pair_matcher_bin, '-o', out_fhand.name, '-p', orphan_fhand.name,
            in_fpath, '-u'
        ])

        result = read_text(out_fhand.name)
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG' in result
        assert '@seq1:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in result

        orp = read_text(orphan_fhand.name)
        assert '@seq8:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG' in orp
Пример #6
0
 def __init__(self, pbiFilename):
     """Load the header of the index file at ``pbiFilename``.

     The path is user-expanded and made absolute, then opened with
     ``BgzfReader`` in ``'rb'`` mode and passed to ``_loadHeader``.
     """
     resolved_path = abspath(expanduser(pbiFilename))
     with BgzfReader(resolved_path, mode='rb') as index_file:
         self._loadHeader(index_file)