Exemplo n.º 1
0
def test_split_paired_reads_1_fa():
    # test input file
    infile = utils.get_test_data('paired.fa')

    ex_outfile1 = utils.get_test_data('paired.fa.1')
    ex_outfile2 = utils.get_test_data('paired.fa.2')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired.fa.1')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired.fa.2', in_dir)

    script = 'split-paired-reads.py'
    args = [infile]

    utils.runscript(script, args, in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 2
0
def test_split_paired_reads_3_output_files_right():
    # test input file
    infile = utils.get_test_data('paired.fq')

    ex_outfile1 = utils.get_test_data('paired.fq.1')
    ex_outfile2 = utils.get_test_data('paired.fq.2')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired.fq.1')
    output_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('yyy', output_dir)

    script = 'split-paired-reads.py'
    args = ['-2', outfile2, '-d', output_dir, infile]

    utils.runscript(script, args)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0
Exemplo n.º 3
0
def test_extract_paired_reads_3_output_dir():
    # test input file
    infile = utils.get_test_data('paired-mixed.fa')

    ex_outfile1 = utils.get_test_data('paired-mixed.fa.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fa.se')

    # output directory
    out_dir = utils.get_temp_filename('output')

    script = 'extract-paired-reads.py'
    args = [infile, '-d', out_dir]

    utils.runscript(script, args)

    outfile1 = os.path.join(out_dir, 'paired-mixed.fa.pe')
    outfile2 = os.path.join(out_dir, 'paired-mixed.fa.se')
    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 4
0
def test_extract_paired_reads_2_fq():
    # test input file
    infile = utils.get_test_data('paired-mixed.fq')

    ex_outfile1 = utils.get_test_data('paired-mixed.fq.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fq.se')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired-mixed.fq.pe')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired-mixed.fq.se', in_dir)

    script = 'extract-paired-reads.py'
    args = [infile]

    utils.runscript(script, args, in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name, (r.name, q.name, n)
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0
Exemplo n.º 5
0
def test_extract_paired_reads_3_output_dir():
    # test input file
    infile = utils.get_test_data('paired-mixed.fa')

    ex_outfile1 = utils.get_test_data('paired-mixed.fa.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fa.se')

    # output directory
    out_dir = utils.get_temp_filename('output')

    script = 'extract-paired-reads.py'
    args = [infile, '-d', out_dir]

    utils.runscript(script, args)

    outfile1 = os.path.join(out_dir, 'paired-mixed.fa.pe')
    outfile2 = os.path.join(out_dir, 'paired-mixed.fa.se')
    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 6
0
def test_extract_paired_reads_4_output_files():
    # test input file
    infile = utils.get_test_data('paired-mixed.fa')

    ex_outfile1 = utils.get_test_data('paired-mixed.fa.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fa.se')

    # actual output files...
    outfile1 = utils.get_temp_filename('out_pe')
    outfile2 = utils.get_temp_filename('out_se')

    script = 'extract-paired-reads.py'
    args = [infile, '-p', outfile1, '-s', outfile2]

    utils.runscript(script, args)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 7
0
def test_split_paired_reads_1_fa():
    # test input file
    infile = utils.get_test_data('paired.fa')

    ex_outfile1 = utils.get_test_data('paired.fa.1')
    ex_outfile2 = utils.get_test_data('paired.fa.2')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired.fa.1')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired.fa.2', in_dir)

    script = 'split-paired-reads.py'
    args = [infile]

    utils.runscript(script, args, in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 8
0
def test_extract_paired_reads_4_output_files():
    # test input file
    infile = utils.get_test_data('paired-mixed.fa')

    ex_outfile1 = utils.get_test_data('paired-mixed.fa.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fa.se')

    # actual output files...
    outfile1 = utils.get_temp_filename('out_pe')
    outfile2 = utils.get_temp_filename('out_se')

    script = 'extract-paired-reads.py'
    args = [infile, '-p', outfile1, '-s', outfile2]

    utils.runscript(script, args)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 9
0
def test_split_paired_reads_3_output_files_right():
    # test input file
    infile = utils.get_test_data('paired.fq')

    ex_outfile1 = utils.get_test_data('paired.fq.1')
    ex_outfile2 = utils.get_test_data('paired.fq.2')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired.fq.1')
    output_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('yyy', output_dir)

    script = 'split-paired-reads.py'
    args = ['-2', outfile2, '-d', output_dir, infile]

    utils.runscript(script, args)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0
Exemplo n.º 10
0
def test_split_paired_reads_2_mixed_fq_orphans_to_file():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = len([1 for record in screed.open(outfile)])
    assert n_orphans == 5
    n_left = len([1 for record in screed.open(infile + '.1')])
    assert n_left == 3
    n_right = len([1 for record in screed.open(infile + '.2')])
    assert n_right == 3
    for filename in [outfile, infile + '.1', infile + '.2']:
        fp = gzip.open(filename)
        try:
            fp.read()
        except IOError as e:
            assert "Not a gzipped file" in str(e), str(e)
        fp.close()
Exemplo n.º 11
0
def test_split_paired_reads_2_stdin_no_out():
    script = 'split-paired-reads.py'
    args = ['-']

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "Accepting input from stdin; output filenames must " in err
Exemplo n.º 12
0
def test_extract_paired_reads_5_stdin_error():
    script = 'extract-paired-reads.py'
    args = ['-f', '/dev/stdin']

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "output filenames must be provided." in err
Exemplo n.º 13
0
def test_interleave_read_stdout():
    # create input files
    infile1 = utils.get_test_data('paired-slash1.fq.1')
    infile2 = utils.get_test_data('paired-slash1.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired-slash1.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2]

    (stats, out, err) = utils.runscript(script, args)

    with open(outfile, 'w') as ofile:
        ofile.write(out)

    n = 0
    for r, q in zip(screed.open(ex_outfile), screed.open(outfile)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 14
0
def test_split_paired_reads_2_stdin_no_out():
    script = 'split-paired-reads.py'
    args = ['-']

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "Accepting input from stdin; output filenames must " in err
Exemplo n.º 15
0
def test_split_paired_reads_2_mixed_fq_orphans_to_file():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = len([1 for record in screed.open(outfile)])
    assert n_orphans == 5
    n_left = len([1 for record in screed.open(infile + '.1')])
    assert n_left == 3
    n_right = len([1 for record in screed.open(infile + '.2')])
    assert n_right == 3
    for filename in [outfile, infile + '.1', infile + '.2']:
        fp = gzip.open(filename)
        try:
            fp.read()
        except IOError as e:
            assert "Not a gzipped file" in str(e), str(e)
        fp.close()
Exemplo n.º 16
0
def test_interleave_reads_no_reformat():
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.malformat.fq.2')

    ex_outfile = utils.get_test_data('paired.malformat.fq')
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '--no-reformat', '-o', outfile]

    utils.runscript(script, args)

    r = open(ex_outfile).read()
    q = open(outfile).read()

    assert r == q, (r, q)
Exemplo n.º 17
0
def test_interleave_reads_no_reformat():
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.malformat.fq.2')

    ex_outfile = utils.get_test_data('paired.malformat.fq')
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '--no-reformat', '-o', outfile]

    utils.runscript(script, args)

    r = open(ex_outfile).read()
    q = open(outfile).read()

    assert r == q, (r, q)
Exemplo n.º 18
0
def test_interleave_read_stdout():
    # create input files
    infile1 = utils.get_test_data('paired-slash1.fq.1')
    infile2 = utils.get_test_data('paired-slash1.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired-slash1.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2]

    (stats, out, err) = utils.runscript(script, args)

    with open(outfile, 'w') as ofile:
        ofile.write(out)

    n = 0
    for r, q in zip(screed.open(ex_outfile), screed.open(outfile)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 19
0
def test_extract_paired_reads_5_stdin_error():
    script = 'extract-paired-reads.py'
    args = ['-f', '/dev/stdin']

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "output filenames must be provided." in err
Exemplo n.º 20
0
def test_interleave_reads_1_fq():
    # test input files
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    r = open(ex_outfile).read()
    q = open(outfile).read()

    assert r == q, (r, q)
Exemplo n.º 21
0
def test_split_paired_reads_2_mixed_fq_require_pair():
    # test input file
    infile = utils.copy_test_data('paired-mixed.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = [infile]

    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
    assert status == 1, status
    assert "Unpaired reads found" in err
Exemplo n.º 22
0
def test_split_paired_reads_2_mixed_fq():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = ['-0', '/dev/null', infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err
Exemplo n.º 23
0
def test_split_paired_reads_2_mixed_fq_broken_pairing_format():
    # test input file
    infile = utils.copy_test_data('paired-mixed-broken.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = [infile]

    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
    assert status == 1
    assert "Unpaired reads found starting at 895:1:37:17593:9954" in err, err
Exemplo n.º 24
0
def test_interleave_reads_1_fq():
    # test input files
    infile1 = utils.get_test_data('paired.fq.1')
    infile2 = utils.get_test_data('paired.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    r = open(ex_outfile).read()
    q = open(outfile).read()

    assert r == q, (r, q)
Exemplo n.º 25
0
def test_split_paired_reads_2_mixed_fq():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = ['-0', '/dev/null', infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err
Exemplo n.º 26
0
def test_split_paired_reads_2_mixed_fq_broken_pairing_format():
    # test input file
    infile = utils.copy_test_data('paired-mixed-broken.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = [infile]

    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
    assert status == 1
    assert "Unpaired reads found starting at 895:1:37:17593:9954" in err, err
Exemplo n.º 27
0
def test_split_paired_reads_2_mixed_fq_require_pair():
    # test input file
    infile = utils.copy_test_data('paired-mixed.fq')
    in_dir = os.path.dirname(infile)

    script = 'split-paired-reads.py'
    args = [infile]

    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
    assert status == 1, status
    assert "Unpaired reads found" in err
Exemplo n.º 28
0
def test_interleave_read_badleft_badright():
    # create input files
    infile1 = utils.get_test_data('paired-broken.fq.badleft')
    infile2 = utils.get_test_data('paired-broken.fq.badright')

    # correct output
    ex_outfile = utils.get_test_data('paired-broken.fq.paired_bad')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    n = 0
    for r, q in zip(screed.open(ex_outfile), screed.open(outfile)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 29
0
def test_interleave_read_seq1_fq():
    # create input files
    infile1 = utils.get_test_data('paired-slash1.fq.1')
    infile2 = utils.get_test_data('paired-slash1.fq.2')

    # correct output
    ex_outfile = utils.get_test_data('paired-slash1.fq')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    n = 0
    for r, q in zip(screed.open(ex_outfile), screed.open(outfile)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 30
0
def test_interleave_reads_2_fa():
    # test input files
    infile1 = utils.get_test_data('paired.fa.1')
    infile2 = utils.get_test_data('paired.fa.2')

    # correct output
    ex_outfile = utils.get_test_data('paired.fa')

    # actual output file
    outfile = utils.get_temp_filename('out.fa')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    utils.runscript(script, args)

    n = 0
    for r, q in zip(screed.open(ex_outfile), screed.open(outfile)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
    assert n > 0
Exemplo n.º 31
0
def test_interleave_reads_broken_fq():
    # test input files
    infile1 = utils.get_test_data('paired-broken.fq.1')
    infile2 = utils.get_test_data('paired-broken.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert 'ERROR: Input files contain different number of records.' in err
Exemplo n.º 32
0
def test_interleave_reads_broken_fq_5():
    # test input files
    infile1 = utils.get_test_data('paired-broken4.fq.1')
    infile2 = utils.get_test_data('paired-broken4.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "ERROR: This doesn't look like paired data!" in err
Exemplo n.º 33
0
def test_extract_paired_reads_unpaired():
    # test input file
    infile = utils.get_test_data('random-20-a.fa')

    # actual output files...
    outfile1 = utils.get_temp_filename('unpaired.pe.fa')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('unpaired.se.fa', in_dir)

    script = 'extract-paired-reads.py'
    args = [infile]

    (_, _, err) = utils.runscript(script, args, in_dir, fail_ok=True)
    assert 'no paired reads!? check file formats...' in err, err
Exemplo n.º 34
0
def test_extract_paired_reads_unpaired():
    # test input file
    infile = utils.get_test_data('random-20-a.fa')

    # actual output files...
    outfile1 = utils.get_temp_filename('unpaired.pe.fa')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('unpaired.se.fa', in_dir)

    script = 'extract-paired-reads.py'
    args = [infile]

    (_, _, err) = utils.runscript(script, args, in_dir, fail_ok=True)
    assert 'no paired reads!? check file formats...' in err, err
Exemplo n.º 35
0
def test_interleave_reads_broken_fq():
    # test input files
    infile1 = utils.get_test_data('paired-broken.fq.1')
    infile2 = utils.get_test_data('paired-broken.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert 'ERROR: Input files contain different number of records.' in err
Exemplo n.º 36
0
def test_interleave_reads_broken_fq_5():
    # test input files
    infile1 = utils.get_test_data('paired-broken4.fq.1')
    infile2 = utils.get_test_data('paired-broken4.fq.2')

    # actual output file
    outfile = utils.get_temp_filename('out.fq')

    script = 'interleave-reads.py'
    args = [infile1, infile2, '-o', outfile]

    status, out, err = utils.runscript(script, args, fail_ok=True)
    assert status == 1
    assert "ERROR: This doesn't look like paired data!" in err
Exemplo n.º 37
0
def test_extract_paired_reads_2_fq():
    # test input file
    infile = utils.get_test_data('paired-mixed.fq')

    ex_outfile1 = utils.get_test_data('paired-mixed.fq.pe')
    ex_outfile2 = utils.get_test_data('paired-mixed.fq.se')

    # actual output files...
    outfile1 = utils.get_temp_filename('paired-mixed.fq.pe')
    in_dir = os.path.dirname(outfile1)
    outfile2 = utils.get_temp_filename('paired-mixed.fq.se', in_dir)

    script = 'extract-paired-reads.py'
    args = [infile]

    utils.runscript(script, args, in_dir)

    assert os.path.exists(outfile1), outfile1
    assert os.path.exists(outfile2), outfile2

    n = 0
    for r, q in zip(screed.open(ex_outfile1),
                    screed.open(outfile1)):
        n += 1
        assert r.name == q.name, (r.name, q.name, n)
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0

    n = 0
    for r, q in zip(screed.open(ex_outfile2),
                    screed.open(outfile2)):
        n += 1
        assert r.name == q.name
        assert r.sequence == q.sequence
        assert r.quality == q.quality
    assert n > 0
Exemplo n.º 38
0
def test_split_paired_reads_2_mixed_fq_gzfile():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, '--gzip', infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = len([1 for record in screed.open(outfile)])
    assert n_orphans == 5
    n_left = len([1 for record in screed.open(infile + '.1')])
    assert n_left == 3
    n_right = len([1 for record in screed.open(infile + '.2')])
    assert n_right == 3

    for filename in [outfile, infile + '.1', infile + '.2']:
        fp = gzip.open(filename)
        fp.read()  # this will fail if not gzip file.
        fp.close()
Exemplo n.º 39
0
def test_split_paired_reads_2_mixed_fq_gzfile():
    # test input file
    infile = utils.copy_test_data('paired-mixed-2.fq')
    in_dir = os.path.dirname(infile)
    outfile = utils.get_temp_filename('out.fq')

    script = 'split-paired-reads.py'
    args = ['-0', outfile, '--gzip', infile]

    status, out, err = utils.runscript(script, args, in_dir)
    assert status == 0
    assert "split 6 sequences (3 left, 3 right, 5 orphans)" in err, err

    n_orphans = len([1 for record in screed.open(outfile)])
    assert n_orphans == 5
    n_left = len([1 for record in screed.open(infile + '.1')])
    assert n_left == 3
    n_right = len([1 for record in screed.open(infile + '.2')])
    assert n_right == 3

    for filename in [outfile, infile + '.1', infile + '.2']:
        fp = gzip.open(filename)
        fp.read()                       # this will fail if not gzip file.
        fp.close()