示例#1
0
def test_info_file(run, tmpdir):
    """Write an info file with --info-file and compare it to illumina.info.txt"""
    # The adapter passed below differs at its fourth base from the true
    # adapter sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    info_file = tmpdir.join("info.txt")
    arguments = ["--info-file", str(info_file)]
    arguments += ["-a", "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"]
    run(arguments, "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), str(info_file))
示例#2
0
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """Demultiplex read pairs into files named after the {name1}_{name2} adapter combination"""
    adapter_args = "-g A=^AAAAAAAAAA -g C=^CCCCCCCCCC -G G=^GGGGGGGGGG -G T=^TTTTTTTTTT".split()
    output_args = [
        "-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq")),
        "-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq")),
        "--cores", str(cores),
    ]
    input_paths = [datapath("combinatorial.1.fastq"), datapath("combinatorial.2.fastq")]

    # Combinations in which both adapters were found must always be written
    both_known = list(product("AC", "GT"))
    # Combinations in which at least one of the two names is "unknown"
    with_unknown = [("unknown", "unknown")]
    with_unknown.extend((a, "unknown") for a in "AC")
    with_unknown.extend(("unknown", b) for b in "GT")

    # The third item of each tuple says whether the file must exist; files
    # involving "unknown" must be absent when --discard-untrimmed is used
    expectations = [(a, b, True) for a, b in both_known]
    expectations += [(a, b, not discarduntrimmed) for a, b in with_unknown]

    extra_args = ["--discard-untrimmed"] if discarduntrimmed else []
    main(adapter_args + output_args + input_paths + extra_args)

    for name1, name2, should_exist in expectations:
        for i in (1, 2):
            name = "combinatorial.{name1}_{name2}.{i}.fastq".format(name1=name1, name2=name2, i=i)
            reference = cutpath(os.path.join("combinatorial", name))
            produced = tmpdir.join(name)
            if not should_exist:
                assert not produced.check(), ("Output file should not exist", name)
                continue
            assert produced.check(), ("Output file missing", name)
            # The content is only compared when a reference file is available
            if os.path.exists(reference):
                assert_files_equal(reference, str(produced))
def test_too_short_no_primer():
    """--too-short-output and --trim-primer"""
    # This name must match the one embedded in the argument string below
    too_short_path = "tooshort.tmp.fa"
    try:
        run(
            "-c -m 5 -a 330201030313112312 --trim-primer --too-short-output tooshort.tmp.fa",
            "minlen.noprimer.fa", "lengths.fa")
        assert_files_equal(datapath('tooshort.noprimer.fa'), too_short_path)
    finally:
        # Remove the file from the working directory even when the run or the
        # comparison fails; it may not exist if the run failed early
        if os.path.exists(too_short_path):
            os.remove(too_short_path)
示例#4
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Filter one interleaved pair with separate length limits for R1 and R2"""
    option, satisfies = name_op
    limit1, limit2 = m
    inpath = str(tmpdir.join('separate_minlength.fasta'))
    expected = str(tmpdir.join('separate_minlength_expected.fasta'))
    outpath = str(tmpdir.join('out.fasta'))

    # A single pair: the first record has l1 bases, the second has l2 bases
    record = '>r{}:{}\n{}\n'.format(l1, l2, 'A' * l1) + '>r{}:{}\n{}'.format(l1, l2, 'A' * l2)

    def within(length, limit):
        # A limit of None means that no limit applies to that read
        return limit is None or satisfies(length, limit)

    with open(inpath, 'w') as infile:
        print(record, file=infile)
    # The expected file stays empty when the pair does not pass the filter
    with open(expected, 'w') as expected_file:
        if within(l1, limit1) and within(l2, limit2):
            print(record, file=expected_file)

    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # A missing limit becomes an empty string on either side of the colon
    limits = '{}:{}'.format(
        '' if limit1 is None else limit1,
        '' if limit2 is None else limit2,
    )
    main(['--interleaved', '-o', outpath, '-' + option, limits, inpath])
    assert_files_equal(expected, outpath)
示例#5
0
 def _run(params,
          inpath1,
          inpath2=None,
          expected1=None,
          expected2=None,
          cores=1):
     """Run cutadapt with --interleaved and compare the output with expected files.

     params is either a list of arguments or a single string, which is split
     on whitespace. expected1 must be given because the name of the output
     file is derived from it. expected2 is only needed when a second output
     file (-p) is to be written and compared.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Work on a fresh list so that a list passed in by the caller is not
     # extended in place by the += operations below
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths.append(datapath(inpath2))
     tmp2 = None
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
     assert main(params + paths) is None
     if expected2:
         assert_files_equal(cutpath(expected2), tmp2)
     assert_files_equal(cutpath(expected1), tmp1)
示例#6
0
def test_info_file_times(run, tmpdir):
    """Info file written when two adapters are combined with --times 2"""
    info_path = str(tmpdir.join("info.txt"))
    adapter_args = ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    arguments = ["--info-file", info_path, "--times", "2"] + adapter_args
    # The same file name serves as expected output and as input
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), info_path)
示例#7
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Apply separate R1 and R2 length limits to a single interleaved pair"""
    option_name, compare = name_op
    r1_limit, r2_limit = m
    inpath = str(tmpdir.join("separate_minlength.fasta"))
    expected = str(tmpdir.join("separate_minlength_expected.fasta"))
    outpath = str(tmpdir.join("out.fasta"))

    # Two records forming one pair: l1 bases in the first, l2 in the second
    first = ">r{}:{}\n{}\n".format(l1, l2, "A" * l1)
    second = ">r{}:{}\n{}".format(l1, l2, "A" * l2)
    record = first + second
    with open(inpath, "w") as handle:
        print(record, file=handle)

    with open(expected, "w") as handle:
        # None stands for "no limit"; the pair is expected in the output
        # only if neither read violates its limit
        r1_ok = r1_limit is None or compare(l1, r1_limit)
        if r1_ok and (r2_limit is None or compare(l2, r2_limit)):
            print(record, file=handle)

    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # An absent limit is passed as an empty string
    shown1 = "" if r1_limit is None else r1_limit
    shown2 = "" if r2_limit is None else r2_limit
    arguments = ["--interleaved", "-o", outpath, "-" + option_name]
    arguments += ["{}:{}".format(shown1, shown2), inpath]
    main(arguments)
    assert_files_equal(expected, outpath)
示例#8
0
def test_info_file_times():
    """Info file written when two adapters are combined with --times 2"""
    adapter_args = ['-a', 'adapt=GCCGAACTTCTTA', '-a', 'adapt2=GACTGCCTTAAGGACGT']
    with temporary_path("infotmp.txt") as infotmp:
        arguments = ["--info-file", infotmp, '--times', '2'] + adapter_args
        run(arguments, "illumina5.fastq", "illumina5.fastq")
        # Compare while the temporary path is still managed by the context
        assert_files_equal(cutpath('illumina5.info.txt'), infotmp)
示例#9
0
def test_untrimmed_output(run, cores, tmpdir):
    """Check the file written by --untrimmed-output against small.untrimmed.fastq"""
    untrimmed = tmpdir.join("untrimmed.fastq")
    arguments = ["--cores", str(cores)]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", str(untrimmed)]
    run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), str(untrimmed))
示例#10
0
def test_info_file(run, tmpdir):
    """Compare the file produced by --info-file with illumina.info.txt"""
    # Note on the adapter used here: the true adapter sequence in the
    # illumina.fastq.gz data set is GCCTAACTTCTTAGACTGCCTTAAGGACGT, that is,
    # its fourth base differs from the sequence given below
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    info_path = str(tmpdir.join("info.txt"))
    run(
        ["--info-file", info_path, "-a", adapter],
        "illumina.fastq",
        "illumina.fastq.gz",
    )
    assert_files_equal(cutpath("illumina.info.txt"), info_path)
示例#11
0
def run_interleaved(params,
                    inpath1,
                    inpath2=None,
                    expected1=None,
                    expected2=None,
                    cores=1):
    """
    Interleaved input or output (or both)

    params is either a list of arguments or a single string, which is split
    on whitespace. expected1 must be given because the name of the temporary
    output file is derived from it. expected2 is only needed when a second
    output file (-p) is to be written and compared.
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Work on a fresh list so that a list passed in by the caller is not
    # extended in place by the += operations below
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        params += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths.append(datapath(inpath2))
        if expected2:
            # The second output is compared inside its own context, while
            # its temporary path is still managed
            with temporary_path('tmp2-' + expected2) as tmp2:
                params += ['-p', tmp2]
                assert main(params + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(params + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
示例#12
0
def test_too_short(run, tmp_path, cores):
    """Run with -m 5 and --too-short-output; check the extra file and the too_short statistic"""
    too_short_path = tmp_path / 'tooshort.fa'
    arguments = ["--cores", str(cores), "-m", "5"]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-short-output", too_short_path]
    stats = run(arguments, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), too_short_path)
    assert stats.too_short == 5
示例#13
0
def test_linked_info_file(tmpdir):
    """Info file written for a linked adapter, compared with linked-info.txt"""
    info_path = str(tmpdir.join('info.txt'))
    out_path = str(tmpdir.join('out.fasta'))
    # NOTE(review): option and value are passed as one argument here
    # ("-a name=..."); kept exactly as is
    arguments = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    arguments += ['--info-file', info_path]
    arguments += ['-o', out_path, datapath('linked.fasta')]
    main(arguments)
    assert_files_equal(cutpath('linked-info.txt'), info_path)
示例#14
0
def test_too_short(run, tmpdir, cores):
    """Check the file written by --too-short-output when running with -m 5"""
    too_short = tmpdir.join('tooshort.fa')
    arguments = ["--cores", str(cores), "-m", "5"]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-short-output", str(too_short)]
    run(arguments, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), str(too_short))
示例#15
0
def test_too_long(run, tmpdir, cores):
    """Check the file written by --too-long-output when running with -M 5"""
    too_long = tmpdir.join('toolong.fa')
    arguments = ["--cores", str(cores), "-M", "5"]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-long-output", str(too_long)]
    run(arguments, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), str(too_long))
示例#16
0
 def _run(params, expected, inpath):
     """Run cutadapt on a data file and compare the output with the expected file.

     params is either a list of arguments or a single string, which is split
     on whitespace. The output is written to a file named like expected
     within tmpdir.
     """
     # Work on a fresh list so that a list passed in by the caller is not
     # extended in place by the += operation below
     params = params.split() if isinstance(params, str) else list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq, datapath(inpath)]
     assert main(params) is None
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
示例#17
0
def test_too_long(run, tmp_path, cores):
    """Run with -M 5 and --too-long-output; check the extra file and the too_long statistic"""
    too_long_path = tmp_path / 'toolong.fa'
    arguments = ["--cores", str(cores), "-M", "5"]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-long-output", too_long_path]
    stats = run(arguments, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), too_long_path)
    assert stats.too_long == 5
示例#18
0
def test_standard_output(tmpdir, cores):
    """Capture standard output when no --output/-o option is given and compare it as FASTQ"""
    out_path = str(tmpdir.join("out.fastq"))
    command = [sys.executable, "-m", "cutadapt", "--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child process writes directly into the opened file
    with open(out_path, "w") as stdout_target:
        process = subprocess.Popen(command, stdout=stdout_target)
        process.communicate()
    assert_files_equal(cutpath("small.fastq"), out_path)
示例#19
0
def test_info_file():
    """Compare the file produced by --info-file with illumina.info.txt"""
    # The adapter below is not identical to the true adapter sequence in the
    # illumina.fastq.gz data set, which is GCCTAACTTCTTAGACTGCCTTAAGGACGT
    # (the fourth base is different)
    adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
    with temporary_path("infotmp.txt") as infotmp:
        arguments = ["--info-file", infotmp, '-a', adapter]
        run(arguments, "illumina.fastq", "illumina.fastq.gz")
        assert_files_equal(cutpath('illumina.info.txt'), infotmp)
示例#20
0
def test_interleaved_untrimmed_output(tmpdir):
    """With adapter XXXX, the --untrimmed-output file must equal the interleaved input"""
    source = datapath("interleaved.fastq")
    untrimmed = str(tmpdir.join("untrimmed.interleaved.fastq"))
    # Regular outputs; their content is not checked by this test
    o1 = str(tmpdir.join("out.1.fastq"))
    o2 = str(tmpdir.join("out.2.fastq"))
    arguments = ["--interleaved", "-a", "XXXX"]
    arguments += ["-o", o1, "-p", o2]
    arguments += ["--untrimmed-output", untrimmed]
    arguments.append(source)
    main(arguments)
    assert_files_equal(datapath("interleaved.fastq"), untrimmed)
示例#21
0
def test_issue_296(tmpdir):
    """Regression test: using --no-trim together with --info-file caused a hang"""
    reads_path = str(tmpdir.join('reads.fasta'))
    out_path = str(tmpdir.join('out.fasta'))
    info_path = str(tmpdir.join('info.txt'))
    with open(reads_path, 'w') as reads_file:
        reads_file.write('>read\nCACAAA\n')
    arguments = ['--info-file', info_path, '--no-trim', '-g', 'TTTCAC']
    main(arguments + ['-o', out_path, reads_path])
    # Because of --no-trim, the output must be identical to the input
    assert_files_equal(reads_path, out_path)
示例#22
0
def test_info_file_times(run, tmp_path, cores):
    """Info file written when two adapters are combined with --times 2"""
    info_path = tmp_path / "info.txt"
    arguments = ["--cores", str(cores)]
    arguments += ["--info-file", info_path, "--times", "2"]
    arguments += ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    expected_info = cutpath('illumina5.info.txt')
    assert_files_equal(expected_info, info_path, ignore_trailing_space=True)
示例#23
0
def test_untrimmed_output(run, cores, tmp_path):
    """Check the --untrimmed-output file and the statistics returned by the run"""
    path = tmp_path / "untrimmed.fastq"
    arguments = ["--cores", str(cores)]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", path]
    stats = run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), path)
    # Index 0 is used for the per-read statistics of this single-end run
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 46
示例#24
0
def test_issue_296(tmpdir):
    """Regression test for a hang when --no-trim and --info-file are used together"""
    info_path, reads_path, out_path = (
        str(tmpdir.join(filename))
        for filename in ('info.txt', 'reads.fasta', 'out.fasta')
    )
    with open(reads_path, 'w') as handle:
        handle.write('>read\nCACAAA\n')
    main([
        '--info-file', info_path,
        '--no-trim',
        '-g', 'TTTCAC',
        '-o', out_path,
        reads_path,
    ])
    # --no-trim means the read must come out unchanged
    assert_files_equal(reads_path, out_path)
示例#25
0
 def _run(params, expected, inpath) -> Statistics:
     """Run cutadapt on a data file, compare the output and return the statistics.

     params is either a list of arguments or a single string, which is split
     on whitespace. The output is written to a file named like expected
     within tmpdir. The value returned by main() is passed on to the caller.
     """
     # Work on a fresh list so that a list passed in by the caller is not
     # extended in place by the += operation below
     params = params.split() if isinstance(params, str) else list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq, datapath(inpath)]
     stats = main(params)
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
     return stats
示例#26
0
def test_info_file_revcomp(run, tmp_path):
    """Info file written when --revcomp is used, compared with info-rc.txt"""
    info_path = tmp_path / "info-rc.txt"
    out_path = tmp_path / "out.fasta"
    arguments = ["--info-file", str(info_path)]
    arguments += ["-a", "adapt=GAGTCG", "--revcomp", "--rename={header}"]
    arguments += ["-o", str(out_path), datapath("info-rc.fasta")]
    # main() is called directly; the run fixture is not used in the body
    main(arguments)
    assert_files_equal(cutpath("info-rc.txt"), info_path)
示例#27
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run cutadapt on paired-end input and compare both output files.

     params is either a list of arguments or a single string, which is split
     on whitespace. Outputs are written to files named like expected1 and
     expected2 within tmpdir.
     """
     # Work on a fresh list so that a list passed in by the caller is not
     # extended in place by the += operations below
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--cores", str(cores), "--buffer-size=512"]
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
示例#28
0
def test_linked_info_file(tmp_path):
    """Info file written for a linked adapter, compared with linked-info.txt"""
    info_path = tmp_path / 'info.txt'
    out_path = tmp_path / 'out.fasta'
    # NOTE(review): option and value are passed as one argument here
    # ("-a name=..."); kept exactly as is
    arguments = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    arguments += ['--info-file', str(info_path)]
    arguments += ['-o', str(out_path), datapath('linked.fasta')]
    main(arguments)
    expected_info = cutpath('linked-info.txt')
    assert_files_equal(expected_info, info_path, ignore_trailing_space=True)
示例#29
0
def run_paired(params, in1, in2, expected1, expected2, cores):
    """Run cutadapt on paired-end input and compare both output files.

    params is either a list of arguments or a single string, which is split
    on whitespace. Outputs go to temporary paths derived from expected1 and
    expected2.
    """
    # Work on a fresh list so that a list passed in by the caller is not
    # extended in place by the += operations below
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as p1, \
            temporary_path('tmp2-' + expected2) as p2:
        params += ['-o', p1, '-p', p2]
        params += [datapath(in1), datapath(in2)]
        assert main(params) is None
        assert_files_equal(cutpath(expected1), p1)
        assert_files_equal(cutpath(expected2), p2)
示例#30
0
def test_force_fasta_output(tmpdir, cores):
    """With --fasta and "-o -", FASTA is written to standard output even for FASTQ input"""
    out_path = str(tmpdir.join("out.fasta"))
    command = [sys.executable, "-m", "cutadapt", "--fasta", "-o", "-"]
    command += ["--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child process writes directly into the opened file
    with open(out_path, "w") as stdout_target:
        process = subprocess.Popen(command, stdout=stdout_target)
        process.communicate()
    assert_files_equal(cutpath("small.fasta"), out_path)
示例#31
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run cutadapt on two paired input files and check both outputs.

     params may be given as a list of arguments or as one string that is
     split on whitespace. The two output files are created in tmpdir under
     the names expected1 and expected2.
     """
     # Copy list arguments: the += operations below would otherwise extend
     # the list object owned by the caller
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--cores", str(cores), "--buffer-size=512"]
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
示例#32
0
def test_info_file(run, tmp_path, cores):
    """Compare the file produced by --info-file with illumina.info.txt"""
    # The adapter below differs at its fourth base from the true adapter
    # sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    info_path = tmp_path / "info.txt"
    arguments = ["--cores", str(cores)]
    arguments += ["--info-file", info_path, "-a", adapter]
    run(arguments, "illumina.fastq", "illumina.fastq.gz")
    expected_info = cutpath("illumina.info.txt")
    assert_files_equal(expected_info, info_path, ignore_trailing_space=True)
示例#33
0
def test_explicit_standard_output(tmpdir, cores):
    """Capture standard output when it is requested explicitly with "-o -" and compare it as FASTQ"""

    import subprocess

    out_path = str(tmpdir.join("out.fastq"))
    command = [sys.executable, "-m", "cutadapt", "-o", "-"]
    command += ["--cores", str(cores)]
    command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
    # The child process writes directly into the opened file
    with open(out_path, "w") as stdout_target:
        process = subprocess.Popen(command, stdout=stdout_target)
        process.communicate()
    assert_files_equal(cutpath("small.fastq"), out_path)
示例#34
0
def test_too_long_output():
    """--too-long-output and --too-long-paired-output with -M 14 on paired-end input"""
    with temporary_path('temp-too-long.1.fastq') as p1:
        with temporary_path('temp-too-long.2.fastq') as p2:
            # Pass the arguments as a list: run_paired splits string
            # arguments on whitespace, which would break if a temporary
            # path contained a space
            run_paired(
                ['-a', 'TTAGACATAT', '-A', 'CAGTGGAGTA', '-M', '14',
                 '--too-long-output', str(p1),
                 '--too-long-paired-output', str(p2)],
                in1='paired.1.fastq',
                in2='paired.2.fastq',
                expected1='paired-too-short.1.fastq',
                expected2='paired-too-short.2.fastq',
                cores=1)
            assert_files_equal(cutpath('paired.1.fastq'), p1)
            assert_files_equal(cutpath('paired.2.fastq'), p2)
示例#35
0
def test_too_long_output(run_paired, tmpdir):
    """--too-long-output and --too-long-paired-output with -M 14 on paired-end input"""
    p1 = str(tmpdir.join("too-long.1.fastq"))
    p2 = str(tmpdir.join("too-long.2.fastq"))
    # Pass the arguments as a list: formatting the paths into one string
    # that is later split on whitespace would break if the temporary
    # directory contained a space
    run_paired(
        ["-a", "TTAGACATAT", "-A", "CAGTGGAGTA", "-M", "14",
            "--too-long-output", p1,
            "--too-long-paired-output", p2],
        in1="paired.1.fastq", in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq", expected2="paired-too-short.2.fastq",
        cores=1
    )
    assert_files_equal(cutpath("paired.1.fastq"), p1)
    assert_files_equal(cutpath("paired.2.fastq"), p2)
示例#36
0
def test_untrimmed_paired_output(tmpdir, run_paired):
    """--untrimmed-output and --untrimmed-paired-output combined with --pair-filter=first"""
    untrimmed1 = str(tmpdir.join("untrimmed.1.fastq"))
    untrimmed2 = str(tmpdir.join("untrimmed.2.fastq"))
    arguments = ["-a", "TTAGACATAT", "--pair-filter=first"]
    arguments += ["--untrimmed-output", untrimmed1]
    arguments += ["--untrimmed-paired-output", untrimmed2]
    run_paired(
        arguments,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    # Besides the regular outputs checked by run_paired, compare the two
    # files that received the untrimmed pairs
    for reference, produced in (
        ("paired-untrimmed.1.fastq", untrimmed1),
        ("paired-untrimmed.2.fastq", untrimmed2),
    ):
        assert_files_equal(cutpath(reference), produced)
示例#37
0
def test_standard_input_pipe(tmpdir, cores):
    """Read FASTQ from standard input"""

    import subprocess

    out_path = str(tmpdir.join("out.fastq"))
    in_path = datapath("small.fastq")
    command = [
        sys.executable, "-m", "cutadapt", "--cores", str(cores),
        "-a", "TTAGACATATCTCCGTCG", "-o", out_path, "-"]
    # Use 'cat' to simulate that no file name is available for stdin.
    # The context managers wait for both processes on exit, so that 'cat'
    # does not remain behind as an unreaped child process.
    with subprocess.Popen(["cat", in_path], stdout=subprocess.PIPE) as cat:
        with subprocess.Popen(command, stdin=cat.stdout) as py:
            # The child has its own copy of the pipe's read end; close ours
            # so that 'cat' is not kept alive should the reader exit early
            cat.stdout.close()
            py.communicate()
    assert_files_equal(cutpath("small.fastq"), out_path)
示例#38
0
def test_pair_adapters_demultiplexing(tmpdir):
    """Demultiplex with --pair-adapters into files named after the {name} placeholder"""
    arguments = "-g i1=AAAA -G i1=GGGG -g i2=CCCC -G i2=TTTT".split()
    arguments.append("--pair-adapters")
    arguments += [
        "-o", str(tmpdir.join("dual-{name}.1.fastq")),
        "-p", str(tmpdir.join("dual-{name}.2.fastq")),
    ]
    arguments += [datapath("dual-index.1.fastq"), datapath("dual-index.2.fastq")]
    assert main(arguments) is None

    expected_names = (
        "dual-i1.1.fastq",
        "dual-i1.2.fastq",
        "dual-i2.1.fastq",
        "dual-i2.2.fastq",
        "dual-unknown.1.fastq",
        "dual-unknown.2.fastq",
    )
    for name in expected_names:
        # Each file must exist and match the reference of the same name
        assert tmpdir.join(name).check()
        assert_files_equal(cutpath(name), str(tmpdir.join(name)))
示例#39
0
def test_untrimmed_paired_output_automatic_pair_filter(tmpdir, run_paired):
    """Untrimmed outputs when only an R1 adapter and no --pair-filter is given"""
    # Expected behavior, as stated for this test: when no R2 adapters are
    # given, --pair-filter should be ignored for --discard-untrimmed,
    # --untrimmed-output and --untrimmed-paired-output and always be "both"
    # (with --pair-filter=any, all pairs would be considered untrimmed
    # because the R1 read is always untrimmed).
    # NOTE(review): only -a is passed here, so the read that is never
    # trimmed is presumably R2 rather than R1 - confirm
    untrimmed1 = str(tmpdir.join("untrimmed.1.fastq"))
    untrimmed2 = str(tmpdir.join("untrimmed.2.fastq"))
    arguments = ["-a", "TTAGACATAT"]
    arguments += ["--untrimmed-output", untrimmed1]
    arguments += ["--untrimmed-paired-output", untrimmed2]
    run_paired(
        arguments,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    for reference, produced in (
        ("paired-untrimmed.1.fastq", untrimmed1),
        ("paired-untrimmed.2.fastq", untrimmed2),
    ):
        assert_files_equal(cutpath(reference), produced)
示例#40
0
 def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
     """Run cutadapt with --interleaved and check its output file(s).

     params may be given as a list of arguments or as one string that is
     split on whitespace. expected1 is required in practice, since the
     output file name is built from it; expected2 is needed only if a
     second output file (-p) should be written and compared.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Copy list arguments: the += operations below would otherwise extend
     # the list object owned by the caller
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths.append(datapath(inpath2))
     tmp2 = None
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
     assert main(params + paths) is None
     if expected2:
         assert_files_equal(cutpath(expected2), tmp2)
     assert_files_equal(cutpath(expected1), tmp1)
示例#41
0
def test_demultiplex():
    """Demultiplex single-end reads into one file per adapter name via {name} in -o"""
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        # Remove the temporary directory even when an assertion above fails
        shutil.rmtree(tempdir)
示例#42
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Length filtering of one interleaved pair with individual limits for R1 and R2"""
    option, check = name_op
    bound1, bound2 = m
    inpath = str(tmpdir.join("separate_minlength.fasta"))
    expected = str(tmpdir.join("separate_minlength_expected.fasta"))
    outpath = str(tmpdir.join("out.fasta"))

    # The pair consists of a record with l1 bases and one with l2 bases
    record = "".join([
        ">r{}:{}\n{}\n".format(l1, l2, "A" * l1),
        ">r{}:{}\n{}".format(l1, l2, "A" * l2),
    ])

    def passes(length, bound):
        # A bound of None means the read is not restricted
        return bound is None or check(length, bound)

    with open(inpath, "w") as infile:
        print(record, file=infile)
    with open(expected, "w") as expected_file:
        # Nothing is written, leaving the file empty, if the pair is filtered
        if passes(l1, bound1) and passes(l2, bound2):
            print(record, file=expected_file)

    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    # Bounds that are None are left out on their side of the colon
    shown = ["" if bound is None else bound for bound in (bound1, bound2)]
    main(["--interleaved", "-o", outpath, "-" + option, "{}:{}".format(*shown), inpath])
    assert_files_equal(expected, outpath)
示例#43
0
def test_too_long(run, tmpdir):
    """--too-long-output"""
    too_long_path = str(tmpdir.join('toolong.fa'))
    # Pass the arguments as a list: concatenating the path into one string
    # that is later split on whitespace would break if the temporary
    # directory contained a space
    run(
        ["-M", "5", "-a", "TTAGACATATCTCCGTCG", "--too-long-output", too_long_path],
        "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), too_long_path)
示例#44
0
def test_paired_demultiplex(tmpdir):
    """Demultiplex paired-end reads into per-adapter files via {name} in -o and -p"""
    multiout1 = str(tmpdir.join("demultiplexed.{name}.1.fastq"))
    multiout2 = str(tmpdir.join("demultiplexed.{name}.2.fastq"))
    arguments = ["-a", "first=AACATTAGACA", "-a", "second=CATTAGACATATCGG"]
    arguments += ["-A", "ignored=CAGTGGAGTA", "-A", "alsoignored=AATAACAGTGGAGTA"]
    arguments += ["-o", multiout1, "-p", multiout2]
    arguments += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(arguments) is None

    # (reference file, output template, adapter name filled into the template)
    checks = (
        ("demultiplexed.first.1.fastq", multiout1, "first"),
        ("demultiplexed.second.1.fastq", multiout1, "second"),
        ("demultiplexed.unknown.1.fastq", multiout1, "unknown"),
        ("demultiplexed.first.2.fastq", multiout2, "first"),
        ("demultiplexed.second.2.fastq", multiout2, "second"),
        ("demultiplexed.unknown.2.fastq", multiout2, "unknown"),
    )
    for reference, template, adapter_name in checks:
        assert_files_equal(cutpath(reference), template.format(name=adapter_name))
示例#45
0
def test_info_file_times(run, tmpdir):
    """Compare the info file of a run with two adapters and --times 2 with illumina5.info.txt"""
    info_file = tmpdir.join("info.txt")
    arguments = ["--info-file", str(info_file), "--times", "2"]
    for adapter in ("adapt=GCCGAACTTCTTA", "adapt2=GACTGCCTTAAGGACGT"):
        arguments += ["-a", adapter]
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), str(info_file))
示例#46
0
def test_untrimmed_output(run, tmpdir):
    """Check the file written by --untrimmed-output against small.untrimmed.fastq"""
    untrimmed = tmpdir.join("untrimmed.fastq")
    arguments = ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", str(untrimmed)]
    run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), str(untrimmed))
示例#47
0
def test_rest(run, tmpdir):
    """Check the file written by -r/--rest-file when using a -b adapter"""
    rest_file = tmpdir.join("rest.tmp")
    arguments = ['-b', 'ADAPTER', '-N']
    arguments += ['-r', str(rest_file)]
    run(arguments, "rest.fa", "rest.fa")
    assert_files_equal(datapath('rest.txt'), str(rest_file))
示例#48
0
def test_restfront(run, tmpdir):
    """Check the file written by -r when using a -g adapter"""
    rest_file = tmpdir.join("rest.txt")
    arguments = ['-g', 'ADAPTER', '-N']
    arguments += ['-r', str(rest_file)]
    run(arguments, "restfront.fa", "rest.fa")
    assert_files_equal(datapath('restfront.txt'), str(rest_file))
示例#49
0
def test_too_short(run, tmpdir):
    """--too-short-output"""
    too_short_path = str(tmpdir.join('tooshort.fa'))
    # Pass the arguments as a list: concatenating the path into one string
    # that is later split on whitespace would break if the temporary
    # directory contained a space
    run(
        ["-m", "5", "-a", "TTAGACATATCTCCGTCG", "--too-short-output", too_short_path],
        "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), too_short_path)