Пример #1
0
def test_info_file(run, tmpdir):
    """Compare the file written by --info-file with the stored reference."""
    # The adapter passed below differs at its fourth base from the true
    # adapter sequence in the illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT
    info_file = str(tmpdir.join("info.txt"))
    arguments = [
        "--info-file", info_file,
        "-a", "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT",
    ]
    run(arguments, "illumina.fastq", "illumina.fastq.gz")
    assert_files_equal(cutpath("illumina.info.txt"), info_file)
Пример #2
0
def test_combinatorial_demultiplexing(tmpdir, discarduntrimmed, cores):
    """Check which per-combination output files exist after a paired run.

    Output names are built from the {name1}/{name2} placeholders. Files for
    combinations involving "unknown" must exist only when
    --discard-untrimmed is not passed.
    """
    template = "combinatorial.{name1}_{name2}.{i}.fastq"
    params = [
        "-g", "A=^AAAAAAAAAA", "-g", "C=^CCCCCCCCCC",
        "-G", "G=^GGGGGGGGGG", "-G", "T=^TTTTTTTTTT",
        "-o", str(tmpdir.join("combinatorial.{name1}_{name2}.1.fastq")),
        "-p", str(tmpdir.join("combinatorial.{name1}_{name2}.2.fastq")),
        "--cores", str(cores),
        datapath("combinatorial.1.fastq"), datapath("combinatorial.2.fastq"),
    ]
    # Name pairs in which at least one side is "unknown"
    with_unknown = [("unknown", "unknown")]
    with_unknown.extend((first, "unknown") for first in "AC")
    with_unknown.extend(("unknown", second) for second in "GT")

    # third item in tuple says whether the file must exist
    expectations = [(first, second, True) for first, second in product("AC", "GT")]
    expectations.extend(
        (first, second, not discarduntrimmed) for first, second in with_unknown)
    if discarduntrimmed:
        params.append("--discard-untrimmed")

    main(params)
    for first, second, required in expectations:
        for read_number in (1, 2):
            filename = template.format(name1=first, name2=second, i=read_number)
            reference = cutpath(os.path.join("combinatorial", filename))
            produced = tmpdir.join(filename)
            if not required:
                assert not produced.check(), ("Output file should not exist", filename)
                continue
            assert produced.check(), ("Output file missing", filename)
            # Contents are compared only when a reference file is available
            if os.path.exists(reference):
                assert_files_equal(reference, str(produced))
Пример #3
0
def test_too_short_no_primer():
    """--too-short-output and --trim-primer"""
    # The output is written into the current working directory, so it has to
    # be removed again even when the run or the comparison fails.
    too_short_path = "tooshort.tmp.fa"
    try:
        run(
            "-c -m 5 -a 330201030313112312 --trim-primer --too-short-output " + too_short_path,
            "minlen.noprimer.fa", "lengths.fa")
        assert_files_equal(datapath('tooshort.noprimer.fa'), too_short_path)
    finally:
        # The file may not exist if the run failed before creating it; do not
        # let the cleanup mask the original error in that case.
        if os.path.exists(too_short_path):
            os.remove(too_short_path)
Пример #4
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Separate length thresholds for R1 and R2, given as 'M1:M2'

    name_op is an (option name, comparison function) pair and m is a pair of
    thresholds, either of which may be None. The record pair is expected in
    the output only if every threshold that is set passes the comparison.
    """
    option, compare = name_op
    limit1, limit2 = m
    source = str(tmpdir.join('separate_minlength.fasta'))
    reference = str(tmpdir.join('separate_minlength_expected.fasta'))
    result = str(tmpdir.join('out.fasta'))

    # Both records share the same header; print() appends the final newline
    header = '>r{}:{}'.format(l1, l2)
    record = '\n'.join([header, 'A' * l1, header, 'A' * l2])
    with open(source, 'w') as f:
        print(record, file=f)
    with open(reference, 'w') as f:
        passes = (
            (limit1 is None or compare(l1, limit1))
            and (limit2 is None or compare(l2, limit2))
        )
        if passes:
            print(record, file=f)

    assert os.path.exists(source)
    assert os.path.exists(reference)

    # A threshold that is not set becomes an empty string in 'M1:M2'
    spec = '{}:{}'.format(
        '' if limit1 is None else limit1,
        '' if limit2 is None else limit2,
    )
    main(['--interleaved', '-o', result, '-' + option, spec, source])
    assert_files_equal(reference, result)
Пример #5
0
 def _run(params,
          inpath1,
          inpath2=None,
          expected1=None,
          expected2=None,
          cores=1):
     """Run main() with --interleaved and compare the output with references.

     params is a list of command-line arguments or a single string that is
     split on whitespace. inpath1/inpath2 are resolved with datapath(),
     expected1/expected2 with cutpath(). Although expected1 defaults to None,
     it is concatenated into the output file name and therefore must be a
     string.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Work on a private list so that a list passed by the caller is not
     # extended in place.
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths.append(datapath(inpath2))
     tmp2 = None
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
     assert main(params + paths) is None
     if expected2:
         assert_files_equal(cutpath(expected2), tmp2)
     assert_files_equal(cutpath(expected1), tmp1)
Пример #6
0
def test_info_file_times(run, tmpdir):
    """Compare the --info-file output for two adapters and --times 2."""
    info_file = str(tmpdir.join("info.txt"))
    adapters = ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    arguments = ["--info-file", info_file, "--times", "2"] + adapters
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath("illumina5.info.txt"), info_file)
Пример #7
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Per-read length thresholds for R1 and R2 passed as "M1:M2"

    The option name and the comparison used to predict the outcome both come
    from name_op; either threshold in m may be None.
    """
    threshold1, threshold2 = m
    option_name, satisfies = name_op
    in_file = str(tmpdir.join("separate_minlength.fasta"))
    expected_file = str(tmpdir.join("separate_minlength_expected.fasta"))
    out_file = str(tmpdir.join("out.fasta"))

    title = ">r{}:{}".format(l1, l2)
    text = title + "\n" + "A" * l1 + "\n" + title + "\n" + "A" * l2 + "\n"
    with open(in_file, "w") as handle:
        handle.write(text)
    with open(expected_file, "w") as handle:
        # The expected file stays empty unless all set thresholds are met
        if (threshold1 is None or satisfies(l1, threshold1)) and (
                threshold2 is None or satisfies(l2, threshold2)):
            handle.write(text)

    assert os.path.exists(in_file)
    assert os.path.exists(expected_file)

    if threshold1 is None:
        threshold1 = ""
    if threshold2 is None:
        threshold2 = ""
    arguments = ["--interleaved", "-o", out_file]
    arguments += ["-" + option_name, "{}:{}".format(threshold1, threshold2)]
    arguments.append(in_file)
    main(arguments)
    assert_files_equal(expected_file, out_file)
Пример #8
0
def test_info_file_times():
    """Compare the --info-file output for two adapters and --times 2."""
    adapter_args = ['-a', 'adapt=GCCGAACTTCTTA', '-a', 'adapt2=GACTGCCTTAAGGACGT']
    with temporary_path("infotmp.txt") as info_file:
        arguments = ["--info-file", info_file, '--times', '2'] + adapter_args
        run(arguments, "illumina5.fastq", "illumina5.fastq")
        assert_files_equal(cutpath('illumina5.info.txt'), info_file)
Пример #9
0
def test_untrimmed_output(run, cores, tmpdir):
    """Compare the file written by --untrimmed-output with its reference."""
    untrimmed = str(tmpdir.join("untrimmed.fastq"))
    arguments = ["--cores", str(cores)]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", untrimmed]
    run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed)
Пример #10
0
def test_info_file(run, tmpdir):
    """Run with --info-file and compare the result with illumina.info.txt."""
    # True adapter sequence in the illumina.fastq.gz data set:
    #   GCCTAACTTCTTAGACTGCCTTAAGGACGT
    # The sequence used here differs from it at the fourth base.
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    target = tmpdir.join("info.txt")
    run(
        ["--info-file", str(target), "-a", adapter],
        "illumina.fastq",
        "illumina.fastq.gz",
    )
    assert_files_equal(cutpath("illumina.info.txt"), str(target))
Пример #11
0
def run_interleaved(params,
                    inpath1,
                    inpath2=None,
                    expected1=None,
                    expected2=None,
                    cores=1):
    """
    Interleaved input or output (or both)

    params is a list of command-line arguments or a single string that is
    split on whitespace. Input names are resolved with datapath(), expected
    names with cutpath(). Although expected1 defaults to None, it is
    concatenated into the temporary file name and therefore must be a string.
    """
    assert not (inpath1 and inpath2 and expected1 and expected2)
    assert not (expected2 and not expected1)
    assert not (inpath2 and not inpath1)
    # Copy the arguments so that a list passed by the caller is not extended
    # in place.
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--interleaved', '--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as tmp1:
        params += ['-o', tmp1]
        paths = [datapath(inpath1)]
        if inpath2:
            paths.append(datapath(inpath2))
        if expected2:
            with temporary_path('tmp2-' + expected2) as tmp2:
                params += ['-p', tmp2]
                assert main(params + paths) is None
                assert_files_equal(cutpath(expected2), tmp2)
        else:
            assert main(params + paths) is None
        assert_files_equal(cutpath(expected1), tmp1)
Пример #12
0
def test_too_short(run, tmp_path, cores):
    """--too-short-output: check the written file and the too_short statistic"""
    destination = tmp_path / 'tooshort.fa'
    arguments = ["--cores", str(cores)]
    arguments += ["-m", "5", "-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-short-output", destination]
    stats = run(arguments, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), destination)
    assert stats.too_short == 5
Пример #13
0
def test_linked_info_file(tmpdir):
    """Compare the --info-file output for a linked adapter with its reference."""
    info_file = str(tmpdir.join('info.txt'))
    trimmed_file = str(tmpdir.join('out.fasta'))
    # The adapter option and its value are passed as one single argument
    arguments = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    arguments += ['--info-file', info_file]
    arguments += ['-o', trimmed_file]
    arguments.append(datapath('linked.fasta'))
    main(arguments)
    assert_files_equal(cutpath('linked-info.txt'), info_file)
Пример #14
0
def test_too_short(run, tmpdir, cores):
    """--too-short-output"""
    destination = str(tmpdir.join('tooshort.fa'))
    arguments = [
        "--cores", str(cores),
        "-m", "5",
        "-a", "TTAGACATATCTCCGTCG",
        "--too-short-output", destination,
    ]
    run(arguments, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), destination)
Пример #15
0
def test_too_long(run, tmpdir, cores):
    """--too-long-output"""
    destination = str(tmpdir.join('toolong.fa'))
    arguments = [
        "--cores", str(cores),
        "-M", "5",
        "-a", "TTAGACATATCTCCGTCG",
        "--too-long-output", destination,
    ]
    run(arguments, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), destination)
Пример #16
0
 def _run(params, expected, inpath):
     """Run main() on one data file and compare the output with a reference.

     params is a list of command-line arguments or a single string that is
     split on whitespace. The output is written into tmpdir under the name
     given by expected and must equal the file at cutpath(expected).
     """
     # Work on a private list so that a list passed by the caller is not
     # extended in place.
     params = params.split() if isinstance(params, str) else list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq]
     params += [datapath(inpath)]
     assert main(params) is None
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
Пример #17
0
def test_too_long(run, tmp_path, cores):
    """--too-long-output"""
    destination = tmp_path / 'toolong.fa'
    arguments = ["--cores", str(cores)]
    arguments += ["-M", "5", "-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--too-long-output", destination]
    stats = run(arguments, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), destination)
    assert stats.too_long == 5
Пример #18
0
def test_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (not using --output/-o option)"""
    destination = str(tmpdir.join("out.fastq"))
    with open(destination, "w") as sink:
        command = [sys.executable, "-m", "cutadapt"]
        command += ["--cores", str(cores)]
        command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
        # The child's standard output goes straight into the file
        process = subprocess.Popen(command, stdout=sink)
        process.communicate()
    assert_files_equal(cutpath("small.fastq"), destination)
Пример #19
0
def test_info_file():
    """Compare the --info-file output for one adapter with the reference file."""
    # The adapter passed here is not the true adapter sequence in the
    # illumina.fastq.gz data set, which is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different)
    adapter = 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'
    with temporary_path("infotmp.txt") as info_file:
        run(["--info-file", info_file, '-a', adapter],
            "illumina.fastq", "illumina.fastq.gz")
        assert_files_equal(cutpath('illumina.info.txt'), info_file)
Пример #20
0
def test_interleaved_untrimmed_output(tmpdir):
    """The --untrimmed-output file must equal the interleaved input file."""
    source = datapath("interleaved.fastq")
    out1 = str(tmpdir.join("out.1.fastq"))
    out2 = str(tmpdir.join("out.2.fastq"))
    untrimmed = str(tmpdir.join("untrimmed.interleaved.fastq"))
    arguments = ["--interleaved", "-a", "XXXX"]
    arguments += ["-o", out1, "-p", out2]
    arguments += ["--untrimmed-output", untrimmed]
    arguments.append(source)
    main(arguments)
    assert_files_equal(datapath("interleaved.fastq"), untrimmed)
Пример #21
0
def test_issue_296(tmpdir):
    """Combine --no-trim with --info-file; the reads must come out unchanged."""
    # Hang when using both --no-trim and --info-file together
    info_file, reads_file, trimmed_file = (
        str(tmpdir.join(filename))
        for filename in ('info.txt', 'reads.fasta', 'out.fasta')
    )
    with open(reads_file, 'w') as handle:
        handle.write('>read\nCACAAA\n')
    arguments = ['--info-file', info_file, '--no-trim', '-g', 'TTTCAC']
    arguments += ['-o', trimmed_file, reads_file]
    main(arguments)
    # Output should be unchanged because of --no-trim
    assert_files_equal(reads_file, trimmed_file)
Пример #22
0
def test_info_file_times(run, tmp_path, cores):
    """Compare the --info-file output for two adapters and --times 2.

    Trailing whitespace is ignored in the comparison.
    """
    info_file = tmp_path / "info.txt"
    arguments = ["--cores", str(cores)]
    arguments += ["--info-file", info_file, "--times", "2"]
    arguments += ["-a", "adapt=GCCGAACTTCTTA", "-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    reference = cutpath('illumina5.info.txt')
    assert_files_equal(reference, info_file, ignore_trailing_space=True)
Пример #23
0
def test_untrimmed_output(run, cores, tmp_path):
    """Check the --untrimmed-output file and the returned statistics."""
    untrimmed = tmp_path / "untrimmed.fastq"
    arguments = ["--cores", str(cores)]
    arguments += ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", untrimmed]
    stats = run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed)
    assert stats.with_adapters[0] == 2
    assert stats.written == 2
    assert stats.written_bp[0] == 46
Пример #24
0
def test_issue_296(tmpdir):
    """Run with both --no-trim and --info-file and expect unmodified output."""
    # Hang when using both --no-trim and --info-file together
    reads_file = str(tmpdir.join('reads.fasta'))
    with open(reads_file, 'w') as handle:
        handle.write('>read\nCACAAA\n')
    info_file = str(tmpdir.join('info.txt'))
    result_file = str(tmpdir.join('out.fasta'))
    main([
        '--info-file', info_file,
        '--no-trim',
        '-g', 'TTTCAC',
        '-o', result_file,
        reads_file,
    ])
    # Output should be unchanged because of --no-trim
    assert_files_equal(reads_file, result_file)
Пример #25
0
 def _run(params, expected, inpath) -> Statistics:
     """Run main() on one data file, compare the output and return main()'s result.

     params is a list of command-line arguments or a single string that is
     split on whitespace. The output is written into tmpdir under the name
     given by expected and must equal the file at cutpath(expected).
     """
     # Work on a private list so that a list passed by the caller is not
     # extended in place.
     params = params.split() if isinstance(params, str) else list(params)
     tmp_fastaq = str(tmpdir.join(expected))
     params += ['-o', tmp_fastaq]
     params += [datapath(inpath)]
     stats = main(params)
     # TODO redirect standard output
     assert_files_equal(cutpath(expected), tmp_fastaq)
     return stats
Пример #26
0
def test_info_file_revcomp(run, tmp_path):
    """Compare the --info-file output of a --revcomp run with its reference."""
    info_file = tmp_path / "info-rc.txt"
    trimmed_file = tmp_path / "out.fasta"
    arguments = ["--info-file", str(info_file)]
    arguments += ["-a", "adapt=GAGTCG", "--revcomp", "--rename={header}"]
    arguments += ["-o", str(trimmed_file)]
    arguments.append(datapath("info-rc.fasta"))
    main(arguments)
    assert_files_equal(cutpath("info-rc.txt"), info_file)
Пример #27
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run main() on two input files and compare both outputs with references.

     params is a list of command-line arguments or a single string that is
     split on whitespace. in1/in2 are resolved with datapath(); the outputs
     are written into tmpdir and compared with cutpath(expected1) and
     cutpath(expected2).
     """
     # Work on a private list so that a list passed by the caller is not
     # extended in place.
     params = params.split() if isinstance(params, str) else list(params)
     params += ["--cores", str(cores), "--buffer-size=512"]
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
Пример #28
0
def test_linked_info_file(tmp_path):
    """Compare the --info-file output for a linked adapter with its reference.

    Trailing whitespace is ignored in the comparison.
    """
    info_file = tmp_path / 'info.txt'
    trimmed_file = tmp_path / 'out.fasta'
    # The adapter option and its value are passed as one single argument
    arguments = ['-a linkedadapter=^AAAAAAAAAA...TTTTTTTTTT']
    arguments += ['--info-file', str(info_file)]
    arguments += ['-o', str(trimmed_file)]
    arguments.append(datapath('linked.fasta'))
    main(arguments)
    reference = cutpath('linked-info.txt')
    assert_files_equal(reference, info_file, ignore_trailing_space=True)
Пример #29
0
def run_paired(params, in1, in2, expected1, expected2, cores):
    """Run main() on two input files and compare both outputs with references.

    params is a list of command-line arguments or a single string that is
    split on whitespace. in1/in2 are resolved with datapath(); the outputs
    go to temporary paths and are compared with cutpath(expected1) and
    cutpath(expected2).
    """
    # Work on a private list so that a list passed by the caller is not
    # extended in place.
    params = params.split() if isinstance(params, str) else list(params)
    params += ['--cores', str(cores), '--buffer-size=512']
    with temporary_path('tmp1-' + expected1) as p1:
        with temporary_path('tmp2-' + expected2) as p2:
            params += ['-o', p1, '-p', p2]
            params += [datapath(in1), datapath(in2)]
            assert main(params) is None
            assert_files_equal(cutpath(expected1), p1)
            assert_files_equal(cutpath(expected2), p2)
Пример #30
0
def test_force_fasta_output(tmpdir, cores):
    """Write FASTA to standard output even on FASTQ input"""
    destination = str(tmpdir.join("out.fasta"))
    with open(destination, "w") as sink:
        command = [sys.executable, "-m", "cutadapt", "--fasta", "-o", "-"]
        command += ["--cores", str(cores)]
        command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
        # The child's standard output goes straight into the file
        process = subprocess.Popen(command, stdout=sink)
        process.communicate()
    assert_files_equal(cutpath("small.fasta"), destination)
Пример #31
0
 def _run(params, in1, in2, expected1, expected2, cores):
     """Run main() in paired mode and check both output files.

     params may be a list of command-line arguments or one string that gets
     split on whitespace. The inputs are looked up with datapath(); the two
     outputs are created inside tmpdir and must equal cutpath(expected1) and
     cutpath(expected2), respectively.
     """
     # Copy the arguments: extending the caller's own list in place would
     # leak the options added here into the caller.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     path1 = str(tmpdir.join(expected1))
     path2 = str(tmpdir.join(expected2))
     params += ["--cores", str(cores), "--buffer-size=512"]
     params += ["-o", path1, "-p", path2]
     params += [datapath(in1), datapath(in2)]
     assert main(params) is None
     assert_files_equal(cutpath(expected1), path1)
     assert_files_equal(cutpath(expected2), path2)
Пример #32
0
def test_info_file(run, tmp_path, cores):
    """Compare the --info-file output with illumina.info.txt.

    Trailing whitespace is ignored in the comparison.
    """
    # The true adapter sequence in the illumina.fastq.gz data set is
    # GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different from the sequence shown here)
    adapter = "adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT"
    info_file = tmp_path / "info.txt"
    arguments = ["--cores", str(cores)]
    arguments += ["--info-file", info_file]
    arguments += ["-a", adapter]
    run(arguments, "illumina.fastq", "illumina.fastq.gz")
    reference = cutpath("illumina.info.txt")
    assert_files_equal(reference, info_file, ignore_trailing_space=True)
Пример #33
0
def test_explicit_standard_output(tmpdir, cores):
    """Write FASTQ to standard output (using "-o -")"""

    import subprocess
    destination = str(tmpdir.join("out.fastq"))
    with open(destination, "w") as sink:
        command = [sys.executable, "-m", "cutadapt", "-o", "-"]
        command += ["--cores", str(cores)]
        command += ["-a", "TTAGACATATCTCCGTCG", datapath("small.fastq")]
        # The child's standard output goes straight into the file
        process = subprocess.Popen(command, stdout=sink)
        process.communicate()
    assert_files_equal(cutpath("small.fastq"), destination)
Пример #34
0
def test_too_long_output():
    """Check the files written by --too-long-output and --too-long-paired-output."""
    template = (
        '-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output '
        '{0} --too-long-paired-output {1}'
    )
    with temporary_path('temp-too-long.1.fastq') as p1, \
            temporary_path('temp-too-long.2.fastq') as p2:
        run_paired(
            template.format(p1, p2),
            in1='paired.1.fastq',
            in2='paired.2.fastq',
            expected1='paired-too-short.1.fastq',
            expected2='paired-too-short.2.fastq',
            cores=1,
        )
        assert_files_equal(cutpath('paired.1.fastq'), p1)
        assert_files_equal(cutpath('paired.2.fastq'), p2)
Пример #35
0
def test_too_long_output(run_paired, tmpdir):
    """Check the files written by --too-long-output and --too-long-paired-output."""
    long1 = str(tmpdir.join("too-long.1.fastq"))
    long2 = str(tmpdir.join("too-long.2.fastq"))
    template = (
        "-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output "
        "{0} --too-long-paired-output {1}"
    )
    run_paired(
        template.format(long1, long2),
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-too-short.1.fastq",
        expected2="paired-too-short.2.fastq",
        cores=1,
    )
    for reference, produced in (("paired.1.fastq", long1), ("paired.2.fastq", long2)):
        assert_files_equal(cutpath(reference), produced)
Пример #36
0
def test_untrimmed_paired_output(tmpdir, run_paired):
    """Check --untrimmed-output/--untrimmed-paired-output with --pair-filter=first."""
    first_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    second_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    arguments = ["-a", "TTAGACATAT", "--pair-filter=first"]
    arguments += ["--untrimmed-output", first_untrimmed]
    arguments += ["--untrimmed-paired-output", second_untrimmed]
    run_paired(
        arguments,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    assert_files_equal(cutpath("paired-untrimmed.1.fastq"), first_untrimmed)
    assert_files_equal(cutpath("paired-untrimmed.2.fastq"), second_untrimmed)
Пример #37
0
def test_standard_input_pipe(tmpdir, cores):
    """Read FASTQ from standard input"""

    import subprocess
    out_path = str(tmpdir.join("out.fastq"))
    in_path = datapath("small.fastq")
    # Use 'cat' to simulate that no file name is available for stdin
    cat = subprocess.Popen(["cat", in_path], stdout=subprocess.PIPE)
    try:
        py = subprocess.Popen([
            sys.executable, "-m", "cutadapt", "--cores", str(cores),
            "-a", "TTAGACATATCTCCGTCG", "-o", out_path, "-"],
            stdin=cat.stdout)
        # The child has its own handle on the pipe. Closing the parent's copy
        # right away lets 'cat' receive SIGPIPE if the child exits early.
        cat.stdout.close()
        py.communicate()
    finally:
        # Closing twice is harmless; this also covers a failure to start the
        # second process. Waiting reaps 'cat' so that no zombie is left.
        cat.stdout.close()
        cat.wait()
    assert_files_equal(cutpath("small.fastq"), out_path)
Пример #38
0
def test_pair_adapters_demultiplexing(tmpdir):
    """Check the {name}-based output files of a --pair-adapters run.

    A pair of files is expected for each of the names i1, i2 and unknown.
    """
    params = [
        "-g", "i1=AAAA", "-G", "i1=GGGG", "-g", "i2=CCCC", "-G", "i2=TTTT",
        "--pair-adapters",
        "-o", str(tmpdir.join("dual-{name}.1.fastq")),
        "-p", str(tmpdir.join("dual-{name}.2.fastq")),
        datapath("dual-index.1.fastq"), datapath("dual-index.2.fastq"),
    ]
    assert main(params) is None
    expected_names = [
        "dual-{}.{}.fastq".format(adapter, read_number)
        for adapter in ("i1", "i2", "unknown")
        for read_number in (1, 2)
    ]
    for filename in expected_names:
        produced = tmpdir.join(filename)
        assert produced.check()
        assert_files_equal(cutpath(filename), str(produced))
Пример #39
0
def test_untrimmed_paired_output_automatic_pair_filter(tmpdir, run_paired):
    """Check the untrimmed output files when only an R1 adapter is given."""
    # When no R2 adapters are given, --pair-filter should be ignored for
    # --discard-untrimmed, --untrimmed-output, --untrimmed-paired-output
    # and always be "both" (with --pair-filter=any, all pairs would be
    # considered untrimmed because the R1 read is always untrimmed)
    first_untrimmed = str(tmpdir.join("untrimmed.1.fastq"))
    second_untrimmed = str(tmpdir.join("untrimmed.2.fastq"))
    arguments = ["-a", "TTAGACATAT"]
    arguments += ["--untrimmed-output", first_untrimmed]
    arguments += ["--untrimmed-paired-output", second_untrimmed]
    run_paired(
        arguments,
        in1="paired.1.fastq",
        in2="paired.2.fastq",
        expected1="paired-trimmed.1.fastq",
        expected2="paired-trimmed.2.fastq",
        cores=1,
    )
    assert_files_equal(cutpath("paired-untrimmed.1.fastq"), first_untrimmed)
    assert_files_equal(cutpath("paired-untrimmed.2.fastq"), second_untrimmed)
Пример #40
0
 def _run(params, inpath1, inpath2=None, expected1=None, expected2=None, cores=1):
     """Run main() with --interleaved and check the output file(s).

     params may be a list of command-line arguments or one string that gets
     split on whitespace. Input names go through datapath() and expected
     names through cutpath(). expected1 has a default of None but is used to
     build the output file name, so a string must always be passed.
     """
     assert not (inpath1 and inpath2 and expected1 and expected2)
     assert not (expected2 and not expected1)
     assert not (inpath2 and not inpath1)
     # Copy the arguments: extending the caller's own list in place would
     # leak the options added here into the caller.
     if isinstance(params, str):
         params = params.split()
     else:
         params = list(params)
     params += ["--interleaved", "--cores", str(cores), "--buffer-size=512"]
     tmp1 = str(tmpdir.join("out1-" + expected1))
     params += ["-o", tmp1]
     paths = [datapath(inpath1)]
     if inpath2:
         paths += [datapath(inpath2)]
     tmp2 = None
     if expected2:
         tmp2 = str(tmpdir.join("out2-" + expected2))
         params += ["-p", tmp2]
     assert main(params + paths) is None
     # The second output exists only when a second expected file was given
     if tmp2 is not None:
         assert_files_equal(cutpath(expected2), tmp2)
     assert_files_equal(cutpath(expected1), tmp1)
Пример #41
0
def test_demultiplex():
    """Check the per-adapter files written when -o contains {name}.

    One file is expected for each of the names first, second and unknown.
    """
    tempdir = tempfile.mkdtemp(prefix='cutadapt-tests.')
    # mkdtemp() leaves deletion to the caller, so clean up even when the run
    # or one of the comparisons fails.
    try:
        multiout = os.path.join(tempdir, 'tmp-demulti.{name}.fasta')
        params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
        assert main(params) is None
        assert_files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
        assert_files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
        assert_files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
    finally:
        shutil.rmtree(tempdir)
Пример #42
0
def test_separate_minmaxlength(tmpdir, name_op, l1, l2, m):
    """Separate R1/R2 length thresholds given to one option as "M1:M2"

    name_op provides the option name and the comparison function; m provides
    the two thresholds, where None means that a threshold is not set.
    """
    first_limit, second_limit = m
    flag, check = name_op

    def in_tmpdir(filename):
        return str(tmpdir.join(filename))

    inpath = in_tmpdir("separate_minlength.fasta")
    expected = in_tmpdir("separate_minlength_expected.fasta")
    outpath = in_tmpdir("out.fasta")

    lines = []
    for length in (l1, l2):
        lines.append(">r{}:{}".format(l1, l2))
        lines.append("A" * length)
    record = "\n".join(lines)

    with open(inpath, "w") as f:
        print(record, file=f)
    with open(expected, "w") as f:
        # The pair is expected in the output only if no set threshold fails
        if (first_limit is None or check(l1, first_limit)) and (
                second_limit is None or check(l2, second_limit)):
            print(record, file=f)

    assert os.path.exists(inpath)
    assert os.path.exists(expected)

    first_text = "" if first_limit is None else first_limit
    second_text = "" if second_limit is None else second_limit
    main([
        "--interleaved",
        "-o", outpath,
        "-" + flag, "{}:{}".format(first_text, second_text),
        inpath,
    ])
    assert_files_equal(expected, outpath)
Пример #43
0
def test_too_long(run, tmpdir):
    """--too-long-output"""
    destination = str(tmpdir.join('toolong.fa'))
    command_line = "-M 5 -a TTAGACATATCTCCGTCG --too-long-output {}".format(destination)
    run(command_line, "maxlen.fa", "lengths.fa")
    assert_files_equal(datapath('toolong.fa'), destination)
Пример #44
0
def test_paired_demultiplex(tmpdir):
    """Check the {name}-based output files of a paired run with named adapters.

    For both -o and -p, one file is expected for each of the names first,
    second and unknown.
    """
    multiout1 = str(tmpdir.join("demultiplexed.{name}.1.fastq"))
    multiout2 = str(tmpdir.join("demultiplexed.{name}.2.fastq"))
    params = ["-a", "first=AACATTAGACA", "-a", "second=CATTAGACATATCGG"]
    params += ["-A", "ignored=CAGTGGAGTA", "-A", "alsoignored=AATAACAGTGGAGTA"]
    params += ["-o", multiout1, "-p", multiout2]
    params += [datapath("paired.1.fastq"), datapath("paired.2.fastq")]
    assert main(params) is None
    for read_number, template in ((1, multiout1), (2, multiout2)):
        for adapter in ("first", "second", "unknown"):
            reference = "demultiplexed.{}.{}.fastq".format(adapter, read_number)
            assert_files_equal(cutpath(reference), template.format(name=adapter))
Пример #45
0
def test_info_file_times(run, tmpdir):
    """Compare the --info-file output for two adapters and --times 2."""
    target = str(tmpdir.join("info.txt"))
    arguments = ["--info-file", target]
    arguments += ["--times", "2"]
    arguments += ["-a", "adapt=GCCGAACTTCTTA"]
    arguments += ["-a", "adapt2=GACTGCCTTAAGGACGT"]
    run(arguments, "illumina5.fastq", "illumina5.fastq")
    assert_files_equal(cutpath('illumina5.info.txt'), target)
Пример #46
0
def test_untrimmed_output(run, tmpdir):
    """Compare the file written by --untrimmed-output with its reference."""
    untrimmed = str(tmpdir.join("untrimmed.fastq"))
    arguments = ["-a", "TTAGACATATCTCCGTCG"]
    arguments += ["--untrimmed-output", untrimmed]
    run(arguments, "small.trimmed.fastq", "small.fastq")
    assert_files_equal(cutpath("small.untrimmed.fastq"), untrimmed)
Пример #47
0
def test_rest(run, tmpdir):
    """-r/--rest-file"""
    rest_file = str(tmpdir.join("rest.tmp"))
    arguments = ['-b', 'ADAPTER', '-N']
    arguments += ['-r', rest_file]
    run(arguments, "rest.fa", "rest.fa")
    assert_files_equal(datapath('rest.txt'), rest_file)
Пример #48
0
def test_restfront(run, tmpdir):
    """Compare the file written by -r for a -g adapter with its reference."""
    rest_file = str(tmpdir.join("rest.txt"))
    arguments = ['-g', 'ADAPTER', '-N']
    arguments += ['-r', rest_file]
    run(arguments, "restfront.fa", "rest.fa")
    assert_files_equal(datapath('restfront.txt'), rest_file)
Пример #49
0
def test_too_short(run, tmpdir):
    """--too-short-output"""
    destination = str(tmpdir.join('tooshort.fa'))
    command_line = "-m 5 -a TTAGACATATCTCCGTCG --too-short-output {}".format(destination)
    run(command_line, "minlen.fa", "lengths.fa")
    assert_files_equal(datapath('tooshort.fa'), destination)