Пример #1
0
def test_summarize_sample_info():
    """Run the 'summarize_sample_info' target and check the sample info YAML.

    The workflow is run on the SRR5950647_subset test config with the
    module-level ``_tempdir`` as output directory. The resulting
    ``SRR5950647_subset.info.yaml`` must exist and contain the expected
    per-sample values.
    """
    conf = utils.relative_file('tests/test-data/SRR5950647_subset.conf')

    status = run_snakemake(
        conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["summarize_sample_info"],
    )
    assert status == 0

    info_file = f"{_tempdir}/SRR5950647_subset.info.yaml"
    assert os.path.exists(info_file)

    with open(info_file, 'rt') as fp:
        info = yaml.safe_load(fp)

    # Only these keys are checked; any additional keys in the file are ignored.
    expected = {
        'kmers': 928685,
        'sample': 'SRR5950647_subset',
        'known_hashes': 653,
        'n_bases': 2276334,
        'n_reads': 24663,
        'total_hashes': 907,
        'unknown_hashes': 254,
    }
    for key, value in expected.items():
        assert info[key] == value, \
            f"{key}: expected {value!r}, got {info[key]!r}"
Пример #2
0
def test_smash_sig():
    """Run the 'smash_reads' target and inspect the signature file it writes.

    The bundled abundtrim reads file is first copied into a freshly created
    ``abundtrim`` directory under the output directory. The run must succeed
    and produce a signature zip holding three signatures, each of which
    tracks abundance.
    """
    trimmed_dir = os.path.join(_tempdir, "abundtrim")
    os.mkdir(trimmed_dir)

    conf = utils.relative_file('tests/test-data/SRR5950647_subset.conf')
    shutil.copy(
        utils.relative_file("tests/test-data/SRR5950647_subset.abundtrim.fq.gz"),
        trimmed_dir,
    )

    exit_code = run_snakemake(
        conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["smash_reads"],
    )
    assert exit_code == 0

    sig_path = f"{_tempdir}/sigs/SRR5950647_subset.abundtrim.sig.zip"
    assert os.path.exists(sig_path)

    loaded = list(sourmash.load_file_as_signatures(sig_path))
    assert len(loaded) == 3
    assert all(sig.minhash.track_abundance for sig in loaded)
Пример #3
0
def test_bad_config_2():
    """Old-style config with 'sample' instead of 'samples': 'check' must fail."""
    bad_conf = utils.relative_file('tests/test-data/bad-2.conf')

    exit_code = run_snakemake(
        bad_conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["check"],
    )

    # the run is expected to report a non-zero status for this config.
    assert exit_code != 0
Пример #4
0
def test_bad_config_1():
    """Old-style config with sourmash_database_glob_pattern: 'check' must fail."""
    bad_conf = utils.relative_file('tests/test-data/bad-1.conf')

    exit_code = run_snakemake(
        bad_conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["check"],
    )

    # the run is expected to report a non-zero status for this config.
    assert exit_code != 0
Пример #5
0
def test_bad_config_4():
    """Old-style config with 'taxonomies' as a string, not a list.

    Running the 'check' target on it must report a non-zero status.
    """
    bad_conf = utils.relative_file('tests/test-data/bad-4.conf')

    exit_code = run_snakemake(
        bad_conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["check"],
    )

    assert exit_code != 0
Пример #6
0
def test_bad_config_3():
    """Old-style config with 'database_taxonomy' instead of 'taxonomies'.

    Running the 'check' target on it must report a non-zero status.
    """
    bad_conf = utils.relative_file('tests/test-data/bad-3.conf')

    exit_code = run_snakemake(
        bad_conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["check"],
    )

    assert exit_code != 0
Пример #7
0
def test_block_sra_downloads():
    """Run 'smash_reads' with a non-existent metagenome file; it must fail.

    Uses the test-block-sra.conf test config and expects a non-zero status
    from the workflow run.
    """
    conf = utils.relative_file('tests/test-data/test-block-sra.conf')
    targets = ["smash_reads"]

    exit_code = run_snakemake(conf, verbose=True, outdir=_tempdir,
                              extra_args=targets)

    assert exit_code != 0
Пример #8
0
def test_map_reads():
    """Run the 'map_reads' target with '-j 4' and require a zero exit status.

    A ``genomes`` directory is created under the output directory before the
    workflow is started.
    """
    conf = utils.relative_file('tests/test-data/SRR5950647_subset.conf')

    # NOTE(review): the directory is created but left empty here; presumably
    # the workflow needs it to exist up front -- confirm.
    genomes_dir = os.path.join(_tempdir, "genomes")
    os.mkdir(genomes_dir)

    status = run_snakemake(
        conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["map_reads", "-j", "4"],
    )
    assert status == 0
Пример #9
0
def test_gather_to_tax():
    """Run the 'gather_to_tax' target and check the lineage-annotated CSV.

    The run must succeed and write
    ``gather/SRR5950647_subset.gather.with-lineages.csv`` under the output
    directory, containing exactly two rows as read by ``utils.load_csv``.
    """
    conf = utils.relative_file('tests/test-data/SRR5950647_subset.conf')

    status = run_snakemake(
        conf,
        verbose=True,
        outdir=_tempdir,
        extra_args=["gather_to_tax"],
    )
    assert status == 0

    tax_output = f"{_tempdir}/gather/SRR5950647_subset.gather.with-lineages.csv"
    assert os.path.exists(tax_output)

    tax_results = list(utils.load_csv(tax_output))
    assert len(tax_results) == 2
Пример #10
0
def test_gather_reads_with_picklist():
    """Run 'gather_reads' with the picklist config and check both CSV outputs.

    Any existing prefetch/gather CSVs are removed before the run. Afterwards
    both CSVs must contain a single row whose name starts with
    'GCF_902167755.1 '. The CSVs written by this test are always removed
    again, even when an assertion fails, so the picklist-restricted results
    are not left behind in the shared output directory.
    """
    conf = utils.relative_file('tests/test-data/SRR5950647_picklist.conf')

    # note: the prefetch command & CSV are what are actually limited by the
    # passed in picklist.
    prefetch_output = f"{_tempdir}/gather/SRR5950647_subset.prefetch.csv"
    gather_output = f"{_tempdir}/gather/SRR5950647_subset.gather.csv"
    picklist_outputs = (prefetch_output, gather_output)

    # start from a clean slate: drop any CSVs already present.
    for path in picklist_outputs:
        if os.path.exists(path):
            os.unlink(path)

    try:
        status = run_snakemake(
            conf,
            verbose=True,
            outdir=_tempdir,
            extra_args=["gather_reads"],
        )
        assert status == 0

        assert os.path.exists(gather_output)

        prefetch_results = list(utils.load_csv(prefetch_output))
        assert len(prefetch_results) == 1
        assert prefetch_results[0]['match_name'].startswith('GCF_902167755.1 ')

        gather_results = list(utils.load_csv(gather_output))
        assert len(gather_results) == 1
        assert gather_results[0]['name'].startswith('GCF_902167755.1 ')
    finally:
        # make sure the picklist version of the CSVs is cleaned up, even if
        # the run or one of the assertions above failed. The existence check
        # keeps a missing file from masking the original failure.
        for path in picklist_outputs:
            if os.path.exists(path):
                os.unlink(path)