示例#1
0
文件: its_se.py 项目: niaid/nephele2
def main(args):
    """Run the pipeline: OTU picking, table export, diversity and graphs.

    The mapping file named by ``pipe.args.map_file`` is read as a
    tab-separated table. Each row's ``ForwardFastqFile`` is joined onto the
    pipeline inputs directory, stored under the row's ``#SampleID`` and
    verified with ``pipe.check_file``. The collected files are handed to
    ``pick_otu`` and OTUs are picked with the strategy in
    ``pipe.args.otu_strategy`` (``"de_novo"``, ``"closed"``, anything else is
    treated as open reference). The resulting BIOM table is summarized,
    converted to TSV and back to a JSON BIOM file, and then fed to the core
    diversity step and to the ``datavis16s`` R package.

    Args:
        args: Argument object handed to ``Pipeline``. Through ``pipe.args``
            this function reads ``map_file``, ``phred_quality``,
            ``max_bad_run``, ``max_n``, ``phred_offset``, ``otu_strategy``,
            ``ref_db``, ``sampling_depth`` and ``job_id``.

    Note:
        The process is ended with ``exit(0)`` when the BIOM table is empty,
        when ``pipe.get_depth`` returns a negative depth, or when the core
        diversity step raises an exception.
    """
    pipe = Pipeline(args)
    fastq = {}

    # process the mapping file: sample id -> path of its forward FASTQ file
    with open(pipe.args.map_file.name) as f:
        reader = csv.DictReader(f, delimiter="\t")
        for r in reader:
            sample_file = os.path.join(pipe.inputs_dir, r['ForwardFastqFile'])
            fastq[r["#SampleID"]] = sample_file
            pipe.check_file(sample_file, "Sample file")

    # run otu picking; its97, its99
    otu = pick_otu.pick_otu(pipe.log.info, pipe.args.phred_quality,
                            pipe.args.max_bad_run, pipe.args.max_n,
                            pipe.args.phred_offset, pipe.inputs_dir,
                            pipe.outputs_dir)
    otu.run_split(fastq)

    # specify otu picking strategy; each strategy names its table differently
    if pipe.args.otu_strategy == "de_novo":
        otu.run_denovo(pipe.args.ref_db)
        o = otu.get_output()
        otu_biom = o["otu_table.biom"]
    elif pipe.args.otu_strategy == "closed":
        otu.run_closed(pipe.args.ref_db)
        o = otu.get_output()
        otu_biom = o["otu_table.biom"]
    else:
        otu.run_open(pipe.args.ref_db)
        o = otu.get_output()
        otu_biom = o["otu_table_mc2_w_tax.biom"]

    # make sure the biom file is not empty
    if not os.path.getsize(otu_biom) > 0:
        pipe.log.info("BIOM file is empty.")
        exit(0)

    pipe.check_file(otu_biom, "OTU table")
    otu_table_txt = os.path.join(pipe.outputs_dir, "otu_table.txt")
    summarize_table.callback(
        otu_biom, os.path.join(pipe.outputs_dir, "otu_summary_table.txt"),
        False, False)
    # export the table as TSV, then rebuild a JSON BIOM file from that TSV
    pipe.exec_cmnd("biom convert -i %s -o %s --to-tsv --header-key taxonomy" %
                   (otu_biom, otu_table_txt))
    pipe.exec_cmnd(
        "biom convert -i %s -o %s --to-json --table-type 'OTU table' --process-obs-metadata sc_separated"
        % (otu_table_txt,
           os.path.join(pipe.outputs_dir, "otu_table.v1.biom")))
    depth = pipe.get_depth(otu_biom, pipe.args.sampling_depth)

    # a negative depth is treated as "not enough samples"; presumably
    # get_depth returns it when no sample reaches the required read count
    if depth < 0:
        pipe.log.info(
            "Visualization pipeline will not be run, as there are not enough samples."
        )
        exit(0)

    try:
        core = diversity.diversity(pipe.log.info, pipe.args.map_file.name,
                                   pipe.inputs_dir, pipe.outputs_dir)
        core.run(depth, otu_biom, "", [])
    except Exception:
        # record the failure in the log and the job database, then stop
        m = "QIIME core diversity pipeline failed with unknown errors."
        pipe.log.error(m)
        pipe.log_to_db(job_id=pipe.job_id, stack=traceback.format_exc(), msg=m)
        exit(0)

    # run additional visualization
    if pipe.args.job_id is not None:
        r_mod_name = 'datavis16s'
        pipe.log.info(
            'Loading R module: {r_mod_name}.'.format(r_mod_name=r_mod_name))
        pipe.load_R_mod(config.PIPELINES_LOC_ON_WRKR + r_mod_name)
    vis = importr("datavis16s")
    # Graph the table that was selected and validated above. The
    # "otu_table_mc2_w_tax.biom" key is only looked up by the open-reference
    # branch, so indexing it unconditionally is unsafe for other strategies.
    vis.trygraphwrapper(otu_biom,
                        pipe.outputs_dir,
                        pipe.args.map_file.name,
                        logfilename=os.path.join(pipe.outputs_dir,
                                                 "logfile.txt"),
                        FUN="allgraphs",
                        sampdepth=depth)

    # remove the join pair directory
    pipe.exec_cmnd("rm -rf %s" % os.path.join(pipe.outputs_dir, "join_pair"))
示例#2
0
def main(args):
    """Drive OTU picking, OTU table export, diversity, PICRUSt and graphs.

    Reads the tab-separated mapping file named by ``pipe.args.map_file`` to
    find each sample's forward FASTQ file, picks OTUs with the strategy in
    ``pipe.args.otu_strategy`` (``"de_novo"``, ``"closed"``, otherwise open
    reference), exports the OTU table, runs the core diversity step with the
    matching tree, optionally runs PICRUSt when ``pipe.args.picrust`` is
    ``True``, and finally calls the ``datavis16s`` R package.

    Args:
        args: Argument object handed to ``Pipeline``.

    Note:
        The process is ended with ``exit(0)`` when the BIOM table is empty,
        when ``pipe.get_depth`` returns a negative depth, or when the core
        diversity step raises an exception.
    """
    pipe = Pipeline(args)

    # reference trees used by the closed-reference strategy, keyed by ref_db
    db_tree = {
        "homd": "/mnt/EFS/dbs/homd/HOMD_16S_rRNA_RefSeq_V15.11.tre",
        "sv97": "/mnt/EFS/dbs/SILVA_97/97_otus.tre",
        "sv99": "/mnt/EFS/dbs/SILVA_99/99_otus.tre",
        "gg97": "/mnt/EFS/dbs/Greengenes_97/97_otus.tree",
        "gg99": "/mnt/EFS/dbs/Greengenes_99/99_otus.tree"
    }

    # mapping file: sample id -> path of its forward FASTQ file
    sample_files = {}
    with open(pipe.args.map_file.name) as map_handle:
        for row in csv.DictReader(map_handle, delimiter="\t"):
            sample_id = row["#SampleID"]
            sample_files[sample_id] = os.path.join(pipe.inputs_dir,
                                                   row['ForwardFastqFile'])
            pipe.check_file(sample_files[sample_id], "Sample file")

    # otu picking against gg99, gg97, sv99, or sv97
    otu = pick_otu.pick_otu(pipe.log.info, pipe.args.phred_quality,
                            pipe.args.max_bad_run, pipe.args.max_n,
                            pipe.args.phred_offset, pipe.inputs_dir,
                            pipe.outputs_dir)
    otu.run_split(sample_files)

    # step 1: run the requested picking strategy
    strategy = pipe.args.otu_strategy
    if strategy == "de_novo":
        otu.run_denovo(pipe.args.ref_db)
    elif strategy == "closed":
        otu.run_closed(pipe.args.ref_db)
    else:
        otu.run_open(pipe.args.ref_db)
    o = otu.get_output()

    # step 2: choose the tree (closed reference uses the database tree)
    if strategy == "closed":
        otu_tree = db_tree[pipe.args.ref_db]
    else:
        otu_tree = o["rep_set.tre"]

    # step 3: choose the table (only open reference names it differently)
    if strategy in ("de_novo", "closed"):
        otu_biom = o["otu_table.biom"]
    else:
        otu_biom = o["otu_table_mc2_w_tax_no_pynast_failures.biom"]

    # an empty biom file ends the run
    if os.path.getsize(otu_biom) <= 0:
        pipe.log.info("BIOM file is empty.")
        exit(0)

    pipe.check_file(otu_biom, "OTU table")
    summary_path = os.path.join(pipe.outputs_dir, "otu_summary_table.txt")
    summarize_table.callback(otu_biom, summary_path, False, False)

    # export the table as TSV, then rebuild a JSON BIOM file from that TSV
    tsv_path = os.path.join(pipe.outputs_dir, "otu_table.txt")
    to_tsv = "biom convert -i %s -o %s --to-tsv --header-key taxonomy" % (
        otu_biom, tsv_path)
    pipe.exec_cmnd(to_tsv)
    json_path = os.path.join(pipe.outputs_dir, "otu_table.v1.biom")
    to_json = (
        "biom convert -i %s -o %s --to-json --table-type 'OTU table' --process-obs-metadata sc_separated"
        % (tsv_path, json_path))
    pipe.exec_cmnd(to_json)

    depth = pipe.get_depth(otu_biom, pipe.args.sampling_depth)

    # a negative depth is treated as "not enough samples"
    if depth < 0:
        pipe.log.info(
            "Visualization pipeline will not be run, as there are not enough samples."
        )
        exit(0)

    try:
        core = diversity.diversity(pipe.log.info, pipe.args.map_file.name,
                                   pipe.inputs_dir, pipe.outputs_dir)
        core.run(depth, otu_biom, otu_tree)
    except Exception:
        # record the failure in the log and the job database, then stop
        failure = "QIIME core diversity pipeline failed with unknown errors."
        pipe.log.error(failure)
        pipe.log_to_db(job_id=pipe.job_id,
                       stack=traceback.format_exc(),
                       msg=failure)
        exit(0)

    # picrust analysis, only when explicitly switched on
    if pipe.args.picrust is True:
        seqs_fna = o["seqs.fna"]
        picrust_runner = picrust.picrust(pipe.log.info, pipe.inputs_dir,
                                         pipe.outputs_dir)
        pipe.check_file(seqs_fna, "Sequence file")
        picrust_runner.run(seqs_fna)

    # additional visualization; the R module is loaded only for real jobs
    if pipe.args.job_id is not None:
        r_mod_name = 'datavis16s'
        pipe.log.info(
            'Loading R module: {r_mod_name}.'.format(r_mod_name=r_mod_name))
        pipe.load_R_mod(config.PIPELINES_LOC_ON_WRKR + r_mod_name)
    vis = importr("datavis16s")
    graph_log = os.path.join(pipe.outputs_dir, "logfile.txt")
    vis.trygraphwrapper(otu_biom,
                        pipe.outputs_dir,
                        pipe.args.map_file.name,
                        logfilename=graph_log,
                        FUN="allgraphs",
                        sampdepth=depth)