示例#1
0
 def run():
     """Hand a primary command and a fallback group to ``strategies.backup``.

     The primary action runs ``cmd + opts``; the fallback group runs the
     ``_revcomp`` Python action followed by ``cmd + revcomp_opts``.
     ``output_exists`` is supplied as the sole extra condition.

     NOTE(review): presumably ``backup`` moves on to the fallback only when
     the primary fails or an extra condition fires -- confirm against
     ``strategies.backup``.

     :returns: whatever ``strategies.backup`` returns.
     """
     primary = CmdAction(cmd + opts, verbose=verbose)
     fallback = strategies.Group(
         PythonAction(_revcomp),
         CmdAction(cmd + revcomp_opts, verbose=verbose))
     return strategies.backup((primary, fallback),
                              extra_conditions=[output_exists])
示例#2
0
 def run(targets):
     """Run ``cmd`` on the input, with a reverse-complement group as backup.

     The first candidate formats ``cmd`` with ``input_fname``; the second
     runs ``revcomp_cmd`` and then ``cmd`` formatted with ``revcomp_fname``.
     The extra condition handed to ``strategies.backup`` is true when the
     output file is zero bytes long.

     NOTE(review): the result of ``strategies.backup`` is discarded, so this
     action always returns ``None`` -- confirm that is intended.

     :param targets: unused by this action.
     """
     def _output_is_empty(ret, output_fname):
         # ``ret`` is accepted to match the condition signature; only the
         # size of the output file matters here.
         return os.stat(output_fname).st_size == 0

     first_try = CmdAction(cmd.format(input_fname), verbose=verbose)
     second_try = strategies.Group(
         CmdAction(revcomp_cmd),
         CmdAction(cmd.format(revcomp_fname), verbose=verbose))
     strategies.backup(
         (first_try, second_try),
         extra_conditions=[_output_is_empty],
         output_fname=output_fname
     )
示例#3
0
 def _maybe_mangle():
     """Rewrite ``output_fasta`` through ``sequence_convert -m`` if requested.

     Does nothing when ``do_mangle`` is falsy, or when ``output_fasta`` is
     missing or empty.  Otherwise the converted sequences are written to
     ``<output_fasta>.tmp`` and then moved over the original file.  The value
     given to ``-m`` is ``mangle_to``, unless that is exactly ``False``, in
     which case ``rmext(output_fasta, all=True)`` is used.
     """
     if not do_mangle:
         return
     fasta_missing = not os.path.exists(output_fasta)
     if fasta_missing or os.stat(output_fasta).st_size < 1:
         return

     if mangle_to is False:
         mangle_arg = rmext(output_fasta, all=True)
     else:
         mangle_arg = mangle_to

     convert = "sequence_convert -m {m} -f fasta -t fasta {o} > {o}.tmp".format(
         m=mangle_arg, o=output_fasta)
     CmdAction(convert, verbose=verbose).execute()

     # Replace the original only after the conversion step has been run.
     move = "mv {o}.tmp {o}".format(o=output_fasta)
     CmdAction(move, verbose=verbose).execute()
示例#4
0
 def _run():
     """Run ``usearch_cmd`` and, if it did not return an exception, ``biom_cmd``.

     When the first command returns an exception instance, every missing
     target is created as an empty file and that exception is returned.
     Otherwise ``biom_cmd`` is executed and, unless ``keep_tempfiles`` is
     set, every regular file in ``opts['tmp_dir']`` except
     ``nonchimeric.fa`` is deleted.

     :returns: the result of the last command that was executed.
     """
     ret = CmdAction(usearch_cmd).execute()
     failed = ret is not None and issubclass(type(ret), Exception)
     if failed:
         # Leave empty placeholders for any target that was not produced.
         for t in targets:
             if not os.path.exists(t):
                 open(t, 'w').close()
         return ret

     ret = CmdAction(biom_cmd).execute()
     if not keep_tempfiles:
         tmp_dir = opts['tmp_dir']
         for entry in os.listdir(tmp_dir):
             if entry == "nonchimeric.fa":
                 continue
             path = join(tmp_dir, entry)
             if os.path.isfile(path):
                 os.remove(path)
     return ret
示例#5
0
    def run(targets):
        """Normalize and predict, converting the input to JSON only as a backup.

        An empty input file short-circuits the work: every target is created
        as an empty file and ``True`` is returned.  Otherwise the commands
        are first tried on ``file`` directly; the backup group runs
        ``format_cmd`` and then repeats the commands on the json-converted
        biom file.

        :param targets: paths of the files this action must produce.
        :returns: ``True`` for an empty input, else the result of
                  ``strategies.backup``.
        """
        if os.stat(file).st_size < 1:
            for target in targets:
                open(target, "w").close()
            return True

        direct = strategies.Group(
            CmdAction(cmd1 % (file), verbose=verbose),
            CmdAction(cmd2 % (norm_out), verbose=verbose))
        via_json = strategies.Group(
            format_cmd,
            CmdAction(cmd1 % (converted), verbose=verbose),
            CmdAction(cmd2 % (norm_out), verbose=verbose))
        return strategies.backup((direct, via_json))
示例#6
0
 def run():
     """Run ``cmd`` on ``input_fastq`` and post-process the result.

     An input of one byte or less is treated as empty: ``output_fasta`` is
     created (or truncated) as an empty file and nothing else happens.
     Otherwise ``cmd`` is executed and ``_maybe_mangle`` is called unless
     the command returned an exception instance.  Always returns ``None``.
     """
     if os.stat(input_fastq).st_size <= 1:
         open(output_fasta, 'w').close()
         return

     outcome = CmdAction(cmd, verbose=verbose).execute()
     errored = outcome is not None and issubclass(type(outcome), Exception)
     if not errored:
         _maybe_mangle()
示例#7
0
 def run(pcoa_cmd=pcoa_cmd):
     """Fill in missing ``meta``/``id`` options and execute the PCoA command.

     ``default_opts['meta']`` and ``default_opts['id']`` are replaced (in
     place) with values read from ``pcl_fname`` when they are ``True`` or
     falsy.  The options are then appended to the command, followed by the
     input file name.

     :param pcoa_cmd: base command string; defaults to the enclosing
                      scope's ``pcoa_cmd``.
     :returns: the result of executing the assembled command.
     """
     meta_opt = default_opts['meta']
     if meta_opt is True or not meta_opt:
         default_opts['meta'] = last_meta_name(pcl_fname)

     id_opt = default_opts['id']
     if id_opt is True or not id_opt:
         default_opts['id'] = sample_id(pcl_fname)

     command = pcoa_cmd + dict_to_cmd_opts(default_opts)
     command += " " + pcl_fname + " "
     return CmdAction(command, verbose=True).execute()
示例#8
0
 def run():
     """Run ``cmd`` over the SAM inputs that exist and are non-empty.

     When no input qualifies, ``output_table`` is created (or truncated) as
     an empty file and ``None`` is returned.

     :returns: the result of executing the command, or ``None``.
     """
     usable = []
     for sam in input_sams:
         if os.path.exists(sam) and os.stat(sam).st_size > 0:
             usable.append(sam)

     if not usable:
         open(output_table, 'w').close()
         return None
     return CmdAction(cmd + " ".join(usable), verbose=True).execute()
示例#9
0
 def merge_filter(deps,targets):
     """Merge the non-empty dependency tables with ``merge_otu_tables.py``.

     Dependencies that are missing or empty are skipped.  If the glob
     produced by ``_reduce_to_glob`` matches exactly the remaining files,
     that glob is passed to ``-i``; otherwise the files are passed as a
     comma-separated list.

     :param deps: candidate input file paths.
     :param targets: unused; the output path comes from the enclosing
                     scope's ``name``.
     :returns: the result of executing the merge command.
     """
     nonempty = [path for path in deps
                 if os.path.exists(path) and os.stat(path).st_size > 0]
     pattern = _reduce_to_glob(nonempty)
     glob_is_exact = sorted(glob.glob(pattern)) == sorted(nonempty)
     inputs = pattern if glob_is_exact else ",".join(nonempty)
     merge_cmd = "merge_otu_tables.py -i '{filenames}' -o {output}".format(
         filenames=inputs, output=name)
     return CmdAction(merge_cmd, verbose=True).execute()
示例#10
0
    def run():
        """Stitch the read pair, with two successively simpler backups.

        The candidates given to ``strategies.backup`` are, in order:
        stitching the reads directly; running ``pair_cmd`` to produce
        ``*_paired.fastq`` files and stitching those; and copying the first
        read file to ``output_fastq``.  The ``*_paired.fastq`` files are
        deleted afterwards when ``remove_tempfiles`` is set.

        :returns: whatever ``strategies.backup`` returns.
        """
        r1, r2 = input_fastq_pair

        def paired(s):
            return s.replace('.fastq', '_paired.fastq')

        r1_paired, r2_paired = paired(r1), paired(r2)

        stitch_direct = CmdAction(
            stitch_cmd.format(r1=r1, r2=r2, opts_str=opts_str,
                              output_fastq=output_fastq),
            verbose=verbose)
        pair_then_stitch = strategies.Group(
            CmdAction(
                pair_cmd.format(r1=r1, r2=r2,
                                r1out=r1_paired, r2out=r2_paired),
                verbose=verbose),
            CmdAction(
                stitch_cmd.format(r1=r1_paired, r2=r2_paired,
                                  opts_str=opts_str,
                                  output_fastq=output_fastq),
                verbose=verbose))
        copy_first_read = CmdAction(
            'cp {r1} {out}'.format(r1=r1, out=output_fastq),
            verbose=verbose)

        ret = strategies.backup(
            (stitch_direct, pair_then_stitch, copy_first_read))

        if remove_tempfiles:
            for leftover in (r1_paired, r2_paired):
                if os.path.exists(leftover):
                    os.remove(leftover)
        return ret
示例#11
0
 def run():
     """Execute ``cmd`` only when every dependency file is non-empty.

     If any file in ``deps`` is empty, ``output_sam`` is created (or
     truncated) as an empty file instead and ``None`` is returned.

     :returns: the result of executing the command, or ``None``.
     """
     every_dep_has_data = all(os.stat(dep).st_size >= 1 for dep in deps)
     if every_dep_has_data:
         return CmdAction(cmd, verbose=True).execute()
     open(output_sam, 'w').close()
示例#12
0
 def run():
     """Execute ``cmd`` on ``infile``, or emit an empty ``outfile``.

     An input of one byte or less is treated as empty: ``outfile`` is
     created (or truncated) as an empty file and ``None`` is returned.

     :returns: the result of executing the command, or ``None``.
     """
     if os.stat(infile).st_size <= 1:
         open(outfile, 'w').close()
         return None
     command = cmd.format(infile=infile, outfile=outfile)
     return CmdAction(command, verbose=verbose).execute()
示例#13
0
def picrust(file, output_dir=None, verbose=True, **opts):
    """Workflow to predict metagenome functional content from 16S OTU tables.

    :param file: String; input OTU table.
    :param output_dir: String; passed as ``basedir`` when naming the two
                       output files (default:None)
    :param verbose: Boolean; forwarded to every command action
                    (default:True)
    :keyword tab_in: Boolean; True if the input is a tabulated
                     file (default:0)
    :keyword tab_out: Boolean; True if the output file is to be
                      tabulated (default:False)
    :keyword gg_version: String; the greengenes version to be used
                         (default:most recent version)
    :keyword t: String; option to use a different type of prediction
                   (default:KO)
    :keyword with_confidence: Boolean; Set to True to output confidence
                              intervals (default:0)
    :keyword custom: String; specify a file containing a custom trait to
                     predict metagenomes
    :keyword drop_unknown: Boolean; when true, first filter the input
                           table down to the OTU ids listed in the
                           configured copy-number file; the original
                           table is kept under an "unfiltered" tag
                           (default:True)

    :returns: a task dictionary with ``name``, ``actions``, ``file_dep``,
              ``targets`` and ``title`` entries.

    External Dependencies:
      - PICRUSt: Version 1.0.0,
        http://picrust.github.io/picrust/install.html#install

    """
    norm_out = new_file(addtag(file, "normalized_otus"), basedir=output_dir)
    predict_out = new_file(addtag(file, "picrust"), basedir=output_dir)

    all_opts = { 'tab_in'          : 0,  'tab_out' : 0,
                 'gg_version'      : '', 't'       : '',
                 'with_confidence' : 0,  'custom'  : '',
                 'drop_unknown'    : True}
    all_opts.update(opts)
    # drop_unknown steers the workflow itself and is not a command-line
    # option, so take it out of the option dictionary.
    drop_unknown = all_opts.pop("drop_unknown", True)

    _copy_fname = settings.workflows.picrust.copy_number
    def _drop_unknown():
        """Filter ``file`` in place, keeping only OTUs in the copy-number file.

        The filtered table is written to a temporary file first; the
        original is then renamed with an "unfiltered" tag and the temporary
        file takes its place.
        """
        import os
        import gzip
        import json
        from biom.table import DenseOTUTable
        from biom.parse import (
            OBS_META_TYPES,
            parse_biom_table,
            parse_classic_table_to_rich_table
        )
        # The first tab-separated column of each row is used as the OTU id.
        # Use a context manager so the gzip handle is closed promptly.
        with gzip.open(_copy_fname) as copy_numbers:
            idx = set(row.strip().split('\t')[0] for row in copy_numbers)
        filter_func = lambda a, otu_id, c: str(otu_id) in idx
        tmpfile = file+"_tmp.biom"
        with open(file) as f, open(tmpfile, 'w') as f_out:
            try:
                table = parse_biom_table(f)
            except Exception:
                # The failed parse attempt may already have consumed the
                # handle; rewind so the classic-format parser sees the
                # whole file rather than whatever is left of it.
                f.seek(0)
                table = parse_classic_table_to_rich_table(
                    f, None, None, OBS_META_TYPES['taxonomy'], DenseOTUTable)
            table = table.filterObservations(filter_func)
            json.dump( table.getBiomFormatObject("AnADAMA"), f_out )
        os.rename(file, addtag(file, "unfiltered"))
        os.rename(tmpfile, file)


    # cmd1 and cmd2 keep a "%s" placeholder for the input path, which is
    # filled in when the actions are built inside run().
    cmd1 = ("normalize_by_copy_number.py "
            + "-i %s"
            + " -o " + norm_out)
    if all_opts['gg_version']:
        cmd1 += " -g " + all_opts['gg_version']
    if all_opts['tab_in']:
        cmd1 += " -f"

    cmd2 = ("predict_metagenomes.py "
            + "-i %s"
            + " -o " + predict_out)
    if all_opts['gg_version']:
        cmd2 += " -g " + all_opts['gg_version']
    if all_opts['tab_out']:
        cmd2 += " -f"
    if all_opts['t']:
        cmd2 += " -t " + all_opts['t']
    if all_opts['with_confidence']:
        cmd2 += " --with_confidence"
    if all_opts['custom']:
        cmd2 += " -c " + all_opts['custom']


    converted = addtag(file, "json")
    format_cmd = CmdAction('biom convert --table-type="OTU table"'
                           ' --header-key taxonomy --to-json'
                           ' -i {} -o {} '.format(file, converted),
                           verbose=verbose)
    def run(targets):
        """Run normalization and prediction; empty input yields empty targets."""
        # try to run without converting to json, if that fails,
        # convert first, then run on the json-converted biom file
        if os.stat(file).st_size < 1:
            for target in targets:
                open(target, "w").close()
            return True

        return strategies.backup(
            (strategies.Group(CmdAction(cmd1%(file), verbose=verbose),
                              CmdAction(cmd2%(norm_out), verbose=verbose)),
             strategies.Group(format_cmd,
                              CmdAction(cmd1%(converted), verbose=verbose),
                              CmdAction(cmd2%(norm_out), verbose=verbose)))
        )

    actions = [run]
    if drop_unknown:
        actions = [_drop_unknown, run]

    def _rusage(task):
        """Build the task title with resource estimates scaled by input size."""
        msg = task.name+" Estimated mem={mem} time={time} threads=1"
        # Estimates are derived from the byte size of the first file
        # dependency.
        s = os.stat(list(task.file_dep)[0]).st_size
        return msg.format(
            mem=100+(s/1024.),
            time=100+(s*2.5e-4)
        )

    return dict(
        name = "picrust:"+predict_out,
        actions = actions,
        file_dep = [file],
        targets = [predict_out, norm_out],
        title = _rusage,
    )