def run():
    """Build the primary and fallback actions and hand them to the backup strategy.

    The first candidate runs ``cmd + opts``.  The second candidate is a
    group that calls ``_revcomp`` and then runs ``cmd + revcomp_opts``.
    ``output_exists`` is passed along as the single extra condition.

    :returns: whatever ``strategies.backup`` returns.
    """
    primary = CmdAction(cmd + opts, verbose=verbose)
    # NOTE(review): presumably this is the reverse-complement retry --
    # confirm against the definition of _revcomp.
    fallback = strategies.Group(
        PythonAction(_revcomp),
        CmdAction(cmd + revcomp_opts, verbose=verbose),
    )
    return strategies.backup(
        (primary, fallback),
        extra_conditions=[output_exists],
    )
def run(targets):
    """Run the command on the input, with a two-step group as the backup.

    The first candidate runs ``cmd`` formatted with ``input_fname``.  The
    second candidate is a group that runs ``revcomp_cmd`` and then ``cmd``
    formatted with ``revcomp_fname``.  A single extra condition, which
    compares the size of ``output_fname`` against zero, is passed to
    ``strategies.backup`` together with ``output_fname`` itself.

    :param targets: task targets supplied by the caller; not used in the
                    body.
    :returns: the result of ``strategies.backup``.  The result used to be
              discarded, so a failed command could never be reported to
              the caller; the sibling ``run`` closures in this file return
              it, and this one now does too.
    """
    candidates = (
        CmdAction(cmd.format(input_fname), verbose=verbose),
        strategies.Group(
            CmdAction(revcomp_cmd),
            CmdAction(cmd.format(revcomp_fname), verbose=verbose),
        ),
    )
    return strategies.backup(
        candidates,
        extra_conditions=[
            lambda ret, output_fname: os.stat(output_fname).st_size == 0
        ],
        output_fname=output_fname,
    )
def _maybe_mangle():
    """Rewrite ``output_fasta`` in place through ``sequence_convert -m``.

    Does nothing when ``do_mangle`` is falsy, or when ``output_fasta`` is
    missing or empty.  The value given to ``-m`` is ``mangle_to``, unless
    ``mangle_to`` is ``False``, in which case it is
    ``rmext(output_fasta, all=True)``.
    """
    if not do_mangle:
        return
    if not os.path.exists(output_fasta):
        return
    if os.stat(output_fasta).st_size < 1:
        return

    if mangle_to is False:
        m = rmext(output_fasta, all=True)
    else:
        m = mangle_to

    # Convert into a temporary sibling file first, then move it over the
    # original.
    convert_cmd = "sequence_convert -m {m} -f fasta -t fasta {o} > {o}.tmp".format(
        m=m, o=output_fasta)
    CmdAction(convert_cmd, verbose=verbose).execute()

    move_cmd = "mv {o}.tmp {o}".format(o=output_fasta)
    CmdAction(move_cmd, verbose=verbose).execute()
def _run():
    """Run ``usearch_cmd`` and, if it did not yield an exception, ``biom_cmd``.

    When the first command's result is not an ``Exception`` instance, the
    second command runs and, unless ``keep_tempfiles`` is set, every
    regular file in ``opts['tmp_dir']`` other than ``nonchimeric.fa`` is
    removed.  Otherwise each missing path in ``targets`` is created as an
    empty file.

    :returns: the result of the last command executed.
    """
    ret = CmdAction(usearch_cmd).execute()

    if isinstance(ret, Exception):
        # First command failed: make sure every target exists, even if
        # only as an empty file.
        for target in targets:
            if not os.path.exists(target):
                open(target, 'w').close()
        return ret

    ret = CmdAction(biom_cmd).execute()
    if keep_tempfiles:
        return ret

    tmp_dir = opts['tmp_dir']
    for entry in os.listdir(tmp_dir):
        if entry == "nonchimeric.fa":
            continue
        entry_path = join(tmp_dir, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)
    return ret
def run(targets):
    """Normalize and predict, falling back to a JSON-converted input.

    An empty ``file`` short-circuits: every target is created as an empty
    file and ``True`` is returned.

    :param targets: paths to create as empty files when ``file`` is empty.
    :returns: ``True`` for an empty input, otherwise the result of
              ``strategies.backup``.
    """
    if os.stat(file).st_size >= 1:
        # First attempt works on the file as given; the second attempt
        # converts it to JSON first and then works on the converted file.
        direct = strategies.Group(
            CmdAction(cmd1 % (file), verbose=verbose),
            CmdAction(cmd2 % (norm_out), verbose=verbose),
        )
        via_json = strategies.Group(
            format_cmd,
            CmdAction(cmd1 % (converted), verbose=verbose),
            CmdAction(cmd2 % (norm_out), verbose=verbose),
        )
        return strategies.backup((direct, via_json))

    for path in targets:
        open(path, "w").close()
    return True
def run():
    """Run ``cmd`` on ``input_fastq``, or create an empty ``output_fasta``.

    When ``input_fastq`` is at most one byte long, ``output_fasta`` is
    created empty and nothing else happens.  Otherwise ``cmd`` is executed
    and, if its result is not an ``Exception`` instance,
    ``_maybe_mangle()`` is called.

    :returns: the result of executing ``cmd``, or ``None`` when the input
              was treated as empty.  The result used to be dropped, which
              made a failed command indistinguishable from a successful
              one; the sibling closures in this file return the
              ``execute()`` result, and this one now does too.
    """
    if os.stat(input_fastq).st_size <= 1:
        open(output_fasta, 'w').close()
        return None

    ret = CmdAction(cmd, verbose=verbose).execute()
    if not isinstance(ret, Exception):
        _maybe_mangle()
    return ret
def run(pcoa_cmd=pcoa_cmd):
    """Fill in missing ``meta``/``id`` options and execute the command.

    An option counts as missing when its value in ``default_opts`` is
    ``True`` or falsy; it is then replaced (in ``default_opts`` itself) by
    ``last_meta_name(pcl_fname)`` or ``sample_id(pcl_fname)`` respectively.

    :param pcoa_cmd: command prefix; the rendered options and
                     ``pcl_fname`` are appended to it.
    :returns: the result of executing the assembled command.
    """
    for key, resolve in (('meta', last_meta_name), ('id', sample_id)):
        current = default_opts[key]
        if current is True or not current:
            default_opts[key] = resolve(pcl_fname)

    full_cmd = pcoa_cmd + dict_to_cmd_opts(default_opts)
    full_cmd = full_cmd + " " + pcl_fname + " "
    return CmdAction(full_cmd, verbose=True).execute()
def run():
    """Run ``cmd`` on the existing, non-empty members of ``input_sams``.

    When no input qualifies, ``output_table`` is created as an empty file
    and ``None`` is returned.

    :returns: the result of executing the command, or ``None``.
    """
    usable = []
    for sam in input_sams:
        if os.path.exists(sam) and os.stat(sam).st_size > 0:
            usable.append(sam)

    if not usable:
        open(output_table, 'w').close()
        return None

    return CmdAction(cmd + " ".join(usable), verbose=True).execute()
def merge_filter(deps, targets):
    """Run ``merge_otu_tables.py`` on the existing, non-empty dependencies.

    The inputs are passed as a single glob pattern when that pattern
    matches exactly the retained files; otherwise they are passed as a
    comma-separated list.  Output goes to ``name``.

    :param deps: candidate input paths.
    :param targets: not used in the body.
    :returns: the result of executing the merge command.
    """
    nonempty = [
        dep for dep in deps
        if os.path.exists(dep) and os.stat(dep).st_size > 0
    ]
    pattern = _reduce_to_glob(nonempty)
    matched = sorted(glob.glob(pattern))

    inputs = pattern if matched == sorted(nonempty) else ",".join(nonempty)

    merge_cmd = "merge_otu_tables.py -i '{filenames}' -o {output}".format(
        filenames=inputs, output=name)
    return CmdAction(merge_cmd, verbose=True).execute()
def run():
    """Stitch the read pair, with two fallbacks, then tidy up.

    Candidates handed to ``strategies.backup``, in order:

    1. ``stitch_cmd`` on the reads as given;
    2. a group that runs ``pair_cmd`` to write ``*_paired.fastq`` files
       and then ``stitch_cmd`` on those;
    3. a plain ``cp`` of the first read file to ``output_fastq``.

    When ``remove_tempfiles`` is set, any ``*_paired.fastq`` files that
    exist afterwards are deleted.

    :returns: the result of ``strategies.backup``.
    """
    def _paired(fastq):
        return fastq.replace('.fastq', '_paired.fastq')

    r1, r2 = input_fastq_pair
    r1_paired = _paired(r1)
    r2_paired = _paired(r2)

    stitch_raw = CmdAction(
        stitch_cmd.format(r1=r1, r2=r2, opts_str=opts_str,
                          output_fastq=output_fastq),
        verbose=verbose)
    pair_then_stitch = strategies.Group(
        CmdAction(
            pair_cmd.format(r1=r1, r2=r2,
                            r1out=r1_paired, r2out=r2_paired),
            verbose=verbose),
        CmdAction(
            stitch_cmd.format(r1=r1_paired, r2=r2_paired,
                              opts_str=opts_str, output_fastq=output_fastq),
            verbose=verbose))
    copy_first = CmdAction(
        'cp {r1} {out}'.format(r1=r1, out=output_fastq),
        verbose=verbose)

    ret = strategies.backup((stitch_raw, pair_then_stitch, copy_first))

    if remove_tempfiles:
        for leftover in (r1_paired, r2_paired):
            if os.path.exists(leftover):
                os.remove(leftover)
    return ret
def run():
    """Run ``cmd`` unless one of ``deps`` is an empty file.

    As soon as an empty dependency is found, ``output_sam`` is created as
    an empty file and ``None`` is returned without running the command.

    :returns: the result of executing ``cmd``, or ``None``.
    """
    for dep in deps:
        if os.stat(dep).st_size < 1:
            open(output_sam, 'w').close()
            return None
    return CmdAction(cmd, verbose=True).execute()
def run():
    """Run ``cmd`` on ``infile``, or create an empty ``outfile``.

    An ``infile`` of at most one byte is treated as empty: ``outfile`` is
    created empty and ``None`` is returned.

    :returns: the result of executing the formatted command, or ``None``.
    """
    if os.stat(infile).st_size <= 1:
        open(outfile, 'w').close()
        return None

    rendered = cmd.format(infile=infile, outfile=outfile)
    return CmdAction(rendered, verbose=verbose).execute()
def picrust(file, output_dir=None, verbose=True, **opts):
    """Workflow to predict metagenome functional content from 16S OTU tables.

    :param file: String; input OTU table.
    :keyword output_dir: String; passed as ``basedir`` when naming the
                         two output files (default:None)
    :keyword verbose: Boolean; passed to every command action
                      (default:True)
    :keyword tab_in: Boolean; True if the input is a tabulated
                     file (default:0)
    :keyword tab_out: Boolean; True if the output file is to be
                      tabulated (default:0)
    :keyword gg_version: String; the greengenes version to be used
                         (default:most recent version)
    :keyword t: String; option to use a different type of prediction
                (default:KO)
    :keyword with_confidence: Boolean; Set to True to output
                              confidence intervals (default:0)
    :keyword custom: String; specify a file containing a custom trait to
                     predict metagenomes
    :keyword drop_unknown: Boolean; when True, first rewrite the input so
                           that it only keeps observations whose ID is
                           listed in the configured copy-number file; the
                           original input is kept under an "unfiltered"
                           tag (default:True)

    :returns: a task dictionary with the keys ``name``, ``actions``,
              ``file_dep``, ``targets`` and ``title``.

    External Dependencies:
      - PICRUSt: Version 1.0.0,
        http://picrust.github.io/picrust/install.html#install
    """
    norm_out = new_file(addtag(file, "normalized_otus"), basedir=output_dir)
    predict_out = new_file(addtag(file, "picrust"), basedir=output_dir)

    all_opts = {
        'tab_in': 0,
        'tab_out': 0,
        'gg_version': '',
        't': '',
        'with_confidence': 0,
        'custom': '',
        'drop_unknown': True,
    }
    all_opts.update(opts)
    drop_unknown = all_opts.pop("drop_unknown", True)

    _copy_fname = settings.workflows.picrust.copy_number

    def _drop_unknown():
        """Filter ``file`` in place down to the IDs in the copy-number file."""
        import os
        import gzip
        import json
        from biom.table import DenseOTUTable
        from biom.parse import (
            OBS_META_TYPES,
            parse_biom_table,
            parse_classic_table_to_rich_table
        )

        # The first tab-separated column of each row is the ID.  The
        # handle is closed as soon as the set is built.
        with gzip.open(_copy_fname) as copy_fh:
            idx = set(row.strip().split('\t')[0] for row in copy_fh)
        filter_func = lambda a, otu_id, c: str(otu_id) in idx

        tmpfile = file + "_tmp.biom"
        with open(file) as f, open(tmpfile, 'w') as f_out:
            try:
                table = parse_biom_table(f)
            except Exception:
                # The failed parse may have consumed part or all of the
                # handle; rewind so the classic-table parser starts from
                # the first line rather than wherever the read stopped.
                f.seek(0)
                table = parse_classic_table_to_rich_table(
                    f, None, None, OBS_META_TYPES['taxonomy'], DenseOTUTable)
            table = table.filterObservations(filter_func)
            json.dump(table.getBiomFormatObject("AnADAMA"), f_out)

        # Keep the original next to the filtered table, then put the
        # filtered table where the original used to be.
        os.rename(file, addtag(file, "unfiltered"))
        os.rename(tmpfile, file)

    cmd1 = ("normalize_by_copy_number.py "
            + "-i %s"
            + " -o " + norm_out)
    if all_opts['gg_version']:
        cmd1 += " -g " + all_opts['gg_version']
    if all_opts['tab_in']:
        cmd1 += " -f"

    cmd2 = ("predict_metagenomes.py "
            + "-i %s"
            + " -o " + predict_out)
    if all_opts['gg_version']:
        cmd2 += " -g " + all_opts['gg_version']
    if all_opts['tab_out']:
        cmd2 += " -f"
    if all_opts['t']:
        cmd2 += " -t " + all_opts['t']
    if all_opts['with_confidence']:
        cmd2 += " --with_confidence"
    if all_opts['custom']:
        cmd2 += " -c " + all_opts['custom']

    converted = addtag(file, "json")
    format_cmd = CmdAction('biom convert --table-type="OTU table"'
                           ' --header-key taxonomy --to-json'
                           ' -i {} -o {} '.format(file, converted),
                           verbose=verbose)

    def run(targets):
        # try to run without converting to json, if that fails,
        # convert first, then run on the json-converted biom file
        if os.stat(file).st_size < 1:
            # Empty input: create empty targets and report success.
            for target in targets:
                open(target, "w").close()
            return True

        return strategies.backup(
            (strategies.Group(CmdAction(cmd1 % (file), verbose=verbose),
                              CmdAction(cmd2 % (norm_out), verbose=verbose)),
             strategies.Group(format_cmd,
                              CmdAction(cmd1 % (converted), verbose=verbose),
                              CmdAction(cmd2 % (norm_out), verbose=verbose)))
        )

    actions = [run]
    if drop_unknown:
        actions = [_drop_unknown, run]

    def _rusage(task):
        """Render the task title, with estimates scaled by input size."""
        msg = task.name + " Estimated mem={mem} time={time} threads=1"
        s = os.stat(list(task.file_dep)[0]).st_size
        return msg.format(
            mem=100 + (s / 1024.),
            time=100 + (s * 2.5e-4)
        )

    return dict(
        name="picrust:" + predict_out,
        actions=actions,
        file_dep=[file],
        targets=[predict_out, norm_out],
        title=_rusage,
    )