def gmap_setup(gsnap_dir, out_dir, ref_fasta):
    """Build the gmap and cmet indexes for a reference fasta.

    The shell command is echoed to stderr, saved to
    ``<out_dir>/ran_gsnap_setup.sh`` when that file is missing, stale or
    holds a different command, and then executed through the shell.

    Args:
        gsnap_dir: not used by this function.
        out_dir: directory receiving ``ran_gsnap_setup.sh`` and
            ``gmap_build.log``.
        ref_fasta: path to the reference fasta file.

    Returns:
        The basename of ``ref_fasta`` without its extension, which is the
        name passed to ``gmap_build -d``.
    """
    ref_base = op.splitext(op.basename(ref_fasta))[0]
    ref_dir = op.dirname(ref_fasta)

    # The command starts by changing into the reference directory.
    # NOTE(review): the newlines are replaced by spaces before execution, so
    # the shell receives "set -e  cd <ref_dir> && ..." on one line; a POSIX
    # shell would treat "cd <ref_dir>" as arguments to `set`, not as a
    # separate command -- confirm whether the cd is really intended. The
    # text is left untouched so previously saved commands still compare
    # equal.
    cmd = "set -e\n cd %(ref_dir)s && \n"
    cmd += "gmap_build"
    cmd += " -k 12 -D %(ref_dir)s -d %(ref_base)s %(ref_fasta)s > %(out_dir)s/gmap_build.log && "
    cmd += "\ncmetindex -d %(ref_base)s -F %(ref_dir)s > gmap_cmetindex.log 2> gmap_cmetindex.error.log"
    cmd %= {
        "ref_dir": ref_dir,
        "ref_base": ref_base,
        "ref_fasta": ref_fasta,
        "out_dir": out_dir,
    }
    sys.stderr.write("[ command ] $ %s\n" % cmd)

    cmd_last = op.join(out_dir, "ran_gsnap_setup.sh")
    # Order matters: the helpers are only consulted once the file exists.
    rerun = (not op.exists(cmd_last)
             or not is_up_to_date_b(ref_fasta, cmd_last)
             or not is_same_cmd(cmd, cmd_last))
    if rerun:
        with open(cmd_last, "w") as fh:
            fh.write(cmd + "\n")

    # TODO: check time-stamp
    # Until that check exists the setup is always re-run.
    rerun = True

    if rerun:
        p = Popen(cmd.replace('\n', ' '), shell=True)
        sys.stderr.write("^ executing gmap/gsnap setup^\n")
        status = p.wait()
        if status != 0:
            # Failure stays non-fatal, but is no longer silent.
            sys.stderr.write(
                "WARNING: gmap/gsnap setup exited with status %d\n" % status)
    else:
        sys.stderr.write("gsnap setup stuff is up to date, re-using\n")
    return ref_base
def gmap_setup(gsnap_dir, out_dir, ref_fasta):
    """Build the gmap and cmet indexes (k-mer size 15) for a reference fasta.

    The shell command is echoed to stderr, saved to
    ``<out_dir>/ran_gsnap_setup.sh`` when that file is missing, stale or
    holds a different command, and then executed through the shell.

    Args:
        gsnap_dir: not used by this function.
        out_dir: directory receiving ``ran_gsnap_setup.sh`` and
            ``gmap_build.log``.
        ref_fasta: path to the reference fasta file.

    Returns:
        The basename of ``ref_fasta`` without its extension, which is the
        name passed to ``gmap_build -d``.
    """
    ref_base = op.splitext(op.basename(ref_fasta))[0]
    ref_dir = op.dirname(ref_fasta)

    # The command starts by changing into the reference directory.
    # NOTE(review): the newlines are replaced by spaces before execution, so
    # the shell receives "set -e  cd <ref_dir> && ..." on one line; a POSIX
    # shell would treat "cd <ref_dir>" as arguments to `set`, not as a
    # separate command -- confirm whether the cd is really intended. The
    # text is left untouched so previously saved commands still compare
    # equal.
    template = (
        "set -e\n cd %(ref_dir)s && \n"
        "gmap_build -w 2"
        " -k 15 -D %(ref_dir)s -d %(ref_base)s %(ref_fasta)s > %(out_dir)s/gmap_build.log && "
        "\ncmetindex -d %(ref_base)s -F %(ref_dir)s -k 15 > gmap_cmetindex.log 2> gmap_cmetindex.error.log"
    )
    cmd = template % {
        "ref_dir": ref_dir,
        "ref_base": ref_base,
        "ref_fasta": ref_fasta,
        "out_dir": out_dir,
    }
    sys.stderr.write("[ command ] $ %s\n" % cmd)

    cmd_last = op.join(out_dir, "ran_gsnap_setup.sh")
    # Order matters: the helpers are only consulted once the file exists.
    rerun = (not op.exists(cmd_last)
             or not is_up_to_date_b(ref_fasta, cmd_last)
             or not is_same_cmd(cmd, cmd_last))
    if rerun:
        with open(cmd_last, "w") as fh:
            fh.write(cmd + "\n")

    # TODO: check time-stamp
    # Until that check exists the setup is always re-run.
    rerun = True

    if rerun:
        p = Popen(cmd.replace('\n', ' '), shell=True)
        sys.stderr.write("^ executing gmap/gsnap setup^\n")
        status = p.wait()
        if status != 0:
            # Failure stays non-fatal, but is no longer silent.
            sys.stderr.write(
                "WARNING: gmap/gsnap setup exited with status %d\n" % status)
    else:
        sys.stderr.write("gsnap setup stuff is up to date, re-using\n")
    return ref_base
def run_gsnap(gsnap_dir, gsnap_args, out_dir, ref_fasta, reads_paths, cpu_count):
    """Align reads with gsnap, skipping the run when outputs are current.

    The command is echoed to stderr. It is executed unless it matches the
    one saved in ``<out_dir>/ran_gsnap.sh`` and every reads file is up to
    date with respect to both the SAM output and that saved command file.
    On a non-zero exit the SAM file is removed, the gsnap log is printed
    and the process exits with status 1.

    Args:
        gsnap_dir: not used by this function.
        gsnap_args: extra command-line arguments for gsnap, as a string.
        out_dir: directory receiving the SAM output, log and command file.
        ref_fasta: path to the reference fasta file.
        reads_paths: paths of the reads files; made absolute before use.
        cpu_count: integer passed to ``--nthreads``.

    Returns:
        Absolute path of ``methylcoded.gsnap.sam`` inside ``out_dir``.
    """
    ref_name = op.basename(op.splitext(ref_fasta)[0])
    ref_dir = op.dirname(ref_fasta)
    reads_paths = [op.abspath(r) for r in reads_paths]
    log = op.join(out_dir, "gsnap_run.log")

    if "--mode" not in gsnap_args:
        cmet = "--mode=cmet-nonstranded"
        sys.stderr.write("using non-stranded mode, specify "
                         " --mode=cmet-stranded in --extra-args for stranded mode\n")
    else:
        # The caller chose the mode; without this binding the command
        # template below failed with KeyError: 'cmet'.
        cmet = ""

    if any(r.endswith(".gz") for r in reads_paths):
        gsnap_args += " --gunzip "

    out_sam = op.abspath(op.join(out_dir, "methylcoded.gsnap.sam"))
    cmd = "gsnap --quiet-if-excessive -A sam -k 15 "
    cmd += " --nofails --nthreads %(cpu_count)i -D %(ref_dir)s %(gsnap_args)s"
    cmd += " -d %(ref_name)s %(cmet)s %(reads_paths_str)s > %(out_sam)s 2> %(log)s"
    cmd %= {
        "cpu_count": cpu_count,
        "ref_dir": ref_dir,
        "gsnap_args": gsnap_args,
        "ref_name": ref_name,
        "cmet": cmet,
        "reads_paths_str": " ".join(reads_paths),
        "out_sam": out_sam,
        "log": log,
    }
    sys.stderr.write(cmd + "\n")

    cmd_path = op.join(out_dir, "ran_gsnap.sh")
    new_cmd = not is_same_cmd(cmd, cmd_path)

    if not new_cmd and all(is_up_to_date_b(r, out_sam) for r in reads_paths) \
            and all(is_up_to_date_b(r, cmd_path) for r in reads_paths):
        sys.stderr.write("^ NOT executing gsnap. everything is up to date.^\n")
    else:
        # Written on every execution, so the file's mtime is refreshed even
        # when the command text is unchanged.
        with open(cmd_path, "w") as fh:
            fh.write(cmd + "\n")
        sys.stderr.write("^ executing gsnap. ^\n")
        p = Popen(cmd.replace('\n', ' '), shell=True)
        if p.wait() != 0:
            # Drop the partial output so a later run cannot mistake it for
            # a finished alignment.
            if op.exists(out_sam):
                os.unlink(out_sam)
            with open(log) as log_fh:
                sys.stderr.write("ERROR:\n%s\n" % log_fh.read())
            sys.exit(1)
    return out_sam
def run_gsnap(gsnap_dir, gsnap_args, out_dir, ref_fasta, reads_paths, cpu_count):
    """Run the gsnap aligner on the reads unless a previous run is current.

    The command is echoed to stderr. It is executed unless it matches the
    one saved in ``<out_dir>/ran_gsnap.sh`` and every reads file is up to
    date with respect to both the SAM output and that saved command file.
    On a non-zero exit the SAM file is removed, the gsnap log is printed
    and the process exits with status 1.

    Args:
        gsnap_dir: not used by this function.
        gsnap_args: extra command-line arguments for gsnap, as a string.
        out_dir: directory receiving the SAM output, log and command file.
        ref_fasta: path to the reference fasta file.
        reads_paths: paths of the reads files; made absolute before use.
        cpu_count: integer passed to ``--nthreads``.

    Returns:
        Absolute path of ``methylcoded.gsnap.sam`` inside ``out_dir``.
    """
    ref_name = op.basename(op.splitext(ref_fasta)[0])
    ref_dir = op.dirname(ref_fasta)
    reads_paths = [op.abspath(r) for r in reads_paths]
    log = op.join(out_dir, "gsnap_run.log")

    # Default to an empty mode flag: when the caller already supplies
    # --mode, the template below previously raised KeyError: 'cmet'.
    cmet = ""
    if "--mode" not in gsnap_args:
        cmet = "--mode=cmet-nonstranded"
        sys.stderr.write("using non-stranded mode, specify "
                         " --mode=cmet-stranded in --extra-args for stranded mode\n")

    if any(r.endswith(".gz") for r in reads_paths):
        gsnap_args += " --gunzip "

    out_sam = op.abspath(op.join(out_dir, "methylcoded.gsnap.sam"))
    template = (
        "gsnap --quiet-if-excessive -A sam -k 15 "
        " --nofails --nthreads %(cpu_count)i -D %(ref_dir)s %(gsnap_args)s"
        " -d %(ref_name)s %(cmet)s %(reads_paths_str)s > %(out_sam)s 2> %(log)s"
    )
    cmd = template % {
        "cpu_count": cpu_count,
        "ref_dir": ref_dir,
        "gsnap_args": gsnap_args,
        "ref_name": ref_name,
        "cmet": cmet,
        "reads_paths_str": " ".join(reads_paths),
        "out_sam": out_sam,
        "log": log,
    }
    sys.stderr.write(cmd + "\n")

    cmd_path = op.join(out_dir, "ran_gsnap.sh")
    new_cmd = not is_same_cmd(cmd, cmd_path)

    up_to_date = (
        not new_cmd
        and all(is_up_to_date_b(r, out_sam) for r in reads_paths)
        and all(is_up_to_date_b(r, cmd_path) for r in reads_paths)
    )
    if up_to_date:
        sys.stderr.write("^ NOT executing gsnap. everything is up to date.^\n")
        return out_sam

    # Written on every execution, so the file's mtime is refreshed even when
    # the command text is unchanged.
    with open(cmd_path, "w") as fh:
        fh.write(cmd + "\n")
    sys.stderr.write("^ executing gsnap. ^\n")
    p = Popen(cmd.replace('\n', ' '), shell=True)
    if p.wait() != 0:
        # Drop the partial output so a later run cannot mistake it for a
        # finished alignment.
        if op.exists(out_sam):
            os.unlink(out_sam)
        with open(log) as log_fh:
            sys.stderr.write("ERROR:\n%s\n" % log_fh.read())
        sys.exit(1)
    return out_sam