Пример #1
0
def gmap_setup(gsnap_dir, out_dir, ref_fasta):
    """Build the gmap/gsnap database and cmet index for a reference fasta.

    The shell command is recorded in ``<out_dir>/ran_gsnap_setup.sh``. It is
    executed only when the generated Makefile is missing or
    ``is_up_to_date_b(makefile, record)`` is false.

    Args:
        gsnap_dir: root of the gmap/gsnap tree; ``util/gmap_setup`` and
            ``src/cmetindex`` are looked up beneath it.
        out_dir: directory that receives the Makefile, ``gmap_setup.log``
            and the command record.
        ref_fasta: path to the reference fasta file.

    Returns:
        ``ref_fasta`` with its extension removed.
    """
    ref_base = op.splitext(ref_fasta)[0]
    # An empty dirname would turn "cd %(ref_dir)s" into a bare "cd" (which
    # jumps to $HOME) and leave "-D" without its argument.
    ref_dir = op.dirname(ref_fasta) or "."
    ref_name = op.basename(ref_base)
    make_path = "%(out_dir)s/Makefile.%(ref_name)s" % locals()
    gsnap_setup = op.join(gsnap_dir, "util", "gmap_setup")

    # The command cds into the reference directory first; the coords, gmapdb
    # and cmetindex logs are redirected to relative paths and land there.
    cmd = (
        "\n cd %(ref_dir)s && \n"
        "%(gsnap_setup)s"
        " -D %(ref_dir)s -o %(make_path)s -d %(ref_base)s %(ref_fasta)s"
        " > %(out_dir)s/gmap_setup.log && "
        "\n make -f %(make_path)s coords > gmap_coords.log && "
        "\n make -f %(make_path)s gmapdb > gmap_gmapdb.log &&"
        "\n %(gsnap_dir)s/src/cmetindex -d %(ref_name)s -D %(ref_dir)s"
        " > gmap_cmetindex.log 2> gmap_cmetindex.error.log"
    ) % dict(
        gsnap_dir=gsnap_dir,
        gsnap_setup=gsnap_setup,
        out_dir=out_dir,
        ref_dir=ref_dir,
        ref_base=ref_base,
        ref_name=ref_name,
        ref_fasta=ref_fasta,
        make_path=make_path,
    )
    sys.stderr.write("[ command ] $ " + cmd + "\n")

    # Refresh the command record when it is missing, stale with respect to
    # the reference, or holds a different command.
    cmd_last = op.join(out_dir, "ran_gsnap_setup.sh")
    if (not op.exists(cmd_last)
            or not is_up_to_date_b(ref_fasta, cmd_last)
            or not is_same_cmd(cmd, cmd_last)):
        with open(cmd_last, "w") as fh:
            fh.write(cmd + "\n")

    # make_path already starts with out_dir, so it is used as-is; joining it
    # with out_dir again would point at <out_dir>/<out_dir>/Makefile.* for a
    # relative out_dir.
    if not op.exists(make_path) or not is_up_to_date_b(make_path, cmd_last):
        # TODO: add a check for the cmetindex files as well.
        p = Popen(cmd.replace('\n', ' '), shell=True)
        sys.stderr.write("^ executing gmap/gsnap setup^\n")
        if p.wait() != 0:
            # The clean-up is a shell snippet ("cd ...; make ..."), so it
            # needs shell=True; without it Popen tries to exec the whole
            # string as a program name and raises OSError.
            pclean = Popen("cd %(out_dir)s; make -f %(make_path)s clean"
                           % locals(), shell=True)
            pclean.wait()
            sys.stderr.write(
                "ERROR: gmap/gsnap setup failed; see %s/gmap_setup.log\n"
                % out_dir)
    else:
        sys.stderr.write(
            "^ NOT executing gmap/gsnap setup. everything is up to date.^\n")
    return ref_base
Пример #2
0
def run_gsnap(gsnap_dir, gsnap_args, out_dir, ref_fasta, reads_paths, cpu_count):
    """Run ``gsnap --cmet`` on the reads and return the path of the SAM file.

    The command line is recorded in ``<out_dir>/ran_gsnap.sh``. gsnap is
    skipped when that record already holds the same command and
    ``is_up_to_date_b`` reports both the SAM output and the record as
    current for every reads file.

    Example of the kind of command that is built:
        /opt/src/gmap/gmap-2010-03-09/src/gsnap --npaths 1
        --quiet-if-excessive -A sam --nofails --nthreads 4 -D ./
        -d hg19e_gmap --cmet bs_reads.fasta > bs.align.sam

    Args:
        gsnap_dir: root of the gmap/gsnap tree (``src/gsnap`` is run).
        gsnap_args: extra command-line arguments, inserted verbatim.
        out_dir: directory for the SAM output, the log and the record.
        ref_fasta: reference fasta; its directory and base name are passed
            to gsnap as ``-D`` and ``-d``.
        reads_paths: iterable of reads files (made absolute before use).
        cpu_count: integer passed to ``--nthreads``.

    Returns:
        Absolute path of ``methylcoded.gsnap.sam`` inside ``out_dir``.

    Exits the process with status 1 (after echoing the gsnap log to stderr)
    when gsnap returns a non-zero status.
    """
    ref_base = op.splitext(ref_fasta)[0]
    ref_name = op.basename(ref_base)
    # An empty dirname would leave "-D" without a value, and gsnap would
    # swallow the next token as the database directory.
    ref_dir = op.dirname(ref_fasta) or "."
    reads_paths = [op.abspath(r) for r in reads_paths]
    log = op.join(out_dir, "gsnap_run.log")
    out_sam = op.abspath(op.join(out_dir, "methylcoded.gsnap.sam"))

    cmd = (
        "%(gsnap_dir)s/src/gsnap --quiet-if-excessive -A sam"
        " --nofails --nthreads %(cpu_count)i -D %(ref_dir)s %(gsnap_args)s"
        " -d %(ref_name)s --cmet %(reads_paths_str)s > %(out_sam)s 2> %(log)s"
    ) % dict(
        gsnap_dir=gsnap_dir,
        cpu_count=cpu_count,
        ref_dir=ref_dir,
        gsnap_args=gsnap_args,
        ref_name=ref_name,
        reads_paths_str=" ".join(reads_paths),
        out_sam=out_sam,
        log=log,
    )
    cmd_path = op.join(out_dir, "ran_gsnap.sh")
    new_cmd = not is_same_cmd(cmd, cmd_path)

    sys.stderr.write("\n" + cmd + "\n")
    up_to_date = (
        not new_cmd
        and all(is_up_to_date_b(r, out_sam) for r in reads_paths)
        and all(is_up_to_date_b(r, cmd_path) for r in reads_paths)
    )
    if up_to_date:
        sys.stderr.write("^ NOT executing gsnap. everything is up to date.^\n")
        return out_sam

    # (Re)write the record once, right before running; this also refreshes
    # its timestamp for the next up-to-date check.
    with open(cmd_path, "w") as fh:
        fh.write(cmd + "\n")
    sys.stderr.write("^ executing gsnap. ^\n")
    p = Popen(cmd.replace('\n', ' '), shell=True)
    if p.wait() != 0:
        # Drop the partial output so a later run does not treat it as a
        # finished alignment. It may not exist if the shell could not even
        # open the redirect target, so a missing file is not an error here.
        try:
            os.unlink(out_sam)
        except OSError:
            pass
        try:
            with open(log) as log_fh:
                log_text = log_fh.read()
        except (IOError, OSError):
            log_text = "(could not read %s)" % log
        sys.stderr.write("ERROR:\n" + log_text + "\n")
        sys.exit(1)
    return out_sam
Пример #3
0
def main(out_dir, ref_fasta, reads, gsnap_path, gsnap_args):
    """Prepare the reads, set up the gsnap database, align and parse.

    Args:
        out_dir: directory for all generated files.
        ref_fasta: path to the reference fasta file.
        reads: sequence of one or two reads files.
        gsnap_path: root of the gmap/gsnap tree.
        gsnap_args: extra arguments forwarded to gsnap.

    Reads that ``is_fastq`` accepts are passed to gsnap unchanged, as is a
    single fasta file. A pair of fasta files is merged into one
    gsnap-format fasta in ``out_dir``; the merge is skipped when
    ``is_up_to_date_b`` reports the merged file as current.
    """
    # Remove only a literal "_1" suffix. str.rstrip("_1") strips any run of
    # the characters "_" and "1", so "sample_11" would become "sample".
    reads_name = op.basename(reads[0])
    if reads_name.endswith("_1"):
        reads_name = reads_name[:-len("_1")]
    fa_reads = out_dir + "/" + reads_name + ".fasta"
    paired_end = len(reads) > 1

    if all(is_fastq(r) for r in reads):
        sys.stderr.write("using existing reads files\n")
        gsnap_reads = reads
    elif not paired_end:
        # A single fasta needs no conversion, so no new file is written.
        gsnap_reads = (reads[0],)
    else:
        # A pair of fasta files has to be put into gsnap format; reuse the
        # converted file when it is already current.
        if not all(is_up_to_date_b(r, fa_reads) for r in reads):
            with open(fa_reads, "w") as out_fa:
                fastx_to_gsnap_fasta(reads, out_fa)
        # Always hand gsnap the converted file, so a cached run uses the
        # same input (and the same recorded command) as the first run.
        gsnap_reads = (fa_reads,)

    gmap_setup(gsnap_path, out_dir, ref_fasta)
    gsnap_sam = run_gsnap(gsnap_path, gsnap_args, out_dir, ref_fasta,
                          gsnap_reads, cpu_count=CPU_COUNT)

    parse_gsnap_sam(gsnap_sam, ref_fasta, out_dir, paired_end)