Пример #1
0
def faster_split_bam(log, sorted_reduced_bam, sample_dir_iter, iteration):
    """Split a BAM into one file of SAM records per reference name.

    Runs the pipeline ``samtools view | grep -v ^@ | gawk`` from inside a
    newly created ``<sample_dir_iter>/loci/temp`` directory.  gawk uses a tab
    field separator and redirects every record to a file named after its
    third field, so the files land in that temp directory.

    Args:
        log: logger-like object; only ``log.info`` is called.
        sorted_reduced_bam: path of the BAM given to ``samtools view``.  The
            working directory is changed before the pipeline starts, so a
            relative path is resolved against the temp directory.
        sample_dir_iter: directory of the current sample/iteration.
        iteration: not used by this function; kept for the caller's interface.

    Returns:
        Path of the temp directory holding the per-reference files.

    Raises:
        IOError: if the gawk process exits with a non-zero status.
        OSError: from ``os.makedirs`` when the temp directory cannot be
            created (this includes the directory already existing).

    Note:
        The process-wide working directory is changed as a side effect: it is
        ``sample_dir_iter`` after success and the temp directory after a
        failure.
    """
    start_time = time.time()
    sample_dir_iter_locus_temp = os.path.join(sample_dir_iter, "loci", "temp")
    # make a temp dir in locus folder in which to store locus-specific SAM data
    os.makedirs(sample_dir_iter_locus_temp)
    # gawk writes its output files relative to the current directory
    os.chdir(sample_dir_iter_locus_temp)
    cmd1 = [
        get_user_path("executables", "samtools"), "view", sorted_reduced_bam
    ]
    # drop any header lines (those starting with "@")
    cmd2 = [get_user_path("executables", "grep"), "-v", "^@"]
    cmd3 = [get_user_path("executables", "gawk"), "-F\t", '{print > $3}']
    proc1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
    proc2 = subprocess.Popen(cmd2, stdin=proc1.stdout, stdout=subprocess.PIPE)
    proc3 = subprocess.Popen(cmd3, stdin=proc2.stdout, stdout=subprocess.PIPE)
    # close the parent's copies so upstream processes see a broken pipe if a
    # downstream process exits early
    proc1.stdout.close()
    proc2.stdout.close()
    proc3.communicate()
    # reap the upstream processes so they do not linger as zombies
    proc2.wait()
    proc1.wait()
    # compare by value; identity comparison against an int literal is fragile
    if proc3.returncode != 0:
        raise IOError("Splitting BAM file has failed")
    os.chdir(sample_dir_iter)
    end_time = time.time()
    time_delta_sec = round(end_time - start_time, 3)
    log.info("\tSplit SAMs took {} seconds".format(time_delta_sec))
    return sample_dir_iter_locus_temp
Пример #2
0
def bwa_mem_pe_align(log, sample, sample_dir, ref, cores, r1, r2, iteration=0):
    """Align a read pair with ``bwa mem`` and write the result as BAM.

    The output of ``bwa mem`` is piped into ``samtools view -bS -``, whose
    stdout is written to ``iter-<iteration>.bam`` in ``sample_dir``.  The
    stderr of each tool goes to its own log file in the same directory.

    Args:
        log: logger-like object; only ``log.info`` is called.
        sample: sample name, used in the log message only.
        sample_dir: directory receiving the BAM and the two log files.
        ref: reference passed to ``bwa mem``.
        cores: thread count for ``bwa mem -t`` (converted with ``str``).
        r1: object whose ``pth`` attribute is the first read file path.
        r2: object whose ``pth`` attribute is the second read file path.
        iteration: value embedded in the output file names.

    Returns:
        Path of the BAM file that was written.

    Note:
        Exit codes of bwa and samtools are not checked; failures are only
        visible in the log files.
    """
    cmd1 = [
        get_user_path("executables", "bwa"), "mem", "-t",
        str(cores), ref, r1.pth, r2.pth
    ]
    cmd2 = [get_user_path("executables", "samtools"), "view", "-bS", "-"]
    sampe_out_fname = os.path.join(sample_dir,
                                   'iter-{}.pe.bwa.log'.format(iteration))
    samtools_out_fname = os.path.join(
        sample_dir, 'iter-{}.pe.samtools.log'.format(iteration))
    bam_out_fname = os.path.join(sample_dir, 'iter-{}.bam'.format(iteration))
    log.info("Building BAM for {}, iteration {}".format(sample, iteration))
    # BAM is binary data, so its output handle is opened in binary mode
    with open(sampe_out_fname, 'w') as sampe_out, \
            open(samtools_out_fname, 'w') as samtools_out, \
            open(bam_out_fname, 'wb') as bam_out:
        proc1 = subprocess.Popen(cmd1,
                                 stdout=subprocess.PIPE,
                                 stderr=sampe_out)
        proc2 = subprocess.Popen(cmd2,
                                 stdin=proc1.stdout,
                                 stdout=bam_out,
                                 stderr=samtools_out)
        # let bwa receive a broken pipe if samtools exits first
        proc1.stdout.close()
        proc2.communicate()
        # reap bwa so it does not linger as a zombie
        proc1.wait()
    return bam_out_fname
Пример #3
0
def samtools_split_sam(sample, sample_dir_iter_locus, locus, clean,
                       only_single_locus):
    """Split one locus SAM file into a name-sorted paired BAM and a singleton BAM.

    Three samtools commands run one after another on
    ``<sample_dir_iter_locus>/<locus>.sam``:

    1. ``view -f 2 -F 2048 -b`` writes ``<locus>.paired.bam``.
    2. ``sort -n`` writes ``<locus>.paired.sorted.bam`` from that file.
    3. ``view -f 8 -b`` writes ``<locus>.singleton.bam``.

    Args:
        sample: not used by this function.
        sample_dir_iter_locus: directory holding the input SAM and all outputs.
        locus: locus name used as the file-name stem.
        clean: when true, delete the input SAM and the unsorted paired BAM.
        only_single_locus: not used by this function.

    Returns:
        Tuple ``(sorted paired BAM path, singleton BAM path)``.
    """
    locus_sam = os.path.join(sample_dir_iter_locus, '{}.sam'.format(locus))
    paired_bam = os.path.join(sample_dir_iter_locus,
                              '{}.paired.bam'.format(locus))
    paired_sorted_bam = os.path.join(sample_dir_iter_locus,
                                     '{}.paired.sorted.bam'.format(locus))
    singleton_bam = os.path.join(sample_dir_iter_locus,
                                 '{}.singleton.bam'.format(locus))

    # -f 2 -F 2048 keeps properly paired, non-supplementary alignments
    view_paired = [
        get_user_path("executables", "samtools"),
        "view", "-f", "2", "-F", "2048", "-b",
        locus_sam, "-o", paired_bam,
    ]
    subprocess.Popen(view_paired).communicate()

    # sort the paired BAM by read name (-n)
    sort_paired = [
        get_user_path("executables", "samtools"),
        "sort", "-n", paired_bam, "-o", paired_sorted_bam,
    ]
    subprocess.Popen(sort_paired).communicate()

    # -f 8 keeps records flagged as having an unmapped mate
    view_singleton = [
        get_user_path("executables", "samtools"),
        "view", "-f", "8", "-b",
        locus_sam, "-o", singleton_bam,
    ]
    subprocess.Popen(view_singleton).communicate()

    if clean:
        for leftover in (locus_sam, paired_bam):
            os.remove(leftover)
    return paired_sorted_bam, singleton_bam
Пример #4
0
def bedtools_to_fastq(sample, sample_dir, bam_paired, bam_singleton, locus,
                      clean):
    """Convert the paired and singleton BAMs of a locus to FASTQ files.

    ``bedtools bamtofastq`` runs twice: once on ``bam_paired`` (writing
    ``<locus>.read1.fastq`` and ``<locus>.read2.fastq``) and once on
    ``bam_singleton`` (writing ``<locus>.singleton.fastq``).  The stdout and
    stderr of both runs are captured and discarded.

    Args:
        sample: not used by this function.
        sample_dir: directory receiving the FASTQ files.
        bam_paired: BAM converted into the read1/read2 FASTQ pair.
        bam_singleton: BAM converted into the singleton FASTQ.
        locus: locus name used as the file-name stem.
        clean: when true, delete both input BAM files afterwards.

    Returns:
        Dict mapping ``1``, ``2`` and ``'s'`` to the read1, read2 and
        singleton FASTQ paths.
    """
    read1_fastq = os.path.join(sample_dir, '{}.read1.fastq'.format(locus))
    read2_fastq = os.path.join(sample_dir, '{}.read2.fastq'.format(locus))
    singleton_fastq = os.path.join(sample_dir,
                                   '{}.singleton.fastq'.format(locus))

    paired_cmd = [
        get_user_path("executables", "bedtools"), "bamtofastq",
        "-i", bam_paired,
        "-fq", read1_fastq,
        "-fq2", read2_fastq,
    ]
    # stderr may contain entries when chimeric reads are present; those reads
    # are not included in the output, so the captured streams are dropped.
    subprocess.Popen(paired_cmd,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE).communicate()

    singleton_cmd = [
        get_user_path("executables", "bedtools"), "bamtofastq",
        "-i", bam_singleton,
        "-fq", singleton_fastq,
    ]
    subprocess.Popen(singleton_cmd,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE).communicate()

    if clean:
        for consumed_bam in (bam_paired, bam_singleton):
            os.remove(consumed_bam)
    return {1: read1_fastq, 2: read2_fastq, 's': singleton_fastq}
Пример #5
0
def bwa_version():
    """Return the bwa version parsed from the output of running bwa bare.

    bwa is invoked with no arguments and stderr is merged into stdout.  The
    second space-separated token of the third output line is returned.

    Returns:
        The version token as a text string.

    Raises:
        IndexError: if the output has fewer than three lines or the third
            line has no second token.
    """
    # NOTE(review): assumes the "Version: x.y.z" line is the third line of
    # bwa's banner -- confirm against the installed bwa.
    cmd = [get_user_path("executables", "bwa")]
    # universal_newlines makes the pipe yield text, so splitting on "\n"
    # works on Python 3 as well as Python 2
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    return output.split("\n")[2].split(' ')[1]
Пример #6
0
def samtools_version():
    """Return the samtools version parsed from ``samtools --version``.

    stderr is merged into stdout.  The second space-separated token of the
    first output line is returned.

    Returns:
        The version token as a text string.

    Raises:
        IndexError: if the first output line has no second token.
    """
    cmd = [get_user_path("executables", "samtools"), '--version']
    # universal_newlines makes the pipe yield text, so splitting on "\n"
    # works on Python 3 as well as Python 2
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    return output.split("\n")[0].split(' ')[1]
Пример #7
0
def samtools_get_locus_names_from_bam(log, bam, iteration):
    """Return the sorted, unique third-column values of a BAM's records.

    ``samtools view <bam>`` is piped into ``gawk '{print $3}'`` and the
    distinct non-empty output lines are collected.

    Args:
        log: logger-like object; only ``log.info`` is called.
        bam: path of the BAM file to read.
        iteration: value used in the log message only.

    Returns:
        Sorted list of unique locus names (text strings).
    """
    cmd1 = [get_user_path("executables", "samtools"), "view", bam]
    cmd2 = [
        get_user_path("executables", "gawk"),
        '{print $3}',
    ]
    proc1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
    # universal_newlines makes the pipe yield text, so splitting on "\n"
    # works on Python 3 as well as Python 2
    proc2 = subprocess.Popen(cmd2,
                             stdin=proc1.stdout,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
    # let samtools receive a broken pipe if gawk exits first
    proc1.stdout.close()
    output, _ = proc2.communicate()
    # reap samtools so it does not linger as a zombie
    proc1.wait()
    unique_names = set(output.split("\n"))
    # the trailing newline yields an empty entry; discard() tolerates its
    # absence where list.remove() would raise ValueError
    unique_names.discard('')
    locus_names = sorted(unique_names)
    log.info("Recovered {} loci for iteration {}".format(
        len(locus_names), iteration))
    return locus_names
Пример #8
0
def samtools_index(log, sample, sample_dir, bam, iteration=0):
    """Index a BAM file with ``samtools index``.

    The combined stdout/stderr of samtools is written to
    ``iter-<iteration>.samtools-idx.log`` in ``sample_dir``.

    Args:
        log: logger-like object; only ``log.info`` is called.
        sample: sample name, used in the log message.
        sample_dir: directory receiving the samtools log file.
        bam: path of the BAM file to index.
        iteration: value embedded in the log file name.

    Returns:
        None.  The exit code of samtools is not checked.
    """
    log.info("Indexing BAM for {}".format(sample))
    cmd = [get_user_path("executables", "samtools"), "index", bam]
    # name the log after the iteration, like the other per-iteration logs
    # (the sample name was previously formatted into the "iter-{}" slot)
    samtools_out_fname = os.path.join(
        sample_dir, 'iter-{}.samtools-idx.log'.format(iteration))
    with open(samtools_out_fname, 'w') as samtools_out:
        proc = subprocess.Popen(cmd,
                                stdout=samtools_out,
                                stderr=subprocess.STDOUT)
        proc.communicate()
Пример #9
0
def bwa_index_seeds(seeds, log):
    """Run ``bwa index`` on the seeds file from inside its own directory.

    The combined stdout/stderr of bwa is appended to ``bwa-index-file.log``
    in the directory that contains ``seeds``.

    Args:
        seeds: path of the file to index; may be relative to the current
            working directory.
        log: logger-like object; only ``log.info`` is called.

    Returns:
        None.  The exit code of bwa is not checked.
    """
    log.info("Running bwa indexing against {}".format(os.path.basename(seeds)))
    # Resolve the path up front: a relative path (or a bare file name, whose
    # dirname is "") would break once the child runs in another directory.
    seeds = os.path.abspath(seeds)
    ref_dir = os.path.dirname(seeds)
    cmd = [get_user_path("executables", "bwa"), "index", seeds]
    log_fname = os.path.join(ref_dir, 'bwa-index-file.log')
    with open(log_fname, 'a') as outf:
        # Run the child in the reference directory via cwd= instead of
        # os.chdir(), so this process's working directory is never changed
        # and cannot be left in the wrong place if an error is raised.
        proc = subprocess.Popen(cmd,
                                stdout=outf,
                                stderr=subprocess.STDOUT,
                                cwd=ref_dir)
        proc.communicate()
Пример #10
0
def samtools_sort(log, sample, sample_dir, bam, iteration=0):
    """Sort a BAM with ``samtools sort`` into the sample directory.

    The sorted output is written to ``iter-<iteration>.reduce.sorted.bam``.
    The combined stdout/stderr of samtools goes to
    ``iter-<iteration>.sort.log``.

    Args:
        log: not used by this function.
        sample: not used by this function.
        sample_dir: directory receiving the sorted BAM and the log file.
        bam: path of the BAM file to sort.
        iteration: value embedded in both output file names.

    Returns:
        Path of the sorted BAM file.
    """
    sorted_bam = os.path.join(sample_dir,
                              'iter-{}.reduce.sorted.bam'.format(iteration))
    sort_log = os.path.join(sample_dir, 'iter-{}.sort.log'.format(iteration))
    sort_cmd = [
        get_user_path("executables", "samtools"),
        "sort", bam,
        "-o", sorted_bam,
    ]
    with open(sort_log, 'w') as log_handle:
        subprocess.Popen(sort_cmd,
                         stdout=log_handle,
                         stderr=subprocess.STDOUT).communicate()
    return sorted_bam
Пример #11
0
def samtools_reduce(log, sample, sample_dir, bam, iteration=0):
    """Filter a BAM with ``samtools view -F 4 -bq 1``.

    The filtered output is written to ``iter-<iteration>.reduce.bam`` in
    ``sample_dir``.  The combined stdout/stderr of samtools goes to
    ``iter-<iteration>.reduce.log``.

    Args:
        log: logger-like object; only ``log.info`` is called.
        sample: sample name, used in the log message only.
        sample_dir: directory receiving the reduced BAM and the log file.
        bam: path of the BAM file to filter.
        iteration: value embedded in the log message and output file names.

    Returns:
        Path of the reduced BAM file.
    """
    log.info("Reducing BAM for {}, iteration {}".format(sample, iteration))
    reduced_bam = os.path.join(sample_dir,
                               'iter-{}.reduce.bam'.format(iteration))
    reduce_log = os.path.join(sample_dir,
                              'iter-{}.reduce.log'.format(iteration))
    reduce_cmd = [
        get_user_path("executables", "samtools"),
        "view", "-F", "4", "-bq", "1",
        bam,
        "-o", reduced_bam,
    ]
    with open(reduce_log, 'w') as log_handle:
        subprocess.Popen(reduce_cmd,
                         stdout=log_handle,
                         stderr=subprocess.STDOUT).communicate()
    return reduced_bam
Пример #12
0
def spades_paired_end_assembly(iteration, sample, sample_dir, fastqs, locus,
                               clean):
    """Assemble the reads of one locus with SPAdes using a single thread.

    Non-final iterations add ``--only-assembler``; the ``'final'`` iteration
    adds ``--careful`` instead.  The kmer, coverage cutoff and memory values
    are read through ``get_user_param('spades', ...)``.

    Args:
        iteration: current iteration; the string ``'final'`` selects the
            final-round options.
        sample: not used by this function.
        sample_dir: directory in which ``<locus>-assembly`` is created.
        fastqs: mapping with keys ``1``, ``2`` and ``'s'`` giving the read1,
            read2 and singleton FASTQ paths.
        locus: locus name used to build the output directory name.
        clean: not used by this function.

    Returns:
        Path of the assembly output directory given to SPAdes via ``-o``.
        The exit code of SPAdes is not checked.
    """
    assembly_out_fname = os.path.join(sample_dir, '{}-assembly'.format(locus))
    # explicitly set threads = 1
    cmd1 = [
        get_user_path("executables", "spades"), "-t", "1", "-1", fastqs[1],
        "-2", fastqs[2], "-s", fastqs['s'], "-k",
        get_user_param('spades', 'kmer'), "--cov-cutoff",
        get_user_param('spades', 'coverage_cutoff'), "--memory",
        get_user_param('spades', 'memory'), "-o", assembly_out_fname
    ]
    # turn off error correction for non-final rounds (for speed); the final
    # round keeps error correction and also uses the --careful assembly
    # option (both of these are slower)
    if iteration == 'final':
        cmd1.append("--careful")
    else:
        cmd1.append("--only-assembler")
    # spades creates its own log file in the assembly dir, so console output
    # is sent to the null device; the handle is closed when the block exits
    with open(os.devnull, 'w') as fnull_file:
        proc = subprocess.Popen(cmd1,
                                stdout=fnull_file,
                                stderr=subprocess.STDOUT)
        proc.communicate()
    return assembly_out_fname
Пример #13
0
def get_bam_header(log, bam, iteration):
    """Return the header of a BAM file as printed by ``samtools view -H``.

    Args:
        log: logger-like object; only ``log.info`` is called.
        bam: path of the BAM file to read.
        iteration: value used in the log message only.

    Returns:
        The captured stdout of samtools, exactly as the pipe delivered it.
    """
    header_cmd = [get_user_path("executables", "samtools"), "view", "-H", bam]
    header_proc = subprocess.Popen(header_cmd, stdout=subprocess.PIPE)
    # stderr is not piped, so the second element of the result is None
    header_out, _ = header_proc.communicate()
    log.info("Got BAM header for iteration {}".format(iteration))
    return header_out