Exemplo n.º 1
0
def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file, process):
    """Run write_bam_byChr per chromosome in a worker pool and merge the outputs.

    For every chromosome in ``chrom_list`` a ``write_bam_byChr`` task is
    submitted with the per-chromosome paths ``<out_dir>/exclude_<chrom>.bam``
    and ``<out_dir>/used_<chrom>.bam``.  After all tasks finish:

    * ``exclude_bam_file_tmp`` plus every per-chromosome exclude BAM are
      merged into ``<out_dir>/exclude.bam``;
    * the per-chromosome used BAMs are merged into ``<out_dir>/used.bam``
      (skipped when there is exactly one, which is then used directly),
      sorted to ``<out_dir>/used.sort.bam`` and indexed.

    :param chrom_list: chromosome names to process.
    :param used_bam_file_tmp: BAM handed to every ``write_bam_byChr`` task.
    :param exclude_bam_file_tmp: BAM that is always part of the exclude merge.
    :param out_dir: directory receiving all intermediate and final BAMs.
    :param total_modify_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param total_delete_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param total_add_reads_file: forwarded unchanged to ``write_bam_byChr``.
    :param process: number of worker processes (converted with ``int``).
    :return: tuple ``(used_sort_bam_file, exclude_bam_file)``.
    :raises Exception: whatever a ``write_bam_byChr`` worker raised.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    used_bam_list = []
    pending = []
    for chrom in chrom_list:
        exclude_bam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(exclude_bam_chr)
        used_bam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        used_bam_list.append(used_bam_chr)

        pending.append(write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, exclude_bam_chr, used_bam_chr,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file)))
    write_bam_pool.close()
    write_bam_pool.join()

    # apply_async stores worker exceptions inside the AsyncResult; without
    # get() a failed chromosome would go unnoticed and the merges below would
    # run on missing or partial files.
    for result in pending:
        result.get()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)

    if len(used_bam_list) != 1:
        used_bam_file = os.path.join(out_dir, "used.bam")
        bamMerge(used_bam_list, used_bam_file)
    else:
        # A single per-chromosome file needs no merge; sort it directly.
        used_bam_file = used_bam_list[0]

    # bamSort receives an output prefix; the sorted file is read back below
    # as "<prefix>.bam".
    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)
    return used_sort_bam_file, exclude_bam_file
Exemplo n.º 2
0
def get_reads_by_region(bam_file, sv_list, out_dir):
    """Write the region BED for ``sv_list`` and split ``bam_file`` by it.

    The BED is written to ``<out_dir>/consider_region.bed`` by
    ``write_region_bed``; ``getRegionReads`` is then called with that BED and
    the two output paths ``<out_dir>/used_tmp.bam`` and
    ``<out_dir>/exclude_tmp.bam``.  The used BAM is indexed afterwards.

    :return: tuple ``(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp)``
        where ``chrom_list`` is whatever ``write_region_bed`` returned.
    """
    bed_path, used_tmp_path, exclude_tmp_path = (
        os.path.join(out_dir, file_name)
        for file_name in ("consider_region.bed", "used_tmp.bam",
                          "exclude_tmp.bam")
    )

    chroms = write_region_bed(bed_path, sv_list)
    getRegionReads(bam_file, bed_path, used_tmp_path, exclude_tmp_path)
    bamIndex(used_tmp_path)

    return chroms, used_tmp_path, exclude_tmp_path
Exemplo n.º 3
0
def main(run_args):
    """Drive the five-step mutation spike-in pipeline.

    Steps, as executed below:

    1. ``get_haplotypes`` builds the haplotype list from the mutation file.
    2. ``deal_haplotype_multi`` selects the reads to edit; if none are
       selected a warning is printed and the function returns early.
    3. ``reads_modify`` edits the selected reads in place.
    4. ``reads_replace`` writes edited / excluded reads and remaps the
       edited ones.
    5. The remapped and excluded BAMs are merged into
       ``<outdir>/edit.sorted.bam`` and indexed.

    :param run_args: parsed command-line namespace.  Attributes read here:
        ``outdir, bamfile, reffasta, mutfile, haplosize, snpfrac, process,
        mindepth, minmutreads, minmapq, diffcover, single, multmapfilter,
        aligner, alignerIndex, seqer, floworder, libkey, barcode, tag``.
    :return: ``None``.
    """
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    # os.makedirs replaces `mkdir -p` via os.system: the path is no longer
    # parsed by a shell (spaces/metacharacters are safe) and a failure to
    # create the directory raises instead of being ignored.
    if not os.path.isdir(temp_out_dir):
        os.makedirs(temp_out_dir)

    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)
    success_list_file = os.path.join(run_args.outdir, 'success_list.txt')

    # Single-argument print(...) prints the same text under Python 2's print
    # statement and is also valid Python 3 syntax.
    try:
        # step1: deal with mutfile and get haplotypes
        print("step1: deal with mutfile and get haplotypes")
        haplotype_list = get_haplotypes(run_args.bamfile, run_args.reffasta,
                                        run_args.mutfile,
                                        int(run_args.haplosize),
                                        float(run_args.snpfrac), invalid_log)

        # step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts
        print("step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts")
        total_chosen_reads, total_chosen_reads_muts = deal_haplotype_multi(
            run_args.bamfile, haplotype_list, temp_out_dir, run_args.reffasta,
            int(run_args.process), int(run_args.mindepth),
            int(run_args.minmutreads), int(run_args.minmapq),
            float(run_args.diffcover), run_args.single,
            run_args.multmapfilter, run_args.aligner, run_args.alignerIndex,
            invalid_log, success_list_file)
    finally:
        # Steps 1 and 2 are the only users of the log; close it even when
        # one of them raises so the handle is not leaked.
        invalid_log.close()

    if len(total_chosen_reads) == 0:
        print("Warning: No reads to deal with of all these sv, checkout your sv file")
        return

    # step3: modify the reads in total_chosen_reads itself
    print("step3: modify the reads in total_chosen_reads itself")
    reads_modify(total_chosen_reads, total_chosen_reads_muts,
                 run_args.reffasta, int(run_args.process))

    # step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference
    print("step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference")
    edit_remap_bam_file, exclude_bam_file = reads_replace(
        run_args.bamfile, total_chosen_reads, run_args.seqer,
        run_args.floworder, run_args.libkey, run_args.barcode, run_args.tag,
        temp_out_dir, run_args.aligner, run_args.alignerIndex, run_args.single)

    # step5: merge remap.edit.bam and exclude exclude.bam and sort
    print("step5: merge remap.edit.bam and exclude exclude.bam and sort")
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    print("Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_list_file, invalid_log_file))
Exemplo n.º 4
0
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key,
                  barcode, tag, out_dir, aligner, aligner_index, is_single):
    """Split ``bam_file`` into edited and untouched reads and remap the edited set.

    Every read whose name is a key of ``total_chosen_reads`` goes to
    ``<out_dir>/edit.bam``; if an entry exists for the read's strand, its
    sequence and qualities are replaced first.  All other reads go to
    ``<out_dir>/exclude.bam``.  ``edit.bam`` is then remapped, sorted and
    indexed.

    :param bam_file: input BAM path (read twice with ``fetch()``).
    :param total_chosen_reads: mapping ``read name -> {strand: read}``; only
        ``query_sequence`` and ``query_qualities`` of the stored reads are
        used here.
    :param seqer: when equal to ``"life"`` replaced reads are passed through
        ``deal_life_reads`` together with ``flow_order``, ``lib_key`` and
        ``barcode``.
    :param flow_order: forwarded to ``deal_life_reads``.
    :param lib_key: forwarded to ``deal_life_reads``.
    :param barcode: forwarded to ``deal_life_reads``.
    :param tag: when truthy, replaced reads go through ``add_tag`` and the
        sorted remapped BAM goes through ``bam_add_tag``.
    :param out_dir: directory for every file written by this function.
    :param aligner: forwarded to ``remap``.
    :param aligner_index: forwarded to ``remap``.
    :param is_single: forwarded to ``remap``.
    :return: tuple ``(remapped_sorted_bam_path, exclude_bam_path)``.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    exclude_bam_file = os.path.join(out_dir, "exclude.bam")

    bam = pysam.AlignmentFile(bam_file)
    try:
        # read name -> {strand: read} for every read that belongs to a chosen
        # name, whether or not that particular strand was actually edited.
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            reads_by_strand = edit_bam_reads.setdefault(read_name, {})
            chosen_by_strand = total_chosen_reads[read_name]
            if strand in chosen_by_strand:
                my_read = chosen_by_strand[strand]
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            reads_by_strand[strand] = read

        # write edited reads into edit.bam
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for reads_by_strand in edit_bam_reads.values():
                for read in reads_by_strand.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # write not edited reads into exclude.bam
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb', template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()
    finally:
        # The input handle was previously never closed.
        bam.close()

    # remap the edited reads; only the first @RG header line (if any) is
    # handed to remap().
    header = os.path.join(out_dir, 'bam.header')
    os.system('samtools view -H %s|grep "^@RG" > %s' % (bam_file, header))
    with open(header, 'r') as header_handle:
        head = header_handle.readline().rstrip() or None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index,
          edit_bam_file,
          edit_remap_bam_file,
          aligner,
          is_single,
          header=head)

    # bamSort receives an output prefix; the sorted file is read back below
    # as "<prefix>.bam".
    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    bamIndex(edit_remap_bam_sorted_file)

    if tag:
        # NOTE(review): input and output of bam_add_tag are the same path
        # ("edit.remap.sort.bam"). Confirm bam_add_tag supports in-place
        # rewriting; otherwise it may truncate its own input.
        edit_remap_addtag_file = os.path.join(out_dir, "edit.remap.sort.bam")
        bam_add_tag(edit_remap_bam_sorted_file, edit_remap_addtag_file)
    else:
        edit_remap_addtag_file = edit_remap_bam_sorted_file

    return edit_remap_addtag_file, exclude_bam_file
Exemplo n.º 5
0
def main(run_args):
    """Drive the structural-variant (SV) spike-in pipeline.

    Steps, as executed below:

    0. ``check_sv_file`` builds the SV list; the process exits with the
       message ``"no sv list to deal with"`` when it is empty.
    1. ``get_insertSize_range`` computes the insert size.
    2. ``deal_sv`` produces the modify / delete / add read files and lists.
    3. ``get_reads_by_region`` and ``write_sub_bam`` split the input BAM into
       a "used" and an "exclude" BAM.
    4. ``merge_edit_bam`` merges the edited reads and remaps them.
    5. The remapped and excluded BAMs are merged into
       ``<outdir>/edit.sorted.bam`` and indexed.

    :param run_args: parsed command-line namespace.  Attributes read here:
        ``outdir, svfile, reffasta, bamfile, readlength, single, minmapq,
        multmapfilter, mindepth, minmutreads, process, seqer, aligner,
        alignerIndex, floworder, libkey, barcode, tag``.
    :return: ``None``.
    :raises SystemExit: when the SV list is empty.
    """
    import sys  # local import: only needed for the early-exit path

    # makedirs creates outdir and tempDir in one go and, unlike os.mkdir,
    # also works when outdir's parent directories do not exist yet.
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    if not os.path.isdir(temp_out_dir):
        os.makedirs(temp_out_dir)

    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    run_log_file = os.path.join(run_args.outdir, 'run.log')
    run_log = RunLog(run_log_file)

    success_file = os.path.join(run_args.outdir, 'success_list.txt')

    # Single-argument print(...) prints the same text under Python 2's print
    # statement and is also valid Python 3 syntax.
    try:
        # step0: prepare sv list
        sv_list = check_sv_file(run_args.svfile, run_args.reffasta,
                                invalid_log)
        if not sv_list:
            # sys.exit rather than the site-provided exit(), which is absent
            # under `python -S` and in frozen builds.
            sys.exit("no sv list to deal with")

        # step1: get insert size of paired reads
        print("step1: get insert size of paired reads")
        # NOTE(review): readlength is passed unconverted here but as
        # int(run_args.readlength) to deal_sv below; confirm that
        # get_insertSize_range accepts the raw value.
        insert_size = get_insertSize_range(run_args.bamfile,
                                           run_args.readlength,
                                           run_args.single)

        # step2: deal with sv and get total edited reads
        print("step2: deal with sv and get total edited reads")
        (total_modify_reads_file, total_delete_reads_file,
         total_add_reads_file, total_modify_list, total_delete_list,
         total_add_list) = deal_sv(
            run_args.bamfile, run_args.reffasta, sv_list,
            run_args.single,
            int(run_args.minmapq),
            run_args.multmapfilter,
            int(run_args.mindepth),
            int(run_args.minmutreads),
            int(run_args.readlength),
            temp_out_dir,
            insert_size,
            invalid_log,
            run_log, success_file)
    finally:
        # Steps 0-2 are the only users of the log; close it on the early
        # exit and on errors as well, not just on success.
        invalid_log.close()

    # step3: get reads by region bed and write bam file
    print("step3: get reads by region bed and write bam file")
    chrom_list, used_bam_file_tmp, exclude_bam_file_tmp = get_reads_by_region(
        run_args.bamfile, sv_list, temp_out_dir)

    # write reads which may probably used to used.bam and reads should not be used to exclude.bam
    used_bam_file, exclude_bam_file = write_sub_bam(
        chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, temp_out_dir,
        total_modify_reads_file, total_delete_reads_file,
        total_add_reads_file, int(run_args.process))

    # step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads
    print("step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads")
    edit_remap_bam_file = merge_edit_bam(
        run_args.bamfile, temp_out_dir, run_args.single,
        total_modify_reads_file, total_add_reads_file, used_bam_file,
        total_modify_list, total_add_list, run_args.seqer, run_args.aligner,
        run_args.alignerIndex, run_args.floworder, run_args.libkey,
        run_args.barcode, run_args.tag)

    # step5: remapped edit reads and merge
    print("step5: remapped edit reads and merge")
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    print("Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_file, invalid_log_file))