Exemplo n.º 1
0
def write_bam_byChr(bamFile, chr, excludeBamFile, editBamFile, modifyReadsName,
                    deleteReadsName, addReadsName):
    """Split the reads of one reference sequence into "edit" and "exclude" BAMs.

    Every read returned by ``bam.fetch(chr)`` is routed by its key name
    (``getKeyName(read)``), first match wins:

    * in ``modifyReadsName`` -> written to ``editBamFile`` only;
    * in ``deleteReadsName`` -> written to neither BAM; its key name is
      appended to ``editBamFile + ".del"``;
    * in ``addReadsName``    -> written to both ``editBamFile`` and
      ``excludeBamFile``;
    * otherwise              -> written to ``excludeBamFile`` only.

    Args:
        bamFile: path of the input BAM (opened with mode ``'rb'``).
        chr: reference name passed to ``fetch``. The name shadows the
            builtin but is kept because it is part of the signature.
        excludeBamFile: output BAM path; header is taken from ``bamFile``.
        editBamFile: output BAM path; header is taken from ``bamFile``.
        modifyReadsName, deleteReadsName, addReadsName: collections of key
            names (hashable values, as produced by ``getKeyName``).

    Returns:
        None. The reference name and the number of reads seen are printed.
    """
    print(chr)

    def _as_lookup(names):
        # Membership is tested once per read; a hash-based container avoids
        # scanning a list for every read in the BAM.
        if isinstance(names, (set, frozenset, dict)):
            return names
        return set(names)

    modify_names = _as_lookup(modifyReadsName)
    delete_names = _as_lookup(deleteReadsName)
    add_names = _as_lookup(addReadsName)

    excludeBam = editBam = delete = None
    bam = pysam.AlignmentFile(bamFile, 'rb')
    try:
        excludeBam = pysam.AlignmentFile(excludeBamFile, 'wb', template=bam)
        editBam = pysam.AlignmentFile(editBamFile, 'wb', template=bam)
        delete = open(editBamFile + ".del", 'w')
        m = 0
        for read in bam.fetch(chr):
            m += 1
            keyname = getKeyName(read)
            if keyname in modify_names:
                editBam.write(read)
            elif keyname in delete_names:
                delete.write(keyname + "\n")
            elif keyname in add_names:
                editBam.write(read)
                excludeBam.write(read)
            else:
                excludeBam.write(read)
        # Two spaces on purpose: this reproduces the output of the original
        # Python 2 statement `print "Total reads: ", m`.
        print("Total reads:  %s" % m)
    finally:
        # Close whatever was opened, even if reading or writing failed.
        for handle in (delete, bam, excludeBam, editBam):
            if handle is not None:
                handle.close()
Exemplo n.º 2
0
def write_bam_byChr(bamFile, chr, excludeBamFile, editBamFile,
                    total_modify_reads_file, total_delete_reads_file,
                    total_add_reads_file):
    """Split the reads of one reference sequence into "edit" and "exclude" BAMs.

    The three name collections are obtained from
    ``get_name_list(<file>, chr)``. Every read returned by ``bam.fetch(chr)``
    is then routed by its key name (``getKeyName(read)``), first match wins:

    * modify name -> written to ``editBamFile`` only;
    * delete name -> written to neither BAM; its key name is appended to
      ``editBamFile + ".del"``;
    * add name    -> written to both ``editBamFile`` and ``excludeBamFile``;
    * otherwise   -> written to ``excludeBamFile`` only.

    Args:
        bamFile: path of the input BAM (opened with mode ``'rb'``).
        chr: reference name passed to ``fetch`` and to ``get_name_list``.
            The name shadows the builtin but is kept because it is part of
            the signature.
        excludeBamFile: output BAM path; header is taken from ``bamFile``.
        editBamFile: output BAM path; header is taken from ``bamFile``.
        total_modify_reads_file, total_delete_reads_file,
        total_add_reads_file: inputs handed to ``get_name_list``.

    Returns:
        None. The reference name and the number of reads seen are printed.
    """
    print(chr)
    excludeBam = editBam = delete = None
    bam = pysam.AlignmentFile(bamFile, 'rb')
    try:
        excludeBam = pysam.AlignmentFile(excludeBamFile, 'wb', template=bam)
        editBam = pysam.AlignmentFile(editBamFile, 'wb', template=bam)
        delete = open(editBamFile + ".del", 'w')
        m = 0
        # NOTE(review): these are tested with `in` once per read; if
        # get_name_list returns lists, converting them to sets there would
        # avoid a linear scan per read -- confirm its return type.
        modifyReadsName = get_name_list(total_modify_reads_file, chr)
        deleteReadsName = get_name_list(total_delete_reads_file, chr)
        addReadsName = get_name_list(total_add_reads_file, chr)
        for read in bam.fetch(chr):
            m += 1
            keyname = getKeyName(read)
            if keyname in modifyReadsName:
                editBam.write(read)
            elif keyname in deleteReadsName:
                delete.write(keyname + "\n")
            elif keyname in addReadsName:
                editBam.write(read)
                excludeBam.write(read)
            else:
                excludeBam.write(read)
        # Two spaces on purpose: this reproduces the output of the original
        # Python 2 statement `print "Total reads: ", m`.
        print("Total reads:  %s" % m)
    finally:
        # Close whatever was opened, even if reading or writing failed.
        for handle in (delete, bam, excludeBam, editBam):
            if handle is not None:
                handle.close()
Exemplo n.º 3
0
def _get_write(total_reads, reads_file_out, reads_pair_out):
    """Dump reads and their per-group key names to two text files.

    Args:
        total_reads: iterable of read groups (each group is an iterable of
            reads, e.g. a mate pair).
        reads_file_out: path that receives ``str(read)`` for every read, in
            iteration order, with nothing added between records.
        reads_pair_out: path that receives one line per group: the
            ``getKeyName`` values of its reads joined by ``","``.

    Both files are created/truncated even when ``total_reads`` is empty and
    are closed even if a read cannot be processed.
    """
    with open(reads_file_out, 'w') as reads_file, \
            open(reads_pair_out, 'w') as reads_pair:
        for read_pair in total_reads:
            names = []
            for read in read_pair:
                names.append(getKeyName(read))
                reads_file.write(str(read))
            reads_pair.write("%s\n" % ",".join(names))
Exemplo n.º 4
0
def merge_edit_reads(total_modify_reads, total_add_reads, total_delete_reads):
    """Flatten three collections of read groups into lists of key names.

    Args:
        total_modify_reads: iterable of read groups to be modified.
        total_add_reads: iterable of read groups to be added.
        total_delete_reads: iterable of read groups to be deleted.

    Returns:
        A 3-tuple of lists ``(modify_names, delete_names, add_names)``, each
        holding ``getKeyName(read)`` for every read in iteration order.
        Note the return order is modify, DELETE, add -- it differs from the
        parameter order (modify, add, delete).
    """
    def _key_names(read_pairs):
        # One key name per read, groups flattened in iteration order.
        names = []
        for read_pair in read_pairs:
            for read in read_pair:
                names.append(getKeyName(read))
        return names

    modify_names = _key_names(total_modify_reads)
    add_names = _key_names(total_add_reads)
    delete_names = _key_names(total_delete_reads)
    return modify_names, delete_names, add_names
Exemplo n.º 5
0
def get_write_reads(total_modify_reads, total_delete_reads, total_add_reads,
                    total_reads_file_dict, total_reads_list_dict):
    """Append reads and their per-group key names to already-open outputs.

    For each category (``'modify'``, ``'delete'``, ``'add'``, in that order)
    the matching writer is looked up in both dictionaries, then for every
    read group of that category:

    * ``str(read)`` of each read is written to
      ``total_reads_file_dict[category]``;
    * one line with the group's ``getKeyName`` values joined by ``","`` is
      written to ``total_reads_list_dict[category]``.

    The writers are neither opened nor closed here; the caller owns them.
    """
    categories = (
        ('modify', total_modify_reads),
        ('delete', total_delete_reads),
        ('add', total_add_reads),
    )
    for category, read_groups in categories:
        records_out = total_reads_file_dict[category]
        names_out = total_reads_list_dict[category]
        for group in read_groups:
            key_names = []
            for read in group:
                key_names.append(getKeyName(read))
                records_out.write(str(read))
            names_out.write("%s\n" % ",".join(key_names))
Exemplo n.º 6
0
def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_readname_list, total_delete_readname_list,
                  total_add_readname_list, process):
    """Split a BAM per chromosome in parallel, then merge and index the parts.

    For every entry of ``chrom_list`` a ``write_bam_byChr`` task is submitted
    to a process pool, producing ``<out_dir>/exclude_<chrom>.bam`` and
    ``<out_dir>/used_<chrom>.bam``. Afterwards:

    * ``exclude_bam_file_tmp`` plus all per-chromosome exclude BAMs are
      merged into ``<out_dir>/exclude.bam``;
    * the per-chromosome used BAMs are merged into ``<out_dir>/used.bam``
      (when there is exactly one, it is used directly, without merging);
    * the used BAM is sorted with prefix ``<out_dir>/used.sort``, indexed as
      ``<out_dir>/used.sort.bam`` and loaded into memory.

    Args:
        chrom_list: reference names to process.
        used_bam_file_tmp: input BAM handed to every worker.
        exclude_bam_file_tmp: BAM that is always part of the exclude merge.
        out_dir: directory for all outputs.
        total_modify_readname_list, total_delete_readname_list,
        total_add_readname_list: key-name collections passed to the workers.
        process: pool size (converted with ``int``).

    Returns:
        ``(used_reads, used_bam_file, exclude_bam_file)`` where ``used_reads``
        maps ``getKeyName(read)`` to the read from the sorted used BAM (a
        later read with the same key replaces an earlier one) and
        ``used_bam_file`` is the unsorted used BAM path.

    Raises:
        Any exception raised inside a worker is re-raised here instead of
        being silently dropped.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    pending = []
    try:
        for chrom in chrom_list:
            excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
            exclude_bam_list.append(excludeBam_chr)
            usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
            usedBamList.append(usedBam_chr)

            pending.append(write_bam_pool.apply_async(
                write_bam_byChr,
                args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr,
                      total_modify_readname_list, total_delete_readname_list,
                      total_add_readname_list)))
        write_bam_pool.close()
        # apply_async swallows worker exceptions unless the result is
        # fetched; get() re-raises them so we never merge partial output.
        for result in pending:
            result.get()
    except Exception:
        write_bam_pool.terminate()
        raise
    finally:
        write_bam_pool.join()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)
    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        used_bam_file = usedBamList[0]

    # NOTE(review): assumes bamSort(<in>, <prefix>) writes "<prefix>.bam".
    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)
    used_reads = {}
    used_bam = pysam.AlignmentFile(used_sort_bam_file, 'rb')
    try:
        for read in used_bam.fetch():
            used_reads[getKeyName(read)] = read
    finally:
        used_bam.close()
    return used_reads, used_bam_file, exclude_bam_file
Exemplo n.º 7
0
def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads,
                   total_add_reads, used_reads, seqer, aligner, aligner_index,
                   flow_order, lib_key, barcode, tag):
    """Write the edited reads to ``edit.bam``, remap them and index the result.

    Every edited read is a deep copy of the matching original from
    ``used_reads`` (looked up by ``getKeyName``) that receives a fresh read
    name. The mapping "new name -> original name(s), flags, coordinates" is
    logged to ``<out_dir>/readname_convert.txt``.

    Single-end mode (``is_single`` true):
      * modify reads take sequence and qualities from the edited read, are
        renamed ``<prefix before first ':'>:<get_new_readname()>``, are
        passed through ``deal_life_reads`` when ``seqer == "life"`` and
        through ``add_tag`` when ``tag`` is truthy;
      * add reads are only renamed (``get_new_readname()``); sequence,
        qualities and tags are left as in the original.
        NOTE(review): this asymmetry is preserved from the original code --
        confirm it is intended.

    Paired-end mode: modify and add groups are handled identically; both
    mates take sequence/qualities from the edited reads and share one new
    name; ``add_tag`` is applied when ``tag`` is truthy.

    After writing, ``remap`` produces ``<out_dir>/edit.remap.bam``. For
    paired-end data ``bamAddRG`` then writes ``edit.remap.addRG.bam``.

    Args:
        bam_file: original BAM; used as header template and given to
            ``bamAddRG``.
        out_dir: directory for all outputs.
        is_single: true for single-end data.
        total_modify_reads, total_add_reads: sequences of read groups
            (concatenated with ``+`` in paired-end mode).
        used_reads: mapping key name -> original read.
        seqer, flow_order, lib_key, barcode: sequencer-specific settings
            forwarded to ``deal_life_reads``.
        aligner, aligner_index: forwarded to ``remap``.
        tag: whether to call ``add_tag`` on edited reads.

    Returns:
        Path of the indexed BAM: ``edit.remap.addRG.bam`` for paired-end
        data, ``edit.remap.bam`` for single-end data.

    Raises:
        KeyError: if an edited read has no counterpart in ``used_reads``.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    single_fmt = "%s: %s, %s, %s-%s\n"
    paired_fmt = "%s: %s, %s, %s, %s, %s-%s, %s-%s\n"
    # new read name -> {strand: edited read}; handed to bamAddRG below.
    edit_bam_reads = {}

    def _remember(name, read):
        # Index an edited read by its new name and strand.
        strand = getReadStrand(read)
        edit_bam_reads.setdefault(name, dict())[strand] = read

    bam = pysam.AlignmentFile(bam_file, 'rb')
    try:
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            with open(readname_convert_file, 'w') as fout_convert:
                if is_single:
                    for read_pair in total_modify_reads:
                        read1 = read_pair[0]
                        orig_read1 = used_reads[getKeyName(read1)]
                        new_read1 = copy.deepcopy(orig_read1)
                        new_read1.query_sequence = read1.query_sequence
                        new_read1.query_qualities = read1.query_qualities
                        new_name = (read1.query_name.split(":")[0] + ":" +
                                    get_new_readname())
                        new_read1.query_name = new_name
                        if seqer == "life":
                            new_read1 = deal_life_reads(
                                new_read1, flow_order, lib_key, barcode)
                        if tag:
                            new_read1 = add_tag(new_read1)
                        edit_bam.write(new_read1)
                        fout_convert.write(single_fmt % (
                            new_name, orig_read1.query_name,
                            new_read1.is_read1, new_read1.reference_start,
                            new_read1.reference_end))
                        _remember(new_name, new_read1)

                    for read_pair in total_add_reads:
                        read1 = read_pair[0]
                        orig_read1 = used_reads[getKeyName(read1)]
                        new_read1 = copy.deepcopy(orig_read1)
                        new_name = get_new_readname()
                        new_read1.query_name = new_name
                        edit_bam.write(new_read1)
                        fout_convert.write(single_fmt % (
                            new_name, orig_read1.query_name,
                            new_read1.is_read1, new_read1.reference_start,
                            new_read1.reference_end))
                        _remember(new_name, new_read1)
                else:
                    for read_pair in total_modify_reads + total_add_reads:
                        read1 = read_pair[0]
                        read2 = read_pair[1]
                        orig_read1 = used_reads[getKeyName(read1)]
                        orig_read2 = used_reads[getKeyName(read2)]
                        new_read1 = copy.deepcopy(orig_read1)
                        new_read2 = copy.deepcopy(orig_read2)
                        new_read1.query_sequence = read1.query_sequence
                        new_read1.query_qualities = read1.query_qualities
                        new_read2.query_sequence = read2.query_sequence
                        new_read2.query_qualities = read2.query_qualities
                        new_name = get_new_readname()
                        new_read1.query_name = new_name
                        new_read2.query_name = new_name
                        # Strands are taken from both mates before either is
                        # indexed, and the reads are indexed before tagging,
                        # exactly as in the original statement order.
                        strand1 = getReadStrand(new_read1)
                        strand2 = getReadStrand(new_read2)
                        by_strand = edit_bam_reads.setdefault(new_name, dict())
                        by_strand[strand1] = new_read1
                        by_strand[strand2] = new_read2
                        if tag:
                            new_read1 = add_tag(new_read1)
                            new_read2 = add_tag(new_read2)

                        fout_convert.write(paired_fmt % (
                            new_name,
                            orig_read1.query_name,
                            orig_read2.query_name,
                            new_read1.is_read1,
                            new_read2.is_read2,
                            new_read1.reference_start,
                            new_read1.reference_end,
                            new_read2.reference_start,
                            new_read2.reference_end,
                        ))
                        edit_bam.write(new_read1)
                        edit_bam.write(new_read2)
        finally:
            # Must be closed (flushed) before remap() reads edit.bam.
            edit_bam.close()

        edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
        remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner,
              is_single)

        if not is_single:
            editRemapBam_addRG_File = os.path.join(out_dir,
                                                   "edit.remap.addRG.bam")
            editRemap = pysam.AlignmentFile(edit_remap_bam_file, 'rb')
            try:
                bamAddRG(editRemap, edit_bam_reads, bam,
                         editRemapBam_addRG_File)
            finally:
                editRemap.close()
        else:
            editRemapBam_addRG_File = edit_remap_bam_file
        bamIndex(editRemapBam_addRG_File)
        return editRemapBam_addRG_File
    finally:
        # The template BAM is still needed by bamAddRG, so it is released
        # only once everything else is done.
        bam.close()
Exemplo n.º 8
0
def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads_file,
                   total_add_reads_file, used_bam_file, total_modify_list,
                   total_add_list, seqer, aligner, aligner_index, flow_order,
                   lib_key, barcode, tag):
    """Rebuild edited reads from on-disk lists, write, remap and sort them.

    Steps:
      1. Load every read of ``used_bam_file`` into a dict keyed by
         ``getKeyName(read)``.
      2. Load edited sequences/qualities with ``get_sequence_dict`` from
         ``total_modify_reads_file`` and ``total_add_reads_file``.
      3. For each line of ``total_modify_list`` and then ``total_add_list``
         (comma-separated key names; the first name is used for single-end
         data, the first two for paired-end data): skip the line if any name
         is missing from the used reads; otherwise deep-copy the original
         read(s), apply the edited sequence and qualities, assign a new
         shared read name, log one line per read to
         ``<out_dir>/readname_convert.txt`` and write the read(s) to
         ``<out_dir>/edit.bam``. ``deal_life_reads`` is applied only to
         single-end *modify* reads when ``seqer == "life"``.
      4. Extract the first ``@RG`` header line of ``bam_file`` via
         ``samtools`` and pass it (or ``None``) to ``remap``.
      5. Sort the remapped BAM with prefix ``<out_dir>/edit.remap.sort``.

    Args:
        bam_file: original BAM; header template and source of the @RG line.
        out_dir: directory for all outputs.
        is_single: true for single-end data.
        total_modify_reads_file, total_add_reads_file: inputs for
            ``get_sequence_dict``.
        used_bam_file: BAM holding the original reads.
        total_modify_list, total_add_list: text files of key-name groups.
        seqer, flow_order, lib_key, barcode: forwarded to
            ``deal_life_reads``.
        aligner, aligner_index: forwarded to ``remap``.
        tag: accepted for interface compatibility; not used by this
            function.

    Returns:
        ``<out_dir>/edit.remap.sort.bam``.
        NOTE(review): assumes bamSort(<in>, <prefix>) writes "<prefix>.bam".

    Raises:
        IndexError: in paired-end mode, if a list line has fewer than two
            comma-separated fields.
        KeyError: if a listed read has no edited sequence/quality entry.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    convert_fmt = "%s: %s, %s, %s-%s\n"
    mates = 1 if is_single else 2

    def _emit_edited(list_file, seq_by_name, qual_by_name, fix_life,
                     used_reads, edit_bam, fout_convert):
        # Rebuild and write the edited reads named in one list file.
        with open(list_file) as fin:
            for line in fin:
                fields = line.strip().split(",")
                # Explicit indexing keeps the IndexError on short lines.
                names = [fields[i] for i in range(mates)]
                if any(name not in used_reads for name in names):
                    continue
                originals = [used_reads[name] for name in names]
                edited = [copy.deepcopy(read) for read in originals]
                for name, read in zip(names, edited):
                    read.query_sequence = seq_by_name[name]
                    read.query_qualities = qual_by_name[name]
                new_name = get_new_readname()
                for read in edited:
                    read.query_name = new_name
                if fix_life:
                    edited = [
                        deal_life_reads(read, flow_order, lib_key, barcode)
                        for read in edited
                    ]
                for orig_read, read in zip(originals, edited):
                    fout_convert.write(convert_fmt % (
                        new_name, orig_read.query_name, read.is_read1,
                        read.reference_start, read.reference_end))
                for read in edited:
                    edit_bam.write(read)

    bam = pysam.AlignmentFile(bam_file, 'rb')
    try:
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            with open(readname_convert_file, 'w') as fout_convert:
                used_reads = {}
                used_bam = pysam.AlignmentFile(used_bam_file, 'rb')
                try:
                    for read in used_bam.fetch():
                        used_reads[getKeyName(read)] = read
                finally:
                    used_bam.close()

                modify_reads_seq, modify_reads_quan = get_sequence_dict(
                    total_modify_reads_file)
                add_reads_seq, add_reads_quan = get_sequence_dict(
                    total_add_reads_file)

                _emit_edited(total_modify_list, modify_reads_seq,
                             modify_reads_quan,
                             is_single and seqer == "life",
                             used_reads, edit_bam, fout_convert)
                _emit_edited(total_add_list, add_reads_seq, add_reads_quan,
                             False, used_reads, edit_bam, fout_convert)
        finally:
            # Must be closed (flushed) before remap() reads edit.bam.
            edit_bam.close()
    finally:
        bam.close()

    header = os.path.join(out_dir, 'bam.header')
    # NOTE(review): both paths are interpolated unquoted into a shell
    # command; paths containing spaces or shell metacharacters will break
    # it. The exit status is ignored, so a failure yields head = None.
    os.system('samtools view -H %s|grep "^@RG" > %s' % (bam_file, header))
    with open(header, 'r') as header_in:
        head = header_in.readline().rstrip()
    if not head:
        head = None
    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner,
          is_single, head)
    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    return edit_remap_bam_sorted_file