Пример #1
0
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key,
                  barcode, tag, out_dir, aligner, aligner_index, is_single):
    """Swap edited sequences into the chosen reads and remap them.

    Every alignment returned by ``bam.fetch()`` whose name is a key of
    ``total_chosen_reads`` goes to ``<out_dir>/edit.bam``; when its strand key
    (from ``getReadStrand``) is present for that name, its sequence and
    qualities are replaced first.  All other fetched alignments go to
    ``<out_dir>/exclude.bam``.  ``edit.bam`` is then passed to ``remap`` and
    the result to ``bamSort``.

    Args:
        bam_file: Path of the input BAM (read with ``pysam.AlignmentFile``).
        total_chosen_reads: Mapping ``read name -> {strand: read}`` where each
            read exposes ``query_sequence`` and ``query_qualities``.
        seqer: When equal to ``"life"``, replaced reads are additionally
            passed through ``deal_life_reads``.
        flow_order, lib_key, barcode: Forwarded to ``deal_life_reads``.
        tag: When truthy, replaced reads are passed through ``add_tag``.
        out_dir: Directory receiving all intermediate and output files.
        aligner, aligner_index, is_single: Forwarded to ``remap``.

    Returns:
        Tuple ``(edit_remap_bam_sorted_file, exclude_bam_file)``: the path
        ``<out_dir>/edit.remap.sort.bam`` (presumably what ``bamSort`` writes
        for the given prefix -- confirm against ``bamSort``) and the path of
        ``exclude.bam``.
    """
    try:
        from shlex import quote as shell_quote
    except ImportError:  # Python 2 ships the same helper in ``pipes``
        from pipes import quote as shell_quote

    edit_bam_file = os.path.join(out_dir, "edit.bam")
    exclude_bam_file = os.path.join(out_dir, "exclude.bam")

    bam = pysam.AlignmentFile(bam_file)
    try:
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            chosen = total_chosen_reads[read_name]
            if strand in chosen:
                my_read = chosen[strand]
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            # Reads of a chosen name are kept whether or not they were
            # rewritten, so they all end up in edit.bam.
            edit_bam_reads.setdefault(read_name, {})[strand] = read

        # write edited reads into edit.bam
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for readInfo in edit_bam_reads.values():
                for read in readInfo.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # write not edited reads into exclude.bam
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb', template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()
    finally:
        bam.close()

    # remap the edited reads
    header = os.path.join(out_dir, 'bam.header')
    # Quote both paths so spaces / shell metacharacters cannot break (or be
    # interpreted by) the shell pipeline.
    os.system('samtools view -H %s|grep "^@RG" > %s' %
              (shell_quote(bam_file), shell_quote(header)))
    with open(header, 'r') as header_handle:
        # Only the first @RG line is used; an empty result becomes None.
        head = header_handle.readline().rstrip() or None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index,
          edit_bam_file,
          edit_remap_bam_file,
          aligner,
          is_single,
          header=head)
    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    return edit_remap_bam_sorted_file, exclude_bam_file
Пример #2
0
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key, barcode, tag, out_dir, aligner,
                  aligner_index, is_single):
    """Swap edited sequences into the chosen reads, remap them, restore RGs.

    Alignments from ``bam.fetch()`` whose name is a key of
    ``total_chosen_reads`` are written to ``<out_dir>/edit.bam`` (with
    sequence/qualities replaced when their ``getReadStrand`` key is present
    for that name); every other fetched alignment is written to
    ``<out_dir>/exclude.bam``.  ``edit.bam`` is passed to ``remap``; for
    paired data the remapped file is then passed to ``bamAddRG``.

    Args:
        bam_file: Path of the input BAM (read with ``pysam.AlignmentFile``).
        total_chosen_reads: Mapping ``read name -> {strand: read}`` where each
            read exposes ``query_sequence`` and ``query_qualities``.
        seqer: When equal to ``"life"``, replaced reads are additionally
            passed through ``deal_life_reads``.
        flow_order, lib_key, barcode: Forwarded to ``deal_life_reads``.
        tag: When truthy, replaced reads are passed through ``add_tag``.
        out_dir: Directory receiving all intermediate and output files.
        aligner, aligner_index, is_single: Forwarded to ``remap``;
            ``is_single`` also selects whether ``bamAddRG`` is run.

    Returns:
        Tuple ``(remapped_bam_path, exclude_bam_file)``.  The first item is
        ``<out_dir>/edit.remap.addRG.bam`` when ``is_single`` is falsy and
        ``<out_dir>/edit.remap.bam`` otherwise.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")

    bam = pysam.AlignmentFile(bam_file)
    try:
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            chosen = total_chosen_reads[read_name]
            if strand in chosen:
                my_read = chosen[strand]
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            # Reads of a chosen name are kept whether or not they were
            # rewritten, so they all end up in edit.bam.
            edit_bam_reads.setdefault(read_name, {})[strand] = read

        # write edited reads into edit.bam
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for readInfo in edit_bam_reads.values():
                for read in readInfo.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # write not edited reads into exclude.bam
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb', template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()

        # remap the edited reads
        remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner, is_single)

        if is_single:
            return edit_remap_bam_file, exclude_bam_file

        # Paired data: bamAddRG still needs the open template BAM, so this
        # runs before the input handle is released below.
        editRemapBam_addRG_File = os.path.join(out_dir, "edit.remap.addRG.bam")
        edit_remap = pysam.AlignmentFile(edit_remap_bam_file, 'rb')
        try:
            bamAddRG(edit_remap, edit_bam_reads, bam, editRemapBam_addRG_File)
        finally:
            edit_remap.close()
        return editRemapBam_addRG_File, exclude_bam_file
    finally:
        bam.close()
Пример #3
0
def bamAddRG(editRemap, editBamReads, templateBamFile, outBamFile):
    """Copy a remapped BAM to ``outBamFile`` with the template's @RG header.

    The output header is the header of ``editRemap`` with its ``"RG"`` entry
    replaced by the one from ``templateBamFile``.  Each alignment from
    ``editRemap.fetch()`` is passed through ``readAddRG`` together with the
    matching pre-remap read (or ``None``) and the value of
    ``_getRGs(templateBamFile)``, and the result is written out.

    Args:
        editRemap: Open ``pysam.AlignmentFile`` holding the remapped reads.
        editBamReads: Mapping ``read name -> {strand: read}`` of the reads as
            they were before remapping; strand keys come from
            ``getReadStrand``.
        templateBamFile: Open ``pysam.AlignmentFile`` supplying the ``"RG"``
            header entry.
        outBamFile: Path of the BAM file to create.

    Raises:
        KeyError: If the template header has no ``"RG"`` entry.
    """
    header = editRemap.header
    # Recent pysam versions return an immutable AlignmentHeader here; take a
    # plain dict copy so the RG entry can be replaced on any version.
    head = header.to_dict() if hasattr(header, "to_dict") else dict(header)
    head["RG"] = templateBamFile.header["RG"]
    RG = _getRGs(templateBamFile)

    addRGBam = pysam.AlignmentFile(outBamFile, 'wb', header=head)
    try:
        for read in editRemap.fetch():
            strand = getReadStrand(read)
            # A remapped read may carry a name or strand key that was never
            # stored; readAddRG already accepts None for "no original".
            # NOTE(review): previously a missing strand raised KeyError.
            strands = editBamReads.get(read.query_name)
            orig = strands.get(strand) if strands is not None else None
            addRGBam.write(readAddRG(read, orig, RG))
    finally:
        addRGBam.close()
Пример #4
0
def deal_haplotype(bam_file, haplotype, reffasta, haplotype_prefix, mindepth,
                   minmutreads, minmapq, diffcover, is_single,
                   is_multmapfilter, aligner, aligner_index, **kwargs):
    """Pick reads over a haplotype, edit them, and check coverage after remap.

    Steps, as implemented below:
      1. Fetch alignments over ``haplotype.chrom:start-(end + 1)``; every
         fetched alignment counts towards ``depth``; non-secondary,
         non-supplementary ones are indexed by name and strand key.
      2. Fail if ``depth`` or ``int(depth * haplotype.freq)`` is below the
         given minimum.
      3. Select reads with ``pick_reads`` and apply ``editRead`` to each.
      4. Write the kept reads to ``<haplotype_prefix>.chosen.edited.bam``,
         remap to ``<haplotype_prefix>.chosen.remap.bam`` and accept or
         reject via ``judge_coverdiff``.

    Args:
        bam_file: Path of the input BAM.
        haplotype: Object exposing ``chrom``, ``start``, ``end``, ``freq`` and
            ``mutList``.
        reffasta: Forwarded to ``editRead``.
        haplotype_prefix: Path prefix for the two intermediate BAM files.
        mindepth: Minimum number of fetched alignments (converted with int()).
        minmutreads: Minimum number of reads to mutate (converted with int()).
        minmapq, is_multmapfilter: Forwarded to ``pick_reads``.
        diffcover: Threshold forwarded to ``judge_coverdiff`` as a float.
        is_single: Truthy for single-end data; also forwarded to
            ``pick_reads`` and ``remap``.
        aligner, aligner_index: Forwarded to ``remap``.
        **kwargs: Accepted and ignored.

    Returns:
        On success the 4-tuple ``(my_chosen_reads, my_mate_reads,
        real_mut_reads_num, depth)``; on any failure the 2-tuple
        ``(False, message)``.
    """
    location = "haplotype in position %s:%s-%s" % (
        haplotype.chrom, haplotype.start, haplotype.end)

    # NOTE(review): `bam` is deliberately left open. The reads it yields are
    # handed back to the caller wrapped in Read(...), and some pysam versions
    # may need the originating file to stay open -- confirm before closing.
    bam = pysam.AlignmentFile(bam_file, 'rb')
    reads_dict = OrderedDict()
    depth = 0
    for read in bam.fetch(reference=haplotype.chrom,
                          start=haplotype.start,
                          end=haplotype.end + 1):
        depth += 1
        # 2048 is the SAM "supplementary alignment" flag bit.
        if (read.reference_start is not None and not read.is_secondary
                and not read.flag & 2048):
            strand = getReadStrand(read)
            reads_dict.setdefault(read.query_name, {})[strand] = read

    # judge depth and mut reads whether qualified
    # print("...") with a single argument behaves the same on Python 2 and 3.
    if depth < int(mindepth):
        print("depth less than min depth!")
        return False, "%s: depth less than min depth(%s)" % (location,
                                                              mindepth)
    mut_reads_num = int(depth * haplotype.freq)
    if mut_reads_num < int(minmutreads):
        print("mutation reads num less than minmutreads!")
        return False, "%s: mut reads less than min mut reads(%s)" % (
            location, minmutreads)

    print("start pick reads")
    res = pick_reads(bam, reads_dict, mut_reads_num, is_single, minmapq,
                     is_multmapfilter)
    if res[0] is False:
        return False, "%s: %s" % (location, res[1])
    chosen_reads, mate_reads = res
    print("end pick reads")

    # edit
    my_chosen_reads = {}
    my_mate_reads = {}
    chosen_reads_num = 0
    real_mut_reads_num = 0
    tmp_bam_file = haplotype_prefix + ".chosen.edited.bam"
    tmp_bam = pysam.AlignmentFile(tmp_bam_file, 'wb', template=bam)
    try:
        for readName, readInfo in chosen_reads.items():
            my_chosen_reads[readName] = {}
            edited = {}      # strand -> Read wrapper handed to editRead
            edited_raw = {}  # strand -> pysam read carrying the new sequence
            for strand, read in readInfo.items():
                my_read = Read(read)
                res = editRead(my_read, reffasta, haplotype.mutList)
                if res is False:
                    continue
                real_mut_reads_num += 1
                sequence, quality, _ = res  # third item (shift) is unused
                read.query_sequence = sequence
                read.query_qualities = quality
                edited[strand] = my_read
                edited_raw[strand] = read

            # Single-end: keep whatever was edited.  Paired-end: keep a lone
            # edited read only together with its picked mate, or keep both
            # reads when both were edited; anything else is dropped.
            with_mate = False
            if is_single:
                keep = True
            elif len(edited) == 1 and readName in mate_reads:
                keep = True
                with_mate = True
            else:
                keep = len(edited) == 2
            if not keep:
                continue

            for strand in edited:
                my_chosen_reads[readName][strand] = edited[strand]
                tmp_bam.write(edited_raw[strand])
                chosen_reads_num += 1
            if with_mate:
                mate_read = mate_reads[readName]
                my_mate_reads[readName] = Read(mate_read)
                tmp_bam.write(mate_read)
    finally:
        tmp_bam.close()

    # alignment and judge coverdiff whether qualified
    chosen_bam_file = haplotype_prefix + ".chosen.remap.bam"
    remap(aligner_index, tmp_bam_file, chosen_bam_file, aligner, is_single)
    chosen_bam = pysam.AlignmentFile(chosen_bam_file)
    try:
        qualified = judge_coverdiff(bam, depth, chosen_bam, chosen_reads_num,
                                    haplotype, float(diffcover))
    finally:
        chosen_bam.close()

    if qualified:
        return my_chosen_reads, my_mate_reads, real_mut_reads_num, depth
    return False, "%s: coverdiff is less than minDiffCover" % location
Пример #5
0
def _write_single_end_edits(edit_bam, fout_convert, edit_bam_reads,
                            total_modify_reads, total_add_reads, used_reads,
                            seqer, flow_order, lib_key, barcode, tag):
    """Write renamed single-end reads to ``edit_bam`` and log the renames."""

    def emit(new_name, orig_read, new_read):
        # Shared tail of both loops: write, log the rename, index by strand.
        edit_bam.write(new_read)
        fout_convert.write(
            "%s: %s, %s, %s-%s\n" %
            (new_name, orig_read.query_name, new_read.is_read1,
             new_read.reference_start, new_read.reference_end))
        strand = getReadStrand(new_read)
        edit_bam_reads.setdefault(new_name, dict())[strand] = new_read

    for read_pair in total_modify_reads:
        read1 = read_pair[0]
        orig_read1 = used_reads[getKeyName(read1)]
        new_read1 = copy.deepcopy(orig_read1)
        new_read1.query_sequence = read1.query_sequence
        new_read1.query_qualities = read1.query_qualities
        # Keep the part of the name before the first ':' and append a fresh
        # name from get_new_readname().
        new_name = read1.query_name.split(":")[0] + ":" + get_new_readname()
        new_read1.query_name = new_name
        if seqer == "life":
            new_read1 = deal_life_reads(new_read1, flow_order, lib_key,
                                        barcode)
        if tag:
            new_read1 = add_tag(new_read1)
        emit(new_name, orig_read1, new_read1)

    for read_pair in total_add_reads:
        # Added reads are plain renamed copies of the original read: no
        # sequence replacement, life handling or tagging is applied here.
        orig_read1 = used_reads[getKeyName(read_pair[0])]
        new_read1 = copy.deepcopy(orig_read1)
        new_name = get_new_readname()
        new_read1.query_name = new_name
        emit(new_name, orig_read1, new_read1)


def _write_paired_end_edits(edit_bam, fout_convert, edit_bam_reads,
                            read_pairs, used_reads, tag):
    """Write renamed paired-end reads to ``edit_bam`` and log the renames."""
    for read_pair in read_pairs:
        read1 = read_pair[0]
        read2 = read_pair[1]
        orig_read1 = used_reads[getKeyName(read1)]
        orig_read2 = used_reads[getKeyName(read2)]
        new_read1 = copy.deepcopy(orig_read1)
        new_read2 = copy.deepcopy(orig_read2)
        new_read1.query_sequence = read1.query_sequence
        new_read1.query_qualities = read1.query_qualities
        new_read2.query_sequence = read2.query_sequence
        new_read2.query_qualities = read2.query_qualities

        # Both mates share one new name.
        new_name = get_new_readname()
        new_read1.query_name = new_name
        new_read2.query_name = new_name

        strand1 = getReadStrand(new_read1)
        strand2 = getReadStrand(new_read2)
        by_strand = edit_bam_reads.setdefault(new_name, dict())
        by_strand[strand1] = new_read1
        by_strand[strand2] = new_read2
        # NOTE(review): the reads are indexed before add_tag runs (the
        # single-end path indexes after); kept as in the original -- confirm
        # whether add_tag modifies the read in place.
        if tag:
            new_read1 = add_tag(new_read1)
            new_read2 = add_tag(new_read2)

        fout_convert.write("%s: %s, %s, %s, %s, %s-%s, %s-%s\n" % (
            new_name,
            orig_read1.query_name,
            orig_read2.query_name,
            new_read1.is_read1,
            new_read2.is_read2,
            new_read1.reference_start,
            new_read1.reference_end,
            new_read2.reference_start,
            new_read2.reference_end,
        ))
        edit_bam.write(new_read1)
        edit_bam.write(new_read2)


def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads,
                   total_add_reads, used_reads, seqer, aligner, aligner_index,
                   flow_order, lib_key, barcode, tag):
    """Write renamed copies of the edited reads, remap and index them.

    Copies of the reads in ``used_reads`` are given new names from
    ``get_new_readname()`` and written to ``<out_dir>/edit.bam``; each rename
    is logged to ``<out_dir>/readname_convert.txt``.  ``edit.bam`` is passed
    to ``remap``; for paired data the result is passed to ``bamAddRG``.  The
    final BAM is passed to ``bamIndex``.

    Args:
        bam_file: Path of the input BAM; used as the template for ``edit.bam``
            and passed to ``bamAddRG``.
        out_dir: Directory receiving all output files.
        is_single: Truthy for single-end data.
        total_modify_reads: Sequence of read pairs whose ``query_sequence``
            and ``query_qualities`` replace those of the copied original.
        total_add_reads: Sequence of read pairs to add.  For single-end data
            they are copied unchanged apart from the name; for paired data
            they are concatenated with ``total_modify_reads`` and handled the
            same way.
        used_reads: Mapping from ``getKeyName(read)`` to the original read.
        seqer: When ``"life"``, modified single-end reads are passed through
            ``deal_life_reads``.
        aligner, aligner_index: Forwarded to ``remap``.
        flow_order, lib_key, barcode: Forwarded to ``deal_life_reads``.
        tag: When truthy, reads are passed through ``add_tag`` (not applied
            to single-end added reads).

    Returns:
        Path of the final BAM: ``<out_dir>/edit.remap.addRG.bam`` for paired
        data, ``<out_dir>/edit.remap.bam`` for single-end data.
    """
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    edit_bam_reads = {}

    bam = pysam.AlignmentFile(bam_file, 'rb')
    try:
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            with open(readname_convert_file, 'w') as fout_convert:
                if is_single:
                    _write_single_end_edits(
                        edit_bam, fout_convert, edit_bam_reads,
                        total_modify_reads, total_add_reads, used_reads,
                        seqer, flow_order, lib_key, barcode, tag)
                else:
                    _write_paired_end_edits(
                        edit_bam, fout_convert, edit_bam_reads,
                        total_modify_reads + total_add_reads, used_reads, tag)
        finally:
            edit_bam.close()

        remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner,
              is_single)

        if not is_single:
            editRemapBam_addRG_File = os.path.join(out_dir,
                                                   "edit.remap.addRG.bam")
            editRemap = pysam.AlignmentFile(edit_remap_bam_file, 'rb')
            try:
                bamAddRG(editRemap, edit_bam_reads, bam,
                         editRemapBam_addRG_File)
            finally:
                editRemap.close()
        else:
            editRemapBam_addRG_File = edit_remap_bam_file
    finally:
        bam.close()

    bamIndex(editRemapBam_addRG_File)
    return editRemapBam_addRG_File