Пример #1
0
def bamAddRG(editRemap, editBamReads, templateBamFile, outBamFile):
    """Write the reads of ``editRemap`` to a new BAM that uses the template's read groups.

    The output header is the header of ``editRemap`` with its ``RG`` entry
    replaced by the ``RG`` entry of ``templateBamFile``. Each read fetched
    from ``editRemap`` is passed to ``readAddRG`` together with its
    counterpart from ``editBamReads`` (or ``None`` when the read name is not
    present there) and the read groups returned by ``_getRGs``; the result is
    written to ``outBamFile``.

    Args:
        editRemap: open pysam alignment file whose reads are copied.
        editBamReads: mapping ``read name -> {strand: read}``; a name that is
            present must contain the strand of the fetched read, otherwise
            the lookup raises ``KeyError``.
        templateBamFile: open pysam alignment file providing the ``RG``
            header entry.
        outBamFile: path of the BAM file to create.
    """
    header = editRemap.header
    # Recent pysam releases expose a read-only AlignmentHeader that rejects
    # item assignment, while older releases return a plain dict. Work on a
    # mutable dict copy so the RG replacement works with both.
    if hasattr(header, "to_dict"):
        head = header.to_dict()
    else:
        head = dict(header)
    head["RG"] = templateBamFile.header["RG"]

    addRGBam = pysam.AlignmentFile(outBamFile, 'wb', header=head)
    try:
        RG = _getRGs(templateBamFile)
        for read in editRemap.fetch():
            readName = read.query_name
            strand = getReadStrand(read)
            if readName in editBamReads:
                orig = editBamReads[readName][strand]
            else:
                orig = None
            addRGBam.write(readAddRG(read, orig, RG))
    finally:
        # Close the output even when a read fails, so the handle is not
        # leaked and whatever was written so far is flushed.
        addRGBam.close()
Пример #2
0
def deal_haplotype(bam_file, haplotype, reffasta, haplotype_prefix, mindepth,
                   minmutreads, minmapq, diffcover, is_single,
                   is_multmapfilter, aligner, aligner_index, **kwargs):
    """Pick, edit and remap the reads covering one haplotype.

    Steps:
      1. Fetch every read overlapping ``haplotype`` from ``bam_file``; all of
         them count towards ``depth``, but only reads that have a reference
         start and are neither secondary nor supplementary are kept as
         candidates.
      2. Reject the haplotype when ``depth`` is below ``mindepth`` or when
         ``int(depth * haplotype.freq)`` is below ``minmutreads``.
      3. Let ``pick_reads`` choose the reads to edit, apply ``editRead`` to
         each of them and write the edited reads (plus the untouched mate,
         where applicable) to ``<haplotype_prefix>.chosen.edited.bam``.
      4. Remap that file to ``<haplotype_prefix>.chosen.remap.bam`` and let
         ``judge_coverdiff`` accept or reject the result.

    Args:
        bam_file: path of the source BAM file.
        haplotype: object providing ``chrom``, ``start``, ``end``, ``freq``
            and ``mutList``.
        reffasta: reference handed to ``editRead``.
        haplotype_prefix: path prefix for the two intermediate BAM files.
        mindepth: minimum number of fetched reads (converted with ``int``).
        minmutreads: minimum number of reads to mutate (converted with
            ``int``).
        minmapq, is_multmapfilter: forwarded to ``pick_reads``.
        diffcover: threshold forwarded to ``judge_coverdiff`` as a float.
        is_single: truthy for single-end data; mates are then never written.
        aligner, aligner_index: forwarded to ``remap``.
        **kwargs: accepted and ignored.

    Returns:
        On success the 4-tuple ``(my_chosen_reads, my_mate_reads,
        real_mut_reads_num, depth)``; on failure the 2-tuple
        ``(False, message)``.
    """
    supplementary_flag = 2048  # SAM FLAG bit 0x800: supplementary alignment

    bam = pysam.AlignmentFile(bam_file, 'rb')
    # The reads handed back on success come from `bam` and may still depend
    # on the open handle (TODO confirm against the callers), so the file is
    # only closed on the paths where no read leaves this function.
    keep_bam_open = False
    try:
        reads_dict = OrderedDict()
        depth = 0
        for read in bam.fetch(reference=haplotype.chrom,
                              start=haplotype.start,
                              end=haplotype.end + 1):
            depth += 1
            if (read.reference_start is None or read.is_secondary
                    or read.flag & supplementary_flag):
                continue
            strand = getReadStrand(read)
            reads_dict.setdefault(read.query_name, {})[strand] = read

        position = (haplotype.chrom, haplotype.start, haplotype.end)

        # judge depth and mut reads whether qualified
        if depth < int(mindepth):
            print("depth less than min depth!")
            return False, "haplotype in position %s:%s-%s: depth less than min depth(%s)" % (
                position + (mindepth,))

        mut_reads_num = int(depth * haplotype.freq)
        if mut_reads_num < int(minmutreads):
            print("mutation reads num less than minmutreads!")
            return False, "haplotype in position %s:%s-%s: mut reads less than min mut reads(%s)" % (
                position + (minmutreads,))

        print("start pick reads")
        res = pick_reads(bam, reads_dict, mut_reads_num, is_single, minmapq,
                         is_multmapfilter)
        if res[0] is False:
            return False, "haplotype in position %s:%s-%s: %s" % (
                position + (res[1],))
        chosen_reads, mate_reads = res
        print("end pick reads")

        # edit
        my_chosen_reads = {}
        my_mate_reads = {}
        chosen_reads_num = 0
        real_mut_reads_num = 0
        tmp_bam_file = haplotype_prefix + ".chosen.edited.bam"
        tmp_bam = pysam.AlignmentFile(tmp_bam_file, 'wb', template=bam)
        try:
            for readName, readInfo in chosen_reads.items():
                my_chosen_reads[readName] = {}
                edited_wrappers = {}  # strand -> Read wrapper
                edited_records = {}   # strand -> edited pysam read
                for strand, read in readInfo.items():
                    my_read = Read(read)
                    res = editRead(my_read, reffasta, haplotype.mutList)
                    if res is False:
                        continue
                    real_mut_reads_num += 1
                    sequence, quality, _shift = res
                    read.query_sequence = sequence
                    read.query_qualities = quality
                    edited_wrappers[strand] = my_read
                    edited_records[strand] = read

                # Single-end data: write whatever was edited.
                # Paired data: write either both edited mates, or the one
                # edited mate together with its untouched partner; any other
                # combination is skipped.
                edited_count = len(edited_wrappers)
                if is_single or edited_count == 2:
                    with_mate = False
                elif edited_count == 1 and readName in mate_reads:
                    with_mate = True
                else:
                    continue

                for strand in edited_wrappers:
                    my_chosen_reads[readName][strand] = edited_wrappers[strand]
                    tmp_bam.write(edited_records[strand])
                    chosen_reads_num += 1
                if with_mate:
                    mate_read = mate_reads[readName]
                    my_mate_reads[readName] = Read(mate_read)
                    tmp_bam.write(mate_read)
        finally:
            # Always release the temporary BAM, also when editing fails.
            tmp_bam.close()

        # alignment and judge coverdiff whether qualified
        chosen_bam_file = haplotype_prefix + ".chosen.remap.bam"
        remap(aligner_index, tmp_bam_file, chosen_bam_file, aligner,
              is_single)
        chosen_bam = pysam.AlignmentFile(chosen_bam_file)
        try:
            qualified = judge_coverdiff(bam, depth, chosen_bam,
                                        chosen_reads_num, haplotype,
                                        float(diffcover))
        finally:
            chosen_bam.close()

        if qualified:
            keep_bam_open = True
            return my_chosen_reads, my_mate_reads, real_mut_reads_num, depth
        return False, "haplotype in position %s:%s-%s: coverdiff is less than minDiffCover" % (
            position)
    finally:
        if not keep_bam_open:
            bam.close()
Пример #3
0
def reads_replace(bam_file, total_chosen_reads, seqer, flow_order, lib_key,
                  barcode, tag, out_dir, aligner, aligner_index, is_single):
    """Split ``bam_file`` into edited and untouched reads and remap the edited ones.

    Every read whose name occurs in ``total_chosen_reads`` goes to
    ``<out_dir>/edit.bam``; when its strand is present for that name, its
    sequence and qualities are first replaced by those of the chosen read
    (followed by ``deal_life_reads`` for ``seqer == "life"`` and ``add_tag``
    when ``tag`` is truthy). Reads whose name did not end up in the edit set
    are written to ``<out_dir>/exclude.bam``. The edit BAM is then remapped,
    sorted and indexed.

    Args:
        bam_file: path of the source BAM file.
        total_chosen_reads: mapping ``read name -> {strand: chosen read}``
            whose values provide ``query_sequence`` and ``query_qualities``.
        seqer: sequencer name; ``"life"`` enables ``deal_life_reads``.
        flow_order, lib_key, barcode: forwarded to ``deal_life_reads``.
        tag: truthy to tag edited reads and run ``bam_add_tag`` at the end.
        out_dir: directory receiving all generated files.
        aligner, aligner_index, is_single: forwarded to ``remap``.

    Returns:
        Tuple ``(path of the remapped and sorted edit BAM, path of the
        exclude BAM)``.
    """
    # shlex.quote is the Python 3 home of the shell quoting helper; fall
    # back to pipes.quote where shlex does not provide it.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    edit_bam_file = os.path.join(out_dir, "edit.bam")
    exclude_bam_file = os.path.join(out_dir, "exclude.bam")

    bam = pysam.AlignmentFile(bam_file)
    try:
        edit_bam_reads = {}
        for read in bam.fetch():
            read_name = read.query_name
            if read_name not in total_chosen_reads:
                continue
            strand = getReadStrand(read)
            if strand in total_chosen_reads[read_name]:
                my_read = total_chosen_reads[read_name][strand]
                read.query_sequence = my_read.query_sequence
                read.query_qualities = my_read.query_qualities
                if seqer == "life":
                    read = deal_life_reads(read, flow_order, lib_key, barcode)
                if tag:
                    read = add_tag(read)
            # Reads of a chosen name are kept whether or not they were edited.
            edit_bam_reads.setdefault(read_name, {})[strand] = read

        # write edited reads into edit.bam
        edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
        try:
            for strand_reads in edit_bam_reads.values():
                for read in strand_reads.values():
                    edit_bam.write(read)
        finally:
            edit_bam.close()

        # write not edited reads into exclude.bam
        exclude_bam = pysam.AlignmentFile(exclude_bam_file, 'wb',
                                          template=bam)
        try:
            for read in bam.fetch():
                if read.query_name not in edit_bam_reads:
                    exclude_bam.write(read)
        finally:
            exclude_bam.close()
    finally:
        # Nothing read from `bam` is used past this point.
        bam.close()

    # remap the edited reads
    header = os.path.join(out_dir, 'bam.header')
    # Paths are quoted so that spaces or shell metacharacters in them cannot
    # break (or alter) the command line.
    os.system('samtools view -H %s|grep "^@RG" > %s' % (
        shell_quote(bam_file), shell_quote(header)))
    with open(header, 'r') as header_file:
        # Only the first @RG line is used; an empty file means no read group.
        head = header_file.readline().rstrip() or None

    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index,
          edit_bam_file,
          edit_remap_bam_file,
          aligner,
          is_single,
          header=head)
    edit_remap_bam_sorted_prefix = os.path.join(out_dir, "edit.remap.sort")
    edit_remap_bam_sorted_file = os.path.join(out_dir, "edit.remap.sort.bam")
    bamSort(edit_remap_bam_file, edit_remap_bam_sorted_prefix)
    bamIndex(edit_remap_bam_sorted_file)
    if tag:
        # NOTE(review): input and output of bam_add_tag are the same path
        # here - confirm bam_add_tag supports rewriting a file in place.
        edit_remap_addtag_file = os.path.join(out_dir, "edit.remap.sort.bam")
        bam_add_tag(edit_remap_bam_sorted_file, edit_remap_addtag_file)
    else:
        edit_remap_addtag_file = edit_remap_bam_sorted_file

    return edit_remap_addtag_file, exclude_bam_file