def deal_inversion(bam, ref, svinfo, is_single, readLength, tempDir, insertSize, minmapq, is_multmapfilter):
    """Build the read edits for the inversion ("inv") described by ``svinfo``.

    The region ``svinfo.chrom:svinfo.start-svinfo.end`` is classified with
    ``pos_type_classify`` and every resulting read group is passed to its
    ``deal_type*`` helper.  Before that, ``svinfo.freq`` is rescaled with the
    number of kept and filtered reads and capped at 1.

    Returns:
        ``(modified_reads, [], [])`` on success, or ``(False, message)`` when
        no read is kept by the classifier or ``deal_type3`` returns ``False``.
    """
    sv_freq = svinfo.freq
    sv_chrom = svinfo.chrom
    sv_start = svinfo.start
    sv_end = svinfo.end
    sv_kind = "inv"

    if is_single:
        classified = pos_type_classify(
            bam, sv_chrom, sv_start, sv_end, is_single, readLength, tempDir,
            center=False, maxsize=1, minmapq=minmapq, is_multmapfilter=is_multmapfilter)
        type6_left, type6_right, type7, n_filtered = classified
        n_kept = sum(len(group) for group in (type6_left, type6_right, type7))
        if not n_kept:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % n_filtered
        # Rescale by (kept + filtered) / kept, then cap at 1.
        sv_freq = sv_freq * (n_kept + n_filtered) / n_kept
        sv_freq = 1 if sv_freq > 1 else sv_freq
        # NOTE(review): the left list is passed for both list arguments of
        # deal_type6; type6_right is only counted -- confirm this is intended.
        return deal_type6(ref, type6_left, type6_left, sv_freq, sv_start, sv_end, sv_kind), [], []

    # Paired-end path.
    classified = pos_type_classify(
        bam, sv_chrom, sv_start, sv_end, is_single, readLength, tempDir,
        center=False, maxsize=insertSize[1], minmapq=minmapq, is_multmapfilter=is_multmapfilter)
    (type1_left, type1_right, type2_left, type2_right, type3_left, type3_right,
     type4, type5_left, type5_right, n_filtered) = classified
    paired_groups = (type1_left, type1_right, type2_left, type2_right, type3_left, type3_right,
                     type4, type5_left, type5_right)

    record_tag = "_".join([sv_chrom, str(sv_start), str(sv_end), sv_kind])
    record_reads(bam, tempDir, record_tag, *paired_groups)

    n_kept = sum(len(group) for group in paired_groups)
    if not n_kept:
        return False, "All reads is filtered in this scope, total & filtered reads num %s" % n_filtered

    # Rescale by (2 * kept + filtered) / (2 * kept), then cap at 1.
    sv_freq = sv_freq * (n_kept * 2 + n_filtered) / (n_kept * 2)
    sv_freq = 1 if sv_freq > 1 else sv_freq

    edited = []
    edited += deal_type1(ref, type1_left, type1_right, sv_freq, sv_start, sv_end, sv_kind)
    edited += deal_type2(ref, type2_left, type2_right, sv_freq, sv_start, sv_end, sv_kind)

    type3_result = deal_type3(type3_left, type3_right, sv_freq, insertSize, sv_start, sv_end, sv_kind)
    if type3_result is False:
        return False, "The reads are not satisfied to modify, see more details in log file."
    # Only the first element of the triple is used for an inversion.
    type3_modified, _type3_added, _type3_deleted = type3_result
    edited += type3_modified

    edited += deal_type5(ref, type5_left, type5_right, sv_freq, sv_start, sv_end, sv_kind)
    return edited, [], []
def deal_deletion(bam, ref, svinfo, is_single, readLength, tempDir, insertSize, minmapq, is_multmapfilter):
    """Build the read edits for the deletion ("del") described by ``svinfo``.

    The region ``svinfo.chrom:svinfo.start-svinfo.end`` is classified with
    ``pos_type_classify`` and each read group is passed to its ``deal_type*``
    helper.  ``svinfo.freq`` is first rescaled with the number of kept and
    filtered reads and capped at 1.

    Returns:
        A ``(modify, delete, add)`` triple of read lists on success (the add
        slot is always an empty list here), or ``(False, message)`` when no
        read is kept by the classifier or ``deal_type3`` returns ``False``.
    """
    freq = svinfo.freq
    chrom = svinfo.chrom
    start = svinfo.start
    end = svinfo.end
    svtype = "del"

    if is_single:
        # NOTE(review): unlike the paired-end call below, minmapq and
        # is_multmapfilter are not forwarded here -- confirm this is intended.
        reads_type6_left, reads_type6_right, reads_type7, filtered_read_num = pos_type_classify(
            bam, chrom, start, end, is_single, readLength, tempDir)
        type_reads_num = len(reads_type6_left) + len(reads_type6_right) + len(reads_type7)
        # Guard the division below; mirrors the paired-end branch instead of
        # raising ZeroDivisionError when nothing is kept.
        if type_reads_num == 0:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num
        freq = freq * (type_reads_num + filtered_read_num) / type_reads_num
        if freq > 1:
            freq = 1
        # NOTE(review): the left list is passed for both list arguments of
        # deal_type6; reads_type6_right is only counted -- confirm.
        modify_reads_type6 = deal_type6(ref, reads_type6_left, reads_type6_left, freq, start, end, svtype)
        delete_reads_type7 = deal_type7(reads_type7, freq, svtype)
        return modify_reads_type6, delete_reads_type7, []

    (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
     reads_type3_left, reads_type3_right, reads_type4, reads_type5_left, reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, chrom, start, end, is_single, readLength, tempDir,
        minmapq=minmapq, is_multmapfilter=is_multmapfilter)
    type_reads_num = (len(reads_type1_left) + len(reads_type1_right)
                      + len(reads_type2_left) + len(reads_type2_right)
                      + len(reads_type3_left) + len(reads_type3_right)
                      + len(reads_type4)
                      + len(reads_type5_left) + len(reads_type5_right))
    # The classified reads are recorded before the empty check.
    record_reads(bam, tempDir, chrom + "_" + str(start) + "_" + str(end) + "_" + svtype,
                 reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
                 reads_type3_left, reads_type3_right, reads_type4,
                 reads_type5_left, reads_type5_right)
    if type_reads_num == 0:
        return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num

    # Rescale by (2 * kept + filtered) / (2 * kept), then cap at 1.
    freq = freq * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)
    if freq > 1:
        freq = 1

    modify_reads_type1 = deal_type1(ref, reads_type1_left, reads_type1_right, freq, start, end, svtype)
    delete_reads_type2 = deal_type2(ref, reads_type2_left, reads_type2_right, freq, start, end, svtype)
    supple1 = reads_type3_right + reads_type1_right
    res = deal_type3(reads_type3_left, reads_type3_right, freq, insertSize, start, end, svtype,
                     supple1=supple1)
    if res is False:
        return False, "The reads are not satisfied to modify, see more details in log file"
    # The add list returned by deal_type3 is not used for a deletion.
    modify_reads_type3, _add_reads_type3, delete_reads_type3 = res
    delete_reads_type4 = deal_type4(reads_type4, freq, svtype)
    delete_reads_type5 = deal_type5(ref, reads_type5_left, reads_type5_right, freq, start, end, svtype)

    # modify, delete, add
    return (modify_reads_type1 + modify_reads_type3,
            delete_reads_type2 + delete_reads_type4 + delete_reads_type5 + delete_reads_type3,
            [])
def deal_cnv(bam, ref, svinfo, is_single, readLength, tempDir, insertSize, minmapq, is_multmapfilter):
    """Build the read edits for the copy-number change ("cnv") in ``svinfo``.

    The region ``svinfo.chrom:svinfo.start-svinfo.end`` is classified with
    ``pos_type_classify``.  Depending on ``svinfo.cnv_type`` the relevant read
    group (``reads_type7`` for single-end, ``reads_type4`` for paired-end) is
    passed to ``deal_type7`` / ``deal_type4``:

    * ``"loss"`` -- the helper result is returned in the delete slot.
    * ``"gain"`` -- the helper is called with a frequency derived from
      ``freq - 1`` and its result is returned in the add slot.

    Returns:
        ``([], delete_reads, [])`` for a loss, ``([], [], add_reads)`` for a
        gain, or ``(False, message)`` when no read is kept by the classifier
        or ``svinfo.cnv_type`` is neither ``"loss"`` nor ``"gain"``.
    """
    freq = svinfo.freq
    chrom = svinfo.chrom
    start = svinfo.start
    end = svinfo.end
    svtype = "cnv"

    if is_single:
        reads_type6_left, reads_type6_right, reads_type7, filtered_read_num = pos_type_classify(
            bam, chrom, start, end, is_single, readLength, tempDir,
            minmapq=minmapq, is_multmapfilter=is_multmapfilter)
        type_reads_num = len(reads_type6_left) + len(reads_type6_right) + len(reads_type7)
        # Guard the division below; mirrors the paired-end branch instead of
        # raising ZeroDivisionError when nothing is kept.
        if type_reads_num == 0:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num
        freq = freq * (type_reads_num + filtered_read_num) / type_reads_num
        if svinfo.cnv_type == "loss":
            delete_reads_type7 = deal_type7(reads_type7, freq, svtype)
            return [], delete_reads_type7, []
        if svinfo.cnv_type == "gain":
            # NOTE(review): here 1 is subtracted after rescaling, whereas the
            # paired-end branch subtracts before rescaling -- confirm.
            add_reads_type7 = deal_type7(reads_type7, freq - 1, svtype)
            return [], [], add_reads_type7
    else:
        (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
         reads_type3_left, reads_type3_right, reads_type4, reads_type5_left, reads_type5_right,
         filtered_read_num) = pos_type_classify(
            bam, chrom, start, end, is_single, readLength, tempDir,
            minmapq=minmapq, is_multmapfilter=is_multmapfilter)
        type_reads_num = (len(reads_type1_left) + len(reads_type1_right)
                          + len(reads_type2_left) + len(reads_type2_right)
                          + len(reads_type3_left) + len(reads_type3_right)
                          + len(reads_type4)
                          + len(reads_type5_left) + len(reads_type5_right))
        if type_reads_num == 0:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num
        if svinfo.cnv_type == "loss":
            freq = freq * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)
            delete_reads_type4 = deal_type4(reads_type4, freq, svtype, insertSize=insertSize, cnvType="loss")
            return [], delete_reads_type4, []
        if svinfo.cnv_type == "gain":
            freq = (freq - 1) * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)
            add_reads_type4 = deal_type4(reads_type4, freq, svtype, insertSize=insertSize, cnvType="gain")
            return [], [], add_reads_type4

    # Previously an unrecognised cnv_type fell off the end and returned None;
    # report it in the same (False, message) shape as the other failures.
    return False, "Unknown cnv type %s, expected 'loss' or 'gain'" % (svinfo.cnv_type,)
def deal_translocation_chrom(bam, ref, svinfo, is_single, read_length, tempDir, insertSize, minmapq,
                             is_multmapfilter):
    """Build the read edits for a "trans_chrom" translocation in ``svinfo``.

    Two regions are classified with ``pos_type_classify``: the primary one
    (``svinfo.chrom:start-end``) and the partner one
    (``svinfo.trans_chrom:trans_start-trans_end``).  Each read group of both
    regions is passed to its ``deal_type*`` helper, with ``subPos`` set to the
    "chrom:start-end" string of the *other* region.

    Only paired-end data is handled; for single-end data nothing is
    classified and a failure tuple is returned.

    Returns:
        ``(modified_reads, [], [])`` on success, or ``(False, message)`` when
        ``is_single`` is true, no primary-region read is kept by the
        classifier, or either ``deal_type3`` call returns ``False``.
    """
    freq = svinfo.freq
    chrom = svinfo.chrom
    start = svinfo.start
    end = svinfo.end
    trans_chr, trans_start, trans_end = svinfo.trans_chrom, svinfo.trans_start, svinfo.trans_end
    trans_info = "%s:%s-%s" % (trans_chr, trans_start, trans_end)
    info = "%s:%s-%s" % (chrom, start, end)
    svtype = "trans_chrom"

    if is_single:
        # The single-end branch used to be a bare `pass`, so the function
        # returned None; report it as an explicit failure instead.
        return False, "Single-end reads are not supported for %s" % svtype

    (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
     reads_type3_left, reads_type3_right, reads_type4, reads_type5_left, reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, chrom, start, end, is_single, read_length, tempDir + ".info",
        center=False, maxsize=insertSize[1], minmapq=minmapq, is_multmapfilter=is_multmapfilter)
    record_reads(bam, tempDir, chrom + "_" + str(start) + "_" + str(end) + "_" + svtype,
                 reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
                 reads_type3_left, reads_type3_right, reads_type4,
                 reads_type5_left, reads_type5_right)

    # NOTE(review): this second call overwrites filtered_read_num, so the
    # rescaling below combines the primary region's kept-read count with the
    # partner region's filtered count -- confirm this is intended.
    (trans_reads_type1_left, trans_reads_type1_right, trans_reads_type2_left, trans_reads_type2_right,
     trans_reads_type3_left, trans_reads_type3_right, trans_reads_type4,
     trans_reads_type5_left, trans_reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, trans_chr, trans_start, trans_end, is_single, read_length, tempDir + ".trans",
        center=False, maxsize=insertSize[1], minmapq=minmapq, is_multmapfilter=is_multmapfilter)
    # NOTE(review): the partner reads are recorded under the same
    # chrom_start_end_svtype tag as the primary reads -- confirm.
    record_reads(bam, tempDir, chrom + "_" + str(start) + "_" + str(end) + "_" + svtype,
                 trans_reads_type1_left, trans_reads_type1_right,
                 trans_reads_type2_left, trans_reads_type2_right,
                 trans_reads_type3_left, trans_reads_type3_right, trans_reads_type4,
                 trans_reads_type5_left, trans_reads_type5_right)

    # Only the primary region's groups are counted.
    type_reads_num = (len(reads_type1_left) + len(reads_type1_right)
                      + len(reads_type2_left) + len(reads_type2_right)
                      + len(reads_type3_left) + len(reads_type3_right)
                      + len(reads_type4)
                      + len(reads_type5_left) + len(reads_type5_right))
    if type_reads_num == 0:
        return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num

    # Rescale by (2 * kept + filtered) / (2 * kept), then cap at 1.
    freq = freq * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)
    if freq > 1:
        freq = 1

    modified_reads = []

    modify_reads_type1 = deal_type1(ref, reads_type1_left, reads_type1_right, freq, start, end, svtype,
                                    subPos=trans_info)
    trans_modify_reads_type1 = deal_type1(ref, trans_reads_type1_left, trans_reads_type1_right, freq,
                                          trans_start, trans_end, svtype, subPos=info)
    modified_reads.extend(modify_reads_type1 + trans_modify_reads_type1)

    modify_reads_type2 = deal_type2(ref, reads_type2_left, reads_type2_right, freq, start, end, svtype,
                                    subPos=trans_info)
    trans_modify_reads_type2 = deal_type2(ref, trans_reads_type2_left, trans_reads_type2_right, freq,
                                          trans_start, trans_end, svtype, subPos=info)
    modified_reads.extend(modify_reads_type2 + trans_modify_reads_type2)

    # Each region's type3 reads are given the other region's type3 reads as
    # supple1 / supple2.
    res = deal_type3(reads_type3_left, reads_type3_right, freq, insertSize, start, end, svtype,
                     subPos=trans_info, supple1=trans_reads_type3_left, supple2=trans_reads_type3_right)
    res_trans = deal_type3(trans_reads_type3_left, trans_reads_type3_right, freq, insertSize,
                           trans_start, trans_end, svtype, subPos=info,
                           supple1=reads_type3_left, supple2=reads_type3_right)
    if res is False or res_trans is False:
        return False, "The reads are not satisfied to modify, see more details in log file"
    # Only the first element of each deal_type3 triple is used here.
    modify_reads_type3, _add_reads_type3, _delete_reads_type3 = res
    trans_modify_reads_type3, _trans_add_reads_type3, _trans_delete_reads_type3 = res_trans
    modified_reads.extend(modify_reads_type3 + trans_modify_reads_type3)

    modify_reads_type5 = deal_type5(ref, reads_type5_left, reads_type5_right, freq, start, end, svtype,
                                    subPos=trans_info)
    trans_modify_reads_type5 = deal_type5(ref, trans_reads_type5_left, trans_reads_type5_right, freq,
                                          trans_start, trans_end, svtype, subPos=info)
    modified_reads.extend(modify_reads_type5 + trans_modify_reads_type5)

    return modified_reads, [], []
def deal_duplication(bam, ref, svinfo, is_single, readLength, tempDir, insertSize, minmapq, is_multmapfilter):
    """Build the read edits for the duplication ("dup") described by ``svinfo``.

    The region ``svinfo.chrom:svinfo.start-svinfo.end`` is classified with
    ``pos_type_classify`` and the read groups are passed to their
    ``deal_type*`` helpers; every helper result ends up in the add slot of
    the returned triple.  In the paired-end branch the frequencies handed to
    the helpers are derived from ``(svinfo.dup_num - 1) * freq``.

    Returns:
        ``([], [], add_reads)`` on success, or ``(False, message)`` when no
        read is kept by the classifier or ``deal_type3`` returns ``False``.
    """
    freq = svinfo.freq
    chrom = svinfo.chrom
    start = svinfo.start
    end = svinfo.end
    dup_num = svinfo.dup_num
    svtype = "dup"

    if is_single:
        reads_type6_left, reads_type6_right, reads_type7, filtered_read_num = pos_type_classify(
            bam, chrom, start, end, is_single, readLength, tempDir,
            minmapq=minmapq, is_multmapfilter=is_multmapfilter)
        type_reads_num = len(reads_type6_left) + len(reads_type6_right) + len(reads_type7)
        if type_reads_num == 0:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num
        freq = freq * (type_reads_num + filtered_read_num) / type_reads_num
        # deal_type6 gets the frequency capped at 1; deal_type7 gets it uncapped.
        if freq > 1:
            used_freq = 1
        else:
            used_freq = freq
        # NOTE(review): the left list is passed for both list arguments of
        # deal_type6; reads_type6_right is only counted -- confirm.
        add_reads_type6 = deal_type6(ref, reads_type6_left, reads_type6_left, used_freq, start, end, svtype)
        add_reads_type7 = deal_type7(reads_type7, freq, svtype)
        return [], [], add_reads_type6 + add_reads_type7

    (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
     reads_type3_left, reads_type3_right, reads_type4, reads_type5_left, reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, chrom, start, end, is_single, readLength, tempDir,
        minmapq=minmapq, is_multmapfilter=is_multmapfilter)
    record_reads(bam, tempDir, chrom + "_" + str(start) + "_" + str(end) + "_" + svtype,
                 reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
                 reads_type3_left, reads_type3_right, reads_type4,
                 reads_type5_left, reads_type5_right)
    type_reads_num = (len(reads_type1_left) + len(reads_type1_right)
                      + len(reads_type2_left) + len(reads_type2_right)
                      + len(reads_type3_left) + len(reads_type3_right)
                      + len(reads_type4)
                      + len(reads_type5_left) + len(reads_type5_right))
    if type_reads_num == 0:
        return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num

    freq = freq * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)
    # Existing diagnostic output, kept as-is; the parenthesised form prints
    # the same text on Python 2 and is also valid syntax on Python 3.
    print(freq)
    if freq > 1:
        freq = 1

    # Three derived frequencies:
    #   freq_dup      -- (dup_num - 1) * freq, uncapped (deal_type3 / deal_type4)
    #   freq_dup_part -- half of freq_dup, capped at 1 (deal_type5)
    #   used_freq     -- freq_dup capped at 1 (deal_type2)
    freq_dup = (dup_num - 1) * freq
    freq_dup_part = freq_dup / 2.0
    if freq_dup_part > 1:
        freq_dup_part = 1
    if freq_dup > 1:
        used_freq = 1
    else:
        used_freq = freq_dup

    add_reads = []
    add_reads_type2 = deal_type2(ref, reads_type2_left, reads_type2_right, used_freq, start, end, svtype)
    add_reads.extend(add_reads_type2)

    # Supplementary lists changed by fangshs on 2018-06-06 (original remark
    # on supple1: "type2 bug").
    supple1 = reads_type3_right + reads_type2_right
    supple2 = reads_type3_left + reads_type1_left
    res = deal_type3(reads_type3_left, reads_type3_right, freq_dup, insertSize, start, end, svtype,
                     supple1=supple1, supple2=supple2)
    if res is False:
        return False, "The reads are not satisfied to modify, see more details in log file"
    # Only the add list of the deal_type3 triple is used for a duplication.
    _modify_reads_type3, add_reads_type3, _delete_reads_type3 = res
    add_reads.extend(add_reads_type3)

    add_reads_type4 = deal_type4(reads_type4, freq_dup, svtype, insertSize=insertSize)
    add_reads.extend(add_reads_type4)

    add_reads_type5 = deal_type5(ref, reads_type5_left, reads_type5_right, freq_dup_part, start, end, svtype)
    add_reads.extend(add_reads_type5)

    return [], [], add_reads
def deal_translocation_unbalance(bam, ref, svinfo, is_single, readLength, tempDir, insertSize, minmapq,
                                 is_multmapfilter):
    """Build the read edits for a "trans_unbalance" translocation in ``svinfo``.

    Two regions are classified with ``pos_type_classify``: the primary one
    (``svinfo.chrom:start-end``) and the partner one
    (``svinfo.trans_chrom:trans_start-trans_end``).  Reads of the partner
    region are passed to the modifying helpers (``deal_type1/2/3/5/6``) with
    ``subPos`` set to the primary region's "chrom:start-end" string; reads of
    the primary region (``reads_type7`` / ``reads_type4``) are passed to
    ``deal_type7`` / ``deal_type4`` and returned in the add slot.

    Returns:
        ``(modified_reads, [], add_reads)`` on success, or
        ``(False, message)`` when no primary-region read is kept by the
        classifier or ``deal_type3`` returns ``False``.
    """
    freq = svinfo.freq
    chrom = svinfo.chrom
    start = svinfo.start
    end = svinfo.end
    trans_chr, trans_start, trans_end = svinfo.trans_chrom, svinfo.trans_start, svinfo.trans_end
    info = "%s:%s-%s" % (chrom, start, end)
    svtype = "trans_unbalance"

    if is_single:
        # NOTE(review): minmapq / is_multmapfilter are forwarded only for the
        # partner region, and that second call overwrites filtered_read_num
        # -- confirm both are intended.
        reads_type6_left, reads_type6_right, reads_type7, filtered_read_num = pos_type_classify(
            bam, chrom, start, end, is_single, readLength, tempDir)
        trans_reads_type6_left, trans_reads_type6_right, trans_reads_type7, filtered_read_num = pos_type_classify(
            bam, trans_chr, trans_start, trans_end, is_single, readLength, tempDir,
            minmapq=minmapq, is_multmapfilter=is_multmapfilter)
        type_reads_num = len(reads_type6_left) + len(reads_type6_right) + len(reads_type7)
        # Guard the division below; mirrors the paired-end branch instead of
        # raising ZeroDivisionError when nothing is kept.
        if type_reads_num == 0:
            return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num
        freq = freq * (type_reads_num + filtered_read_num) / type_reads_num
        if freq > 1:
            freq = 1
        # NOTE(review): the left list is passed for both list arguments of
        # deal_type6 -- confirm.
        modify_reads_type6 = deal_type6(ref, trans_reads_type6_left, trans_reads_type6_left, freq,
                                        trans_start, trans_end, svtype, subPos=info)
        add_reads_type7 = deal_type7(reads_type7, freq, svtype)
        return modify_reads_type6, [], add_reads_type7

    # NOTE(review): as in the single-end branch, the filter arguments are
    # forwarded only for the partner region and filtered_read_num is
    # overwritten by the second call -- confirm.
    (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
     reads_type3_left, reads_type3_right, reads_type4, reads_type5_left, reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, chrom, start, end, is_single, readLength, tempDir)
    (trans_reads_type1_left, trans_reads_type1_right, trans_reads_type2_left, trans_reads_type2_right,
     trans_reads_type3_left, trans_reads_type3_right, trans_reads_type4,
     trans_reads_type5_left, trans_reads_type5_right,
     filtered_read_num) = pos_type_classify(
        bam, trans_chr, trans_start, trans_end, is_single, readLength, tempDir,
        extension=insertSize[1], minmapq=minmapq, is_multmapfilter=is_multmapfilter)

    # Only the primary region's groups are counted.
    type_reads_num = (len(reads_type1_left) + len(reads_type1_right)
                      + len(reads_type2_left) + len(reads_type2_right)
                      + len(reads_type3_left) + len(reads_type3_right)
                      + len(reads_type4)
                      + len(reads_type5_left) + len(reads_type5_right))
    if type_reads_num == 0:
        return False, "All reads is filtered in this scope, total & filtered reads num %s" % filtered_read_num

    # Rescale by (2 * kept + filtered) / (2 * kept); no cap at 1 is applied
    # in this branch.
    freq = freq * (type_reads_num * 2 + filtered_read_num) / (type_reads_num * 2)

    # Only the partner region's *_left groups are passed to the modifying
    # helpers; the remaining list argument is either empty or the same left
    # list again.
    modify_reads_type1 = deal_type1(ref, trans_reads_type1_left, [], freq, trans_start, trans_end, svtype,
                                    subPos=info)
    modify_reads_type2 = deal_type2(ref, [], trans_reads_type2_left, freq, trans_start, trans_end, svtype,
                                    subPos=info)
    res = deal_type3(trans_reads_type3_left, trans_reads_type3_left, freq, insertSize,
                     trans_start, trans_end, svtype,
                     supple1=reads_type3_left, supple2=reads_type3_right, subPos=info)
    if res is False:
        return False, "The reads are not satisfied to modify, see more details in log file"
    # Only the modify list of the deal_type3 triple is used here.
    modify_reads_type3, _add_reads_type3, _delete_reads_type3 = res
    add_reads_type4 = deal_type4(reads_type4, freq, svtype, insertSize=insertSize)
    modify_reads_type5 = deal_type5(ref, trans_reads_type5_left, trans_reads_type5_left, freq,
                                    trans_start, trans_end, svtype, subPos=info)

    return (modify_reads_type1 + modify_reads_type2 + modify_reads_type3 + modify_reads_type5,
            [],
            add_reads_type4)