# Excerpt: search for a partner call for the current VCF record.
# NOTE(review): the original line breaks and indentation are lost; this is a
# flattened excerpt that starts and ends inside enclosing blocks. As written,
# everything after the inline '#' parses as a comment. The nesting described
# below is presumed from statement order -- confirm against the real file.
#
# Reading the statements in order:
#   * call['DV'] is added to peCount, then the search runs only when
#     nonRefHap is non-empty.
#   * The query window is (max(s1 - padding, 0), e1 + padding), where padding
#     is spacerLength for SVTYPE "DEL" and 0 otherwise.
#   * Every overlapping interval in sv[record.CHROM] contributes its own entry
#     plus any entries stored under svDups[(chrom, s2, e2)]; a candidate whose
#     'id' equals record.ID is skipped.
#   * spacer = min(|e1 - s2|, |e2 - s1|). delLength is the shorter of the two
#     interval lengths, or for SVTYPE "INV" the smaller of the start offset
#     and the end offset between the two intervals.
#   * A candidate passes when spacer <= spacerLength,
#     cc >= minCarrierConcordance, min(spacer, delLength) >= deletionLength
#     and recO <= maxReciprocalOverlap.
#   * For each ID, the leading run of uppercase letters and '0' characters is
#     stripped and the rest parsed with int(); if that number is a key of
#     `control`, altRefReadDepthRatio() supplies the ratio, otherwise the
#     ratio stays 1.0.
#     NOTE(review): an ID consisting only of such characters leaves an empty
#     string and int('') raises ValueError -- confirm IDs always carry a
#     non-zero numeric suffix.
#   * One row is printed per passing candidate; the score
#     min(peCount, candidate 'pe') * cc is computed when args.readDepth is
#     falsy or both ratios are below 0.8.
# `hap`, `rc`, `sv`, `svDups`, `control` and the thresholds come from outside
# this excerpt.
peCount += call['DV'] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO['END'] invInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1} padding = 0 if record.INFO['SVTYPE'] == "DEL": padding = spacerLength for s2, e2 in sv[record.CHROM].overlap((max(s1 - padding, 0), e1 + padding)): for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: if inv3to3['id'] == record.ID: continue spacer = min(abs(e1-s2), abs(e2-s1)) (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) cc = carrierConcordance(nonRefHap, inv3to3['nonRefHap']) delLength = min(e1-s1, e2-s2) if record.INFO['SVTYPE'] == "INV": delLength = min(max(s1,s2)-min(s1,s2), max(e1,e2)-min(e1,e2)) if (spacer <= spacerLength) and (cc >= minCarrierConcordance) and (min(spacer, delLength) >= deletionLength) and (recO <= maxReciprocalOverlap): svControlID1 = re.sub(r"^[A-Z0]*","", record.ID) rdRatio1 = 1.0 if int(svControlID1) in control.keys(): rdRatio1 = altRefReadDepthRatio(rc, control[int(svControlID1)], hap) svControlID2 = re.sub(r"^[A-Z0]*","", inv3to3['id']) rdRatio2 = 1.0 if int(svControlID2) in control.keys(): rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap']) print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2) if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)): score = float(min(peCount, inv3to3['pe'])) * float(cc)
# Excerpt: collect carrier samples for the current record, pick the best
# overlapping partner call, and register the pair in graph G.
# NOTE(review): the original line breaks and indentation are lost; this is a
# flattened excerpt that starts inside an enclosing loop over calls. The
# nesting described below is presumed from statement order -- confirm against
# the real file.
#
# Reading the statements in order:
#   * For a called sample that is in sampleSet, call['RC'] is stored in rc.
#     When gt_type != 0 and call['DV'] > 0, the GT string is split on '/' and
#     stored as a list of ints in nonRefHap, and DV is added to peCount.
#     NOTE(review): only '/' is split; a phased GT such as "0|1" would reach
#     int() unsplit and raise ValueError -- confirm GTs are always unphased.
#   * When nonRefHap is non-empty, every interval in sv[record.CHROM] that
#     overlaps (s1, e1) contributes its own entry plus any entries stored
#     under svDups[(chrom, s2, e2)].
#   * A candidate passes when nestedO >= minNestedOverlap,
#     minBpOffset < maxInsertionOffset, maxBpOffset > minDuplicationLength,
#     cc >= minCarrierConcordance, and the first element returned by
#     validRdRatio(recO / nestedO, rdRatio, args.readDepth) is truthy.
#   * The highest score, min(peCount, candidate 'pe') * cc, is kept in
#     dupInfo together with the union span (min start, max end). The initial
#     score of -1 means "no partner found".
#   * With a partner (score >= 0): dupRegion gets an interval tree for the
#     chromosome if it has none, the node (record.ID, partner id) is added to
#     G with a 'Score' attribute, and an edge is added to every pair already
#     in dupRegion whose interval overlaps the union span and passes
#     overlapValid(..., 0.1, 10000).
# NOTE(review): dict.has_key() exists only in Python 2;
# `record.CHROM not in dupRegion` is the portable spelling.
# NOTE(review): if G is a networkx graph, the `G.node` attribute was removed
# in networkx 2.4 -- confirm the pinned version.
if (call.called) and (call.sample in sampleSet): rc[call.sample] = call['RC'] if (call.gt_type != 0) and (call['DV'] > 0): nonRefHap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')] peCount += call['DV'] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO['END'] dupInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1} for s2, e2 in sv[record.CHROM].overlap((s1, e1)): for dup5to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2-s1), abs(e2-e1)) maxBpOffset = max(abs(s2-s1), abs(e2-e1)) cc = carrierConcordance(nonRefHap, dup5to3['hap']) if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (maxBpOffset > minDuplicationLength) and (cc >= minCarrierConcordance): rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, dup5to3['hap']), (rc, dup5to3['rc'])) if validRdRatio(recO/nestedO, rdRatio, args.readDepth)[0]: score = float(min(peCount, dup5to3['pe'])) * float(cc) if score > dupInfo['score']: dupInfo = {'id': dup5to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score} if dupInfo['score'] >= 0: if not dupRegion.has_key(record.CHROM): dupRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node((record.ID, dupInfo['id'])) G.node[(record.ID, dupInfo['id'])]['Score'] = dupInfo['score'] for dupIStart, dupIEnd in dupRegion[record.CHROM].overlap((dupInfo['start'], dupInfo['end'])): (id1, id2) = dupRegion[record.CHROM][(dupIStart, dupIEnd)] if overlapValid((dupInfo['start'], dupInfo['end']), (dupIStart, dupIEnd), 0.1, 10000): G.add_edge((record.ID, dupInfo['id']), (id1, id2))
# Excerpt: pick the best overlapping partner call for the current record.
# NOTE(review): the original line breaks and indentation are lost. This
# flattened excerpt opens in the middle of a list comprehension and stops in
# the middle of the dupInfo dict literal. The nesting described below is
# presumed from statement order -- confirm against the real file.
#
# Reading the statements in order:
#   * The opening tokens finish a comprehension that turns the
#     '/'-separated GT string into ints; call['DV'] is then added to peCount.
#   * When nonRefHap is non-empty, every interval in sv[record.CHROM] that
#     overlaps (s1, e1) contributes its own entry plus any entries stored
#     under svDups[(chrom, s2, e2)].
#   * minBpOffset and maxBpOffset are the smaller and larger of the start
#     offset |s2 - s1| and the end offset |e2 - e1|.
#   * A candidate passes when nestedO >= minNestedOverlap,
#     minBpOffset < maxInsertionOffset, maxBpOffset > minDuplicationLength,
#     cc >= minCarrierConcordance, and the first element returned by
#     validRdRatio(recO / nestedO, rdRatio, args.readDepth) is truthy.
#   * The score is min(peCount, candidate 'pe') * cc; a score above the
#     current best replaces dupInfo (initial score -1).
int(gVal) for gVal in call['GT'].split('/') ] peCount += call['DV'] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO['END'] dupInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1} for s2, e2 in sv[record.CHROM].overlap((s1, e1)): for dup5to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2 - s1), abs(e2 - e1)) maxBpOffset = max(abs(s2 - s1), abs(e2 - e1)) cc = carrierConcordance(nonRefHap, dup5to3['hap']) if (nestedO >= minNestedOverlap) and ( minBpOffset < maxInsertionOffset) and ( maxBpOffset > minDuplicationLength) and ( cc >= minCarrierConcordance): rdRatio = rdAltRefRatio( ((s1, e1), (s2, e2)), (nonRefHap, dup5to3['hap']), (rc, dup5to3['rc'])) if validRdRatio(recO / nestedO, rdRatio, args.readDepth)[0]: score = float(min(peCount, dup5to3['pe'])) * float(cc) if score > dupInfo['score']: dupInfo = { 'id': dup5to3['id'],
# Excerpt: accumulate read support and pick the best overlapping partner call
# for the current record.
# NOTE(review): the original line breaks and indentation are lost. This
# flattened excerpt opens inside one branch of an if/else and stops at a
# dangling `if`. The nesting described below is presumed from statement
# order -- confirm against the real file.
#
# Reading the statements in order:
#   * One branch (its condition is out of view) adds call["RV"] to support.
#     The other branch requires call["DV"] >= 2, stores the '/'-separated GT
#     as ints in nonRefHap, and adds DV to support.
#   * When nonRefHap is non-empty, every interval in sv[record.CHROM] that
#     overlaps (s1, e1) contributes its own entry plus any entries stored
#     under svDups[(chrom, s2, e2)].
#   * A candidate passes the first gate when nestedO >= minNestedOverlap,
#     minBpOffset < maxInsertionOffset and cc >= minCarrierConcordance.
#   * validRdRatio(recO / nestedO, rdRatio) is unpacked as
#     (valid, updSVType). A valid candidate is skipped when the record's
#     SVTYPE is not "INV" and updSVType is not "DUP".
#   * The score min(support, candidate "sup") * cc is computed when updSVType
#     is not "DUP", or when maxBpOffset > minDuplicationLength.
#   * The closing comparison against invInfo["score"] (initial -1) has its
#     body cut off.
support += call["RV"] else: if call["DV"] >= 2: nonRefHap[call.sample] = [int(gVal) for gVal in call["GT"].split("/")] support += call["DV"] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO["END"] invInfo = {"id": "NA", "start": 0, "end": 0, "score": -1} for s2, e2 in sv[record.CHROM].overlap((s1, e1)): for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2 - s1), abs(e2 - e1)) maxBpOffset = max(abs(s2 - s1), abs(e2 - e1)) cc = carrierConcordance(nonRefHap, inv3to3["hap"]) if ( (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (cc >= minCarrierConcordance) ): rdRatio = rdAltRefRatio( ((s1, e1), (s2, e2)), (nonRefHap, inv3to3["hap"]), (rc, inv3to3["rc"]) ) valid, updSVType = validRdRatio(recO / nestedO, rdRatio) if valid: if (record.INFO["SVTYPE"] != "INV") and (updSVType != "DUP"): continue if (updSVType != "DUP") or (maxBpOffset > minDuplicationLength): score = float(min(support, inv3to3["sup"])) * float(cc) if score > invInfo["score"]: