Пример #1
0
     # Candidates for pairing with (s1, e1): the entries stored in svDups under
     # (CHROM, s2, e2) followed by the entry stored in sv for interval (s2, e2).
     # s1/e1/s2/e2 and dupInfo are set before this fragment (not visible here).
     for dup5to3 in svDups[(record.CHROM, s2,
                            e2)] + [sv[record.CHROM][(s2, e2)]]:
         # NOTE(review): the next three values take only the two intervals as
         # input, not dup5to3, so (assuming overlapMetrics is pure) every
         # candidate recomputes the same numbers.
         (recO, nestedO, recUnion, bpOffset,
          oLen) = overlapMetrics((s1, e1), (s2, e2))
         # Smaller and larger of the start-to-start / end-to-end distances.
         minBpOffset = min(abs(s2 - s1), abs(e2 - e1))
         maxBpOffset = max(abs(s2 - s1), abs(e2 - e1))
         cc = carrierConcordance(nonRefHap, dup5to3['hap'])
         # All four thresholds must hold before the read-depth check is run.
         if (nestedO >= minNestedOverlap) and (
                 minBpOffset < maxInsertionOffset) and (
                     maxBpOffset > minDuplicationLength) and (
                         cc >= minCarrierConcordance):
             rdRatio = rdAltRefRatio(
                 ((s1, e1), (s2, e2)),
                 (nonRefHap, dup5to3['hap']),
                 (rc, dup5to3['rc']))
             # Element 0 of validRdRatio's result is used as the accept flag.
             if validRdRatio(recO / nestedO, rdRatio,
                             args.readDepth)[0]:
                 # Score = the smaller 'pe' value of the pair, weighted by the
                 # carrier concordance.
                 score = float(min(peCount,
                                   dup5to3['pe'])) * float(cc)
                 # Keep only a strictly better candidate; the stored span is
                 # the union of both intervals.
                 if score > dupInfo['score']:
                     dupInfo = {
                         'id': dup5to3['id'],
                         'start': min(s1, s2),
                         'end': max(e1, e2),
                         'score': score
                     }
 # Only when dupInfo carries a non-negative score: make sure this chromosome
 # has its banyan SortedDict (keyed by (int, int) tuples, red-black tree,
 # overlapping-intervals updator) before it is used.
 if dupInfo['score'] >= 0:
     # The "in" operator replaces dict.has_key(), which exists only in
     # Python 2; "in" behaves the same on both Python 2 and Python 3.
     if record.CHROM not in dupRegion:
         dupRegion[record.CHROM] = banyan.SortedDict(
             key_type=(int, int),
             alg=banyan.RED_BLACK_TREE,
             updator=banyan.OverlappingIntervalsUpdator)
Пример #2
0
                     # Store the sample's genotype as a list of ints (GT split
                     # on '/') and add its DV value to the running support total.
                     # NOTE(review): split('/') presumably expects unphased GT
                     # strings -- confirm against the input VCFs.
                     nonRefHap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')]
                     support += call['DV']
 # Nothing to pair when no sample was recorded in nonRefHap.
 if len(nonRefHap):
     # Collect overlapping calls
     s1 = record.POS
     e1 = record.INFO['END']
     # Best partner so far; the -1 score is beaten by any accepted candidate.
     invInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1}
     for s2, e2 in sv[record.CHROM].overlap((s1, e1)):
         # Candidates: the entries in svDups for (CHROM, s2, e2) followed by
         # the entry stored in sv for the interval itself.
         for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]:
             # NOTE(review): the next three values take only the two intervals
             # as input, so they are recomputed identically per candidate.
             (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2))
             minBpOffset = min(abs(s2-s1), abs(e2-e1))
             maxBpOffset = max(abs(s2-s1), abs(e2-e1))
             cc = carrierConcordance(nonRefHap, inv3to3['hap'])
             if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (cc >= minCarrierConcordance):
                 rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, inv3to3['hap']), (rc, inv3to3['rc']))
                 valid, updSVType = validRdRatio(recO/nestedO, rdRatio)
                 if valid:
                     # Skip the candidate unless the record is an INV or the
                     # read-depth check returned "DUP" as the updated type.
                     if (record.INFO['SVTYPE'] != 'INV') and (updSVType != "DUP"):
                         continue
                     # A pair typed as DUP must also exceed minDuplicationLength
                     # in its larger breakpoint offset.
                     if (updSVType != 'DUP') or (maxBpOffset > minDuplicationLength):
                         # Score = the smaller 'sup' value of the pair, weighted
                         # by the carrier concordance.
                         score = float(min(support, inv3to3['sup'])) * float(cc)
                         # Keep only a strictly better candidate; the stored
                         # span is the union of both intervals.
                         if score > invInfo['score']:
                             invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
     if invInfo['score'] >= 0:
         # NOTE(review): dict.has_key() exists only in Python 2; under
         # Python 3 this needs "record.CHROM not in invRegion".
         if not invRegion.has_key(record.CHROM):
             invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
         # Add the (record id, partner id) pair as a node of G and attach its
         # score under the 'Score' key.
         # NOTE(review): G.node[...] looks like the networkx 1.x accessor --
         # confirm which graph library/version G comes from.
         G.add_node((record.ID, invInfo['id']))
         G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score']
         # Visit every interval already stored for this chromosome that
         # overlaps the new span and look up the id pair stored with it.
         for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])):
             (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)]
             # The meaning of the 0.1 and 10000 arguments is defined by
             # overlapValid, which is not visible in this fragment.
             if overlapValid((invInfo['start'], invInfo['end']), (invIStart, invIEnd), 0.1, 10000):
Пример #3
0
                    # Only calls with a non-zero gt_type and a positive DV value
                    # contribute: store the genotype as a list of ints (GT split
                    # on '/') and add DV to the running peCount total.
                    if (call.gt_type != 0) and (call['DV'] > 0):
                        nonRefHap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')]
                        peCount += call['DV']
            # Nothing to pair when no sample was recorded in nonRefHap.
            if nonRefHap:
                # Collect overlapping calls
                s1 = record.POS
                e1 = record.INFO['END']
                # Best partner so far; the -1 score is beaten by any accepted candidate.
                invInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1}
                for s2, e2 in sv[record.CHROM].overlap((s1, e1)):
                    # These values take only the two intervals as input, so they
                    # are computed once per interval instead of once per candidate
                    # (assumes overlapMetrics is a pure function of its arguments).
                    (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2))
                    minBpOffset = min(abs(s2-s1), abs(e2-e1))
                    # Candidates: the entries in svDups for (CHROM, s2, e2) followed
                    # by the entry stored in sv for the interval itself.
                    for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]:
                        cc = carrierConcordance(nonRefHap, inv3to3['hap'])
                        if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (cc >= minCarrierConcordance):
                            rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, inv3to3['hap']), (rc, inv3to3['rc']))
                            # Element 0 of validRdRatio's result is the accept flag.
                            if validRdRatio(recO/nestedO, rdRatio, args.readDepth)[0]:
                                # Score = the smaller 'pe' value of the pair, weighted
                                # by the carrier concordance.
                                score = float(min(peCount, inv3to3['pe'])) * float(cc)
                                # Keep only a strictly better candidate; the stored
                                # span is the union of both intervals.
                                if score > invInfo['score']:
                                    invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
                if invInfo['score'] >= 0:
                    # "in" replaces dict.has_key(), which exists only in Python 2.
                    if record.CHROM not in invRegion:
                        invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                    # Add the (record id, partner id) pair as a node of G and attach
                    # its score under the 'Score' key.
                    # NOTE(review): G.node[...] looks like the networkx 1.x accessor --
                    # confirm which graph library/version G comes from.
                    G.add_node((record.ID, invInfo['id']))
                    G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score']
                    # Link the new node to every previously stored pair whose
                    # interval overlaps the new span and passes overlapValid (the
                    # meaning of 0.1 and 10000 is defined by that helper).
                    for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])):
                        (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)]
                        if overlapValid((invInfo['start'], invInfo['end']), (invIStart, invIEnd), 0.1, 10000):
                            G.add_edge((record.ID, invInfo['id']), (id1, id2))
                    # Register the new span only after the overlap query above, so
                    # the node is never linked to itself.
                    invRegion[record.CHROM][(invInfo['start'], invInfo['end'])] = (record.ID, invInfo['id'])

# Pick best pair of inversions out of all overlapping calls
Пример #4
0
 # For every stored interval overlapping (s1, e1), score each candidate
 # partner and keep the best one in invInfo.  s1, e1, invInfo, nonRefHap,
 # support and rc are set before this fragment (not visible here).
 for s2, e2 in sv[record.CHROM].overlap((s1, e1)):
     # These values take only the two intervals as input, so they are
     # computed once per interval instead of once per candidate (assumes
     # overlapMetrics is a pure function of its arguments).
     (recO, nestedO, recUnion, bpOffset,
      oLen) = overlapMetrics((s1, e1), (s2, e2))
     minBpOffset = min(abs(s2 - s1), abs(e2 - e1))
     maxBpOffset = max(abs(s2 - s1), abs(e2 - e1))
     # Candidates: the entries in svDups for (CHROM, s2, e2) followed by the
     # entry stored in sv for the interval itself.
     for inv3to3 in svDups[(record.CHROM, s2,
                            e2)] + [sv[record.CHROM][(s2, e2)]]:
         cc = carrierConcordance(nonRefHap, inv3to3['hap'])
         if (nestedO >= minNestedOverlap) and (
                 minBpOffset < maxInsertionOffset) and (
                     cc >= minCarrierConcordance):
             rdRatio = rdAltRefRatio(
                 ((s1, e1), (s2, e2)),
                 (nonRefHap, inv3to3['hap']),
                 (rc, inv3to3['rc']))
             valid, updSVType = validRdRatio(
                 recO / nestedO, rdRatio)
             if valid:
                 # Skip the candidate unless the record is an INV or the
                 # read-depth check returned "DUP" as the updated type.
                 if (record.INFO['SVTYPE'] !=
                         'INV') and (updSVType != "DUP"):
                     continue
                 # A pair typed as DUP must also exceed minDuplicationLength
                 # in its larger breakpoint offset.
                 if (updSVType != 'DUP') or (
                         maxBpOffset > minDuplicationLength):
                     # Score = the smaller 'sup' value of the pair, weighted
                     # by the carrier concordance.
                     score = float(min(
                         support, inv3to3['sup'])) * float(cc)
                     # Keep only a strictly better candidate; the stored
                     # span is the union of both intervals.
                     if score > invInfo['score']:
                         invInfo = {
                             'id': inv3to3['id'],
                             'start': min(s1, s2),
                             'end': max(e1, e2),
                             'score': score
                         }