def read_next(inf, strata):
    """Return the next record from *inf* whose 4th column is at least *strata*.

    Lines are pulled with ``inf.readline()`` and split on tab characters.
    The 4th column (index 3) is parsed as an integer; lines whose value is
    below *strata* are skipped.  Blank / whitespace-only lines (for example
    a trailing empty line at the end of the file) are skipped as well
    instead of failing on the missing column.

    Args:
        inf: An object with a ``readline()`` method returning text lines and
            an empty string at end of input.
        strata: Minimum accepted value for the 4th column.

    Returns:
        A ``Bed`` built from columns 1-3 (name, int, int) with its payload
        set to the integer from column 4, or ``False`` once the input is
        exhausted.

    Raises:
        IndexError: A non-blank line has fewer than four tab-separated
            columns.
        ValueError: Column 2, 3 or 4 of an accepted line is not an integer
            (column 4 is parsed for every non-blank line).
    """
    while True:
        line = inf.readline()
        if not line:
            # readline() returns an empty string only at end of input.
            return False
        if not line.strip():
            # Nothing but whitespace: no columns to parse, move on.
            continue
        # Strip the line terminator once so the last column parses cleanly.
        fields = line.rstrip().split("\t")
        num = int(fields[3])
        if num < strata:
            continue
        res = Bed(fields[0], int(fields[1]), int(fields[2]))
        res.set_payload(num)
        return res
def _read_and_measure(inf, strata):
    """Read the next qualifying record and return ``(record, length)``.

    ``length`` is ``record.length()`` when ``read_next`` produced a truthy
    record and ``0`` when the input is exhausted.
    """
    rec = read_next(inf, strata)
    return rec, (rec.length() if rec else 0)


def _unconsumed_tail(cur, other):
    """Return what is left of *cur* after *other*, or ``None``.

    A tail exists when ``cur.subtract(other)`` yields at least one piece and
    the last piece ends exactly at ``cur.end``.  The tail is a fresh ``Bed``
    carrying the payload of *cur*.
    """
    pieces = cur.subtract(other)
    if len(pieces) > 0 and pieces[-1].end == cur.end:
        last = pieces[-1]
        payload = cur.get_payload()
        # NOTE(review): ``start - 1`` presumably converts the ``start``
        # attribute back to the convention the Bed constructor expects --
        # confirm against the Bed class.
        tail = Bed(last.chr, last.start - 1, last.end)
        tail.set_payload(payload)
        return tail
    return None


def get_overlap(fileA, fileB, min_A, min_B):
    """Sum interval lengths of two files and the size of their overlap.

    Both files are opened with ``do_open`` and walked in lockstep using
    ``read_next`` (records below ``min_A`` / ``min_B`` are skipped by it).
    The walk relies on ``Bed.cmp``: ``0`` is treated as "the two records
    overlap", ``-1`` advances file A, anything else advances file B.
    Presumably both inputs must be sorted in the order ``Bed.cmp`` defines --
    verify against the callers.

    When two records overlap, the overlap size is added to the total.  A
    record that still has a piece reaching its own end after subtracting the
    other one is kept (trimmed to that piece) for the next comparison;
    otherwise the next record is read from its file.  Trimmed remainders are
    not added to the size totals again.

    Both handles are closed even when reading or parsing raises.

    Args:
        fileA: First input, passed to ``do_open``.
        fileB: Second input, passed to ``do_open``.
        min_A: Threshold handed to ``read_next`` for file A.
        min_B: Threshold handed to ``read_next`` for file B.

    Returns:
        ``[min_A, min_B, sizeA, sizeB, tot]`` where ``sizeA`` / ``sizeB`` are
        the summed ``length()`` of every record read from each file and
        ``tot`` is the summed ``overlap_size`` of overlapping pairs.
    """
    infA = do_open(fileA)
    infB = None
    try:
        infB = do_open(fileB)
        bufA, sizeA = _read_and_measure(infA, min_A)
        bufB, sizeB = _read_and_measure(infB, min_B)
        tot = 0

        # Merge-style walk: stop as soon as either side runs out.
        while bufA and bufB:
            c = bufA.cmp(bufB)
            if c == 0:
                tot += bufA.overlap_size(bufB)
                # bufA may be replaced below; B must be compared against
                # the record that actually overlapped it.
                prevA = bufA

                tailA = _unconsumed_tail(bufA, bufB)
                if tailA is not None:
                    bufA = tailA
                else:
                    bufA, added = _read_and_measure(infA, min_A)
                    sizeA += added

                tailB = _unconsumed_tail(bufB, prevA)
                if tailB is not None:
                    bufB = tailB
                else:
                    bufB, added = _read_and_measure(infB, min_B)
                    sizeB += added
            elif c == -1:
                bufA, added = _read_and_measure(infA, min_A)
                sizeA += added
            else:
                bufB, added = _read_and_measure(infB, min_B)
                sizeB += added

        # Drain whichever file still has records so its size total is
        # complete; nothing left in the other file can overlap them.
        while bufA:
            bufA, added = _read_and_measure(infA, min_A)
            sizeA += added
        while bufB:
            bufB, added = _read_and_measure(infB, min_B)
            sizeB += added
    finally:
        try:
            infA.close()
        finally:
            if infB is not None:
                infB.close()
    return [min_A, min_B, sizeA, sizeB, tot]