Пример #1
0
def read_next(inf,strata):
  """Return the next entry of *inf* whose column-4 value is at least *strata*.

  Lines are read one at a time with ``inf.readline()`` and split on tabs.
  Column index 3 is parsed as an integer; lines whose value is below
  *strata* are skipped. For the first line that passes, a ``Bed`` is built
  from column 0 and the integer values of columns 1 and 2, and the column-3
  value is stored on it with ``set_payload``.

  Blank (whitespace-only) lines are skipped, so a trailing empty line at the
  end of a file does not abort the scan.

  Returns:
    The ``Bed`` for the next qualifying line, or ``False`` when the file is
    exhausted.

  Raises:
    IndexError: a non-blank line has fewer than four tab-separated columns.
    ValueError: column 1, 2 or 3 is not an integer.
  """
  while True:
    line = inf.readline()
    if not line:
      # readline() returns an empty string only at end of file.
      return False
    if not line.strip():
      continue
    # Trailing whitespace is stripped before splitting so the newline never
    # ends up inside the last column.
    fields = line.rstrip().split("\t")
    score = int(fields[3])
    if score < strata:
      continue
    entry = Bed(fields[0], int(fields[1]), int(fields[2]))
    entry.set_payload(score)
    return entry
Пример #2
0
def get_overlap(fileA,fileB,min_A,min_B):
  """Walk two interval files in lockstep and total their overlap.

  Entries are pulled from each file with ``read_next`` (using *min_A* and
  *min_B* as the respective thresholds) and compared with ``Bed.cmp``.
  NOTE(review): this single forward pass presumably requires both inputs to
  be sorted in the order ``Bed.cmp`` uses -- confirm against the callers.

  Args:
    fileA: first input, passed to ``do_open``.
    fileB: second input, passed to ``do_open``.
    min_A: threshold handed to ``read_next`` for entries of *fileA*.
    min_B: threshold handed to ``read_next`` for entries of *fileB*.

  Returns:
    ``[min_A, min_B, sizeA, sizeB, tot]`` where ``sizeA`` / ``sizeB`` are the
    summed ``length()`` of every entry ``read_next`` returned for each file
    (each entry is counted once, when it is read; trimmed remainders are not
    counted again) and ``tot`` is the summed ``overlap_size`` of every pair
    for which ``cmp`` returned 0.

  Both files are closed before returning, including when an error is raised
  part-way through the scan.
  """
  infA = do_open(fileA)
  try:
    infB = do_open(fileB)
    try:
      bufA, sizeA = _read_and_measure(infA, min_A)
      bufB, sizeB = _read_and_measure(infB, min_B)
      tot = 0
      while bufA and bufB:
        c = bufA.cmp(bufB)
        if c == 0:
          tot += bufA.overlap_size(bufB)
          # bufA may be replaced below, but B must still be trimmed against
          # the A entry that was current when the pair was compared.
          saveA = bufA
          nA = bufA.subtract(bufB)
          if len(nA) > 0 and nA[-1].end == bufA.end:
            # The last leftover piece reaches the end of bufA: keep it as the
            # current A entry instead of reading a new one.
            # NOTE(review): the ``start-1`` presumably converts the piece
            # returned by subtract() back to the start convention the Bed
            # constructor expects -- confirm against Bed.
            num = bufA.get_payload()
            bufA = Bed(nA[-1].chr,nA[-1].start-1,nA[-1].end)
            bufA.set_payload(num)
          else:
            bufA, added = _read_and_measure(infA, min_A)
            sizeA += added

          nB = bufB.subtract(saveA)
          if len(nB) > 0 and nB[-1].end == bufB.end:
            num = bufB.get_payload()
            bufB = Bed(nB[-1].chr,nB[-1].start-1,nB[-1].end)
            bufB.set_payload(num)
          else:
            bufB, added = _read_and_measure(infB, min_B)
            sizeB += added
        elif c == -1:
          # cmp() returned -1: advance A only.
          bufA, added = _read_and_measure(infA, min_A)
          sizeA += added
        else:
          # Any other cmp() result: advance B only.
          bufB, added = _read_and_measure(infB, min_B)
          sizeB += added
      # One side ran out; drain whatever is left of the other so its size
      # total still covers the whole file.
      while bufA:
        bufA, added = _read_and_measure(infA, min_A)
        sizeA += added
      while bufB:
        bufB, added = _read_and_measure(infB, min_B)
        sizeB += added
      return [min_A,min_B,sizeA,sizeB,tot]
    finally:
      infB.close()
  finally:
    infA.close()


def _read_and_measure(inf, strata):
  """Read the next qualifying entry; return ``(entry, length)``, length 0 at end of file."""
  entry = read_next(inf, strata)
  if entry:
    return entry, entry.length()
  return entry, 0