def getWindowTree(selectRes):
    """Build an interval tree from a sequence of window records.

    The shape of the first record decides how every record is read:

    * three fields -> ``(start, end, score)``; the score is stored as the
      node's ``other`` payload;
    * otherwise    -> ``(start, end)``; the window size ``end - start + 1``
      is stored as the payload instead.

    The first record seeds the root node and each remaining record is
    inserted in order. Returns the root returned by the last insert.
    """
    first = selectRes[0]
    has_score = len(first) == 3

    if has_score:
        lo, hi, payload = first
    else:
        lo, hi = first
        # no score column: use the window size as the payload
        payload = hi - lo + 1
    root = IntervalNode(lo, hi, other=payload)

    # grow the tree with the remaining records
    for record in selectRes[1:]:
        if has_score:
            lo, hi, payload = record
        else:
            lo, hi = record
            payload = hi - lo + 1
        root = root.insert(lo, hi, other=payload)

    return root
def setUp(self):
    """Create a tree of eleven intervals ``[i, i + 9]`` for i = 0, 10, ..., 100.

    The root is seeded with the ``[50, 59]`` interval; the other ten are
    inserted in ascending order. The resulting root is stored on
    ``self.intervals``.
    """
    root = IntervalNode(Interval(50, 59))
    # 50 is already in the tree as the root, so leave it out here.
    remaining_starts = [s for s in range(0, 110, 10) if s != 50]
    for begin in remaining_starts:
        root = root.insert(Interval(begin, begin + 9))
    self.intervals = root
def processTarget(oId, submatches, primaries, tfotargets, annotations, parameters):
    """Process one target region: all of its submatches against its off-targets.

    Args:
        oId: identifier used as the key into ``submatches``, ``primaries``
            and ``tfotargets``.
        submatches: mapping from id to the list of submatches with that id.
        primaries: container of known primary targets; only membership of
            ``oId`` is checked here.
        tfotargets: mapping from id to a mapping whose keys are
            ``(start, end)`` off-target regions.
        annotations: not used by this function; kept for interface
            compatibility.
        parameters: not used by this function; kept for interface
            compatibility.

    Returns:
        The concatenation of ``processSubTarget`` results for every
        submatch of ``oId``, in order.

    Raises:
        KeyError: if ``oId`` is missing from ``submatches``.
        SystemExit: with status 1 if ``oId`` has no entry in ``primaries``.
    """
    query = submatches[oId]  # list of submatches of the same id

    # quit if this submatch cannot be related to a primary target region
    if oId not in primaries:
        # (oId,) keeps the formatting correct even if the id is itself a tuple
        sys.stderr.write(
            "[ERROR] submatch does not have a corresponding primary target : %s\n"
            % (oId,))
        sys.exit(1)

    # placeholder root so the tree exists even when there are no off-targets
    intersect_tree = IntervalNode(-1, -1, -1)

    # accumulate off-target data; stays an empty list when oId has none
    offtarget_data = []
    if oId in tfotargets:
        offtarget_data = tfotargets[oId]
        # put every off-target region into the interval tree
        for start, end in offtarget_data:
            intersect_tree = intersect_tree.insert(start, end)

    # query all submatches and join their output in one pass
    return "".join(
        processSubTarget(q, oId, intersect_tree, offtarget_data)
        for q in query)
def setUp(self):
    """Create a large tree for the tests and store its root on ``self.intervals``.

    Contents, in insertion order: a ``[1, 2]`` seed interval, one point
    interval ``[i, i]`` for every multiple of 10 below ``self.max``, and
    6000 copies of ``[0, 1]``.
    """
    root = IntervalNode(Interval(1, 2))
    self.max = 1000000

    # point intervals at 0, 10, 20, ... up to (but excluding) self.max
    for position in range(0, self.max, 10):
        point = Interval(position, position)
        root = root.insert(point)

    # many identical intervals at the low end
    for _ in range(6000):
        root = root.insert(Interval(0, 1))

    self.intervals = root
def setUp(self):
    """Create random intervals and a tree holding all of them.

    For every centre in ``range(11, 20000, 15)`` between 2 and 5 intervals
    are generated, each reaching 1-10 units to the left and 1-10 units to
    the right of the centre. The list is stored on ``self.intervals`` and
    the tree built from it (first interval as root, the rest inserted in
    order) on ``self.tree``.
    """
    generated = []
    for centre in range(11, 20000, 15):
        how_many = random.randint(2, 5)
        for _ in range(how_many):
            # draw the left reach first, then the right reach
            left = random.randint(1, 10)
            right = random.randint(1, 10)
            generated.append(Interval(centre - left, centre + right))

    root = IntervalNode(generated[0])
    for interval in generated[1:]:
        root = root.insert(interval)

    self.intervals = generated
    self.tree = root
def __init__(self, fileName):
    """Load a tab-separated interval list into one interval tree per chromosome.

    Lines starting with ``#`` or ``@`` are collected verbatim in
    ``self.headline``. Every other non-blank line must have at least three
    tab-separated fields ``chrom``, ``start``, ``end``; the interval is
    stored widened by one on each side (``start - 1``, ``end + 1``).

    Attributes set:
        N: number of data lines read.
        headline: list of header lines.
        tree: dict mapping chromosome name to the root ``IntervalNode``.

    If ``fileName`` does not exist the object is left empty and nothing
    is printed.

    Raises:
        IndexError: if a data line has fewer than three fields.
        ValueError: if a start or end field is not an integer.
    """
    self.N = 0
    self.headline = []
    self.tree = {}
    if not os.path.exists(fileName):
        return

    current = None  # chromosome of the previous data line
    # ``with`` guarantees the handle is closed even if a line fails to parse
    with open(fileName, 'rt') as fh:
        for raw in fh:
            line = raw.strip()
            if not line:
                # blank lines used to crash on line[0]; skip them
                continue
            if line[0] in ('#', '@'):
                self.headline.append(line)
                continue

            fields = line.split('\t')
            self.N += 1
            chrom = fields[0]
            start = int(fields[1]) - 1
            end = int(fields[2]) + 1

            if chrom != current:
                if current is not None and verbose:
                    print('scan %s line %d chrom %s' % (fileName, self.N, current))
                current = chrom

            # Extend the tree already stored for this chromosome, so a
            # chromosome that reappears later in the file keeps its earlier
            # intervals instead of being overwritten.
            if chrom in self.tree:
                self.tree[chrom] = self.tree[chrom].insert(start, end)
            else:
                self.tree[chrom] = IntervalNode(start, end)

    # single-argument print(...) behaves the same on Python 2 and 3
    print("Loaded IntervalList:\t" + fileName)
    print("events:\t" + str(self.N))
# Per-chromosome output file with a CSV header row.
# NOTE(review): `chr` is read here before the `for chr in chr_list` loop
# below — presumably these statements sit inside an earlier loop whose header
# is outside this view; TODO confirm.
f_file = dirOutPerCategory + prefix + "." + chr + ".genomicFeature"
# NOTE(review): f_file is opened for writing twice; the handle bound to
# `outfile` is not used anywhere in the visible code — confirm it is needed.
outfile = open(f_file, 'w')
outFile[chr] = open(f_file, 'w')
outFile[chr].write( 'readName,chr,category, geneID, geneName, flag_multiMapped\n')

#DATA STRUCTURE - per chr
# One dict per feature category, keyed by chromosome name.
tree_utr3 = {}
tree_utr5 = {}
tree_cds = {}
tree_geneCoordinates = {}
tree_rRNA = {}
tree_intergenic = {}  # +10,000

# Seed every chromosome's tree in each category with a (0, 0) root node.
for chr in chr_list:
    tree_utr3[chr] = IntervalNode(0, 0)
    tree_utr5[chr] = IntervalNode(0, 0)
    tree_cds[chr] = IntervalNode(0, 0)
    tree_geneCoordinates[chr] = IntervalNode(0, 0)
    tree_rRNA[chr] = IntervalNode(0, 0)
    tree_intergenic[chr] = IntervalNode(0, 0)

print("Load gene annotations ...")

geneUTR3 = {}

#UTR3
print("Load", utr3_file)
# Open the UTR3 annotation file as CSV; what is done with `reader` is outside
# this view.
with open(utr3_file, 'r') as f:
    reader = csv.reader(f)
# NOTE(review): the next two statements are the tail of a function whose
# `def` line is outside this view (presumably `generate`, which is mapped
# over below) — TODO confirm. They return a (lo, hi) pair with hi between
# 1 and SPAN above lo.
hi = lo + randint(1, SPAN)
return (lo, hi)


def find(start, end, tree):
    """Returns a list with the overlapping intervals.

    Calls ``tree.intersect(start, end, callback)`` with a callback that
    collects every object it is given, then returns those objects as
    ``(start, end)`` tuples.
    """
    out = []
    tree.intersect(start, end, lambda x: out.append(x))
    return [(x.start, x.end) for x in out]


# use this to force both examples to generate the same data
seed(10)

# generate 10 thousand random intervals
# (N is defined outside this view — the count above is the original
# author's comment, not verified here)
data = map(generate, xrange(N))

# generate the intervals to query over
query = map(generate, xrange(10))

# start the root at the first element
start, end = data[0]
tree = IntervalNode(start, end)

# build an interval tree from the rest of the data
for start, end in data[1:]:
    tree = tree.insert(start, end)

# report the overlaps found for each query interval
for start, end in query:
    overlap = find(start, end, tree)
    print '(%s, %s) -> %s' % (start, end, overlap)
def build_interval_tree(intervals):
    """Return the root of an interval tree containing every item of ``intervals``.

    The first item seeds the root node (the interval object itself is
    passed to ``IntervalNode``); each following item is inserted in order,
    and the node returned by the last insert is the result.
    """
    tree = IntervalNode(intervals[0])
    for interval in intervals[1:]:
        tree = tree.insert(interval)
    return tree