Exemplo n.º 1
0
def getWindowTree(selectRes):
    """Build an interval tree from the rows of ``selectRes``.

    ``selectRes`` is a non-empty sequence of rows. The length of the FIRST
    row decides how every row is unpacked:

    * three columns -> ``(start, end, score)``; the score is stored as the
      node's ``other`` payload;
    * otherwise     -> ``(start, end)``; the interval size
      ``end - start + 1`` is stored as the payload instead.

    Returns the root node obtained after inserting every row.
    """
    scored = len(selectRes[0]) == 3

    def _split(row):
        # Return (start, end, payload) according to the detected row shape.
        if scored:
            lo, hi, payload = row
        else:
            lo, hi = row
            # use size as the 3rd column
            payload = hi - lo + 1
        return lo, hi, payload

    # the first row becomes the root of the tree
    lo, hi, payload = _split(selectRes[0])
    root = IntervalNode(lo, hi, other=payload)

    # every remaining row is inserted; insert() hands back the new root
    for row in selectRes[1:]:
        lo, hi, payload = _split(row)
        root = root.insert(lo, hi, other=payload)
    return root
Exemplo n.º 2
0
 def setUp(self):
     """Build a tree of the intervals [0, 9], [10, 19], ..., [100, 109].

     The interval (50, 59) is used as the initial root, so it is skipped
     when the remaining intervals are inserted.
     """
     root = IntervalNode(Interval(50, 59))
     for lo in range(0, 110, 10):
         if lo != 50:
             root = root.insert(Interval(lo, lo + 9))
     self.intervals = root
Exemplo n.º 3
0
def processTarget(oId, submatches, primaries, tfotargets, annotations,
                  parameters):
    """Process one target region for all targets and their off-targets.

    Args:
        oId: identifier of the target region.
        submatches: mapping from id to the list of submatches with that id.
        primaries: mapping of ids that have a primary target region; only
            membership of ``oId`` is checked here.
        tfotargets: mapping from id to a mapping keyed by ``(start, end)``
            off-target regions. An id may be absent.
        annotations: not used by this function.
        parameters: not used by this function.

    Returns:
        The concatenated output of ``processSubTarget`` for every submatch.

    Raises:
        KeyError: if ``oId`` is not in ``submatches``.
        SystemExit: with code 1 if ``oId`` has no entry in ``primaries``.
    """
    query = submatches[oId]  # list of submatches of the same id

    # quit if this submatch cannot be related to a primary target region
    if oId not in primaries:
        sys.stderr.write(
            "[ERROR] submatch does not have a corresponding primary target : %s\n"
            % (oId,))
        sys.exit(1)

    # Off-target data for this id. Default to an empty mapping: the previous
    # default was a list, which failed on .keys() whenever the id was absent.
    offtarget_data = tfotargets[oId] if oId in tfotargets else {}

    # create an empty root and put all off-target regions into the tree
    intersect_tree = IntervalNode(-1, -1, -1)
    for start, end in offtarget_data:
        intersect_tree = intersect_tree.insert(start, end)

    # query all submatches and join their outputs in one pass
    return "".join(
        processSubTarget(q, oId, intersect_tree, offtarget_data)
        for q in query)
Exemplo n.º 4
0
    def setUp(self):
        """Build a large tree of point intervals plus many duplicates.

        Starting from a root of (1, 2), one zero-length interval (i, i) is
        inserted for every tenth position below ``self.max``, followed by
        6000 copies of the interval (0, 1).
        """
        root = IntervalNode(Interval(1, 2))
        self.max = 1000000

        # one point interval every 10 positions
        for pos in range(0, self.max, 10):
            root = root.insert(Interval(pos, pos))

        # pile up identical intervals at the low end
        for _ in range(6000):
            root = root.insert(Interval(0, 1))

        self.intervals = root
Exemplo n.º 5
0
    def setUp(self):
        """Generate random intervals around regular centres and index them.

        For every centre 11, 26, 41, ... below 20000, between 2 and 5
        intervals are created; each extends 1-10 positions to the left and
        1-10 positions to the right of the centre. The list is kept in
        ``self.intervals`` and the tree built from it in ``self.tree``.
        """
        generated = []
        for centre in range(11, 20000, 15):
            copies = random.randint(2, 5)
            for _ in range(copies):
                # draw the left extent first, then the right extent
                left = random.randint(1, 10)
                right = random.randint(1, 10)
                generated.append(Interval(centre - left, centre + right))

        # first interval seeds the root, the rest are inserted one by one
        root = IntervalNode(generated[0])
        for item in generated[1:]:
            root = root.insert(item)

        self.intervals = generated
        self.tree = root
    def __init__(self, fileName):
        """Load a tab-separated interval list into per-chromosome trees.

        Lines starting with '#' or '@' are stored (stripped) in
        ``self.headline``. Blank lines are ignored. Every other line must
        have at least three tab-separated columns: chromosome, start, end.
        Each interval is stored widened by one position on both sides,
        i.e. as ``(start - 1, end + 1)``.

        Attributes set:
            N: number of interval lines read.
            headline: list of header lines.
            tree: dict mapping chromosome name to the root IntervalNode.

        If ``fileName`` does not exist the object is left empty and nothing
        is printed.

        Raises:
            IndexError: if a data line has fewer than three columns.
            ValueError: if a start or end column is not an integer.
        """
        self.N = 0
        self.headline = []
        self.tree = {}

        if not os.path.exists(fileName):
            return

        previous = None  # chromosome of the preceding data line
        # The with-block closes the file even if a line fails to parse.
        with open(fileName, 'rt') as fh:
            for raw in fh:
                line = raw.strip()
                if not line:
                    # A blank line has no first character to inspect.
                    continue
                if line.startswith(('#', '@')):
                    self.headline.append(line)
                    continue

                fields = line.split('\t')
                self.N += 1

                chrom, pstart, pend = fields[0], fields[1], fields[2]
                # pad the interval by one position on each side
                start, end = int(pstart) - 1, int(pend) + 1

                if chrom != previous:
                    # report the chromosome that has just been completed
                    if previous is not None and verbose:
                        print('scan %s  line %d chrom %s' %
                              (fileName, self.N, previous))
                    previous = chrom

                # Insert into the tree already stored for this chromosome so
                # that a chromosome appearing in several separate runs keeps
                # all of its intervals instead of restarting from scratch.
                node = self.tree.get(chrom)
                if node is None:
                    self.tree[chrom] = IntervalNode(start, end)
                else:
                    self.tree[chrom] = node.insert(start, end)

        print("Loaded IntervalList:\t" + fileName)
        print("events:\t" + str(self.N))
        # Output path: <dirOutPerCategory><prefix>.<chr>.genomicFeature
        f_file = dirOutPerCategory + prefix + "." + chr + ".genomicFeature"
        # NOTE(review): the same path is opened for writing twice below and
        # `outfile` is not used in the visible lines — confirm it is needed.
        outfile = open(f_file, 'w')
        outFile[chr] = open(f_file, 'w')
        # write the CSV header row to the handle stored for this chr
        outFile[chr].write(
            'readName,chr,category, geneID, geneName, flag_multiMapped\n')

# DATA STRUCTURE - one dictionary per feature class, keyed by chr
tree_utr3, tree_utr5, tree_cds = {}, {}, {}
tree_geneCoordinates, tree_rRNA = {}, {}
tree_intergenic = {}  # +10,000

# Give every dictionary a fresh (0, 0) root node for each entry of chr_list.
for chr in chr_list:
    for _feature_trees in (tree_utr3, tree_utr5, tree_cds,
                           tree_geneCoordinates, tree_rRNA, tree_intergenic):
        _feature_trees[chr] = IntervalNode(0, 0)

# progress message before the annotation files are read
print("Load gene annotations ...")

# starts empty; nothing in the visible lines fills it
geneUTR3 = {}

#UTR3
print("Load", utr3_file)
# read utr3_file through the csv module
with open(utr3_file, 'r') as f:

    reader = csv.reader(f)
Exemplo n.º 8
0
    hi = lo + randint(1, SPAN)
    return (lo, hi)


def find(start, end, tree):
    """Return the intervals stored in ``tree`` that overlap the query range.

    ``tree.intersect`` is called with ``start``, ``end`` and a callback that
    receives each matching node; the result is a list of
    ``(node.start, node.end)`` tuples in the order the nodes were reported.
    """
    hits = []
    # the bound append method serves directly as the report callback
    tree.intersect(start, end, hits.append)
    return [(node.start, node.end) for node in hits]


# use this to force both examples to generate the same data
seed(10)

# Generate N random intervals. The result is materialised as a list because
# it is indexed and sliced below, which a lazy map object does not support.
data = list(map(generate, range(N)))

# generate the 10 intervals to query over
query = list(map(generate, range(10)))

# start the root at the first element
start, end = data[0]
tree = IntervalNode(start, end)

# build an interval tree from the rest of the data
for start, end in data[1:]:
    tree = tree.insert(start, end)

# report the overlaps found for every query interval
for start, end in query:
    overlap = find(start, end, tree)
    # single parenthesised argument: prints the same on Python 2 and 3
    print('(%s, %s) -> %s' % (start, end, overlap))
def build_interval_tree(intervals):
    """Build an interval tree from a non-empty sequence of intervals.

    The first interval becomes the initial root; every following interval
    is inserted in order, and the root returned by the last insert is the
    result.
    """
    tree = IntervalNode(intervals[0])
    for item in intervals[1:]:
        # insert() returns the root to use for the next insertion
        tree = tree.insert(item)
    return tree