def turnSegmentGWRIntoRBDict(gwr, extend_dist=20000, min_reciprocal_overlap=0.6, report=True):
    """
    Turn a segment-style (start-stop) gwr into an RBDict keyed by its segments.

    2010-3-17
        extend_dist is used to enlarge the segments in each data_obj of gwr.

    Arguments:
        gwr: object exposing data_obj_ls; every data_obj in it must provide
            chromosome, position and stop_position.
        extend_dist: subtracted from data_obj.position (the result is clamped
            at 0) and added to data_obj.stop_position before the key is built.
        min_reciprocal_overlap: passed unchanged to every
            CNVSegmentBinarySearchTreeKey.
        report: if true, depth statistics of the resulting tree are written to
            stdout.

    Returns:
        the RBDict, which maps each CNVSegmentBinarySearchTreeKey to the
        data_obj it was built from.

    Progress messages are always written to stderr, regardless of report.
    """
    sys.stderr.write("Turning a segment-gwr (start-stop style) into an RBDict ...")
    from RBTree import RBDict  # 2010-1-26 RBDict is more efficient than binary_tree.
    rbDict = RBDict(cmpfn=leftWithinRightAlsoEqualCmp)
    for data_obj in gwr.data_obj_ls:
        # enlarge the segment on both sides; the start never drops below 0
        start = max(data_obj.position - extend_dist, 0)
        stop = data_obj.stop_position + extend_dist
        segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=data_obj.chromosome,
                                                   span_ls=[start, stop],
                                                   min_reciprocal_overlap=min_reciprocal_overlap)
        rbDict[segmentKey] = data_obj
    if report:
        # sys.stdout.write (rather than the print statement) emits the same
        # text and parses under both Python 2 and Python 3.
        depth = rbDict.depth()
        sys.stdout.write("\tDepth of rbDict: %d\n" % depth)
        optimum_depth = rbDict.optimumdepth()
        optimum_depth_ceil = math.ceil(optimum_depth)
        if depth:
            # the format string labels this value with a percent sign, so scale
            # the ratio to a real percentage; 100.0 keeps the division in floats
            depth_efficiency = 100.0 * optimum_depth_ceil / depth
        else:
            # guard the diagnostics against a zero depth (presumably what an
            # empty tree reports -- TODO confirm against RBDict.depth())
            depth_efficiency = 0.0
        sys.stdout.write("\tOptimum Depth: %f (%d) (%f%% depth efficiency)\n" %
                         (optimum_depth, optimum_depth_ceil, depth_efficiency))
    sys.stderr.write("%s objects converted.\n" % len(rbDict))
    return rbDict
no_of_cnvs = len(cnv_ls) min_reciprocal_overlap = 0.6 #from BinarySearchTree import binary_tree #tree = binary_tree() from RBTree import RBDict #2010-1-26 binary_tree and RBDict are swappable. but RBDict is more efficient (balanced). tree = RBDict(cmpfn=leftWithinRightAlsoEqualCmp) # 2010-1-28 use the custom cmpfn if you want the case that left within right is regarded as equal as well. for cnv in cnv_ls: segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=cnv[0], span_ls=cnv[1], min_reciprocal_overlap=min_reciprocal_overlap) tree[segmentKey] = cnv print "Binary Tree Test\n" print "Node Count: %d" % len(tree) print "Depth: %d" % tree.depth() print "Optimum Depth: %f (%d) (%f%% depth efficiency)" % (tree.optimumdepth(), math.ceil(tree.optimumdepth()), math.ceil(tree.optimumdepth()) / tree.depth()) print "Efficiency: %f%% (total possible used: %d, total wasted: %d): " % (tree.efficiency() * 100, len(tree) / tree.efficiency(), (len(tree) / tree.efficiency()) - len(tree)) """ print "Min: %s" % repr(tree.min()) print "Max: %s" % repr(tree.max()) print "List of Layers:\n\t" + repr(tree.listlayers()) + "\n" print "\"Recursive\" List:\n\t" + repr(tree.listrecursive()) + "\n" print "List of Keys:\n\t" + repr(tree.listkeys()) + "\n" print "List of Data:\n\t" + repr(tree.listdata()) + "\n" print "List of Nodes:\n\t" + repr(tree.listnodes()) + "\n" print "Dictionary:\n\t" + repr(tree.dict()) + "\n"