Exemplo n.º 1
0
def turnSegmentGWRIntoRBDict(gwr, extend_dist=20000, min_reciprocal_overlap=0.6, report=True):
	"""
	2010-3-17
		Build an RBDict whose keys are the (extended) segments of every data_obj in gwr.

		Arguments:
			gwr: object exposing data_obj_ls; every data_obj in it must provide
				chromosome, position and stop_position.
			extend_dist: amount subtracted from position and added to stop_position to enlarge
				each segment before it is turned into a key. The extended start is clamped at 0.
			min_reciprocal_overlap: passed unchanged to CNVSegmentBinarySearchTreeKey.
			report: if True (and the tree is not empty), print depth statistics of the tree to stdout.

		Returns the RBDict that maps each CNVSegmentBinarySearchTreeKey to its data_obj.

		NOTE(review): keys are compared through leftWithinRightAlsoEqualCmp, so a segment that
			compares equal to an existing key presumably shares that key's slot rather than
			adding a new node -- confirm against RBDict/the comparison function.
	"""
	sys.stderr.write("Turning a segment-gwr (start-stop style) into an RBDict ...")
	from RBTree import RBDict	# 2010-1-26 RBDict is more efficient than binary_tree.
	rbDict = RBDict(cmpfn=leftWithinRightAlsoEqualCmp)
	for data_obj in gwr.data_obj_ls:
		start = max(data_obj.position-extend_dist, 0)	# never let the extended start go negative
		stop = data_obj.stop_position+extend_dist
		segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=data_obj.chromosome, span_ls=[start, stop], \
													min_reciprocal_overlap=min_reciprocal_overlap)
		rbDict[segmentKey] = data_obj
	# Depth statistics are meaningless for an empty tree, so the report is skipped in that case.
	if report and len(rbDict) > 0:
		depth = rbDict.depth()
		optimum_depth = rbDict.optimumdepth()
		optimum_depth_ceiling = math.ceil(optimum_depth)
		# Scale the ratio by 100 because the format string labels the value as a percentage.
		# The depth check avoids a ZeroDivisionError should depth() return 0.
		if depth:
			depth_efficiency = 100.0 * optimum_depth_ceiling / depth
		else:
			depth_efficiency = 0.0
		# Single-argument parenthesised print behaves the same as the print statement.
		print("\tDepth of rbDict: %d" % depth)
		print("\tOptimum Depth: %f (%d) (%f%% depth efficiency)" % (optimum_depth, optimum_depth_ceiling,
															  depth_efficiency))
	sys.stderr.write("%s objects converted.\n"%len(rbDict))
	return rbDict
Exemplo n.º 2
0
	

# test program if this file is run
if __name__ == "__main__":
	# Self-test: insert a handful of CNV segments into the tree and print its shape statistics.
	import os
	import sys
	import math
	#import pdb
	#pdb.set_trace()
	
	# Each entry is [chromosome, span]; spans of one and of two elements are both exercised.
	cnv_ls = [[1, (2323,2600)], [2,(50000,)], [3,(43214,78788)], [5,(150,500)], [5,(500,950)], [5, (43241, 43242)]]
	no_of_cnvs = len(cnv_ls)
	min_reciprocal_overlap = 0.6
	
	#from BinarySearchTree import binary_tree
	#tree = binary_tree()
	from RBTree import RBDict	#2010-1-26 binary_tree and RBDict are swappable. but RBDict is more efficient (balanced).
	tree = RBDict(cmpfn=leftWithinRightAlsoEqualCmp)	# 2010-1-28 use the custom cmpfn if you want the case that left within right is regarded as equal as well.  
	
	for cnv in cnv_ls:
		segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=cnv[0], span_ls=cnv[1], min_reciprocal_overlap=min_reciprocal_overlap)
		tree[segmentKey] = cnv
	
	# Single-argument parenthesised print behaves the same as the print statement.
	print("Binary Tree Test\n")
	print("Node Count: %d" % len(tree))
	tree_depth = tree.depth()
	print("Depth: %d" % tree_depth)
	optimum_depth = tree.optimumdepth()
	optimum_depth_ceiling = math.ceil(optimum_depth)
	# Scale the ratio by 100 because the format string labels the value as a percentage.
	# The depth check avoids a ZeroDivisionError should depth() return 0.
	if tree_depth:
		depth_efficiency = 100.0 * optimum_depth_ceiling / tree_depth
	else:
		depth_efficiency = 0.0
	print("Optimum Depth: %f (%d) (%f%% depth efficiency)" % (optimum_depth, optimum_depth_ceiling,
															  depth_efficiency))
	
	tree_efficiency = tree.efficiency()
	total_possible_used = len(tree) / tree_efficiency
	print("Efficiency: %f%% (total possible used: %d, total wasted: %d): " % (tree_efficiency * 100,
																			  total_possible_used,
																			  total_possible_used - len(tree)))
	"""