示例#1
0
def turnSegmentGWRIntoRBDict(gwr, extend_dist=20000, min_reciprocal_overlap=0.6, report=True):
	"""
	2010-3-17
		Convert a segment-style (start-stop) gwr into an RBDict keyed by segment.

		Every data_obj in gwr.data_obj_ls becomes one entry: the key is a
		CNVSegmentBinarySearchTreeKey built from data_obj.chromosome and the span
		[position - extend_dist, stop_position + extend_dist] (start is clipped at 0),
		and the value is the data_obj itself.

		Arguments:
			gwr: object exposing data_obj_ls; each element must provide
				chromosome, position and stop_position.
			extend_dist: amount by which each segment is enlarged on both sides.
			min_reciprocal_overlap: passed through unchanged to every
				CNVSegmentBinarySearchTreeKey.
			report: if true, tree depth statistics are written to stdout.

		Returns the populated RBDict.

		NOTE(review): the RBDict uses leftWithinRightAlsoEqualCmp, so keys that the
			comparison treats as equal presumably end up sharing one entry -- confirm
			against the RBDict implementation.
	"""
	sys.stderr.write("Turning a segment-gwr (start-stop style) into an RBDict ...")
	from RBTree import RBDict	# 2010-1-26 RBDict is more efficient than binary_tree.
	rbDict = RBDict(cmpfn=leftWithinRightAlsoEqualCmp)
	for data_obj in gwr.data_obj_ls:
		# enlarge the segment on both sides; the start must not become negative
		start = max(data_obj.position-extend_dist, 0)
		stop = data_obj.stop_position+extend_dist
		segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=data_obj.chromosome, span_ls=[start, stop],
													min_reciprocal_overlap=min_reciprocal_overlap)
		rbDict[segmentKey] = data_obj
	if report:
		depth = rbDict.depth()
		sys.stdout.write("\tDepth of rbDict: %d\n" % depth)
		# compute once instead of three separate calls
		optimum_depth = rbDict.optimumdepth()
		optimum_depth_ceil = math.ceil(optimum_depth)
		# The value is printed with a percent sign, so scale the ratio by 100.
		# Guard the division so that a zero depth cannot abort a diagnostic report.
		if depth:
			depth_efficiency = 100.0 * optimum_depth_ceil / depth
		else:
			depth_efficiency = 0.0
		sys.stdout.write("\tOptimum Depth: %f (%d) (%f%% depth efficiency)\n" % (optimum_depth, optimum_depth_ceil,
																				depth_efficiency))
	sys.stderr.write("%s objects converted.\n"%len(rbDict))
	return rbDict
示例#2
0
	no_of_cnvs = len(cnv_ls)
	min_reciprocal_overlap = 0.6
	
	#from BinarySearchTree import binary_tree
	#tree = binary_tree()
	from RBTree import RBDict	#2010-1-26 binary_tree and RBDict are swappable. but RBDict is more efficient (balanced).
	tree = RBDict(cmpfn=leftWithinRightAlsoEqualCmp)	# 2010-1-28 use the custom cmpfn if you want the case that left within right is regarded as equal as well.  
	
	for cnv in cnv_ls:
		segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=cnv[0], span_ls=cnv[1], min_reciprocal_overlap=min_reciprocal_overlap)
		tree[segmentKey] = cnv
	
	print "Binary Tree Test\n"
	print "Node Count: %d" % len(tree)
	print "Depth: %d" % tree.depth()
	print "Optimum Depth: %f (%d) (%f%% depth efficiency)" % (tree.optimumdepth(), math.ceil(tree.optimumdepth()),
															  math.ceil(tree.optimumdepth()) / tree.depth())
	
	print "Efficiency: %f%% (total possible used: %d, total wasted: %d): " % (tree.efficiency() * 100,
																			  len(tree) / tree.efficiency(),
																			  (len(tree) / tree.efficiency()) - len(tree))
	"""
	print "Min: %s" % repr(tree.min())
	print "Max: %s" % repr(tree.max())
	
	print "List of Layers:\n\t" + repr(tree.listlayers()) + "\n"
	print "\"Recursive\" List:\n\t" + repr(tree.listrecursive()) + "\n"
	print "List of Keys:\n\t" + repr(tree.listkeys()) + "\n"
	print "List of Data:\n\t" + repr(tree.listdata()) + "\n"
	print "List of Nodes:\n\t" + repr(tree.listnodes()) + "\n"
	print "Dictionary:\n\t" + repr(tree.dict()) + "\n"