示例#1
0
def label_sets_for_nodes(cursor):
	"""
	Build a dict mapping each <i> of table sets_for_nodes to the
	ncRNA family that c2.check_hit reports for it.

	cursor -- open DB cursor; used for the one SELECT on sets_for_nodes
	Returns {i: ncRNA family}. The family can be None
	(presumably when <i> is not an ncRNA hit -- confirm in c2.check_hit).

	Side effects: assigns the CONN_FUNC in scope here to c2.CONN_FUNC and
	writes one progress line per node to stderr.
	"""
	# presumably lets c2.check_hit open its own connection -- TODO confirm
	c2.CONN_FUNC = CONN_FUNC
	cursor.execute("SELECT i FROM sets_for_nodes")
	family_by_node = {}
	for row in cursor.fetchall():
		node_i = row[0]
		print >> sys.stderr, "labeling for node....", node_i
		# only the family half of the (id, family) pair is kept
		_hit_id, family = c2.check_hit(node_i)
		family_by_node[node_i] = family
	return family_by_node
示例#2
0
def graph_ncRNA_connectivity(clique_pickle=None):
	"""
	Group ncRNA hits by family and run get_connectivity on each group.

	Reads every <i> from table sets_for_nodes, asks c2.check_hit for its
	(id, family) pair, and collects the returned ids per family.
	Collection stops as soon as more than 100 hits have been gathered.
	get_connectivity(list_of_ids, cursor) is then called once per family;
	per the original note it is expected to compute the ratio of connected
	same-family members to Nones for direct and path-2 neighbours
	(not visible here -- confirm in get_connectivity).

	clique_pickle -- accepted but not used by this function
	Returns None. Writes one progress line per <i> to stderr.
	"""
	c2.CONN_FUNC = CONN_FUNC
	members_by_family = defaultdict(list) # fam --> list of ids from check_hit
	hits_collected = 0
	with CONN_FUNC() as cursor:
		cursor.execute("SELECT i FROM sets_for_nodes")
		for row in cursor.fetchall():
			node_i = row[0]
			print >> sys.stderr, "i is....", node_i
			hit_id, family = c2.check_hit(node_i)
			if hit_id is None:
				# not a hit: nothing to record, counter unchanged
				continue
			members_by_family[family].append(hit_id)
			hits_collected += 1
			# cap on the number of hits gathered (101 at most)
			if hits_collected > 100:
				break
		for member_ids in members_by_family.itervalues():
			get_connectivity(member_ids, cursor)
示例#3
0
# dominant family (may be None) --> sizes of its cliques and, per clique,
# the fraction of members that belong to that dominant family
clique_stats = defaultdict(lambda: {'sizes': [], 'precisions': []})
ncRNA_id_to_family = {}
hitQ_sizes = []  # sizes of cliques whose dominant family is not None

conn = get_conn_Actino()
cursor = get_dict_cursor(conn)
# NOTE(review): if `load` is a pickle loader, binary mode may be needed -- confirm
with open(clique_filename) as f:
	QQQ = load(f)
	for Q in QQQ:
		# skip cliques containing node 75 or 163, and cliques with < 5 members
		if 75 in Q or 163 in Q or len(Q) < 5:
			continue
		tally_by_family = defaultdict(int)
		for i in Q:
			# a non-hit comes back as (None, None) and is tallied under None
			ncRNA_id, ncRNA_family = c2.check_hit(i, cursor)
			tally_by_family[ncRNA_family] += 1
			ncRNA_ids_seen[ncRNA_id] += 1
			ncRNA_id_to_family[ncRNA_id] = ncRNA_family
		# dominant family = last entry after an ascending sort on the counts
		tally_by_family = sorted(tally_by_family.items(), key=itemgetter(1))
		fam, count = tally_by_family[-1]
		clique_size = len(Q)
		if fam is not None:
			print("{0}\t{1}/{2}".format(fam, count, clique_size))
			hitQ_sizes.append(clique_size)
		# recorded for every kept clique, including those dominated by None
		fam_stats = clique_stats[fam]
		fam_stats['sizes'].append(clique_size)
		fam_stats['precisions'].append(count * 1. / clique_size)

# now print the stats
print('####################### stats #########################')