Пример #1
0
	def output_cluster_bs_data(self, communicator, parameter_list, data):
		"""
		12-20-05 for darwin output
		
		Unpickle one batch of cluster rows and append them to the output handle
		in darwin list syntax. Gene ids are translated via dict_map() with
		gene_id2symbol.
		
		parameter_list is unpacked as (outf, gene_id2symbol, dataset_no2desc).
		data is a cPickle string; every unpickled row is unpacked as
		(id, core_vertex_ls, on_dataset_index_ls, ls_to_return).
		communicator is not used here.
		
			out:=[
			[id, [fuzzyDense gene list], { [ [TF gene list1], [TF target gene list1], p-value], [ [TF gene list2], [TF target gene list2], p-value], ...},
				{[dataset_no1, description], [dataset_no2, description], ... }  ],
			[...],
			...
			[]]:
			
		"""
		outf, gene_id2symbol, dataset_no2desc = parameter_list
		for cluster_id, core_vertex_ls, on_dataset_index_ls, ls_to_return in cPickle.loads(data):
			#dataset_no = dataset_index + 1
			dataset_no_desc_ls = [[dataset_index+1, dataset_no2desc[dataset_index+1]] \
				for dataset_index in on_dataset_index_ls]
			
			#one [TF gene list, TF target gene list, score] entry per tfbs row
			tfbs_row_darwin_ls = []
			for tfbs_row in ls_to_return:
				#only score and the two gene lists are output, but the row must have all 8 fields
				score, _score_type, bs_no_list, target_gene_no_list, _global_ratio, _local_ratio, \
					_expected_ratio, _unknown_ratio = tfbs_row
				tf_symbol_list = dict_map(gene_id2symbol, bs_no_list)
				target_symbol_list = dict_map(gene_id2symbol, target_gene_no_list)
				tfbs_row_darwin_ls.append([tf_symbol_list, target_symbol_list, score])
			
			core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls)
			
			#[1:-1] strips the outer [] of repr() so that the content can be wrapped in {}
			tfbs_text = repr(tfbs_row_darwin_ls)[1:-1]
			dataset_text = repr(dataset_no_desc_ls)[1:-1]
			outf.write('[%s, %s,{%s},{%s}],\n'%(cluster_id, repr(core_vertex_symbol_ls), tfbs_text, dataset_text))
Пример #2
0
	def output_function_group1(self, curs, writer, function_struc_dict, gene_no2gene_id, go_no2go_id, go_no2name, go_no2accuracy, \
		go_no2accuracy_pair, prediction_pair2lca_list=None, gene_no=None):
		"""
		03-15-05
			copied from output_function_group()
		
		Write one row per (go_no, function_struc) of function_struc_dict through
		writer.writerow().
		
		Without prediction_pair2lca_list the row is
			[go_id, go_name, p-values joined by ';', number of clusters, accuracy ratio]
		With it, is_correct_lca and the ';'-joined lca_list (translated through
		go_no2go_id by dict_map) are inserted after go_name. A prediction that is
		correct by lca but has no lca_list is reported on stderr and ends the
		program with exit code 2.
		"""
		for go_no, function_struc in function_struc_dict.iteritems():
			#transform to character type
			p_value_field = ';'.join([repr(p_value) for p_value in function_struc.p_value_list])
			no_of_clusters = len([repr(mcl_id) for mcl_id in function_struc.cluster_array])
			
			row = [go_no2go_id[go_no], go_no2name[go_no]]
			if prediction_pair2lca_list:
				#use prediction_pair2lca_list to judge whether it's for known genes or unknown
				lca_list = []
				if function_struc.is_correct_lca==1:
					lca_list = prediction_pair2lca_list.get((gene_no,go_no))
					if not lca_list:
						sys.stderr.write("Error: prediction pair gene=%s, go_no=%s is correct by lca, but no lca_list.\n"%\
							(gene_no, go_no))
						sys.exit(2)
					former_length = len(lca_list)
					lca_list = dict_map(go_no2go_id, lca_list)
					if former_length!=len(lca_list):
						sys.stderr.write("Warning: some go_nos in lca_list have no go_ids for gene: %s and go_no: %s.\n\tList shrinked from %s to %s\n"%\
							(gene_no, go_no, former_length, len(lca_list)))
				row.append(function_struc.is_correct_lca)
				row.append(';'.join(lca_list))
			row.append(p_value_field)
			row.append(no_of_clusters)
			row.append(go_no2accuracy_pair[go_no].ratio)
			
			writer.writerow(row)
Пример #3
0
	def _pattern_darwin_format(self, curs, pattern_table, gene_no2id, go_no2id, output_fname, mcl_id_set=None):
		"""
		2007-01-07
			add edge_set
		
		Dump the rows of pattern_table into output_fname in darwin format.
		
		Rows are pulled 5000 at a time through the server-side cursor 'crs'.
		vertex_set and edge_set arrive as text (the slicing below assumes the
		forms '{1,2,...}' and '{{1,2},{2,3},...}') and their numbers are
		translated through dict_map(gene_no2id, ..., type=2).
		If mcl_id_set is given and non-empty, only the patterns whose id is in it
		are written. go_no2id is not used here.
		
		The output file is closed explicitly (also when an error occurs) and the
		cursor 'crs' is closed after the last fetch, like the other *_darwin_format
		methods which close theirs, so the cursor name can be declared again later.
		
		format:
			r:=[
			[mcl_id, vertex_set, edge_set, recurrence_array, recurrence, connectivity, unknown_ratio]
			[...],
			...
			[]]:
			
		"""
		sys.stderr.write("pattern...\n")
		of = open(output_fname, 'w')
		try:
			of.write('r:=[\n')
			curs.execute(("DECLARE crs CURSOR FOR select id, vertex_set, edge_set, recurrence_array, recurrence, "
				"connectivity, unknown_gene_ratio from %s")%pattern_table)
			curs.execute("fetch 5000 from crs")
			rows = curs.fetchall()
			while rows:
				for row in rows:
					mcl_id, vertex_set, edge_set, recurrence_array, recurrence, connectivity, unknown_ratio = row
					if mcl_id_set and mcl_id not in mcl_id_set:
						continue
					#'{1,2,3}' -> [1,2,3] -> gene ids
					vertex_set = [int(vertex) for vertex in vertex_set[1:-1].split(',')]
					vertex_set = dict_map(gene_no2id, vertex_set, type=2)
					#'{{1,2},{2,3}}' -> [[1,2],[2,3]] -> gene ids
					edge_list = []
					for edge_text in edge_set[2:-2].split('},{'):
						edge = [int(vertex) for vertex in edge_text.split(',')]
						edge_list.append(dict_map(gene_no2id, edge, type=2))
					#swap the surrounding {} for []
					recurrence_array = '[' + recurrence_array[1:-1] + ']'
					of.write('[%s, %s, %s, %s, %s, %s, %s],\n'%(mcl_id, repr(vertex_set), repr(edge_list), recurrence_array,\
						recurrence, connectivity, unknown_ratio))
				curs.execute("fetch 5000 from crs")
				rows = curs.fetchall()
			curs.execute("close crs")
			of.write('[]]:\n')	#add the last blank list
		finally:
			#flush and release the file no matter what happened above
			of.close()
		sys.stderr.write("pattern darwin format done.\n")
Пример #4
0
	def _prediction_darwin_format(self, curs, p_gene_table, gene_p_table, gene_no2id, go_no2id, output_fname):
		"""
		12-01-05
			deal with lca_list={}
		03-01-06
			add no_of_distinct_funcitons_from_gene_p_table in the output
		2006-09-25 now defunct
		
		Dump the predictions joined from p_gene_table and gene_p_table into
		output_fname in darwin format.
		
		A first pass (cursor 'crs_1') collects, for each gene_no, the set of
		p_gene_id_src values; its size is the last column of the output.
		A second pass (cursor 'crs') writes one line per prediction. go_no and the
		members of lca_list are translated to go ids; gene_no is translated
		through gene_no2id and kept as is when there is no (or an empty) mapping.
		
		The output file is closed explicitly, also when an error occurs.
		
		format:
			r:=[
			[gene_id, go_id, is_correct_lca, p_value, mcl_id, lca_list, no_of_distinct_funcitons_from_gene_p_table],
			[...],
			[]]:
		"""
		sys.stderr.write("prediction...\n")
		#03-01-06 firstly get the gene_no2p_gene_id_src_set
		curs.execute(("DECLARE crs_1 CURSOR FOR SELECT p.gene_no, g.p_gene_id_src from %s p, %s g "
			"where p.p_gene_id=g.p_gene_id_src")%(p_gene_table, gene_p_table))
		curs.execute("fetch 5000 from crs_1")
		rows = curs.fetchall()
		gene_no2p_gene_id_src_set = {}
		while rows:
			for gene_no, p_gene_id_src in rows:
				if gene_no not in gene_no2p_gene_id_src_set:
					gene_no2p_gene_id_src_set[gene_no] = Set()
				gene_no2p_gene_id_src_set[gene_no].add(p_gene_id_src)
			curs.execute("fetch 5000 from crs_1")
			rows = curs.fetchall()
		curs.execute("close crs_1")
		
		of = open(output_fname, 'w')
		try:
			of.write('r:=[\n')
			curs.execute(("DECLARE crs CURSOR FOR select p.gene_no, p.go_no, p.is_correct_lca, p.avg_p_value, p.mcl_id, p.lca_list "
				"from %s p, %s g where g.p_gene_id = p.p_gene_id")%(p_gene_table, gene_p_table))
			curs.execute("fetch 5000 from crs")
			rows = curs.fetchall()
			while rows:
				for row in rows:
					gene_no, go_no, is_correct_lca, p_value, mcl_id, lca_list = row
					if lca_list and len(lca_list)>2:	#12-01-05 lca_list={} just blank
						lca_list = [int(lca_go_no) for lca_go_no in lca_list[1:-1].split(',')]
						lca_list = dict_map(go_no2id, lca_list, type=2)
					else:
						lca_list = []
					#NOTE(review): the two queries join on different columns; a gene_no missing
					#from the first pass raises KeyError here, as before -- confirm that cannot happen
					of.write("['%s', '%s', %s, %s, %s, %s, %s],\n"%(gene_no2id.get(gene_no) or gene_no, go_no2id[go_no], is_correct_lca,\
						p_value, mcl_id, repr(lca_list), len(gene_no2p_gene_id_src_set[gene_no])))	#03-01-06
				curs.execute("fetch 5000 from crs")
				rows = curs.fetchall()
			of.write('[]]:\n')	#add the last blank list
		finally:
			#flush and release the file no matter what happened above
			of.close()
		curs.execute("close crs")
		sys.stderr.write("prediction darwin format done.\n")
Пример #5
0
    def run(self):
        """
        2007-08-30
            add label_type 3

        Draw the clustered strain locations and, if self.draw_site_network
        is set, the site network.

        The label drawn for each population depends on self.label_type:
            1: the popid
            2: the diameter (second item of the popid2pos_size value)
            3: the selfing rate times 1000, rounded; '0' when the population
               has no rate (or a NULL/zero one). Needs self.selfing_rate_table,
               otherwise the program ends with exit code 3.
        Any other label_type is reported on stderr and ends the program with
        exit code 3 (it used to fail later with an unbound label_ls).
        """
        import MySQLdb
        conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
        curs = conn.cursor()
        popid2pos_size = self.get_popid2pos_size(curs,
                                                 self.popid2ecotypeid_table)
        popid_ls = popid2pos_size.keys()
        pos_size_ls = dict_map(popid2pos_size, popid_ls)
        weighted_pos_ls = [row[0] for row in pos_size_ls]
        diameter_ls = [row[1] for row in pos_size_ls]
        if self.label_type == 1:
            label_ls = popid_ls
        elif self.label_type == 2:
            label_ls = diameter_ls
        elif self.label_type == 3:
            if self.selfing_rate_table is None:
                sys.stderr.write(
                    "Label type is 3(selfing rate), but no selfing_rate_table specified\n"
                )
                sys.exit(3)
            popid2selfing_rate = self.get_popid2selfing_rate(
                curs, self.selfing_rate_table, self.which_method)
            label_ls = []
            for popid in popid_ls:
                avg_s = '0'
                # a missing, NULL or zero rate keeps the '0' label
                if popid in popid2selfing_rate and popid2selfing_rate[popid]:
                    avg_s = int(round(popid2selfing_rate[popid] * 1000))
                label_ls.append(avg_s)
        else:
            # without this branch label_ls would be unbound below
            sys.stderr.write(
                "Label type %s is not supported (1: popid, 2: diameter, 3: selfing rate)\n"
                % self.label_type)
            sys.exit(3)
        self.draw_clustered_strain_location(
            label_ls,
            weighted_pos_ls,
            diameter_ls,
            self.label_type,
            self.label_type2label_name,
            pic_area=self.pic_area,
            output_fname_prefix=self.output_fname_prefix)
        if self.draw_site_network:
            from CreatePopulation import CreatePopulation
            CreatePopulation_instance = CreatePopulation()
            lat_lon_ls, pos2ecotypeid_ls = CreatePopulation_instance.get_pos2ecotypeid_ls(
                curs, self.strain_info_table)
            g, node_label2pos_counts = CreatePopulation_instance.divide_data_by_geography(
                lat_lon_ls, self.max_dist)
            self.DrawSiteNetwork(g, node_label2pos_counts, self.pic_area,
                                 self.output_fname_prefix)
Пример #6
0
	def get_mcl_id2vertex_edge_recurrence(self, curs, pattern_table, gene_no2id, go_no2id, mcl_id_set):
		"""
		2007-02-08
		
		Return a dictionary mcl_id -> [vertex_set, edge_set, recurrence list] for
		the patterns of pattern_table whose id is in mcl_id_set.
		
		vertex_set and every edge are translated through
		dict_map(gene_no2id, ..., type=2). The recurrence list holds the 1-based
		positions of the recurrence_array entries that are >=0.8.
		Rows are fetched 5000 at a time through the cursor 'crs', which is closed
		at the end. go_no2id is not used here.
		"""
		sys.stderr.write("Getting mcl_id2vertex_edge_recurrence ...\n")
		mcl_id2vertex_edge_recurrence = {}
		curs.execute("DECLARE crs CURSOR FOR select id, vertex_set, edge_set, recurrence_array from %s"%pattern_table)
		curs.execute("fetch 5000 from crs")
		rows = curs.fetchall()
		while rows:
			for mcl_id, vertex_text, edge_text, recurrence_text in rows:
				if mcl_id not in mcl_id_set:
					continue
				#strip the outer {} and turn the numbers into gene ids
				vertex_no_list = [int(vertex) for vertex in vertex_text[1:-1].split(',')]
				vertex_set = dict_map(gene_no2id, vertex_no_list, type=2)
				#strip the outer {{ }} and split into the single edges
				edge_set = []
				for one_edge_text in edge_text[2:-2].split('},{'):
					edge_no_list = [int(vertex) for vertex in one_edge_text.split(',')]
					edge_set.append(dict_map(gene_no2id, edge_no_list, type=2))
				#keep the 1-based positions whose recurrence reaches 0.8
				recurrence_flag_list = [int(float(value)>=0.8) for value in recurrence_text[1:-1].split(',')]
				new_recurrence_array = [position+1 for position, flag in enumerate(recurrence_flag_list) if flag==1]
				mcl_id2vertex_edge_recurrence[mcl_id] = [vertex_set, edge_set, new_recurrence_array]
			curs.execute("fetch 5000 from crs")
			rows = curs.fetchall()
		curs.execute("close crs")
		sys.stderr.write("done.\n")
		return mcl_id2vertex_edge_recurrence
Пример #7
0
	def _cluster_darwin_format(self, curs, good_cluster_table, gene_no2id, go_no2id, output_fname):
		"""
		Dump the rows of good_cluster_table into output_fname in darwin format.
		
		Rows are pulled 5000 at a time through the server-side cursor 'crs'.
		vertex_set and go_no_list arrive as text (the slicing below assumes the
		form '{1,2,...}') and are translated through dict_map(..., type=2) with
		gene_no2id and go_no2id respectively. recurrence_array and p_value_list
		only get their surrounding characters replaced by [].
		
		The output file is closed explicitly (also when an error occurs) and the
		cursor 'crs' is closed after the last fetch, so the cursor name can be
		declared again later.
		
		format:
			r:=[
			[mcl_id, vertex_set, recurrence_array, recurrence, connectivity, unknown_ratio, size, go_id_list, p_value_list]
			[...],
			...
			[]]:
			
		"""
		sys.stderr.write("cluster...\n")
		of = open(output_fname, 'w')
		try:
			of.write('r:=[\n')
			curs.execute(("DECLARE crs CURSOR FOR select mcl_id, vertex_set, recurrence_array, recurrence, "
				"connectivity, unknown_ratio, size, go_no_list, p_value_list from %s")%good_cluster_table)
			curs.execute("fetch 5000 from crs")
			rows = curs.fetchall()
			while rows:
				for row in rows:
					mcl_id, vertex_set, recurrence_array, recurrence, connectivity, unknown_ratio,\
						size, go_no_list, p_value_list = row
					vertex_set = [int(vertex) for vertex in vertex_set[1:-1].split(',')]
					vertex_set = dict_map(gene_no2id, vertex_set, type=2)
					recurrence_array = '[' + recurrence_array[1:-1] + ']'
					go_no_list = [int(go_no) for go_no in go_no_list[1:-1].split(',')]
					go_id_list = dict_map(go_no2id, go_no_list, type=2)
					p_value_list = '[' + p_value_list[1:-1] + ']'
					of.write('[%s, %s, %s, %s, %s, %s, %s, %s, %s],\n'%(mcl_id, repr(vertex_set), recurrence_array,\
						recurrence, connectivity, unknown_ratio, size, repr(go_id_list), p_value_list))
				curs.execute("fetch 5000 from crs")
				rows = curs.fetchall()
			curs.execute("close crs")
			of.write('[]]:\n')	#add the last blank list
		finally:
			#flush and release the file no matter what happened above
			of.close()
		sys.stderr.write("cluster darwin format done.\n")
Пример #8
0
	def run(self):
		"""
		12-28-05
		
		Write the gene annotations selected by self.running_bit into
		self.output_fname through self.dict2darwin().
		
		Character i (0-based) of running_bit switches one annotation on when it
		is '1'; a running_bit shorter than i+1 leaves it off:
			0: go_bp, 1: go_cc, 2: as, 3: dp, 4: gene_age,
			5: gene_tissue, 6: gene_family_size, 7: gnf_gene_tissue
		"""
		conn, curs = db_connect(self.hostname, self.dbname, self.schema)
		organism = get_org_from_tax_id(curs, self.tax_id)
		#the key_map, shared by all the outputs
		gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)
		outf = open(self.output_fname, 'w')
		
		def bit_is_on(position):
			#True if running_bit is long enough and has '1' at position
			return len(self.running_bit)>position and self.running_bit[position]=='1'
		
		def dump(gene_id2annotation, annotation_name):
			#all the outputs go to the same file with the same key_map
			self.dict2darwin(gene_id2annotation, annotation_name, gene_id2symbol, outf)
		
		if bit_is_on(0):
			dump(get_gene_id2go_term(curs, term_type='biological_process', organism=organism), 'go_bp')
		if bit_is_on(1):
			dump(get_gene_id2go_term(curs, term_type='cellular_component', organism=organism), 'go_cc')
		if bit_is_on(2):
			dump(get_gene_id2no_of_events(curs, self.tax_id, ensembl2no_of_events_table='graph.ensembl2no_of_events'), 'as')
		if bit_is_on(3):
			#an earlier version called get_gene_id2no_of_events() on 'graph.ensembl_id2no_of_promoters' instead
			dump(get_gene_id2no_of_promoters(curs, self.tax_id), 'dp')
		if bit_is_on(4):
			tg_tax_id2ca_depth_tax_id_short_org = get_tg_tax_id2ca_depth_tax_id_short_org(curs, self.tax_id)
			gene_id2ortholog_tax_id_set = get_gene_id2ortholog_tax_id_set(curs, self.tax_id, homologene_table='homologene.homologene')
			#convert gene_id2ortholog_tax_id_set to gene_id2ca_depth_tax_id_short_org_list
			gene_id2ca_depth_tax_id_short_org_list = {}
			for gene_id, ortholog_tax_id_set in gene_id2ortholog_tax_id_set.iteritems():
				sorted_org_list = dict_map(tg_tax_id2ca_depth_tax_id_short_org, list(ortholog_tax_id_set))
				sorted_org_list.sort()
				gene_id2ca_depth_tax_id_short_org_list[gene_id] = sorted_org_list
			dump(gene_id2ca_depth_tax_id_short_org_list, 'gene_age')
		if bit_is_on(5):
			dump(get_gene_id2tissue_list(curs, self.tax_id), 'gene_tissue')
		if bit_is_on(6):
			dump(get_gene_id2family_size(curs, self.tax_id), 'gene_family_size')
		if bit_is_on(7):
			dump(get_gnf_gene_id2tissue_list(curs, self.tax_id), 'gnf_gene_tissue')
		
		outf.close()
Пример #9
0
	def run(self):
		"""
		2007-08-30
			add label_type 3
		
		Draw the clustered strain locations and, if self.draw_site_network is
		set, the site network.
		
		The label drawn for each population depends on self.label_type:
			1: the popid
			2: the diameter (second item of the popid2pos_size value)
			3: the selfing rate times 1000, rounded; '0' when the population has
				no rate (or a NULL/zero one). Needs self.selfing_rate_table,
				otherwise the program ends with exit code 3.
		Any other label_type is reported on stderr and ends the program with
		exit code 3 (it used to fail later with an unbound label_ls).
		"""
		import MySQLdb
		conn = MySQLdb.connect(db=self.dbname,host=self.hostname)
		curs = conn.cursor()
		popid2pos_size = self.get_popid2pos_size(curs, self.popid2ecotypeid_table)
		popid_ls = popid2pos_size.keys()
		pos_size_ls = dict_map(popid2pos_size, popid_ls)
		weighted_pos_ls = [row[0] for row in pos_size_ls]
		diameter_ls = [row[1] for row in pos_size_ls]
		if self.label_type == 1:
			label_ls = popid_ls
		elif self.label_type == 2:
			label_ls = diameter_ls
		elif self.label_type == 3:
			if self.selfing_rate_table is None:
				sys.stderr.write("Label type is 3(selfing rate), but no selfing_rate_table specified\n")
				sys.exit(3)
			popid2selfing_rate = self.get_popid2selfing_rate(curs, self.selfing_rate_table, self.which_method)
			label_ls = []
			for popid in popid_ls:
				avg_s = '0'
				#a missing, NULL or zero rate keeps the '0' label
				if popid in popid2selfing_rate and popid2selfing_rate[popid]:
					avg_s = int(round(popid2selfing_rate[popid]*1000))
				label_ls.append(avg_s)
		else:
			#without this branch label_ls would be unbound below
			sys.stderr.write("Label type %s is not supported (1: popid, 2: diameter, 3: selfing rate)\n"%self.label_type)
			sys.exit(3)
		self.draw_clustered_strain_location(label_ls, weighted_pos_ls, diameter_ls, self.label_type, self.label_type2label_name, pic_area=self.pic_area, output_fname_prefix=self.output_fname_prefix)
		if self.draw_site_network:
			from CreatePopulation import CreatePopulation
			CreatePopulation_instance = CreatePopulation()
			lat_lon_ls, pos2ecotypeid_ls = CreatePopulation_instance.get_pos2ecotypeid_ls(curs, self.strain_info_table)
			g, node_label2pos_counts = CreatePopulation_instance.divide_data_by_geography(lat_lon_ls, self.max_dist)
			self.DrawSiteNetwork(g, node_label2pos_counts, self.pic_area, self.output_fname_prefix)
Пример #10
0
	def _tf_darwin_format(self, curs, good_cluster_table, output_fname, gene_no2id, mcl_id2tf_set):
		"""
		2006-09-25
			change good_cluster_table to be pattern_table
		
		Dump, for every row of good_cluster_table whose id is a key of
		mcl_id2tf_set, the vertex set and the TF entries into output_fname in
		darwin format.
		
		Rows are pulled 5000 at a time through the server-side cursor 'crs'.
		vertex_set arrives as text (the slicing below assumes the form
		'{1,2,...}') and is translated through dict_map(gene_no2id, ..., type=2).
		Every member of mcl_id2tf_set[mcl_id] is a (tf_name_tuple, ratio_tuple)
		pair; both parts are turned into lists so that repr() prints them with [].
		
		The output file is closed explicitly (also when an error occurs) and the
		cursor 'crs' is closed after the last fetch, so the cursor name can be
		declared again later.
		
		format:
			r:=[
			[mcl_id, [gene1, gene2, ...], [ [TF1], [hyper_p_value] ], [ [TF2, TF3], [hyper_p_value] ], ... ],
			[...],
			[]]:
		"""
		sys.stderr.write("TF...\n")
		of = open(output_fname, 'w')
		try:
			of.write('r:=[\n')
			curs.execute("DECLARE crs CURSOR FOR select id, vertex_set from %s"%good_cluster_table)	#2006-09-25
			curs.execute("fetch 5000 from crs")
			rows = curs.fetchall()
			while rows:
				for mcl_id, vertex_set in rows:
					if mcl_id not in mcl_id2tf_set:
						continue
					vertex_set = [int(vertex) for vertex in vertex_set[1:-1].split(',')]
					vertex_set = dict_map(gene_no2id, vertex_set, type=2)
					#tuples -> lists (to get [] in the output), then to string
					tf_text_list = [repr([list(part) for part in tf_entry]) for tf_entry in mcl_id2tf_set[mcl_id]]
					field_list = [repr(mcl_id), repr(vertex_set)] + tf_text_list
					of.write('[%s],\n'%(','.join(field_list)))
				curs.execute("fetch 5000 from crs")
				rows = curs.fetchall()
			curs.execute("close crs")
			of.write('[]]:\n')	#add the last blank list
		finally:
			#flush and release the file no matter what happened above
			of.close()
		sys.stderr.write("TF darwin format done.\n")
Пример #11
0
	def output(self, curs, gene_no2go_id_set_list, go_id_set_list, support, prefix, gene_no2id, go_id2name, schema_list):
		"""
		07-06-05
		
		Write two tab-delimited tables, '<prefix>.gene' and '<prefix>.function',
		each with one column per schema of schema_list.
		
		gene table: one row per gene that appears in exactly `support` of the
			gene_no2go_id_set_list dictionaries and for which
			self.p_go_id_set_list_distinct() returns true. A cell holds the
			';'-joined function names of the gene in that schema or is empty.
		function table: one row per go_id that appears in exactly `support` of
			the go_id_set_list sets. A cell is '1' or '0'.
		
		Both files are closed before returning, also when an error occurs;
		the handles used to be left to the garbage collector.
		"""
		sys.stderr.write("Outputing...")
		
		#get the total set
		total_gene_no_set = Set()
		total_go_id_set = Set()
		for i in range(len(gene_no2go_id_set_list)):
			total_gene_no_set |= Set(gene_no2go_id_set_list[i].keys())
			total_go_id_set |= go_id_set_list[i]
		#same text as the former 'print a, b' statement (which puts a blank between the two items)
		sys.stdout.write("the total number of genes is  %s\n"%len(total_gene_no_set))
		gene_ofname = '%s.gene'%prefix
		function_ofname = '%s.function'%prefix
		
		opened_file_list = []	#everything in here gets closed in the finally clause
		try:
			gene_outf = open(gene_ofname, 'w')
			opened_file_list.append(gene_outf)
			function_outf = open(function_ofname, 'w')
			opened_file_list.append(function_outf)
			gene_writer = csv.writer(gene_outf, delimiter='\t')
			function_writer = csv.writer(function_outf, delimiter='\t')
			gene_writer.writerow(['']+schema_list)
			function_writer.writerow([''] + schema_list)
			
			from gene_p_map_redundancy import gene_p_map_redundancy
			node_distance_class = gene_p_map_redundancy()
			
			go_id2term_id = get_go_id2term_id(curs)
			go_term_id2depth = get_go_term_id2depth(curs)
			
			#output the gene-oriented information
			for gene_no in total_gene_no_set:
				#the go_id sets of this gene, one for each schema that has the gene
				p_go_id_set_list = [gene_no2go_id_set[gene_no] for gene_no2go_id_set in gene_no2go_id_set_list \
					if gene_no in gene_no2go_id_set]
				if len(p_go_id_set_list) != support:
					continue
				if not self.p_go_id_set_list_distinct(curs, p_go_id_set_list, node_distance_class, go_term_id2depth, go_id2term_id):
					continue
				row = [gene_no2id[gene_no]]
				for gene_no2go_id_set in gene_no2go_id_set_list:
					if gene_no in gene_no2go_id_set:
						go_name_list = dict_map(go_id2name, gene_no2go_id_set[gene_no])
						row.append(';'.join(go_name_list))
					else:
						row.append('')
				gene_writer.writerow(row)
			
			#output the function_oriented information
			for go_id in total_go_id_set:
				presence_list = [go_id in go_id_set for go_id_set in go_id_set_list]
				if presence_list.count(True) != support:
					continue
				row = ['%s(%s)'%(go_id2name[go_id],go_id)]
				for is_present in presence_list:
					if is_present:
						row.append('1')
					else:
						row.append('0')
				function_writer.writerow(row)
		finally:
			for opened_file in opened_file_list:
				opened_file.close()
		
		sys.stderr.write("Done.\n")
	def draw_pattern(self, figure_no, old_g, pos, sub_label_map, title_map, go_id_or_mt_no_struct, go_id_or_mt_no2gene_id_set, \
		output_fname_prefix, is_go_function=0, prot_interaction_graph=None):
		"""
		2006-11-20
			add prot_interaction_graph
		2006-12-27
			just draw the labels, ignore the circle nodes
		2007-01-10
			edges overlapping between interaction and co-expression are separated from interaction_edge_list
			and they were widened with color 'magenta', the pure- interaction edges are justed colored in 'magenta'
		2007-01-25
			turn off the axis
			add codes to draw a pure network
			change png format to svg, eps, png
		
		Draw one figure per key of go_id_or_mt_no_struct on a copy of old_g and
		save each as svg, eps and png under '<output_fname_prefix>_<key>'. The
		node labels are colored by group:
			green: in go_id_or_mt_no_struct[key] only (only if is_go_function)
			yellow: in go_id_or_mt_no_struct[key] and in go_id_or_mt_no2gene_id_set[key]
				(only if is_go_function)
			red: in go_id_or_mt_no2gene_id_set[key]
			black: the rest
		If go_id_or_mt_no_struct equals [], one plain network is drawn instead and
		saved under output_fname_prefix.
		Returns figure_no increased by one for every figure drawn.
		"""
		g = old_g.copy()
		#2007-01-30
		#	at font_size=16
		#	char_width=12 and char_height=20 is good for svg and eps
		#	char_width=50 and char_height=80 is good for png
		char_width = 12
		char_height = 20
		half_width = char_width/2
		half_height = char_height/2
		
		for key in go_id_or_mt_no_struct:
			figure_no += 1
			pylab.figure()
			pylab.axis("off")
			pylab.title(title_map[key])
			
			#sort the nodes into the four label groups
			standout_gene_id_list = []
			standout_and_associated_gene_id_list = []
			associated_gene_id_list = []
			other_gene_id_list = []
			for v in g:
				if is_go_function and v in go_id_or_mt_no_struct[key]:
					if v in go_id_or_mt_no2gene_id_set[key]:
						standout_and_associated_gene_id_list.append(v)
					else:
						standout_gene_id_list.append(v)
				elif v in go_id_or_mt_no2gene_id_set[key]:
					associated_gene_id_list.append(v)
				else:
					other_gene_id_list.append(v)
			
			if prot_interaction_graph:
				sub_prot_graph = prot_interaction_graph.subgraph(g.nodes())
				overlapping_edge_list = []	#2007-01-10, in both graphs
				non_interaction_edge_list = []	#pure co-expression
				for (u, v) in g.edges():
					if sub_prot_graph.has_edge(u,v):
						overlapping_edge_list.append((u, v))
					else:
						non_interaction_edge_list.append((u,v))
				interaction_edge_list = []	#pure interaction
				for (u, v, interaction_type_id) in sub_prot_graph.edges():
					if not g.has_edge(u, v):	#2007-01-10, g is not expanded by these edges
						interaction_edge_list.append((u,v))
				#2007-01-10 overlapping
				nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', width=5, edgelist=overlapping_edge_list)
				#pure interaction
				nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', edgelist=interaction_edge_list)
				#pure co-expression
				nx.draw_networkx_edges(g, pos, alpha=1.0, edgelist=non_interaction_edge_list)
			else:
				nx.draw_networkx_edges(g, pos, alpha=1.0)
			
			#2007-01-29, it's gonna extend along the x-axis
			node_size_list = [len(sub_label_map[v]) for v in g]
			
			nx.draw_networkx_nodes(g, pos, node_color='w', node_size=node_size_list, node_shape=None, alpha=1, \
				verts=[[-half_width, -half_height], [-half_width, half_height], [half_width, half_height], [half_width, -half_height]])
			
			#labels only, the colored circle nodes are not drawn
			for gene_id_list, font_color in ((standout_gene_id_list, 'g'), (standout_and_associated_gene_id_list, 'y'), \
				(associated_gene_id_list, 'r'), (other_gene_id_list, 'k')):
				if gene_id_list:
					nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, gene_id_list, type=3), \
						font_color=font_color, alpha=0.4, font_size=16)
			
			for fname_format in ('%s_%s.svg', '%s_%s.eps', '%s_%s.png'):
				pylab.savefig(fname_format%(output_fname_prefix, key), dpi=300)
			pylab.clf()
		
		if go_id_or_mt_no_struct == []:	#2007-01-25
			figure_no += 1
			pylab.figure()
			pylab.axis("off")
			nx.draw_networkx_edges(g, pos, alpha=0.8)
			
			#2007-01-30
			#2007-01-29, it's gonna extend along the x-axis
			node_size_list = [len(sub_label_map[v]) for v in g]
			#node_shape has to be set to None, otherwise verts won't work
			nx.draw_networkx_nodes(g, pos, node_color='w', node_size=node_size_list, node_shape=None, alpha=1.0, \
				verts=[[-half_width, -half_height], [-half_width, half_height], [half_width, half_height], [half_width, -half_height]])
			
			nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, g.nodes(), type=3), alpha=0.4, font_size=16)
			for fname_format in ('%s.svg', '%s.eps', '%s.png'):
				pylab.savefig(fname_format%(output_fname_prefix), dpi=300)
			pylab.clf()
		return figure_no
	def draw_augmented_PI_graph(self, old_g, prot_interaction_graph, old_sub_label_map, gene_id2gene_symbol, output_fname_prefix):
		"""
		2006-12-16
			draw a graph based on old_g but augmented by prot_interaction_graph
				augment the old graph by node-pairwise searching against the interaction graph
			old nodes and edges are marked in 'green' and 'red' color, respectively
			overlapping edges are widened with 'red' color
		2006-12-27
			just draw the labels, ignore the circle nodes
		2006-12-29, some protein interaction genes are new to table gene.gene
		2007-01-10 add the pure co-expression edges to the interaction graph and re-position
		2007-01-25
			turn off the axis
			change png format to svg, eps, png
		
		For every pair of old_g nodes that are both in prot_interaction_graph, the
		edges of the path returned by nx.shortest_path() are added to a new graph.
		The old_g edges and nodes missing from it are added too. The result is
		laid out with nx.spring_layout() and saved as svg, eps and png under
		output_fname_prefix. Nothing is returned.
		"""
		g = nx.XGraph()
		pylab.axis("off")
		node_list = old_g.nodes()
		overlapping_edge_list = []	#on a shortest path and in old_g
		non_standout_edge_list = []	#on a shortest path only
		for m, u in enumerate(node_list):
			for v in node_list[m+1:]:
				if not (prot_interaction_graph.has_node(u) and prot_interaction_graph.has_node(v)):
					continue
				shortest_path_list = nx.shortest_path(prot_interaction_graph, u, v)
				if not shortest_path_list:
					continue
				#check the whole shortest path, one pair of neighbours at a time
				for path_u, path_v in zip(shortest_path_list[:-1], shortest_path_list[1:]):
					if g.has_edge(path_u, path_v):
						continue
					g.add_edge(path_u, path_v, 1)
					if old_g.has_edge(path_u, path_v):
						overlapping_edge_list.append((path_u, path_v))
					else:
						non_standout_edge_list.append((path_u, path_v))
		
		#pure co-expression edges
		standout_edge_list = []
		for (u,v) in old_g.edges():
			if g.has_edge(u,v):
				continue
			standout_edge_list.append((u,v))
			g.add_edge(u, v, 1)	#2007-01-10
		
		#label the nodes which come only from the interaction graph
		sub_label_map = old_sub_label_map.copy()
		non_standout_node_list = []
		for v in g:
			if v in old_g:
				continue
			if v not in gene_id2gene_symbol:	#2006-12-29, some protein interaction genes are new to table gene.gene
				sub_label_map[v] = repr(v)
			else:
				sub_label_map[v] = gene_id2gene_symbol[v]
			non_standout_node_list.append(v)
		
		#all the old nodes stand out; the ones without any edge so far are added to g
		standout_node_list = [v for v in old_g]
		for v in standout_node_list:
			if v not in g:
				g.add_node(v)
		pos = nx.spring_layout(g)	#position is determined by the interaction graph
		
		if standout_edge_list:	#pure co-expression edges
			nx.draw_networkx_edges(g, pos, alpha=0.4, edgelist=standout_edge_list)
		if non_standout_edge_list:	#pure interaction edges
			nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', edgelist=non_standout_edge_list)
		if overlapping_edge_list:	#overlapping
			nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', width=5, edgelist=overlapping_edge_list)
		#labels only: co-expression network nodes in black, pure protein interaction nodes in magenta
		for labelled_node_list, font_color in ((standout_node_list, 'k'), (non_standout_node_list, 'm')):
			if labelled_node_list:
				nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, labelled_node_list, type=3), \
					font_color=font_color, alpha=0.4, font_size=10)
		for fname_format in ('%s.svg', '%s.eps', '%s.png'):
			pylab.savefig(fname_format%(output_fname_prefix), dpi=300)
		pylab.clf()
Пример #14
0
		from TF_functions import cluster_bs_analysis
		ls_to_return = cluster_bs_analysis(core_vertex_ls, gene_no2bs_no_set, bs_no2gene_no_set, ratio_cutoff, \
			top_number, p_value_cut_off)
		
		gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id)
		dataset_no2desc = get_dataset_no2desc(curs)
		
		dataset_no_desc_ls = []
		for dataset_index in recurrent_and_on_datasets_ls:
			dataset_no = dataset_index +1
			dataset_no_desc_ls.append([dataset_no, dataset_no2desc[dataset_no]])
		
		
		outf = open(output_file, 'w')
		outf.write("out:=[\n")
		for i in range(len(ls_to_return)):
			row = ls_to_return[i]
			score, score_type, bs_no_list, target_gene_no_list, global_ratio, local_ratio, expected_ratio, unknown_ratio = row
			core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls)
			bs_no_symbol_list = dict_map(gene_id2symbol, bs_no_list)
			if i == len(ls_to_return)-1:
				outf.write('[{%s},{%s},{%s}]\n'%(repr(core_vertex_symbol_ls)[1:-1], repr(bs_no_symbol_list)[1:-1], repr(dataset_no_desc_ls)[1:-1]))
			else:
				outf.write('[{%s},{%s},{%s}],\n'%(repr(core_vertex_symbol_ls)[1:-1], repr(bs_no_symbol_list)[1:-1], repr(dataset_no_desc_ls)[1:-1]))
		
		outf.write(']:\n')
		
	else:
		print __doc__
		sys.exit(2)