Exemplo n.º 1
0
	def dstruc_loadin(self, curs):
		"""
		Load the lookup dictionaries used by this object through the cursor curs.

		Sets go_no2go_id, go_no2go_name, gene_no2gene_id, gene_id2gene_no,
		global_gene_to_go_dict and label_dict. When self.type is 3,
		no_of_datasets is additionally read from self.table.

		04-01-05
			label_dict holds two kinds of labels:
			1 is gene_no2gene_id, 2 maps every gene_no to itself
		"""
		sys.stderr.write("Loading Data STructure...\n")

		from codense.common import get_go_no2go_id, get_gene_no2gene_id, get_go_no2name, \
			get_gene_id2gene_no, get_gene_no2go_no
		self.go_no2go_id = get_go_no2go_id(curs)
		self.go_no2go_name = get_go_no2name(curs)
		self.gene_no2gene_id = get_gene_no2gene_id(curs)
		self.gene_id2gene_no = get_gene_id2gene_no(curs)
		self.global_gene_to_go_dict = get_gene_no2go_no(curs)

		# the second kind of label: every gene_no is labeled by itself
		identity_labels = dict((gene_no, gene_no) for gene_no in self.gene_no2gene_id)
		self.label_dict = {1: self.gene_no2gene_id, 2: identity_labels}

		# NOTE(review): the result of this query is never fetched inside this
		# method -- confirm whether it is still needed before removing it.
		curs.execute("select gene_no,go_functions from gene")

		if self.type == 3:
			# the upper bound of recurrence_array in the first row gives the number of datasets
			curs.execute("select array_upper(recurrence_array,1) from %s limit 1"%self.table)
			first_row = curs.fetchall()[0]
			self.no_of_datasets = int(first_row[0])

		sys.stderr.write("Done\n")
Exemplo n.º 2
0
    def dstruc_loadin(self, curs):
        """
		
		03-09-05
			get the context from mcl_table via linking through mcl_id of p_gene_table
			context_dict is set
		"""
        from codense.common import get_known_genes_dict, get_go_no2go_id, get_go_no2name, get_gene_no2gene_id

        self.known_genes_dict = get_known_genes_dict(curs)
        self.go_no2go_id = get_go_no2go_id(curs)
        self.go_no2go_name = get_go_no2name(curs)
        self.gene_no2gene_id = get_gene_no2gene_id(curs)

        sys.stderr.write("Setting up gene_prediction_dict...")
        # setup self.gene_prediction_dict
        curs.execute(
            "select p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, p.is_correct_lca, m.vertex_set\
			from %s p, %s g, %s m where g.p_gene_id=p.p_gene_id and m.mcl_id=p.mcl_id"
            % (self.gene_table, self.table, self.mcl_table)
        )
        rows = curs.fetchall()
        for row in rows:
            gene_no = row[0]
            if self.type == 2 and gene_no not in self.known_genes_dict:
                # I only want the known genes, but this gene is unknown
                continue
            elif self.type == 3 and gene_no in self.known_genes_dict:
                # i only want the unknown genes, but this gene is known
                continue
            go_no = row[1]
            is_correct = row[2]
            is_correct_l1 = row[3]
            is_correct_lca = row[4]
            vertex_set = row[5][1:-1].split(",")
            vertex_set = map(int, vertex_set)

            item = function_struc()
            item.is_correct = is_correct
            item.is_correct_l1 = is_correct_l1
            item.is_correct_lca = is_correct_lca
            # context_dict is a set
            item.context_dict = Set(vertex_set)
            if gene_no not in self.gene_prediction_dict:
                self.gene_prediction_dict[gene_no] = gene_prediction()
                self.gene_prediction_dict[gene_no].p_functions_struc_dict[go_no] = item
            else:
                self.gene_prediction_dict[gene_no].p_functions_struc_dict[go_no] = item

        sys.stderr.write("Done\n")

        """
Exemplo n.º 3
0
	def dstruc_loadin(self, curs):
		"""
		Load the gene and GO lookup dictionaries through the cursor curs.

		Each loader from codense.common is called with curs and its result is
		stored on the attribute of the matching name.

		03-14-05
			remove the distance loading part
		"""
		sys.stderr.write("Loading Data STructure...\n")
		from codense.common import get_known_genes_dict, get_go_no2go_id,\
			get_go_no2term_id, get_go_no2depth, get_go_term_id2go_no, \
			get_go_term_id2depth

		# (attribute name, loader) pairs, processed in this order
		attribute_loaders = (
			('known_genes_dict', get_known_genes_dict),
			('go_no2go_id', get_go_no2go_id),
			('go_no2term_id', get_go_no2term_id),
			('go_no2depth', get_go_no2depth),
			('go_term_id2go_no', get_go_term_id2go_no),
			('go_term_id2depth', get_go_term_id2depth),
		)
		for attribute_name, loader in attribute_loaders:
			setattr(self, attribute_name, loader(curs))

		sys.stderr.write("Done\n")
Exemplo n.º 4
0
	def output(self, curs, outf, known_gene_no2p_gene_id_src, unknown_gene_no2p_gene_id_src, p_gene_id_src_map):
		"""
		Write the prediction table to outf as tab-delimited rows.

		For every gene (all known genes first, then all unknown genes) this
		writes the gene via output_one_gene(), a column header row, one
		output_function_group() call per p_gene_id_src of the gene, and an
		empty row as separator.

		03-03-05
			loop over gene_no2p_gene_id_src and p_gene_id_src_map
		03-13-05
			add a column, #clusters in the output file

			--output_one_gene()
			--output_function_group()
		"""
		#lookup dictionaries
		gene_no2gene_id = get_gene_no2gene_id(curs)
		gene_no2direct_go = get_gene_no2direct_go(curs)
		go_no2go_id = get_go_no2go_id(curs)
		go_no2name = get_go_no2name(curs)
		go_no2accuracy, go_no2accuracy_pair = self.get_go_no2accuracy(curs, self.p_gene_table, self.gene_p_table)

		sys.stderr.write("Outputing prediction table...")
		writer = csv.writer(outf, delimiter='\t')
		header = ['go_no', 'go_id', 'go_name', 'is_correct', 'is_correct_L1', 'is_correct_lca',
			'p_value_list', '#clusters', 'mcl_id_list', 'e_acc', 'e_acc_pair', 'cluster_context']
		#known genes are output first, unknown genes second; both use the same layout
		for gene_no2p_gene_id_src in (known_gene_no2p_gene_id_src, unknown_gene_no2p_gene_id_src):
			for gene_no, p_gene_id_src_list in gene_no2p_gene_id_src.iteritems():
				self.output_one_gene(curs, writer, gene_no, gene_no2gene_id, gene_no2direct_go)
				writer.writerow(header)
				for p_gene_id_src in p_gene_id_src_list:
					unit = p_gene_id_src_map[p_gene_id_src]
					self.output_function_group(curs, writer, unit, gene_no2gene_id,
						go_no2go_id, go_no2name, go_no2accuracy, go_no2accuracy_pair)
				#an empty row separates consecutive genes
				writer.writerow([])
		del writer
		sys.stderr.write("Done\n")
Exemplo n.º 5
0
	def output1(self, curs, outf, known_gene_no2p_gene_id_src, unknown_gene_no2p_gene_id_src, p_gene_id_src_map):
		"""
		Write the prediction table to outf as tab-delimited rows (variant of output()).

		Known genes are written first, with the lca columns in the header and
		with prediction_pair2lca_list and gene_no passed on to
		output_function_group1(). Unknown genes follow, with a shorter header
		and without those two extra arguments.

		03-15-05
			copied from output()
		"""
		#lookup dictionaries
		gene_no2gene_id = get_gene_no2gene_id(curs)
		gene_no2direct_go = get_gene_no2direct_go(curs)
		go_no2go_id = get_go_no2go_id(curs)
		go_no2name = get_go_no2name(curs)
		go_no2accuracy, go_no2accuracy_pair = self.get_go_no2accuracy(curs, self.p_gene_table, self.gene_p_table)
		from codense.common import get_prediction_pair2lca_list
		prediction_pair2lca_list = get_prediction_pair2lca_list(curs,p_gene_table=self.p_gene_table)

		sys.stderr.write("Outputing prediction table...")
		writer = csv.writer(outf, delimiter='\t')
		known_header = ['go_id', 'go_name', 'is_correct_lca', 'lca_list', 'p_value_list', '#clusters', 'e_acc']
		unknown_header = ['go_id', 'go_name', 'p_value_list', '#clusters', 'e_acc']
		#arguments shared by every output_function_group1() call
		shared_args = (gene_no2gene_id, go_no2go_id, go_no2name, go_no2accuracy, go_no2accuracy_pair)

		#first output the known genes
		for gene_no, p_gene_id_src_list in known_gene_no2p_gene_id_src.iteritems():
			self.output_one_gene1(curs, writer, gene_no, gene_no2gene_id, gene_no2direct_go)
			writer.writerow(known_header)
			for p_gene_id_src in p_gene_id_src_list:
				#NOTE: known genes pass two extra arguments that unknown genes do not
				unit = p_gene_id_src_map[p_gene_id_src]
				known_args = shared_args + (prediction_pair2lca_list, gene_no)
				self.output_function_group1(curs, writer, unit, *known_args)
			writer.writerow([])

		#second output the unknown genes
		for gene_no, p_gene_id_src_list in unknown_gene_no2p_gene_id_src.iteritems():
			self.output_one_gene1(curs, writer, gene_no, gene_no2gene_id, gene_no2direct_go)
			writer.writerow(unknown_header)
			for p_gene_id_src in p_gene_id_src_list:
				unit = p_gene_id_src_map[p_gene_id_src]
				self.output_function_group1(curs, writer, unit, *shared_args)
			writer.writerow([])

		del writer
		sys.stderr.write("Done\n")
Exemplo n.º 6
0
	def run(self):
		"""
		Draw the function map, the gene-function map and the tf map.

		The three images are written to '<output_prefix>.function_map.png',
		'<output_prefix>.gene_function_map.png' and '<output_prefix>.tf_map.png'.

		self.type selects where the predictions are read from:
			1: the database (p_gene_table, restricted to the p_gene ids found in gene_p_table)
			2: the file self.inputfname

		Raises ValueError when self.type is neither 1 nor 2. The check is done
		before anything else so that an unsupported type fails with a clear
		message instead of an unbound-variable error after the database
		connection has already been opened.

		10-31-05
		2006-09-26
			modify it to be compatible with the modified pipeline from haifeng
		2006-11-06
			add type
		2006-12-13
			use font_path and font_size
			
			--form_schema_tables()
			--db_connect()
			--get_char_dimension()
			
			--get_no_of_p_funcs_gene_no_go_no_list()
			--get_recurrence_go_no_rec_array_cluster_id_ls()
			--get_go_no2name()
			--draw_function_map()
			
			--draw_gene_function_map()

			--get_recurrence_rec_array_bs_no_list()
			--get_mt_no2tf_name()
			--draw_tf_map()
		"""
		if self.type not in (1, 2):
			raise ValueError("Unsupported type %r: expected 1 (database) or 2 (input file)" % (self.type,))

		schema_instance = form_schema_tables(self.inputfname, self.acc_cutoff, self.lm_bit)
		(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
		font = ImageFont.truetype(self.font_path, self.font_size)
		#the size of a single character rendered in the chosen font
		char_dimension = font.getsize('a')
		#char_dimension = get_char_dimension()
		
		#go_no2name = get_go_no2name(curs)
		#NOTE(review): despite its name, this dictionary is filled by get_go_id2name() -- confirm the key type expected by draw_function_map()
		go_no2name = get_go_id2name(curs)
		if self.type==1:
			go_no2go_id = get_go_no2go_id(curs)
			given_p_gene_set = p_gene_id_set_from_gene_p_table(curs, schema_instance.gene_p_table)
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_db(curs, \
				schema_instance.p_gene_table, given_p_gene_set, go_no2go_id)
		else:
			#self.type==2, guaranteed by the check at the top
			no_of_p_funcs_gene_no_go_no_list, mcl_id2go_no_set = self.get_no_of_p_funcs_gene_no_go_no_list_from_file(self.inputfname)
		
		
		recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets, mcl_id2enc_recurrence = \
			self.get_recurrence_go_no_rec_array_cluster_id_ls(curs, self.pattern_table, mcl_id2go_no_set)
		
		no_of_functions = len(recurrence_go_no_rec_array_cluster_id_ls)
		function_map_output_fname = '%s.function_map.png'%self.output_prefix
		go_no2index, function_name_region = self.draw_function_map(recurrence_go_no_rec_array_cluster_id_ls, no_of_datasets,\
			go_no2name, function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)
		
		#the gene-function map reuses go_no2index and function_name_region returned by draw_function_map()
		gene_function_map_output_fname = '%s.gene_function_map.png'%self.output_prefix
		self.draw_gene_function_map(no_of_p_funcs_gene_no_go_no_list, go_no2index, function_name_region,\
			gene_function_map_output_fname, self.function_name_length, char_dimension, no_of_functions, font)
		
		
		#tf_map requires mcl_id2enc_recurrence and no_of_datasets from above
		recurrence_rec_array_bs_no_list = self.get_recurrence_rec_array_bs_no_list(curs, self.cluster_bs_table, mcl_id2enc_recurrence)
		mt_no2tf_name = get_gene_id2gene_symbol(curs, tax_id=9606)
		#mt_no2tf_name = get_mt_no2tf_name()
		tf_map_output_fname = '%s.tf_map.png'%self.output_prefix
		self.draw_tf_map(recurrence_rec_array_bs_no_list, no_of_datasets, mt_no2tf_name, \
			tf_map_output_fname, self.function_name_length, char_dimension, font)