示例#1
0
	def data_fetch(self, curs, splat_table, mcl_table, crs_no=0, output_fname=None):
		"""
		Write every cluster joined from mcl_table and splat_table to output_fname.

		The file receives "r:=[", then the string form of each cluster followed
		by a comma, then a closing "[]]:" (the trailing empty list absorbs the
		last comma).

		History:
		04-17-05
			fetch cluster_dstructures for all clusters (Jasmine's request)
		04-19-05
			1. return a mcl_id2cluster_dstructure
			2. crs_no
		08-31-05
			output clusters directly to output_fname
		09-01-05
			add the last []

		Arguments:
			curs: database cursor used for the DECLARE/FETCH/CLOSE statements
				and handed to the lookup helpers.
			splat_table, mcl_table: table names interpolated into the query.
			crs_no: suffix of the server-side cursor name ("crs<crs_no>").
			output_fname: path of the file to write; required even though the
				signature defaults it to None.

		Returns:
			The dict mcl_id2cluster_dstructure. Nothing is inserted into it
			here (clusters are streamed to the file), so it is returned empty.

		Raises:
			TypeError: if output_fname is None.
		"""
		if output_fname is None:
			# open(None, 'w') raises TypeError as well; fail before doing any
			# database work and say what is actually missing.
			raise TypeError("output_fname is required: clusters are written to that file")
		gene_no2gene_id = get_gene_no2gene_id(curs)	#08-31-05
		# Kept only for the return value; units are not retained in memory.
		mcl_id2cluster_dstructure = {}
		cursor_name = "crs%s"%crs_no
		fetch_statement = "fetch 5000 from %s"%cursor_name
		outf = open(output_fname, 'w')	#08-31-05
		try:
			outf.write("r:=[")	#08-31-05
			no_of_total_genes = get_no_of_total_genes(curs)
			sys.stderr.write("Getting the basic information for all clusters...\n")
			#06-20-05	connectivity_original faked to be 0 (the literal 0 column)
			curs.execute("DECLARE %s CURSOR FOR select m.mcl_id, m.vertex_set, m.connectivity, 0, "
				"m.recurrence_array, s.edge_set, s.connectivity, m.cooccurrent_cluster_id "
				"from %s m, %s s where m.splat_id=s.splat_id"
				%(cursor_name, mcl_table, splat_table))
			curs.execute(fetch_statement)
			rows = curs.fetchall()
			while rows:
				for row in rows:
					unit = cluster_dstructure()
					unit.cluster_id = row[0]
					# The first and last characters (presumably the array delimiters)
					# are dropped before splitting. Real lists are built so that
					# len() below also works where map() is lazy (Python 3).
					unit.vertex_set = [int(vertex) for vertex in row[1][1:-1].split(',')]
					unit.connectivity = row[2]
					unit.connectivity_original = row[3]
					unit.recurrence_array = [float(value) for value in row[4][1:-1].split(',')]
					unit.edge_set = parse_splat_table_edge_set(row[5])
					unit.splat_connectivity = row[6]
					unit.cooccurrent_cluster_id = row[7]
					unit.go_no2association_genes = self.get_go_functions_of_this_gene_set(curs, unit.vertex_set)
					#jasmine wants to cut some go-nos.
					unit.go_no2information = self.get_information_of_go_functions(curs, \
						unit.go_no2association_genes, len(unit.vertex_set), no_of_total_genes, p_value_cut_off=0.05)
					unit.edge_cor_2d_list, unit.edge_sig_2d_list = self.get_cor_sig_2d_list(curs, unit.edge_set)

					str_tmp = self.return_string_form_of_cluster_dstructure(unit, gene_no2gene_id)	#08-31-05
					outf.write("%s,"%str_tmp)
				curs.execute(fetch_statement)
				rows = curs.fetchall()
			outf.write("[]]:")	#08-31-05, 09-01-05 add the last blank []
			# Release the server-side cursor so the same crs_no can be reused
			# within the current transaction.
			curs.execute("close %s"%cursor_name)
		finally:
			# Close explicitly (also on errors) instead of relying on `del`
			# and reference counting to flush the file.
			outf.close()
		sys.stderr.write("Done.\n")
		return mcl_id2cluster_dstructure
示例#2
0
	def get_basic_cluster_dstructure(self, curs, mcl_id, splat_table, mcl_table):
		"""
		04-06-05
		Look up one cluster by mcl_id and return it as a cluster_dstructure.

		Arguments:
			curs: database cursor used to run the select.
			mcl_id: id of the cluster; interpolated into the where clause.
			splat_table, mcl_table: table names interpolated into the query.

		Returns:
			A cluster_dstructure with cluster_id, vertex_set, connectivity,
			connectivity_original, recurrence_array, edge_set and
			splat_connectivity filled in, or None when no row matches.
			If several rows match, the values of the last row win.
		"""
		sys.stderr.write("Getting the basic information of cluster no.%s..."%mcl_id)
		#06-20-05	connectivity_original faked to be 0 (the literal 0 column)
		curs.execute("select m.mcl_id, m.vertex_set, m.connectivity, 0, "
			"m.recurrence_array, s.edge_set, s.connectivity from %s m, %s s "
			"where m.splat_id=s.splat_id and m.mcl_id=%s"
			%(mcl_table, splat_table, mcl_id))
		rows = curs.fetchall()
		if not rows:
			sys.stderr.write("Cluster: %s not found.\n"%mcl_id)
			return None
		unit = cluster_dstructure()
		for row in rows:
			unit.cluster_id = row[0]
			# The first and last characters (presumably the array delimiters)
			# are dropped before splitting. Lists are stored rather than map()
			# results so the attributes can be measured and re-iterated on
			# Python 3 as well.
			unit.vertex_set = [int(vertex) for vertex in row[1][1:-1].split(',')]
			unit.connectivity = row[2]
			unit.connectivity_original = row[3]
			unit.recurrence_array = [float(value) for value in row[4][1:-1].split(',')]
			unit.edge_set = parse_splat_table_edge_set(row[5])
			unit.splat_connectivity = row[6]
		sys.stderr.write("Done.\n")
		return unit
    def subgraph_fetch(self, curs, mcl_table, splat_table, size=None):
        """
        04-03-05
            counterpart of get_random_subgraph, get subgraph from database

        Reads (mcl_id, vertex_set, edge_set) for every row joined from
        mcl_table and splat_table, 5000 rows per fetch, and passes each one to
        self._connectivity2homogeneity together with self.gene_no2go_no and
        self.writer.

        Arguments:
            curs: database cursor used for the DECLARE/FETCH/CLOSE statements.
            mcl_table, splat_table: table names interpolated into the query.
            size: not used by this method.
        """
        sys.stderr.write("Fetching subgraphs from database...\n")
        curs.execute(
            "DECLARE crs CURSOR FOR select m.mcl_id, m.vertex_set, "
            "s.edge_set from %s m, %s s where m.splat_id=s.splat_id"
            % (mcl_table, splat_table)
        )
        fetch_statement = "fetch 5000 from crs"
        curs.execute(fetch_statement)
        rows = curs.fetchall()
        no_of_records = 0
        while rows:
            for row in rows:
                mcl_id = row[0]
                # The first and last characters (presumably the array
                # delimiters) are dropped before splitting. A list is passed on
                # so the callee gets a reusable sequence on Python 3 as well.
                vertex_set = [int(vertex) for vertex in row[1][1:-1].split(",")]
                edge_set = parse_splat_table_edge_set(row[2])
                self._connectivity2homogeneity(mcl_id, vertex_set, edge_set, self.gene_no2go_no, self.writer)
                no_of_records += 1
            if self.report:
                # Backspaces move the terminal cursor back so the running
                # count is overwritten in place.
                sys.stderr.write("%s%s" % ("\x08" * 20, no_of_records))

            curs.execute(fetch_statement)
            rows = curs.fetchall()

        # The cursor name is fixed, so release it; otherwise a second call in
        # the same transaction would collide with the still-open "crs".
        curs.execute("close crs")
        sys.stderr.write("Done.\n")