def data_fetch(self, curs, splat_table, mcl_table, crs_no=0, output_fname=None):
    """
    Stream every cluster of mcl_table (joined to splat_table) into output_fname.

    Rows are read in batches of 5000 through a server-side cursor named
    "crs<crs_no>". Each row becomes a cluster_dstructure, is annotated with
    GO-function and edge correlation/significance data, rendered with
    return_string_form_of_cluster_dstructure() and written as "<cluster>,".
    The file starts with "r:=[" and ends with "[]]:".

    Arguments:
        curs: database cursor offering execute() and fetchall().
        splat_table, mcl_table: table names, interpolated directly into the
            SQL text, so they must be trusted identifiers.
        crs_no: suffix making the server-side cursor name unique.
        output_fname: path of the file to (over)write. The default of None
            is not usable: open() is called on it unconditionally.

    Returns:
        An empty dict. Clusters are written to output_fname instead of being
        collected; the dict is only kept so existing callers still get one.

    History:
    04-17-05
        fetch cluster_dstructures for all clusters(Jasmine's request)
    04-19-05
        1. return a mcl_id2cluster_dstructure
        2. crs_no
    08-31-05
        output clusters directly to output_fname
    09-01-05
        add the last []
    """
    gene_no2gene_id = get_gene_no2gene_id(curs)    #08-31-05
    cursor_name = "crs%s" % crs_no
    fetch_sql = "fetch 5000 from %s" % cursor_name
    mcl_id2cluster_dstructure = {}

    outf = open(output_fname, 'w')    #08-31-05
    # try/finally rather than 'del outf': the handle is closed (and flushed)
    # deterministically, also when a query or a parse step fails half-way.
    try:
        outf.write("r:=[")    #08-31-05
        no_of_total_genes = get_no_of_total_genes(curs)

        sys.stderr.write("Getting the basic information for all clusters...\n")
        #06-20-05 connectivity_original faked to be 0
        curs.execute(
            "DECLARE %s CURSOR FOR select m.mcl_id, m.vertex_set, m.connectivity, 0, "
            "m.recurrence_array, s.edge_set, s.connectivity, m.cooccurrent_cluster_id "
            "from %s m, %s s where m.splat_id=s.splat_id"
            % (cursor_name, mcl_table, splat_table))
        curs.execute(fetch_sql)
        rows = curs.fetchall()
        while rows:
            for row in rows:
                unit = cluster_dstructure()
                unit.cluster_id = row[0]
                # vertex_set / recurrence_array arrive as text; the first and
                # last characters are dropped and the rest split on commas
                # (presumably the "{1,2,3}" array form -- confirm against the
                # schema). Real lists are built, not map(), because
                # len(unit.vertex_set) is needed below and map() is lazy on
                # Python 3.
                unit.vertex_set = [int(v) for v in row[1][1:-1].split(',')]
                unit.connectivity = row[2]
                unit.connectivity_original = row[3]
                unit.recurrence_array = [float(v) for v in row[4][1:-1].split(',')]
                unit.edge_set = parse_splat_table_edge_set(row[5])
                unit.splat_connectivity = row[6]
                unit.cooccurrent_cluster_id = row[7]
                unit.go_no2association_genes = self.get_go_functions_of_this_gene_set(
                    curs, unit.vertex_set)
                #jasmine wants to cut some go-nos.
                unit.go_no2information = self.get_information_of_go_functions(
                    curs, unit.go_no2association_genes, len(unit.vertex_set),
                    no_of_total_genes, p_value_cut_off=0.05)
                unit.edge_cor_2d_list, unit.edge_sig_2d_list = self.get_cor_sig_2d_list(
                    curs, unit.edge_set)

                str_tmp = self.return_string_form_of_cluster_dstructure(unit, gene_no2gene_id)    #08-31-05
                outf.write("%s," % str_tmp)

            curs.execute(fetch_sql)
            rows = curs.fetchall()

        # Release the exhausted server-side cursor so its name can be reused
        # within the same transaction. Only done on the success path: after a
        # database error the statement would fail too and hide the real error.
        curs.execute("close %s" % cursor_name)

        #08-31-05, 09-01-05 add the last blank [] (follows the trailing comma)
        outf.write("[]]:")
    finally:
        outf.close()
    sys.stderr.write("Done.\n")
    return mcl_id2cluster_dstructure
def get_basic_cluster_dstructure(self, curs, mcl_id, splat_table, mcl_table):
    """
    Load the basic fields of one cluster into a cluster_dstructure.

    Arguments:
        curs: database cursor offering execute() and fetchall().
        mcl_id: id of the cluster to look up (matched against m.mcl_id).
        splat_table, mcl_table: table names. These and mcl_id are
            interpolated directly into the SQL text, so they must come from
            a trusted source.

    Returns:
        A cluster_dstructure with cluster_id, vertex_set, connectivity,
        connectivity_original, recurrence_array, edge_set and
        splat_connectivity filled in, or None when no row matches. If the
        query yields several rows, the values of the last one win.

    Progress and the "not found" notice are written to sys.stderr.

    History:
    04-06-05
    """
    sys.stderr.write("Getting the basic information of cluster no.%s..." % mcl_id)
    #06-20-05 connectivity_original faked to be 0
    curs.execute(
        "select m.mcl_id, m.vertex_set, m.connectivity, 0, "
        "m.recurrence_array, s.edge_set, s.connectivity from %s m, %s s "
        "where m.splat_id=s.splat_id and m.mcl_id=%s"
        % (mcl_table, splat_table, mcl_id))
    rows = curs.fetchall()
    if not rows:
        sys.stderr.write("Cluster: %s not found.\n" % mcl_id)
        return None

    # Built only once a row is known to exist; nothing is constructed just to
    # be thrown away on the not-found path.
    unit = cluster_dstructure()
    for row in rows:
        unit.cluster_id = row[0]
        # Text columns: first and last characters are dropped, the rest is
        # split on commas. Real lists are stored (not map(), which is a
        # one-shot iterator on Python 3) so callers can index, len() and
        # re-iterate them.
        unit.vertex_set = [int(v) for v in row[1][1:-1].split(',')]
        unit.connectivity = row[2]
        unit.connectivity_original = row[3]
        unit.recurrence_array = [float(v) for v in row[4][1:-1].split(',')]
        unit.edge_set = parse_splat_table_edge_set(row[5])
        unit.splat_connectivity = row[6]
    sys.stderr.write("Done.\n")
    return unit
def subgraph_fetch(self, curs, mcl_table, splat_table, size=None):
    """
    Read every subgraph from the database and hand it to
    self._connectivity2homogeneity().

    Rows are fetched in batches of 5000 through a server-side cursor named
    "crs". For each row the vertex set and edge set are parsed and passed on
    together with self.gene_no2go_no and self.writer.

    Arguments:
        curs: database cursor offering execute() and fetchall().
        mcl_table, splat_table: table names, interpolated directly into the
            SQL text, so they must be trusted identifiers.
        size: not used by this method; kept so the signature stays
            compatible with existing callers.

    Returns:
        None. Progress goes to sys.stderr; when self.report is true a running
        record count is rewritten in place after each batch.

    History:
    04-03-05
        counterpart of get_random_subgraph, get subgraph from database
    """
    sys.stderr.write("Fetching subgraphs from database...\n")
    curs.execute(
        "DECLARE crs CURSOR FOR select m.mcl_id, m.vertex_set, "
        "s.edge_set from %s m, %s s where m.splat_id=s.splat_id"
        % (mcl_table, splat_table))
    curs.execute("fetch 5000 from crs")
    rows = curs.fetchall()
    no_of_records = 0
    while rows:
        for row in rows:
            mcl_id = row[0]
            # Text column: strip the first/last character, split on commas.
            # A real list (not a lazy map object) so the callee can len(),
            # index and re-iterate it on Python 3 as well.
            vertex_set = [int(v) for v in row[1][1:-1].split(",")]
            edge_set = parse_splat_table_edge_set(row[2])
            self._connectivity2homogeneity(
                mcl_id, vertex_set, edge_set, self.gene_no2go_no, self.writer)
            no_of_records += 1
        if self.report:
            # 20 backspaces rewind the terminal so the counter overwrites itself.
            sys.stderr.write("%s%s" % ("\x08" * 20, no_of_records))
        curs.execute("fetch 5000 from crs")
        rows = curs.fetchall()
    # The cursor name is fixed, so leaving it open would make a second call
    # within the same transaction fail on the DECLARE. Closed only on the
    # success path: after a database error this statement would fail as well
    # and mask the original exception.
    curs.execute("close crs")
    sys.stderr.write("Done.\n")