def copath_parser(self, row, writer, argument=None, argument2=None):
    """
    Parse one copath row and output one cluster per connected component.

    row[0] is matched against self.p_cooccurrent_cluster_id to obtain the
    cooccurrent cluster id. row[2] is a ';'-separated vertex list and row[3]
    is a ' );('-separated edge list whose edges are ','-separated vertex ids.
    The edges are split into connected components by cc_from_edge_list and
    each component is handed to self.output() wrapped in a cluster_dstructure.

    argument and argument2 are accepted but not used in this parser.

    04-12-05
        copied from codense2db.py, changed a lot
    04-29-05
        cc module come into play to get the connected components
    """
    cluster_id = self.p_cooccurrent_cluster_id.match(row[0]).group()

    # The declared vertex list is still parsed (so a malformed row[2] fails
    # here), but its value is not used: every component's vertex set is
    # rebuilt from that component's own edges below.
    _declared_vertices = [int(vertex) for vertex in row[2][1:-2].split(";")]

    # Each edge becomes a two-element list of ints in ascending order.
    edge_set = []
    for edge_text in row[3][2:-4].split(" );("):
        edge_set.append(sorted(int(vertex) for vertex in edge_text.split(",")))

    cc_finder = cc_from_edge_list()
    cc_finder.run(edge_set)
    for cc_edge_list in cc_finder.cc_list:
        cluster = cluster_dstructure()
        # it's not used in the output()
        cluster.cooccurrent_cluster_id = cluster_id
        cluster.vertex_set = self.vertex_set_from_cc_edge_list(cc_edge_list)
        cluster.edge_set = cc_edge_list
        self.output(writer, cluster)
def run(self):
    """
    Write dataset clusters to self.outfname as tab-delimited rows.

    if self.type==1:
        every tab-delimited row of self.infname is passed to
        datasetClustOutput2().
    elif self.type==2:
        id2dataset_cluster_setConstruct(self.infname) builds the mapping
        between an id and a dataset cluster set,
        dataset_clusterGraphConstruct() turns it into an edge list using
        self.similar_score, and for each connected component of that graph
        returnBigDatasetClust() yields a set that is sorted and written by
        _datasetClustOutput().
    Any other self.type writes nothing (the output file is still created).

    06-08-05
    06-09-05 add type 2: group dataset clusters
    review fix: the type 2 branch used the bare name 'infname' instead of
        self.infname; input and output files are now closed explicitly.

    --db_connect()
    --return_dataset_no2id_desc()
    if self.type==1:
        --datasetClustOutput2()
    elif self.type==2:
        --id2dataset_cluster_setConstruct()
        --dataset_clusterGraphConstruct()
        --<cc_from_edge_list>
        --<CcFromBiclusteringOutput>
        --returnBigDatasetClust()
        --_datasetClustOutput()
    """
    # conn is not used below; only the cursor is needed.
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)

    # The with-blocks guarantee both files are flushed and closed even when
    # one of the processing steps raises.
    with open(self.outfname, 'w') as out_handle:
        outf = csv.writer(out_handle, delimiter='\t')
        #no_of_datasets = self.headerOutput(curs, outf)
        dataset_no2id_desc = self.return_dataset_no2id_desc(curs)

        if self.type == 1:
            with open(self.infname, 'r') as in_handle:
                reader = csv.reader(in_handle, delimiter='\t')
                for row in reader:
                    if self.debug:
                        # Single-argument print(...) gives the same output
                        # as the Python 2 print statement.
                        print(row)
                    self.datasetClustOutput2(curs, outf, row, dataset_no2id_desc)
        elif self.type == 2:
            # 06-09-05 mapping between an id and a dataset cluster set
            id2dataset_cluster_set = self.id2dataset_cluster_setConstruct(self.infname)
            if self.debug:
                print("id2dataset_cluster_set is:")
                print(id2dataset_cluster_set)

            edge_list = self.dataset_clusterGraphConstruct(
                id2dataset_cluster_set, self.similar_score)
            if self.debug:
                print("The constructed graph has %s edges" % len(edge_list))

            cfe_instance = cc_from_edge_list()
            cfe_instance.run(edge_list)
            cfbo_instance = CcFromBiclusteringOutput()
            for cc_edge_list in cfe_instance.cc_list:
                id_set = cfbo_instance.vertex_set_from_cc_edge_list(cc_edge_list)
                if self.debug:
                    print(cc_edge_list)
                    print(id_set)
                big_dataset_cluster_set = self.returnBigDatasetClust(
                    id2dataset_cluster_set, id_set)
                big_dataset_cluster = sorted(big_dataset_cluster_set)
                self._datasetClustOutput(outf, big_dataset_cluster, dataset_no2id_desc)
def default_parser(self, row, j_instance, cfbo_instance):
    """
    Parse one row into a list of per-connected-component output rows.

    Only the first two elements of row are looked at, and of those only the
    second (the edge list text) is used: it is stripped of its two leading
    and two trailing characters, split on '), (' and each piece is split on
    ',' and converted to ints. The edges are then split into connected
    components by cc_from_edge_list.

    Returns a list with one entry per component, each entry being
    [vertex_set, cc_edge_list, recurrence_array, D] where vertex_set comes
    from cfbo_instance.vertex_set_from_cc_edge_list(), D from
    j_instance.py_shortest_distance() and recurrence_array from
    j_instance.py_recurrence_list(). cc_edge_list is sorted in place.

    2006-08-22
        default parser, work for annot's pipeline
    """
    # 04-27-06, just first 2 elements; the vertex text itself is discarded
    # because vertex sets are recomputed per connected component.
    _, edge_text = row[:2]
    edge_set = [
        [int(vertex) for vertex in pair.split(',')]
        for pair in edge_text[2:-2].split('), (')
    ]

    # 04-27-06, work on each connected component
    cc_finder = cc_from_edge_list()
    cc_finder.run(edge_set)

    result = []
    for cc_edge_list in cc_finder.cc_list:
        vertex_set = cfbo_instance.vertex_set_from_cc_edge_list(cc_edge_list)
        distance = j_instance.py_shortest_distance(vertex_set, cc_edge_list)
        # MUST be after py_shortest_distance()
        recurrence_array = j_instance.py_recurrence_list()
        # 10-28-05 to ease codense2db.py #01-01-06
        cc_edge_list.sort()
        # 10-28-05, #01-01-06
        result.append([vertex_set, cc_edge_list, recurrence_array, distance])
    return result
def outputCcFromEdgeList(of, signature, edge_list, codense2db_instance, min_cluster_size, no_cc):
    """
    Write 'repr(vertex_set)<TAB>repr(edge_list)' lines to of.

    When no_cc is true, edge_list is treated as a single cluster and written
    as given (its edges are not sorted). Otherwise cc_from_edge_list splits
    edge_list into connected components and each component's edge list is
    sorted in place before being written. In both cases the vertex set is
    obtained from codense2db_instance.vertex_set_from_cc_edge_list(), is
    sorted in place, and the cluster is skipped when it has fewer than
    min_cluster_size vertices.

    signature is accepted but not used.

    08-07-05
    08-09-05
        calculate recurrence array for codense2db.py
    12-31-05
        remove several time-consuming steps, no recurrence_array;
        each edge in cc_edge_list is already sorted (tuple-list), so only
        the list of edges is sorted, not the individual edges
    01-07-06
        add min_cluster_size
    """
    if no_cc:
        components = [edge_list]
    else:
        cc_finder = cc_from_edge_list()
        cc_finder.run(edge_list)
        components = cc_finder.cc_list

    for component in components:
        vertices = codense2db_instance.vertex_set_from_cc_edge_list(component)
        if len(vertices) < min_cluster_size:
            continue
        vertices.sort()
        if not no_cc:
            # Only components found by cc_from_edge_list get their edge list
            # sorted; a caller-supplied edge_list is left in its own order.
            component.sort()
        of.write('%s\t%s\n' % (repr(vertices), repr(component)))