Пример #1
0
    def copath_parser(self, row, writer, argument=None, argument2=None):
        """
        Split one copath row into connected components and output each one.

        04-12-05
            copied from codense2db.py, changed a lot
        04-29-05
            cc module comes into play to get the connected components

        row[0] is matched against self.p_cooccurrent_cluster_id to get the
        cluster id. row[3] is the edge list as text: its first 2 and last 4
        characters are dropped, the rest is split on " );(" and every piece
        is an "a,b" pair of integers.

        row[2] (the vertex set text) is deliberately not parsed: the vertex
        set of every cluster is rebuilt from the edges of its own component.

        Every connected component reported by cc_from_edge_list is wrapped in
        a cluster_dstructure and passed to self.output(writer, cluster).

        argument and argument2 are accepted but not used here.
        NOTE(review): presumably kept for parity with other parsers - confirm.
        """
        cooccurrent_cluster_id = self.p_cooccurrent_cluster_id.match(row[0]).group()

        # sorted() gives each edge as a real list in ascending order,
        # independent of whether map() returns a list or an iterator.
        edge_set = [
            sorted(int(vertex) for vertex in edge.split(","))
            for edge in row[3][2:-4].split(" );(")
        ]

        instance = cc_from_edge_list()
        instance.run(edge_set)
        for cc_edge_list in instance.cc_list:
            cluster = cluster_dstructure()
            # carried on the cluster for reference; output() does not use it
            cluster.cooccurrent_cluster_id = cooccurrent_cluster_id
            cluster.vertex_set = self.vertex_set_from_cc_edge_list(cc_edge_list)
            cluster.edge_set = cc_edge_list
            self.output(writer, cluster)
Пример #2
0
	def run(self):
		"""
		Write dataset clusters to self.outfname as tab-delimited rows.

		06-08-05

		06-09-05
			add type 2: group dataset clusters

		Call outline:
			--db_connect()
			--return_dataset_no2id_desc()
			if self.type==1:
				--datasetClustOutput2()	(once per row of self.infname)
			elif self.type==2:
				--id2dataset_cluster_setConstruct()
				--dataset_clusterGraphConstruct()
				--<cc_from_edge_list>
				--<CcFromBiclusteringOutput>
					--returnBigDatasetClust()
					--_datasetClustOutput()

		Any other self.type value writes no rows; the output file is still created.
		Both files are closed explicitly, also when one of the steps raises.
		"""
		(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
		out_handle = open(self.outfname, 'w')
		try:
			outf = csv.writer(out_handle, delimiter='\t')
			#header output is currently disabled:
			#no_of_datasets = self.headerOutput(curs, outf)
			dataset_no2id_desc = self.return_dataset_no2id_desc(curs)
			if self.type==1:
				in_handle = open(self.infname, 'r')
				try:
					for row in csv.reader(in_handle, delimiter='\t'):
						if self.debug:
							print(row)
						self.datasetClustOutput2(curs, outf, row, dataset_no2id_desc)
				finally:
					in_handle.close()
			elif self.type==2:
				#06-09-05	mapping between an id and a dataset cluster set
				#read from self.infname (a bare, undefined 'infname' was passed before)
				id2dataset_cluster_set = self.id2dataset_cluster_setConstruct(self.infname)
				if self.debug:
					print("id2dataset_cluster_set is:")
					print(id2dataset_cluster_set)
				edge_list = self.dataset_clusterGraphConstruct(id2dataset_cluster_set, self.similar_score)
				if self.debug:
					print("The constructed graph has %s edges"%len(edge_list))
				cfe_instance= cc_from_edge_list()
				cfe_instance.run(edge_list)
				cfbo_instance = CcFromBiclusteringOutput()
				#one output row per connected component of the dataset-cluster graph
				for cc_edge_list in cfe_instance.cc_list:
					id_set = cfbo_instance.vertex_set_from_cc_edge_list(cc_edge_list)
					if self.debug:
						print(cc_edge_list)
						print(id_set)
					big_dataset_cluster_set = self.returnBigDatasetClust(id2dataset_cluster_set, id_set)
					big_dataset_cluster = sorted(big_dataset_cluster_set)
					self._datasetClustOutput(outf, big_dataset_cluster, dataset_no2id_desc)
		finally:
			#flushes and releases the output file even if a step above failed
			out_handle.close()
Пример #3
0
	def default_parser(self, row, j_instance, cfbo_instance):
		"""
		Parse one row and return one output row per connected component.

		2006-08-22
			default parser, work for annot's pipeline

		Only the first two elements of row are looked at (04-27-06); row must
		have at least two. The second one is the edge list as text: its first
		2 and last 2 characters are dropped, the rest is split on '), (' and
		every piece is an 'a,b' pair of integers. The first element (vertex
		set text) is ignored, the vertex set is rebuilt per component.

		j_instance must offer py_shortest_distance(vertex_set, edge_list) and
		py_recurrence_list(); cfbo_instance must offer
		vertex_set_from_cc_edge_list(edge_list).

		Returns a list of [vertex_set, cc_edge_list, recurrence_array, D],
		where D is whatever py_shortest_distance() returned.
		"""
		_, edge_text = row[:2]	#04-27-06, just first 2 elements
		#each edge becomes a real list of ints (not a lazy map object)
		edge_set = [[int(vertex) for vertex in edge.split(',')] for edge in edge_text[2:-2].split('), (')]
		#04-27-06, work on each connected component
		cfe_instance= cc_from_edge_list()
		cfe_instance.run(edge_set)
		result = []
		for cc_edge_list in cfe_instance.cc_list:
			vertex_set = cfbo_instance.vertex_set_from_cc_edge_list(cc_edge_list)
			D = j_instance.py_shortest_distance(vertex_set,cc_edge_list)
			recurrence_array = j_instance.py_recurrence_list()	#MUST be after py_shortest_distance()
			cc_edge_list.sort()	#10-28-05 to ease codense2db.py	#01-01-06
			result.append([vertex_set, cc_edge_list, recurrence_array, D])	#10-28-05, #01-01-06
		return result
def outputCcFromEdgeList(of, signature, edge_list, codense2db_instance, min_cluster_size, no_cc):
	"""
	Write edge_list, or each of its connected components, to of.

	08-07-05
	08-09-05
		calculate recurrence array for codense2db.py
	12-31-05
		remove several time-consuming steps
		no recurrence_array
		cc_edge_list is tuple-list
	01-07-06
		add min_cluster_size

	of: object with a write() method that receives the output lines
	signature: not used in this function
	edge_list: the edges to output
	codense2db_instance: object offering vertex_set_from_cc_edge_list()
	min_cluster_size: candidates with fewer vertices are skipped
	no_cc: if true, edge_list is written as one cluster as it is (its edge
		order is kept); otherwise it is split by cc_from_edge_list and the
		edge list of every written component is sorted in place

	Each written line is repr(sorted vertex list), a tab, repr(edge list).
	"""
	if no_cc:
		candidates = [edge_list]
	else:
		finder = cc_from_edge_list()
		finder.run(edge_list)
		candidates = finder.cc_list

	for candidate in candidates:
		vertices = codense2db_instance.vertex_set_from_cc_edge_list(candidate)
		if len(vertices) < min_cluster_size:
			continue
		vertices.sort()
		if not no_cc:
			#12-31-05	each edge in a component is already sorted, only order the edges
			candidate.sort()
		of.write('%s\t%s\n' % (repr(vertices), repr(candidate)))