Exemplo n.º 1
0
	def run(self):
		"""
		03-30-05
		
		06-30-05
			more complex data grouping via which_column_list and group_size_list
			if both lists are of length 2, 2-level grouping.
		
		Fetch the prediction data, group it and write the grouped result out.
		
		The data is first grouped by which_column_list[0]/group_size_list[0].
		When both lists hold more than one entry, every first-level unit is
		grouped again by the second entries and stored under a (key, key2)
		tuple; otherwise each unit is stored under a 1-tuple (key,).
		
		The output file is only opened when self.stat_table_fname is set, and
		it is always closed, even if prediction_space_output() raises.
		
		--db_connect()
		--get_go_no2depth()
		--data_fetch()
		--group_data()
		if self.stat_table_fname:
			--prediction_space_output()
		"""
		self.init()
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		from codense.common import  get_go_no2depth
		self.go_no2depth = get_go_no2depth(curs)
		
		self.data_fetch(curs, self.table, self.mcl_table, self.gene_table)
		local_prediction_space2attr = self.group_data(self.prediction_data, key_column=self.which_column_list[0], \
			group_size=self.group_size_list[0])
		#the decision between 1-level and 2-level grouping doesn't depend on the unit, take it once
		two_level_grouping = len(self.which_column_list)>1 and len(self.group_size_list)>1
		for key, unit in local_prediction_space2attr.iteritems():
			if two_level_grouping:
				local_prediction_space2attr_2 = self.group_data(unit, key_column=self.which_column_list[1], \
					group_size=self.group_size_list[1])
				for key2, unit2 in local_prediction_space2attr_2.iteritems():
					self.prediction_space2attr[(key,key2)] = unit2
			else:
				self.prediction_space2attr[(key,)] = unit
		if self.stat_table_fname:
			stat_table_f = open(self.stat_table_fname, 'w')
			try:
				self.prediction_space_output(stat_table_f, self.prediction_space2attr)
			finally:
				#close explicitly so the output is flushed to disk right here
				stat_table_f.close()
Exemplo n.º 2
0
	def dstruc_loadin(self, curs):
		"""
		03-14-05
			remove the distance loading part
		
		Fill the lookup attributes of this instance from the database.
		
		Each attribute listed below is set to the return value of the
		codense.common function 'get_<attribute name>', called with curs:
			known_genes_dict, go_no2go_id, go_no2term_id, go_no2depth,
			go_term_id2go_no, go_term_id2depth
		Start and end of the loading are reported on stderr.
		"""
		sys.stderr.write("Loading Data STructure...\n")
		from codense.common import get_known_genes_dict
		from codense.common import get_go_no2go_id
		from codense.common import get_go_no2term_id
		from codense.common import get_go_no2depth
		from codense.common import get_go_term_id2go_no
		from codense.common import get_go_term_id2depth
		
		#(attribute name, loader) pairs, kept in the order they have to be executed
		attr_name_loader_list = [
			('known_genes_dict', get_known_genes_dict),
			('go_no2go_id', get_go_no2go_id),
			('go_no2term_id', get_go_no2term_id),
			('go_no2depth', get_go_no2depth),
			('go_term_id2go_no', get_go_term_id2go_no),
			('go_term_id2depth', get_go_term_id2depth)]
		for attr_name, loader in attr_name_loader_list:
			setattr(self, attr_name, loader(curs))
		
		sys.stderr.write("Done\n")
Exemplo n.º 3
0
	def prepare_gene_no2go_no(self, curs, depth=5):
		"""
		04-15-05
			different from get_gene_no2go_no, the value is a set.
		04-27-05
			only depth ==5
		
		Build a mapping gene_no -> Set of go_no, restricted to one GO depth.
		
		curs: database cursor handed to get_go_no2depth() and get_gene_no2go_no()
		depth: only go_nos whose value in go_no2depth equals this are kept.
			Defaults to 5, the value that used to be hard-coded.
		
		Returns a dictionary with one entry for every gene_no returned by
		get_gene_no2go_no(); the Set is empty if none of the gene's go_nos
		is of the requested depth.
		
		go_no2depth is indexed directly, so a go_no without a depth entry
		is reported as an error (KeyError for a plain dictionary) instead of
		being skipped silently.
		"""
		sys.stderr.write("Preparing gene_no2go_no...")
		#get_go_no2depth and get_gene_no2go_no are taken from the module scope
		#from codense.common import get_gene_no2go_no, get_go_no2depth
		go_no2depth = get_go_no2depth(curs)
		gene_no2go_no = get_gene_no2go_no(curs)
		gene_no2go_no_set = {}
		for gene_no, go_no_list in gene_no2go_no.iteritems():
			gene_no2go_no_set[gene_no] = Set([go_no for go_no in go_no_list if go_no2depth[go_no] == depth])
		sys.stderr.write("Done.\n")
		return gene_no2go_no_set
Exemplo n.º 4
0
    def get_gene_no2go_no_list(self, curs, depth=5):
        """
        04-03-05
            get the mapping between gene_no and its associated functions
            given the depth.(unknown's depth is 2, so all these genes are known genes.
            the fact is used in _connectivity2homogeneity())

        curs: database cursor; used for get_go_no2depth() and to read the
            columns gene_no and go_functions of table gene.
        depth: only go_nos whose depth equals this value are kept.

        Returns a dictionary gene_no -> list of go_no. A gene without any
        go_no of the requested depth gets no entry at all.

        A go_functions value without any number in it (e.g. '{}') is treated
        as "no functions" instead of raising ValueError from int('').

        If self.debug is set, intermediate values are printed and the run
        pauses (raw_input) after every go_no.
        """
        from codense.common import get_go_no2depth

        go_no2depth = get_go_no2depth(curs)
        if self.debug:
            print("length of go_no2depth is %s" % len(go_no2depth))

        # codes below similar to get_gene_no2go_no of codense.common, but different.
        sys.stderr.write("Getting gene_no2go_no (go_no depth:%s) ..." % depth)
        gene_no2go_no = {}
        curs.execute("select gene_no,go_functions from gene")
        for row in curs.fetchall():
            gene_no = row[0]
            go_functions = row[1]
            # The first and last characters are dropped (presumably the braces
            # of an array literal) and the rest is split on commas.
            # Empty tokens are skipped, don't forget to transform the others to integer.
            go_functions_list = [int(token) for token in go_functions[1:-1].split(",") if token.strip()]
            if self.debug:
                print("gene is %s" % gene_no)
                print("go_functions_list is %s" % go_functions)
            for go_no in go_functions_list:
                # a go_no missing from go_no2depth gets depth None and is dropped below
                go_no_depth = go_no2depth.get(go_no)
                if self.debug:
                    print("go_no %s depth: %s" % (go_no, go_no_depth))
                    raw_input("pause:")
                if go_no_depth == depth:
                    gene_no2go_no.setdefault(gene_no, []).append(go_no)
        sys.stderr.write("Done\n")
        return gene_no2go_no
Exemplo n.º 5
0
	def run(self):
		"""
		09-05-05
		10-23-05
			create views from old schema
			result goes to the new schema's p_gene_table
		01-02-06
			input is read from the tab-delimited file self.input_fname,
			output is written to the tab-delimited file self.jnput_fname.
			The schema/view/table setup is disabled.
		
		Run the MPI program. The role of a process depends on its rank:
			0			input node
			1 .. size-3		computing nodes
			size-2			judge node
			size-1			output node
		
		Phase 1, preparation:
			(input_node)
				--db_connect()
				--get_gene_no2go_no_set()
				--get_go_no2depth()
				--get_go_no2gene_no_set()
				(pass the three pickled structures to every computing_node)
			(computing_node)
				(take data from other nodes, 0 and size-1)
				(choose the functor according to self.recurrence_x_type)
			(judge_node)
				--gene_stat()
				--db_connect()
				--gene_stat_instance.dstruc_loadin()
				--gene_p_map_redundancy()
			(output_node)
				--db_connect()
				--get_go_no2edge_counter_list()(if necessary)
				(pass go_no2edge_counter_list to computing_node)
		
		--mpi_synchronize()
		
		Phase 2, work:
			--input_node()
			--computing_node()
				--GradientScorePrediction()
				--node_fire_handler()
				--cleanup_handler()
			--judge_node()
			--output_node()
				--output_node_handler()
		
		All files opened here are closed explicitly once they are not needed
		anymore, even if the step using them raises.
		"""
		communicator = MPI.world.duplicate()
		node_rank = communicator.rank
		if node_rank == 0:
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			#01-02-06 form_schema_tables() for self.input_fname and self.jnput_fname is disabled
			gene_no2go_no = get_gene_no2go_no_set(curs)
			gene_no2go_no_pickle = cPickle.dumps(gene_no2go_no, -1)	#-1 means use the highest protocol
			go_no2depth = get_go_no2depth(curs)
			go_no2depth_pickle = cPickle.dumps(go_no2depth, -1)
			go_no2gene_no_set = get_go_no2gene_no_set(curs)
			go_no2gene_no_set_pickle = cPickle.dumps(go_no2gene_no_set, -1)
			for node in range(1, communicator.size-2):	#send it to the computing_node
				communicator.send(gene_no2go_no_pickle, node, 0)
				communicator.send(go_no2depth_pickle, node, 0)
				communicator.send(go_no2gene_no_set_pickle, node, 0)
		elif node_rank<=communicator.size-3:	#WATCH: last 2 nodes are not here.
			#the three receives below must stay in the order node 0 sends them
			data, source, tag = communicator.receiveString(0, 0)
			gene_no2go_no = cPickle.loads(data)	#take the data
			data, source, tag = communicator.receiveString(0, 0)
			go_no2depth = cPickle.loads(data)
			data, source, tag = communicator.receiveString(0, 0)
			go_no2gene_no_set = cPickle.loads(data)
			data, source, tag = communicator.receiveString(communicator.size-1, 0)	#from the last node
			go_no2edge_counter_list = cPickle.loads(data)
			#choose a functor for recurrence_array
			#0: no functor, 1: 1 if x>=recurrence_x else 0, 2: x to the power of recurrence_x
			functor_dict = {0: None,
				1: lambda x: int(x>=self.recurrence_x),
				2: lambda x: math.pow(x, self.recurrence_x)}
			functor = functor_dict[self.recurrence_x_type]
		elif node_rank == communicator.size-2:	#judge node
			gene_stat_instance = gene_stat(depth_cut_off=self.depth)
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			gene_stat_instance.dstruc_loadin(curs)
			from gene_p_map_redundancy import gene_p_map_redundancy
			node_distance_class = gene_p_map_redundancy()
		elif node_rank==communicator.size-1:	#establish connection before pursuing
			(conn, curs) =  db_connect(self.hostname, self.dbname, self.schema)
			#01-02-06, input and output are all directed to files.
			#The view creation via MpiPredictionFilter and createGeneTable() (if self.new_table) is disabled.
			if self.go_no2edge_counter_list_fname:
				#NOTE: unpickling executes whatever the file tells it to, only use files from a trusted source.
				#NOTE(review): the file is read in text mode 'r' as before; confirm how it was written
				#	before switching to 'rb'.
				go_no2edge_counter_list_f = open(self.go_no2edge_counter_list_fname,'r')
				try:
					go_no2edge_counter_list = cPickle.load(go_no2edge_counter_list_f)
				finally:
					go_no2edge_counter_list_f.close()
			else:
				if self.eg_d_type==2:
					go_no2edge_counter_list = None
				else:
					gene_no2go_no = get_gene_no2go_no_set(curs)
					go_no2edge_counter_list = get_go_no2edge_counter_list(curs, gene_no2go_no, self.edge_type2index)
			go_no2edge_counter_list_pickle = cPickle.dumps(go_no2edge_counter_list, -1)
			for node in range(1, communicator.size-2):	#send it to the computing_node
				communicator.send(go_no2edge_counter_list_pickle, node, 0)
		
		mpi_synchronize(communicator)
		
		free_computing_nodes = range(1,communicator.size-2)	#exclude the last node
		if node_rank == 0:
			#01-02-06 the patterns come from a file; the database cursor over the pattern_table is disabled
			self.counter = 0	#01-02-06 counter is used as id
			input_f = open(self.input_fname, 'r')
			try:
				reader = csv.reader(input_f, delimiter='\t')
				parameter_list = [reader]
				input_node(communicator, parameter_list, free_computing_nodes, self.message_size, \
					self.report, input_handler=self.input_handler)
			finally:
				#parameter_list still refers to the reader, so close the file itself
				input_f.close()
		elif node_rank in free_computing_nodes:
			no_of_unknown_genes = get_no_of_unknown_genes(gene_no2go_no)
			GradientScorePrediction_instance = GradientScorePrediction(gene_no2go_no, go_no2gene_no_set, go_no2depth, \
				go_no2edge_counter_list, no_of_unknown_genes, self.depth, self.min_layer1_associated_genes, \
				self.min_layer1_ratio, self.min_layer2_associated_genes, self.min_layer2_ratio, self.exponent, \
				self.score_list, self.max_layer, self.norm_exp, self.eg_d_type, self.debug)
			parameter_list = [GradientScorePrediction_instance, functor]
			computing_node(communicator, parameter_list, self.node_fire_handler, self.cleanup_handler, self.report)
		elif node_rank == communicator.size-2:
			self.judge_node(communicator, curs, gene_stat_instance, node_distance_class)
		elif node_rank==communicator.size-1:
			#01-02-06 output goes to plain file, not database
			output_f = open(self.jnput_fname, 'w')
			try:
				writer = csv.writer(output_f, delimiter='\t')
				parameter_list = [writer]
				output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler, self.report)
			finally:
				#closing flushes everything written through the writer
				output_f.close()