Example #1
    def distribute_jobs(self, communicator, no_of_clusters, node_function, netmine2nd_parameter_list):
        """
		04-09-05
			input: rank_range, no_of_clusters
			output: node_rank2cluster_no
			
			distribute jobs among the nodes in the rank_range based on no_of_clusters
		05-19-05
			(rewritten)
		"""
        job_list = range(no_of_clusters)
        if self.debug:
            sys.stderr.write(
                "The common parameter_list of netmine2nd is %s.\n" % repr(" ".join(netmine2nd_parameter_list))
            )
        of_name_list = mpi_schedule_jobs(communicator, job_list, node_function, netmine2nd_parameter_list, self.debug)
        return of_name_list
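mpi_schedule_jobs() itself is not shown in these examples. From the call sites it behaves like a dynamic master/worker scheduler: rank 0 hands out one job at a time and each worker runs node_function on the job it receives. Below is a minimal sketch of that pattern, written with mpi4py because the legacy Scientific.MPI bindings used in the examples are rarely available today; the STOP_TAG sentinel and the node_function(comm, job, parameter_list) signature are assumptions, not the original API.

from mpi4py import MPI

STOP_TAG = 999	# hypothetical sentinel tag telling a worker to exit

def schedule_jobs(comm, job_list, node_function, parameter_list):
	"""Master/worker sketch: rank 0 schedules, the other ranks compute."""
	rank, size = comm.Get_rank(), comm.Get_size()
	results = []
	if rank == 0:
		status = MPI.Status()
		pending = list(job_list)
		active = 0
		# seed every worker with its first job (or tell it to stop)
		for worker in range(1, size):
			if pending:
				comm.send(pending.pop(0), dest=worker, tag=0)
				active += 1
			else:
				comm.send(None, dest=worker, tag=STOP_TAG)
		# hand out the remaining jobs as workers report back
		while active:
			result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
			results.append(result)
			worker = status.Get_source()
			if pending:
				comm.send(pending.pop(0), dest=worker, tag=0)
			else:
				comm.send(None, dest=worker, tag=STOP_TAG)
				active -= 1
	else:
		status = MPI.Status()
		while True:
			job = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
			if status.Get_tag() == STOP_TAG:
				break
			comm.send(node_function(comm, job, parameter_list), dest=0, tag=0)
	return results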
        """
Example #2
	def run(self):
		"""
		06-03-05
			
			--<get_edge_data>
			
			--mpi_schedule_jobs()
				--callTightClust()
					--<PreprocessEdgeData>
					--tightClust
			--<netmine_wrapper>
			
		"""
		communicator = MPI.world.duplicate()
		get_edge_data_instance = get_edge_data(self.hostname, self.dbname, self.schema,
				self.table, self.output_dir, self.min_no_of_edges, self.debug, self.no_of_nas)
		
		if communicator.rank == 0:
			sys.stderr.write("this is node %s\n"%communicator.rank)
			get_edge_data_instance.run()
		
		mpi_synchronize(communicator)
		
		job_list = get_edge_data_instance.go_no_qualified
		parameter_list = [self.output_dir, self.no_of_nas, self.top_percentage, self.targetClustNum,
			self.min_k, self.max_k, self.alpha, self.beta, self.topNum, self.seqNum, self.resampNum,
			self.subSampPercent, self.npass]
		if self.debug:
			sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
		of_name_list = mpi_schedule_jobs(communicator, job_list, callTightClust, parameter_list, self.debug)
		
		mpi_synchronize(communicator)
		
		# collect the per-node output files into one final file
		if communicator.rank == 0:
			final_ofname = os.path.join(self.output_dir, 'tightClust')
			netmine_wrapper_instance = netmine_wrapper()
			netmine_wrapper_instance.collect_and_merge_output(of_name_list, final_ofname)
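mpi_synchronize(communicator) above is presumably a thin wrapper around an MPI barrier (comm.Barrier() in mpi4py). netmine_wrapper.collect_and_merge_output() is not shown either; judging from its use here it concatenates the per-node output files into one final file. A minimal sketch, assuming of_name_list holds paths to readable files:

import shutil

def collect_and_merge_output(of_name_list, final_ofname):
	# concatenate every node's output file into final_ofname
	with open(final_ofname, 'wb') as outf:
		for of_name in of_name_list:
			with open(of_name, 'rb') as inf:
				shutil.copyfileobj(inf, outf)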
Example #3
	def run(self):
		"""
		08-14-05
		"""
		communicator = MPI.world.duplicate()
		# NOTICE: fake_no_of_nodes enlarges (or shrinks) the actual number of
		# nodes so that the amount of work per node can be balanced
		fake_no_of_nodes = int((communicator.size - 1) * times_nodes)
		OffsetLimitList = Numeric.zeros((fake_no_of_nodes, 2), Numeric.Int)
		if communicator.rank == 0:
			(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
			OffsetLimitList = self.createOffsetLimitList(curs, self.source_table, fake_no_of_nodes)
			OffsetLimitList = Numeric.array(OffsetLimitList, Numeric.Int)	#convert to a Numeric array so it can be broadcast()
			if self.commit:	#08-14-05	create the gene_table
				instance = gene_stat()
				instance.createGeneTable(curs, self.gene_table)
				curs.execute('end')
			if self.debug:
				sys.stderr.write("OffsetLimitList: %s"%repr(OffsetLimitList))
			del conn, curs
		
		communicator.broadcast(OffsetLimitList, 0)	#share the OffsetLimitList
		
		mpi_synchronize(communicator)
		job_list = range(len(OffsetLimitList))	#corresponding to the indices in the OffsetLimitList
		parameter_list = [self.hostname, self.dbname, self.schema, self.source_table, self.output,
			self.gene_table, self.commit, OffsetLimitList, self.debug]
		if self.debug:
			sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
		of_name_list = mpi_schedule_jobs(communicator, job_list, node_cluster_stat, parameter_list, self.debug)
		
		mpi_synchronize(communicator)
		
		# collecting (08-14-05): not strictly necessary, but it keeps the number of output files small
		if communicator.rank == 0:
			netmine_wrapper_instance = netmine_wrapper()
			netmine_wrapper_instance.collect_and_merge_output(of_name_list, self.output)
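createOffsetLimitList() appears to split the rows of source_table into fake_no_of_nodes (offset, limit) windows so that each job can page through its own slice with SQL OFFSET/LIMIT. A minimal sketch of that splitting; the COUNT(*) query and the even split are assumptions, not the original implementation:

def createOffsetLimitList(curs, source_table, no_of_chunks):
	# count the rows, then cut them into no_of_chunks (offset, limit) windows
	curs.execute("SELECT count(*) FROM %s" % source_table)
	no_of_rows, = curs.fetchone()
	chunk = no_of_rows // no_of_chunks
	offset_limit_list = []
	for i in range(no_of_chunks):
		offset = i * chunk
		# the last window absorbs the remainder rows
		limit = no_of_rows - offset if i == no_of_chunks - 1 else chunk
		offset_limit_list.append((offset, limit))
	return offset_limit_list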