def distribute_jobs(self, communicator, no_of_clusters, node_function, netmine2nd_parameter_list):
	"""
	04-09-05
	05-19-05 (rewritten)

	Schedule one netmine2nd job per cluster across the MPI nodes.

	communicator: the MPI communicator used for scheduling
	no_of_clusters: number of clusters; one job index is created per cluster
	node_function: the per-job function each worker node executes
	netmine2nd_parameter_list: parameter list shared by every job

	Returns the list of output file names produced by the workers
	(whatever mpi_schedule_jobs collects).
	"""
	if self.debug:
		sys.stderr.write("The common parameter_list of netmine2nd is %s.\n" % \
			repr(" ".join(netmine2nd_parameter_list)))
	# one job index per cluster; mpi_schedule_jobs hands these out to idle nodes
	cluster_index_list = range(no_of_clusters)
	return mpi_schedule_jobs(communicator, cluster_index_list, node_function, \
		netmine2nd_parameter_list, self.debug)
def run(self):
	"""
	06-03-05

	Main driver: fetch edge data on the master node, schedule tightClust
	jobs across all MPI nodes, then merge the per-node outputs on the
	master node.

	Call graph:
	--<get_edge_data>
	--mpi_schedule_jobs()
		--callTightClust()
			--<PreprocessEdgeData>
			--tightClust
	--<netmine_wrapper>
	"""
	communicator = MPI.world.duplicate()
	# every rank constructs the instance, but only rank 0 runs the data fetch below
	get_edge_data_instance = get_edge_data(self.hostname, self.dbname, self.schema,\
		self.table, self.output_dir, self.min_no_of_edges, self.debug, self.no_of_nas)
	if communicator.rank == 0:
		sys.stderr.write("this is node %s\n"%communicator.rank)
		# rank 0 pulls the edge data; other ranks skip straight to the barrier
		get_edge_data_instance.run()
	mpi_synchronize(communicator)
	# NOTE(review): go_no_qualified is read on every rank, but run() above only
	# executed on rank 0 — presumably mpi_schedule_jobs only consults job_list on
	# the master, or go_no_qualified has a usable default; verify in get_edge_data.
	job_list = get_edge_data_instance.go_no_qualified
	# common parameters forwarded unchanged to every callTightClust job
	parameter_list =[self.output_dir, self.no_of_nas, self.top_percentage, self.targetClustNum, \
		self.min_k, self.max_k, self.alpha, self.beta, self.topNum, self.seqNum, self.resampNum,\
		self.subSampPercent, self.npass]
	if self.debug:
		sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
	of_name_list = mpi_schedule_jobs(communicator, job_list, callTightClust, parameter_list, self.debug)
	mpi_synchronize(communicator)
	#collecting
	if communicator.rank==0:
		# master merges all per-node output files into one final file
		final_ofname = os.path.join(self.output_dir, 'tightClust')
		netmine_wrapper_instance = netmine_wrapper()
		netmine_wrapper_instance.collect_and_merge_output(of_name_list, final_ofname)
def run(self):
	"""
	08-14-05

	Main driver: rank 0 builds an offset/limit partition of the source
	table (and optionally creates the gene table), broadcasts it to all
	nodes, schedules one node_cluster_stat job per partition slice, then
	merges the per-node outputs on rank 0.
	"""
	communicator = MPI.world.duplicate()
	# NOTE(review): times_nodes is used without `self.` — presumably a module-level
	# constant or it should be self.times_nodes; confirm against the class __init__.
	fake_no_of_nodes = int((communicator.size-1)*times_nodes)
	#NOTICE: fake_no_of_nodes is used to enlarge(or shrink) the actual number of nodes,
	#to balance the amount of work on each node
	# pre-allocate the (fake_no_of_nodes x 2) buffer on every rank so that
	# broadcast() below has a same-shaped destination array on non-root ranks
	OffsetLimitList = Numeric.zeros((fake_no_of_nodes,2), Numeric.Int)
	if communicator.rank == 0:
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		OffsetLimitList = self.createOffsetLimitList(curs, self.source_table, fake_no_of_nodes)
		OffsetLimitList = Numeric.array(OffsetLimitList, Numeric.Int)	#transform it into Numeric array to broadcast()
		if self.commit:	#08-14-05 create the gene_table
			instance = gene_stat()
			instance.createGeneTable(curs, self.gene_table)
			curs.execute('end')
		if self.debug:
			sys.stderr.write("OffsetLimitList: %s"%repr(OffsetLimitList))
		del conn, curs
	communicator.broadcast(OffsetLimitList, 0)	#share the OffsetLimitList
	mpi_synchronize(communicator)
	job_list = range(len(OffsetLimitList))	#corresponding to the indices in the OffsetLimitList
	# common parameters forwarded unchanged to every node_cluster_stat job
	parameter_list =[self.hostname, self.dbname, self.schema, self.source_table, self.output, \
		self.gene_table, self.commit, OffsetLimitList, self.debug]
	if self.debug:
		sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
	of_name_list = mpi_schedule_jobs(communicator, job_list, node_cluster_stat, parameter_list, self.debug)
	mpi_synchronize(communicator)
	#collecting 08-14-05 not really necessary, but just to make the number of files small
	if communicator.rank==0:
		netmine_wrapper_instance = netmine_wrapper()
		netmine_wrapper_instance.collect_and_merge_output(of_name_list, self.output)