def _renew_pp_server(self, servers = None, ncpus = None): ''' @summary: renews the pp server @param servers: tuple of all parallel python servers to connect with, example ("*",) = auto-discover, ("10.0.0.1","10.0.0.2") # list of static IPs @param ncpus: number of CPUs to reserve ''' try: self.job_server.destroy() except: pass if servers == "serial": self.job_server = None self.number_of_cores = None self.parallelmode = False else: import pp if servers == "auto": servers = None if (servers == None) and (ncpus == None): self.job_server = pp.Server() elif (servers == None) and (ncpus != None): self.job_server = pp.Server(ncpus = ncpus) elif (servers!= None) and (ncpus == None): self.job_server = pp.Server(ppservers=servers) elif (servers!= None) and (ncpus != None): self.job_server = pp.Server(ppservers=servers, ncpus = ncpus) self.number_of_cores = self.job_server.get_ncpus() self.parallelmode = True print "Parallel Python option started with", str(self.number_of_cores), "cores"
def communities(self, parallel):
    """Train a link-prediction classifier per community and compute node attributes.

    @param parallel: if False run sequentially; otherwise fan out with Parallel Python.
    @return: tuple (classifiers, deg_centra, between_centra, load_centra,
             avg_nei_deg, harmonic_centra, close_centra), each a dict keyed
             by community id.
    """
    print "Training classifiers..."
    # Earlier multiprocessing / pathos experiments kept for reference:
    # multiprocessing.freeze_support()
    # cores=mp.cpu_count()
    # cores = multiprocessing.cpu_count()
    # pool = mp.ProcessingPool(4)
    # subgraphs={}
    classifiers = {}
    deg_centra = {}
    between_centra = {}
    load_centra = {}
    avg_nei_deg = {}
    harmonic_centra = {}
    close_centra = {}
    # score={}
    # for i,y in enumerate(pool.imap(self.get_classifiers,self.comm2node.keys())):
    #     print i
    #     subgraph=y[0]
    #     classifiers[i]=y[1]
    #     deg_centra[i],between_centra[i],load_centra[i],avg_nei_deg[i],harmonic_centra[i],close_centra[i]=self.attributes(y[0])
    #     score=dict(score,**y[2])
    if parallel == False:
        start_time = time.time()
        for comm in self.comm2node.keys():
            print comm
            # nodes=self.comm2node[comm]
            subgraph = self.graph.subgraph(self.comm2node[comm])
            # subgraphs[comm]=subgraph
            classifiers[comm] = Predictor.training(subgraph)
            deg_centra[comm], between_centra[comm], load_centra[
                comm], avg_nei_deg[comm], harmonic_centra[
                    comm], close_centra[comm] = self.attributes(subgraph)
        print 'non-parallel:', time.time() - start_time, 's'
    else:
        ppservers = ()
        if len(sys.argv) > 1:
            ncpus = int(sys.argv[1])
            # Creates jobserver with ncpus workers
            job_server = pp.Server(ncpus, ppservers=ppservers)
        else:
            # Creates jobserver with automatically detected number of workers
            job_server = pp.Server(ppservers=ppservers)
        # Message below translates to "number of worker core threads pp can use".
        print "pp 可以用的工作核心线程数", job_server.get_ncpus(), "workers"
        # comms=list(self.comm2node.keys())
        start_time = time.time()
        jobs = [(comm,
                 job_server.submit(self.get_classifiers, (comm, ), (),
                                   ("Predictor", )))
                for comm in self.comm2node.keys()]
        # print "yes"
        for comm, job in jobs:
            print comm
            # NOTE(review): job() is invoked twice below; pp caches the result
            # after the first call, so this is safe but redundant.
            classifiers[comm] = job()[1]
            deg_centra[comm],between_centra[comm],load_centra[comm],\
            avg_nei_deg[comm],harmonic_centra[comm],close_centra[comm]=self.attributes(job()[0])
        print 'parallel:', time.time() - start_time, 's'
    return classifiers, deg_centra, between_centra, load_centra, avg_nei_deg, harmonic_centra, close_centra
def NCVtrain(args):
    """Train the NCV reference model: per-chromosome read counts, optimized
    reference chromosome sets, and per-bin mean/std, pickled to args.model.

    @param args: parsed CLI namespace; uses .male/.female (files listing BAM
        paths), .cpus (worker count) and .model (output pickle path).
    """
    ### input
    M_files = [
        os.path.abspath(line.strip()) for line in open(args.male)
        if line.strip() != ""
    ]
    F_files = [
        os.path.abspath(line.strip()) for line in open(args.female)
        if line.strip() != ""
    ]
    # At most 6 local workers for the counting phase; "*:3456" auto-discovers
    # remote pp servers on port 3456.
    job_server = pp.Server(min(6, args.cpus), ppservers=("*:3456", ))
    M_counts = tqdm_pp_jobs([
        job_server.submit(get_readcount_table, (f, ), modules=("pysam", ))
        for f in M_files
    ], desc="Male train samples")
    F_counts = tqdm_pp_jobs([
        job_server.submit(get_readcount_table, (f, ), modules=("pysam", ))
        for f in F_files
    ], desc="Female train samples")
    chroms = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY"]
    # NOTE: `chr` shadows the builtin in the comprehensions below.
    df = pd.DataFrame(
        {
            chr: [sum(counts[chr]) for counts in M_counts + F_counts]
            for chr in chroms
        },
        index=M_files + F_files,
        dtype="float")
    df['Gender'] = ["M"] * len(M_counts) + ["F"] * len(F_counts)
    ### train reference chromosome set
    inputs = [("chr" + str(i), df) for i in range(1, 23)]
    # Sex chromosomes are optimized on female samples only.
    inputs += [("chrX", df[df.Gender == "F"]), ("chrY", df[df.Gender == "F"])]
    job_server = pp.Server(args.cpus, ppservers=("*:3456", ))
    refset = tqdm_pp_jobs([
        job_server.submit(optimize, i, modules=("numpy", "pandas"))
        for i in inputs
    ], desc="Enumerating reference set")
    ### calculate mean/std for each bin
    bin_mean_std = {}
    # NOTE(review): this first dump writes an EMPTY bin_mean_std (early
    # checkpoint?) and the file handle is never closed explicitly -- confirm
    # intent; the final dump below overwrites it with the populated dict.
    pickle.dump((refset, bin_mean_std), open(args.model, "w"))
    for chr, comb, _, _ in tqdm(refset, desc="Bin training"):
        bin_num = [len(counts[chr]) for counts in M_counts + F_counts]
        if min(bin_num) != max(bin_num):
            raise Exception("BAMs from different ref genome")
        bin_mean_std[chr] = []
        for i in range(bin_num[0]):
            bin_count = [
                counts[chr][i] for counts in (
                    F_counts if chr in ['chrX', 'chrY'] else M_counts + F_counts)
            ]
            ref_count = df[df.Gender == "F"] if chr in ['chrX', 'chrY'] else df
            # Normalize the bin count by the total count over the reference set.
            bin_ratio = bin_count / ref_count[comb].sum(axis=1)
            bin_mean_std[chr] += [(bin_ratio.mean(), bin_ratio.std())]
    pickle.dump((refset, bin_mean_std), open(args.model, "w"))
def cerenkov_ml(task_table, method_table, fold_table, hp_table, data_table,
                fold_assign_method, ncpus, feature_reduced):
    """Run every (method, hyperparameter, data, fold) task in parallel with pp
    and write the resulting scores back into task_table.

    @param fold_assign_method: "SNP" -> fills auroc/aupvr columns; otherwise avgrank.
    @param ncpus: -1 for pp auto-detection, otherwise an explicit worker count.
    @return: task_table with score columns filled (also pickled to task_table.p).
    """
    if fold_assign_method == "SNP":
        task_table["auroc"] = -1.0
        task_table["aupvr"] = -1.0
    else:
        task_table["avgrank"] = -1.0
    jobs = []
    result = []
    ppservers = ()
    if ncpus == -1:
        job_server = pp.Server(ppservers=ppservers)
        print "Starting with ", job_server.get_ncpus(), "CPUs"
    else:
        job_server = pp.Server(ncpus, ppservers=ppservers)
        print "Starting with ", job_server.get_ncpus(), "CPUs"
    for task_no, task in task_table.iterrows():
        method = task["method"]
        method_name = method.__name__
        hp_ind = task["hp"]
        # NOTE: .ix is deprecated pandas API -- this only runs on old pandas.
        hp = hp_table[method_name].ix[hp_ind, "hp"]  # time: 0.0004
        data_ind = task["data"]  # time: 3.0e-5
        data = data_table[data_ind]  # time: 9.0e-7
        fold_ind = task["fold"]  # time: 2.0e-5
        fold = fold_table.loc[[fold_ind]]  # time: 6.0e-4
        args = (data, hp, fold, fold_assign_method, task_no)  # time: 5.0e-6
        jobs.append(
            job_server.submit(method, args,
                              modules=("time", "pandas", "numpy", "xgboost",
                                       "sklearn.ensemble")))  # time: 0.08
        # job_server.submit(method, args, modules=("time", "pandas", "numpy", "xgboost", "sklearn.ensemble"))  # time: 0.15
        print "[INFO] a job submitted!", "task_no", task_no
    s_time = time.time()
    # Each job returns a dict carrying its own task_no, so results can be
    # written back regardless of completion order.
    for j in jobs:
        result = j()
        if fold_assign_method == "SNP":
            result_task_no = result["task_no"]
            task_table.ix[result_task_no, "auroc"] = result["auroc"]
            task_table.ix[result_task_no, "aupvr"] = result["aupvr"]
        else:
            result_task_no = result["task_no"]
            task_table.ix[result_task_no, "avgrank"] = result["avgrank"]
    job_server.print_stats()
    job_server.destroy()
    pickle.dump(task_table, open("task_table.p", "wb"))
    print "[INFO] dump to task_table.p!"
    return task_table
def optimalBandwidthSelection(Y, X, *args): ''' for a more reasonable search grid that is robust to the outliers use log than transform back to level ''' # add minor value to enable if len(args) == 0: logX = numpy.log(X - X.min() + 0.00001) hVec = numpy.array(numpy.linspace(logX.min(), logX.max(), 52)) # cut off the first couple, which will be too small anyway hVec = numpy.delete(hVec, [0, 1, 2, 3, 51], 0) hVec = numpy.exp(hVec) elif len(args) == 1: hVec = args[0] else: raise TypeError('Error: hVec format is wrong.') numH = hVec.shape[0] rHatVecPP = numpy.zeros((numH, 1)) import pp, sys # tuple of all parallel python servers to connect with ppservers = () if len(sys.argv) > 1: ncpus = int(sys.argv[1]) # Creates jobserver with ncpus workers job_server = pp.Server(ncpus, ppservers=ppservers) else: # Creates jobserver with automatically detected number of workers job_server = pp.Server(ppservers=ppservers) # see what comes out jobs = [(i, job_server.submit(LOORiskEstFast, (Y, X, hVec[i]), (polynomialFit, ), ( "numpy", "kernel", ))) for i in range(numH)] for i, job in jobs: result = job() rHatVecPP[i] = result job_server.destroy() #start_time = time.time() #for iH in range(numH): # rHatVecNP[iH] = LOORiskEstFast(Y,X,hVec[iH]) #print "NP: Time elapsed ", time.time()-start_time # now get the optimalIdx = numpy.argmin(rHatVecPP) return hVec[optimalIdx]
def __initializeRay(self): """ Internal method that is aimed to initialize the internal parallel system. It initilizes the RAY implementation (with socketing system) in case RAVEN is run in a cluster with multiple nodes or the NumMPI > 1, otherwise multi-threading is used. @ In, None @ Out, None """ ## set up enviroment os.environ['PYTHONPATH'] = os.pathsep.join(sys.path) ## Check if the list of unique nodes is present and, in case, initialize the servers = None if self.runInfoDict['internalParallel']: if len(self.runInfoDict['Nodes']) > 0: availableNodes = [ nodeId.strip() for nodeId in self.runInfoDict['Nodes'] ] ## identify the local host name and get the number of local processors localHostName = self.__getLocalHost() self.raiseADebug("Local host name is : ", localHostName) nProcsHead = availableNodes.count(localHostName) self.raiseADebug("# of local procs : ", str(nProcsHead)) ## initialize ray server with nProcs self.rayServer = ray.init( num_cpus=int(nProcsHead)) if _rayAvail else pp.Server( ncpus=int(nProcsHead)) ## Get localHost and servers servers = self.__runRemoteListeningSockets( self.rayServer['redis_address']) else: self.rayServer = ray.init(num_cpus=int(self.runInfoDict['totalNumCoresUsed'])) if _rayAvail else \ pp.Server(ncpus=int(self.runInfoDict['totalNumCoresUsed'])) if _rayAvail: self.raiseADebug("Head node IP address: ", self.rayServer['node_ip_address']) self.raiseADebug("Redis address : ", self.rayServer['redis_address']) self.raiseADebug("Object store address: ", self.rayServer['object_store_address']) self.raiseADebug("Raylet socket name : ", self.rayServer['raylet_socket_name']) self.raiseADebug("Session directory : ", self.rayServer['session_dir']) if servers: self.raiseADebug("# of remote servers : ", str(len(servers))) self.raiseADebug("Remote servers : ", " , ".join(servers)) else: ## We are just using threading self.rayServer = None self.isRayInitialized = True
def main():
    """Collect per-interval minimum-generation statistics in parallel and
    print aggregate success/failure summaries."""
    # tuple of all parallel python servers to connect with
    ppservers = ()
    #ppservers = ("10.0.0.1",)
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        folder_number = int(sys.argv[2])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)
        # NOTE(review): folder_number is never assigned on this branch, so
        # the print below raises NameError when fewer than 3 CLI args are
        # given -- confirm whether this path is ever exercised.
    print "Starting pp with", job_server.get_ncpus(), "workers"
    print "Folder Number == ", folder_number
    # Partition [0, folder_number] into chunks of 25 folders each.
    folders = range(0, folder_number + 26, 25)
    intervales = []
    for i in range(len(folders) - 1):
        intervales.append((folders[i], folders[i + 1]))
    print intervales
    # Parallel evolution for every lamda value
    print "Start running jobs"
    jobs = [(interval,
             job_server.submit(get_min_generation, (interval, ),
                               modules=("numpy", "pandas", "os")))
            for interval in intervales]
    #archive_strc = numpy.zeros((24,4))
    archive_strc = []
    for folder, job in jobs:
        res = job()
        print folder, "===", res
        archive_strc.append(res)
    archive_strc = numpy.concatenate(archive_strc)
    print archive_strc.shape
    print " ", ["|| N ||", "|| F ||", "||S||", "||FREQ||"]
    print "The average number of generations is ", numpy.mean(archive_strc, axis=0)
    print "The median number of generations is ", numpy.median(archive_strc, axis=0)
    # A value of 100 encodes "did not converge within the generation budget".
    print "Number of success === ", [
        len(success[success < 100]) for success in archive_strc.T
    ]
    print "Number of failures === ", [
        len(success[success == 100]) for success in archive_strc.T
    ]
def start():
    """Entry point: pair rain-gauge CSV records with IR satellite files and
    fan the per-date processing out to a pp job server."""
    DateinOutput = datetime.datetime.now().strftime("%Y-%m-%d")
    # Paths differ between the Windows dev box and the Linux cluster.
    if platform.system() == 'Windows':
        file = '..\\..\\data\\rain\\5-9_2017.csv'
        pathIR8 = '..\\..\\data\\irdata\\ir08nc\\'
        pathIR13 = '..\\..\\data\\irdata\\ir13nc\\'
        pathIR15 = '..\\..\\data\\irdata\\ir15nc\\'
        outputPath = '..\\..\\output_{0}\\'.format(DateinOutput)
    else:
        file = '/data/rain/8-9_2017.csv'
        pathIR8 = '/data/dl_hackathon_data_2/ir08nc/'
        pathIR13 = '/data/dl_hackathon_data_2/ir13nc/'
        pathIR15 = '/data/dl_hackathon_data_2/ir15nc/'
        outputPath = '/home/team7/hackathon/Test'
    if os.path.isdir(outputPath) == False:
        os.mkdir(outputPath)
    header = ['Date', 'Lat', 'Long', 'Rain']
    df = pandas.read_csv(file, names=header)
    date = pandas.unique(df['Date'])
    ####################################### Parallel ####################################################
    ppservers = ()
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ppservers=ppservers)
    print("Starting pp with", job_server.get_ncpus(), "workers")
    #################################################################################################
    for dt in date:
        IR8_check = checkIR8(dt, pathIR8)
        IR13_check = checkIR13(dt, pathIR13)
        IR15_check = checkIR15(dt, pathIR15)
        # Only process dates for which all three IR channels are present.
        if IR15_check != None and IR13_check != None and IR8_check != None:
            print(dt)
            # NOTE(review): jobs are submitted but never collected or waited
            # on; confirm something keeps the process alive until they finish.
            job_server.submit(ReadFile, (
                dt,
                df,
                IR8_check,
                IR13_check,
                IR15_check,
                outputPath,
            ), (WriteToCSV, ), ("os", "netCDF4", 'pandas', 'numpy', 'datetime'))
def create_job_server(simulatedPP=True, serverList=defaultServerList, secret=defaultSecret): #create a true or simulated job server #a useful trick for anoymous classes, used when no job servers are available: #http://norvig.com/python-iaq.html class Struct: def __init__(self, **entries): self.__dict__.update(entries) if simulatedPP: job_server = Struct() job_server.submit = lambda func,args=tuple(),depfuncs=tuple(),modules=tuple(),globals=tuple():lambda :func(*args) #creates a lambda fn which takes no arguments and returns the evaluated number job_server.destroy = lambda : '' job_server.get_ncpus = lambda : -10 job_server.get_active_nodes = lambda : {} job_server.print_stats = lambda : '' job_server.secret = 'epo20pdosl;dksldkmm' job_server.ppservers = [('simulated', 60000)] job_server.simulated = True job_server.wait = lambda : '' print 'Created a SIMULATED job server.' print else: print 'Server list: \n' + str(serverList) job_server = pp.Server(ppservers=tuple(serverList), loglevel=pp.logging.DEBUG, restart=True, secret=secret) job_server.set_ncpus(0) #making all jobs remote removes unexpected errors time.sleep(1) print 'Active nodes: \n' + str(job_server.get_active_nodes()) return job_server
def parFeatureExtraction(dataset_location,pathLength):
    """Extract spectral/MFCC features from every .wav in dataset_location in
    parallel (pp) and write one labelled row per file to a fixed CSV.

    @param dataset_location: directory scanned for *.wav files.
    @param pathLength: forwarded to extractFeature -- presumably used to
        derive the class label from the file path; TODO confirm.
    """
    t0=time.time()
    path =dataset_location+'/*.wav'
    files=glob.glob(path)
    auList=list()
    for file in files:
        auList.append(file)
    jobs=[]
    with open('/home/ubantu/TwoClassfeatureSet.csv', 'w') as csvfile:
        fieldnames = ['Spect Centroid', 'Spect Rolloff','Spect Flux','RMS','ZCR','SC_SD','SR_SD','SF_SD','ZCR_SD','energy',\
            'MFCC1','MFCC2','MFCC3','MFCC4','MFCC5','MFCC6','MFCC7','MFCC8','MFCC9','MFCC10','MFCC11','MFCC12','MFCC13',\
            'CLASS']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        job_server = pp.Server()
        fileIndex=[]
        # One feature-extraction job per audio file; all DSP helpers are
        # shipped to the workers as dependent functions.
        jobs = [(file, job_server.submit(extractFeature,(file,pathLength),\
            (ClassToInt,spectral_centroid,spectral_rolloff,spectral_flux,\
            root_mean_square,zero_crossing_rate,mfcc,stEnergy,stft,trfbank,hz2mel,mel2hz,preemp,segment_axis),\
            ("numpy","wavfile","loadmat","lfilter","hamming","fft","dct",))) for file in auList]
        for input,job in jobs:
            #print "INPUT= ",input," JOB= ",job()
            fileIndex.append(input)
            result=job()
            # Result layout: centroid, rolloff, flux, rms, zcr, MFCC frame
            # matrix, energy, integer class label.
            s1=result[0]
            sr1=result[1]
            sf1=result[2]
            rms=result[3]
            zcr=result[4]
            MFCC_res=result[5]
            #MFCCs=MFCC_res[0]
            #rms= rms[~numpy.isnan(rms)] #rms array contains NAN values and we have to remove these values
            MFCC_coef=list() #TEMP COMMENT
            ran=MFCC_res.shape
            #print "RAANNNN===",ran
            #for ind in range(len(MFCCs)):
            #    MFCCs[ind][MFCCs[ind] == -inf] = 0
            ran1=ran[0]
            # Average each of the 13 MFCC coefficients over all frames.
            for ind1 in range(13): #TEMP COMMENT
                sum=0
                for ind in range(ran1):
                    sum+=MFCC_res[ind,ind1]
                MFCC_coef.append(sum/ran1) #TEMP COMMENT
            eng=result[6]
            intClass=result[7] #TEMP COMMENT
            #print result,"<===JOB"
            writer.writerow({'Spect Centroid':s1.mean().astype(float), 'Spect Rolloff':sr1.mean().astype(float),'Spect Flux':sf1.mean().astype(float),'RMS':rms.mean().astype(float),'ZCR':zcr.mean().astype(float),\
                'SC_SD':s1.std().astype(float),'SR_SD':sr1.std().astype(float),'SF_SD':sf1.std().astype(float),'ZCR_SD':zcr.std().astype(float),'energy':eng.astype(float),\
                'MFCC1':MFCC_coef[0],'MFCC2':MFCC_coef[1],'MFCC3':MFCC_coef[2],'MFCC4':MFCC_coef[3],\
                'MFCC5':MFCC_coef[4],'MFCC6':MFCC_coef[5],'MFCC7':MFCC_coef[6],'MFCC8':MFCC_coef[7],\
                'MFCC9':MFCC_coef[8],'MFCC10':MFCC_coef[9],'MFCC11':MFCC_coef[10],'MFCC12':MFCC_coef[11],\
                'MFCC13':MFCC_coef[12],'CLASS':intClass})
    print "feature extraction done in= ",time.time()-t0
    #joinCSVs(auList,pathLength)
    print job_server.print_stats()
    print "=======================================END"
def fit(self, X, Y): """To train with the data provided as ndarrays :param X: Training features :param Y: Labels :returns: Trained model. """ # for i in xrange(len(self.treeModels)): # self.treeModels[i].fit(X,Y, self.splitCriterion, self.weighting) server = pp.Server() jobqueue = deque() for i in xrange(len(self.treeModels)): #jobqueue.append(server.submit(self.treeModels[i].fit, (X[reld],Y[reld],self.splitCriterion,self.weighting), modules = ('paralleltrees','copy','numpy','random'))) jobqueue.append( server.submit(self.treeModels[i].fit, (X, Y, self.splitCriterion, self.weighting), modules=('paralleltrees', 'copy', 'numpy', 'random'))) print "This should be parallel now, check CPU usage in task manager, should be >25%" for i in xrange(len(self.treeModels)): self.treeModels[i] = copy.deepcopy(jobqueue[0]()) jobqueue.popleft() return
def ZHfucplbias(sig_arr, detc_arr): deriv_step = 0.0001 step_arr = deriv_step * np.ones(len(detc_arr)) # Liu deriv_cof = np.array([detc_arr+2.0*step_arr, detc_arr+step_arr, detc_arr-step_arr, detc_arr-2.0*step_arr, detc_arr]) ppservers = () ncpus = 8 ppservers = ("10.0.0.1",) job_server = pp.Server(ncpus, ppservers=ppservers) start_time = time.time() # The following submits 8 jobs and then retrieves the results inputs = deriv_cof deriv_temp = np.zeros([len(detc_arr), 5]) jobs = [(input, job_server.submit(ZHfunc,(input, sig_arr, mass_arr, CP['num_r'], CP['rho_m']), (Gaus,), ("numpy","scipy.interpolate",))) for input in inputs] i = 0 for input, job in jobs: #print "Derivative array", "is", job() job() deriv_temp[:, i] = job() i = i + 1 print "Time elapsed: ", time.time() - start_time, "s" # Liu deriv_nuFu = - np.log(deriv_temp[:, 0]) + 8.0*np.log(deriv_temp[:, 1]) deriv_nuFu = deriv_nuFu - 8.0*np.log(deriv_temp[:, 2]) + np.log(deriv_temp[:, 3]) deriv_nuFu = deriv_nuFu / (12.0*deriv_step) Bias = 1.0 - deriv_nuFu dndlnm = deriv_temp[:, 4] for i in range(len(Bias)): if(np.isnan(Bias[i])): Bias[i] = 0 return (dndlnm, Bias)
def calculate_results_parallel(specimens, args, scale_min, scale_max):
    """Score args.o specimens with objective_function in parallel via pp.

    @param specimens: indexable collection of candidate solutions.
    @param args: CLI namespace; uses .o (count), .f (objective id), .t (local
        worker count) and .x (optional remote pp server address).
    @param scale_min, scale_max: scaling bounds forwarded to the objective.
    @return: list of scores, res[i] matching specimens[i].
    """
    if args.x is None:
        ppservers = ()
    else:
        ppservers = (args.x, )
    job_server = pp.Server(args.t, ppservers=ppservers)
    jobs = [
        job_server.submit(objective_function,
                          (specimens[i], args.f, scale_min, scale_max),
                          (scale, ), ('math', 'time', ))
        for i in range(args.o)
    ]
    # Collect in submission order so results align with specimens
    # (replaces the original manual index counter).
    return [job() for job in jobs]
def job_server_init(): ncpus = settings['ncpus'] pservers = tuple(settings['ppservers']) secret = settings['secret'] if ncpus is not None: job_server = pp.Server(ncpus, ppservers=pservers, secret=secret, socket_timeout=None) else: job_server = pp.Server( ppservers=pservers, secret=secret, socket_timeout=None) print "Starting pp with %d SMP local workers and %s remote workers" % (job_server.get_ncpus(),str(pservers))
def Score(P, S):
    """Total Max_Match score of P against every element of S, computed in
    parallel (one pp job per element).

    :param P: pattern/sequence passed as first argument to Max_Match.
    :param S: iterable of inputs to score against.
    :rtype : int
    """
    # tuple of all parallel python servers to connect with
    #ppServers = ("127.0.0.1",)
    ppservers = ()
    ncpus = 4  #pp.Server.get_ncpus()
    job_server = pp.Server(ncpus, ppservers=ppservers)
    jobs = [(input, job_server.submit(Max_Match, (P, input,))) for input in S]
    finalScore = 0
    for input, job in jobs:
        finalScore += job()
    # Release the worker processes before returning (removed the redundant
    # `returnScore` temporary from the original).
    job_server.destroy()
    return finalScore
def import_recommendscore(self, filename):
    """Read a "<user> <item> <score>" file and, for the first record of each
    user, submit a self.calc job to a pp server; results are delivered via
    the self.para_statistics callback.

    @param filename: file name relative to self._filepath.
    """
    # NOTE(review): these three dicts are never written here -- presumably
    # populated by the para_statistics callback elsewhere; confirm.
    user_rankingscore = {}
    user_auc = {}
    user_predictitem = {}
    user_list = []
    job_server = pp.Server()  # require parallel python
    try:
        with open(self._filepath + filename, "r") as f:
            templine = f.readline()
            while (templine):
                temp = templine[:-1].split(" ")[:3]
                user = int(temp[0])
                item = int(temp[1])
                score = float(temp[2])
                # One job per distinct user; the first record seen wins.
                # f.tell() tells the worker where to resume reading.
                if user not in user_list:
                    job = job_server.submit(func=self.calc, \
                        args=((user, item, score), f.tell(), filename), \
                        depfuncs=(), modules=("random",), \
                        callback=self.para_statistics)
                    user_list.append(user)
                templine = f.readline()
            # Block until every submitted job has finished.
            job_server.wait()
    except Exception, e:
        print e
        sys.exit()
def test_pp_function():
    """Test parallel python with plain function

    Submits one make_histo job per entry of the module-level `inputs`,
    merges the returned histograms and draws the result.
    """
    logger = getLogger("ostap.test_pp_function")
    logger.info('Test job submission with %s' % pp)
    from ostap.core.known_issues import DILL_ROOT_issue
    if DILL_ROOT_issue:
        import sys
        # BUG FIX: the original message left the %s placeholder unfilled.
        logger.warning("test is disabled for Python %s (dill/ROOT issue)"
                       % sys.version.split()[0])
        return
    job_server = pp.Server()
    jobs = [(i, job_server.submit(make_histo, (i, n)))
            for (i, n) in enumerate(inputs)]
    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result:
            result = histo
        else:
            # Accumulate into the first histogram and drop the temporary.
            result.Add(histo)
            del histo
    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries %s/%s" % (result.GetEntries(), sum(inputs)))
    job_server.print_stats()
    with wait(1), use_canvas('test_pp_function'):
        result.draw()
    return result
def test_pp_callable():
    """Test parallel python with callable

    Submits one mh.__call__ job per entry of the module-level `inputs` and
    merges the returned histograms.  Currently disabled.
    """
    logger = getLogger("ostap.test_pp_callable")
    logger.info('Test job submission with %s' % pp)
    logger.warning("test is disabled for UNKNOWN REASON")
    return
    # NOTE(review): everything below is unreachable because of the early
    # return above; kept for when the test is re-enabled.
    job_server = pp.Server()
    jobs = [(i, job_server.submit(mh.__call__, (i, n)))
            for (i, n) in enumerate(inputs)]
    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result:
            result = histo
        else:
            # Accumulate into the first histogram and drop the temporary.
            result.Add(histo)
            del histo
    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries %s/%s" % (result.GetEntries(), sum(inputs)))
    with wait(1), use_canvas('test_pp_callable'):
        result.draw()
    return result
def multicore(): delta = 20 #set up the evo strategy best_list, mut_list = [], [] evo = popstrat.Evostrategy(5000, 50) children = evo.iterate(evo.pop) nodes = ("*",) job_server = pp.Server(8, ppservers=nodes) print "Starting pp with", job_server.get_ncpus(), "workers" start_time = time.time() for i in range(50): run_time = time.time() jobs = [(child, job_server.submit(run_grn, (child['genome'], delta), (), ("grn","numpy","math"))) for child in children] for child, result in jobs: results, conclist = result() bestidx = results.index(max(results)) child['fitness'] = results[bestidx] #plotting the best with colors children = evo.iterate(children) bestgenome = evo.pop[-1]['genome'] bestresult, conclist = run_grn(bestgenome, delta) bestidx = bestresult.index(max(bestresult)) filename = "best_gen_"+str("%03d" % i) print filename colors = [] simplist = [] for idx, result in enumerate(bestresult): if idx == len(bestresult)-1: simplist.append(conclist[idx]) colors.append('k') elif idx == bestidx: colors.append('g') simplist.append(conclist[idx]) # elif result == 0: # colors.append('b') # else: # colors.append('r') graph.plot_2d(simplist, filename, colors) print "gen:", evo.gen_count, "fitness:", evo.pop[-1]['fitness'] if evo.adaptive: evo.adapt_mutation() best_list.append(evo.pop[-1]['fitness']) mut_list.append(evo.mut_rate) mutfile = open('mutrate.txt','a') mutfile.write(str(mut_list)+'\n') mutfile.close()
def __init__(self, ppservers, ncpus, txtHasWeight, m_LexFile):
    """Initialize the pp job server and the parsing bookkeeping state.

    @param ppservers: pp servers to connect to.
    @param ncpus: number of local worker processes.
    @param txtHasWeight: whether input text lines carry weights.
    @param m_LexFile: lexicon file path/handle used by the parser.
    """
    # jobInsertType=2 looks like a non-standard pp extension -- TODO confirm
    # which pp fork provides it.
    self.job_server = pp.Server(ncpus, ppservers, jobInsertType=2)
    self.text_has_weights = txtHasWeight
    self.m_LexFile = m_LexFile
    # Pending job handles and processed-corpus counter start empty/zero.
    self.jobs = []
    self.m_nPrsCps = 0
    self.m_CorpusList = []
def pp_pull(devices):
    """Run `utility command_line` against every device in parallel via pp,
    then pretty-print per-device return codes and timings.

    Relies on module-level `utility` and `command_line`.
    """
    ppservers = ()
    job_server = pp.Server(
        ppservers=ppservers)  # autodetect number of cpu's "workers"
    #job_server = pp.Server(ncpus=1, ppservers=ppservers) # manually set number of cpu's "workers", setting of 1 makes it be sequential
    start_time = time.time()
    # One progress star per device.
    for dev in devices:
        print("*", end=' ')
    print(" (with", job_server.get_ncpus(), "workers)")
    print("Returning Parallel-Python tasks:", end=' ')
    jobsList = []
    jobs = [(dev,
             job_server.submit(issue_device_cmd,
                               args=(utility, command_line, dev, ),
                               depfuncs=(),
                               modules=("sys", "shlex", "subprocess", "time",
                                        "itertools", ))) for dev in devices]
    for dev, job in jobs:
        # job() blocks until the device command finishes.
        r = (dev, job())
        jobsList.append(r)
        #print ("dev", dev, "got", r)
    #for job in jobsList: print job
    for job in jobsList:
        print(".", end=' ')
    print("")
    for i in range(0, len(jobsList)):
        results = str(jobsList[i][1]).split(",")
        results1 = shlex.split(str(jobsList[i][1]))
        '''
        #print ("results:", results)
        #print ("results1:", results1)
        testTimeS = "".join(re.findall('\d+\.\d+', results1[2]))
        print (testTimeS)
        testTimeF = float(testTimeS)
        print (testTimeF)
        testTimeV = round(testTimeF, 3)
        print (testTimeV)
        '''
        # Extract the elapsed-seconds float embedded in the result string.
        testTimeV = round(float("".join(re.findall('\d+\.\d+', results1[2]))), 3)
        print(" ", jobsList[i][0], "[Return code:", results1[0],
              results[1].strip() + ")", "]", testTimeV, " seconds")
        if "Unknown option" in results[1]:
            print("Aborting test!")
            print("")
            sys.exit(1)
    print("Parallel-Python tasks took: ",
          round(time.time() - start_time, 3), "seconds")
    print("")
    print("Parallel-Python", )
    job_server.print_stats()
def multiTheadSimulate():
    """
    Simulate the three-fund portfolio strategy on multiple cores/threads to
    save time (docstring translated from Chinese).
    :return: None (results are written to ../docs/multi/3.json)
    """
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers)
    profitMean, logMean = getAverageMean()
    funds = getupMeanFunds(profitMean, logMean)
    t = []
    jobs = {}
    # One simulate() job per candidate fund.
    for i in range(0, len(funds)):
        jobs[funds[i]] = job_server.submit(simulate, (
            funds[i],
            funds,
            i,
        ), (multipleStrategy, ), ("time", "MongoDBUtil"))
    # Block until every job has finished before collecting.
    job_server.wait()
    for key in jobs.keys():
        r = jobs[key]()
        for item in r:
            t.append(item)
    with open("../docs/multi/3.json", "w", encoding='utf-8') as f:
        json.dump(t, f, ensure_ascii=False, indent=4)
def align(self, ):
    """Align every FASTQ pair with bowtie2.

    Currently runs sequentially: the pp-based parallel submission is
    commented out, so the job server is created but unused.
    """
    quality = self.quality
    gen_index = self.genome_index
    bowtie2 = self.bowtie2
    out_dir = self.output_folder
    experience_name = self.name
    i = 0
    ncpu_pp = 1
    speed = self.speed_looping
    # Python 2 integer division; with ncpu_pp == 1 bowtie gets all CPUs.
    ncpu_bowtie = self.ncpu / ncpu_pp
    looping = self.looping
    len_tag = self.len_tag
    paired_wise_fastq = self.paired_wise_fastq
    jobs = []
    ppservers = ()
    job_server = pp.Server(ncpu_pp, ppservers=ppservers)
    print self.bowtie2
    print self.paired_files
    for paired_file in self.paired_files:
        print paired_file[0] + ' ' + paired_file[1]
        id = str(i)
        in_a = paired_file[0]
        in_b = paired_file[1]
        # Sequential alignment of this pair.
        fastq_alignment.bowtie_fastq(bowtie2, in_a, in_b, gen_index, out_dir,
                                     id, ncpu_bowtie, looping, quality,
                                     len_tag, paired_wise_fastq, speed,
                                     self.len_paired_wise_fastq)
        # jobs.append(job_server.submit(fastq_alignment.bowtie_fastq, (bowtie2, in_a, in_b, gen_index, out_dir,id,ncpu_bowtie, looping,quality,len_tag,paired_wise_fastq,speed)
        #     , (fastq_alignment.sam_filter,), ("fastq_alignment",)))
        i = i + 1
    print str(i) + " jobs launched over " + str(ncpu_bowtie) + " cores"
def get_subscriptions(request):
    """Django view: show the 20 newest uploads across all of the logged-in
    user's YouTube subscriptions (one pp job per channel feed).

    Redirects to "/" with a warning when no auth token is in the session.
    """
    if 'token' in request.session:
        api = Api()
        api.SetAuthSubToken(request.session['token'])
        job_server = pp.Server(15, ppservers=())
        jobs = []
        # Fetch each subscribed channel's upload feed in parallel.
        for entry in api.GetYouTubeSubscriptionFeed().entry:
            uri = 'http://gdata.youtube.com/feeds/api/users/%s/uploads' % entry.username.text
            jobs.append(
                job_server.submit(
                    gdata.youtube.service.YouTubeService().GetYouTubeUserFeed,
                    (
                        uri,
                        None,
                    ),
                    modules=(
                        "gdata.youtube",
                        "gdata.youtube.service",
                    )))
        feed = []
        for job in jobs:
            for entry in job().entry:
                feed.append(getVideoDict(entry))
        job_server.destroy()
        # Sequential equivalent kept for reference:
        #for entry in api.GetYouTubeSubscriptionFeed().entry:
        #    feed.extend(api.GetYouTubeUserFeed(username=entry.username.text).entry)
        feed.sort(key=lambda x: x['date'], reverse=True)
        return render_to_response("subscriptions.html", {'scrfeed': feed[:20]},
                                  context_instance=RequestContext(request))
    else:
        messages.warning(request, "Login to view your subscriptions!")
        return redirect("/")
def calc_pce_fft(rparams, subkeys): """ Submits jobs to job server """ # get a list of all dictionaries we want to run... rdicts = jfdfdUtil.dictpairs(rparams) if parallel: # tell parallel python how many cpus to use job_server = pp.Server(ncpus=rparams['ncpus']) jobs = [] # go through the dictionaries and run them or submit them to the job server.. for rd in rdicts: #run_one(rd, subkeys) job = job_server.submit(calc_pce_fft_one, (rd, subkeys), (), ('jfdfd', 'jfdfdUtil', 'pylab', 'numpy', 'materials', 'Plasmons','Patrick_Utilities','plasmon_fft')) jobs.append(job) njobs = len(jobs) print 'Submitted %d jobs' % njobs for ijob, job in enumerate(jobs): job() print ' %g%% done (%d of %d jobs remaining)' % ((ijob+1)*100./njobs, njobs-ijob-1, njobs) else: for rd in rdicts: calc_pce_fft_one(rd, subkeys)
def predict(self, data, dict_of_additional_variables={}):
    """Run every sub-model's prediction in parallel, then blend the per-model
    predictions with self.blender.

    @param data: feature matrix (rows = samples; needs .shape[0]).
    @param dict_of_additional_variables: optional extra positional args per
        model name.  NOTE(review): mutable default argument -- only read
        here, but still one shared object across calls.
    @return: self.blender.predict() over the stacked per-model predictions.
    """
    # NOTE(review): max() guarantees AT LEAST 32 workers regardless of model
    # count; if the intent was to cap at 32, this should be min() -- confirm.
    ncpus = max(len(self.models), 32)
    job_server = pp.Server(ncpus, ppservers=())
    jobs = dict()
    to_import = ("import numpy as np", "sklearn", "time",
                 "from localRegression import *",
                 "from sklearn.linear_model import sparse",
                 "from sklearn.utils import atleast2d_or_csc")
    # sorted() here and below keeps job order aligned with column order.
    for name, model in sorted(self.models.iteritems()):
        try:
            predictargs = [data] + dict_of_additional_variables[name]
        except KeyError:
            predictargs = [data]
        jobs[name] = job_server.submit(
            pp_predict, (model, name, self.verbose, predictargs), (), to_import)
    X = np.zeros((data.shape[0], len(self.models)))
    i = 0
    for name, model in sorted(self.models.iteritems()):
        X[:, i] = jobs[name]()
        i += 1
    job_server.destroy()
    return self.blender.predict(X)
def _pp_needs_monkeypatching():
    """Detect (once, at first mdp import) whether parallel python on this
    system is affected by Debian bug #620551 and thus needs monkeypatching.

    pp reports the failure by printing to stdout instead of raising, so
    stdout is hijacked and scanned for 'ImportError'.  The result is cached
    on the mdp module because the probe only works the first time.
    """
    # only run this function the first time mdp is imported
    # otherwise reload(mdp) breaks
    if not hasattr(mdp, '_pp_needs_monkeypatching'):
        # check if we are on one of those broken system were
        # parallel python is affected by
        # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=620551
        # this is a minimal example to reproduce the problem
        # XXX IMPORTANT XXX
        # This function only works once, i.e. at import
        # if you attempt to call it again afterwards,
        # it does not work [pp does not print the error twice]
        # we need to hijack stdout here, because pp does not raise
        # exceptions: it writes to stdout directly!!!
        # pp stuff
        import pp
        server = pp.Server()
        with _sys_stdout_replaced() as capture:
            # Submit a trivial job that imports numpy; on broken systems pp
            # prints an ImportError to stdout here.
            server.submit(lambda: None, (), (), ('numpy', ))()
            server.destroy()
        # read error from hijacked stdout
        error = capture.getvalue()
        mdp._pp_needs_monkeypatching = 'ImportError' in error
    return mdp._pp_needs_monkeypatching
def main() :
    """CLI driver: parse arguments and launch one RNA evolution run for the
    given target structure."""
    import sys
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        argument_default=argparse.SUPPRESS)
    parser.add_argument('--target', type=str)
    parser.add_argument('--job', type=int)
    parser.add_argument('-g', type=int, help="Number of generation")
    parser.add_argument('-n', type=int, help="Population Size")
    args = parser.parse_args()
    target = args.target
    landscape = Landscape(target)
    print "====================================================================================================="
    print "Solving for the target = ", target
    print "====================================================================================================="
    number_of_run = args.job
    init_depth = len(target)
    mut_prob = 1. / init_depth
    number_of_generation = args.g
    pop_size = args.n
    # Nucleotide probabilities ["A","G","U","C"];
    # default = [0.25,0.25,0.25,.25]; alternatives tried: [0.25,0.65,0.05,.05]
    p_n = [0.7, 0.1, 0.1, .1]
    # Base-pair probabilities ["GC","CG","AU","UA","GU","UG"];
    # alternatives tried: [0.2,0.2,0.1,0.1,0.2,0.2], [0.4,0.5,0.1,0.,0.,0.]
    p_c = [0.5, 0.5, 0., 0., 0., 0.]
    ppservers = ()
    # Per-position mutation probabilities: positions that are paired in the
    # target (ptable entry > 0) end up with probability 0.
    mut_probs = numpy.array(RNA.ptable(target)[1:])
    mut_probs = mut_probs + mut_prob
    mut_probs[mut_probs > mut_prob] = 0
    job_server = pp.Server(4, ppservers=ppservers)
    print "Start running job", number_of_run
    run(number_of_generation, pop_size, mut_probs, 0, landscape, p_n, p_c)
    """
def checkOptString(): ppservers = ("192.168.0.125:35000", ) job_server = pp.Server(4, ppservers=ppservers) print "Starting parallel python with", job_server.get_ncpus(), "workers" print "*********************\n" ProgConstant = ShareConstant() ProgConstant.POP_SIZE = 3000 ProgConstant.CHROMO_LENGTH = 30000 ConstPrint(ProgConstant) chromo_list = [] lenCount = [] for i in range(100): lenCount.append(0) getRandomBit_jobs = [(job_server.submit(GetRandomBits, (ProgConstant.CHROMO_LENGTH, ), ( ChromoTyp, RANDOM_NUM, ), ("random", ))) for i in range(ProgConstant.POP_SIZE)] count = 0 for job in getRandomBit_jobs: count += 1 chromo_list.append(job()) print "Count: {}".format(count) for i in range(ProgConstant.POP_SIZE): index = numberOfDigits(chromo_list[i].bits, ProgConstant) lenCount[index] += 1 for i in range(len(lenCount)): if lenCount[i] > 0: print "index: {} count: {} ".format(i, lenCount[i])
def main():
    """Grid-search DBSCAN over (eps, min_samples), scoring each combination
    by silhouette in parallel, then save the result surfaces to disk."""
    features = np.loadtxt(sys.argv[1])
    job_server = pp.Server(8)
    dist_start = 0.1
    dist_step = 0.02
    sample_start = 5
    sample_step = 1
    jobs = []
    # 30 x 70 grid of (eps, min_samples) candidates.
    for i in range(0, 30):
        for j in range(0, 70):
            eps = dist_start + i * dist_step
            min_samples = sample_start + j * sample_step
            jobs.append(
                (eps, min_samples,
                 job_server.submit(mysilhouette, (features, eps, min_samples),
                                   (), ("sklearn.metrics", "sklearn.cluster",
                                        "numpy",))))
            print "Task for: " + str(eps) + ", " + str(min_samples) + " submitted"
    X = []; Y = []; Z1 = []; Z2 = []
    for eps, min_samples, job in jobs:
        X.append(eps)
        Y.append(min_samples)
        temp = job()
        Z1.append(temp[0])  # Silhouette Score
        Z2.append(temp[1])  # Number of clusters
        print "Row: " + str(eps) + " column: " + str(min_samples) + " s: " + str(temp[0]) + " n: " + str(temp[1])
    m = np.max(Z1)
    i = np.argmax(Z1)
    print "--- Largest Silhouette Score: " + str(m) + " ---"
    print "--- eps: " + str(X[i]) + " min_samples: " + str(Y[i]) + " number_clusters: " + str(Z2[i]) + " ---"
    np.savetxt("silhouette_x", X)
    np.savetxt("silhouette_y", Y)
    np.savetxt("silhouette_z1", Z1)
    np.savetxt("silhouette_z2", Z2)