def gen_training_data(self, pdbbind_dir, pdbbind_version = '2007', home_dir = None, sf_pickle = ''): # build train and test
    """Build RFScore train/test descriptor arrays from a PDBbind release.

    The PDBbind 'core' set becomes the test set and the remainder of the
    'general' set (core entries excluded) becomes the training set.
    Descriptors are computed in parallel and the four resulting arrays are
    both stored on ``self`` and written as CSV files under ``home_dir``.

    Parameters
    ----------
    pdbbind_dir : str
        Root directory of the local PDBbind installation.
    pdbbind_version : str, optional
        PDBbind release year; converted to int before use.
    home_dir : str or None, optional
        Output directory; defaults to '<this file's dir>/RFScore'.
    sf_pickle : str, optional
        Unused here.  # NOTE(review): accepted but never read — confirm intent.
    """
    # joblib convention: -1 means "use all cores" when n_jobs is not positive
    cpus = self.n_jobs if self.n_jobs > 0 else -1
    #pool = Pool(processes=cpus)
    pdbbind_db = pdbbind(pdbbind_dir, int(pdbbind_version), opt={'b':None})
    if not home_dir:
        home_dir = dirname(__file__) + '/RFScore'
    # --- core set -> test data ---
    pdbbind_db.default_set = 'core'
    core_set = pdbbind_db.ids
    core_act = np.array(pdbbind_db.activities)
    # core_desc = np.vstack([self.descriptor_generator.build([pid.ligand], protein=pid.pocket) for pid in pdbbind_db])
    # entries without a pocket structure are silently skipped
    result = Parallel(n_jobs=cpus)(delayed(_parallel_helper)(self.descriptor_generator, 'build', [pid.ligand], protein=pid.pocket) for pid in pdbbind_db if pid.pocket)
    core_desc = np.vstack(result)
    # --- general set minus core -> training data ---
    pdbbind_db.default_set = 'general'
    refined_set = [pid for pid in pdbbind_db.ids if not pid in core_set]
    refined_act = np.array([pdbbind_db.sets[pdbbind_db.default_set][pid] for pid in refined_set])
    # refined_desc = np.vstack([self.descriptor_generator.build([pid.ligand], protein=pid.pocket) for pid in pdbbind_db])
    result = Parallel(n_jobs=cpus)(delayed(_parallel_helper)(self.descriptor_generator, 'build', [pid.ligand], protein=pid.pocket) for pid in pdbbind_db if pid.pocket and not pid.id in core_set)
    refined_desc = np.vstack(result)
    self.train_descs = refined_desc
    self.train_target = refined_act
    self.test_descs = core_desc
    self.test_target = core_act
    # save numpy arrays (descriptors are versioned by self.version, targets are not)
    np.savetxt(home_dir + '/train_descs_v%i.csv' % (self.version), self.train_descs, fmt='%g', delimiter=',')
    np.savetxt(home_dir + '/train_target.csv', self.train_target, fmt='%.2f', delimiter=',')
    np.savetxt(home_dir + '/test_descs_v%i.csv' % (self.version), self.test_descs, fmt='%g', delimiter=',')
    np.savetxt(home_dir + '/test_target.csv', self.test_target, fmt='%.2f', delimiter=',')
def batch_train(opt, round_index, round_train_data, round_valid_data, round_valid_weights=None, save_all=True, file_indices=None, return_acc_len=False, seq2seq=False):
    """Train one model per data shard, at most opt['num_machines'] at a time,
    then retry failed shards on the machine slots that succeeded.

    Parameters
    ----------
    opt : dict
        Options; opt['num_machines'] caps the number of concurrent jobs.
    round_index : int
        Index of the current training round, forwarded to ``train``.
    round_train_data, round_valid_data : sequences
        Per-shard training / validation data, same length.
    round_valid_weights : sequence or None
        Optional per-shard validation weights.
    file_indices : sequence or None
        Optional remapping of shard index -> file index.
    return_acc_len, seq2seq : bool
        Forwarded to ``train``.

    Returns
    -------
    list
        One performance value (or tuple) per shard; a value of 0.0 marks a
        failed run that could not be recovered.
    """
    i = 0
    perfs = []
    M = len(round_train_data)
    # First pass: run the shards in windows of num_machines concurrent jobs.
    while i < M:
        j = min(i + opt['num_machines'], M)
        cur_perfs = Parallel(n_jobs=j - i, backend='threading') \
            (delayed(train)(opt, round_index, train_index,
                            file_indices[train_index] if file_indices else train_index,
                            round_train_data[train_index],
                            round_valid_data[train_index],
                            valid_weights=round_valid_weights[train_index] if round_valid_weights else None,
                            save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq) \
             for train_index in range(i, j))
        perfs.extend(cur_perfs)
        i = j
    # A perf of 0.0 (or a tuple starting with 0.0) flags a failed shard.
    # Machine slots from the first window that succeeded are reused for retries.
    error_indices, valid_indices = [], []
    for i, perf in enumerate(perfs):
        if perf == 0.0 or type(perf) == tuple and perf[0] == 0.0:
            error_indices.append(i)
        elif i < opt['num_machines']:
            valid_indices.append(i)
    M = len(error_indices)
    TMP_NUM_MACHINES = len(valid_indices)
    if M > 0 and TMP_NUM_MACHINES > 0:
        # Second pass: rerun failed shards on the known-good machine indices.
        i = 0
        error_perfs = []
        while i < M:
            j = min(i + TMP_NUM_MACHINES, M)
            cur_perfs = Parallel(n_jobs=j - i, backend='threading') \
                (delayed(train)(opt, round_index, valid_indices[train_index],
                                file_indices[error_indices[train_index]] if file_indices else error_indices[train_index],
                                round_train_data[error_indices[train_index]],
                                round_valid_data[error_indices[train_index]],
                                valid_weights=round_valid_weights[error_indices[train_index]] if round_valid_weights else None,
                                save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq) \
                 for train_index in range(i, j))
            error_perfs.extend(cur_perfs)
            i = j
        # Splice the retry results back into their original positions.
        for i in range(M):
            perfs[error_indices[i]] = error_perfs[i]
    return perfs
def _nlp_sub(disc_clsdict, gold_clsdict, names, label, verbose, n_jobs):
    """Compute NED and coverage scores over subsampled file sets, in parallel.

    Each element of ``names`` is one subsample of file names; NED is computed
    on the discovered clusters restricted to that subsample, coverage on both
    discovered and gold clusters.  NaNs are kept and handled by ``aggregate``.
    Returns (ned_scores, cov_scores) as numpy arrays.
    """
    # ned
    ned = NED
    cov = coverage
    if verbose:
        print ' nlp ({2}): subsampled {0} files in {1} sets'\
            .format(sum(map(len, names)), len(names), label)
    with verb_print(' nlp ({0}): calculating scores'
                    .format(label), verbose, False, True, False):
        ned_score = Parallel(n_jobs=n_jobs,
                             verbose=5 if verbose else 0,
                             pre_dispatch='n_jobs')(delayed(ned)\
            (disc_clsdict.restrict(ns, True)) for ns in names)
        cov_score = Parallel(n_jobs=n_jobs,
                             verbose=5 if verbose else 0,
                             pre_dispatch='n_jobs')(delayed(cov)\
            (disc_clsdict.restrict(ns, False), gold_clsdict.restrict(ns, False)) for ns in names)
    # don't replace nan's by 1, but ignore them, unless all values in ned_score
    # are nan
    ned_score, cov_score = np.array(ned_score), np.array(cov_score)
    ned_score, cov_score = aggregate(ned_score, 1), aggregate(cov_score)
    return np.array(ned_score), np.array(cov_score)
def compute_pairwise_distances(sets, metric, sp_areas=None):
    """Compute a pairwise distance (or overlap) matrix between ``sets``.

    The row blocks are computed in parallel across 16 workers and stacked.

    Parameters
    ----------
    sets : sequence / array
        The items to compare pairwise.
    metric : str or callable
        * callable           -> scipy ``cdist`` with the callable as metric
        * 'nonoverlap-area'  -> ``compute_overlap_partial`` with ``sp_areas``
        * 'overlap-size'     -> raw overlap counts (similarity; returned as-is,
                                diagonal NOT zeroed)
        * anything else      -> overlap ratio, converted to distance 1-overlap
    sp_areas : optional
        Superpixel areas, only used for 'nonoverlap-area'.

    Returns
    -------
    np.ndarray
        Square matrix; a distance matrix with zeroed diagonal except for
        'overlap-size', which returns the similarity matrix unchanged.
    """
    n_workers = 16  # fixed fan-out used both for n_jobs and for row chunking

    if callable(metric):
        # arbitrary user metric: delegate row blocks to scipy's cdist
        parts = Parallel(n_jobs=n_workers, max_nbytes=1e6)(
            delayed(cdist)(chunk, sets, metric=metric)
            for chunk in np.array_split(sets, n_workers))
    elif metric == 'nonoverlap-area':
        # only this metric needs the superpixel areas
        parts = Parallel(n_jobs=n_workers, max_nbytes=1e6)(
            delayed(compute_overlap_partial)(chunk, sets, metric=metric, sp_areas=sp_areas)
            for chunk in np.array_split(range(len(sets)), n_workers))
    else:
        # 'overlap-size' and all remaining overlap-style metrics
        parts = Parallel(n_jobs=n_workers, max_nbytes=1e6)(
            delayed(compute_overlap_partial)(chunk, sets, metric=metric)
            for chunk in np.array_split(range(len(sets)), n_workers))

    matrix = np.vstack(parts)

    if metric == 'overlap-size':
        # similarity matrix is returned untouched (historical behavior)
        return matrix
    if not callable(metric) and metric != 'nonoverlap-area':
        # overlap in [0, 1] -> distance
        matrix = 1 - matrix
    np.fill_diagonal(matrix, 0)
    return matrix
def do_cmd_multiproc(cmd, analyzer, hash_tab, filename_iter, matcher, outdir, report, ncores):
    """ Run the actual command, using multiple processors """
    if cmd == 'precompute':
        # Fingerprint every input file in its own joblib worker, then
        # flatten the per-file message lists through report().
        all_msgs = joblib.Parallel(n_jobs=ncores)(
            joblib.delayed(file_precompute)(analyzer, file, outdir,
                                            audfprint_analyze.PRECOMPEXT)
            for file in filename_iter
        )
        for msgs in all_msgs:
            report(msgs)

    elif cmd == 'match':
        # Run the queries in parallel.  The module-level helper
        # matcher_file_match_to_msgs is used because joblib cannot
        # serialize a call to the bound method matcher.file_match_to_msgs().
        all_msgs = joblib.Parallel(n_jobs=ncores)(
            joblib.delayed(matcher_file_match_to_msgs)(matcher, analyzer,
                                                       hash_tab, filename)
            for filename in filename_iter
        )
        for msgs in all_msgs:
            report(msgs)

    elif cmd in ('new', 'add'):
        # Adding forks parallel threads, each analyzing a different
        # subset of the file list.
        multiproc_add(analyzer, hash_tab, filename_iter, report, ncores)

    else:
        # Every other command has no multiprocessor implementation.
        raise ValueError("unrecognized multiproc command: "+cmd)
def fit(self, imgs):
    """ compute connectivities """
    # Select the worker and its extra (non-image) arguments once, then
    # fan the images out over a single Parallel call.
    if self.metric == 'wavelet':
        worker = wavelet_worker
        extra_args = (self.masker, self.regu, self.lbda,
                      self.nb_vanishmoment, self.norm, self.q,
                      self.nbvoies, self.distn, self.wtype,
                      self.j1, self.j2)
    elif self.metric == 'dfa':
        worker = dfa_worker
        extra_args = (self.masker, self.regu, self.lbda,
                      self.wtype, self.j1, self.j2)
    elif self.metric == 'welch':
        worker = welch_worker
        extra_args = (self.masker, self.regu, self.lbda)
    else:
        raise ValueError("the metric dico = %s is not yet implemented" % (self.metric,))

    ts = Parallel(n_jobs=5, verbose=5)(
        delayed(worker)(img, *extra_args) for img in imgs)
    self.hurst = ts
    return self.hurst
def main():
    """Scrape forum threads page by page, then scrape each thread's posts,
    writing one pipe-delimited CSV (username|timestamp|text) per forum."""
    #configurable parameters
    num_cores = 30 #multiprocessing.cpu_count()
    # (forum index URL, number of listing sub-pages to walk)
    main_urls = [("http://www.mothering.com/forum/306-unassisted-childbirth",331),
                 #("http://www.mothering.com/forum/69-vaccinations-archives",1),
                 ("http://www.mothering.com/forum/443-i-m-not-vaccinating",191),
                 ("http://www.mothering.com/forum/373-selective-delayed-vaccination",114),
                 ("http://www.mothering.com/forum/17507-vaccinating-schedule",7)
                 ]
    for main_url,nsubpages in main_urls:
        # last URL segment doubles as the output-file label
        forum_label = main_url.split("/")[-1]
        start = time.time()
        print "Running on ", num_cores, " CPU cores"
        print "Scraping ",forum_label
        # stage 1: collect thread links from every listing page in parallel
        real_links = Parallel(n_jobs=num_cores)(delayed(doPage)(main_url,ipage) for ipage in range(nsubpages))
        #somehow we get duplicates.... so set() it
        real_links = set([item for sublist in real_links for item in sublist])
        end = time.time()
        print "Elapsed time %s" % (end-start)
        #print real_links
        # stage 2: scrape the posts of every thread in parallel and flatten
        results = Parallel(n_jobs=num_cores)(delayed(doTexts)(l) for l in real_links)
        results = [item for sublist in results for item in sublist]
        #save the data
        with open(forum_label+'_out.csv','w') as out:
            csv_out=csv.writer(out,delimiter='|')
            csv_out.writerow(['username','timestamp','text'])
            for row in results:
                csv_out.writerow(row)
        end2 = time.time()
        print "Total elapsed time %s" % (end2-start)
def fit_mvpa(self, data, labels):
    """
    Fit Searchlight for MVPA

    Parameters:
        data: 4D numpy array - (x, y, z, condition vols)
        labels: classifier labels
    """
    print('Running searchlight Decoding')
    x, y, z, nobjects = data.shape
    # Collapse the spatial axes so each searchlight center's subspace
    # index addresses the first dimension of `data` directly.
    data = data.reshape((x*y*z, nobjects))

    # test run_per_center
    # for x in self.allIndices:
    #     t = run_per_center(data, x, labels)

    # Only difference between verbose and quiet mode is the tqdm wrapper,
    # so pick the iterable once instead of duplicating the Parallel call.
    centers = tqdm(self.allIndices) if self.verbose is True else self.allIndices
    scores = Parallel(n_jobs=self.njobs)(
        delayed(run_per_center)(data, center, labels) for center in centers)
    print('\n')

    # Scatter the per-center scores back into a full volume.
    self.MVPA = np.zeros((x*y*z))
    self.MVPA[list(self.centerIndices)] = scores
    self.MVPA = self.MVPA.reshape((x, y, z))
def __call__(self, filenames):
    """Run model predictions over image files in pipelined batches:
    while batch k's predictions run, batch k+1's images are loaded."""
    batch_num = 1
    batch_means = np.zeros(((self.size[0]**2)*self.channels,1))  # NOTE(review): written but never read — confirm
    start_time = time.clock()
    # get_next yields (current_chunk, next_chunk); next_chunk is None at the end
    for filenames,next_filenames in get_next(list(chunks(filenames,self.batch_size))):
        if batch_num == 1:
            # first batch: load images synchronously (no prefetch exists yet)
            rows = Parallel(n_jobs=self.n_jobs)(
                delayed(_process_tag_item)(self.size,self.channels,filename)
                for filename in filenames)
            # drop unreadable images, stack to (features, n_images)
            data = np.vstack([r for r in rows if r is not None]).T
            if data.shape[1] > 5:
                # print 'Over 20'
                # center the batch on its own mean
                mean = data.mean(axis=1).reshape(((self.size[0]**2)*self.channels,1))
                # print mean
                # print mean.shape
                data = data - mean
            # else:
            #     print 'Less than 20'
            #     mean = self.model.train_data_provider.data_mean
            #     print mean
            #     print mean.shape
            #     data = data - mean
            self.model.start_predictions(data)
        if next_filenames is not None:
            # prefetch the next batch while the model is busy
            rows = Parallel(n_jobs=self.n_jobs)(
                delayed(_process_tag_item)(self.size,self.channels,filename)
                for filename in next_filenames)
        # names aligned with the rows that survived loading
        names = [name for (r,name) in zip(rows,filenames) if r is not None];
        self.model.finish_predictions(names,self.num_results,self.threshold)
        batch_num += 1
def dump_csv(llclat, llclon, urclat, urclon, start_date_str, end_date_str, altitude_layer):
    """Download data files for a date range, render each into an interpolated
    temperature map over the given bounding box, and dump 'node_temp.csv'.

    Parameters
    ----------
    llclat, llclon, urclat, urclon : int
        Lower-left / upper-right corners of the bounding box.
    start_date_str, end_date_str : str
        Inclusive date range in '%Y-%m-%d' format.
    altitude_layer : int
        Vertical layer passed through to ``render_map``.

    Returns
    -------
    pandas.DataFrame
        Columns: unix_epoch, lat, lon, temp (also written to node_temp.csv).
    """
    # Original code asserted llclat four times; validate each corner once.
    assert isinstance(llclat, int)
    assert isinstance(llclon, int)
    assert isinstance(urclat, int)
    assert isinstance(urclon, int)
    llclat = int(llclat)
    llclon = int(llclon)
    urclat = int(urclat)
    urclon = int(urclon)

    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')

    # Fetch all remote files concurrently; downloads are best-effort, so
    # re-check which local files actually exist before processing.
    url_and_files = get_url_and_files_from_dates(start_date, end_date)
    joblib.Parallel(n_jobs=N_PROC)(joblib.delayed(download_file)(url, local)
                                   for url, local in url_and_files)
    files_allegedly_downloaded = [f[1] for f in url_and_files]
    files_to_process = get_files_to_process(files_allegedly_downloaded)

    # Render each file to interpolated node values in parallel.
    results = joblib.Parallel(n_jobs=N_PROC)(joblib.delayed(render_map)(
        f, llclat, llclon, urclat, urclon, altitude_layer)
        for f in files_to_process)

    # Flatten (n_files, n_nodes, 4) -> (n_files * n_nodes, 4).
    node_interp = np.array(results)
    node_interp = node_interp.reshape(-1, node_interp.shape[-1])
    df_interp = pd.DataFrame(
        data=node_interp,
        columns=['unix_epoch', 'lat', 'lon', 'temp'])
    df_interp.to_csv('node_temp.csv')
    return(df_interp)
def process(self, num_cores = 10):
    """Run the per-genome preprocessing pipeline: prokka annotation, mash
    sketching, CheckM QC (all parallel), then sizes/GC/fake reads serially.
    Each step skips genomes whose output already exists on disk."""
    print "Running prokka for protein annotation (excedpt if faas already provided)"
    to_prokka = [g for g in self.genomes if not os.path.exists(g.proteom)]
    prokka_stuff = Parallel(n_jobs=num_cores)(delayed(prokka)(i) for i in tqdm(to_prokka))
    to_mash = [g for g in self.genomes if not os.path.exists(g.genome + ".msh")]
    print "running mash hashing"
    mashstuff= Parallel(n_jobs=num_cores)(delayed(mash)(i) for i in tqdm(to_mash))
    print "running CheckM"
    to_check = [g for g in self.genomes if not os.path.exists(g.genome.replace(".fna",".checkm.json")) or not g.checkm_meta]
    checkmstuff= Parallel(n_jobs=num_cores)(delayed(checkm)(i) for i in tqdm(to_check))
    print "computing genome sizes"
    for g in tqdm(self.genomes):
        if not g.size:
            g.compute_size()
    print "computing gc contents"
    for g in tqdm(self.genomes):
        # NOTE(review): this guard re-tests g.size, so compute_gc() never runs
        # once sizes exist — probably meant to test the GC attribute. Confirm.
        if not g.size:
            g.compute_gc()
    print "making fake reads"
    for g in tqdm(self.genomes):
        if not os.path.exists(g.fakereads):
            g.make_fake_reads(read_len=150)
def load_stl(fname):
    """Load a raw STL-10 binary file and build a feature matrix of scaled HOG
    descriptors concatenated with downsampled CrCb color channels.

    Returns (feature, images) where images are in BGR->RGB flipped order.
    Also writes '<fname>_org.jpg' and '<fname>_hog.jpg' preview mosaics.
    """
    from joblib import Parallel, delayed
    import features
    # STL-10 stores uint8 pixels as N x 3 x 96 x 96, column-major per image
    X = np.fromfile('../stl/'+fname, dtype=np.uint8)
    X = X.reshape((X.size/3/96/96, 3, 96, 96)).transpose((0,3,2,1))
    dispImg(X[:100, :, :, [2,1,0]], 10, fname+'_org.jpg')
    n_jobs = 10
    cmap_size = (8,8)  # size of the downsampled color map
    N = X.shape[0]
    # HOG descriptor per image, computed in parallel
    H = np.asarray(Parallel(n_jobs=n_jobs)(
        delayed(features.hog)(X[i]) for i in xrange(N)))
    # preview of the first 100 HOG maps, replicated to 3 channels
    H_img = np.repeat(np.asarray([hog_picture(H[i], 9)
                                  for i in xrange(100)])[:, :,:,np.newaxis], 3, 3)
    dispImg(H_img, 10, fname+'_hog.jpg')
    H = H.reshape((H.shape[0], H.size/N))
    # downsample, convert to YCrCb, keep only the chroma channels
    X_small = np.asarray(Parallel(n_jobs=n_jobs)(
        delayed(cv2.resize)(X[i], cmap_size) for i in xrange(N)))
    crcb = np.asarray(Parallel(n_jobs=n_jobs)(
        delayed(cv2.cvtColor)(X_small[i], cv.CV_RGB2YCrCb) for i in xrange(N)))
    crcb = crcb[:,:,:,1:]
    crcb = crcb.reshape((crcb.shape[0], crcb.size/N))
    # rescale both feature families to comparable magnitudes before concat
    feature = np.concatenate(((H-0.2)*10.0, (crcb-128.0)/10.0), axis=1)
    print feature.shape
    return feature, X[:,:,:,[2,1,0]]
def __call__(self, all_names_and_labels, shuffle=False):
    """Tag labeled images in pipelined batches: while batch k's predictions
    run, batch k+1's images are loaded; results are written to XML."""
    batch_num = 1
    batch_means = np.zeros(((self.size[0]**2)*self.channels,1))  # NOTE(review): written but never read — confirm
    self.count_correct = 0
    self.count_incorrect = 0
    start_time = time.clock()
    # get_next yields (current_chunk, next_chunk); next_chunk is None at the end
    for names_and_labels,n_l_next in get_next(list(chunks(all_names_and_labels,self.batch_size))):
        loop_time = time.clock()
        if batch_num == 1:
            # first batch: load images synchronously (no prefetch exists yet)
            rows = Parallel(n_jobs=self.n_jobs)(
                delayed(_process_tag_item)(self.size,self.channels,name)
                for name, label in names_and_labels)
            # drop unreadable images, stack to (features, n_images)
            data = np.vstack([r for r in rows if r is not None]).T
            if len(names_and_labels) > 20:
                # large enough batch: center on its own mean
                mean = data.mean(axis=1).reshape(((self.size[0]**2)*self.channels,1))
                data = data - mean
            if self.model is not None:
                self.model.start_predictions(data)
        if n_l_next is not None:
            # prefetch the next batch while the model is busy
            rows = Parallel(n_jobs=self.n_jobs)(
                delayed(_process_tag_item)(self.size,self.channels,name)
                for name, label in n_l_next)
        if self.model is not None:
            tags = self.model.finish_predictions()
        else:
            # no model loaded: emit placeholder tags so the XML still validates
            tags = [('No model',0.0) for name in names_and_labels]
        self.write_to_xml(zip(tags,names_and_labels))
        batch_num += 1
        print "Tagged %d images in %.02f seconds" % (len(names_and_labels),time.clock()-loop_time)
    print "Tagging complete. Tagged %d images in %.02f seconds" % (len(all_names_and_labels),time.clock()-start_time)
def count_reads_in_windows(bed_file, args):
    """Bin reads per chromosome and strand in parallel, then merge the
    two strand dataframes of each chromosome into one."""
    chromosome_size_dict = create_genome_size_dict(args.genome)
    chromosomes = natsorted(list(chromosome_size_dict.keys()))

    # Pick the single-end or paired-end counter and pre-bind the shared args.
    if args.paired_end:
        parallel_count_reads = partial(_count_reads_in_windows_paired_end,
                                       bed_file, args)
    else:
        parallel_count_reads = partial(_count_reads_in_windows, bed_file, args)

    info("Binning chromosomes {}".format(
        ", ".join([c.replace("chr", "") for c in chromosomes])))

    # One job per (chromosome, strand) pair; product keeps +/- adjacent.
    chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(parallel_count_reads)(chromosome_size_dict[chrom], chrom, strand)
        for chrom, strand in product(chromosomes, ["+", "-"]))

    info("Merging the bins on both strands per chromosome.")
    # Consecutive entries are the two strands of the same chromosome.
    strand_pairs = list(_pairwise(chromosome_dfs))
    merged_chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(merge_chromosome_dfs)(pair) for pair in strand_pairs)

    return merged_chromosome_dfs
def compareAlgorithms(numRuns):
    """Run the GA and SA optimizers ``numRuns`` times each (in parallel) and
    plot mean fitness per generation with a +/- 1 std-dev band for both.

    Returns (ga_fitness_histories, sa_fitness_histories) as arrays of shape
    (numRuns, n_generations).
    """
    f = plt.figure(5)
    plt.clf()
    plt.hold(True)
    # Genetic Algorithm: one independent run per job
    maxFitnessHists, minMisstepsHists = zip(*Parallel(n_jobs=-1)(delayed(GA)(i, 1000, True, False) for i in range(numRuns)))
    maxFitnessHists = np.array(maxFitnessHists)
    stdDev = maxFitnessHists.std(axis=0)
    avg = maxFitnessHists.mean(axis=0)
    plt.plot(np.arange(len(avg)), avg, color='g')
    plt.fill_between(np.arange(len(avg)), avg - stdDev, avg + stdDev, facecolor='g', alpha=0.2)
    # keep the GA histories before the variable is reused for SA
    m = maxFitnessHists
    # Simulated Annealing: same treatment
    maxFitnessHists, minMisstepsHists = zip(*Parallel(n_jobs=-1)(delayed(SA)(i, 1000, False) for i in range(numRuns)))
    maxFitnessHists = np.array(maxFitnessHists)
    stdDev = maxFitnessHists.std(axis=0)
    avg = maxFitnessHists.mean(axis=0)
    plt.plot(np.arange(len(avg)), avg, color='m', label= "Simulated annealing")
    plt.fill_between(np.arange(len(avg)), avg - stdDev, avg + stdDev, facecolor='m', alpha=0.2)
    plt.xlabel('generations / periods')
    plt.ylabel('fitness')
    green_patch = patches.Patch(color='green', label='Genetic Algorithm')
    purple_patch = patches.Patch(color='magenta', label='Simulated Annealing')
    plt.legend(handles=[green_patch, purple_patch], loc='upper left')
    f.canvas.draw()
    f.show()
    return m, maxFitnessHists
def get_correlation_between_mean_score_and_error(self): """Compute the correlation between: * mean genuine score and false reject count * mean impostor score and false acceptance count False reject count and flase reject count is computed thanks to a global threshold. This threshold is the threshold giving the EER. Correlation is computed using Pearson correlation factor. """ # We need the EER threshold eer, thr = self.get_eer_and_threshold() # We need to compute error rate of each user # Get genuine reject of each users fr = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \ (delayed(_parallel_false_reject_helper)(self.get_genuine_presentations_of_user(userid), thr, self._type) \ for userid in self._users_id)) # Get impostors accept of each users fa = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \ (delayed(_parallel_false_accept_helper)(self.get_impostor_presentations_of_user(userid), thr, self._type) \ for userid in self._users_id)) #compute the correlations return pearsonr(fr, self._genuine_scores)[0], pearsonr(fa, self._impostor_scores)[0], eer
def batch_align(image_list, dest_dir="output"): """ Correct the sharking on the series of images :param image_list: The input series of images :param dest_dir: The destination directory """ if not path.exists(dest_dir): mkdir(dest_dir) if path.isdir(dest_dir): print "Aligning %d images, output in %s, this may take a while" % (len(im_list), dest_dir) ref_img = io.imread(image_list[0]) r = Parallel(n_jobs=4, backend="threading", verbose=25)( delayed(find_shift)(io.imread(img), ref_img) for img in image_list[1:]) y_shift = map(lambda x: x[0], r) x_shift = map(lambda x: x[1], r) print min(y_shift), max(y_shift), min(x_shift), max(x_shift) crop = [int(min(y_shift)) - 1, int(max(y_shift)) + 1, int(min(x_shift)) - 1, int(max(x_shift)) + 1] correct(ref_img, (0, 0), "%s/%s" % (dest_dir, path.basename(image_list[0])), crop) Parallel(n_jobs=4, backend="threading", verbose=25)( delayed(correct)(io.imread(img), r[k], "%s/%s" % (dest_dir, path.basename(image_list[k])), crop) for k, img in enumerate(image_list[1:])) else: print "Output dir does not exists or is not a directory : %s" % dest_dir
def update_parallel(self, fixed, moving):
    """One symmetric registration update step: compute forward and backward
    velocity-field gradients in parallel, apply them, then re-integrate the
    deformation. ``fixed`` and ``moving`` are indexed from opposite ends so
    both half-flows meet in the middle."""
    if hasattr(self.regularizer, "set_operator"):
        # lazily size the regularization operator to the image grid
        self.regularizer.set_operator(shape=fixed.shape)

    # forward half: fixed traversed backwards against moving forwards
    self.forward_vector_fields.delta_vector_fields = np.array(
        Parallel(self.n_jobs)(
            delayed(derivative)(
                self.similarity,
                fixed[-i - 1],
                moving[i],
                self.deformation.backward_dets[-i - 1],
                self.forward_vector_fields[i],
                self.regularizer,
                self.learning_rate)
            for i in xrange(self.n_step_half + 1)
        )
    )
    # backward half: mirror of the above with roles of fixed/moving swapped
    self.backward_vector_fields.delta_vector_fields = np.array(
        Parallel(self.n_jobs)(
            delayed(derivative)(
                self.similarity,
                moving[-i - 1],
                fixed[i],
                self.deformation.forward_dets[-i - 1],
                self.backward_vector_fields[i],
                self.regularizer,
                self.learning_rate)
            for i in xrange(self.n_step_half + 1)
        )
    )
    self.forward_vector_fields.update()
    self.backward_vector_fields.update()
    self.integrate_vector_fields()
def multi_main(n_jobs, FILENAME, FUN, **kargs):
    """Fan a rule-induction function out over the cartesian product of its
    parameter grids, running the calls in parallel. Which grids are combined
    depends on which known function ``FUN`` is; unknown functions return 0."""
    # Build the argument combinations appropriate to the chosen function.
    if FUN == MLEM2_LERS:
        combos = product(kargs["ITERS"][0], kargs["ITERS"][1])
    elif FUN == MLEM2_delAttrRule_LERS:
        combos = product(kargs["ITERS"][0], kargs["ITERS"][1],
                         kargs["DELFUNS"], kargs["CLASSES"],
                         kargs["ATTRIBUTES"])
    elif FUN == MLEM2_delERule_LERS:
        combos = product(kargs["ITERS"][0], kargs["ITERS"][1],
                         kargs["DELFUNS"], kargs["CLASSES"],
                         kargs["ATTRIBUTE_VALUE"])
    elif FUN == MLEM2_delEAlphaRule_LERS:
        combos = product(kargs["ITERS"][0], kargs["ITERS"][1],
                         kargs["DELFUNS"], kargs["CLASSES"],
                         kargs["ATTRIBUTE_VALUE"], kargs["ALPHA"])
    else:
        print("unknown function")
        return 0

    # Single dispatch point: FILENAME first, then the unpacked combination.
    joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(FUN)(FILENAME, *combo) for combo in combos
    )
def get_population_fitness_tasks(pop,taskdata,targetdata,params):
    """Score every candidate in ``pop`` on up to two objectives —
    reconstruction error and subset correlation — each weighted by
    params.objective_weights. Objectives with zero weight are skipped.

    Relies on module globals __USE_MULTIPROC__ and num_cores for the
    parallel path. Returns (combined_scaled_scores, [max_recon, max_subsim]).
    """
    #nsplits,clf,obj_weight):
    # first get cc for each item in population
    cc_recon=numpy.zeros(len(pop))
    predacc_insample=numpy.zeros(len(pop))  # NOTE(review): never used afterwards — confirm
    if params.objective_weights[0]>0:
        if __USE_MULTIPROC__:
            cc_recon=Parallel(n_jobs=num_cores)(delayed(get_reconstruction_error)(ct,taskdata,targetdata,params) for ct in pop)
        else:
            cc_recon=[get_reconstruction_error(ct,taskdata,targetdata,params) for ct in pop]
    else:
        # weight is zero: placeholder so downstream max/scale still work
        cc_recon=[0]
    if params.objective_weights[1]>0:
        if __USE_MULTIPROC__:
            cc_subsim=Parallel(n_jobs=num_cores)(delayed(get_subset_corr)(ct,taskdata,targetdata) for ct in pop)
        else:
            cc_subsim=[get_subset_corr(ct,taskdata,targetdata) for ct in pop]
    else:
        cc_subsim=[0]
    # remember raw maxima before z-scoring
    maxcc=[numpy.max(cc_recon),numpy.max(cc_subsim)]
    cc_recon=scale(cc_recon)
    cc_subsim=scale(cc_subsim)
    try:
        print('corr recon-subsim:',numpy.corrcoef(cc_recon,cc_subsim)[0,1])
    except:
        # diagnostic only; e.g. fails when either vector is a single placeholder
        pass
    # weighted sum of the scaled objectives
    cc=cc_recon*params.objective_weights[0] + cc_subsim*params.objective_weights[1]
    return cc,maxcc
def _addCols(df):
    """Add corrected stellar parameters to ``df`` in place (and return it):
    applies the Mortier+ 2014 logg correction, derives radius/mass via the
    Torres calibration for both old and new parameter sets, and adds the
    percentage differences. Note: mutates and renames columns of ``df``."""
    # column names for the old parameters and their 'new' counterparts
    l = ['teff', 'tefferr', 'logg', 'loggerr', 'feh', 'feherr']
    r = [i+'new' for i in l]

    # Apply correction from Mortier+ 2014
    df.rename(columns={'loggnew': 'loggSpec'}, inplace=True)
    # correction only valid inside this Teff/logg regime
    idx = (df.teffnew >= 4500) & (df.teffnew <= 7050) & (df.loggSpec > 4.2)
    df.loc[idx, 'loggnew'] = df.loggSpec[idx] - 3.89E-4*df.teffnew[idx] + 2.10
    df.loc[~idx, 'loggnew'] = df.loggSpec[~idx]
    df.loc[df.loggnew < 4.2, 'loggnew'] = df.loggSpec[df.loggnew < 4.2]

    # Get R and M from Torres+ (first element of each returned pair)
    df['R'] = np.array(Parallel(n_jobs=4)(delayed(radTorres)(*df.loc[star, l].values) for star in df.index))[:, 0]
    df['Rnew'] = np.array(Parallel(n_jobs=4)(delayed(radTorres)(*df.loc[star, r].values) for star in df.index))[:, 0]
    df['Rperc'] = (df.Rnew-df.R)/df.R * 100
    df['M'] = np.array(Parallel(n_jobs=4)(delayed(massTorres)(*df.loc[star, l].values) for star in df.index))[:, 0]
    df['Mnew'] = np.array(Parallel(n_jobs=4)(delayed(massTorres)(*df.loc[star, r].values) for star in df.index))[:, 0]
    df['Mperc'] = (df.Mnew-df.M)/df.M * 100
    df['loggPerc'] = (df.logg-df.loggnew)/df.logg * 100

    # round for presentation
    df.R = df.R.apply(round, args=(3,))
    df.Rnew = df.Rnew.apply(round, args=(3,))
    df.Rperc = df.Rperc.apply(round, args=(1,))
    df.Mnew = df.Mnew.apply(round, args=(3,))
    df.M = df.M.apply(round, args=(3,))
    df.Mperc = df.Mperc.apply(round, args=(1,))
    df.loggPerc = df.loggPerc.apply(round, args=(1,))
    df.loggnew = df.loggnew.apply(round, args=(2,))
    return df
def parallel(func, inputs, n_jobs, expand_args=False):
    """
    Convenience wrapper around joblib's parallelization.

    When expand_args is True each element of ``inputs`` is unpacked as
    positional arguments of ``func``; otherwise it is passed as the single
    argument. Returns the list of results.
    """
    runner = Parallel(n_jobs=n_jobs)
    if expand_args:
        jobs = (delayed(func)(*args) for args in inputs)
    else:
        jobs = (delayed(func)(arg) for arg in inputs)
    return runner(jobs)
def analysis(foldername, outdir, referencenum, exten, n_estimators, min_samples_leaf, max_depth):
    """
    Start the analysis
    Input: 1) Path to the driver directory
           2) Path where the submission file should be written
           3) Number of drivers to compare against

    Pipeline: optionally compute (non-standardized) features for every driver,
    derive means/stds for standardization, build reference-driver data, score
    every driver in parallel with a random forest, and write the submission CSV.
    """
    start = datetime.now()
    submission_id = datetime.now().strftime("%H_%M_%B_%d_%Y")
    # every subdirectory of foldername is one driver
    folders = [os.path.join(foldername, f) for f in os.listdir(foldername) if os.path.isdir(os.path.join(foldername, f))]
    nonstandfeatfile = 'Features66-NOSTAND-nreprot.csv'
    # generates csv file with NON STANDARIZED features to calculate means and standards afterwards:
    if os.path.exists(nonstandfeatfile):
        print 'initial calculation of all features for standarizing purposes will be skipped because file exists:',nonstandfeatfile
        pass
    else:
        allfeats = Parallel(n_jobs=60)(delayed(F_Features4onedriver)(folder, exten) for folder in folders)
        # append every driver's per-trip feature rows to the csv
        with open(nonstandfeatfile, 'a') as featsfile:
            csvwriter = csv.writer(featsfile, delimiter=',')
            for item in allfeats:
                for i in xrange(len(item)):
                    csvwriter.writerow(item[i])
    ## Choose between one of the following two lines:
    # STAND = False
    STAND = True
    if STAND:
        # calculates means and standard deviations in features:
        means, stds = F_calcmeanstsds(nonstandfeatfile)
    else:
        means = None
        stds = None
    # sample drivers to compare individual ones:
    seed(13)  # fixed seed so the reference sample is reproducible
    # referencefolders = [folders[i] for i in sorted(sample(xrange(len(folders)), referencenum))]
    # NOTE(review): referencenum is ignored; a third of all drivers is sampled instead — confirm
    referencefolders = [folders[i] for i in sorted(sample(xrange(len(folders)), int(len(folders)/3)))]
    print 'Generating refdata not in parallel, please wait some minutes...'
    referencedrivers = []
    for referencefolder in referencefolders:
        # referencedrivers.append(Driver(referencefolder, exten, STAND, means=means, stds=stds))
        referencedrivers.append(DriverSelect(referencefolder, exten, STAND, means=means, stds=stds))
    generatedata(referencedrivers)
    # score every driver against the reference data in parallel
    results = Parallel(n_jobs=60)(delayed(perform_analysis)(folder, exten, STAND, means, stds, n_estimators, min_samples_leaf, max_depth) for folder in folders)
    namesubmisfile = "RFR13-nrro-R0.3-spacbrtrpalxyab-std-e%i-s%i-d%i.csv" % (n_estimators, min_samples_leaf, max_depth)
    with open(os.path.join(outdir, namesubmisfile), 'w') as writefile:
        writefile.write("driver_trip,prob\n")
        for item in results:
            writefile.write("%s\n" % item)
    print 'submission file ',namesubmisfile,' written'
    print 'Done, elapsed time: %s' % str(datetime.now() - start)
def make_surrogates_ctps(phase_array, nrepeat=1000, mode='shuffle', n_jobs=4,
                         verbose=None):
    ''' calculate surrogates from an array of (phase) trials
        by means of shuffling the phase

    Parameters
    ----------
    phase_array : 4d ndarray of dimension [nfreqs x ntrials x nchan x nsamples]

    Optional:
    nrepeat: number of surrogate repetitions (default 1000)
    mode: 2 different modi are allowed.
        'mode=shuffle' will randomly shuffle the phase values. This is the default
        'mode=shift' will randomly shift the phase values
    n_jobs: number of cpu nodes to use
    verbose: verbose level (does not work yet)

    Returns
    -------
    pk : Kuiper pK statistics of the surrogates,
         shape [nfreqs x nrepeat x nsources x nsamples]
    '''
    from joblib import Parallel, delayed
    from mne.parallel import parallel_func
    from mne.preprocessing.ctps_ import kuiper
    nfreq, ntrials, nsources, nsamples = phase_array.shape
    pk = np.zeros((nfreq, nrepeat, nsources, nsamples), dtype='float32')

    # create surrogates: parallised over nrepeats
    parallel, my_kuiper, _ = parallel_func(kuiper, n_jobs, verbose=verbose)
    for ifreq in range(nfreq):
        for isource in range(nsources):
            # print ">>> working on frequency: ",bp[ifreq,:]," source: ",isource+1
            print ">>> working on frequency range: ",ifreq + 1," source: ",isource + 1
            pt = phase_array[ifreq, :, isource, :]  # extract [ntrials, nsamp]
            if(mode=='shuffle'):
                # shuffle phase values for all repetitions
                pt_s = Parallel(n_jobs=n_jobs, verbose=0)(delayed(shuffle_data)
                       (pt) for i in range(nrepeat))
            else:
                # shift all phase values for all repetitions
                pt_s = Parallel(n_jobs=n_jobs, verbose=0)(delayed(shift_data)
                       (pt) for i in range(nrepeat))
            # calculate Kuiper's statistics for each phase array
            out = parallel(my_kuiper(i) for i in pt_s)
            # store stat and pk in different arrays
            out = np.array(out, dtype='float32')
            # ks[ifreq,:,isource,:] = out[:,0,:] # is actually not needed
            pk[ifreq, :, isource, :] = out[:, 1, :]  # [nrepeat, pk_idx, nsamp]
    return pk
def run(f, r, args=None, threads=0, verbose=0):
    """Apply ``f`` to every element of ``r`` in parallel.

    ``threads == 0`` means "one job per CPU core". When ``args`` is truthy
    its elements are appended as extra positional arguments of each call.
    Returns the list of results.
    """
    if threads == 0:
        threads = multiprocessing.cpu_count()
    executor = Parallel(n_jobs=threads, verbose=verbose)
    if args:
        return executor(delayed(f)(item, *args) for item in r)
    return executor(delayed(f)(item) for item in r)
def __init__(self, data_same, mean, std, nframes=1, batch_size=1, marginf=0, only_same=False):
    """Build a paired dataset for siamese training.

    ``data_same`` holds same-word pairs as tuples whose fields (by the
    indexing below) are: [0] word type, [1]/[2] speaker ids, [3]/[4] the two
    feature sequences, [5] DTW cost. A matching set of different-word pairs
    is sampled randomly from it.
    """
    dtw_costs = zip(*data_same)[5]
    self._orig_x1s = zip(*data_same)[3]
    self._orig_x2s = zip(*data_same)[4]
    self._words_frames = numpy.asarray([fb.shape[0] for fb in self._orig_x1s])
    self.print_mean_DTW_costs(dtw_costs)
    self._mean = mean
    self._std = std
    self._nframes = nframes
    self._nwords = batch_size
    self._margin = marginf
    self._only_same = only_same
    # marginf says if we pad taking a number of frames as margin
    # fraction of same-word pairs spoken by the same speaker (diagnostic)
    same_spkr = 0
    for i, tup in enumerate(data_same):
        if tup[1] == tup[2]:
            same_spkr += 1
    ratio = same_spkr * 1. / len(data_same)
    print "ratio same spkr / all for same:", ratio
    # Sample one different-word pair per same-word pair: pick two entries of
    # different word types, pick one of the two sequences from each, and
    # truncate both to the shorter length.
    data_diff = []
    ldata_same = len(data_same)-1
    same_spkr_diff = 0
    for i in xrange(len(data_same)):
        word_1 = random.randint(0, ldata_same)
        word_1_type = data_same[word_1][0]
        word_2 = random.randint(0, ldata_same)
        while data_same[word_2][0] == word_1_type:
            word_2 = random.randint(0, ldata_same)
        wt1 = random.randint(0, 1)
        wt2 = random.randint(0, 1)
        if data_same[word_1][1+wt1] == data_same[word_2][1+wt2]:
            same_spkr_diff += 1
        p1 = data_same[word_1][3+wt1]
        p2 = data_same[word_2][3+wt2]
        r1 = p1[:min(len(p1), len(p2))]
        r2 = p2[:min(len(p1), len(p2))]
        data_diff.append((r1, r2))
    ratio = same_spkr_diff * 1. / len(data_diff)
    print "ratio same spkr / all for diff:", ratio
    # keep the two sequences plus the last two fields of each same pair
    self._data_same = zip(zip(*data_same)[3], zip(*data_same)[4], zip(*data_same)[-2], zip(*data_same)[-1])
    self._data_diff = data_diff
    self.remix()
    if self._nframes > 1:
        # pad the orig_xes1/2 once and for all
        self._orig_x1s = joblib.Parallel(n_jobs=cpu_count()-3)(
            joblib.delayed(pad)(x, self._nframes, self._margin) for x in self._orig_x1s)
        self._orig_x2s = joblib.Parallel(n_jobs=cpu_count()-3)(
            joblib.delayed(pad)(x, self._nframes, self._margin) for x in self._orig_x2s)
def transform(self, catalog, subjects_id):
    """Fit subject-level GLMs for every document in ``catalog``.

    Writes study/model key files under the study directory, encodes the
    design matrices with an IntraEncoder, runs one GLM per subject in
    parallel, and optionally resamples the resulting images. Returns the
    per-subject GLM output documents.
    """
    # work on a copy: the catalog entries are passed to writers below
    catalog_ = copy.deepcopy(catalog)
    study_dir = make_dir(self.data_dir, self.study_id, strict=False)
    if isinstance(self.subject_key_, dict):
        save_table(self.subject_key_, os.path.join(study_dir, 'subject_key.txt'))
    save_table(self.task_key_, os.path.join(study_dir, 'task_key.txt'), merge=self.merge_tasks)
    save_table({'TR': catalog_[0]['tr']}, os.path.join(study_dir, 'scan_key.txt'))
    model_dir = make_dir(study_dir, 'models', self.model_id, strict=False)
    save_task_contrasts(model_dir, catalog_[0], merge=self.merge_tasks)
    save_condition_key(model_dir, catalog_[0], merge=self.merge_tasks)
    # any parallelism requested -> let joblib use all cores for the encoder
    n_jobs = -1 if self.n_jobs != 1 else 1
    self.encoder_ = IntraEncoder(hrf_model=self.hrf_model,
                                 drift_model=self.drift_model,
                                 memory=self.memory,
                                 n_jobs=n_jobs)
    all_niimgs = self.encoder_.fit_transform(catalog_, subjects_id)
    if subjects_id is None:
        subjects_id = [doc['subject_id'] for doc in catalog]
    # one GLM per subject, each with its own reporter directory
    outputs = Parallel(n_jobs=self.n_jobs)(
        delayed(_compute_glm)(
            LinearModeler(masker=self.masker,
                          reporter=os.path.join(
                              study_dir, subject_id, 'model', self.model_id),
                          glm_model=self.glm_model,
                          hrf_model=self.hrf_model,
                          contrast_type=self.contrast_type,
                          output_z=self.output_z,
                          output_stat=self.output_stat,
                          output_effects=self.output_effects,
                          output_variance=self.output_variance),
            niimgs=niimgs,
            design_matrices=design_matrices,
            contrasts=doc['contrasts'])
        for subject_id, doc, niimgs, design_matrices in zip(
            subjects_id, catalog_, all_niimgs, self.encoder_.design_matrices_))
    if self.resample:
        # resample every output image of every contrast to the target grid
        Parallel(n_jobs=n_jobs)(
            delayed(_resample_img)(
                doc[dtype][cid],
                self.target_affine,
                self.target_shape,
            ) for doc in outputs for dtype in doc for cid in doc[dtype])
    return outputs
def test_simple(loop):
    """Smoke test: joblib's 'distributed' backend against a local cluster
    returns the same results as sequential calls, twice in a row (the second
    run checks the backend survives reuse)."""
    with cluster() as (s, [a, b]):
        with parallel_backend('distributed', loop=loop,
                              scheduler_host=('127.0.0.1', s['port'])):
            seq = Parallel()(delayed(inc)(i) for i in range(10))
            assert seq == [inc(i) for i in range(10)]

            seq = Parallel()(delayed(inc)(i) for i in range(10))
            assert seq == [inc(i) for i in range(10)]
def process_batch(image_db, label_db, fnames_b, y_b): print "Reading the images and labels" with Parallel(n_jobs=-1) as parallel: Xb = parallel(delayed(load_im_tuple) (fname, i) for i, fname in fnames_b) yb = parallel(delayed(load_y_tuple)(y, i) for i, y in y_b) print "Writing image data" _write_batch_lmdb(image_db, Xb) print "Writing label data" _write_batch_lmdb(label_db, yb)
def get_training_sets():
    """Load the feature and target arrays from disk in parallel and
    stack them into single arrays."""
    feature_chunks = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(get_features_for_path)(path)
        for path in TRAIN_DIR.iterdir())
    target_chunks = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(get_target_for_path)(path)
        for path in TARGET_DIR.iterdir())
    features = np.concatenate(list(feature_chunks))
    targets = np.concatenate(list(target_chunks))
    logging.info("Finished loading")
    return features, targets
def rptree_leaf_array_parallel(rp_forest):
    """Extract the leaf array of every tree in ``rp_forest``, using a
    thread-preferring joblib pool, and return the list of results."""
    extract = joblib.delayed(get_leaves_from_tree)
    return joblib.Parallel(n_jobs=-1, prefer="threads")(
        extract(tree) for tree in rp_forest)
# NOTE(review): this fragment appears to sit inside a larger training loop —
# `models`, `ops`, `shape`, `device`, `total`, `y` are defined elsewhere.
# Transform the batch to DCT space, then run/train one model per slice.
dct_x = torch_apply(dct.dct, dct_x)
dct_x = dct_x.to(device)
y = y.to(device)
outputs_grad = []  # forward outputs that keep autograd history
outputs = []       # detached copies shared with the worker threads
for i in range(len(models)):
    out = models[i](dct_x[i, ...])
    outputs_grad.append(out)
    outputs.append(out.detach())
# This line makes multiple calls to the train_slice function.
# Parallelization: one thread per slice model (thread backend, so the
# models/optimisers are shared objects, not copies).
Parallel(n_jobs=16, prefer="threads", verbose=0)(
    delayed(train_slice)(i, models[i], dct_x[i, ...], y, outputs, ops[i])
    for i in range(len(models))
)
# Re-evaluate every (just-trained) slice model and assemble the full tensor.
res = torch.empty(shape[0], 10, shape[2])
for i in range(len(models)):
    res[i, ...] = models[i](dct_x[i, ...])
# Back to the original domain, then collapse tubes to class scores.
res = torch_apply(dct.idct, res).to(device)
res = scalar_tubal_func(res)
res = torch.transpose(res, 0, 1)
criterion = nn.CrossEntropyLoss()
total_loss = criterion(res, y)
_, predicted = torch.max(res, 1)
total += y.size(0)
Y_test_result = np.concatenate((Y_test_result, Y)) del [[X, Y, gp, Y_pred]] Y_test_result = np.ravel(Y_test_result) Y_pred_result = np.ravel(Y_pred_result) SVM_overall_accuracy = accuracy_score(Y_test_result, Y_pred_result) print("subject = "+str(testing_subject)+" window_size = "+str(window_size)+" phase_number = "+str(phase_number)+" Accuracy = "+str(SVM_overall_accuracy)) base_path_dir = "/HDD/hipexo/Inseung/Result/" <<<<<<< HEAD text_file1 = base_path_dir + "SVM_phasesweep.txt" ======= text_file1 = base_path_dir + SVM_saving_file + ".txt" >>>>>>> e1f27d97f3bebba058c4f85ce5f6a72f3dceee6f msg1 = ' '.join([str(testing_subject),str(window_size),str(transition_point),str(phase_number),str(SVM_overall_accuracy),"\n"]) return text_file1, msg1 run_combos = [] for testing_subject in [6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 27 ,28]: for window_size in [350]: for transition_point in [0.2]: for phase_number in [1]: for kernel_type in ['rbf']: run_combos.append([testing_subject, window_size, transition_point, phase_number, kernel_type]) result = Parallel(n_jobs=-1)(delayed(SVM_parallel)(combo) for combo in run_combos) for r in result: with open(r[0],"a+") as f: f.write(r[1])
def uncles(X, type='A', Ks=[n for n in range(4, 21, 4)], params=None, methods=None,
           methodsDetailed=None, U=None, Utype='PM', relabel_technique='minmin',
           setsP=None, setsN=None, dofuzzystretch=False, wsets=None, wmethods=None,
           GDM=None, smallestClusterSize=11, CoPaMfinetrials=1, CoPaMfinaltrials=1,
           binarise_techniqueP='DTB',
           binarise_paramP=np.arange(0.0, 1.1, 0.1, dtype='float'),
           binarise_techniqueN='DTB',
           binarise_paramN=np.concatenate(([sys.float_info.epsilon],
                                           np.arange(0.1, 1.1, 0.1, dtype='float'))),
           Xnames=None, deterministic=False, ncores=1):
    """Run the UNCLES consensus-clustering pipeline over the datasets in X.

    Clusters each dataset at every K in `Ks` with the selected base methods,
    fuses the partitions into CoPaMs (Consensus Partition Matrices), and
    binarises them; type 'A' uses all datasets together, type 'B' contrasts
    positive (`setsP`) against negative (`setsN`) dataset subsets.
    Returns a namedtuple (B, Mc, params, X, U).

    NOTE(review): list/ndarray default arguments (Ks, binarise_paramP/N) are
    mutable defaults shared across calls — safe only if never mutated; verify.
    """
    Xloc = ds.listofarrays2arrayofarrays(X)
    L = len(Xloc)  # Number of datasets

    # Fix parameters
    if params is None:
        params = {}
    if setsP is None:
        setsP = [x for x in range(int(math.floor(L / 2)))]
    if setsN is None:
        setsN = [x for x in range(int(math.floor(L / 2)), L)]
    # Reorder datasets so positives come first, then negatives.
    setsPN = np.array(np.concatenate((setsP, setsN), axis=0), dtype=int)
    Xloc = Xloc[setsPN]
    L = np.shape(Xloc)[0]  # Number of datasets (after subsetting)
    if wsets is None:
        wsets = np.array([1 for x in range(L)])
    else:
        wsets = np.array(wsets)[setsPN]
    if GDM is None:
        # No gene-dataset map given: every gene is assumed present everywhere.
        Ng = np.shape(Xloc[0])[0]
        GDMloc = np.ones([Ng, L], dtype='bool')
    else:
        GDMloc = GDM[:, setsPN]
        Ng = GDMloc.shape[0]
    if Xnames is None:
        Xnames = ['X{0}'.format(l) for l in range(L)]

    if methods is None:
        methods = [['k-means']]
    else:
        # HC clustering is disallowed on very large datasets (pdist memory).
        largest_DS = np.max([x.shape[0] for x in Xloc])
        if (largest_DS > maxgenesinsetforpdist):
            methods = [
                m for m in methods
                if 'hc' not in [entry.lower() for entry in m]
            ]
            if not methods:
                io.log('No valid base clustering can be used. Please note that clust would not use HC clustering ' \
                       'on datasets with more than {0} genes. You have a dataset with {1} genes.' \
                       ''.format(maxgenesinsetforpdist, largest_DS))
                io.log('Clust will terminate here.')
                io.log(op.bottomline(), addextrastick=False)
                sys.exit()
    if methodsDetailed is None:
        methodsDetailedloc = np.array([methods for l in range(L)])
    else:
        methodsDetailedloc = methodsDetailed[setsPN]
    if wmethods is None:
        wmethods = [[1 for x in m] for m in methodsDetailedloc]
    elif not isinstance(wmethods[0], (list, tuple, np.ndarray)):
        # NOTE(review): tiling `methods` (names) rather than `wmethods`
        # (weights) looks like a copy-paste slip — confirm intent.
        wmethods = np.tile(methods, [L, 1])
    else:
        wmethods = np.array(wmethods)[setsPN]

    setsPloc = [ii for ii in range(len(setsP))]
    # NOTE(review): if L <= len(setsPloc), setsNloc is never bound but is
    # referenced in the params dict below — potential NameError; verify.
    if L > len(setsPloc):
        setsNloc = [ii for ii in range(len(setsPloc), L)]

    Ds = [nu.closest_to_square_factors(k) for k in Ks]  # Grid sizes for the SOMs method for each value of K
    NKs = len(Ks)  # Number of K values

    # Clustering: either run the base methods, or accept precomputed U.
    if U is None:
        Utype = 'PM'
        Uloc = np.array([None] * (L * NKs)).reshape([L, NKs])
        # Rough work estimate for the progress bar.
        totalparallel = np.sum(Ks) * np.sum(
            [len(meths) for meths in methodsDetailedloc])
        for meths in methodsDetailedloc:
            for meth in meths:
                if 'k-means' in meth:
                    totalparallel += np.max(Ks) * np.max(Ks)
                    continue
        io.resetparallelprogress(totalparallel)

        for l in range(L):
            # Cache kmeans initialisations for the dataset once to save time:
            cl.cache_kmeans_init(Xloc[l], Ks, methodsDetailedloc[l],
                                 datasetID=l)

            # Now go to parallel clustering (one job per K value)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                Utmp = Parallel(n_jobs=ncores)\
                    (delayed(clustDataset)
                     (Xloc[l], Ks[ki], Ds[ki], methodsDetailedloc[l],
                      GDMloc[:, l], Ng, l) for ki in range(NKs))
                Utmp = [u for u in Utmp]
                for ki in range(NKs):
                    Uloc[l, ki] = Utmp[ki]
                gc.collect()
    else:
        Uloc = ds.listofarrays2arrayofarrays(U)[setsPN]

    # Calculate a CoPaM for each dataset at each K
    CoPaMsFine = np.array([None] * (L * NKs)).reshape([L, NKs])
    for l in range(L):
        for ki in range(NKs):
            if Utype.lower() == 'pm':
                CoPaMsFineTmp = [
                    generateCoPaM(Uloc[l, ki],
                                  relabel_technique=relabel_technique,
                                  X=[Xloc[l]],
                                  w=wmethods[l],
                                  K=Ks[ki],
                                  GDM=GDMloc[:, l].reshape([-1, 1]))
                    for i in range(CoPaMfinetrials)
                ]
            elif Utype.lower() == 'idx':
                CoPaMsFineTmp = \
                    [generateCoPaMfromidx(Uloc[l, ki],
                                          relabel_technique=relabel_technique,
                                          X=Xloc,
                                          w=wmethods[l],
                                          K=Ks[ki])
                     for i in range(CoPaMfinetrials)]
            else:
                raise ValueError('Invalid Utype')
            # Fuse the fine trials into a single per-dataset CoPaM.
            CoPaMsFine[l, ki] = generateCoPaM(
                CoPaMsFineTmp,
                relabel_technique=relabel_technique,
                X=[Xloc[l]],
                GDM=GDMloc[:, l].reshape([-1, 1]))

            if dofuzzystretch:
                CoPaMsFine[l, ki] = fuzzystretch(CoPaMsFine[l, ki])

    # Calculate the final CoPaM for each K (per trial); type 'B' keeps
    # separate positive (P) and negative (N) consensus matrices.
    CoPaMs = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    CoPaMsP = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    CoPaMsN = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    for t in range(CoPaMfinaltrials):
        for ki in range(NKs):
            if type == 'A':
                if Utype.lower() == 'pm':
                    CoPaMs[t, ki] = generateCoPaM(
                        CoPaMsFine[:, ki],
                        relabel_technique=relabel_technique,
                        w=wsets,
                        X=Xloc,
                        GDM=GDMloc)
                elif Utype.lower() == 'idx':
                    CoPaMs[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[:, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets,
                        GDM=GDMloc)
                else:
                    raise ValueError('Invalid Utype')
            elif type == 'B':
                if Utype.lower() == 'pm':
                    CoPaMsP[t, ki] = generateCoPaM(
                        CoPaMsFine[setsPloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsPloc],
                        GDM=GDMloc[:, setsPloc])
                    CoPaMsN[t, ki] = generateCoPaM(
                        CoPaMsFine[setsNloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsNloc],
                        GDM=GDMloc[:, setsNloc])
                elif Utype.lower() == 'idx':
                    CoPaMsP[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[setsPloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsPloc],
                        GDM=GDMloc[:, setsPloc])
                    CoPaMsN[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[setsNloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsNloc],
                        GDM=GDMloc[:, setsNloc])
                else:
                    raise ValueError('Invalid Utype')
            else:
                raise ValueError('Invalid UNCLES type. It has to be either A or B')

    # Binarise
    NPp = len(binarise_paramP)  # Number of P params
    NNp = len(binarise_paramN)  # Number of N params
    if type == 'A':
        B = np.zeros([CoPaMfinaltrials, NPp, 1, NKs], dtype=object)
        Mc = np.zeros([CoPaMfinaltrials, NKs], dtype=object)
    elif type == 'B':
        B = np.zeros([CoPaMfinaltrials, NPp, NNp, NKs], dtype=object)
        Mc = np.zeros([CoPaMfinaltrials, NKs], dtype=object)

    for t in range(CoPaMfinaltrials):
        for ki in range(NKs):
            if type == 'A':
                # Pre-sorting binarisation
                for p in range(NPp):
                    B[t, p, 0, ki] = binarise(CoPaMs[t, ki],
                                              binarise_techniqueP,
                                              binarise_paramP[p])
                Mc[t, ki] = [np.sum(Bp, axis=0) for Bp in B[t, :, 0, ki]]

                # Sorting
                CoPaMs[t, ki] = sortclusters(CoPaMs[t, ki], Mc[t, ki],
                                             smallestClusterSize)

                # Post-sorting binarisation (re-run after cluster reordering)
                for p in range(NPp):
                    B[t, p, 0, ki] = binarise(CoPaMs[t, ki],
                                              binarise_techniqueP,
                                              binarise_paramP[p])
                Mc[t, ki] = [np.sum(Bp, axis=0) for Bp in B[t, :, 0, ki]]
            elif type == 'B':
                # Pre-sorting binarisation
                BP = [
                    binarise(CoPaMsP[t, ki], binarise_techniqueP,
                             binarise_paramP[p]) for p in range(NPp)
                ]
                McP = [np.sum(BPp, axis=0) for BPp in BP]

                BN = [
                    binarise(CoPaMsN[t, ki], binarise_techniqueN,
                             binarise_paramN[p]) for p in range(NNp)
                ]
                McN = [np.sum(BNp, axis=0) for BNp in BN]

                # Sorting
                CoPaMsP[t, ki] = sortclusters(CoPaMsP[t, ki], McP,
                                              smallestClusterSize)
                CoPaMsN[t, ki] = sortclusters(CoPaMsN[t, ki], McN,
                                              smallestClusterSize)

                # Post-sorting binarisation
                BP = [
                    binarise(CoPaMsP[t, ki], binarise_techniqueP,
                             binarise_paramP[p]) for p in range(NPp)
                ]
                McP = [np.sum(BPp, axis=0) for BPp in BP]

                BN = [
                    binarise(CoPaMsN[t, ki], binarise_techniqueN,
                             binarise_paramN[p]) for p in range(NNp)
                ]
                McN = [np.sum(BNp, axis=0) for BNp in BN]

                # UNCLES B logic: keep positive-set clusters, knock out any
                # gene that belongs to some negative-set cluster.
                for pp in range(NPp):
                    for pn in range(NNp):
                        B[t, pp, pn, ki] = BP[pp]
                        B[t, pp, pn, ki][np.any(BN[pn], axis=1)] = False

                # Fill Mc (cluster sizes per P/N parameter pair)
                Mc[t, ki] = [None] * Ks[ki]
                for k in range(Ks[ki]):
                    Mc[t, ki][k] = np.zeros([NPp, NNp])
                    for pp in range(NPp):
                        for pn in range(NNp):
                            Mc[t, ki][k][pp, pn] = np.sum(B[t, pp, pn, ki][:, k])

    # Prepare and return the results:
    params = dict(
        params, **{
            'methods': methods,
            'setsP': setsPloc,
            'setsN': setsNloc,
            'dofuzzystretch': dofuzzystretch,
            'type': type,
            'Ks': Ks,
            'NKs': NKs,
            'wsets': wsets,
            'wmethods': wmethods,
            'Ds': Ds,
            'L': L,
            'CoPaMs': CoPaMs,
            'smallestclustersize': smallestClusterSize,
            'GDM': GDMloc
        })

    UnclesRes = collections.namedtuple('UnclesRes',
                                       ['B', 'Mc', 'params', 'X', 'U'])
    return UnclesRes(B, Mc, params, Xloc, Uloc)
def main():
    """Compute pairwise Mel-cepstral distortion (MCD) between converted
    utterances and ground-truth features, and write per speaker-pair means.

    Reads paths and settings from CLI args; writes "org tar mean_mcd" lines
    to --out (or stdout when --out is omitted).
    """
    parser = argparse.ArgumentParser(description="calculate MCD.")
    parser.add_argument("--conf", type=str, help="configuration file")
    parser.add_argument("--spkr_conf", type=str, help="speaker configuration file")
    parser.add_argument(
        "--featdir",
        type=str,
        help="root directory of ground truth h5",
    )
    parser.add_argument("--outwavdir", type=str, help="converted waveform directory")
    parser.add_argument(
        "--out",
        type=str,
        help="if omitted, then output to sys.stdout",
    )
    parser.add_argument("--n_jobs", default=1, type=int, help="number of parallel jobs")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) " "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    spkr_conf = load_yaml(args.spkr_conf)

    # load converted files. If mcep, use h5; else, waveform
    if conf["output_feat_type"] == "mcep":
        converted_files = sorted(list(Path(args.outwavdir).glob("*.h5")))
    else:
        converted_files = sorted(list(Path(args.outwavdir).rglob("*.wav")))
    logging.info(f"number of utterances = {len(converted_files)}")

    # load ground truth scp
    featdir = Path(args.featdir) / conf["feature"]["label"]
    gt_feats = open_featsscp(featdir / "eval" / "feats.scp")

    if args.out is None:
        out = sys.stdout
    else:
        out = open(args.out, "w", encoding="utf-8")
    # BUG FIX: the output file was previously never closed, so buffered
    # results could be lost; close it deterministically (but never stdout).
    try:
        MCD_list = Parallel(args.n_jobs)([
            delayed(calculate)(cv_path, gt_feats, conf, spkr_conf)
            for cv_path in converted_files
        ])

        # summarize by (source speaker, target speaker) pair
        pairwise_MCD = {}
        for k, v in MCD_list:
            orgspk, tarspk, _ = k.split("-")
            pair = orgspk + " " + tarspk
            pairwise_MCD.setdefault(pair, []).append(v)

        for k in sorted(pairwise_MCD.keys()):
            mcd_list = pairwise_MCD[k]
            mean_mcd = float(sum(mcd_list) / len(mcd_list))
            out.write(f"{k} {mean_mcd:.3f}\n")
    finally:
        if out is not sys.stdout:
            out.close()
def calculate_accuracy(
        path_embeddings,
        meter_acc: tnt.meter.ClassErrorMeter,
        meter_auc: tnt.meter.AUCMeter,
        type='range',
        norm='l2',
        triplet_similarity='cos',
        mode='cpu',
        embedding_size=None,
        class_max_dist=None,  # precomputed
        class_centroids=None,
        y_list=None,  # precomputed
        sample_count=None,  # precomputed
        paths_embs_idx_path_pairs=None):  # precomputed
    """Feed classification accuracy/AUC meters from per-class embedding
    memmaps stored under ``path_embeddings``.

    When the precomputed arguments are None, first computes per-class
    centroids and per-class max distances (with the top 10% of distances
    dropped as noise) from the ``<class>.json``/``<class>.mmap`` file pairs.
    A sample-by-class distance matrix is cached in ``dists.mmap``.

    type='range': a sample scores a class when it falls inside the class'
    max-dist radius; otherwise classification is by minimum centroid distance.

    Returns (class_max_dist, class_centroids, y_list, sample_count,
    paths_embs_idx_path_pairs) so the caller can reuse them.
    """
    paths_embs = FileUtils.listSubFiles(path_embeddings)

    # calculate centroids first (only when not supplied precomputed)
    if class_max_dist is None:
        class_centroids = {}
        class_max_dist = {}
        y_list = []
        paths_embs_idx_path_pairs = []
        sample_count = 0
        for path_emb in paths_embs:
            if path_emb.endswith('.json'):
                # file name stem is the integer class label
                y_each = int(os.path.basename(path_emb).split('.')[0])
                path_emb_json = f'{path_embeddings}/{y_each}.json'
                path_emb_mem = f'{path_embeddings}/{y_each}.mmap'
                emb_json = FileUtils.loadJSON(path_emb_json)
                emb_mem = np.memmap(path_emb_mem,
                                    mode='r',
                                    dtype=np.float16,
                                    shape=(emb_json['count'], embedding_size))
                paths_embs_idx_path_pairs.append((sample_count, y_each))
                sample_count += emb_json['count']
                # np.int is removed in NumPy>=1.24; plain int is equivalent
                y_list += (np.ones((emb_json['count'], ), dtype=int) *
                           y_each).tolist()

                class_centroids[y_each] = np.average(emb_mem, axis=0)
                if norm == 'l2':
                    class_centroids[y_each] = normalize_vec(
                        class_centroids[y_each])

                np_class_centroids_tiled = np.tile(class_centroids[y_each],
                                                   (len(emb_mem), 1))
                list_dists = get_distance(np_class_centroids_tiled, emb_mem,
                                          triplet_similarity, mode).tolist()
                list_dists = sorted(list_dists, reverse=False)
                # drop 10 top percent embeddings as they could contain noise
                list_dists = list_dists[:max(2, int(len(list_dists) * 0.9))]
                class_max_dist[y_each] = list_dists[-1]  # last largest distance

    classes_size = int(np.max(y_list)) + 1

    # store distance matrix as memmap for optimization
    path_dists_mem = f'{path_embeddings}/dists.mmap'
    is_exist_dists_mem = os.path.exists(path_dists_mem)
    dists_mem = np.memmap(path_dists_mem,
                          mode='r+' if is_exist_dists_mem else 'w+',
                          dtype=np.float16,
                          shape=(sample_count, classes_size))

    # BUG FIX: this path was previously f'{path_embeddings}/dists.mmap',
    # i.e. the SAME file as the distance matrix above — opening it again
    # with a different shape clobbered dists_mem. Centroids get their own file.
    path_centroids_mem = f'{path_embeddings}/centroids.mmap'
    is_exist_centroids_mem = os.path.exists(path_centroids_mem)
    centroids_mem = np.memmap(path_centroids_mem,
                              mode='r+' if is_exist_centroids_mem else 'w+',
                              dtype=np.float16,
                              shape=(classes_size, embedding_size))
    for key, value in class_centroids.items():
        centroids_mem[key] = value

    if not is_exist_dists_mem:
        # fill dists.mmap once, one threaded job per class file
        Parallel(n_jobs=multiprocessing.cpu_count() * 2, backend='threading')(
            delayed(process_dists)(idx_start, y_each, y_list, path_embeddings,
                                   sample_count, classes_size, embedding_size,
                                   triplet_similarity, mode)
            for idx_start, y_each in paths_embs_idx_path_pairs)
        dists_mem = np.memmap(path_dists_mem,
                              mode='r',
                              dtype=np.float16,
                              shape=(sample_count, classes_size))

    # iterate through precomputed distances to add data to meters, chunked
    # for memory optimization
    chunk_size = 1024
    # BUG FIX: the loop previously iterated range(sample_count//chunk_size + 1)
    # and used the bare loop index as the chunk start, so consecutive chunks
    # started at 0, 1, 2, ... (massively overlapping) instead of stepping by
    # chunk_size.
    for idx_chunk_start in range(0, sample_count, chunk_size):
        idx_chunk_end = min(sample_count, idx_chunk_start + chunk_size)
        chunk_each_size = idx_chunk_end - idx_chunk_start
        if chunk_each_size == 0:
            break

        if type == 'range':
            predicted = np.zeros((chunk_each_size, classes_size), dtype=float)
        else:
            # init with a large sentinel so np.minimum can accumulate
            predicted = np.ones((chunk_each_size, classes_size), dtype=float) * 1e9
        target = np.zeros((chunk_each_size, classes_size), dtype=float)

        # hoisted out of the per-class loop: these do not depend on idx_y
        for idx_class in range(chunk_each_size):
            target[idx_class, y_list[idx_chunk_start + idx_class]] = 1.0
        dists = dists_mem[idx_chunk_start:idx_chunk_end]

        for idx_y in class_max_dist.keys():
            max_dist = class_max_dist[idx_y]
            if type == 'range':
                # vote for every class whose radius contains the sample
                for idx_emb, dist in enumerate(dists):
                    if max_dist > dist[idx_y]:
                        predicted[idx_emb, idx_y] += 1.0
            else:
                # store for each class closest embedding with distance value
                predicted[:, idx_y] = np.minimum(predicted[:, idx_y],
                                                 dists[:, idx_y])

        if type == 'range':
            # normalize votes into probabilities
            predicted = predicted / (np.sum(predicted, axis=1, keepdims=True) + 1e-18)
        else:
            # TODO softmax/hardmax based accuracy
            idx_class = np.argmin(predicted, axis=1)  # for each sample select closest distance
            predicted = np.zeros_like(predicted)  # init probabilities vector
            predicted[np.arange(predicted.shape[0]), idx_class] = 1.0  # prob 100% for closest class

        y_chunk = np.array(y_list[idx_chunk_start:idx_chunk_end])
        meter_acc.add(predicted, y_chunk)

        # AssertionError: targets should be binary (0, 1)
        idxes_classes = np.argmax(predicted, axis=1)
        target_tp = np.array(np.equal(y_chunk, idxes_classes), dtype=int)
        meter_auc.add(np.max(predicted, axis=1), target_tp)

    return class_max_dist, class_centroids, y_list, sample_count, paths_embs_idx_path_pairs
# For each "lack" level, load the catalogued galaxy dict and the matching
# beam, then Gaussian-smooth positions in parallel.
# NOTE(review): script fragment — lack_list, out_dir, beam_dir, index, dim,
# sigma, shape are defined elsewhere.
for lack in lack_list:
    sub_dir = 'tmp_L{:d}/'.format(lack)
    source = np.load(out_dir + sub_dir + "{:0>3d}_tmp_cat.npy".format(index)).item()
    beam = np.load(beam_dir + "{:d}d_beam_sigma{:d}.npy".format(int(dim - lack), sigma))
    #=========================================================================================
    start = time.time()
    #after_smooth = dict()
    for i, key in enumerate(source.keys()):
        #=========================================================
        # Percentage Indicator
        if i % 100 == 0:
            print('Now: ' + str(float(i) / len(source) * 100) + '%')
        #=========================================================
        # Do Gaussian Smooth
        gal_pos = list(key)
        if gal_pos.count("Lack") <= (len(shape) - 3):
            gal_pos_array = np.asarray(gal_pos)
            gal_pos_array_str = np.array(gal_pos_array, dtype=str)
            no_lack_ind = np.where(gal_pos_array_str != "Lack")[0]
            # NOTE(review): `new_key` is not defined anywhere in this visible
            # scope — likely a bug or a leftover from refactoring; confirm.
            Parallel(n_jobs=10)(delayed(gaussian_smooth)(
                pos, gal_pos_array, gal_pos_array_str, new_key, no_lack_ind)
                for pos in beam)
            # NOTE(review): this break stops after the FIRST qualifying key —
            # looks like a debugging leftover; verify before relying on output.
            break
    end = time.time()
    print("Saving result ...\n")
    chdir(out_dir + sub_dir)
    #np.save("{:0>3d}_{:d}d_after_smooth".format(int(index), int(dim-lack)), np.array(after_smooth))
    print("Gaussian Smooth took {:.3f} secs\n".format(end - start))
# how many systems to probe and average over systems = 1000 start = 220000 # look at the same examples as the network test dataset #loop over all global optimization configurations for i in range(len(ftype)): #print a nice headder for the log file print('\n\n', '-' * 10, 'Starting, FT=%d, NG=%d' % (ftype[i], nglob[i]), '-' * 10) sta = datetime.datetime.now() print(sta) #parallel call #parallelized by system, so each system runs independently on a core results = Parallel(n_jobs=cores)( delayed(gen_fcn_pd)(g, ftype[i], nglob[i]) for g in tqdm(range(start, start + systems))) end = datetime.datetime.now() #system runtime per system #particular to the number of parallel calls print('Runtime:', end - sta) sys_runtime = (end - sta) / systems print('Per System:', sys_runtime) #calcuate the metrics to find the algorithm optimzaiton performance in each configuration results = np.array(results) #calculate RMSE thickness and materials accuracy metrics metrics = accutest( np.transpose( np.array([ np.argmax(tm1[start:start + systems, :], axis=1),
# NOTE(review): script fragment — `parser` is created before this chunk.
parser.add_argument("-d", "--dstdir", type=str, help="dst image folder")
parser.add_argument("-n", "--n_jobs", type=int, default=30, help="parallel jobs")
parser.add_argument("-p", "--parallel", action='store_true', default=False, help="if parallel")
args = parser.parse_args()

srcdir = args.srcdir
dstdir = args.dstdir
n_jobs = args.n_jobs
parallel = args.parallel


def squeeze_along_z(filename):
    """Average one NRRD volume along its z axis (axis 2) and write the
    result to the destination folder under the same file name."""
    print(filename)
    srcpath = os.path.join(srcdir, filename)
    dstpath = os.path.join(dstdir, filename)
    npsrc, header = nrrd.read(srcpath)
    # NOTE(review): averaging in float16 loses precision, and the int32 cast
    # truncates toward zero — confirm this is acceptable for the data.
    npsrc = npsrc.astype(np.float16)
    npdst = np.average(npsrc, axis=2)
    npdst = npdst.astype(np.int32)
    nrrd.write(dstpath, npdst)


# Process every file in the source folder, optionally in parallel processes.
filelist = os.listdir(srcdir)
if parallel:
    Parallel(n_jobs=n_jobs, backend="multiprocessing")(
        delayed(squeeze_along_z)(filename) for filename in filelist)
else:
    for filename in filelist:
        squeeze_along_z(filename)
#except: # logger.error("Excel conversion script failed") # sys.exit(1) end_time_report_seconds = time.time() report_seconds = [end_time_report_seconds, -start_time_report_seconds] time_report_seconds = sum(report_seconds) #few remaning things: emailing, confluence page adding attachements etc. #few remaning things: emailing, confluence page adding attachements etc. start_time_compression = time.time() if multiload == True: #joblib_method = "processes" joblib_method = "threads" if verbose == True: joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.outArchiveV)('archiving in parallel (' + joblib_method + '): ' + config.cyan + report_file, report_file, env, new_tmp) for report_file in report_output_list ) else: joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.outArchive)('archiving in parallel (' + joblib_method + '): ' + config.cyan + report_file, report_file, env, new_tmp) for report_file in report_output_list ) if nobteq == True: logger.warning('log archiving will be omitted') else: if verbose == True: joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.logArchiveV)('archiving in parallel (' + joblib_method + '): ' + config.cyan + log_file, log_file, new_log) for log_file in log_file_list ) else: joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.logArchive)('archiving in parallel (' + joblib_method + '): ' + config.cyan + log_file, log_file, new_log) for log_file in log_file_list ) else: for report_file in report_output_list: if verbose == True: config.outArchiveV('archiving (same process/thread): ' + config.cyan + report_file, report_file, env, new_tmp) else: config.outArchive('archiving (same process/thread): ' + config.cyan + report_file, report_file, env, new_tmp)
if load_pr: print("Loading existing precision/recall info") existing = { model for model in models if os.path.isfile(pr_file.format(model)) } for model in existing: with open(pr_file.format(model), 'rb') as f: info[model] = pickle.load(f) models = list(set(models) - existing) print("Computing precision/recall") output = [ x for x in Parallel(n_jobs=-1)( delayed(_process_file)(pred_dir, model, file) for model in models for file in os.listdir(os.path.join(pred_dir, model))) if x is not None ] for file in output: info[file.model].append(file) print(info.keys()) # FIXME: Why is this not working? # Save precision and recall #for model in models: # with open(pr_file.format(model), 'wb') as f: # pickle.dump(info[model], f) print("Computing means and stds") with open(f1_file, 'w') as score:
def overlap(p, xt, yt, diat, rott, chord, B, x0, y0, dia, Vinf, pointcalc,
            param=None, veltype='ind', integration='gskr'):
    """
    Calculating wake velocities around a turbine based on wake overlap from
    surrounding turbines (using the 21-point Gauss-Kronrod rule quadrature
    integration; Simpson's rule integration can be used via
    VAWT_Wake_Model.f90)

    Parameters
    ----------
    p : int
        number of points to calculate the velocity around a turbine
        (typically 36)
    xt : array
        downstream positions of surrounding turbine(s) in flow domain (m)
    yt : array
        lateral position of surrounding turbine(s) in flow domain (m)
    diat : array
        diameters of surrounding turbines (m)
    rott : array
        rotation rates of surrounding turbines (rad/s)
    chord : float
        chord length of the turbines (m)
    B : int
        number of turbine blades
    x0 : float
        downstream position in flow domain of turbine to be calculated (m)
    y0 : float
        lateral position in flow domain of turbine to be calculated (m)
    dia : float
        diameter of turbine to be calculated (m)
    Vinf : float
        free stream velocity (m/s)
    pointcalc : bool
        calculate the overlap at a point (True) or at p points around the
        blade flight path (False)
    param : array
        the coefficients used for the EMG distributions ('None' will provide
        the published coefficients automatically)
    veltype : string
        the type of velocity to calculate ('all': velocity magnitude,
        'x': x-induced velocity, 'y': y-induced velocity, 'ind': vector of
        both x- and y-induced velocities without free stream,
        'vort': vorticity profile neglecting integration)
    integration : string
        the type of integration method used ('simp': Simpson's Rule,
        'gskr': 21 Point Gauss-Kronrod Rule)

    Returns
    ----------
    velx : array
        final induced x-velocity at each point around the turbine being
        calculated (m/s)
    vely : array
        final induced y-velocity at each point around the turbine being
        calculated (m/s)
    """
    # initializing local variables and arrays
    t = np.size(xt)  # number of surrounding turbines
    xd = np.zeros(p)
    yd = np.zeros(p)
    velx = np.zeros(p)
    vely = np.zeros(p)
    velx_int = np.zeros(p)
    vely_int = np.zeros(p)

    # Use parallelization (with joblib)
    parallel = True
    # parallel = False

    # finding points around the flight path of the blades
    for i in range(p):
        if pointcalc == False:
            # p points evenly spaced on the blade circle, offset half a step
            theta = (2.0 * pi / p) * i - (2.0 * pi / p) / 2.0
            xd[i] = x0 - sin(theta) * (dia / 2.0)
            yd[i] = y0 + cos(theta) * (dia / 2.0)
        elif pointcalc == True:
            # single-point calculation: only the turbine centre is used
            xd[0] = x0
            yd[0] = y0
    # signed sums of squared velocity deficits, accumulated per point
    intex = np.zeros(p)
    intey = np.zeros(p)

    if (t == 1):  # coupled configuration (only two VAWTs)
        if pointcalc == False:
            if parallel == True:
                wake = Parallel(n_jobs=-1)(delayed(velocity_field)(
                    xt[0], yt[0], xd[j], yd[j], Vinf, diat[0], rott[0],
                    chord, B, param, veltype, integration) for j in range(p))
                for i in range(p):
                    velx[i] = wake[i][0] * Vinf
                    vely[i] = wake[i][1] * Vinf
            elif parallel == False:
                for j in range(p):
                    wake = velocity_field(xt[0], yt[0], xd[j], yd[j], Vinf,
                                          diat[0], rott[0], chord, B, param,
                                          veltype, integration)
                    velx[j] = wake[0] * Vinf
                    vely[j] = wake[1] * Vinf
        elif pointcalc == True:
            wake = velocity_field(xt[0], yt[0], xd[0], yd[0], Vinf, diat[0],
                                  rott[0], chord, B, param, veltype,
                                  integration)
            velx[0] = wake[0] * Vinf
            vely[0] = wake[1] * Vinf
    else:  # multiple turbine wake overlap
        if pointcalc == False:
            if parallel == True:
                # one flat job list over (turbine w, point q); results are
                # indexed below as wake[k + j*p]
                wake = Parallel(n_jobs=-1)(delayed(velocity_field)(
                    xt[w], yt[w], xd[q], yd[q], Vinf, diat[w], rott[w],
                    chord, B, param, veltype, integration)
                    for w in range(t) for q in range(p))
            for j in range(t):
                for k in range(p):
                    if parallel == True:
                        velx_int[k] = -wake[k + j * p][0]
                        vely_int[k] = wake[k + j * p][1]
                    elif parallel == False:
                        wake = velocity_field(xt[j], yt[j], xd[k], yd[k],
                                              Vinf, diat[j], rott[j], chord,
                                              B, param, veltype, integration)
                        velx_int[k] = -wake[0]
                        vely_int[k] = wake[1]

                    # sum of squares of velocity deficits (sign-preserving)
                    if (velx_int[k] >= 0.0):
                        intex[k] = intex[k] + (velx_int[k])**2
                    else:
                        intex[k] = intex[k] - (velx_int[k])**2

                    if (vely_int[k] >= 0.0):
                        intey[k] = intey[k] + (vely_int[k])**2
                    else:
                        intey[k] = intey[k] - (vely_int[k])**2
        elif pointcalc == True:
            for j in range(t):
                wake = velocity_field(xt[j], yt[j], xd[0], yd[0], Vinf,
                                      diat[j], rott[j], chord, B, param,
                                      veltype, integration)
                velx_int[0] = -wake[0]
                vely_int[0] = wake[1]

                # sum of squares of velocity deficits (sign-preserving)
                if (velx_int[0] >= 0.0):
                    intex[0] = intex[0] + (velx_int[0])**2
                else:
                    intex[0] = intex[0] - (velx_int[0])**2

                if (vely_int[0] >= 0.0):
                    intey[0] = intey[0] + (vely_int[0])**2
                else:
                    intey[0] = intey[0] - (vely_int[0])**2

    # square root of sum of squares (sign restored; x deficit flips sign)
    for l in range(p):
        if (intex[l] >= 0.0):
            velx[l] = -Vinf * (sqrt(intex[l]))
        else:
            velx[l] = Vinf * (sqrt(fabs(intex[l])))

        if (intey[l] >= 0.0):
            vely[l] = Vinf * (sqrt(intey[l]))
        else:
            vely[l] = -Vinf * (sqrt(fabs(intey[l])))

    return velx, vely
for ev in env: writeCache(ev, cache) cache = {} cnt += 1 for ev in env: writeCache(ev, cache) for iter in range((max_n_synth // 1000000) + 1): if iter == 0: n_imgs = max_n_synth % 1000000 else: n_imgs = 1000000 n_jobs = torch.cuda.device_count() kwargs_gen = (dict(device=i, nsamples=int(n_imgs / n_jobs)) for i in range(n_jobs)) data = Parallel(n_jobs=n_jobs)(delayed(GenImgs)(**kwargs) for kwargs in kwargs_gen) for d in data: for i in tqdm(range(len(d[0]))): imageKey = 'image-%09d' % cnt labelKey = 'label-%09d' % cnt cache[imageKey] = d[0][i] cache[labelKey] = d[1][i] if (cnt - cnt_orig) % 1000 == 0: for n in range(len(n_synth)): if n_synth[n] >= (cnt - cnt_orig): writeCache(env[n], cache) cache = {} cnt += 1
def load(self):
    """Load the trajectory CSV into a TrajectoryData object.

    Reads the header to determine usable columns (minus ``self.drop_col``),
    groups rows by trajectory id (``self.tid_col``) in parallel, and —
    when both ``self.lat`` and ``self.lon`` columns exist — appends the
    (lat, lon) pair to each point under a combined 'lat_lon' attribute.

    Returns
    -------
    TrajectoryData with attributes, per-tid data, tids, and optional labels.
    """
    cols = []
    # Read only the header line to discover the column names.
    with open(self.file, 'r') as f:
        cols = f.readline().replace('\n', '').split(self.sep)

    for col in self.drop_col:
        if col in cols:
            cols.remove(col)

    df = pd.read_csv(self.file, sep=self.sep, usecols=cols)
    attributes = list(df.keys())
    attributes.remove(self.tid_col)
    if self.label_col and self.label_col != self.tid_col:
        attributes.remove(self.label_col)

    # lat/lon are handled separately so they can be merged into one feature.
    lat_lon = self.lat in attributes and self.lon in attributes
    if lat_lon:
        attributes.remove(self.lat)
        attributes.remove(self.lon)

    tids = sorted(df[self.tid_col].unique())

    def load_tids(s):
        # Worker: build the trajectory list for one slice of tids.
        # (Closes over df/tids/attributes; joblib ships them to workers.)
        ret = []
        for idx in range(s.start, s.stop):
            tid = tids[idx]
            traj = df.loc[df[self.tid_col] == tid, attributes].values
            if lat_lon:
                loc = df.loc[df[self.tid_col] == tid,
                             [self.lat, self.lon]].values
                new_traj = []
                for i, _ in enumerate(loc):
                    point = list(traj[i])
                    point.append(loc[i])  # append the (lat, lon) pair as one element
                    new_traj.append(point)
                traj = new_traj
            ret.append(traj)
        return ret

    labels = None
    func = delayed(load_tids)
    data = Parallel(n_jobs=self.n_jobs, verbose=0)(
        func(s) for s in gen_even_slices(len(tids), self.n_jobs))
    # NOTE(review): trajectories may have different lengths, so this relies
    # on NumPy building an object array from the ragged input — confirm the
    # downstream TrajectoryData expects that.
    data = np.concatenate(data)

    if self.label_col:
        # One label per tid, aligned with sorted(tids).
        labels = df \
            .drop_duplicates(subset=[self.tid_col, self.label_col],
                             inplace=False) \
            .sort_values(self.tid_col, ascending=True,
                         inplace=False)[self.label_col].values

    if lat_lon:
        attributes.append('lat_lon')

    return TrajectoryData(attributes=attributes,
                          data=data,
                          tids=tids,
                          labels=labels)
def optimize(
        self,
        func,  # type: ObjectiveFuncType
        n_trials=None,  # type: Optional[int]
        timeout=None,  # type: Optional[float]
        n_jobs=1,  # type: int
        catch=(),  # type: Union[Tuple[()], Tuple[Type[Exception]]]
        callbacks=None,  # type: Optional[List[Callable[[Study, structs.FrozenTrial], None]]]
        gc_after_trial=True  # type: bool
):
    # type: (...) -> None
    """Optimize an objective function.

    Args:
        func:
            A callable that implements objective function.
        n_trials:
            The number of trials. If this argument is set to :obj:`None`,
            there is no limitation on the number of trials. If
            :obj:`timeout` is also set to :obj:`None`, the study continues
            to create trials until it receives a termination signal such
            as Ctrl+C or SIGTERM.
        timeout:
            Stop study after the given number of second(s). If this
            argument is set to :obj:`None`, the study is executed without
            time limitation. If :obj:`n_trials` is also set to
            :obj:`None`, the study continues to create trials until it
            receives a termination signal such as Ctrl+C or SIGTERM.
        n_jobs:
            The number of parallel jobs. If this argument is set to
            :obj:`-1`, the number is set to CPU count.
        catch:
            A study continues to run even when a trial raises one of the
            exceptions specified in this argument. Default is an empty
            tuple, i.e. the study will stop for any exception except for
            :class:`~optuna.exceptions.TrialPruned`.
        callbacks:
            List of callback functions that are invoked at the end of
            each trial.
        gc_after_trial:
            Flag to execute garbage collection at the end of each trial.
            By default, garbage collection is enabled, just in case. You
            can turn it off with this argument if memory is safely
            managed in your objective function.
    """
    if not isinstance(catch, tuple):
        raise TypeError(
            "The catch argument is of type \'{}\' but must be a tuple.".
            format(type(catch).__name__))

    # Non-blocking acquire: refuse re-entrant calls instead of deadlocking.
    if not self._optimize_lock.acquire(False):
        raise RuntimeError(
            "Nested invocation of `Study.optimize` method isn't allowed.")
    try:
        if n_jobs == 1:
            # Serial path: no joblib overhead, no time_start needed.
            self._optimize_sequential(func, n_trials, timeout, catch,
                                      callbacks, gc_after_trial, None)
        else:
            time_start = datetime.datetime.now()

            if n_trials is not None:
                _iter = iter(range(n_trials))
            elif timeout is not None:
                # This is needed for mypy
                actual_timeout = timeout  # type: float
                # Two-arg iter(): yields until the callable returns the
                # sentinel True, i.e. until the timeout elapses.
                _iter = iter(
                    lambda: (datetime.datetime.now() - time_start).
                    total_seconds() > actual_timeout, True)
            else:
                # The following expression makes an iterator that never ends.
                _iter = iter(int, 1)

            with Parallel(n_jobs=n_jobs, prefer="threads") as parallel:
                # In-memory storage is process-local, so warn when joblib
                # picked a non-threading backend.
                if not isinstance(parallel._backend,
                                  joblib.parallel.ThreadingBackend) and \
                        isinstance(self._storage, storages.InMemoryStorage):
                    msg = 'The default storage cannot be shared by multiple processes. ' \
                          'Please use an RDB (RDBStorage) when you use joblib for ' \
                          'multi-processing. The usage of RDBStorage can be found in ' \
                          'https://optuna.readthedocs.io/en/stable/tutorial/rdb.html.'
                    warnings.warn(msg, UserWarning)
                    _logger.warning(msg)

                # Each parallel task runs exactly one trial (n_trials=1).
                parallel(
                    delayed(self._optimize_sequential)
                    (func, 1, timeout, catch, callbacks, gc_after_trial,
                     time_start) for _ in _iter)
    finally:
        self._optimize_lock.release()
def parallel_df(func, df, series):
    """Apply ``func`` to vertical chunks of ``df`` in parallel.

    The columns of ``df`` are split into one chunk per worker (at most one
    worker per column), ``func(chunk_df, series)`` is evaluated for each
    chunk via joblib, and the partial results are concatenated.
    """
    total_cols = len(df.columns)
    workers = min(cpu_count(), total_cols)
    # Index ranges of near-equal size, one per worker.
    chunks = np.array_split(range(total_cols), workers)
    pieces = Parallel(n_jobs=workers)(
        delayed(func)(df.iloc[:, chunk], series) for chunk in chunks)
    return pd.concat(pieces)
pass #added data = data_new #added batches = [data[i:i + 1] for i in xrange(0, len(data))] dataset = MolTreeDataset(batches, vocab, assm=False) loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0, collate_fn=lambda x: x[0]) torch.manual_seed(args.seed) def helper(batch): mol_batch = batch[0] x_tree_vecs, _, x_mol_vecs = model.encode(batch[1], batch[2]) assert x_tree_vecs.size(0) == x_mol_vecs.size(0) for k in xrange(args.num_decode): z_tree_vecs, z_mol_vecs = model.fuse_noise(x_tree_vecs, x_mol_vecs) smiles = mol_batch[0].smiles new_smiles = model.decode(z_tree_vecs[0].unsqueeze(0), z_mol_vecs[0].unsqueeze(0)) if new_smiles != None: print smiles, new_smiles Parallel(n_jobs=args.n_core)(delayed(helper)(batch) for batch in loader) #for batch in loader:
# --- BLS result plotting / batch run (fragment) ---
# NOTE(review): the `else:` below belongs to an `if` whose header is above
# this chunk (single-target plot vs. batch mode); indentation here is
# reconstructed and the branch nesting should be confirmed upstream.
ax = fig.add_subplot(gs[1,2:4])
# Phase is in days; convert to hours for the x axis.
ax.plot(ph*24, f, '.k', ms=5)
print(ph.max(), ph.min())
ax.set_xlim(-48*dur, 48*dur)
ax.set_ylim(1-1.5*depth, 1+5e-3)
ax.set_title(r''+ targetfile + r' $P=%.5f$' % period + r' SNR=%f' % snr)
ax.set_xlabel('Hours from mid-transit')
ax.set_ylabel('Normalized flux')
plt.show()
else:
    from joblib import Parallel, delayed, Memory
    #memory = Memory('./cachedir', verbose=0)
    #costoso = memory.cache(run_BLS)
    allfiles = glob.glob(folder + 'TIC*.dat')
    #results = np.memmap('temp.npz', dtype='float32', mode='w+', shape=(len(allfiles),9))
    #results = np.array(Parallel(n_jobs=args.ncpu, verbose=0)(delayed(costoso)(f) for f in tqdm(allfiles)))
    # Run BLS over every light-curve file in parallel.
    results = np.array(Parallel(n_jobs=args.ncpu, verbose=0)(
        delayed(run_BLS)(f) for f in tqdm(allfiles)))
    # Sort rows by column 5 (descending) — presumably SNR; confirm with run_BLS.
    order = np.argsort(results[:,5])[::-1]
    results = results[order]
    print(results)
    np.savetxt(args.output, results, fmt='%s')
# for x in range(len(f)): # For every row in the constant image # for y in range(len(f[x])): # For every column in that row # try: # val_a = f[x][y] # except IndexError: # val_a = 1 # try: # val_b = e[x + offset_x][y + offset_y] # except IndexError: # val_b = 1 # val = min(val_a, val_b) # if smallest_row is None and val < THRESHOLD: # smallest_row = (x,y) # if val < THRESHOLD and y < smallest_col[1]: # smallest_col = (x,y) # if val < THRESHOLD: # greatest_row = (x,y) # if val < THRESHOLD and y > greatest_col[1]: # greatest_col = (x,y) # f[x][y] = val # Record the super-imposed value into f # config = Configuration(smallest_row,smallest_col,greatest_row,greatest_col,f,e,j) # Creates a new configuration # if configurations is None or config < configurations: # configurations = config # return configurations Parallel(n_jobs=2)(delayed(analyze)( c, skimage.color.rgb2gray( skimage.io.imread('images_0-3_jpg/{}'.format(img))), img) for img in directory)
#nmax = 40000  # maximum number up to which we search for primes
nmax = 400000  # maximum number up to which we search for primes
inputs = range(0, nmax)

def isPrime(num):
    """Return True if ``num`` is a prime number, else False.

    Uses trial division up to sqrt(num): any composite has a divisor no
    larger than its square root, so the original O(n) scan to ``num`` is
    unnecessary and gives identical results.
    """
    # BUG FIX: the original used `num < 1`, which misclassified 1 as prime
    # (its trial-division loop `range(2, 1)` was empty and returned True).
    # Primality is only defined for integers >= 2.
    if num < 2:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i == 0:
            return False
    return True

starttime = time.time()
num_cores = multiprocessing.cpu_count()
# If shared memory areas (lists, dicts, ...) were accessed, joblib would
# need require='sharedmem', but that hurts performance badly.
results = Parallel(n_jobs=num_cores)(delayed(isPrime)(i) for i in inputs)
print ('encontrados en si %s' % results.count(True))
print ('encontrados en no %s' % results.count(False))
print('That took {} seconds'.format(time.time() - starttime))
def generate(self, n_generations=100, population_size=50, individual_size=10, monitor=None):
    """Run a simple evolutionary algorithm and return the final population.

    Parameters
    ----------
    n_generations : int
        Number of select/crossover/mutate/evaluate cycles.
    population_size : int
        Number of individuals; a threading joblib backend is registered
        whenever this is not 1.
    individual_size : int
        Size passed to ``self.toolbox.population``.
    monitor : object, optional
        If given, ``monitor.submit(population)`` is called each generation.
    """
    log.info(
        f'Starting working. n_generations {n_generations}, population_size {population_size}, individual_size {individual_size}'
    )
    backend_ctx = None
    try:
        if population_size != 1:
            # BUG FIX: the original initialized a local named
            # `parallel_backend` to None and then *called* it
            # (`parallel_backend = parallel_backend('threading', ...)`),
            # which always raised "'NoneType' object is not callable"
            # because the local shadowed joblib's parallel_backend.
            # A distinct local name restores the intended call.
            backend_ctx = parallel_backend(
                'threading', n_jobs=self.n_parallel_jobs)

        population = self.toolbox.population(
            size=population_size, individual_size=individual_size)

        # Evaluate the entire population
        #fitnesses = map(toolbox.evaluate, population)
        fitnesses = Parallel()(delayed(self.toolbox.evaluate)(individual)
                               for individual in population)
        for ind, fit in zip(population, fitnesses):
            ind.fitness.values = fit

        for g in range(n_generations):
            log.debug(f'Running generation {g}')

            # Select the next generation individuals
            offspring = self.toolbox.select(population, len(population))
            log.debug(f'g{g} offspring')

            # Clone the selected individuals
            offspring = [self.toolbox.clone(o) for o in offspring]
            #offspring = map(toolbox.clone, offspring)
            log.debug(f'g{g} clone')

            if (self.crossover_prob > 0):
                # Apply crossover and mutation on the offspring
                for child1, child2 in zip(offspring[::2], offspring[1::2]):
                    # BUG FIX: the original compared against the bare name
                    # `crossover_prob` (undefined -> NameError); the
                    # attribute `self.crossover_prob` is clearly intended,
                    # matching the guard above and the mutation branch.
                    if random.random() < self.crossover_prob:
                        self.toolbox.mate(child1, child2)
                        # Invalidate fitnesses of modified children.
                        del child1.fitness.values
                        del child2.fitness.values
                log.debug(f'g{g} crossover')

            if (self.mutation_prob > 0):
                for mutant in offspring:
                    if random.random() < self.mutation_prob:
                        self.toolbox.mutate(mutant, self.drawing_problem)
                        del mutant.fitness.values
                log.debug(f'g{g} mutation')

            # Evaluate the individuals with an invalid fitness
            invalid_ind = [
                ind for ind in offspring if not ind.fitness.valid
            ]
            #fitnesses = map(toolbox.evaluate, invalid_ind)
            fitnesses = Parallel()(
                delayed(self.toolbox.evaluate)(individual)
                for individual in invalid_ind)
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit

            # The population is entirely replaced by the offspring
            population[:] = offspring
            log.debug(f'{g} final population')

            if monitor is not None:
                monitor.submit(population)

        return population
    finally:
        if backend_ctx is not None:
            # NOTE(review): kept the original `.close()` call; joblib's
            # parallel_backend object is normally used as a context manager
            # (or via unregister()) — confirm this API against the joblib
            # version in use.
            backend_ctx.close()
def setupFeeds(check_update, documents):
    """Download FireHOL netset/ipset feeds listed in ``documents``.

    Parameters
    ----------
    check_update : passed through unchanged to ``GetFireHoleLists`` for
        each feed item.
    documents : mapping of document name -> iterable of feed items; only
        documents whose name contains "netset" or "ipset" are fetched.

    Each matching document's items are fetched concurrently on threads
    (one thread pool sized to the CPU count per document).
    """
    for item, doc in documents.items():
        if "netset" in item or "ipset" in item:
            # FIX: removed the dead local `iUpdateHrs = update_interval / 60 / 60`
            # computed here by the original — it was never used.
            Parallel(n_jobs=multiprocessing.cpu_count(), prefer='threads')(
                delayed(GetFireHoleLists)(check_update, itm) for itm in doc)
def get_bboxes_val(annotations_dir):
    """Collect all bounding boxes of the 'val' split under ``annotations_dir``.

    Lists the per-video annotation folders, extracts each folder's boxes in
    parallel via ``get_folder_bndbox``, and flattens the per-video lists
    into one list (``sum(..., [])``).
    """
    # FIX: removed the dead `bboxes = []` the original assigned before
    # `videos` — it was immediately overwritten below and never read.
    videos = os.listdir(annotations_dir)
    bboxes = sum(
        Parallel(n_jobs=8)(
            delayed(get_folder_bndbox)(annotations_dir, video, 'val')
            for video in tqdm(videos, total=len(videos), file=sys.stdout)),
        [])
    return bboxes
# --- IP threat-feed lookup driver (script fragment) ---
# NOTE(review): `bSkipFeeds`, `getFQDN`, `bHitsOnly`, `documents`, `args`
# and the helpers come from earlier in the file. Despite its name,
# `bSkipFeeds` being truthy *triggers* the feed refresh here — confirm the
# flag's polarity where it is set.
if bSkipFeeds:
    print("Fetching new and updated feeds... [Update older than 24 hrs]")
    setupFeeds(args.skip_update, documents)
    LoadFeeds(documents)
if getFQDN:
    print("Note: Hostname lookups will increase processing time.")
# Single CSV-style header string (one element, comma-joined fields).
lstColumns = ["IP, City, Country, ASN, ASN Org, FQDN, Indicators"]
lstResults = []
if args.file:
    # Process every line of the input file concurrently on threads.
    with open(args.file, "r", encoding='utf-8') as f:
        lstResults = Parallel(n_jobs=multiprocessing.cpu_count(),
                              prefer='threads')(
            delayed(ipProcess)(ip) for ip in f)
elif args.ip:
    lstResults.append(ipProcess(args.ip.rstrip()))
else:
    print("Provide an ip or file to process...")
#Remove skipped lines that didn't have threat feed hits
if (bHitsOnly):
    lstResults = [i for i in lstResults if i]
#Output results
print("\r\n")
print("\r\n".join(lstColumns))
print("\r\n".join(lstResults))
def remember(experiment, occlusion=None, bars_type=None, tolerance=0):
    """ Creates images from features.

    Uses the decoder part of the neural networks to (re)create
    images from features.

    Parameters
    ----------
    experiment : TYPE
        DESCRIPTION.
    occlusion : TYPE, optional
        DESCRIPTION. The default is None.
    tolerance : TYPE, optional
        DESCRIPTION. The default is 0.

    Returns
    -------
    None.

    """
    for i in range(constants.training_stages):
        # Build all per-stage file names from the constants module.
        testing_data_filename = constants.data_name + constants.testing_suffix
        testing_data_filename = constants.data_filename(
            testing_data_filename, i)
        testing_features_filename = constants.features_name(
            experiment, occlusion, bars_type) + constants.testing_suffix
        testing_features_filename = constants.data_filename(
            testing_features_filename, i)
        testing_labels_filename = constants.labels_name + constants.testing_suffix
        testing_labels_filename = constants.data_filename(
            testing_labels_filename, i)
        memories_filename = constants.memories_name(experiment, occlusion,
                                                    bars_type, tolerance)
        memories_filename = constants.data_filename(memories_filename, i)
        labels_filename = constants.labels_name + constants.memory_suffix
        labels_filename = constants.data_filename(labels_filename, i)
        model_filename = constants.model_filename(constants.model_name, i)

        testing_data = np.load(testing_data_filename)
        testing_features = np.load(testing_features_filename)
        testing_labels = np.load(testing_labels_filename)
        memories = np.load(memories_filename)
        labels = np.load(labels_filename)
        model = tf.keras.models.load_model(model_filename)

        # Drop the classifier.
        # (model.output[1] is assumed to be the autoencoder head — confirm
        # against the model definition elsewhere in the file.)
        autoencoder = Model(model.input, model.output[1])
        autoencoder.summary()

        # Drop the encoder
        input_mem = Input(shape=(constants.domain, ))
        decoded = get_decoder(input_mem)
        decoder = Model(inputs=input_mem, outputs=decoded)
        decoder.summary()
        # Copy trained decoder weights; layer offset 31 presumably marks
        # where the decoder starts inside the autoencoder — TODO confirm.
        for dlayer, alayer in zip(decoder.layers[1:],
                                  autoencoder.layers[31:]):
            dlayer.set_weights(alayer.get_weights())

        produced_images = decoder.predict(testing_features)
        n = len(testing_labels)

        # Store (original, reconstructed) test-image pairs in parallel.
        Parallel(n_jobs=constants.n_jobs, verbose=5)(delayed(store_images)(
            original, produced,
            constants.testing_directory(experiment, occlusion, bars_type),
            i, j, label) for (j, original, produced, label) in zip(
                range(n), testing_data, produced_images, testing_labels))

        # Decode memories in equal-size chunks, one per memory-fill step.
        total = len(memories)
        steps = len(constants.memory_fills)
        step_size = int(total / steps)
        for j in range(steps):
            print('Decoding memory size ' + str(j) + ' and stage ' + str(i))
            start = j * step_size
            end = start + step_size
            mem_data = memories[start:end]
            mem_labels = labels[start:end]
            produced_images = decoder.predict(mem_data)
            Parallel(n_jobs=constants.n_jobs, verbose=5)(
                delayed(store_memories)(
                    label, produced, features,
                    constants.memories_directory(experiment, occlusion,
                                                 bars_type, tolerance),
                    i, j) for (produced, features, label) in zip(
                        produced_images, mem_data, mem_labels))
# threshold_vec=np.arange(0,.3,0.1) num_cores = multiprocessing.cpu_count() # volumes=Parallel(n_jobs=num_cores)( # delayed(read_image)(path_fuzzy=path_fuzzy[i], # path_gt=path_gt[i],i=i) # for i in range(len(path_fuzzy)))# res_all = [] for cntr in range(len(path_fuzzy)): xsl_nm = test_path + out_dir + str.split( str.split(path_gt[cntr], '/')[-1], '_gtv.mha')[0] + '.xlsx' [logits, labels] = read_imgs(path_fuzzy[cntr], path_gt[cntr]) res = Parallel(n_jobs=num_cores)( delayed(tp_tn_fp_fn)(logits=logits, labels=labels, threshold=threshold_vec[i], path_fuzzy=path_fuzzy[cntr], cntr=cntr) for i in range(len(threshold_vec))) df = pd.DataFrame(res, columns=pd.Index( ['name', 'threshold', 'TP', 'TN', 'FP', 'FN'], name='Genus')) # Create a Pandas Excel writer using XlsxWriter as the engine. writer = pd.ExcelWriter(xsl_nm, engine='xlsxwriter') # Convert the dataframe to an XlsxWriter Excel object. df.to_excel(writer, sheet_name='Sheet1') # Close the Pandas Excel writer and output the Excel file.
y.sort() # Uncomment the next two lines if you want to sparsify the plot further. # y = y[0::500] # x = x[0::500] plt.scatter(x, y, marker='.') # plt.show() plt.xlabel("Kth nearest Neighbour") plt.ylabel("Distance") plt.savefig(filename + ".png") plt.clf() return files = [] for file in glob.glob("output_twitter.txt*"): files.append(file) files.sort() print files num_cores = 8 results = Parallel(n_jobs=num_cores)(delayed(processInput)(file) for file in files)
def Ridge_OptimalAlpha_KFold(Training_Data, Training_Score, Fold_Quantity,
                             Alpha_Range, ResultantFolder,
                             Parallel_Quantity):
    """Pick the optimal ridge-regression alpha via inner K-fold CV.

    Subjects are sorted by score and dealt into ``Fold_Quantity`` folds by
    stride, so every fold spans the score range. For each fold, every alpha
    in ``Alpha_Range`` is fitted in parallel (``Ridge_SubAlpha`` writes one
    ``Alpha_<l>.mat`` per alpha into ``ResultantFolder``, which is read back
    and deleted here). Correlation and inverse-MAE criteria are z-scored and
    summed; the alpha maximizing the sum wins.

    Returns
    -------
    (Optimal_Alpha, Inner_Corr, Inner_MAE_inv)
        The chosen alpha plus the raw fold-by-alpha criterion matrices.
    """
    Subjects_Quantity = len(Training_Score)
    # Sort subjects by score so strided folds are balanced across scores.
    Sorted_Index = np.argsort(Training_Score)
    Training_Data = Training_Data[Sorted_Index, :]
    Training_Score = Training_Score[Sorted_Index]
    # FIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the exact replacement (twice below).
    Inner_EachFold_Size = int(
        np.fix(np.divide(Subjects_Quantity, Fold_Quantity)))
    MaxSize = Inner_EachFold_Size * Fold_Quantity
    EachFold_Max = np.ones(Fold_Quantity, int) * MaxSize
    tmp = np.arange(Fold_Quantity - 1, -1, -1)
    EachFold_Max = EachFold_Max - tmp
    # Distribute the remainder subjects over the first folds.
    Remain = np.mod(Subjects_Quantity, Fold_Quantity)
    for j in np.arange(Remain):
        EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity
    print(Alpha_Range)
    Inner_Corr = np.zeros((Fold_Quantity, len(Alpha_Range)))
    Inner_MAE_inv = np.zeros((Fold_Quantity, len(Alpha_Range)))
    Alpha_Quantity = len(Alpha_Range)
    for k in np.arange(Fold_Quantity):
        # Fold k takes every Fold_Quantity-th subject starting at k.
        Inner_Fold_K_Index = np.arange(k, EachFold_Max[k], Fold_Quantity)
        Inner_Fold_K_Data_test = Training_Data[Inner_Fold_K_Index, :]
        Inner_Fold_K_Score_test = Training_Score[Inner_Fold_K_Index]
        Inner_Fold_K_Data_train = np.delete(Training_Data,
                                            Inner_Fold_K_Index, axis=0)
        Inner_Fold_K_Score_train = np.delete(Training_Score,
                                             Inner_Fold_K_Index)
        # One worker per alpha; each writes its result to Alpha_<l>.mat.
        Parallel(n_jobs=Parallel_Quantity, backend="threading")(
            delayed(Ridge_SubAlpha)(
                Inner_Fold_K_Data_train, Inner_Fold_K_Score_train,
                Inner_Fold_K_Data_test, Inner_Fold_K_Score_test,
                Alpha_Range[l], l, ResultantFolder)
            for l in np.arange(len(Alpha_Range)))
        # Collect each alpha's result file, then remove it.
        for l in np.arange(Alpha_Quantity):
            print(l)
            Alpha_l_Mat_Path = ResultantFolder + '/Alpha_' + str(l) + '.mat'
            Alpha_l_Mat = sio.loadmat(Alpha_l_Mat_Path)
            Inner_Corr[k, l] = Alpha_l_Mat['Corr'][0][0]
            Inner_MAE_inv[k, l] = Alpha_l_Mat['MAE_inv']
            os.remove(Alpha_l_Mat_Path)
    Inner_Corr = np.nan_to_num(Inner_Corr)
    # z-score both criteria so they can be summed on a common scale.
    Inner_Corr_Mean = np.mean(Inner_Corr, axis=0)
    Inner_Corr_Mean = (Inner_Corr_Mean -
                       np.mean(Inner_Corr_Mean)) / np.std(Inner_Corr_Mean)
    Inner_MAE_inv_Mean = np.mean(Inner_MAE_inv, axis=0)
    Inner_MAE_inv_Mean = (Inner_MAE_inv_Mean - np.mean(Inner_MAE_inv_Mean)
                          ) / np.std(Inner_MAE_inv_Mean)
    Inner_Evaluation = Inner_Corr_Mean + Inner_MAE_inv_Mean
    Inner_Evaluation_Mat = {
        'Inner_Corr': Inner_Corr,
        'Inner_MAE_inv': Inner_MAE_inv,
        'Inner_Evaluation': Inner_Evaluation
    }
    sio.savemat(ResultantFolder + '/Inner_Evaluation.mat',
                Inner_Evaluation_Mat)
    Optimal_Alpha_Index = np.argmax(Inner_Evaluation)
    Optimal_Alpha = Alpha_Range[Optimal_Alpha_Index]
    return (Optimal_Alpha, Inner_Corr, Inner_MAE_inv)
# --- dataset downscaling driver (script fragment) ---
# NOTE(review): in this first loop `image_set` carries over from a loop
# earlier in the file (not visible in this chunk); the bare `except: pass`
# silently ignores *all* mkdir failures, not just pre-existing directories —
# consider `except FileExistsError`.
for subset in ['images', 'instances', 'labels']:
    try:
        os.mkdir(os.path.join(data_path, image_set + '_new', subset))
    except:
        pass

# Downscale every training/validation file across 8 workers.
for image_set in ['training', 'validation']:
    for subset in ['images', 'instances', 'labels']:
        target_path = os.path.join(data_path, image_set, subset)
        file_names = os.listdir(target_path)
        file_names = [
            os.path.join(target_path, file_name) for file_name in file_names
        ]
        result = Parallel(n_jobs=8)(delayed(downscale)(r) for r in file_names)

# Test split has only images (no instance/label maps).
for image_set in ['testing']:
    for subset in ['images']:
        target_path = os.path.join(data_path, image_set, subset)
        file_names = os.listdir(target_path)
        file_names = [
            os.path.join(target_path, file_name) for file_name in file_names
        ]
        result = Parallel(n_jobs=8)(delayed(downscale)(r) for r in file_names)
# NOTE(review): the lines down to `return performance_reg_model_b` are the
# tail of a nested `process(i)` function whose `def` is above this chunk;
# the driver code below fans it out over n_runs processes.
if is_timeseries_split:
    # Time-series split: evaluate on the held-out tail context.
    estimated_rewards_by_reg_model = reg_model.predict(
        context=bandit_feedback["context_test"],
    )
else:
    # Otherwise evaluate on rounds not used to fit the regression model.
    estimated_rewards_by_reg_model = reg_model.predict(
        context=bandit_feedback["context"][~is_for_reg_model],
    )
performance_reg_model_b = evaluate_reg_model(
    bandit_feedback=bandit_feedback,
    is_timeseries_split=is_timeseries_split,
    estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    is_for_reg_model=is_for_reg_model,
)
return performance_reg_model_b

processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
    [delayed(process)(i) for i in np.arange(n_runs)]
)
# save performance of the regression model in './logs' directory.
if not is_mrdr:
    # One auc/rce entry per bootstrap run b.
    performance_reg_model = {metric: dict() for metric in ["auc", "rce"]}
    for b, performance_reg_model_b in enumerate(processed):
        for metric, metric_value in performance_reg_model_b.items():
            performance_reg_model[metric][b] = metric_value
    DataFrame(performance_reg_model).describe().T.round(6).to_csv(
        log_path / f"performance_reg_model.csv"
    )