def report_perf(self, optimizer, X, y, title, callbacks=None):
    """Fit ``optimizer`` on ``(X, y)``, time the search and summarise it.

    Parameters
    ----------
    optimizer : a sklearn or a skopt optimizer
        Must expose ``fit``, ``best_score_``, ``best_index_``,
        ``best_params_`` and ``get_cv_results_()``.
    X : the training set
    y : our target
    title : str
        Label for the experiment; interpolated into the start message.
    callbacks : optional
        When truthy, forwarded to ``optimizer.fit`` as ``callback=``;
        otherwise ``fit`` is called without that keyword.

    Returns
    -------
    dict
        ``best_score``, ``best_score_std``, ``best_parmas`` (the ``"model"``
        entry of ``best_params_``; the key spelling is kept as-is),
        ``params``, ``CPU_Time``, ``Time_cost`` (both rounded to whole
        seconds), ``all_cv_results`` (mean test scores) and ``CV`` (the full
        CV results).
    """
    wall_start = time.time()
    cpu_start = time.process_time()

    # Only pass ``callback=`` when there is something to pass.
    fit_kwargs = {'callback': callbacks} if callbacks else {}
    mprint(f'start tuning {title}...')
    optimizer.fit(X, y, **fit_kwargs)

    time_cost_CPU = time.process_time() - cpu_start
    time_cost = time.time() - wall_start

    result = {
        'best_score': optimizer.best_score_,
        'best_score_std':
            optimizer.get_cv_results_()['std_test_score'][
                optimizer.best_index_],
        'best_parmas': optimizer.best_params_["model"],
        'params': optimizer.get_cv_results_()['params'],
        'CPU_Time': round(time_cost_CPU, 0),
        'Time_cost': round(time_cost, 0),
        'all_cv_results': optimizer.get_cv_results_()['mean_test_score'][:],
        'CV': optimizer.get_cv_results_(),
    }

    # Values shown in the one-line summary below.
    time_cost_CPU = round(result['CPU_Time'], 0)
    time_cost = round(result['Time_cost'], 0)
    cand = len(result['all_cv_results'])
    best_cv = round(result['best_score'], 8)
    best_cv_sd = round(result['best_score_std'], 4)

    print("")
    print(
        f'took CPU Time: {time_cost_CPU}s,clock time: {time_cost}s, candidates checked:{cand} ,best CV score: {best_cv} \u00B1 {best_cv_sd}'
    )
    print("")
    return result
# Indices into the ``neigh`` list built below.
NORTH = 0
SOUTH = 1
EAST = 2
WEST = 3

if __name__ == "__main__":
    comm = MPI.COMM_WORLD

    # Near-square processor grid: floor(sqrt(size)) rows and as many full
    # columns as fit.  rows * (size // rows) can never exceed size, so the
    # two "shrink the grid" checks that used to follow were unreachable and
    # have been removed.
    mpi_rows = int(np.floor(np.sqrt(comm.size)))
    mpi_cols = comm.size // mpi_rows

    mprint("=" * 78)
    mprint("Running %d parallel processes (ranks)" % (comm.size))
    mprint("Creating a %d x %d processor grid..." % (mpi_rows, mpi_cols))

    # Create a 2d cartesian grid with periodic boundary conditions
    # NOTE(review): when mpi_rows * mpi_cols < comm.size (e.g. 5 ranks give a
    # 2 x 2 grid) the leftover ranks presumably receive a null communicator
    # and the calls below would fail on them -- confirm and guard if the
    # script must run on such process counts.
    ccomm = comm.Create_cart((mpi_rows, mpi_cols),
                             periods=(True, True),
                             reorder=True)
    my_mpi_row, my_mpi_col = ccomm.Get_coords(ccomm.rank)

    # Identify our neighbours on the grid: one Shift per grid dimension,
    # each yielding the pair of neighbouring ranks along that dimension.
    neigh = [0, 0, 0, 0]
    neigh[NORTH], neigh[SOUTH] = ccomm.Shift(0, 1)
    neigh[EAST], neigh[WEST] = ccomm.Shift(1, 1)
def optimise_step(self,
                  df_train,
                  df_target,
                  npoints=1,
                  nrandom=1,
                  n_iter=50,
                  set_callbacks=True):
    """Tune the model pipeline once per surrogate model and keep the best.

    The training data is encoded with ``Categorical_encoder`` (and scaled
    with ``Scaler`` when ``df_train`` has float columns).  A ``BayesSearchCV``
    is then run for every entry of ``self.baseEstimator`` via
    ``self.report_perf``, and the run with the highest ``best_score`` is
    selected (ties broken by the lowest ``Time_cost``).

    Parameters
    ----------
    df_train : pandas dataframe
        The train dataset.
    df_target : pandas series
        The target, passed to the encoder, the scaler and the search.
    npoints, nrandom, n_iter : int
        Forwarded unchanged to ``BayesSearchCV``.
    set_callbacks : bool, default True
        When exactly ``True``, each search runs with the callbacks
        ``[self.on_step, DeadlineStopper(60 * 60)]``; otherwise without any.

    Returns
    -------
    best_param
        ``best_parmas`` entry of the winning run (also stored on
        ``self.best_param_``).
    tuning_result : dict
        Maps each surrogate model's name to the dict returned by
        ``self.report_perf`` for that run.

    Raises
    ------
    ValueError
        If ``self.baseEstimator`` is empty, so nothing was evaluated.
    """
    ce = Categorical_encoder()
    X = ce.fit_transform(df_train, df_target)

    scal = None
    if (df_train.dtypes == 'float').any():
        scal = Scaler()
        X = scal.fit_transform(X, df_target)
        # This used to be the no-op comparison ``... is True``.
        self.perform_scaling = True

    # Feature selection is only put in the search space for wide data.
    need_fs = pd.DataFrame(X).shape[1] > 20
    search_spaces = [
        Classifier(strategy=strategy).get_search_spaces(
            need_feature_selection=need_fs)
        for strategy in ("SVC", "LightGBM")
    ]

    def space_keys(space):
        # The original code anticipated entries given as ``(dict, ...)``
        # tuples as well as plain dicts, but tested the list itself instead
        # of the entry; handle both forms here.
        params = space[0] if isinstance(space, tuple) else space
        return params.keys()

    # A feature-selection step is needed as soon as any space tunes it.
    uses_fs = any(
        name.startswith("fs__")
        for space in search_spaces
        for name in space_keys(space))
    fs = feature_selector() if uses_fs else None

    # NOTE(review): the original decided whether to cache transformers by
    # looking up "fs__strategy" directly on the *list* ``search_spaces``,
    # which can never match, so ``Pipeline(memory=self.to_path)`` was never
    # used.  Caching is deliberately left disabled here; enabling it needs a
    # decision on how to read the strategy out of the per-model spaces.

    mprint(f'Start turning Hyperparameters .... ')
    print("")
    print(">>> Categorical Features have encoded with :" +
          str({'strategy': ce.strategy}))
    print("")
    # Keyed on the local scaler so a stale ``self.perform_scaling`` can no
    # longer reach an unbound ``scal``.
    if scal is not None:
        print(">>> Numerical Features have encoded with :" +
              scal.__class__.__name__)
        print("")

    tuning_result = {}
    for baseestimator in self.baseEstimator:
        # Pipeline creation
        lgb = Classifier(strategy="LightGBM").get_estimator()
        steps = [('model', lgb)]
        if fs is not None:
            steps.insert(0, ('fs', fs))
        pipe = Pipeline(steps)

        opt = BayesSearchCV(
            pipe,
            search_spaces=search_spaces,
            scoring=self.scoring,
            cv=self.cv,
            npoints=npoints,
            n_jobs=-1 if self.parallel_strategy is True else 1,
            n_iter=n_iter,
            nrandom=nrandom,
            return_train_score=False,
            optimizer_kwargs={
                'base_estimator': baseestimator,
                "acq_func": "EI"
            },
            random_state=self.random_state,
            verbose=self.verbose,
            refit=self.refit)

        # Surrogates given as strings are used verbatim; estimator objects
        # are reported under their class name (previously only
        # GaussianProcessRegressor instances were handled and any other
        # object crashed on string concatenation).
        if isinstance(baseestimator, str):
            sm_name = baseestimator
        else:
            sm_name = baseestimator.__class__.__name__

        if set_callbacks is True:
            # A fresh DeadlineStopper per run, as before.
            mid_result = self.report_perf(
                opt,
                X,
                df_target,
                ' with Surrogate Model:' + sm_name,
                callbacks=[self.on_step,
                           DeadlineStopper(60 * 60)])
        else:
            mid_result = self.report_perf(
                opt,
                X,
                df_target,
                ' with Surrogate Model: ' + sm_name,
            )
        tuning_result[sm_name] = mid_result

    if not tuning_result:
        raise ValueError(
            "self.baseEstimator is empty: no surrogate model was evaluated")

    # Highest score wins; among equals, the fastest run.  Done in plain
    # Python because DataFrame.append no longer exists in pandas >= 2.0.
    top_score = max(res['best_score'] for res in tuning_result.values())
    best_base_estimator = min(
        (name for name, res in tuning_result.items()
         if res['best_score'] == top_score),
        key=lambda name: tuning_result[name]['Time_cost'])
    best_param = tuning_result[best_base_estimator]['best_parmas']

    print("")
    print('######## Congratulations! Here is the Best Parameters: #######')
    print('Best Score is:', tuning_result[best_base_estimator]['best_score'])
    print('with Surrogate Model ' + best_base_estimator)
    pprint.pprint(best_param)

    self.best_param_ = best_param
    return best_param, tuning_result
#!/usr/bin/env python from __future__ import division from time import time from mpi4py import MPI import numpy as np from util import mprint, num_bytes sizes = [2**n for n in range(1, 24)] runs = 50 comm = MPI.COMM_WORLD mprint("Benchmarking Reduce performance on %d parallel MPI processes..." % comm.size) mprint("%15s | %12s | %12s" % ("Size (bytes)", "Time (msec)", "Bandwidth")) for s in sizes: data = np.ones(s) res = np.empty_like(data) comm.Barrier() t_min = np.inf for i in range(runs): t0 = time() comm.Reduce([data, MPI.DOUBLE], [res, MPI.DOUBLE]) t = time() - t0 t_min = min(t, t_min) comm.Barrier()
tf.global_variables_initializer().run() #TODO Load net seems not work origiter = saver.iter train_batch_nums = len(trainset) // batch_size test_batch_nums = len(testset) // batch_size iters = origiter if origiter > 0: util.loadNet(saver.latest, model, sess) if os.path.isfile('../wts/train/' + mission_path + 'popt.npz'): util.loadAdam('../wts/train/' + mission_path + 'popt.npz', opt, model.weights, sess) util.mprint("Restored to iteration %d" % origiter) test_image_paths = testset[:batch_size] contour, contour_imgs, ori_imgs, sparse_colors = sess.run( [d.precontour, d.imgs_color, imgs_pos, d.sparse_color], feed_dict=d.dict(test_image_paths, gen_learning_rate_val, dis_learning_rate_val, False)) np.save("../results/output.npy", contour_imgs) np.save("../results/sparse.npy", sparse_colors) np.save("../results/ori.npy", ori_imgs) np.save("../results/contour.npy", contour[:, :, :, 0]) for i in range(5): output_img = (255 * (1 + ori_imgs[i]) / 2).astype(int) skimage.io.imsave(os.path.join(result_path, 'ori_' + str(i) + '.jpg'),
def fit(self, X, y_bin, n_iter=10, need_callback=True):
    """Random-search LightGBM and SVC, picking the model per round by
    Thompson sampling.

    ``n_iter`` is consumed in rounds of 10 ``dummy_minimize`` calls.  Before
    each round a Beta draw per model decides which of
    ``self.models["LGBMClassifier"]`` / ``self.models["SVC"]`` is searched
    over ``self.params[...]``; the round's outcome then updates that model's
    success/failure count.

    Parameters
    ----------
    X : pandas dataframe
        Training features; encoded with ``Categorical_encoder`` and, when
        float columns are present afterwards, scaled with ``Scaler``.
    y_bin :
        Target passed to the encoder, the scaler and ``cross_val_score``.
    n_iter : int, default 10
        Total budget; ``int(n_iter / 10)`` rounds are run.
    need_callback : bool, default True
        When truthy, each round stops early once a perfect score is seen.

    Returns
    -------
    dict
        Maps the number of rounds remaining after a round to that round's
        result dict with keys ``best_score``, ``best_params``, ``params``,
        ``all_cv_results``, ``test_score_std``, ``best_score_std``,
        ``CPU_Time`` and ``Time_cost``.

    Raises
    ------
    ValueError
        If ``n_iter`` is too small for a single round.  Previously this
        surfaced as an unrelated ``KeyError`` at the end of the method.

    Side effects: appends to ``self.score_std`` and updates ``self.cand``,
    ``self.clock``, ``self.final_best_score`` and ``self.final_best_std``.
    """
    rounds_left = int(n_iter / 10)
    if rounds_left <= 0:
        raise ValueError(
            "n_iter must be at least 10 (one round of 10 evaluations)")

    enc = encoder.Categorical_encoder()
    X = enc.fit_transform(X, y_bin)
    if (X.dtypes == 'float').any():
        X = Scaler().fit_transform(X, y_bin)

    np.random.seed(55)
    # Thompson-sampling state: successes / failures per model name.
    wins = {"LGBMClassifier": 0, "SVC": 0}
    losses = {"LGBMClassifier": 0, "SVC": 0}
    p_lgb = np.random.beta(wins["LGBMClassifier"] + 1.0,
                           losses["LGBMClassifier"] + 1.0)
    p_svc = np.random.beta(wins["SVC"] + 1.0, losses["SVC"] + 1.0)

    # Wrap ROC AUC as a scorer suitable for model selection
    avg_prec = make_scorer(roc_auc_score,
                           greater_is_better=True,
                           needs_proba=False)

    rs = {}
    mprint("Running RandomSearchCV...")
    while rounds_left > 0:
        key = "LGBMClassifier" if p_lgb >= p_svc else "SVC"
        start = time.time()
        start_cpu = time.process_time()

        model = self.models[key]
        param_keys, param_vecs = (
            list(part) for part in zip(*self.params[key].items()))

        # ``self.score_std`` grows across rounds; remember where this round
        # starts so per-round indices address the right entries.
        std_offset = len(self.score_std)

        def objective(param_vec):
            model.set_params(**dict(zip(param_keys, param_vec)))
            score = cross_val_score(model,
                                    X,
                                    y_bin,
                                    cv=5,
                                    n_jobs=-1,
                                    scoring=avg_prec)
            self.score_std.append(np.std(score))
            # dummy_minimize minimises, so hand it the negated mean score.
            return -np.mean(score)

        def on_step(gp_round):
            # Stop as soon as a perfect score (objective == -1) was seen.
            if np.min(gp_round.func_vals) == -1:
                print('Interrupting!')
                return True
            return False

        minimize_kwargs = dict(func=objective,
                               dimensions=list(param_vecs),
                               n_calls=10,
                               random_state=self.random_state,
                               verbose=self.verbose)
        if need_callback:
            print('Running with Callback function....')
            minimize_kwargs['callback'] = [on_step]
        gp_round = dummy_minimize(**minimize_kwargs)

        # One dict per evaluated candidate, tagged with the model's class.
        candidates = [
            dict({"model": model.__class__.__name__},
                 **dict(zip(param_keys, param_vec)))
            for param_vec in gp_round.x_iters
        ]
        best_index = int(np.argmin(gp_round.func_vals))
        # Copy of this round's stds only: the previous code aliased the
        # ever-growing shared list and indexed it with a per-round index.
        round_std = list(self.score_std[std_offset:])

        clock_time = time.time() - start
        cpu_time = time.process_time() - start_cpu

        rm_result = {
            'best_score': -gp_round.fun,
            'best_params': candidates[best_index],
            # NOTE(review): kept as before -- the parameters of the *last*
            # evaluated candidate only.  Probably meant to be all
            # candidates; confirm with consumers before changing the shape.
            'params': dict(zip(param_keys, gp_round.x_iters[-1])),
            'all_cv_results': -gp_round.func_vals,
            'test_score_std': round_std,
            'best_score_std': round(round_std[best_index], 4),
            'CPU_Time': round(cpu_time, 2),
            'Time_cost': round(clock_time, 2),
        }

        cand = len(rm_result['all_cv_results'])
        cpu = round(cpu_time)
        clock = round(clock_time)
        best_cv_sd = rm_result['best_score_std']
        best_score = round(-gp_round.fun, 4)

        rounds_left -= 1
        rs[rounds_left] = rm_result

        # Reward the model that was actually searched.  The old code compared
        # the class name of ``gp_round.x`` (a plain list of parameter values)
        # with model names, so the counters never moved; it also booked an
        # above-average round as a failure.  Scores are concatenated before
        # averaging because an early-stopped round is shorter.
        pooled = np.concatenate(
            [np.ravel(done['all_cv_results']) for done in rs.values()])
        if rm_result['best_score'] > np.mean(pooled):
            wins[key] += 1
        else:
            losses[key] += 1
        p_lgb = np.random.beta(wins["LGBMClassifier"] + 1.0,
                               losses["LGBMClassifier"] + 1.0)
        p_svc = np.random.beta(wins["SVC"] + 1.0, losses["SVC"] + 1.0)

        self.cand += int(cand)
        self.clock += clock
        if self.final_best_score < best_score:
            self.final_best_score = best_score
            self.final_best_std = best_cv_sd

    print(
        f'Finished, took CPU Time: {cpu}s,clock time: {self.clock}s, candidates checked:{self.cand} ,best CV score: {self.final_best_score} \u00B1 {self.final_best_std}'
    )
    print("")

    # Best round: highest score, ties broken by the shortest wall time.
    # (Previously the leaked loop variable pointed at the last round, and the
    # helper frame relied on DataFrame.append, removed in pandas 2.0.)
    top_score = max(done["best_score"] for done in rs.values())
    best_round = min(
        (idx for idx, done in rs.items() if done['best_score'] == top_score),
        key=lambda idx: rs[idx]['Time_cost'])
    best_param = rs[best_round]['best_params']
    score = rs[best_round]['best_score']

    print("")
    print('######## Congratulations! Here is the Best Parameters: #######')
    print('Best Score is:', score)
    print('Best Model is:')
    pprint.pprint(best_param)
    # rs["Time_cost"] = self.clock
    return rs
(options, args) = parser.parse_args() # Parse dtype argument if options.dtype == "float32": dtype_str = "np.float32" dtype = np.float32 elif options.dtype == "float64": dtype_str = "np.float64" dtype = np.float64 else: print("[FATAL] Unknown type %s" % options.dtype) benches = options.benches.split(",") comm = MPI.COMM_WORLD mprint() mprint("Running %d parallel MPI processes: Results display collective performance" % comm.size) mprint() # Calculate sizes nbytes = options.nbytes * 1024 * 1024 size = nbytes // np.dtype(dtype).itemsize if 'O1' in benches: linear_benchcodes = ( ("x = 1 * a" , 1 , 2 ), ("x = a * a" , 1 , 3 ), ("x = a * b" , 1 , 3 ), ("x = a * b * c" , 2 , 6 ), ("x = a[::2] * b[::2]" , 0.5, 1.5), ("x = np.exp(a)" , 1 , 2 ),
#!/usr/bin/env python from __future__ import division from time import time from mpi4py import MPI import numpy as np from util import mprint, num_bytes sizes = [2**n for n in range(1, 24)] runs = 20 comm = MPI.COMM_WORLD mprint("Benchmarking braodcast performance on %d parallel MPI processes..." % comm.size) mprint("%15s | %12s | %12s" % ("Size (bytes)", "Time (msec)", "Bandwidth (MiBytes/s)")) for s in sizes: data = np.ones(s) comm.Barrier() t0 = time() for i in range(runs): comm.Bcast([data, MPI.DOUBLE], 0) comm.Barrier() t = (time() - t0) / runs mprint("%15d | %12.3f | %12.3f" % (data.nbytes, t * 1000, data.nbytes / t / 1024 / 1024))
rs = np.random.RandomState(0) loss_D_val = 0. loss_G_val = 0. train_batch_nums = len(trainset) // batch_size test_batch_nums = len(testset) // batch_size iters = origiter if origiter > 0: util.loadNet(saver.latest, model, sess) if os.path.isfile('../wts/train/' + mission_path + 'popt.npz'): util.loadAdam('../wts/train/' + mission_path + 'popt.npz', opt, model.weights, sess) for k in range((origiter + train_batch_nums - 1) // train_batch_nums): idx = rs.permutation(len(trainset)) util.mprint("Restored to iteration %d" % origiter) # show display categories show_train_nms = ['Loss_G'] show_test_nms = ['Loss_G_V'] if args.pre == 1: show_train_nms = ['Loss_Recon', 'Loss_G', 'Loss_D', 'Loss_adv_G'] show_test_nms = ['Loss_Recon_V', 'Loss_G_V', 'Loss_D_V', 'Loss_adv_G_V'] print("trainset length: %d" % len(trainset)) while iters <= MAXITER: #TODO dont loop on epoch if iters % train_batch_nums == 0:
#!/usr/bin/env python from __future__ import division import numpy as np from mpi4py import MPI from util import mprint, num_bytes #============================================================================= # Main comm = MPI.COMM_WORLD mprint("-" * 78) mprint(" Running %d parallel processes..." % comm.size) mprint("-" * 78) my_N = 10 + comm.rank my_a = comm.rank * np.ones(my_N) N = comm.allreduce(my_N) a = comm.gather(my_a) mprint("Gathered array: %s" % a)