import time

import numpy as np
import hpbandster.core.nameserver
import hpbandster.optimizers

# Data, TuningProblem, Computer, Options and HpBandSterWorker are GPTune-internal
# classes, assumed to be imported from the surrounding package.

def HpBandSter(T, NS, tp: TuningProblem, computer: Computer, options: Options = None, run_id="HpBandSter", niter=1):
    # Initialize
    min_budget = options['budget_min']    # minimum budget used during the optimization
    max_budget = options['budget_max']    # maximum budget used during the optimization
    budget_base = options['budget_base']  # forwarded to BOHB as eta
    n_iterations = NS                     # number of iterations performed by the optimizer
    n_workers = 1                         # number of workers to run in parallel
    X = []
    Y = []
    data = Data(tp)
    server = hpbandster.core.nameserver.NameServer(run_id=run_id, host='127.0.0.1', port=None)
    server.start()

    # Tune
    stats = {
        "time_total": 0,
        "time_fun": 0
    }
    timefun = 0
    t1 = time.time_ns()
    for i in range(len(T)):
        workers = []
        for j in range(n_workers):
            w = HpBandSterWorker(t=T[i], NS=NS, tp=tp, computer=computer, niter=niter,
                                 run_id=run_id, nameserver='127.0.0.1', id=j)
            w.run(background=True)
            workers.append(w)
        # XZ: set eta=3, bmin=.1, bmax=1, so smax=2
        bohb = hpbandster.optimizers.BOHB(configspace=workers[0].get_configspace(),
                                          run_id=run_id, nameserver='127.0.0.1',
                                          min_budget=min_budget, max_budget=max_budget,
                                          eta=budget_base)
        res = bohb.run(n_iterations=n_iterations, min_n_workers=n_workers)
        config_mapping = res.get_id2config_mapping()
        xs = [[config_mapping[idx]['config'][p] for p in tp.parameter_space.dimension_names]
              for idx in config_mapping.keys()]
        ys = [[(k, v['loss']) for k, v in res[idx].results.items()]
              for idx in config_mapping.keys()]
        X.append(xs)
        Y.append(np.array(ys).reshape((len(ys), 1)))
        timefun = timefun + workers[0].timefun
        bohb.shutdown(shutdown_workers=True)
    t2 = time.time_ns()
    stats['time_total'] = (t2 - t1) / 1e9
    stats['time_fun'] = timefun

    # Finalize
    server.shutdown()
    data.I = T
    data.P = X
    data.O = Y

    return (data, stats)
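# --- Usage sketch (illustrative only) -------------------------------------------
# A minimal sketch of how HpBandSter() might be driven, assuming a TuningProblem
# `tp` and a Computer `computer` built elsewhere. The budget values and task list
# below are made-up placeholders, and this helper is not part of GPTune itself.
def _demo_hpbandster(tp, computer):
    options = Options()
    options['budget_min'] = 0.1   # lowest fidelity BOHB may request
    options['budget_max'] = 1.0   # full-fidelity budget
    options['budget_base'] = 3    # successive-halving rate (eta)
    T = [[1.0], [2.0]]            # two example tasks
    (data, stats) = HpBandSter(T=T, NS=20, tp=tp, computer=computer, options=options)
    print("total time %.2fs, of which %.2fs in function evaluations"
          % (stats['time_total'], stats['time_fun']))
    return data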
def load_history_func_eval(self, data: Data, problem: Problem, Igiven: np.ndarray):
    """ Initialize the history database JSON file and load stored function evaluations. """
    if self.tuning_problem_name is None:
        return
    json_data_path = self.history_db_path + "/" + self.tuning_problem_name + ".json"

    if os.path.exists(json_data_path):
        print("[HistoryDB] Found a history database file")
        if self.file_synchronization_method == 'filelock':
            with FileLock(json_data_path + ".lock"):
                with open(json_data_path, "r") as f_in:
                    history_data = json.load(f_in)
        elif self.file_synchronization_method == 'rsync':
            temp_path = json_data_path + "." + self.process_uid + ".temp"
            os.system("rsync -a " + json_data_path + " " + temp_path)
            with open(temp_path, "r") as f_in:
                history_data = json.load(f_in)
            os.system("rm " + temp_path)
        else:
            with open(json_data_path, "r") as f_in:
                history_data = json.load(f_in)

        num_tasks = len(Igiven)
        num_loaded_data = 0
        PS_history = [[] for i in range(num_tasks)]
        OS_history = [[] for i in range(num_tasks)]

        for func_eval in history_data["func_eval"]:
            if not self.check_load_deps(func_eval):
                continue
            task_id = self.search_func_eval_task_id(func_eval, problem, Igiven)
            if task_id == -1:
                continue
            # Current policy: allow duplicated samples.
            # YL: this makes RCI-based multi-armed bandits much easier to implement;
            # an option to skip duplicates could be added later, e.g.
            # if self.is_parameter_duplication(problem, PS_history[task_id], func_eval["tuning_parameter"]): continue
            parameter_arr = []
            for k in range(len(problem.PS)):
                if type(problem.PS[k]).__name__ == "Categoricalnorm":
                    parameter_arr.append(str(func_eval["tuning_parameter"][problem.PS[k].name]))
                elif type(problem.PS[k]).__name__ == "Integer":
                    parameter_arr.append(int(func_eval["tuning_parameter"][problem.PS[k].name]))
                elif type(problem.PS[k]).__name__ == "Real":
                    parameter_arr.append(float(func_eval["tuning_parameter"][problem.PS[k].name]))
                else:
                    parameter_arr.append(func_eval["tuning_parameter"][problem.PS[k].name])
            PS_history[task_id].append(parameter_arr)
            OS_history[task_id].append(
                [func_eval["evaluation_result"][problem.OS[k].name]
                 for k in range(len(problem.OS))])
            num_loaded_data += 1

        if num_loaded_data > 0:
            data.I = Igiven
            data.P = PS_history
            data.O = []  # YL: OS is a list of 2D numpy arrays
            for i in range(len(OS_history)):
                if len(OS_history[i]) == 0:
                    data.O.append(np.empty(shape=(0, problem.DO)))
                else:
                    data.O.append(np.array(OS_history[i]))
                    if any(ele == [None] for ele in OS_history[i]):
                        print("history data contains null function values")
                        exit()
        else:
            print("no history data has been loaded")
    else:
        print("[HistoryDB] Create a JSON file at " + json_data_path)
        json_data = {
            "tuning_problem_name": self.tuning_problem_name,
            "model_data": [],
            "func_eval": []
        }
        if self.file_synchronization_method == 'filelock':
            with FileLock(json_data_path + ".lock"):
                with open(json_data_path, "w") as f_out:
                    json.dump(json_data, f_out, indent=2)
        elif self.file_synchronization_method == 'rsync':
            temp_path = json_data_path + "." + self.process_uid + ".temp"
            with open(temp_path, "w") as f_out:
                json.dump(json_data, f_out, indent=2)
            os.system("rsync -u " + temp_path + " " + json_data_path)
            os.system("rm " + temp_path)
        else:
            with open(json_data_path, "w") as f_out:
                json.dump(json_data, f_out, indent=2)
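# --- Illustrative history-file layout (inferred, not authoritative) --------------
# load_history_func_eval() above reads a JSON file shaped like the skeleton it
# creates, with one "func_eval" entry per evaluation. The field values below are
# invented, and real files also carry the metadata consulted by check_load_deps():
#
# {
#   "tuning_problem_name": "demo",
#   "model_data": [],
#   "func_eval": [
#     {
#       "tuning_parameter": {"nb": 128, "npernode": 4},   # one key per problem.PS entry
#       "evaluation_result": {"runtime": 1.23}            # one key per problem.OS entry
#     }
#   ]
# }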
def MB_LCM(self, NS=None, Igiven=None, **kwargs):
    """
    Igiven : a list of tasks
    NS     : number of samples in the highest-budget arm
    """
    np.set_printoptions(suppress=False, precision=4)
    print('\n\n\n------Starting MB_LCM (multi-armed bandit with LCM) with %d samples for tasks' % NS, Igiven)

    stats = {
        "time_total": 0,
        "time_sample_init": 0,
        "time_fun": 0,
        "time_search": 0,
        "time_model": 0
    }

    # Allocate more samples to cheaper (lower-budget) arms.
    self.NSs = [int(self.options['budget_max'] / x * NS) for x in self.budgets]
    info = [[x, y] for x, y in zip(self.budgets, self.NSs)]
    print('total samples:', info)

    data = Data(self.tp)   # budgets not yet fully sampled before successive halving (SH)
    data1 = Data(self.tp)  # budgets fully sampled before SH
    data1.I = []
    data1.P = []
    data1.O = []
    data1.D = []

    for s in range(len(self.budgets)):  # loop over the budget levels
        ns = self.NSs[s]
        newtasks = []
        for s1 in range(s, len(self.budgets)):
            for t in range(len(Igiven)):
                budget1 = self.budgets[s1]
                tmp = [budget1] + Igiven[t]
                newtasks.append(tmp)
        gt = GPTune(self.tp, computer=self.computer, data=data, options=self.options)
        (data, modeler, stats0) = gt.MLA(NS=ns, Igiven=newtasks, NI=len(newtasks), NS1=int(ns / 2))
        data1.I += data.I[0:len(Igiven)]
        data1.P += data.P[0:len(Igiven)]
        data1.O += data.O[0:len(Igiven)]
        data1.D += data.D[0:len(Igiven)]
        del data.I[0:len(Igiven)]
        del data.P[0:len(Igiven)]
        del data.O[0:len(Igiven)]
        del data.D[0:len(Igiven)]
        stats['time_total'] += stats0['time_total']
        stats['time_fun'] += stats0['time_fun']
        stats['time_model'] += stats0['time_model']
        stats['time_search'] += stats0['time_search']
        stats['time_sample_init'] += stats0['time_sample_init']

    self.data.I = Igiven
    self.data.P = data1.P[0:len(Igiven)]  # this will be updated by SH
    self.data.O = data1.O[0:len(Igiven)]  # this will be updated by SH
    # TODO: run SH on each arm and return all samples of the highest fidelity in self.data

    return (copy.deepcopy(self.data), stats)
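# --- Worked example of the per-arm sample counts (illustrative) -------------------
# With budgets [0.1, 0.5, 1.0], budget_max = 1.0 and NS = 10, the formula above
# gives NSs = [int(1.0/0.1*10), int(1.0/0.5*10), int(1.0/1.0*10)] = [100, 20, 10],
# i.e. cheaper arms receive proportionally more samples so that every arm consumes
# roughly the same total budget before successive halving starts. The budget
# values here are assumed for illustration, not taken from the code above.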