Example #1
0
def HpBandSter(T, NS, tp : TuningProblem, computer : Computer, options: Options = None, run_id="HpBandSter", niter=1):
    """Tune every task in T with the BOHB optimizer from HpBandSter.

    T        : list of tasks to tune
    NS       : number of optimizer iterations per task
    tp       : the tuning problem definition
    computer : execution environment description
    options  : must provide 'budget_min', 'budget_max' and 'budget_base'
    run_id   : identifier shared by the name server, master and workers
    niter    : passed through to HpBandSterWorker

    Returns (data, stats) where data carries the sampled tasks, parameters
    and objective values, and stats carries wall-clock timing information.
    """

    # Budget configuration for successive halving inside BOHB.
    min_budget   = options['budget_min']    # minimum budget used during the optimization
    max_budget   = options['budget_max']    # maximum budget used during the optimization
    budget_base  = options['budget_base']   # eta of successive halving
    n_iterations = NS                       # iterations performed by the optimizer
    n_workers    = 1                        # workers to run in parallel

    X = []
    Y = []
    data = Data(tp)

    # The name server lets the BOHB master and its workers find each other.
    server = hpbandster.core.nameserver.NameServer(run_id=run_id, host='127.0.0.1', port=None)
    server.start()

    stats = {"time_total": 0, "time_fun": 0}

    timefun = 0
    t_begin = time.time_ns()
    for task in T:

        # Spawn the background workers that evaluate configurations for this task.
        workers = []
        for worker_id in range(n_workers):
            worker = HpBandSterWorker(t=task, NS=NS, tp=tp, computer=computer, niter=niter,
                                      run_id=run_id, nameserver='127.0.0.1', id=worker_id)
            worker.run(background=True)
            workers.append(worker)

        # XZ: set eta=3, bmin=.1, bmax=1, so smax=2
        bohb = hpbandster.optimizers.BOHB(configspace=workers[0].get_configspace(),
                                          run_id=run_id, nameserver='127.0.0.1',
                                          min_budget=min_budget, max_budget=max_budget,
                                          eta=budget_base)
        res = bohb.run(n_iterations=n_iterations, min_n_workers=n_workers)

        id2config = res.get_id2config_mapping()
        param_names = tp.parameter_space.dimension_names

        # One parameter vector per sampled configuration, and the
        # (budget, loss) pairs recorded for each of them.
        xs = [[id2config[cid]['config'][name] for name in param_names] for cid in id2config.keys()]
        ys = [[(budget, record['loss']) for budget, record in res[cid].results.items()] for cid in id2config.keys()]

        X.append(xs)
        Y.append(np.array(ys).reshape((len(ys), 1)))
        timefun += workers[0].timefun
        bohb.shutdown(shutdown_workers=True)

    t_end = time.time_ns()
    stats['time_total'] = (t_end - t_begin) / 1e9
    stats['time_fun'] = timefun

    # Finalize
    server.shutdown()

    data.I = T
    data.P = X
    data.O = Y

    return (data, stats)
Example #2
0
    def _load_history_json(self, json_data_path):
        """Read the history JSON database using the configured file-synchronization method."""
        if self.file_synchronization_method == 'filelock':
            with FileLock(json_data_path + ".lock"):
                with open(json_data_path, "r") as f_in:
                    return json.load(f_in)
        elif self.file_synchronization_method == 'rsync':
            temp_path = json_data_path + "." + self.process_uid + ".temp"
            # NOTE(review): shell commands are built by string concatenation; a
            # path containing spaces or shell metacharacters would break or be
            # misinterpreted -- consider subprocess.run([...]) instead.
            os.system("rsync -a " + json_data_path + " " + temp_path)
            with open(temp_path, "r") as f_in:
                history_data = json.load(f_in)
            os.system("rm " + temp_path)
            return history_data
        else:
            with open(json_data_path, "r") as f_in:
                return json.load(f_in)

    def _create_history_json(self, json_data_path):
        """Create an empty history JSON database file using the configured sync method."""
        json_data = {
            "tuning_problem_name": self.tuning_problem_name,
            "model_data": [],
            "func_eval": []
        }
        if self.file_synchronization_method == 'filelock':
            with FileLock(json_data_path + ".lock"):
                with open(json_data_path, "w") as f_out:
                    json.dump(json_data, f_out, indent=2)
        elif self.file_synchronization_method == 'rsync':
            temp_path = json_data_path + "." + self.process_uid + ".temp"
            with open(temp_path, "w") as f_out:
                json.dump(json_data, f_out, indent=2)
            os.system("rsync -u " + temp_path + " " + json_data_path)
            os.system("rm " + temp_path)
        else:
            with open(json_data_path, "w") as f_out:
                json.dump(json_data, f_out, indent=2)

    def _convert_tuning_parameters(self, problem, tuning_parameter):
        """Convert a stored tuning-parameter dict into a typed list ordered like problem.PS."""
        parameter_arr = []
        for space in problem.PS:
            value = tuning_parameter[space.name]
            space_type = type(space).__name__
            if space_type == "Categoricalnorm":
                parameter_arr.append(str(value))
            elif space_type == "Integer":
                parameter_arr.append(int(value))
            elif space_type == "Real":
                parameter_arr.append(float(value))
            else:
                parameter_arr.append(value)
        return parameter_arr

    def load_history_func_eval(self, data: Data, problem: Problem,
                               Igiven: np.ndarray):
        """ Init history database JSON file.

        If a database file already exists for this tuning problem, load every
        compatible function evaluation for the tasks in Igiven into `data`;
        otherwise create an empty database file.
        """
        if self.tuning_problem_name is None:
            return
        json_data_path = self.history_db_path + "/" + self.tuning_problem_name + ".json"
        if not os.path.exists(json_data_path):
            print("[HistoryDB] Create a JSON file at " + json_data_path)
            self._create_history_json(json_data_path)
            return

        print("[HistoryDB] Found a history database file")
        history_data = self._load_history_json(json_data_path)

        num_tasks = len(Igiven)
        num_loaded_data = 0
        PS_history = [[] for _ in range(num_tasks)]
        OS_history = [[] for _ in range(num_tasks)]

        for func_eval in history_data["func_eval"]:
            if not self.check_load_deps(func_eval):
                continue
            task_id = self.search_func_eval_task_id(func_eval, problem, Igiven)
            if task_id == -1:
                continue
            # current policy: allow duplicated samples
            # YL: This makes RCI-based multi-armed bandit much easier to
            # implement, maybe we can add an option for changing this behavior
            # (the previous policy skipped a func eval whose parameters were
            # already loaded for this task; see self.is_parameter_duplication).
            PS_history[task_id].append(
                self._convert_tuning_parameters(problem, func_eval["tuning_parameter"]))
            OS_history[task_id].append(
                [func_eval["evaluation_result"][out_space.name] for out_space in problem.OS])
            num_loaded_data += 1

        if num_loaded_data > 0:
            data.I = Igiven  # IS_history
            data.P = PS_history
            data.O = []  # YL: OS is a list of 2D numpy arrays
            for task_os in OS_history:
                if len(task_os) == 0:
                    data.O.append(np.empty(shape=(0, problem.DO)))
                else:
                    data.O.append(np.array(task_os))
                    if any(ele == [None] for ele in task_os):
                        print(
                            "history data contains null function values"
                        )
                        exit()
        else:
            print("no history data has been loaded")
Example #3
0
    def MB_LCM(self, NS = None, Igiven = None, **kwargs):
        """
        Multi-arm bandit sampling with LCM modeling across budget levels.

        Igiven       : a list of tasks
        NS           : number of samples in the highest budget arm

        Returns (data, stats): a deep copy of self.data holding the
        highest-fidelity samples per task, and accumulated timing stats.
        """

        np.set_printoptions(suppress=False,precision=4)
        print('\n\n\n------Starting MB_LCM (multi-arm bandit with LCM) with %d samples for task'%(NS),Igiven)

        stats = {
            "time_total": 0,
            "time_sample_init": 0,
            "time_fun": 0,
            "time_search": 0,
            "time_model": 0
        }

        # Lower budgets get proportionally more samples than the highest arm.
        self.NSs=[int(self.options['budget_max']/x*NS) for x in self.budgets]
        info = [[x,y] for x,y in zip(self.budgets,self.NSs)]
        print('total samples:',info)

        data = Data(self.tp)   # having the budgets not fully sampled before SH
        data1 = Data(self.tp)  # having the budgets fully sampled before SH
        data1.I=[]
        data1.P=[]
        data1.O=[]
        data1.D=[]

        ntasks = len(Igiven)
        for s in range(len(self.budgets)): # loop over the budget levels
            ns = self.NSs[s]
            # Every task prefixed with each remaining (current and lower)
            # budget value becomes a separate MLA task.
            newtasks = [[self.budgets[s1]] + Igiven[t]
                        for s1 in range(s, len(self.budgets))
                        for t in range(ntasks)]

            gt = GPTune(self.tp, computer=self.computer, data=data, options=self.options)
            (data, modeler, stats0) = gt.MLA(NS=ns, Igiven=newtasks, NI=len(newtasks), NS1=int(ns/2))

            # The first len(Igiven) entries belong to the current budget level,
            # now fully sampled: move them from data into data1.
            data1.I += data.I[0:ntasks]
            data1.P += data.P[0:ntasks]
            data1.O += data.O[0:ntasks]
            data1.D += data.D[0:ntasks]
            del data.I[0:ntasks]
            del data.P[0:ntasks]
            del data.O[0:ntasks]
            del data.D[0:ntasks]

            for key in stats:
                stats[key] += stats0[key]

        self.data.I = Igiven
        self.data.P = data1.P[0:ntasks]  # this will be updated by SH
        self.data.O = data1.O[0:ntasks]  # this will be updated by SH
        # TODO: SH on each arm and return all samples of the highest fidelity in self.data

        return (copy.deepcopy(self.data), stats)