def run_bohb(runtime, b, cs):
    """Run BOHB over ``cs`` with one background worker.

    A nameserver is started on ``localhost`` (port 0), a single ``MyWorker``
    is launched in the background, and ``BOHB`` is run for 300 iterations
    with budgets between 4 and 108.

    Args:
        runtime: Not used inside this function; kept so existing callers
            keep working.
        b: Forwarded to ``MyWorker`` as its ``b`` keyword argument.
        cs: Configuration space handed to ``BOHB``.

    Returns:
        None. The value returned by ``bohb.run`` is discarded, exactly as in
        the previous version of this function.
    """
    min_budget = 4
    max_budget = 108
    n_iters = 300
    num_workers = 1
    hb_run_id = '0'

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    try:
        workers = []
        for i in range(num_workers):
            w = MyWorker(b=b, run_id=hb_run_id, id=i,
                         nameserver=ns_host, nameserver_port=ns_port)
            w.run(background=True)
            workers.append(w)

        bohb = BOHB(configspace=cs,
                    run_id=hb_run_id,
                    min_budget=min_budget, max_budget=max_budget,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    ping_interval=10, min_bandwidth=0.3)
        try:
            bohb.run(n_iters, min_n_workers=num_workers)
        finally:
            # Always stop the optimizer and its workers, even when the run
            # fails, so no background worker is left behind.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Always stop the nameserver that was started above.
        NS.shutdown()
def optimize_hyperparameters(model_class, parameters, train_and_validate_fn, num_iterations,
                             min_budget=0.01, working_dir="./bohby_workspace/"):
    """Run a BOHB search and return the incumbent's loss and configuration.

    Args:
        model_class: Forwarded to ``WrapWorker``.
        parameters: Passed to ``generate_configspace`` to build the
            configuration space.
        train_and_validate_fn: Forwarded to ``WrapWorker``.
        num_iterations: Number of iterations given to ``bohb.run``.
        min_budget: Minimum budget given to ``BOHB`` (the maximum is fixed
            at 1).
        working_dir: Directory used by the nameserver, the worker, the
            optimizer and the JSON result logger. Created if missing.

    Returns:
        A ``(val_loss, incumbent_config)`` tuple, where ``val_loss`` is the
        ``loss`` of the last run listed for the incumbent id and
        ``incumbent_config`` is that id's ``'config'`` entry.
    """
    # Make sure the working directory exists
    os.makedirs(working_dir, exist_ok=True)

    # Generate a configspace from the given parameters
    config_space = generate_configspace(parameters)

    # Start a local nameserver for communication
    NS = hpns.NameServer(run_id=_runid, nic_name="lo", working_directory=working_dir)
    ns_host, ns_port = NS.start()
    try:
        # Define the worker
        worker = WrapWorker(model_class, train_and_validate_fn,
                            working_directory=working_dir,
                            nameserver=ns_host, nameserver_port=ns_port,
                            run_id=_runid)
        worker.run(background=True)

        # Enable live logging so a run can be canceled at any time and we
        # can still recover the results
        result_logger = json_result_logger(directory=working_dir, overwrite=True)

        # Optimization
        bohb = BOHB(configspace=config_space,
                    working_directory=working_dir,
                    run_id=_runid,
                    eta=2, min_budget=min_budget, max_budget=1,
                    host=ns_host,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    ping_interval=3600,
                    result_logger=result_logger)
        try:
            bohb.run(n_iterations=num_iterations)
        finally:
            # Clean up the optimizer and its workers even if the run failed.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Clean up the nameserver even if setup or the run failed.
        NS.shutdown()

    # Best found config, read back from the logged results in working_dir.
    # NOTE(review): upstream HpBandSter documents this helper as
    # ``logged_results_to_HBS_result``; confirm that ``hpres`` really
    # exposes the ``HB`` spelling used here.
    run_results = hpres.logged_results_to_HB_result(working_dir)
    id2conf = run_results.get_id2config_mapping()

    incumbent_id = run_results.get_incumbent_id()
    incumbent_config = id2conf[incumbent_id]['config']
    incumbent_runs = run_results.get_runs_by_id(incumbent_id)

    val_loss = incumbent_runs[-1].loss
    return val_loss, incumbent_config
def main(xargs, api):
    """Run a BOHB architecture search against ``api`` and report the best arch.

    Args:
        xargs: Parsed options. This function reads ``rand_seed``,
            ``search_space``, ``dataset``, ``num_samples``,
            ``random_fraction``, ``bandwidth_factor``, ``min_bandwidth`` and
            ``n_iters``; the whole object is also passed to
            ``prepare_logger``.
        api: Benchmark API object. It is reset via ``reset_time`` and queried
            with ``query_index_by_arch`` / ``query_info_str_by_arch``.

    Returns:
        A tuple ``(logger.log_dir, current_best_index, total_times)`` where
        ``current_best_index[i]`` is the API index of the best-scoring
        architecture among the first ``i + 1`` trajectory entries of the
        first worker, and ``total_times`` is that worker's ``total_times``.
    """
    torch.set_num_threads(4)
    prepare_seed(xargs.rand_seed)
    # Build the logger from the ``xargs`` parameter; the previous code read a
    # module-level ``args`` and only worked when that global happened to exist.
    logger = prepare_logger(xargs)

    logger.log('{:} use api : {:}'.format(time_string(), api))
    api.reset_time()
    search_space = get_search_spaces(xargs.search_space, 'nats-bench')
    if xargs.search_space == 'tss':
        cs = get_topology_config_space(search_space)
        config2structure = config2topology_func()
    else:
        cs = get_size_config_space(search_space)
        config2structure = config2size_func(search_space)

    hb_run_id = '0'

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    try:
        num_workers = 1
        workers = []
        for i in range(num_workers):
            w = MyWorker(nameserver=ns_host, nameserver_port=ns_port,
                         convert_func=config2structure, dataset=xargs.dataset,
                         api=api, run_id=hb_run_id, id=i)
            w.run(background=True)
            workers.append(w)

        bohb = BOHB(configspace=cs, run_id=hb_run_id,
                    eta=3, min_budget=1, max_budget=12,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    num_samples=xargs.num_samples,
                    random_fraction=xargs.random_fraction,
                    bandwidth_factor=xargs.bandwidth_factor,
                    ping_interval=10, min_bandwidth=xargs.min_bandwidth)
        try:
            bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            # Stop the optimizer and its workers even when the run fails.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Stop the nameserver even when setup or the run fails.
        NS.shutdown()

    # Each trajectory entry is indexed as (score, arch): element 0 is the
    # ranking key and element 1 the architecture.
    trajectory = workers[0].trajectory

    # Track the best entry seen so far in one pass. A strict ``>`` keeps the
    # earliest entry on ties, which matches ``max(prefix, key=...)``.
    current_best_index = []
    best_entry = None
    for entry in trajectory:
        if best_entry is None or entry[0] > best_entry[0]:
            best_entry = entry
        current_best_index.append(api.query_index_by_arch(best_entry[1]))

    best_arch = max(trajectory, key=lambda x: x[0])[1]
    logger.log('Best found configuration: {:} within {:.3f} s'.format(best_arch, workers[0].total_times[-1]))
    info = api.query_info_str_by_arch(best_arch, '200' if xargs.search_space == 'tss' else '90')
    logger.log('{:}'.format(info))
    logger.log('-'*100)
    logger.close()

    return logger.log_dir, current_best_index, workers[0].total_times
def main(xargs, nas_bench):
    """Run a BOHB cell search and look the incumbent up in ``nas_bench``.

    Args:
        xargs: Parsed options. This function reads ``workers``,
            ``rand_seed``, ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``time_budget``,
            ``num_samples``, ``random_fraction``, ``bandwidth_factor``,
            ``min_bandwidth`` and ``n_iters``; the whole object is also
            passed to ``prepare_logger``.
        nas_bench: Benchmark object handed to the worker and queried with
            ``query_by_arch`` / ``query_index_by_arch``.

    Returns:
        A tuple ``(logger.log_dir, arch_index, real_cost_time)`` where
        ``arch_index`` is ``nas_bench.query_index_by_arch(best_arch)`` and
        ``real_cost_time`` is the wall-clock time in seconds spent between
        creating the optimizer and shutting the nameserver down.

    Raises:
        AssertionError: If CUDA is unavailable, or ``xargs.arch_nas_dataset``
            is ``None`` or not an existing file.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Build the logger from the ``xargs`` parameter; the previous code read a
    # module-level ``args`` and only worked when that global happened to exist.
    logger = prepare_logger(xargs)

    if xargs.dataset == "cifar10":
        dataname = "cifar10-valid"
    else:
        dataname = xargs.dataset

    if xargs.data_path is not None:
        train_data, valid_data, xshape, class_num = get_datasets(
            xargs.dataset, xargs.data_path, -1)
        split_Fpath = "configs/nas-benchmark/cifar-split.txt"
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log("Load split file from {:}".format(split_Fpath))
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, {
            "class_num": class_num,
            "xshape": xshape
        }, logger)
        # To split data: the validation set is a copy of the training data
        # that uses the validation transform.
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        # Not referenced again in this function; kept because constructing
        # it may have effects that are not visible from here.
        search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                    valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        logger.log(
            "||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
            .format(xargs.dataset, len(train_loader), len(valid_loader),
                    config.batch_size))
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": train_loader,
            "valid_loader": valid_loader,
        }
    else:
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, None, logger)
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": None,
            "valid_loader": None
        }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces("cell", xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = "0"

    NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
    ns_host, ns_port = NS.start()
    try:
        num_workers = 1

        workers = []
        for i in range(num_workers):
            w = MyWorker(
                nameserver=ns_host,
                nameserver_port=ns_port,
                convert_func=config2structure,
                dataname=dataname,
                nas_bench=nas_bench,
                time_budget=xargs.time_budget,
                run_id=hb_run_id,
                id=i,
            )
            w.run(background=True)
            workers.append(w)

        start_time = time.time()
        bohb = BOHB(
            configspace=cs,
            run_id=hb_run_id,
            eta=3,
            min_budget=12,
            max_budget=200,
            nameserver=ns_host,
            nameserver_port=ns_port,
            num_samples=xargs.num_samples,
            random_fraction=xargs.random_fraction,
            bandwidth_factor=xargs.bandwidth_factor,
            ping_interval=10,
            min_bandwidth=xargs.min_bandwidth,
        )
        try:
            results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            # Stop the optimizer and its workers even when the run fails.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Stop the nameserver even when setup or the run fails.
        NS.shutdown()

    # Measured after both shutdowns, as before.
    real_cost_time = time.time() - start_time

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log("Best found configuration: {:} within {:.3f} s".format(
        id2config[incumbent]["config"], real_cost_time))
    best_arch = config2structure(id2config[incumbent]["config"])

    info = nas_bench.query_by_arch(best_arch, "200")
    if info is None:
        logger.log("Did not find this architecture : {:}.".format(best_arch))
    else:
        logger.log("{:}".format(info))
    logger.log("-" * 100)

    logger.log("workers : {:.1f}s with {:} archs".format(
        workers[0].time_budget, len(workers[0].seen_archs)))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(
        best_arch), real_cost_time
seed=args.seed, nameserver=ns_host, nameserver_port=ns_port, run_id=args.run_id) worker.run(background=True) #instantiate BOHB and run it result_logger = hputil.json_result_logger(directory=args.working_directory, overwrite=True) HPB = BOHB(configspace=worker.get_config_space(), working_directory=args.working_directory, run_id=args.run_id, eta=eta, min_budget=min_budget, max_budget=max_budget, host=ns_host, nameserver=ns_host, nameserver_port=ns_port, ping_interval=3600, result_logger=result_logger) res = HPB.run(n_iterations=args.num_iterations, min_n_workers=args.total_num_workers) with open(os.path.join(args.working_directory, 'results.pkl'), 'wb') as fh: pickle.dump(res, fh) HPB.shutdown(shutdown_workers=True) NS.shutdown()
# Start ``num_workers`` background workers, all registered under the same
# run id on the nameserver at (ns_host, ns_port).
workers = []
for i in range(num_workers):
    w = MyWorker(nameserver=ns_host, nameserver_port=ns_port,
                 run_id=hb_run_id,
                 id=i)
    w.run(background=True)
    workers.append(w)

# Create the optimizer; the sampling-related settings come from the
# command-line ``args``.
bohb = BOHB(
    configspace=cs,
    run_id=hb_run_id,
    eta=3,
    min_budget=min_budget,
    max_budget=max_budget,
    nameserver=ns_host,
    nameserver_port=ns_port,
    # optimization_strategy=args.strategy,
    num_samples=args.num_samples,
    random_fraction=args.random_fraction,
    bandwidth_factor=args.bandwidth_factor,
    ping_interval=10,
    min_bandwidth=args.min_bandwidth)

# Run the optimization, then shut down the optimizer (with its workers)
# and the nameserver.
results = bohb.run(args.n_iters, min_n_workers=num_workers)

bohb.shutdown(shutdown_workers=True)
NS.shutdown()

# NOTE(review): fixed 5 s pause after shutdown; presumably gives background
# threads time to finish before results are read - confirm it is needed.
time.sleep(5)

# For the three nas_cifar10 benchmarks, results are fetched from ``b`` with
# invalid configurations ignored.
if args.benchmark == "nas_cifar10a" or args.benchmark == "nas_cifar10b" or args.benchmark == "nas_cifar10c":
    res = b.get_results(ignore_invalid_configs=True)
nameserver_port=ns_port, run_id=hb_run_id, id=0) evaluator = UltraoptEvaluator(data, 'balanced_accuracy') worker.evaluator = evaluator worker.run(background=True) HDL = get_no_ordinal_HDL() CS = hdl2cs(HDL) CS.seed(trial * 10 + 5) bohb = BOHB( configspace=CS, run_id=hb_run_id, # just test KDE eta=2, min_budget=1, max_budget=1, nameserver=ns_host, nameserver_port=ns_port, num_samples=64, random_fraction=33, bandwidth_factor=3, ping_interval=10, min_bandwidth=.3) results = bohb.run(max_iter, min_n_workers=num_workers) bohb.shutdown(shutdown_workers=True) res[f"trial-{trial}"] = evaluator.losses NS.shutdown() time.sleep(1) res = raw2min(res) m = res.mean(1)
def main(xargs, nas_bench):
    """Run a BOHB cell search on CIFAR-10 and look up the incumbent.

    Args:
        xargs: Parsed options. This function reads ``workers``,
            ``rand_seed``, ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``num_samples``,
            ``random_fraction``, ``bandwidth_factor``, ``min_bandwidth`` and
            ``n_iters``; the whole object is also passed to
            ``prepare_logger``.
        nas_bench: Benchmark object handed to the worker and queried with
            ``query_by_arch`` / ``query_index_by_arch``.

    Returns:
        A tuple ``(logger.log_dir, arch_index)`` where ``arch_index`` is
        ``nas_bench.query_index_by_arch(best_arch)``.

    Raises:
        AssertionError: If CUDA is unavailable, ``xargs.dataset`` is not
            ``'cifar10'``, or ``xargs.arch_nas_dataset`` is ``None`` or not
            an existing file.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Build the logger from the ``xargs`` parameter; the previous code read a
    # module-level ``args`` and only worked when that global happened to exist.
    logger = prepare_logger(xargs)

    assert xargs.dataset == 'cifar10', 'currently only support CIFAR-10'
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
    cifar_split = load_config(split_Fpath, None, None)
    train_split, valid_split = cifar_split.train, cifar_split.valid
    logger.log('Load split file from {:}'.format(split_Fpath))
    config_path = 'configs/nas-benchmark/algos/R-EA.config'
    config = load_config(config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    # To split data: the validation set is a copy of the training data that
    # uses the validation transform.
    train_data_v2 = deepcopy(train_data)
    train_data_v2.transform = valid_data.transform
    valid_data = train_data_v2
    # Not referenced again in this function; kept because constructing it
    # may have effects that are not visible from here.
    search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                valid_split)
    # data loader
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
        num_workers=xargs.workers,
        pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=config.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
        num_workers=xargs.workers,
        pin_memory=True)
    logger.log(
        '||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(train_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))
    extra_info = {
        'config': config,
        'train_loader': train_loader,
        'valid_loader': valid_loader
    }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces('cell', xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = '0'

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    try:
        num_workers = 1

        workers = []
        for i in range(num_workers):
            w = MyWorker(nameserver=ns_host,
                         nameserver_port=ns_port,
                         convert_func=config2structure,
                         nas_bench=nas_bench,
                         run_id=hb_run_id,
                         id=i)
            w.run(background=True)
            workers.append(w)

        bohb = BOHB(configspace=cs,
                    run_id=hb_run_id,
                    eta=3,
                    min_budget=3,
                    max_budget=108,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    num_samples=xargs.num_samples,
                    random_fraction=xargs.random_fraction,
                    bandwidth_factor=xargs.bandwidth_factor,
                    ping_interval=10,
                    min_bandwidth=xargs.min_bandwidth)
        try:
            results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            # Stop the optimizer and its workers even when the run fails.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Stop the nameserver even when setup or the run fails.
        NS.shutdown()

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log('Best found configuration: {:}'.format(
        id2config[incumbent]['config']))
    best_arch = config2structure(id2config[incumbent]['config'])

    info = nas_bench.query_by_arch(best_arch)
    if info is None:
        logger.log('Did not find this architecture : {:}.'.format(best_arch))
    else:
        logger.log('{:}'.format(info))
    logger.log('-' * 100)

    logger.log('workers : {:}'.format(workers[0].test_time))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(best_arch)
def main():
    """Run this process as an ABAS worker or as the BOHB master.

    With ``args.worker`` set, an ``AbasWorker`` is created, pointed at the
    nameserver credentials stored in the shared directory, run in the
    foreground, and the process exits with status 0. Otherwise, unless
    ``args.master`` is set, the process exits with status 1. As master, a
    nameserver and a ``BOHB`` optimizer are started, the optimization is
    run, the result object is pickled to
    ``<logdir>/result_<exp_name>.pkl`` and a short summary is printed.

    Raises:
        SystemExit: In worker mode (status 0) and when neither worker nor
            master mode is selected (status 1).
        KeyError: If ``args.loglevel`` is not one of ``'critical'``,
            ``'warning'``, ``'info'`` or ``'debug'``.
    """
    # Local import so this function does not depend on the file-level
    # import list.
    import sys

    args = parse_args()

    # Set log level
    log_levels = {
        'critical': logging.CRITICAL,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG
    }
    logging.basicConfig(level=log_levels[args.loglevel])

    # Name for the current experiment (optimization, not single training).
    # ``+`` binds tighter than ``or``: the suffix is appended only to the
    # default name, never to an explicitly given ``args.exp_name``.
    exp_name = args.exp_name or get_default_exp_name(args) + (('_' + args.exp_suffix) if args.exp_suffix else '')
    logdir = os.path.join(args.logdir, exp_name)
    shared_dir = os.path.join(logdir, 'master')
    os.makedirs(shared_dir, exist_ok=True)  # Also creates logdir if it does not exist

    host = hpns.nic_name_to_host(args.nic_name)

    # If this is meant to be a worker process, launch it
    if args.worker:
        w = AbasWorker(run_id=exp_name, source=args.source, target=args.target, net=args.net,
                       load_workers=args.load_workers, max_iter=args.max_iter, logdir=args.logdir,
                       ds_root=args.data_root, no_tqdm=args.no_tqdm, gpu=args.gpu,
                       run_n_avg=args.run_n_avg, da_method=args.da, model_criterion=args.criterion,
                       run_model_criterion=args.run_criterion or args.criterion,
                       kill_diverging=args.kill_diverging, host=host, timeout=args.timeout)
        w.load_nameserver_credentials(working_directory=shared_dir)
        w.run(background=False)
        # Nothing to do, exit
        print("Done")
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to be defined.
        sys.exit(0)

    # If we are here we expect to be a master
    if not args.master:
        print("Nothing to do (not a master nor a worker process)")
        sys.exit(1)

    # Running as master!

    # Log info
    Logger(logdir=logdir, run_name='master', use_tqdm=False, use_tb=False)

    # Init the nameserver (random port)
    ns = hpns.NameServer(run_id=exp_name, host=host, port=0, working_directory=shared_dir)
    ns_host, ns_port = ns.start()
    try:
        print("Nameserver on {}:{}".format(ns_host, ns_port))

        # These hyperparameters are passed through the command line and are
        # not optimized
        hp = {
            'base.lr': args.lr,
            'base.bs': args.bs,
            'base.wd': args.wd,
        }

        # Load previous runs
        previous_res = None
        if args.previous != '':
            if os.path.isdir(args.previous):
                previous_res = hpres.logged_results_to_HBS_result(args.previous)
            else:
                # SECURITY: unpickling can execute arbitrary code; only point
                # ``args.previous`` at files from a trusted source.
                with open(args.previous, 'rb') as fp:
                    previous_res = pickle.load(fp)

        # Safe file removal
        remove_file(os.path.join(shared_dir, 'config.json'))
        remove_file(os.path.join(shared_dir, 'results.json'))

        # Launch BOHB
        opt_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False)
        bohb = BOHB(
            configspace=AbasWorker.get_configspace(hp),
            previous_result=previous_res,
            run_id=exp_name,
            min_budget=args.min_budget, max_budget=args.max_budget,
            eta=args.eta,
            host=host,
            nameserver=ns_host, nameserver_port=ns_port,
            ping_interval=15,
            result_logger=opt_logger
        )
        try:
            res = bohb.run(n_iterations=args.num_iterations, min_n_workers=args.num_workers)
        finally:
            # Done: stop the optimizer and its workers even if the run failed.
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Stop the nameserver even if setup or the run failed.
        ns.shutdown()

    # Save results
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    all_runs = res.get_all_runs()

    with open(os.path.join(logdir, 'result_{}.pkl'.format(exp_name)), 'wb') as fp:
        pickle.dump(res, fp)

    print(f"Best found configuration: {id2config[incumbent]['config']}")
    print(f"Total number of sampled unique configurations: {len(id2config.keys())}")
    print(f"Total runs {len(all_runs)}")
    print("ABAS run took {:.1f} seconds".format(
        all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
# Start the nameserver under the same run id that the worker and the
# optimizer use below (it was previously hard-coded to 'example1').
NS = hpns.NameServer(run_id=run_id, host='127.0.0.1', port=0, working_directory=working_dir)
ns_host, ns_port = NS.start()
try:
    # One Keras worker running in the background of this process.
    worker = KerasWorker(nameserver=ns_host, nameserver_port=ns_port, run_id=run_id, timeout=120)
    worker.run(background=True)

    bohb = BOHB(configspace=worker.get_configspace(),
                working_directory=working_dir,
                run_id=run_id,
                min_budget=min_num_epochs, max_budget=max_num_epochs,
                host=ns_host,
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger)
    try:
        res = bohb.run(n_iterations=n_iterations)

        # store results
        with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
            pickle.dump(res, fh)
    finally:
        # shutdown the optimizer and its workers, even if the run or the
        # pickling above failed
        bohb.shutdown(shutdown_workers=True)
finally:
    # shutdown the nameserver, even if setup or the run failed
    NS.shutdown()
# Fixed settings for this BOHB run: one worker, budgets from 30 to 90.
num_workers = 1
min_budget = 30
max_budget = 90

# initialise BOHB: start a nameserver on localhost (port 0) and register
# the background workers with it
NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
ns_host, ns_port = NS.start()

workers = []
for i in range(num_workers):
    w = MyWorker(nameserver=ns_host, nameserver_port=ns_port,
                 run_id=hb_run_id,
                 id=i)
    w.run(background=True)
    workers.append(w)

bohb = BOHB(configspace=search_space,
            run_id=hb_run_id,
            eta=3, min_budget=min_budget, max_budget=max_budget,
            nameserver=ns_host,
            nameserver_port=ns_port,
            ping_interval=10, min_bandwidth=min_bandwidth)

# run BOHB for n_iters + n_init iterations, then shut down the optimizer
# (with its workers) and the nameserver
results = bohb.run(int(args.n_iters+args.n_init), min_n_workers=num_workers)

bohb.shutdown(shutdown_workers=True)
NS.shutdown()

# process the returned results to give the same format
bo_results = []
curr_best = np.inf
# NOTE(review): this excerpt ends inside the loop; ``bo_results`` and
# ``curr_best`` are presumably updated by statements that follow it.
for item in results.data.items():
    key, datum = item
    budget = datum.budget
    hyperparams = datum.config
    # Loss recorded for this entry at its own budget.
    object_values = datum.results[budget]['loss']
workers.append(w) # Step 3: # In the last of the 3 Steps, we create a optimizer object. # It samples configurations from the ConfigurationSpace. # The number of sampled configurations is determined by the # parameters eta, min_budget and max_budget. # After evaluating each configuration, starting with the minimum budget # on the same subset size, only a fraction of 1 / eta of them # 'advances' to the next round. At the same time the current budget will be doubled. # This process runs until the maximum budget is reached. HB = BOHB( configspace=config_space, run_id=run_id, eta=3, min_budget=9, max_budget=243, # HB parameters nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger, ping_interval=10**6) # Then start the optimizer. The n_iterations parameter specifies # the number of iterations to be performed in this run # It will wait till minimum n workers are ready HB.run(n_iterations=4, min_n_workers=num_workers) # After the optimizer run, we shutdown the master. HB.shutdown(shutdown_workers=True) NS.shutdown() # Just to demonstrate, let's read in the logged runs rather than the returned result from HB.run