def run_optimization_algorithm(self, pipeline_config, run_id, ns_host,
                                   ns_port, nameserver, task_id):
        """Run the configured optimizer to completion and tear it down.

        Builds the hyperparameter search space from the pipeline, sets up
        the result loggers, runs the optimizer via ``run_until`` and then
        shuts down the optimizer (including its workers) and the nameserver.

        Args:
            pipeline_config: Mapping of pipeline options. Keys read here:
                "algorithm", "result_logger_dir", "use_tensorboard_logger",
                "min_workers" (only when ``task_id != -1``), "budget_type",
                "max_budget" (only for the "time" budget type),
                "max_runtime" and "num_iterations".
            run_id: Forwarded to the optimizer factory.
            ns_host: Forwarded to the optimizer factory.
            ns_port: Forwarded to the optimizer factory.
            nameserver: Object whose ``shutdown()`` is called at the end.
            task_id: A value of -1 makes the run require exactly one worker.
        """
        search_space = self.pipeline.get_hyperparameter_search_space(
            **pipeline_config)

        self.logger.info("[AutoNet] Start " + pipeline_config["algorithm"])

        # JSON logging is always on; the tensorboard logger is opt-in.
        result_loggers = [
            json_result_logger(directory=pipeline_config["result_logger_dir"],
                               overwrite=True)
        ]
        if pipeline_config['use_tensorboard_logger']:
            result_loggers.append(tensorboard_logger())

        optimizer = self.get_optimization_algorithm_instance(
            config_space=search_space,
            run_id=run_id,
            pipeline_config=pipeline_config,
            ns_host=ns_host,
            ns_port=ns_port,
            loggers=result_loggers)

        if task_id != -1:
            required_workers = pipeline_config["min_workers"]
        else:
            required_workers = 1

        # For time-based budgets one maximum budget is subtracted from the
        # runtime handed to the optimizer.
        if pipeline_config["budget_type"] == "time":
            runtime_reserve = pipeline_config["max_budget"]
        else:
            runtime_reserve = 0

        optimizer.run_until(
            runtime=(pipeline_config["max_runtime"] - runtime_reserve),
            n_iterations=pipeline_config["num_iterations"],
            min_n_workers=required_workers)

        optimizer.shutdown(shutdown_workers=True)
        nameserver.shutdown()
Пример #2
0
def runBOHB(cfg):
    """Run BOHB locally with one background worker and return its result.

    Args:
        cfg: Mapping that provides 'model', "bohb_log_dir",
            "bohb_min_budget", "bohb_max_budget", "bohb_eta" and
            "bohb_iterations"; it is also handed to the worker unchanged.

    Returns:
        The value returned by ``BOHB.run``.
    """
    session_id = "0"
    local_host = "127.0.0.1"

    # Pick a random port in the 30000-40000 range so that a port still
    # blocked by an earlier, improperly terminated BOHB run is unlikely
    # to be reused.
    ns_port = int(30000 + random.random() * 10000)

    name_server = hpns.NameServer(run_id=session_id,
                                  host=local_host,
                                  port=ns_port)
    name_server.start()

    background_worker = BOHBWorker(cfg=cfg,
                                   nameserver=local_host,
                                   run_id=session_id,
                                   nameserver_port=ns_port)
    background_worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=cfg["bohb_log_dir"],
                                           overwrite=True)

    optimizer = BOHB(
        configspace=get_configspace(cfg['model']),
        run_id=session_id,
        min_budget=cfg["bohb_min_budget"],
        max_budget=cfg["bohb_max_budget"],
        eta=cfg["bohb_eta"],
        nameserver=local_host,
        nameserver_port=ns_port,
        result_logger=json_logger,
    )

    outcome = optimizer.run(n_iterations=cfg["bohb_iterations"])

    # Stop the optimizer together with its workers, then the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return outcome
Пример #3
0
def run_master(args):
    """Start the nameserver, a background worker and the BOHB master run.

    Args:
        args: Parsed options. Attributes read here: ``run_id``,
            ``nic_name``, ``bohb_root_path``, ``optimize_generalist``,
            ``n_repeat``, ``time_budget``, ``time_budget_approx``,
            ``performance_matrix`` (generalist mode only), ``dataset``
            (single-dataset mode only), ``previous_run_dir``,
            ``logger_level``, ``n_repeat_lower_budget``,
            ``n_repeat_upper_budget`` and ``n_iterations``.
    """
    NS = hpns.NameServer(run_id=args.run_id,
                         nic_name=args.nic_name,
                         working_directory=args.bohb_root_path)
    ns_host, ns_port = NS.start()

    # Start a background worker for the master node
    if args.optimize_generalist:
        w = AggregateWorker(run_id=args.run_id,
                            host=ns_host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            working_directory=args.bohb_root_path,
                            n_repeat=args.n_repeat,
                            has_repeats_as_budget=args.n_repeat is None,
                            time_budget=args.time_budget,
                            time_budget_approx=args.time_budget_approx,
                            performance_matrix=args.performance_matrix)
    else:
        w = SingleWorker(run_id=args.run_id,
                         host=ns_host,
                         nameserver=ns_host,
                         nameserver_port=ns_port,
                         working_directory=args.bohb_root_path,
                         n_repeat=args.n_repeat,
                         dataset=args.dataset,
                         time_budget=args.time_budget,
                         time_budget_approx=args.time_budget_approx)
    w.run(background=True)

    # Create an optimizer
    result_logger = hpres.json_result_logger(directory=args.bohb_root_path,
                                             overwrite=False)

    # Results of an earlier run are handed to the optimizer when a previous
    # run directory is given. The name must be bound in both branches
    # because it is passed to BOHB unconditionally below.
    if args.previous_run_dir is not None:
        previous_result = hpres.logged_results_to_HBS_result(
            args.previous_run_dir)
    else:
        previous_result = None

    logger = logging.getLogger(__file__)
    logging_level = getattr(logging, args.logger_level)
    logger.setLevel(logging_level)

    optimizer = BOHB(configspace=get_configspace(),
                     run_id=args.run_id,
                     host=ns_host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     min_budget=args.n_repeat_lower_budget,
                     max_budget=args.n_repeat_upper_budget,
                     result_logger=result_logger,
                     logger=logger,
                     previous_result=previous_result)

    # The returned result object is not used here; runs are recorded by
    # the JSON result logger created above.
    optimizer.run(n_iterations=args.n_iterations)

    # Shutdown
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()
Пример #4
0
def optimize_hyperparameters(model_class,
                             parameters,
                             train_and_validate_fn,
                             num_iterations,
                             min_budget=0.01,
                             working_dir="./bohby_workspace/"):
    """Search hyperparameters with BOHB and return the incumbent.

    Args:
        model_class: Passed through to ``WrapWorker``.
        parameters: Description of the search space, converted with
            ``generate_configspace``.
        train_and_validate_fn: Passed through to ``WrapWorker``.
        num_iterations: Number of optimizer iterations to run.
        min_budget: Smallest budget handed to BOHB (the maximum is fixed
            at 1).
        working_dir: Directory used by the nameserver, worker, optimizer
            and JSON result logger; created if missing.

    Returns:
        Tuple ``(val_loss, incumbent_config)`` where ``val_loss`` is the
        loss of the last run listed for the incumbent configuration.
    """
    os.makedirs(working_dir, exist_ok=True)

    search_space = generate_configspace(parameters)

    # Local nameserver on the loopback interface for communication.
    name_server = hpns.NameServer(run_id=_runid,
                                  nic_name="lo",
                                  working_directory=working_dir)
    host, port = name_server.start()

    background_worker = WrapWorker(model_class,
                                   train_and_validate_fn,
                                   working_directory=working_dir,
                                   nameserver=host,
                                   nameserver_port=port,
                                   run_id=_runid)
    background_worker.run(background=True)

    # Live logging: a run can be cancelled at any time and the results
    # written so far can still be recovered from disk.
    live_logger = json_result_logger(directory=working_dir, overwrite=True)

    optimizer = BOHB(configspace=search_space,
                     working_directory=working_dir,
                     run_id=_runid,
                     eta=2,
                     min_budget=min_budget,
                     max_budget=1,
                     host=host,
                     nameserver=host,
                     nameserver_port=port,
                     ping_interval=3600,
                     result_logger=live_logger)

    # The in-memory result is not used; the summary is read from the logs.
    optimizer.run(n_iterations=num_iterations)

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    # Reload the logged results and extract the incumbent.
    logged = hpres.logged_results_to_HB_result(working_dir)
    config_by_id = logged.get_id2config_mapping()

    best_id = logged.get_incumbent_id()
    best_config = config_by_id[best_id]['config']
    best_runs = logged.get_runs_by_id(best_id)

    return best_runs[-1].loss, best_config
Пример #5
0
    def fit(self,
            pipeline_config,
            X_train,
            Y_train,
            X_valid,
            Y_valid,
            refit=False):
        """Configure logging and return the data plus the result loggers.

        The 'autonet' and 'hpbandster' loggers are set to the level that
        ``self.logger_settings`` maps ``pipeline_config['log_level']`` to.

        Args:
            pipeline_config: Mapping providing 'log_level' and, unless
                ``refit`` is set, "result_logger_dir".
            X_train, Y_train, X_valid, Y_valid: Returned unchanged.
            refit: When true no JSON result logger is created and an empty
                list takes its place inside 'result_loggers'.

        Returns:
            Dict with the four data entries, 'result_loggers' (a
            one-element list) and 'shutdownables' (an empty list).
        """
        autonet_log, hpbandster_log = (logging.getLogger(name)
                                       for name in ('autonet', 'hpbandster'))

        verbosity = self.logger_settings[pipeline_config['log_level']]
        for log in (autonet_log, hpbandster_log):
            log.setLevel(verbosity)

        autonet_log.info("Start autonet with config:\n" +
                         str(pipeline_config))

        if refit:
            json_logger = []
        else:
            json_logger = json_result_logger(
                directory=pipeline_config["result_logger_dir"], overwrite=True)

        output = {
            'X_train': X_train,
            'Y_train': Y_train,
            'X_valid': X_valid,
            'Y_valid': Y_valid,
        }
        output['result_loggers'] = [json_logger]
        output['shutdownables'] = []
        return output
Пример #6
0
def run_bohb(exp_name, log_dir='EXP', iterations=20):
    """Run BOHB with a local ``TorchWorker`` and store the results on disk.

    The run directory is named ``bohb-<log_dir>-<exp_name>``. It receives
    the JSON result logs, a pickled result object (``result.pkl``) and a
    text dump of all runs (``summary.txt``).

    Args:
        exp_name: Experiment name; also used as the BOHB run id.
        log_dir: Middle component of the run directory name.
        iterations: Number of optimizer iterations.
    """
    run_dir = 'bohb-{}-{}'.format(log_dir, exp_name)
    if not os.path.exists(run_dir):
        utils.create_exp_dir(run_dir, scripts_to_save=glob.glob('*.py'))

    result_logger = hpres.json_result_logger(directory=run_dir, overwrite=True)

    # Start a nameserver
    NS = hpns.NameServer(run_id=exp_name, host='127.0.0.1', port=0)
    ns_host, ns_port = NS.start()

    # Start a local worker in the background
    worker = TorchWorker(run_id=exp_name, host='127.0.0.1', nameserver=ns_host, nameserver_port=ns_port,
                        timeout=120, run_dir=run_dir)
    worker.run(background=True)

    # Initialise optimiser
    bohb = BOHB(configspace=worker.get_configspace(),
                run_id=exp_name,
                host='127.0.0.1',
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger,
                min_budget=2, max_budget=5,
                )
    print('Worker running')
    res = bohb.run(n_iterations=iterations)

    # Store the results
    with open(os.path.join(run_dir, 'result.pkl'), 'wb') as result_file:
        pickle.dump(res, result_file)

    # Shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # get all runs
    all_runs = res.get_all_runs()

    # get id to configuration mapping as dictionary
    id2conf = res.get_id2config_mapping()

    # get best/incumbent run
    best_run = res.get_incumbent_id()
    best_config = id2conf[best_run]['config']

    print(f"Best run id:{best_run}, \n Config:{best_config}")

    # Store all run info; the context manager closes the file even if the
    # write fails.
    with open(os.path.join(run_dir, 'summary.txt'), 'w') as summary_file:
        summary_file.write(f"{all_runs}")
def run_bohb_parallel(id, run_id, bohb_workers):
    """Run one process of a multi-process BOHB search.

    A process whose ``id`` converts to a positive integer acts as a worker:
    it sleeps for ten seconds, loads the nameserver credentials from the
    working directory, runs its worker loop in the foreground and then
    calls ``exit(0)``. Any other process acts as the master: it starts the
    nameserver, one background worker and the optimizer.

    Args:
        id: Process index; ``int(id) > 0`` selects worker mode.
        run_id: Run identifier shared by nameserver, workers and optimizer;
            also used to look up the working directory.
        bohb_workers: Passed (as ``int``) as ``min_n_workers`` to the
            optimizer's ``run``.

    Returns:
        The optimizer's run result (master process only).
    """
    params = get_bohb_parameters()

    # network interface to use (eth0 or lo)
    nic = get_bohb_interface()

    # directory holding the BOHB logs
    run_dir = get_working_dir(run_id)

    # every process has to look up the host name itself
    local_host = hpns.nic_name_to_host(nic)

    os.makedirs(run_dir, exist_ok=True)

    is_worker = int(id) > 0
    if is_worker:
        print('START NEW WORKER')
        # presumably gives the master time to start the nameserver and
        # write its credentials -- TODO confirm
        time.sleep(10)
        remote_worker = BohbWorker(host=local_host,
                                   run_id=run_id,
                                   working_dir=run_dir)
        remote_worker.load_nameserver_credentials(working_directory=run_dir)
        remote_worker.run(background=False)
        exit(0)

    print('START NEW MASTER')
    name_server = hpns.NameServer(run_id=run_id,
                                  host=local_host,
                                  port=0,
                                  working_directory=run_dir)
    ns_host, ns_port = name_server.start()

    # The master also hosts one worker in a background thread.
    local_worker = BohbWorker(host=local_host,
                              nameserver=ns_host,
                              nameserver_port=ns_port,
                              run_id=run_id,
                              working_dir=run_dir)
    local_worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=run_dir,
                                           overwrite=True)

    optimizer = BohbWrapper(configspace=get_configspace(),
                            run_id=run_id,
                            eta=params['eta'],
                            host=local_host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            min_budget=params['min_budget'],
                            max_budget=params['max_budget'],
                            result_logger=json_logger)

    outcome = optimizer.run(n_iterations=params['iterations'],
                            min_n_workers=int(bohb_workers))

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return outcome
    def test_optimizer(self):
        """Fit a one-node pipeline through OptimizationAlgorithm.

        Checks that the optimized configuration contains a value for the
        node's 'hyper' parameter inside its declared range [0, 30].
        """
        class ResultNode(PipelineNode):
            # Minimal node: the reported loss is the second dimension of
            # the training inputs.

            def fit(self, X_train, Y_train):
                input_width = X_train.shape[1]
                info = {
                    'train_a': input_width,
                    'train_b': Y_train.shape[1]
                }
                return {'loss': input_width, 'info': info}

            def get_hyperparameter_search_space(self, **pipeline_config):
                space = CS.ConfigurationSpace()
                hyper = CSH.UniformIntegerHyperparameter('hyper',
                                                         lower=0,
                                                         upper=30)
                space.add_hyperparameter(hyper)
                return space

            def get_pipeline_config_options(self):
                logger_dir_option = ConfigOption("result_logger_dir",
                                                 default=".",
                                                 type="directory")
                metric_option = ConfigOption("optimize_metric",
                                             default="a",
                                             type=str)
                return [logger_dir_option, metric_option]

        # Only errors from the two libraries should reach the test output.
        for logger_name in ('hpbandster', 'autonet'):
            logging.getLogger(logger_name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        config = pipeline.get_pipeline_config(num_iterations=1,
                                              budget_type='epochs',
                                              result_logger_dir=".")
        pipeline.fit_pipeline(
            pipeline_config=config,
            X_train=np.random.rand(15, 10),
            Y_train=np.random.rand(15, 5),
            X_valid=None,
            Y_valid=None,
            result_loggers=[json_result_logger(directory=".", overwrite=True)],
            dataset_info=None,
            shutdownables=[])

        optimized_config = pipeline[OptimizationAlgorithm.get_name(
        )].fit_output['optimized_hyperparameter_config']
        print(pipeline[OptimizationAlgorithm.get_name()].fit_output)

        hyper_key = ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'
        self.assertIn(optimized_config[hyper_key], list(range(31)))
Пример #9
0
def get_parameters(train_data, kFold, iterations, save=False, filepath = './result/loss_time_bohb.csv'):
    """Run BOHB sequentially on the local machine and return the incumbent.

    Command-line arguments are parsed as well (``--min_budget``,
    ``--max_budget``, ``--n_iterations``, ``--shared_directory``).

    Args:
        train_data: Passed through to the worker.
        kFold: Passed through to the worker.
        iterations: Default for the ``--n_iterations`` option.
        save: When true, the finish time stamp and loss of every run are
            written via ``save_to_csv.save``.
        filepath: Destination handed to ``save_to_csv.save``.

    Returns:
        Tuple ``(parameter, min_error)``: the incumbent configuration and
        the loss of the first run listed for it.
    """
    cli = argparse.ArgumentParser(
        description='Example 1 - sequential and local execution.')
    cli.add_argument('--min_budget',
                     type=float,
                     help='Minimum budget used during the optimization.',
                     default=1)
    cli.add_argument('--max_budget',
                     type=float,
                     help='Maximum budget used during the optimization.',
                     default=1)
    # max value = 4
    cli.add_argument('--n_iterations',
                     type=int,
                     help='Number of iterations performed by the optimizer',
                     default=iterations)
    cli.add_argument(
        '--shared_directory',
        type=str,
        help='A directory that is accessible for all processes, e.g. a NFS share.',
        default='./result')
    options = cli.parse_args()

    json_logger = hpres.json_result_logger(directory=options.shared_directory,
                                           overwrite=True)

    name_server = hpns.NameServer(run_id='BOHB', host='127.0.0.1', port=None)
    name_server.start()

    local_worker = worker(train_data, kFold, nameserver='127.0.0.1', run_id='BOHB')
    local_worker.run(background=True)

    optimizer = BOHB(configspace=local_worker.get_configspace(),
                     run_id='BOHB',
                     nameserver='127.0.0.1',
                     min_budget=options.min_budget,
                     max_budget=options.max_budget,
                     result_logger=json_logger)
    outcome = optimizer.run(n_iterations=options.n_iterations)

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    config_by_id = outcome.get_id2config_mapping()
    best_id = outcome.get_incumbent_id()
    best_runs = outcome.get_runs_by_id(best_id)

    parameter = config_by_id[best_id]['config']
    min_error = best_runs[0]['loss']

    if save:
        every_run = outcome.get_all_runs()
        finish_times = [run['time_stamps']['finished'] for run in every_run]
        losses = [run['loss'] for run in every_run]
        save_to_csv.save(filepath, finish_times, losses)

    return parameter, min_error
Пример #10
0
def generate_bohb_data():
    """Run two BOHB iterations with ``MyWorker`` and log them as JSON.

    Results are written to
    'test/general_example/results/bohb_full_configspace'; nothing is
    returned.
    """
    import warnings
    import hpbandster.core.nameserver as hpns
    import hpbandster.core.result as hpres
    from hpbandster.optimizers import BOHB as BOHB

    # The run id has to be unique among the runs active at the same time.
    session_id = '0'
    name_server = hpns.NameServer(run_id=session_id, host='localhost', port=0)
    host, port = name_server.start()

    from neural_opt import MyWorker, get_configspace

    # The worker uses the same run id as the nameserver.
    background_worker = MyWorker(nameserver=host,
                                 nameserver_port=port,
                                 run_id=session_id)
    background_worker.run(background=True)

    # Keep the optimization results on disk for later analysis.
    json_logger = hpres.json_result_logger(
        directory='test/general_example/results/bohb_full_configspace',
        overwrite=True)

    # eta, min_budget and max_budget are the Hyperband parameters.
    optimizer = BOHB(configspace=get_configspace(),
                     run_id=session_id,
                     eta=2,
                     min_budget=5,
                     max_budget=100,
                     nameserver=host,
                     nameserver_port=port,
                     result_logger=json_logger)

    # Run the optimizer for two iterations with all warnings silenced.
    # The returned result object is not needed; the JSON logger has
    # already recorded the runs.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        optimizer.run(n_iterations=2)

    # Shut down the optimizer (which also stops all workers) and then the
    # nameserver, so that no threads are left running afterwards.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()
Пример #11
0
def run_bohb_serial(run_id, experiment_wrapper):
    """Run BOHB in a single process with one background worker.

    Args:
        run_id: Run identifier; also used to look up the working directory.
        experiment_wrapper: Object providing ``get_bohb_parameters()`` and
            ``get_configspace()``; it is also handed to the worker.

    Returns:
        The optimizer's run result.
    """
    params = experiment_wrapper.get_bohb_parameters()

    # directory holding the BOHB logs
    run_dir = get_working_dir(run_id)

    # Pick a random port in the 30000-40000 range so that a port still
    # blocked by an earlier, improperly terminated BOHB run is unlikely
    # to be reused.
    ns_port = int(30000 + random.random() * 10000)
    local_host = "127.0.0.1"

    name_server = hpns.NameServer(run_id=run_id,
                                  host=local_host,
                                  port=ns_port)
    name_server.start()

    background_worker = BohbWorker(nameserver=local_host,
                                   id=0,
                                   run_id=run_id,
                                   nameserver_port=ns_port,
                                   working_dir=run_dir,
                                   experiment_wrapper=experiment_wrapper)
    background_worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=run_dir,
                                           overwrite=True)

    optimizer = BohbWrapper(
        configspace=experiment_wrapper.get_configspace(),
        run_id=run_id,
        eta=params['eta'],
        min_budget=params['min_budget'],
        max_budget=params['max_budget'],
        random_fraction=params['random_fraction'],
        nameserver=local_host,
        nameserver_port=ns_port,
        result_logger=json_logger)

    outcome = optimizer.run(n_iterations=params['iterations'])

    # Stop the optimizer together with its workers, then the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return outcome
Пример #12
0
    def test_write_new_config(self):
        """A logged config creates both JSON files and is written as one line."""
        space = CS.ConfigurationSpace()
        space.add_hyperparameter(CS.CategoricalHyperparameter('test', [1]))

        with tempfile.TemporaryDirectory() as out_dir:
            configs_path = os.path.join(out_dir, 'configs.json')
            results_path = os.path.join(out_dir, 'results.json')

            json_logger = json_result_logger(out_dir)

            sampled = space.sample_configuration().get_dictionary()
            json_logger.new_config('1', sampled, {'test': 'test'})

            # The directory and both log files must exist afterwards.
            for expected_path in (out_dir, configs_path, results_path):
                self.assertTrue(os.path.exists(expected_path))
            self.assertEqual(json_logger.config_ids, {'1'})

            with open(configs_path) as fh:
                written = fh.read().rstrip()
                self.assertEqual(written,
                                 r'["1", {"test": 1}, {"test": "test"}]')
Пример #13
0
def model_select_bow_vae(args):
    """Run model selection, persist the outcome and retrain the best config.

    Writes ``model_selection_results.pkl`` (the pickled result object) and
    ``best.model.config`` (the incumbent configuration as JSON, with
    'training_epochs' set to ``args.budget``) into the worker's log
    directory.

    Args:
        args: Parsed options. Attributes read here: ``budget``,
            ``config_space``, ``iterations``, ``seed`` and
            ``num_final_evals``; ``args`` is also forwarded to
            ``get_worker``.
    """
    dd = datetime.datetime.now()
    id_str = dd.strftime("%Y-%m-%d_%H-%M-%S")
    ns_port = get_port()
    worker, log_dir = get_worker(args, args.budget, id_str, ns_port)
    worker.search_mode = True
    result_logger = hpres.json_result_logger(directory=log_dir, overwrite=True)
    logging.info("Starting nameserver on port {}".format(ns_port))
    NS = hpns.NameServer(run_id=id_str, host='127.0.0.1', port=ns_port)
    NS.start()
    res = select_model(worker, args.config_space, args.iterations,
                       result_logger, id_str, ns_port)
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    # logging applies %-formatting to the message, so the configuration
    # needs its own placeholder; without one the record fails to format
    # and the configuration is never logged.
    logging.info('Best found configuration: %s',
                 id2config[incumbent]['config'])
    logging.info(
        'Total budget corresponds to %.1f full function evaluations.' %
        (sum([r.budget for r in res.get_all_runs()]) / 32))
    inc_runs = res.get_runs_by_id(incumbent)
    # The last run listed for the incumbent supplies the reported loss and
    # the budget used for retraining.
    inc_run = inc_runs[-1]
    inc_loss = inc_run.loss
    inc_config = id2config[incumbent]['config']
    logging.info("Best configuration loss = {}".format(inc_loss))
    logging.info("Best configuration {}".format(inc_config))
    with open(os.path.join(log_dir, 'model_selection_results.pkl'),
              'wb') as fh:
        pickle.dump(res, fh)
    with open(os.path.join(log_dir, 'best.model.config'), 'w') as fp:
        inc_config['training_epochs'] = args.budget
        specs = json.dumps(inc_config)
        fp.write(specs)
    worker.retrain_best_config(inc_config, inc_run.budget, args.seed,
                               args.num_final_evals)
    dd_finish = datetime.datetime.now()
    logging.info("Model selection run FINISHED. Time: {}".format(dd_finish -
                                                                 dd))
    NS.shutdown()
def runBohbSerial(run_id):
    """Run BOHB in a single process with one background worker.

    Args:
        run_id: Run identifier; also used to look up the working
            directory. NLP datasets are selected when it contains 'NLP',
            speech datasets otherwise.

    Returns:
        The optimizer's run result.
    """
    # directory holding the BOHB logs
    run_dir = get_working_dir(run_id)

    # Pick a random port in the 30000-40000 range so that a port still
    # blocked by an earlier, improperly terminated BOHB run is unlikely
    # to be reused.
    ns_port = int(30000 + random.random() * 10000)
    local_host = "127.0.0.1"

    # choose between NLP and speech datasets
    nlp_mode = 'NLP' in run_id

    name_server = hpns.NameServer(run_id=run_id,
                                  host=local_host,
                                  port=ns_port)
    name_server.start()

    background_worker = BOHBWorker(nameserver=local_host,
                                   run_id=run_id,
                                   nameserver_port=ns_port,
                                   working_dir=run_dir,
                                   use_nlp=nlp_mode)
    background_worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=run_dir,
                                           overwrite=True)

    # NOTE(review): max_budget is set to BOHB_MIN_BUDGET, i.e. the same
    # value as min_budget. This may be a copy-paste slip -- confirm
    # whether a separate maximum-budget constant was intended.
    optimizer = BohbWrapper(configspace=get_configspace(nlp_mode),
                            run_id=run_id,
                            eta=BOHB_ETA,
                            min_budget=BOHB_MIN_BUDGET,
                            max_budget=BOHB_MIN_BUDGET,
                            nameserver=local_host,
                            nameserver_port=ns_port,
                            result_logger=json_logger)

    outcome = optimizer.run(n_iterations=BOHB_ITERATIONS)

    # Stop the optimizer together with its workers, then the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return outcome
Пример #15
0
                                                       log=True)

        #
        cs.add_hyperparameters([
            dropout_rate, tfbs_filter_size, tfbs_kernel_length, dilation_level,
            dilation_kernel_length, compression_filter_size, learning_rate
        ])

        return cs


#
# Directory taken from the command-line arguments; it is used below both
# for the JSON result logs and as the name server's working directory.
shared_dir = args.shared_dir

# JSON result logger writing into the shared directory.
# NOTE(review): overwrite=False presumably refuses to replace existing
# logs -- confirm against the hpbandster documentation.
result_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False)
# Initialize the name server on the loopback address under the run id
# given on the command line.
run_id = args.run_id
NS = hpns.NameServer(
    run_id=run_id,
    host='127.0.0.1',
    working_directory=shared_dir,
)
# start() returns the host and port that are handed to the worker below.
ns_host, ns_port = NS.start()
# define worker
worker = KerasWorker(dataset=args.dataset,
                     use_bg=False,
                     sequence_length=1000,
                     host='127.0.0.1',
                     run_id=run_id,
                     nameserver=ns_host,
Пример #16
0
def _build_argument_parser():
    """Return the parser describing every command-line option of the tuner."""
    parser = argparse.ArgumentParser(
        description=
        'Tensorforce hyperparameter tuner, using BOHB optimizer (Bayesian Optimization '
        'and Hyperband)')
    # Environment arguments (from run.py)
    parser.add_argument(
        '-e',
        '--environment',
        type=str,
        help='Environment (name, configuration JSON file, or library module)')
    parser.add_argument(
        '-l',
        '--level',
        type=str,
        default=None,
        help='Level or game id, like `CartPole-v1`, if supported')
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help='Maximum number of timesteps per episode')
    parser.add_argument(
        '--import-modules',
        type=str,
        default=None,
        help='Import comma-separated modules required for environment')
    # Runner arguments (from run.py)
    parser.add_argument('-n',
                        '--episodes',
                        type=int,
                        help='Number of episodes')
    parser.add_argument(
        '-p',
        '--num-parallel',
        type=int,
        default=None,
        help='Number of environment instances to execute in parallel')
    # Tuner arguments
    parser.add_argument(
        '-r',
        '--runs-per-round',
        type=str,
        default='1,2,5,10',
        help=
        'Comma-separated number of runs per optimization round, each with a successively '
        'smaller number of candidates')
    parser.add_argument(
        '-s',
        '--selection-factor',
        type=int,
        default=3,
        help=
        'Selection factor n, meaning that one out of n candidates in each round advances to '
        'the next optimization round')
    parser.add_argument(
        '-i',
        '--num-iterations',
        type=int,
        default=1,
        help=
        'Number of optimization iterations, each consisting of a series of optimization '
        'rounds with an increasingly reduced candidate pool')
    parser.add_argument('-d',
                        '--directory',
                        type=str,
                        default='tuner',
                        help='Output directory')
    parser.add_argument('--restore',
                        type=str,
                        default=None,
                        help='Restore from given directory')
    parser.add_argument('--id',
                        type=str,
                        default='worker',
                        help='Unique worker id')
    return parser


def main():
    """Tune Tensorforce hyperparameters with BOHB from the command line.

    Parses the command-line options, starts a name server and one
    background worker on localhost, runs the optimizer for the requested
    number of iterations, pickles the results to
    ``<directory>/results.pkl`` and prints a short summary.
    """
    args = _build_argument_parser().parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = dict(environment=args.environment)
    if args.level is not None:
        environment['level'] = args.level

    # Everything runs on the local machine; the name server is created
    # with port=None.
    host = 'localhost'
    port = None

    runs_per_round = tuple(int(x) for x in args.runs_per_round.split(','))
    print('Bayesian Optimization and Hyperband optimization')
    print(
        f'{args.num_iterations} iterations of each {len(runs_per_round)} rounds:'
    )
    # The candidate pool shrinks by the selection factor from round to
    # round, ending with a single candidate in the last round.
    for n, num_runs in enumerate(runs_per_round, start=1):
        num_candidates = round(
            math.pow(args.selection_factor,
                     len(runs_per_round) - n))
        print(f'round {n}: {num_candidates} candidates, each {num_runs} runs')
    print()

    server = NameServer(run_id=args.id,
                        working_directory=args.directory,
                        host=host,
                        port=port)
    nameserver, nameserver_port = server.start()

    worker = TensorforceWorker(
        environment=environment,
        max_episode_timesteps=args.max_episode_timesteps,
        num_episodes=args.episodes,
        base=args.selection_factor,
        runs_per_round=runs_per_round,
        num_parallel=args.num_parallel,
        run_id=args.id,
        nameserver=nameserver,
        nameserver_port=nameserver_port,
        host=host)
    worker.run(background=True)

    if args.restore is None:
        previous_result = None
    else:
        previous_result = logged_results_to_HBS_result(directory=args.restore)

    result_logger = json_result_logger(directory=args.directory,
                                       overwrite=True)

    optimizer = BOHB(configspace=worker.get_configspace(),
                     eta=args.selection_factor,
                     min_budget=0.9,
                     max_budget=math.pow(args.selection_factor,
                                         len(runs_per_round) - 1),
                     run_id=args.id,
                     working_directory=args.directory,
                     nameserver=nameserver,
                     nameserver_port=nameserver_port,
                     host=host,
                     result_logger=result_logger,
                     previous_result=previous_result)
    # Reference signatures:
    # BOHB(configspace=None, eta=3, min_budget=0.01, max_budget=1, min_points_in_model=None,
    # top_n_percent=15, num_samples=64, random_fraction=1 / 3, bandwidth_factor=3,
    # min_bandwidth=1e-3, **kwargs)
    # Master(run_id, config_generator, working_directory='.', ping_interval=60,
    # nameserver='127.0.0.1', nameserver_port=None, host=None, shutdown_workers=True,
    # job_queue_sizes=(-1,0), dynamic_queue_size=True, logger=None, result_logger=None,
    # previous_result = None)
    # logger: logging.logger like object, the logger to output some (more or less meaningful)
    # information

    results = optimizer.run(n_iterations=args.num_iterations)
    # optimizer.run(n_iterations=1, min_n_workers=1, iteration_kwargs={})
    # min_n_workers: int, minimum number of workers before starting the run

    optimizer.shutdown(shutdown_workers=True)
    server.shutdown()

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    # Look the incumbent and the id-to-config mapping up once and reuse
    # them for the summary.
    id2config = results.get_id2config_mapping()
    incumbent_id = results.get_incumbent_id()

    print('Best found configuration: {}'.format(
        id2config[incumbent_id]['config']))
    print('Runs:', results.get_runs_by_id(config_id=incumbent_id))
    print('A total of {} unique configurations where sampled.'.format(
        len(id2config)))
    print('A total of {} runs where executed.'.format(
        len(results.get_all_runs())))
Пример #17
0
def run_bohb(args):
    """Run a BOHB search as master, or serve an existing search as a worker.

    With ``args.hpb_worker`` set, the process only runs a ``BeamWorker`` in
    the foreground and exits with status 0 once the worker returns.
    Otherwise it starts a local nameserver, runs BOHB and prints a summary
    of the search.

    Args:
        args: parsed options. Read here: ``hpb_runs_dir``, ``hpb_worker``,
            ``hpb_overwrite_run`` (optional, defaults to ``False``),
            ``hpb_min_budget``, ``hpb_max_budget``, ``hpb_n_iterations`` and
            ``hpb_n_workers``. ``args`` is also handed to
            ``infer_run_id_from_args``, ``get_conf_space`` and ``BeamWorker``.
    """
    scorer = TrueRougeScorer()

    run_id = infer_run_id_from_args(args)

    run_dir = os.path.join(args.hpb_runs_dir, run_id)

    if args.hpb_worker:
        # Worker mode: serve jobs in the foreground, then end the process.
        w = BeamWorker(args, scorer, nameserver='127.0.0.1', run_id=run_id)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(directory=run_dir,
                                             overwrite=getattr(
                                                 args, 'hpb_overwrite_run',
                                                 False))

    # Step 1: Start a nameserver
    # Every run needs a nameserver. Here it is started on the local machine
    # with the default port. The run_id uniquely identifies this run and must
    # match the one used by the workers.
    NS = hpns.NameServer(run_id=run_id, host='127.0.0.1', port=None)
    NS.start()

    # From here on the nameserver (and below, the master) hold resources that
    # must be released even when the run fails; otherwise the port stays
    # occupied for the next run.
    try:
        # Step 2: Workers
        # The master does not start a worker itself. Workers are separate
        # invocations of this function with ``args.hpb_worker`` set (see the
        # branch above); the run below waits for ``args.hpb_n_workers`` of
        # them before it starts.

        # Step 3: Run an optimizer
        # The run method returns the `Result` that contains all runs
        # performed.
        bohb = BOHB(configspace=get_conf_space(args),
                    run_id=run_id,
                    nameserver='127.0.0.1',
                    result_logger=result_logger,
                    min_budget=args.hpb_min_budget,
                    max_budget=args.hpb_max_budget)
        try:
            res = bohb.run(n_iterations=args.hpb_n_iterations,
                           min_n_workers=args.hpb_n_workers)
        finally:
            # Step 4: Shutdown
            # After the optimizer run, we must shutdown the master ...
            bohb.shutdown(shutdown_workers=True)
    finally:
        # ... and the nameserver.
        NS.shutdown()

    # Step 5: Analysis
    # Print the best configuration and some statistics about the performed
    # runs.
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations where sampled.' %
          len(id2config.keys()))
    print('A total of %i runs where executed.' % len(res.get_all_runs()))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum([r.budget for r in res.get_all_runs()]) / args.hpb_max_budget))
Пример #18
0
    default='.')

# Parse the command-line options declared on `parser`.
args = parser.parse_args()
# Every process (master and workers alike) has to look up the hostname
# that belongs to the requested network interface.
host = hpns.nic_name_to_host(args.nic_name)

# Worker mode: read the nameserver address from the shared directory, serve
# jobs in the foreground and end the process once the worker returns.
if args.worker:
    w = worker('mnist.json',
               path='../surrogate_data',
               run_id=args.run_id,
               host=host)
    w.load_nameserver_credentials(working_directory=args.shared_directory)
    w.run(background=False)
    exit(0)

# Master mode from here on.
# Results are logged as JSON into the shared directory; existing log files
# there are overwritten.
result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                         overwrite=True)

# Start a nameserver. It is given the shared directory as its working
# directory -- presumably where the credentials loaded by the workers above
# are written (confirm against hpns.NameServer).
# NOTE(review): port=0 presumably lets the OS choose a free port -- confirm.
NS = hpns.NameServer(run_id=args.run_id,
                     host=host,
                     port=0,
                     working_directory=args.shared_directory)
ns_host, ns_port = NS.start()

# Create a local worker that is pointed directly at the nameserver address
# returned by NS.start().
w = worker('mnist.json',
           path='../surrogate_data',
           run_id=args.run_id,
           host=host,
           nameserver=ns_host,
           nameserver_port=ns_port)
Пример #19
0
    # BOHB is usually so cheap, that we can
    # affort to run a worker on the master node, too.
    worker = worker(min_budget=min_budget,
                    max_budget=max_budget,
                    eta=eta,
                    search_space=args.space,
                    nasbench_data=nasbench,
                    seed=args.seed,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    run_id=args.run_id)
    worker.run(background=True)

    #instantiate BOHB and run it
    result_logger = hputil.json_result_logger(directory=args.working_directory,
                                              overwrite=True)

    HPB = BOHB(configspace=worker.get_config_space(),
               working_directory=args.working_directory,
               run_id=args.run_id,
               eta=eta,
               min_budget=min_budget,
               max_budget=max_budget,
               host=ns_host,
               nameserver=ns_host,
               nameserver_port=ns_port,
               ping_interval=3600,
               result_logger=result_logger)

    res = HPB.run(n_iterations=args.num_iterations,
                  min_n_workers=args.total_num_workers)
    return skopt_space


if __name__ == "__main__":
    # Script entry point: bring up a nameserver, one in-process worker and a
    # BOHB master, then run the search.
    # NOTE(review): no shutdown of `optim` / `NS` is visible in this excerpt;
    # confirm it happens further down.

    # Start the nameserver; start() returns the host and port that the worker
    # and the optimizer below are pointed at.
    NS = hpns.NameServer(run_id=RUN_ID,
                         host=HOST,
                         port=PORT,
                         working_directory=WORKING_DIRECTORY)
    ns_host, ns_port = NS.start()

    # Start local worker
    worker = TrainEvalWorker(run_id=RUN_ID,
                             nameserver=ns_host,
                             nameserver_port=ns_port)
    # background=True returns immediately, so this process can go on to run
    # the master below.
    worker.run(background=True)
    # Results are logged under data/hpbandster/<RUN_ID>. overwrite=False means
    # existing logs are not replaced.
    # NOTE(review): presumably this raises if logs already exist -- confirm.
    result_logger = hpres.json_result_logger(
        directory="data/hpbandster/{}".format(RUN_ID), overwrite=False)
    # All tunable BOHB settings come from the HPO_PARAMS mapping.
    optim = BOHB(
        configspace=worker.get_configspace(),
        run_id=RUN_ID,
        nameserver=ns_host,
        nameserver_port=ns_port,
        result_logger=result_logger,
        eta=HPO_PARAMS["eta"],
        min_budget=HPO_PARAMS["min_budget"],
        max_budget=HPO_PARAMS["max_budget"],
        num_samples=HPO_PARAMS["num_samples"],
        top_n_percent=HPO_PARAMS["top_n_percent"],
        min_bandwidth=HPO_PARAMS["min_bandwidth"],
        bandwidth_factor=HPO_PARAMS["bandwidth_factor"],
    )
    # Run HPO_PARAMS["n_calls"] iterations; `study` holds the value returned
    # by the optimizer's run().
    study = optim.run(n_iterations=HPO_PARAMS["n_calls"])
Пример #21
0
def fmin(func,
         config_space,
         func_args=(),
         eta=2,
         min_budget=2,
         max_budget=4,
         num_iterations=1,
         num_workers=1,
         output_dir='.'):
    r"""
    Starts a local BOHB optimization run for a function over a hyperparameter
    search space, which is referred to as configuration space.
    This function's purpose is to give a fast and easy way to run BOHB on a
    optimization objective on your local machine.

    The optimized function must satisfy the following conditions:
    - Contain a parameter ``budget``:
        This parameter is passed by the optimizer.
        Its meaning is defined by your interpretation of the budget used by your
        model. For example it may be the number of epochs for a neural network
        to train or the number of datapoints, the model receives.

        The idea is to run many configurations on a small budget and only
        take the best 1/``eta`` of them to the next round. In the next iteration,
        the configurations run on the doubled budget. This is repeated until
        only 2 configurations are left to run on the ``max_budget``.
        Therefore, bad configurations are rejected fast, and the good
        ones are explored more.
        The number of configurations with a minimum budget is calculated similar
        to the optimization run, just reversed. Having 2 configurations with
        ``max_budget``, in the iteration before ``eta``-times many
        configurations with half the budget are sampled, and so on.

    - Hyperparameter from the configuration space object:
        The function must implement all hyperparameters defined in the
        configuration space. The parameter name in the function call must be
        equal to the name of the hyperparameter. Otherwise, an exception will
        be thrown.

    - Function arguments in the right order:
        Function arguments, which are not hyperparameters and therefore not
        defined in the configuration space must be passed to the
        ``fmin`` call in the order of occurrence in the function signature.
        In the example below, the training data, X and y, is a
        use case for this kind of function arguments.

    Example::
        import numpy as np
        from FMin import fmin
        import ConfigSpace as CS

        # Create configuration space
        cs = CS.ConfigurationSpace()
        cs.add_hyperparameter(
            CS.UniformFloatHyperparameter('w', lower=-5, upper=5)
        )

        # Create data from function
        # f(x) = x + :math:`\mathcal{N}(0, 1)`
        X = np.random.uniform(-5, 5, 100)
        y = np.random.normal(X, 1)

        # The function calculates the mean squared error for the first
        # ``budget`` points compared to their responding true values.
        # The expected minimum is at w = 1.
        opt_func = lambda x, y, w, budget: np.mean((y[:int(budget)] - w*x[:int(budget)])**2)

        inc_best, inc_best_cfg, result = fmin(opt_func,
                                                  cs, func_args=(X, y),
                                                  min_budget=3,
                                                  max_budget=len(X),
                                                  num_iterations=3,
                                                  num_workers=1)

    Args:
        func (function): function to optimize. Must return a python scalar!
            See also the section above
            **The optimized function must satisfy the following conditions**
        config_space (ConfigSpace.ConfigurationSpace):
            Definition of the search space containing all hyperparameters
            and their value ranges. You can find its definition in
            the `ConfigSpace repository <https://github.com/automl/ConfigSpace/>`_.
        func_args (tuple): arguments, passed to the function by the user,
            e.g., the data (X,y). These arguments don't include
            optimized parameters. Those are defined in the
            configuration space object and will be passed by the master directly
            to the function.
        eta (float): In each iteration, a complete run of sequential halving
            is executed. In it, after evaluating each configuration on the
            same subset size, only a fraction of 1/eta of them 'advances' to
            the next round. Must be greater or equal to 2.
        min_budget (int, float, optional): Defines the minimum budget to
            evaluate configurations on it.
            In combination with the parameter `max_budget` and `eta`,
            the number of configurations to evaluate is determined.
            Read more about it in the
            `Quickstart <https://automl.github.io/HpBandSter/build/html/quickstart.html#id6>`_.
            By default `min_budget` and `max_budget` is set, so that only a few
            configurations with budgets from 2 to 4 are evaluated.
        max_budget (int, float, optional): Defines the maximum budget to
            evaluate configurations on it.
        num_iterations (int, optional):   number of iterations to be performed
            in this run. By default, this value is set to 1.
        num_workers (int, optional): number of parallel workers. By default, just
            one worker is used.
        output_dir (str, optional): HpBandSter stores the sampled
            configurations and the results on these configurations in two .json
            files. 'configs.json' and 'results.json'. Those files will be stored
            by default in the current directory (default='.').
            The directory, including missing parent directories, is created
            if it does not exist.
            Also, we store the configuration space definition
            ('configspace.json') and the pickled result object
            ('results.pkl') in this directory. They may be used for further
            analysis via `CAVE <https://automl.github.io/CAVE/stable/>`_.

    Returns:
        A tuple ``(inc_value, inc_cfg, result)``:

        inc_value - Loss of the incumbent.
            The ``loss`` entry of the last run that the result object
            reports for the incumbent configuration.

        Dict - Best found configuration.
            Containing the configuration (from the configuration space), which
            achieved the best results in optimization run

        hpbandster.core.result.Result - Result object stores all results from
            all runs, which were evaluated. The incumbent loss and the best
            found configuration are extracted from this results-object.

    """
    import pickle

    output_dir = Path(output_dir)
    # parents=True so that a nested, not yet existing output directory works.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Set up a local nameserver and start it
    ns = hpns.NameServer(run_id='fmin',
                         nic_name=None,
                         working_directory=output_dir)
    ns_host, ns_port = ns.start()

    # From here on the nameserver (and below, the master) must be shut down
    # even when the run fails, hence the try/finally blocks.
    try:
        # Create ``num_workers`` workers and pass the function as well as the
        # function arguments to each of them.
        workers = []
        for _ in range(num_workers):
            worker = FMinWorker(func=func,
                                func_args=func_args,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                run_id='fmin')
            worker.run(background=True)
            workers.append(worker)

        # The result logger will store the intermediate results and the
        # sampled configurations in the passed directory.
        result_logger = hpres.json_result_logger(directory=output_dir,
                                                 overwrite=True)

        # For hyperparameter importance analysis via CAVE we store the
        # configuration space definition to file.
        with open(output_dir / 'configspace.json', 'w') as f:
            f.write(json.write(config_space))

        # Set up a master, which is book keeping and decides what to run next.
        opt = BOHB(configspace=config_space,
                   run_id='fmin',
                   min_budget=min_budget,
                   max_budget=max_budget,
                   eta=eta,
                   host=ns_host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   result_logger=result_logger)
        try:
            # The result object stores run information, e.g. the incumbent
            # trajectory. Force the master to wait until all workers are
            # ready.
            result = opt.run(n_iterations=num_iterations,
                             min_n_workers=num_workers)
        finally:
            # After the run has finished (or failed), shut down the master
            # and the workers.
            opt.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    # Save the result object to file.
    with open(output_dir / 'results.pkl', 'wb') as f:
        pickle.dump(result, f)

    # Return the optimal value and the responding configuration, as well as the
    # result object. The result object can be used in a second step for further
    # hyperparameter importance analysis with CAVE.
    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    inc_value = result.get_runs_by_id(incumbent)[-1]['loss']
    inc_cfg = id2config[incumbent]['config']

    return inc_value, inc_cfg, result
Пример #22
0
def main():
    """Command-line entry point of a HyperBand search (master or worker).

    The options of ``get_train_parser()`` are extended by the search
    settings defined below. With ``--worker`` the process loads the
    nameserver credentials from the shared directory, serves jobs in the
    foreground and exits with status 0. Without it, the process starts the
    nameserver, runs ``HyperBand`` and prints a summary of the search.

    The master does not start a worker of its own: the run waits until
    ``--n-workers`` workers have registered.
    """
    parser = argparse.ArgumentParser(
        parents=[get_train_parser()],
        description='Parallel execution of hyper-tuning',
    )
    parser.add_argument('--run-id',
                        required=True,
                        help='Name of the run')
    parser.add_argument('--min-budget',
                        type=float,
                        help='Minimum budget used during the optimization',
                        default=1)
    parser.add_argument('--max-budget',
                        type=float,
                        help='Maximum budget used during the optimization',
                        default=64)
    parser.add_argument('--n-iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=3)
    parser.add_argument('--n-workers',
                        type=int,
                        help='Number of workers to run in parallel',
                        default=3)
    parser.add_argument('--eta',
                        type=int,
                        help='Parameter of the hyper-tuning algorithm',
                        default=4)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument('--hostname',
                        default=None,
                        help='IP of name server.')
    parser.add_argument('--shared-directory',
                        type=str,
                        help=('A directory that is accessible '
                              'for all processes, e.g. a NFS share'),
                        default='output/hypertune')
    args = parser.parse_args()
    print(args)

    # ``model_type`` is not declared above, so it has to come from the
    # parent parser.
    MyWorker = WORKERS[args.model_type]

    if not args.hostname:
        if socket.gethostname().lower().startswith('lenovo'):
            # If we are on cluster set IP
            args.hostname = hpns.nic_name_to_host('eno1')
        else:
            args.hostname = '127.0.0.1'

    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"),
                        format='%(asctime)s %(message)s',
                        datefmt='%I:%M:%S')

    args.callbacks = ['learning-rate-scheduler', 'early-stopping']

    if args.worker:
        # Start a worker in listening mode (waiting for jobs from master)
        w = MyWorker(
             args,
             run_id=args.run_id,
             host=args.hostname,
        )
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(
        directory=args.shared_directory,
        overwrite=True,
    )

    # Start a name server
    name_server = hpns.NameServer(
        run_id=args.run_id,
        host=args.hostname,
        port=0,
        working_directory=args.shared_directory,
    )
    ns_host, ns_port = name_server.start()

    # The nameserver (and below, the master) must be shut down even when the
    # run fails, hence the try/finally blocks.
    try:
        # Run an optimizer
        optimizer = HyperBand(
            configspace=MyWorker.get_configspace(),  # model can be an arg here?
            run_id=args.run_id,
            result_logger=result_logger,
            eta=args.eta,
            host=args.hostname,
            nameserver=ns_host,
            nameserver_port=ns_port,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )
        try:
            res = optimizer.run(n_iterations=args.n_iterations,
                                min_n_workers=args.n_workers)
        finally:
            # After the optimizer run, we must shutdown the master ...
            optimizer.shutdown(shutdown_workers=True)
    finally:
        # ... and the nameserver.
        name_server.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    inc_runs = res.get_runs_by_id(incumbent)
    # The last run reported for the incumbent is the one whose loss is shown.
    inc_run = inc_runs[-1]
    all_runs = res.get_all_runs()

    print("Best loss {:6.2f}".format(inc_run.loss))
    print('A total of %i unique configurations where sampled.' %
          len(id2config.keys()))
    print('A total of %i runs where executed.' % len(all_runs))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took  %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] -
           all_runs[0].time_stamps['started']))
Пример #23
0
def main():
    """Command-line entry point of the Tensorforce hyperparameter tuner.

    Creates the environment, starts a local nameserver and one background
    ``TensorforceWorker``, runs BOHB for ``--num-iterations`` iterations,
    pickles the result to ``<directory>/results.pkl`` and prints a summary.

    The optimizer, the nameserver and the environment are released in that
    order, also when the run fails.
    """
    # Local import: keeps this function self-contained.
    from contextlib import ExitStack

    parser = argparse.ArgumentParser(description='Tensorforce hyperparameter tuner')
    parser.add_argument(
        'environment', help='Environment (name, configuration JSON file, or library module)'
    )
    parser.add_argument(
        '-l', '--level', type=str, default=None,
        help='Level or game id, like `CartPole-v1`, if supported'
    )
    parser.add_argument(
        '-m', '--max-repeats', type=int, default=1, help='Maximum number of repetitions'
    )
    parser.add_argument(
        '-n', '--num-iterations', type=int, default=1, help='Number of BOHB iterations'
    )
    parser.add_argument(
        '-d', '--directory', type=str, default='tuner', help='Output directory'
    )
    parser.add_argument(
        '-r', '--restore', type=str, default=None, help='Restore from given directory'
    )
    parser.add_argument('--id', type=str, default='worker', help='Unique worker id')
    args = parser.parse_args()

    # Everything runs on this machine; port=None is passed through to the
    # nameserver unchanged.
    host = 'localhost'
    port = None

    # Callbacks run in reverse order of registration, i.e. optimizer first,
    # then nameserver, then environment.
    with ExitStack() as cleanup:
        if args.level is None:
            environment = Environment.create(environment=args.environment)
        else:
            environment = Environment.create(environment=args.environment, level=args.level)
        cleanup.callback(environment.close)

        server = NameServer(
            run_id=args.id, working_directory=args.directory, host=host, port=port
        )
        nameserver, nameserver_port = server.start()
        cleanup.callback(server.shutdown)

        worker = TensorforceWorker(
            environment=environment, run_id=args.id, nameserver=nameserver,
            nameserver_port=nameserver_port, host=host
        )
        worker.run(background=True)

        if args.restore is None:
            previous_result = None
        else:
            previous_result = logged_results_to_HBS_result(directory=args.restore)

        # NOTE(review): overwrite=True replaces existing logs in
        # args.directory -- confirm this is intended when --restore points at
        # the same directory.
        result_logger = json_result_logger(directory=args.directory, overwrite=True)

        optimizer = BOHB(
            configspace=worker.get_configspace(), min_budget=0.5,
            max_budget=float(args.max_repeats),
            run_id=args.id, working_directory=args.directory,
            nameserver=nameserver, nameserver_port=nameserver_port, host=host,
            result_logger=result_logger, previous_result=previous_result
        )
        cleanup.callback(optimizer.shutdown, shutdown_workers=True)

        results = optimizer.run(n_iterations=args.num_iterations)

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    print('Best found configuration:', results.get_id2config_mapping()[results.get_incumbent_id()]['config'])
    print('Runs:', results.get_runs_by_id(config_id=results.get_incumbent_id()))
    print('A total of {} unique configurations where sampled.'.format(len(results.get_id2config_mapping())))
    print('A total of {} runs where executed.'.format(len(results.get_all_runs())))
    print('Total budget corresponds to {:.1f} full function evaluations.'.format(
        sum([r.budget for r in results.get_all_runs()]) / args.max_repeats)
    )
Пример #24
0
def run_opt(args):
    """Run one hyperparameter search with the optimizer named by ``args.method``.

    Starts a nameserver and one background worker, runs the selected
    optimizer for ``args.n_iterations`` iterations and pickles the result to
    ``<args.shared_directory>/results.pkl``. The optimizer and the
    nameserver are shut down afterwards, also when the run fails.

    Args:
        args: parsed options. Read here: ``method`` (one of ``"BOHB"``,
            ``"random"``, ``"BO"``, ``"HB"``), ``nic_name``,
            ``shared_directory``, ``run_id``, ``min_budget``, ``max_budget``
            and ``n_iterations``.

    Raises:
        ValueError: if ``args.method`` is not one of the supported names.
    """
    # Fail fast, before any log file is overwritten or any server started.
    if args.method not in ("BOHB", "random", "BO", "HB"):
        raise ValueError(
            "Unknown optimization method {!r}; expected one of "
            "'BOHB', 'random', 'BO', 'HB'.".format(args.method))

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    try:
        # Start local worker
        w = worker(run_id=args.run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   timeout=120)
        w.run(background=True)

        # All optimizers are constructed with the same keyword arguments.
        optimizer_kwargs = dict(
            configspace=worker.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )

        # The class is looked up only in the selected branch, so optimizers
        # that are not used never need to be resolvable.
        if args.method == "BOHB":
            optimizer_cls = BOHB
        elif args.method == "random":
            optimizer_cls = RandomSearch
        elif args.method == "BO":
            optimizer_cls = BO_Search
        else:  # "HB", guaranteed by the check at the top
            optimizer_cls = HyperBand

        print("[RUNNER] method: {}".format(args.method))
        opt = optimizer_cls(**optimizer_kwargs)

        try:
            res = opt.run(n_iterations=args.n_iterations)

            # store results
            with open(os.path.join(args.shared_directory, 'results.pkl'),
                      'wb') as fh:
                pickle.dump(res, fh)
        finally:
            # shutdown
            opt.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()
Пример #25
0
def run_optimization(args):
    """Run a BOHB search, persist its outcome and announce it via Slack.

    Starts a local nameserver and one background ``SearchWorker``, runs BOHB
    for ``args.iterations`` iterations and writes ``results.pkl`` and
    ``best_config.txt`` into a time-stamped sub-directory of
    ``args.output_dir``. The files are written before the Slack
    notifications are sent, so a failing notification cannot lose the
    results. The optimizer and the nameserver are shut down at the end, also
    when the run fails.

    Args:
        args: parsed options. Read here: ``output_dir``, ``data_path``,
            ``min_budget``, ``max_budget`` and ``iterations``.
    """
    print("Starting name server.")
    date_time = datetime.datetime.now().strftime('%Y-%m-%d-%H_%M_%S')

    # First start nameserver
    NS = hpns.NameServer(run_id=date_time, host='127.0.0.1', port=None)
    NS.start()

    try:
        print("Preparing result logger and loading previous run, if it exists.")

        # Also start result logger
        output_dir = os.path.join(
            args.output_dir,
            datetime.datetime.now().strftime('%Y_%m_%d--%H_%M_%S'))
        result_logger_path = os.path.join(output_dir, 'results_log.json')
        best_result_path = os.path.join(output_dir, 'best_config.txt')

        print("Result logger will be written to %s" % result_logger_path)
        # NOTE(review): output_dir is freshly time-stamped, so this path can
        # presumably never exist and previous_run is always None -- confirm
        # whether restoring an earlier run is still intended.
        if os.path.exists(result_logger_path):
            previous_run = hpres.logged_results_to_HBS_result(result_logger_path)
        else:
            previous_run = None

        result_logger = hpres.json_result_logger(directory=output_dir,
                                                 overwrite=True)

        print("Starting search worker.\n")

        # Then start worker
        w = SearchWorker(args.data_path,
                         iaa,
                         os.path.join(output_dir, "logging"),
                         nameserver='127.0.0.1',
                         run_id=date_time)
        w.run(background=True)

        print("Initializing optimizer.")
        # Run the optimizer
        bohb = BOHB(configspace=w.get_configspace(),
                    run_id=date_time,
                    nameserver='127.0.0.1',
                    result_logger=result_logger,
                    min_budget=args.min_budget,
                    max_budget=args.max_budget,
                    previous_result=previous_run)

        try:
            print("Initialization complete. Starting optimization run.")

            res = bohb.run(n_iterations=args.iterations)

            print("Optimization complete.")
            output_fp = os.path.join(output_dir, 'results.pkl')

            id2config = res.get_id2config_mapping()
            incumbent = res.get_incumbent_id()
            best_config = id2config[incumbent]['config']

            print("Results will be saved at:\n{}".format(output_fp))
            print("Best found configuration: ", best_config)

            # Persist everything first: the notifications below go over the
            # network and must not be able to lose a finished run.
            with open(output_fp, mode='wb') as file:
                pickle.dump(res, file)

            with open(best_result_path, mode='w') as file:
                lines = [
                    "Best results are as follows:\n",
                    "{}".format(best_config)
                ]
                file.writelines(lines)

            Slacker.send_message(
                "AutoML Optimization finished with minimum "
                "budget {}, maximum budget {}, and {} "
                "iterations.\n"
                "Output file has been written in {}    \n".format(
                    args.min_budget, args.max_budget, args.iterations,
                    output_dir),
                "AutoML Optimization Finished!")

            # Short pause between the two Slack calls.
            sleep(2)
            Slacker.send_code("Best found configuration:",
                              "{}".format(best_config))
        finally:
            # Shutdown after completion
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()
Пример #26
0
def main():
    """Print dataset split statistics, then run a local BOHB optimization.

    Image counts are read with ``glob`` from ``data/<split>/<class>/*``
    relative to the current working directory.  Afterwards a nameserver, one
    background worker and a BOHB master are started in this process; the JSON
    result logger writes into the ``--shared_directory`` default.

    The nameserver and the master are shut down even when worker start-up or
    the optimization run raises.
    """
    class_dirs = ("melanoma", "nevus", "seborrheic_keratosis")
    # (directory name, wording used in the printed report)
    splits = (("train", "training"), ("valid", "validation"),
              ("test", "test"))

    # Check quantities of train, validation and test images, overall and
    # per image type.
    totals = {}
    per_class = {}
    for split_dir, _ in splits:
        totals[split_dir] = len(glob("data/{}/*/*".format(split_dir)))
        per_class[split_dir] = [
            len(glob("data/{}/{}/*".format(split_dir, class_dir)))
            for class_dir in class_dirs
        ]

    print(
        "There are {} training images, {} validation images and {} test images."
        .format(totals["train"], totals["valid"], totals["test"]))

    report = (
        "For the {label} images, {mel:=.1f}% ({mel2}) are of melanoma, "
        "{nev:=.1f}% ({nev2}) are of nevus and {seb:=.1f}% ({seb2}) are "
        "for seborrheic keratosis.")
    for split_dir, label in splits:
        total = totals[split_dir]
        mel_count, nev_count, seb_count = per_class[split_dir]
        # An empty (or missing) split would otherwise divide by zero;
        # report 0.0% for every class in that case.
        mel_pct, nev_pct, seb_pct = (
            (count / total * 100) if total else 0.0
            for count in (mel_count, nev_count, seb_count))
        print(
            report.format(label=label,
                          mel=mel_pct,
                          mel2=mel_count,
                          nev=nev_pct,
                          nev2=nev_count,
                          seb=seb_pct,
                          seb2=seb_count))

    # Set HpBandSter logging
    logging.basicConfig(level=logging.DEBUG)

    # Define the parser. Note that key parameters are the min_budget,
    # max_budget, shared_directory and n_iterations.
    parser = argparse.ArgumentParser(
        description='ISIC2017 - CNN on Derm Dataset')
    parser.add_argument('--min_budget',
                        type=float,
                        help='Minimum number of epochs for training.',
                        default=1)
    parser.add_argument('--max_budget',
                        type=float,
                        help='Maximum number of epochs for training.',
                        default=3)
    parser.add_argument('--n_iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=16)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument(
        '--run_id',
        type=str,
        help=
        'A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.'
    )
    parser.add_argument(
        '--nic_name',
        type=str,
        help='Which network interface to use for communication.',
        default='lo')
    parser.add_argument(
        '--shared_directory',
        type=str,
        help=
        'A directory that is accessible for all processes, e.g. a NFS share.',
        default='/home/ubuntu/src/derm-ai/data')
    parser.add_argument(
        '--backend',
        help=
        'Toggles which worker is used. Choose between a pytorch and a keras implementation.',
        choices=['pytorch', 'keras'],
        default='pytorch')
    # NOTE(review): an explicit empty argv is parsed, so only the defaults
    # declared above are ever used (run_id is therefore None) and real
    # command-line flags are ignored. Presumably intentional for notebook
    # use -- confirm before switching to parse_args().
    args = parser.parse_args([])

    host = hpns.nic_name_to_host(args.nic_name)
    # This example shows how to log live results. This is most useful
    # for really long runs, where intermediate results could already be
    # interesting. The core.result submodule contains the functionality to
    # read the two generated files (results.json and configs.json) and
    # create a Result object.
    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)
    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    try:
        # Start local worker
        w = worker(run_id=args.run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   timeout=120)
        w.run(background=True)

        bohb = BOHB(
            configspace=w.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )

        try:
            # Run an optimizer. The returned Result is not used here; the
            # run is persisted through result_logger.
            bohb.run(n_iterations=args.n_iterations)
        finally:
            bohb.shutdown(shutdown_workers=True)
    finally:
        # Always release the nameserver, also when start-up or the run fails.
        NS.shutdown()
Пример #27
0
def run_experiment(out_path, on_travis):
    """Run BOHB on the benchmark and log the best configuration found.

    Args:
        out_path: Directory that receives the live JSON logs, the nameserver
            working files and ``results.pkl``. It is created (including
            missing parents) if it does not exist.
        on_travis: When truthy, the settings are overridden with
            ``get_travis_settings('bohb')`` and the final test evaluation of
            the incumbent is skipped.

    The master and the nameserver are shut down even when the optimization
    run raises.
    """
    settings = {
        'min_budget': 1,
        'max_budget':
        9,  # number of repetitions; this is the fidelity for this bench
        'num_iterations': 10,  # Set this to a low number for demonstration
        'eta': 3,
        'output_dir': Path(out_path)
    }
    if on_travis:
        settings.update(get_travis_settings('bohb'))

    # Read after the optional override so a replaced output_dir is honoured.
    output_dir = settings['output_dir']

    b = Benchmark(rng=1)

    # NOTE(review): the result of this seeded call is discarded and the
    # space actually used below is requested without a seed. Kept as-is in
    # case the call matters to the benchmark -- confirm and drop or reuse.
    b.get_configuration_space(seed=1)
    # parents=True so a nested out_path works on a fresh checkout.
    output_dir.mkdir(parents=True, exist_ok=True)

    cs = b.get_configuration_space()
    rng = get_rng(rng=0)
    run_id = 'BOHB_on_cartpole'

    result_logger = hpres.json_result_logger(directory=str(output_dir),
                                             overwrite=True)

    ns = hpns.NameServer(run_id=run_id,
                         host='localhost',
                         working_directory=str(output_dir))
    ns_host, ns_port = ns.start()

    try:
        worker = CustomWorker(seed=rng,
                              nameserver=ns_host,
                              nameserver_port=ns_port,
                              run_id=run_id,
                              max_budget=settings['max_budget'])
        worker.run(background=True)

        master = BOHB(configspace=cs,
                      run_id=run_id,
                      host=ns_host,
                      nameserver=ns_host,
                      nameserver_port=ns_port,
                      eta=settings['eta'],
                      min_budget=settings['min_budget'],
                      max_budget=settings['max_budget'],
                      result_logger=result_logger)

        try:
            result = master.run(n_iterations=settings['num_iterations'])
        finally:
            master.shutdown(shutdown_workers=True)
    finally:
        # Always release the nameserver, also when start-up or the run fails.
        ns.shutdown()

    with open(output_dir / 'results.pkl', 'wb') as f:
        pickle.dump(result, f)

    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    # Loss of the last run listed for the incumbent configuration.
    inc_value = result.get_runs_by_id(incumbent)[-1]['loss']
    inc_cfg = id2config[incumbent]['config']

    logger.info(f'Inc Config:\n{inc_cfg}\n'
                f'with Performance: {inc_value:.2f}')

    if not on_travis:
        # Evaluate the incumbent on the test objective using a benchmark
        # built from an explicit container source.
        benchmark = Benchmark(container_source='library://phmueller/automl')
        incumbent_result = benchmark.objective_function_test(
            configuration=inc_cfg, fidelity={"budget": settings['max_budget']})
        print(incumbent_result)
Пример #28
0
			# Import a worker class
			from MAXWEL_worker import MAXWEL_worker as worker

			#Build an argument parser       
			parser = argparse.ArgumentParser(description='MAXWEL - sequential execution.')
			parser.add_argument('--min_budget',   type=float, help='Minimum budget used during the optimization.',    default=5)
			parser.add_argument('--max_budget',   type=float, help='Maximum budget used during the optimization.',    default=30)
			parser.add_argument('--n_iterations', type=int,   help='Number of iterations performed by the optimizer', default=10)
			parser.add_argument('--n_workers', type=int,   help='Number of workers to run in parallel.', default=1)
			parser.add_argument('--shared_directory',type=str, help='A directory that is accessible for all processes, e.g. a NFS share.', default='.')

			args=parser.parse_args()

			#Define a realtime result logger
			result_logger = hpres.json_result_logger(directory=result_dir, overwrite=True)


			#Start a nameserver
			NS = hpns.NameServer(run_id='MAXWEL', host='127.0.0.1', port=None)
			NS.start()

			#Start the workers
			workers=[]
			for i in range(args.n_workers):
				w = worker(nameserver='127.0.0.1',run_id='MAXWEL', id=i)
				w.run(background=True)
				workers.append(w)

			#Define and run an optimizer
			bohb = BOHB(configspace = w.get_configspace(),
Пример #29
0
def get_parameters(data, target_feature_index):
    """Run a BOHB search and return the incumbent configuration.

    Command-line flags (budgets, iterations, run id, network interface,
    shared directory) are parsed from ``sys.argv``.  With ``--worker`` the
    process only serves as a worker and exits instead of returning.

    Args:
        data: Passed through unchanged to the ``worker`` constructor.
        target_feature_index: Passed through unchanged to the ``worker``
            constructor.

    Returns:
        A tuple ``(parameter, min_error, feature_importance_dict)``: the
        incumbent's ``'config'`` entry, and the ``'loss'`` and ``'info'``
        fields of the first run listed for the incumbent.
    """
    parser = argparse.ArgumentParser(
        description='Example 1 - sequential and local execution.')
    parser.add_argument('--min_budget',
                        type=float,
                        help='Minimum budget used during the optimization.',
                        default=9)
    parser.add_argument('--max_budget',
                        type=float,
                        help='Maximum budget used during the optimization.',
                        default=243)
    parser.add_argument('--n_iterations',
                        type=int,
                        help='Number of iterations performed by the optimizer',
                        default=4)
    parser.add_argument('--n_workers',
                        type=int,
                        help='Number of workers to run in parallel.',
                        default=2)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument(
        '--run_id',
        type=str,
        help=
        'A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.'
    )
    parser.add_argument(
        '--nic_name',
        type=str,
        help='Which network interface to use for communication.',
        default='lo')
    parser.add_argument(
        '--shared_directory',
        type=str,
        help=
        'A directory that is accessible for all processes, e.g. a NFS share.',
        default='/home/lchen/parameters/result')

    args = parser.parse_args()

    # The nameserver, every worker and the optimizer must agree on one run
    # id. Previously the nameserver and the local worker were pinned to
    # 'test1' while the optimizer and remote workers used --run_id (None by
    # default), so the parts only matched when '--run_id test1' was passed.
    # 'test1' stays the fallback when the flag is omitted.
    run_id = args.run_id if args.run_id is not None else 'test1'

    host = hpns.nic_name_to_host(args.nic_name)

    if args.worker:
        time.sleep(
            5
        )  # short artificial delay to make sure the nameserver is already running
        w = worker(0.5,
                   data,
                   target_feature_index,
                   run_id=run_id,
                   host=host)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)

    # Step 1: Start a nameserver
    # Every run needs a nameserver. It could be a 'static' server with a
    # permanent address, but here it is started for the local machine on a
    # port chosen via port=0.
    # The nameserver manages the concurrent running workers across all possible threads or clusternodes.
    # Note the run_id argument. This uniquely identifies a run of any HpBandSter optimizer.
    NS = hpns.NameServer(run_id=run_id,
                         host=host,
                         port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    try:
        # Step 2: Start a worker
        # Now we can instantiate a worker, providing the mandatory information
        # Besides the sleep_interval, we need to define the nameserver information and
        # the same run_id as above. After that, we can start the worker in the background,
        # where it will wait for incoming configurations to evaluate.
        w = worker(0.5,
                   data,
                   target_feature_index,
                   run_id=run_id,
                   host=host,
                   nameserver=ns_host,
                   nameserver_port=ns_port)
        w.run(background=True)

        # Step 3: Run an optimizer
        # Now we can create an optimizer object and start the run.
        # Here, we run BOHB, but that is not essential.
        # The run method will return the `Result` that contains all runs performed.
        bohb = BOHB(configspace=worker.get_configspace(),
                    run_id=run_id,
                    host=host,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    result_logger=result_logger,
                    min_budget=args.min_budget,
                    max_budget=args.max_budget)
        try:
            # Progress markers printed right before and after the run.
            print("daozhele5")
            res = bohb.run(n_iterations=args.n_iterations)
            print("daozhele6")
        finally:
            # Step 4: Shutdown
            # Stop the optimizer and its workers even if the run failed.
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    # Step 5: Analysis
    # Each optimizer returns a hpbandster.core.result.Result object.
    # It holds information about the optimization run like the incumbent (=best) configuration.
    # For further details about the Result object, see its documentation.
    # Here we extract the best config and the data recorded for it.
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    info = res.get_runs_by_id(incumbent)

    parameter = id2config[incumbent]['config']
    # NOTE(review): index 0 picks the first run listed for the incumbent;
    # if the loss at the largest budget is wanted this may need to be the
    # last entry instead -- confirm against the Result documentation.
    min_error = info[0]['loss']
    feature_importance_dict = info[0]['info']

    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    return parameter, min_error, feature_importance_dict
Пример #30
0
def main(args):
    """Run a BOHB search with local background workers and print a summary.

    Args:
        args: Parsed options. The attributes read here are ``dataset``,
            ``n_split``, ``valprop``, ``result_folder``, ``method``,
            ``network``, ``width``, ``batch_size``, ``run_id``,
            ``data_folder``, ``nic_name``, ``num_workers``, ``early_stop``,
            ``n_iterations``, ``previous_result_folder``, ``min_budget`` and
            ``max_budget``.

    Raises:
        Exception: For ``dataset == 'flights'`` with ``n_split`` other than
            0 or 1.
        ValueError: For an unsupported dataset, or when ``num_workers`` is
            smaller than 1 (no worker would exist to supply the
            configuration space).

    The optimizer and the nameserver are shut down even when worker start-up
    or the run raises.
    """
    uci_datasets = (
        'boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm',
        'naval', 'protein', 'boston_gap', 'concrete_gap', 'energy_gap',
        'power_gap', 'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
        'protein_gap'
    )

    extra_string = ''

    if args.dataset == 'flights':
        if args.n_split == 0:
            extra_string += '_2M'
        elif args.n_split == 1:
            extra_string += '_800k'
        else:
            raise Exception(
                'Only Valid values for flight splits are 0 (2M) or 1 (800k)')
        extra_string += '_valprop_' + str(args.valprop)

    elif args.dataset in uci_datasets:
        extra_string += '_split_' + str(args.n_split)
        extra_string += '_valprop_' + str(args.valprop)

    # The network name is only part of the path for non-default networks.
    method_dir = args.method + ('-' + args.network
                                if args.network != "ResNet" else '')
    working_dir = '/'.join([
        args.result_folder, args.dataset + extra_string, method_dir,
        str(args.width), str(args.batch_size), args.run_id
    ])
    print("WORKING DIR")
    print(working_dir)

    # Create data dir if necessary. makedirs also creates missing parents and
    # exist_ok tolerates another process creating the directory in between.
    if not os.path.exists(args.data_folder):
        os.makedirs(args.data_folder, exist_ok=True)

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=working_dir,
                                             overwrite=False)

    # Resolve the worker factory and its dataset-specific arguments before
    # the nameserver is started, so a bad configuration fails without
    # leaving a nameserver behind.
    if args.dataset == 'spirals':
        make_worker_class = create_SpiralsWorker
        dataset_kwargs = {}
    elif args.dataset == 'flights':
        make_worker_class = create_FlightWorker
        dataset_kwargs = dict(base_dir=args.data_folder,
                              prop_val=args.valprop,
                              k800=(args.n_split == 1))
    elif args.dataset in uci_datasets:
        make_worker_class = create_UCIWorker
        dataset_kwargs = dict(dname=args.dataset,
                              base_dir=args.data_folder,
                              prop_val=args.valprop,
                              n_split=args.n_split)
    else:
        raise ValueError('Dataset not implemented yet!')

    if args.num_workers < 1:
        raise ValueError('num_workers must be at least 1, got %r' %
                         (args.num_workers, ))

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id,
                         host=host,
                         port=0,
                         working_directory=working_dir)
    ns_host, ns_port = NS.start()

    try:
        workers = []
        for i in range(args.num_workers):
            print("CREATING WORKER:", i)
            worker_class = make_worker_class(args.method, args.network,
                                             args.width, args.batch_size)
            worker = worker_class(early_stop=args.early_stop,
                                  run_id=args.run_id,
                                  host=host,
                                  nameserver=ns_host,
                                  nameserver_port=ns_port,
                                  timeout=600,
                                  id=i,
                                  **dataset_kwargs)
            worker.run(background=True)
            workers.append(worker)

        n_iterations = args.n_iterations
        previous_run = None
        if args.previous_result_folder is not None:
            # Best effort: a previous result that cannot be loaded is
            # reported and the search simply starts without it.
            try:
                previous_run = hpres.logged_results_to_HBS_result(
                    args.previous_result_folder)
            except Exception as e:
                print(e)

        # Run an optimizer
        bohb = BOHB(
            configspace=workers[-1].get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
            previous_result=previous_run,
        )

        try:
            res = bohb.run(n_iterations=n_iterations,
                           min_n_workers=args.num_workers)

            # store results
            with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
                pickle.dump(res, fh)
        finally:
            # shutdown
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    all_runs = res.get_all_runs()

    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations where sampled.' %
          len(id2config.keys()))
    print('A total of %i runs where executed.' % len(all_runs))
    print('Total budget corresponds to %.1f full function evaluations.' %
          (sum(r.budget for r in all_runs) / args.max_budget))
    print('The run took  %.1f seconds to complete.' %
          (all_runs[-1].time_stamps['finished'] -
           all_runs[0].time_stamps['started']))