Пример #1
0
 def test_multi_rest_config_space(self):
     """Run an optimization over a search space with nested choice nodes.

     The HDL dict has one top-level ``feature_engineer(choice)`` node with
     three alternatives (feature selection, polynomial features and
     decomposition); two of them contain further ``(choice)`` levels. The
     dict is converted with ``hdl2cs`` and handed to ``fmin`` together
     with ``evaluate``, the "ETPE" optimizer and 30 iterations.
     """

     # Leaf builders: every call returns a fresh dict, so no leaf object is
     # shared between two places of the search space.
     def bool_choice():
         return {"_type": "choice", "_value": [True, False]}

     def n_components_range():
         return {"_type": "uniform", "_value": [0.8, 0.95]}

     wrapper_methods = {
         "RandomForest": {
             "n_iterations": {
                 "_type": "int_quniform",
                 "_value": [10, 100, 10],
             },
             "max_depth": {
                 "_type": "int_quniform",
                 "_value": [3, 7, 2],
             },
         },
         "LinearRegression": {
             "C": {
                 "_type": "loguniform",
                 "_value": [0.01, 10000],
                 "_default": 1.0,
             },
         },
     }
     filter_method = {
         "score_func": {
             "_type": "choice",
             "_value": ["pearsonr", "spearmanr"],
         },
     }
     polynomial_features = {
         "degree": {"_type": "int_uniform", "_value": [2, 3]},
         "interaction_only": bool_choice(),
     }
     decompositions = {
         "PCA": {
             "n_components": n_components_range(),
             "whiten": bool_choice(),
         },
         "KernelPCA": {
             "n_components": n_components_range(),
             "whiten": bool_choice(),
         },
         "ICA": {},
     }

     # Assemble the full hierarchy in the same key order as a single literal.
     search_space = {
         "feature_engineer(choice)": {
             "feature_selection(choice)": {
                 "wrapper(choice)": wrapper_methods,
                 "filter": filter_method,
             },
             "PolynomialFeatures": polynomial_features,
             "decomposition(choice)": decompositions,
         },
     }
     fmin(evaluate, hdl2cs(search_space), "ETPE", n_iterations=30)
Пример #2
0
 def test_conditions_and_fobidden(self):
     """Run an optimization over a space using ``__activate``/``__forbidden``.

     The ``model(choice)`` node offers three alternatives:

     * ``linearsvc`` - carries a ``__forbidden`` list of value combinations,
     * ``svc`` - carries an ``__activate`` mapping keyed on ``kernel``,
     * ``mock`` - every ``svc`` entry followed by the ``linearsvc``-only
       entries, i.e. both special keys in one sub-space.

     The dict is converted with ``hdl2cs`` and passed to ``fmin`` with
     ``evaluate``, the "ETPE" optimizer and 30 iterations.
     """

     # Each builder returns a brand-new dict on every call so that the three
     # alternatives never share any mutable object.
     def regularization_c():
         return {
             "_type": "loguniform",
             "_value": [0.01, 10000],
             "_default": 1.0,
         }

     def linearsvc_space():
         return {
             "max_iter": {
                 "_type": "int_quniform",
                 "_value": [300, 3000, 100],
                 "_default": 600,
             },
             "penalty": {
                 "_type": "choice",
                 "_value": ["l1", "l2"],
                 "_default": "l2",
             },
             "dual": {
                 "_type": "choice",
                 "_value": [True, False],
                 "_default": False,
             },
             "loss": {
                 "_type": "choice",
                 "_value": ["hinge", "squared_hinge"],
                 "_default": "squared_hinge",
             },
             "C": regularization_c(),
             "__forbidden": [
                 {"penalty": "l1", "loss": "hinge"},
                 {"penalty": "l2", "dual": False, "loss": "hinge"},
                 {"penalty": "l1", "dual": False},
                 {"penalty": "l1", "dual": True, "loss": "squared_hinge"},
             ],
         }

     def svc_space():
         return {
             "C": regularization_c(),
             "kernel": {
                 "_type": "choice",
                 "_value": ["rbf", "poly", "sigmoid"],
                 "_default": "rbf",
             },
             "degree": {
                 "_type": "int_uniform",
                 "_value": [2, 5],
                 "_default": 3,
             },
             "gamma": {
                 "_type": "loguniform",
                 "_value": [1e-05, 8],
                 "_default": 0.1,
             },
             "coef0": {
                 "_type": "quniform",
                 "_value": [-1, 1],
                 "_default": 0,
             },
             "shrinking": {
                 "_type": "choice",
                 "_value": [True, False],
                 "_default": True,
             },
             "__activate": {
                 "kernel": {
                     "rbf": ["gamma"],
                     "sigmoid": ["gamma", "coef0"],
                     "poly": ["degree", "gamma", "coef0"],
                 },
             },
         }

     # "mock" = all svc entries, then the linearsvc entries it lacks.
     # setdefault keeps the svc "C" in its leading position and appends the
     # remaining keys in linearsvc order (max_iter ... __forbidden).
     mock_space = svc_space()
     for name, spec in linearsvc_space().items():
         mock_space.setdefault(name, spec)

     search_space = {
         "model(choice)": {
             "linearsvc": linearsvc_space(),
             "svc": svc_space(),
             "mock": mock_space,
         },
     }
     fmin(evaluate, hdl2cs(search_space), "ETPE", n_iterations=30)
# Benchmark settings read from the command line (3rd and 4th arguments).
max_iter = int(sys.argv[3])
n_startup_trials = int(sys.argv[4])

# Result table: one column per repetition ("trial-<i>"), one row per iteration.
res = pd.DataFrame(columns=[f"trial-{i}" for i in range(repetitions)],
                   index=range(max_iter))
# NOTE(review): this evaluator is replaced by a fresh one at the top of every
# loop iteration below and is not used in between in the code shown here.
evaluator = UltraoptEvaluator(data, 'balanced_accuracy')
for trial in range(repetitions):
    # Start a worker in the background, registered under the shared run id
    # with the name server at ns_host:ns_port.
    worker = MyWorker(nameserver=ns_host,
                      nameserver_port=ns_port,
                      run_id=hb_run_id,
                      id=0)
    # Each repetition gets its own evaluator instance.
    evaluator = UltraoptEvaluator(data, 'balanced_accuracy')
    worker.evaluator = evaluator
    worker.run(background=True)
    # Rebuild the configuration space for this repetition and seed it
    # deterministically from the repetition index (5, 15, 25, ...).
    HDL = get_no_ordinal_HDL()
    CS = hdl2cs(HDL)
    CS.seed(trial * 10 + 5)
    # min_budget == max_budget == 1, so only a single budget is configured.
    # NOTE(review): random_fraction=33 is far above 1. If this parameter is
    # a probability in [0, 1] (as in hpbandster's BOHB), every configuration
    # would be sampled at random and the KDE model never used - confirm
    # whether .33 was intended.
    bohb = BOHB(
        configspace=CS,
        run_id=hb_run_id,
        # just test KDE
        eta=2,
        min_budget=1,
        max_budget=1,
        nameserver=ns_host,
        nameserver_port=ns_port,
        num_samples=64,
        random_fraction=33,
        bandwidth_factor=3,
        ping_interval=10,
        min_bandwidth=.3)
Пример #4
0
def fmin(eval_func: Callable,
         config_space: Union[ConfigurationSpace, dict],
         optimizer: Union[BaseOptimizer, str, Type] = "ETPE",
         initial_points: Union[None, List[Configuration], List[dict]] = None,
         random_state=42,
         n_iterations=100,
         n_jobs=1,
         parallel_strategy="AsyncComm",
         auto_identify_serial_strategy=True,
         multi_fidelity_iter_generator: Optional[BaseIterGenerator] = None,
         previous_result: Union[FMinResult, BaseOptimizer, str, None] = None,
         warm_start_strategy="continue",
         show_progressbar=True,
         checkpoint_file=None,
         checkpoint_freq=10,
         verbose=0,
         run_id=None,
         ns_host="127.0.0.1",
         ns_port=0):
    """Minimize ``eval_func`` over ``config_space`` and wrap the optimizer.

    Three execution modes are supported, selected by ``parallel_strategy``:

    * ``"Serial"``    - plain ask/evaluate/tell loop in the calling thread;
      always uses the single budget ``[1]``.
    * ``"AsyncComm"`` - starts a name server, ``n_jobs`` worker threads and
      a master; the only mode that uses ``multi_fidelity_iter_generator``.
    * ``"MapReduce"`` - evaluates batches of up to ``n_jobs`` configurations
      through ``Parallel``/``delayed``; always uses the single budget ``[1]``.

    Args:
        eval_func: Callable that receives a configuration and returns its
            loss.
        config_space: A ``ConfigurationSpace``, or a dict that is converted
            with ``hdl2cs``.
        optimizer: A ``BaseOptimizer`` instance, a ``BaseOptimizer``
            subclass (instantiated without arguments), or a string ``name``
            resolved to ``ultraopt.optimizer.<name>Optimizer``.
        initial_points: Forwarded to the optimizer's ``initialize``.
        random_state: Forwarded to the optimizer's ``initialize``.
        n_iterations: Number of evaluations in Serial/MapReduce mode;
            forwarded to ``Master.run`` in AsyncComm mode.
        n_jobs: Number of workers (AsyncComm) or maximum batch size
            (MapReduce).
        parallel_strategy: ``"Serial"``, ``"AsyncComm"`` or ``"MapReduce"``.
        auto_identify_serial_strategy: When true, ``n_jobs == 1`` and no
            iteration generator is given, the strategy is switched to
            ``"Serial"``.
        multi_fidelity_iter_generator: Source of the budgets; when ``None``
            the budgets are ``[1]`` and AsyncComm falls back to
            ``CustomIterGenerator([1], [1])``.
        previous_result: Forwarded to ``warm_start_optimizer``.
        warm_start_strategy: Forwarded to ``warm_start_optimizer``.
        show_progressbar: Selects ``progress.default_callback`` or
            ``progress.no_progress_callback``.
        checkpoint_file: If not ``None``, the optimizer is periodically
            dumped there (Serial/MapReduce) or the path is handed to the
            master (AsyncComm).
        checkpoint_freq: Checkpoint interval.
        verbose: ``<= 0`` -> WARNING, ``1`` -> INFO, otherwise DEBUG is
            passed to ``logging.basicConfig``.
        run_id: Identifier for the AsyncComm run; a random hex UUID is
            generated when ``None``.
        ns_host: Host used for the name server, workers and master.
        ns_port: Port requested for the name server; the port returned by
            ``NameServer.start`` is the one actually used.

    Returns:
        ``FMinResult`` built from the optimizer after the run.

    Raises:
        NotImplementedError: Unsupported ``config_space`` / ``optimizer``
            type or unknown ``parallel_strategy``.
        ValueError: ``optimizer`` is a class that does not derive from
            ``BaseOptimizer``, or a string that cannot be resolved and
            instantiated.
    """
    # fixme: is configuring the root logger from here reasonable?
    if verbose <= 0:
        logging.basicConfig(level=logging.WARNING)
    elif verbose == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.DEBUG)
    # Design goal: single-machine parallelism and multi-fidelity optimization.
    # ------------   config_space   ---------------#
    if isinstance(config_space, dict):
        cs_ = hdl2cs(config_space)
    elif isinstance(config_space, ConfigurationSpace):
        cs_ = config_space
    else:
        raise NotImplementedError
    # ------------      budgets     ---------------#
    if multi_fidelity_iter_generator is None:
        budgets_ = [1]
    else:
        budgets_ = multi_fidelity_iter_generator.get_budgets()
    # ------------ optimizer ---------------#
    if inspect.isclass(optimizer):
        if not issubclass(optimizer, BaseOptimizer):
            raise ValueError(
                f"optimizer {optimizer} is not subclass of BaseOptimizer")
        opt_ = optimizer()
    elif isinstance(optimizer, BaseOptimizer):
        opt_ = optimizer
    elif isinstance(optimizer, str):
        try:
            opt_ = getattr(importlib.import_module("ultraopt.optimizer"),
                           f"{optimizer}Optimizer")()
        except Exception as err:
            # Keep the original failure attached as the explicit cause.
            raise ValueError(
                f"Invalid optimizer string-indicator: {optimizer}") from err
    else:
        raise NotImplementedError
    if show_progressbar:
        progress_callback = progress.default_callback
    else:
        progress_callback = progress.no_progress_callback
    # Three run modes:
    # 1. Serial: easy to debug, no multi-fidelity support
    # 2. AsyncComm: RPC based, supports multi-fidelity
    # 3. MapReduce: no multi-fidelity support
    # non-parallelism debug mode
    if auto_identify_serial_strategy and n_jobs == 1 and multi_fidelity_iter_generator is None:
        parallel_strategy = "Serial"
    if parallel_strategy in ["Serial", "MapReduce"]:
        budgets_ = [1]
    # initialize optimizer
    opt_.initialize(cs_, budgets_, random_state, initial_points)
    opt_ = warm_start_optimizer(opt_, previous_result, warm_start_strategy)
    if parallel_strategy == "Serial":
        with progress_callback(initial=0, total=n_iterations) as progress_ctx:
            for counts in range(n_iterations):
                config, _ = opt_.ask()
                loss = eval_func(config)
                opt_.tell(config, loss)
                _, best_loss, _ = get_wanted(opt_)
                progress_ctx.postfix = f"best loss: {best_loss:.3f}"
                progress_ctx.update(1)
                if checkpoint_file is not None:
                    # Dump every checkpoint_freq iterations (not at 0) and
                    # always after the final iteration.
                    if (counts % checkpoint_freq == 0 and counts != 0) \
                            or (counts == n_iterations - 1):
                        dump_checkpoint(opt_, checkpoint_file)
    elif parallel_strategy == "AsyncComm":
        if run_id is None:
            run_id = uuid4().hex
        if multi_fidelity_iter_generator is None:
            # todo: warning
            multi_fidelity_iter_generator = CustomIterGenerator([1], [1])
        # start name-server
        NS = NameServer(run_id=run_id, host=ns_host,
                        port=ns_port)  # get_a_free_port(ns_port, ns_host)
        _, ns_port = NS.start()
        try:
            # start n workers
            workers = [
                Worker(run_id=run_id,
                       nameserver=ns_host,
                       nameserver_port=ns_port,
                       host=ns_host,
                       worker_id=i) for i in range(n_jobs)
            ]
            for worker in workers:
                worker.initialize(eval_func)
                worker.run(True, "thread")
            # start master
            master = Master(run_id,
                            opt_,
                            multi_fidelity_iter_generator,
                            progress_callback=progress_callback,
                            checkpoint_file=checkpoint_file,
                            checkpoint_freq=checkpoint_freq,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            host=ns_host)
            try:
                # todo: add the value returned by master.run to the result
                master.run(n_iterations)
            finally:
                # Shut the master (and its workers) down even when the run
                # fails, so no background threads are left behind.
                master.shutdown(True)
        finally:
            # The name server must be released on every exit path.
            NS.shutdown()
    elif parallel_strategy == "MapReduce":
        # todo: support multi-fidelity
        counts = 0
        with progress_callback(initial=0, total=n_iterations) as progress_ctx:
            while counts < n_iterations:
                # The last batch may be smaller than n_jobs.
                n_parallels = min(n_jobs, n_iterations - counts)
                config_info_pairs = opt_.ask(n_points=n_parallels)
                losses = Parallel(n_jobs=n_parallels)(
                    delayed(eval_func)(config)
                    for config, _ in config_info_pairs)
                for j, (loss, (config,
                               _)) in enumerate(zip(losses,
                                                    config_info_pairs)):
                    # Only the last observation of a batch asks the
                    # optimizer to update its model.
                    opt_.tell(config,
                              loss,
                              update_model=(j == n_parallels - 1))
                counts += n_parallels
                _, best_loss, _ = get_wanted(opt_)
                progress_ctx.postfix = f"best loss: {best_loss:.3f}"
                progress_ctx.update(n_parallels)
                iteration = counts // n_jobs
                if checkpoint_file is not None:
                    if ((iteration - 1) % checkpoint_freq == 0) \
                            or (counts == n_iterations):
                        dump_checkpoint(opt_, checkpoint_file)
    else:
        raise NotImplementedError

    # max_budget, best_loss, best_config = get_wanted(opt_)
    return FMinResult(opt_)
Пример #5
0
            "criterion": {"_type": "choice", "_value": ["gini", "entropy"], "_default": "gini"},
            "max_features": {"_type": "choice", "_value": ["sqrt", "log2"], "_default": "sqrt"},
            "min_samples_split": {"_type": "int_uniform", "_value": [2, 20], "_default": 2},
            "min_samples_leaf": {"_type": "int_uniform", "_value": [1, 20], "_default": 1},
            "bootstrap": {"_type": "choice", "_value": [True, False], "_default": True},
            "random_state": 42
        },
        "KNeighborsClassifier": {
            "n_neighbors": {"_type": "int_loguniform", "_value": [1, 100], "_default": 3},
            "weights": {"_type": "choice", "_value": ["uniform", "distance"], "_default": "uniform"},
            "p": {"_type": "choice", "_value": [1, 2], "_default": 2},
        },
    }
}

# Convert the HDL dict with hdl2cs; the result is kept at module level.
config_space = hdl2cs(HDL)

# Module-wide default splitter: 3 shuffled folds with a fixed random_state,
# so the same default is reused wherever no other cv is supplied.
default_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)


class Evaluator():
    def __init__(self,
                 X, y,
                 metric="accuracy",
                 cv=default_cv):
        """Store the data, metric name and cross-validation splitter.

        Args:
            X: Kept as ``self.X``; presumably the feature matrix - confirm
                against the caller.
            y: Kept as ``self.y``; presumably the target labels - confirm
                against the caller.
            metric: Kept as ``self.metric``; defaults to ``"accuracy"``.
            cv: Kept as ``self.cv``; defaults to the module-level
                ``default_cv`` object, which is therefore shared by every
                instance created without an explicit ``cv``.
        """
        # Initialization
        self.X = X
        self.y = y
        self.metric = metric
        self.cv = cv