示例#1
0
class TestEI(unittest.TestCase):
    def setUp(self):
        self.model = MockModel()
        self.ei = EI(self.model)

    def test_1xD(self):
        self.ei.update(model=self.model, eta=1.0)
        configurations = [ConfigurationMock([1.0, 1.0, 1.0])]
        acq = self.ei(configurations)
        self.assertEqual(acq.shape, (1, 1))
        self.assertAlmostEqual(acq[0][0], 0.3989422804014327)

    def test_NxD(self):
        self.ei.update(model=self.model, eta=1.0)
        configurations = ([
            ConfigurationMock([0.0, 0.0, 0.0]),
            ConfigurationMock([0.1, 0.1, 0.1]),
            ConfigurationMock([1.0, 1.0, 1.0])
        ])
        acq = self.ei(configurations)
        self.assertEqual(acq.shape, (3, 1))
        self.assertAlmostEqual(acq[0][0], 0.0)
        self.assertAlmostEqual(acq[1][0], 0.90020601136712231)
        self.assertAlmostEqual(acq[2][0], 0.3989422804014327)

    def test_1x1(self):
        self.ei.update(model=self.model, eta=1.0)
        configurations = [ConfigurationMock([1.0])]
        acq = self.ei(configurations)
        self.assertEqual(acq.shape, (1, 1))
        self.assertAlmostEqual(acq[0][0], 0.3989422804014327)

    def test_Nx1(self):
        self.ei.update(model=self.model, eta=1.0)
        configurations = [
            ConfigurationMock([0.0001]),
            ConfigurationMock([1.0]),
            ConfigurationMock([2.0])
        ]
        acq = self.ei(configurations)
        self.assertEqual(acq.shape, (3, 1))
        self.assertAlmostEqual(acq[0][0], 0.9999)
        self.assertAlmostEqual(acq[1][0], 0.3989422804014327)
        self.assertAlmostEqual(acq[2][0], 0.19964122837424575)

    def test_zero_variance(self):
        self.ei.update(model=self.model, eta=1.0)
        X = np.array([ConfigurationMock([0.0])])
        acq = np.array(self.ei(X))
        self.assertAlmostEqual(acq[0][0], 0.0)
示例#2
0
def plot_state(smac, model, x_points, y_points, x_smac, y_smac, step=None):
    """
      plot function with all evaluated points,
      EI acquisition function
      Predictions with uncertainties
    """
    from smac.optimizer.acquisition import EI

    # cost all points for x
    step = step or len(x_smac)
    x_smac_ = np.array([[x] for x in x_smac[:step]])
    y_smac_ = np.array([[y] for y in y_smac[:step]])
    # as an alternative, we could extract the points from the runhistory again
    # but these points will be scaled to a unit-hypercube
    # X, Y = smac.solver.rh2EPM.transform(runhistory)

    model.train(x_smac_, y_smac_)

    acq_func = EI(model=model)
    acq_func.update(model=model, eta=np.min(y_smac))

    x_points_ = np.array([[x] for x in x_points])
    acq_values = acq_func._compute(X=x_points_)[:, 0]

    # plot acquisition function
    y_mean, y_var = model.predict(x_points_)
    y_mean = y_mean[:, 0]
    y_std = np.sqrt(y_var)[:, 0]

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.plot(x_points, acq_values)
    plt.title("Aquisition Function")

    plt.savefig('fig%da.pdf' % step)

    # plot uncertainties
    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.plot(x_points, y_mean)
    ax1.fill_between(x_points, y_mean - y_std,
                     y_mean + y_std, alpha=0.5)
    ax1.plot(x_smac[:step], y_smac[:step], 'bo')
    ax1.plot(x_smac[:step], y_smac[:step], 'ro')
    ax1.plot(x_points, y_points, '--')
    plt.title("Uncertainty Predictions")

    plt.savefig('fig%db.pdf' % step)
示例#3
0
    def test_get_next_by_random_search_sorted(self,
                                              patch_sample,
                                              patch_ei,
                                              patch_impute):
        values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
        patch_sample.return_value = [ConfigurationMock(i) for i in values]
        patch_ei.return_value = np.array([[_] for _ in values], dtype=float)
        patch_impute.side_effect = lambda l: values
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rval = rs._maximize(
            runhistory=None, stats=None, num_points=10, _sorted=True
        )
        self.assertEqual(len(rval), 10)
        for i in range(10):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].value, 10 - i)
            self.assertEqual(rval[i][0], 10 - i)
            self.assertEqual(rval[i][1].origin, 'Random Search (sorted)')

        # Check that config.get_array works as desired and imputation is used
        #  in between, we therefore have to retrieve the value from the mock!
        np.testing.assert_allclose([v.value for v in patch_ei.call_args[0][0]],
                                   np.array(values, dtype=float))
示例#4
0
def optimize(scenario, run, forest=False, seed=8, ratio=0.8):
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    rfr = RandomForestWithInstances(types=types, bounds=bounds, instance_features=scenario.feature_array, seed=seed)
    ei = EI(model=rfr)
    if forest:
        optimizer = ForestSearch(ei, scenario.cs, ratio=ratio)
    else:
        optimizer = InterleavedLocalAndRandomSearch(ei, scenario.cs)

    scenario.output_dir = "%s_%s_%d_%lf" % ("./logs/run_", "forest_" if forest else "random_", seed, time.time())
    smac = SMAC(
        scenario=scenario,
        rng=np.random.RandomState(seed),
        model=rfr,
        acquisition_function=ei,
        acquisition_function_optimizer=optimizer,
        tae_runner=run,
    )

    try:
        incumbent = smac.optimize()
    finally:
        incumbent = smac.solver.incumbent

    return smac.get_tae_runner().run(incumbent, 1)[1]
示例#5
0
class TestAcquisitionFunction(unittest.TestCase):
    def setUp(self):
        self.model = unittest.mock.Mock()
        self.acq = EI(model=self.model)

    def test_update_model_and_eta(self):
        model = 'abc'
        self.assertIsNone(self.acq.eta)
        self.acq.update(model=model, eta=0.1)
        self.assertEqual(self.acq.model, model)
        self.assertEqual(self.acq.eta, 0.1)

    def test_update_other(self):
        self.acq.other = 'other'

        with self.assertRaisesRegex(
                ValueError,
                r"Acquisition function EI needs to be updated with key model, but only got keys "
                r"\['other'\].",
        ):
            self.acq.update(other=None)

        model = 'abc'
        self.acq.update(model=model, eta=0.1, other=None)
        self.assertEqual(self.acq.other, 'other')
示例#6
0
    def test_inject_dependencies(self):
        # initialize objects with missing dependencies
        ta = ExecuteTAFuncDict(lambda x: x**2)
        rh = RunHistory(aggregate_func=None)
        acqu_func = EI(model=None)
        intensifier = Intensifier(tae_runner=None,
                                  stats=None,
                                  traj_logger=None,
                                  rng=np.random.RandomState(),
                                  instances=None)
        init_design = DefaultConfiguration(tae_runner=None,
                                           scenario=None,
                                           stats=None,
                                           traj_logger=None,
                                           rng=np.random.RandomState())
        rh2epm = RunHistory2EPM4Cost(scenario=self.scenario, num_params=0)
        rh2epm.scenario = None

        # assert missing dependencies
        self.assertIsNone(rh.aggregate_func)
        self.assertIsNone(acqu_func.model)
        self.assertIsNone(intensifier.tae_runner)
        self.assertIsNone(intensifier.stats)
        self.assertIsNone(intensifier.traj_logger)
        self.assertIsNone(init_design.tae_runner)
        self.assertIsNone(init_design.scenario)
        self.assertIsNone(init_design.stats)
        self.assertIsNone(init_design.traj_logger)
        self.assertIsNone(rh2epm.scenario)

        # initialize smac-object
        SMAC(scenario=self.scenario,
             tae_runner=ta,
             runhistory=rh,
             intensifier=intensifier,
             acquisition_function=acqu_func,
             runhistory2epm=rh2epm,
             initial_design=init_design)

        # assert that missing dependencies are injected
        self.assertIsNotNone(rh.aggregate_func, AbstractAcquisitionFunction)
        self.assertIsInstance(acqu_func.model, AbstractEPM)
        self.assertIsInstance(intensifier.tae_runner, ExecuteTARun)
        self.assertIsInstance(intensifier.stats, Stats)
        self.assertIsInstance(intensifier.traj_logger, TrajLogger)
        self.assertIsInstance(init_design.tae_runner, ExecuteTARun)
        self.assertIsInstance(init_design.scenario, Scenario)
        self.assertIsInstance(init_design.stats, Stats)
        self.assertIsInstance(init_design.traj_logger, TrajLogger)
        self.assertIsInstance(rh2epm.scenario, Scenario)
示例#7
0
    def test_challenger_list_callback(self, patch_sample, patch_ei,
                                      patch_impute):
        values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
        patch_sample.return_value = ConfigurationMock(1)
        patch_ei.return_value = np.array([[_] for _ in values], dtype=float)
        patch_impute.side_effect = lambda l: values
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rs._maximize = unittest.mock.Mock()
        rs._maximize.return_value = [(0, 0)]

        rval = rs.maximize(
            runhistory=None,
            stats=None,
            num_points=10,
        )
        self.assertEqual(rs._maximize.call_count, 0)
        next(rval)
        self.assertEqual(rs._maximize.call_count, 1)

        random_configuration_chooser = unittest.mock.Mock()
        random_configuration_chooser.check.side_effect = [
            True, False, False, False
        ]
        rs._maximize = unittest.mock.Mock()
        rs._maximize.return_value = [(0, 0), (1, 1)]

        rval = rs.maximize(
            runhistory=None,
            stats=None,
            num_points=10,
            random_configuration_chooser=random_configuration_chooser,
        )
        self.assertEqual(rs._maximize.call_count, 0)
        # The first configuration is chosen at random (see the random_configuration_chooser mock)
        conf = next(rval)
        self.assertIsInstance(conf, ConfigurationMock)
        self.assertEqual(rs._maximize.call_count, 0)
        # The 2nd configuration triggers the call to the callback (see the random_configuration_chooser mock)
        conf = next(rval)
        self.assertEqual(rs._maximize.call_count, 1)
        self.assertEqual(conf, 0)
        # The 3rd configuration doesn't trigger the callback any more
        conf = next(rval)
        self.assertEqual(rs._maximize.call_count, 1)
        self.assertEqual(conf, 1)

        with self.assertRaises(StopIteration):
            next(rval)
示例#8
0
    def __init__(
        self,
        configspace,
        random_fraction=1 / 2,
        logger=None,
        configs=None,
        losses=None,
    ):
        self.logger = logger

        self.random_fraction = random_fraction
        self.configspace = configspace
        self.min_points_in_model = len(self.configspace.get_hyperparameters())

        rng = np.random.RandomState(random.randint(0, 100))

        self.model = _construct_model(configspace, rng)
        self.acquisition_func = EI(model=self.model)
        self.acq_optimizer = LocalSearch(
            acquisition_function=self.acquisition_func,
            config_space=configspace,
            rng=rng)
        self.runhistory = RunHistory()

        self.configs = configs or list()
        self.losses = losses or list()
        if self.has_model:
            for config, cost in zip(self.configs, self.losses):
                self.runhistory.add(config, cost, 0, StatusType.SUCCESS)

            X = convert_configurations_to_array(self.configs)
            Y = np.array(self.losses, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses),
            )
示例#9
0
    def test_get_next_by_random_search(self, patch):
        def side_effect(size):
            return [ConfigurationMock()] * size

        patch.side_effect = side_effect
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rval = rs._maximize(
            runhistory=None, stats=None, num_points=10, _sorted=False
        )
        self.assertEqual(len(rval), 10)
        for i in range(10):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].origin, 'Random Search')
            self.assertEqual(rval[i][0], 0)
示例#10
0
    def test_get_next_by_local_search(
            self,
            _get_initial_points_patch,
            patch
    ):
        # Without known incumbent
        class SideEffect(object):

            def __call__(self, *args, **kwargs):
                rval = []
                for i in range(len(args[0])):
                    rval.append((i, ConfigurationMock(i)))
                return rval

        patch.side_effect = SideEffect()
        cs = test_helpers.get_branin_config_space()
        rand_confs = cs.sample_configuration(size=9)
        _get_initial_points_patch.return_value = rand_confs
        acq_func = EI(None)

        ls = LocalSearch(acq_func, cs)

        # To have some data in a mock runhistory
        runhistory = unittest.mock.Mock()
        runhistory.data = [None] * 1000

        rval = ls._maximize(runhistory, None, 9)
        self.assertEqual(len(rval), 9)
        self.assertEqual(patch.call_count, 1)
        for i in range(9):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].value, 8 - i)
            self.assertEqual(rval[i][0], 8 - i)
            self.assertEqual(rval[i][1].origin, 'Local Search')

        # Check that the known 'incumbent' is transparently passed through
        patch.side_effect = SideEffect()
        _get_initial_points_patch.return_value = ['Incumbent'] + rand_confs
        rval = ls._maximize(runhistory, None, 10)
        self.assertEqual(len(rval), 10)
        self.assertEqual(patch.call_count, 2)
        # Only the first local search in each iteration starts from the
        # incumbent
        self.assertEqual(patch.call_args_list[1][0][0][0], 'Incumbent')
        for i in range(10):
            self.assertEqual(rval[i][1].origin, 'Local Search')
示例#11
0
    def test_get_next_by_local_search(self, _get_initial_points_patch, patch):
        # Without known incumbent
        class SideEffect(object):
            def __init__(self):
                self.call_number = 0

            def __call__(self, *args, **kwargs):
                rval = 9 - self.call_number
                self.call_number += 1
                return (rval, ConfigurationMock(rval))

        patch.side_effect = SideEffect()
        cs = test_helpers.get_branin_config_space()
        rand_confs = cs.sample_configuration(size=9)
        _get_initial_points_patch.return_value = rand_confs
        acq_func = EI(None)

        ls = LocalSearch(acq_func, cs)

        # To have some data in a mock runhistory
        runhistory = unittest.mock.Mock()
        runhistory.data = [None] * 1000

        rval = ls._maximize(runhistory, None, 9)
        self.assertEqual(len(rval), 9)
        self.assertEqual(patch.call_count, 9)
        for i in range(9):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].value, 9 - i)
            self.assertEqual(rval[i][0], 9 - i)
            self.assertEqual(rval[i][1].origin, 'Local Search')

        # With known incumbent
        patch.side_effect = SideEffect()
        _get_initial_points_patch.return_value = ['Incumbent'] + rand_confs
        rval = ls._maximize(runhistory, None, 10)
        self.assertEqual(len(rval), 10)
        self.assertEqual(patch.call_count, 19)
        # Only the first local search in each iteration starts from the
        # incumbent
        self.assertEqual(patch.call_args_list[9][0][0], 'Incumbent')
        for i in range(10):
            self.assertEqual(rval[i][1].origin, 'Local Search')
示例#12
0
    def __init__(self,
                 scenario: Scenario,
                 tae_runner: Optional[Union[Type[ExecuteTARun], Callable]] = None,
                 tae_runner_kwargs: Optional[dict] = None,
                 runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
                 runhistory_kwargs: Optional[dict] = None,
                 intensifier: Optional[Type[Intensifier]] = None,
                 intensifier_kwargs: Optional[dict] = None,
                 acquisition_function: Optional[Type[AbstractAcquisitionFunction]] = None,
                 acquisition_function_kwargs: Optional[dict] = None,
                 integrate_acquisition_function: bool = False,
                 acquisition_function_optimizer: Optional[Type[AcquisitionFunctionMaximizer]] = None,
                 acquisition_function_optimizer_kwargs: Optional[dict] = None,
                 model: Optional[Type[AbstractEPM]] = None,
                 model_kwargs: Optional[dict] = None,
                 runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
                 runhistory2epm_kwargs: Optional[dict] = None,
                 initial_design: Optional[Type[InitialDesign]] = None,
                 initial_design_kwargs: Optional[dict] = None,
                 initial_configurations: Optional[List[Configuration]] = None,
                 stats: Optional[Stats] = None,
                 restore_incumbent: Optional[Configuration] = None,
                 rng: Optional[Union[np.random.RandomState, int]] = None,
                 smbo_class: Optional[SMBO] = None,
                 run_id: Optional[int] = None,
                 random_configuration_chooser: Optional[Type[RandomConfigurationChooser]] = None,
                 random_configuration_chooser_kwargs: Optional[dict] = None
                 ):
        """
        Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        tae_runner_kwargs: Optional[dict]
            arguments passed to constructor of '~tae_runner'
        runhistory : RunHistory
            runhistory to store all algorithm runs
        runhistory_kwargs : Optional[dict]
            arguments passed to constructor of runhistory.
            We strongly advise against changing the aggregation function,
            since it will break some code assumptions
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        intensifier_kwargs: Optional[dict]
            arguments passed to the constructor of '~intensifier'
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Class or object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` or :class:`~smac.optimizer.acquisition.LogEI` if not set.
            `~acquisition_function_kwargs` is passed to the class constructor.
        acquisition_function_kwargs : Optional[dict]
            dictionary to pass specific arguments to ~acquisition_function
        integrate_acquisition_function : bool, default=False
            Whether to integrate the acquisition function. Works only with models which can sample their
            hyperparameters (i.e. GaussianProcessMCMC).
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        acquisition_function_optimizer_kwargs: Optional[dict]
            Arguments passed to constructor of '~acquisition_function_optimizer'
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        model_kwargs : Optional[dict]
            Arguments passed to constructor of '~model'
        runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        runhistory2epm_kwargs: Optional[dict]
            Arguments passed to the constructor of '~runhistory2epm'
        initial_design : InitialDesign
            initial sampling design
        initial_design_kwargs: Optional[dict]
            arguments passed to constructor of `~initial_design'
        initial_configurations : List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~smac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.
        random_configuration_chooser_kwargs : Optional[dict]
            arguments of constructor for '~random_configuration_chooser'

        """
        self.logger = logging.getLogger(
            self.__module__ + "." + self.__class__.__name__)

        aggregate_func = average_cost

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            # run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # run_id=datetime.now().strftime("%Y%m%d%H%M%S%f")
            run_id=uuid1()
            self.output_dir = create_output_directory(scenario, run_id)   # fixme run_id
        elif scenario.output_dir is not None:
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = scenario.output_dir_for_this_run

        if (
            scenario.deterministic is True
            and getattr(scenario, 'tuner_timeout', None) is None
            and scenario.run_obj == 'quality'
        ):
            self.logger.info('Optimizing a deterministic scenario for quality without a tuner timeout - will make '
                             'SMAC deterministic and only evaluate one configuration per iteration!')
            scenario.intensification_percentage = 1e-10
            scenario.min_chall = 1

        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario,file_system=scenario.file_system)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
            self.logger.warning("Runtime as objective automatically activates log(y) transformation")
            self.scenario.transform_y = "LOG"

        # initialize empty runhistory
        runhistory_def_kwargs = {'aggregate_func': aggregate_func}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs,file_system=scenario.file_system)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(**runhistory_def_kwargs)
        else:
            if runhistory.aggregate_func is None:
                runhistory.aggregate_func = aggregate_func

        rand_conf_chooser_kwargs = {
           'rng': rng
        }
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if 'prob' not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs['prob'] = scenario.rand_prob
            random_configuration_chooser = ChooserProb(**rand_conf_chooser_kwargs)
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser = random_configuration_chooser(**rand_conf_chooser_kwargs)
        elif not isinstance(random_configuration_chooser, RandomConfigurationChooser):
            raise ValueError("random_configuration_chooser has to be"
                             " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats,file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        model_def_kwargs = {
            'types': types,
            'bounds': bounds,
            'instance_features': scenario.feature_array,
            'seed': rng.randint(MAXINT),
            'pca_components': scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                'log_y': scenario.transform_y in ["LOG", "LOGS"],
                'num_trees': scenario.rf_num_trees,
                'do_bootstrapping': scenario.rf_do_bootstrapping,
                'ratio_features': scenario.rf_ratio_features,
                'min_samples_split': scenario.rf_min_samples_split,
                'min_samples_leaf': scenario.rf_min_samples_leaf,
                'max_depth': scenario.rf_max_depth,
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs['configspace'] = self.scenario.cs
            model = RandomForestWithInstances(**model_def_kwargs)
        elif inspect.isclass(model):
            model_def_kwargs['configspace'] = self.scenario.cs
            model = model(**model_def_kwargs)
        else:
            raise TypeError(
                "Model not recognized: %s" %(type(model)))

        # initial acquisition function
        acq_def_kwargs = {'model': model}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:
                acquisition_function = LogEI(**acq_def_kwargs)
            else:
                acquisition_function = EI(**acq_def_kwargs)
        elif inspect.isclass(acquisition_function):
            acquisition_function = acquisition_function(**acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s."
                % type(acquisition_function)
            )
        if integrate_acquisition_function:
            acquisition_function = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function,
                **acq_def_kwargs
            )

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            'acquisition_function': acquisition_function,
            'config_space': scenario.cs,
            'rng': rng,
            }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                'max_steps': scenario.sls_max_steps,
                'n_steps_plateau_walk': scenario.sls_n_steps_plateau_walk,
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(**acq_func_opt_kwargs)
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer = acquisition_function_optimizer(**acq_func_opt_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer)
            )

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            'stats': self.stats,
            'run_obj': scenario.run_obj,
            'runhistory': runhistory,
            'par_factor': scenario.par_factor,
            'cost_for_crash': scenario.cost_for_crash,
            'abort_on_first_run_crash': scenario.abort_on_first_run_crash
            }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)
        if 'ta' not in tae_def_kwargs:
            tae_def_kwargs['ta'] = scenario.ta
        if tae_runner is None:
            tae_def_kwargs['ta'] = scenario.ta
            tae_runner = ExecuteTARunOld(**tae_def_kwargs)
        elif inspect.isclass(tae_runner):
            tae_runner = tae_runner(**tae_def_kwargs)
        elif callable(tae_runner):
            tae_def_kwargs['ta'] = tae_runner
            tae_runner = ExecuteTAFuncDict(**tae_def_kwargs)
        else:
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either None, a callable or an object implementing "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file."
                            % type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # initialize intensification
        intensifier_def_kwargs = {
            'tae_runner': tae_runner,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'rng': rng,
            'instances': scenario.train_insts,
            'cutoff': scenario.cutoff,
            'deterministic': scenario.deterministic,
            'run_obj_time': scenario.run_obj == "runtime",
            'always_race_against': scenario.cs.get_default_configuration()
                                   if scenario.always_race_default else None,
            'use_ta_time_bound': scenario.use_ta_time,
            'instance_specifics': scenario.instance_specific,
            'minR': scenario.minR,
            'maxR': scenario.maxR,
            'adaptive_capping_slackfactor': scenario.intens_adaptive_capping_slackfactor,
            'min_chall': scenario.intens_min_chall
            }
        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)
        if intensifier is None:
            intensifier = Intensifier(**intensifier_def_kwargs)
        elif inspect.isclass(intensifier):
            intensifier = intensifier(**intensifier_def_kwargs)
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the Intensifier, but is '%s'" %
                type(intensifier)
            )

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both")

        init_design_def_kwargs = {
            'tae_runner': tae_runner,
            'scenario': scenario,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'runhistory': runhistory,
            'rng': rng,
            'configs': initial_configurations,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'n_configs_x_params': 0,
            'max_config_fracs': 0.0
            }
        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = DefaultConfiguration(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = RandomConfigurations(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":
                initial_design = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":
                initial_design = FactorialInitialDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":
                initial_design = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        elif inspect.isclass(initial_design):
            initial_design = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'" %
                type(initial_design)
            )

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in ["LOG", "LOGS"]:
            cutoff = np.log(np.nanmin([np.inf, np.float_(scenario.cutoff)]))
            threshold = cutoff + np.log(scenario.par_factor)
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)])
            threshold = cutoff * scenario.par_factor
        num_params = len(scenario.cs.get_hyperparameters())
        imputor = RFRImputator(rng=rng,
                               cutoff=cutoff,
                               threshold=threshold,
                               model=model,
                               change_threshold=0.01,
                               max_iter=2)

        r2e_def_kwargs = {
            'scenario': scenario,
            'num_params': num_params,
            'success_states': [StatusType.SUCCESS, ],
            'impute_censored_data': True,
            'impute_state': [StatusType.CAPPED, ],
            'imputor': imputor,
            'scale_perc': 5
            }
        if scenario.run_obj == 'quality':
            r2e_def_kwargs.update({
                'success_states': [StatusType.SUCCESS, StatusType.CRASHED],
                'impute_censored_data': False,
                'impute_state': None,
            })
        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == 'runtime':
                runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":
                    runhistory2epm = RunHistory2EPM4Cost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOG":
                    runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOGS":
                    runhistory2epm = RunHistory2EPM4LogScaledCost(**r2e_def_kwargs)
                elif scenario.transform_y == "INVS":
                    runhistory2epm = RunHistory2EPM4InvScaledCost(**r2e_def_kwargs)
            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)
        elif inspect.isclass(runhistory2epm):
            runhistory2epm = runhistory2epm(**r2e_def_kwargs)
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'" %
                type(runhistory2epm)
            )

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': run_id,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser': random_configuration_chooser
        }

        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
示例#13
0
    def __init__(
        self,
        configspace,
        random_fraction=1 / 2,
        logger=None,
        previous_results=None
    ):
        self.logger = logger
        self.random_fraction = random_fraction

        self.runhistory = RunHistory()
        self.configs = list()
        self.losses = list()
        rng = np.random.RandomState(random.randint(0, 100))

        if previous_results is not None and len(previous_results.batch_results) > 0:
            # Assume same-task changing-configspace trajectory for now
            results_previous_adjustment = previous_results.batch_results[-1]
            configspace_previous = results_previous_adjustment.configspace

            # Construct combined config space
            configspace_combined = ConfigSpace.ConfigurationSpace()
            development_step = CSH.CategoricalHyperparameter("development_step", choices=["old", "new"])
            configspace_combined.add_hyperparameter(
                development_step
            )

            configspace_only_old, configspace_both, configspace_only_new = get_configspace_partitioning_cond(configspace, configspace_previous)

            configspace_combined.add_hyperparameters(configspace_both.get_hyperparameters())
            configspace_combined.add_hyperparameters(configspace_only_old.get_hyperparameters())
            configspace_combined.add_hyperparameters(configspace_only_new.get_hyperparameters())

            for hyperparameter in configspace_only_old.get_hyperparameters():
                configspace_combined.add_condition(
                    ConfigSpace.EqualsCondition(hyperparameter, development_step, "old")
                )
            for hyperparameter in configspace_only_new.get_hyperparameters():
                configspace_combined.add_condition(
                    ConfigSpace.EqualsCondition(hyperparameter, development_step, "new")
                )

            # Read old configs and losses
            result_previous = results_previous_adjustment.results[0]
            all_runs = result_previous.get_all_runs(only_largest_budget=False)
            self.losses_old = [run.loss for run in all_runs]
            self.configs_old = [run.config_id for run in all_runs]
            id2conf = result_previous.get_id2config_mapping()
            self.configs_old = [id2conf[id_]["config"] for id_ in self.configs_old]

            # Map old configs to combined space
            for config in self.configs_old:
                config["development_step"] = "old"
            self.configs_old = [ConfigSpace.Configuration(configspace_combined, config) for config in self.configs_old]

            for config, cost in zip(self.configs_old, self.losses_old):
                self.runhistory.add(config, cost, 0, StatusType.SUCCESS)

            # Construct and fit model
            self.configspace = configspace_combined
            self.model = _construct_model(self.configspace, rng)
            self.acquisition_func = EI(model=self.model)
            self.acq_optimizer = LocalSearch(acquisition_function=self.acquisition_func,
                                             config_space=self.configspace, rng=rng)

            X = convert_configurations_to_array(self.configs_old)
            Y = np.array(self.losses_old, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses_old),
            )
        else:
            self.configspace = configspace
            self.model = _construct_model(self.configspace, rng)
            self.acquisition_func = EI(model=self.model)
            self.acq_optimizer = LocalSearch(acquisition_function=self.acquisition_func,
                                             config_space=self.configspace, rng=rng)

        self.min_points_in_model = len(self.configspace.get_hyperparameters())  # TODO
示例#14
0
class GPCondSampler:
    def __init__(
        self,
        configspace,
        random_fraction=1 / 2,
        logger=None,
        previous_results=None
    ):
        self.logger = logger
        self.random_fraction = random_fraction

        self.runhistory = RunHistory()
        self.configs = list()
        self.losses = list()
        rng = np.random.RandomState(random.randint(0, 100))

        if previous_results is not None and len(previous_results.batch_results) > 0:
            # Assume same-task changing-configspace trajectory for now
            results_previous_adjustment = previous_results.batch_results[-1]
            configspace_previous = results_previous_adjustment.configspace

            # Construct combined config space
            configspace_combined = ConfigSpace.ConfigurationSpace()
            development_step = CSH.CategoricalHyperparameter("development_step", choices=["old", "new"])
            configspace_combined.add_hyperparameter(
                development_step
            )

            configspace_only_old, configspace_both, configspace_only_new = get_configspace_partitioning_cond(configspace, configspace_previous)

            configspace_combined.add_hyperparameters(configspace_both.get_hyperparameters())
            configspace_combined.add_hyperparameters(configspace_only_old.get_hyperparameters())
            configspace_combined.add_hyperparameters(configspace_only_new.get_hyperparameters())

            for hyperparameter in configspace_only_old.get_hyperparameters():
                configspace_combined.add_condition(
                    ConfigSpace.EqualsCondition(hyperparameter, development_step, "old")
                )
            for hyperparameter in configspace_only_new.get_hyperparameters():
                configspace_combined.add_condition(
                    ConfigSpace.EqualsCondition(hyperparameter, development_step, "new")
                )

            # Read old configs and losses
            result_previous = results_previous_adjustment.results[0]
            all_runs = result_previous.get_all_runs(only_largest_budget=False)
            self.losses_old = [run.loss for run in all_runs]
            self.configs_old = [run.config_id for run in all_runs]
            id2conf = result_previous.get_id2config_mapping()
            self.configs_old = [id2conf[id_]["config"] for id_ in self.configs_old]

            # Map old configs to combined space
            for config in self.configs_old:
                config["development_step"] = "old"
            self.configs_old = [ConfigSpace.Configuration(configspace_combined, config) for config in self.configs_old]

            for config, cost in zip(self.configs_old, self.losses_old):
                self.runhistory.add(config, cost, 0, StatusType.SUCCESS)

            # Construct and fit model
            self.configspace = configspace_combined
            self.model = _construct_model(self.configspace, rng)
            self.acquisition_func = EI(model=self.model)
            self.acq_optimizer = LocalSearch(acquisition_function=self.acquisition_func,
                                             config_space=self.configspace, rng=rng)

            X = convert_configurations_to_array(self.configs_old)
            Y = np.array(self.losses_old, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses_old),
            )
        else:
            self.configspace = configspace
            self.model = _construct_model(self.configspace, rng)
            self.acquisition_func = EI(model=self.model)
            self.acq_optimizer = LocalSearch(acquisition_function=self.acquisition_func,
                                             config_space=self.configspace, rng=rng)

        self.min_points_in_model = len(self.configspace.get_hyperparameters())  # TODO

    @property
    def has_model(self):
        return len(self.configs) >= self.min_points_in_model

    def get_config(self, budget):  # pylint: disable=unused-argument
        self.logger.debug("start sampling a new configuration.")

        is_random_fraction = np.random.rand() < self.random_fraction
        if is_random_fraction or not self.has_model:
            if "development_step" in self.configspace.get_hyperparameter_names():
                while True:
                    sample = self.configspace.sample_configuration()
                    if sample["development_step"] == "new":
                        break
            else:
                sample = self.configspace.sample_configuration()
        else:
            # Use private _maximize to not return challenger list object
            sample = self.acq_optimizer._maximize(
                runhistory=self.runhistory,
                stats=None,
                num_points=1,
            )
            sample = sample[0][1]

        sample = ConfigSpace.util.deactivate_inactive_hyperparameters(sample.get_dictionary(), self.configspace)
        sample = sample.get_dictionary()
        info = {}
        self.logger.debug("done sampling a new configuration.")
        return sample, info

    def new_result(self, job, config_info):  # pylint: disable=unused-argument
        if job.exception is not None:
            self.logger.warning(f"job {job.id} failed with exception\n{job.exception}")

        if job.result is None:
            # One could skip crashed results, but we decided to
            # assign a +inf loss and count them as bad configurations
            loss = np.inf
        else:
            # same for non numeric losses.
            # Note that this means losses of minus infinity will count as bad!
            loss = job.result["loss"] if np.isfinite(job.result["loss"]) else np.inf

        config = ConfigSpace.Configuration(self.configspace, job.kwargs["config"])
        self.configs.append(config)
        self.losses.append(loss)

        if self.has_model:
            # TODO: include old
            X = convert_configurations_to_array(self.configs)
            Y = np.array(self.losses, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses),
            )
示例#15
0
 def setUp(self):
     self.model = unittest.mock.Mock()
     self.model.models = [MockModel(), MockModel(), MockModel()]
     self.ei = EI(self.model)
示例#16
0
 def setUp(self):
     self.model = unittest.mock.Mock()
     self.acq = EI(model=self.model)
示例#17
0
    def _component_builder(self, conf:typing.Union[Configuration, dict]) \
        -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
        """
            builds new Acquisition function object
            and EPM object and returns these

            Parameters
            ----------
            conf: typing.Union[Configuration, dict]
                configuration specificing "model" and "acq_func"

            Returns
            -------
            typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]

        """
        types, bounds = get_types(
            self.config_space, instance_features=self.scenario.feature_array)

        if conf["model"] == "RF":
            model = RandomForestWithInstances(
                configspace=self.config_space,
                types=types,
                bounds=bounds,
                instance_features=self.scenario.feature_array,
                seed=self.rng.randint(MAXINT),
                pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
                log_y=conf.get("log_y", self.scenario.transform_y
                               in ["LOG", "LOGS"]),
                num_trees=conf.get("num_trees", self.scenario.rf_num_trees),
                do_bootstrapping=conf.get("do_bootstrapping",
                                          self.scenario.rf_do_bootstrapping),
                ratio_features=conf.get("ratio_features",
                                        self.scenario.rf_ratio_features),
                min_samples_split=conf.get("min_samples_split",
                                           self.scenario.rf_min_samples_split),
                min_samples_leaf=conf.get("min_samples_leaf",
                                          self.scenario.rf_min_samples_leaf),
                max_depth=conf.get("max_depth", self.scenario.rf_max_depth),
            )

        elif conf["model"] == "GP":
            from smac.epm.gp_kernels import ConstantKernel, HammingKernel, WhiteKernel, Matern

            cov_amp = ConstantKernel(
                2.0,
                constant_value_bounds=(np.exp(-10), np.exp(2)),
                prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
            )

            cont_dims = np.nonzero(types == 0)[0]
            cat_dims = np.nonzero(types != 0)[0]

            if len(cont_dims) > 0:
                exp_kernel = Matern(
                    np.ones([len(cont_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                    nu=2.5,
                    operate_on=cont_dims,
                )

            if len(cat_dims) > 0:
                ham_kernel = HammingKernel(
                    np.ones([len(cat_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                    operate_on=cat_dims,
                )
            noise_kernel = WhiteKernel(
                noise_level=1e-8,
                noise_level_bounds=(np.exp(-25), np.exp(2)),
                prior=HorseshoePrior(scale=0.1, rng=self.rng),
            )

            if len(cont_dims) > 0 and len(cat_dims) > 0:
                # both
                kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
            elif len(cont_dims) > 0 and len(cat_dims) == 0:
                # only cont
                kernel = cov_amp * exp_kernel + noise_kernel
            elif len(cont_dims) == 0 and len(cat_dims) > 0:
                # only cont
                kernel = cov_amp * ham_kernel + noise_kernel
            else:
                raise ValueError()

            n_mcmc_walkers = 3 * len(kernel.theta)
            if n_mcmc_walkers % 2 == 1:
                n_mcmc_walkers += 1

            model = GaussianProcessMCMC(
                self.config_space,
                types=types,
                bounds=bounds,
                kernel=kernel,
                n_mcmc_walkers=n_mcmc_walkers,
                chain_length=250,
                burnin_steps=250,
                normalize_y=True,
                seed=self.rng.randint(low=0, high=10000),
            )

        if conf["acq_func"] == "EI":
            acq = EI(model=model, par=conf.get("par_ei", 0))
        elif conf["acq_func"] == "LCB":
            acq = LCB(model=model, par=conf.get("par_lcb", 0))
        elif conf["acq_func"] == "PI":
            acq = PI(model=model, par=conf.get("par_pi", 0))
        elif conf["acq_func"] == "LogEI":
            # par value should be in log-space
            acq = LogEI(model=model, par=conf.get("par_logei", 0))

        return acq, model
示例#18
0
    def create_optimizer(self):
        from smac.epm.rf_with_instances import RandomForestWithInstances
        from smac.initial_design.default_configuration_design import DefaultConfiguration
        from smac.intensification.intensification import Intensifier
        from smac.optimizer.smbo import SMBO
        from smac.optimizer.acquisition import EI
        from smac.optimizer.ei_optimization import InterleavedLocalAndRandomSearch
        from smac.optimizer.objective import average_cost
        from smac.runhistory.runhistory2epm import RunHistory2EPM4Cost
        from smac.tae.execute_ta_run import StatusType
        from smac.utils.constants import MAXINT
        from smac.utils.util_funcs import get_types

        TAE_RUNNER = self._priv_evaluator

        runhistory2epm = RunHistory2EPM4Cost(
            scenario=self.scenario,
            num_params=len(self.param_space),
            success_states=[StatusType.SUCCESS, StatusType.CRASHED],
            impute_censored_data=False,
            impute_state=None)

        intensifier = Intensifier(tae_runner=TAE_RUNNER, stats=self.stats, traj_logger=self.traj_logger,
                                        rng=self.rng, instances=self.scenario.train_insts, cutoff=self.scenario.cutoff,
                                        deterministic=self.scenario.deterministic, run_obj_time=self.scenario.run_obj == "runtime",
                                        always_race_against=self.scenario.cs.get_default_configuration() \
                                              if self.scenario.always_race_default else None,
                                        instance_specifics=self.scenario.instance_specific,
                                        minR=self.scenario.minR, maxR=self.scenario.maxR)

        types, bounds = get_types(self.scenario.cs,
                                  self.scenario.feature_array)
        model = RandomForestWithInstances(
            types=types,
            bounds=bounds,
            seed=self.rng.randint(MAXINT),
            instance_features=self.scenario.feature_array,
            pca_components=self.scenario.PCA_DIM)
        acq_func = EI(model=model)

        smbo_args = {
            'scenario':
            self.scenario,
            'stats':
            self.stats,
            'initial_design':
            DefaultConfiguration(tae_runner=TAE_RUNNER,
                                 scenario=self.scenario,
                                 stats=self.stats,
                                 traj_logger=self.traj_logger,
                                 rng=self.rng),
            'runhistory':
            self.runhistory,
            'runhistory2epm':
            runhistory2epm,
            'intensifier':
            intensifier,
            'aggregate_func':
            average_cost,
            'num_run':
            self.seed,
            'model':
            model,
            'acq_optimizer':
            InterleavedLocalAndRandomSearch(
                acq_func, self.scenario.cs,
                np.random.RandomState(seed=self.rng.randint(MAXINT))),
            'acquisition_func':
            acq_func,
            'rng':
            self.rng,
            'restore_incumbent':
            None,
        }

        self.smbo = SMBO(**smbo_args)
示例#19
0
    def build_pc_smbo(self,
                      tae_runner,
                      stats,
                      scenario,
                      runhistory,
                      aggregate_func,
                      acq_func_name,
                      model_target_names,
                      logging_directory,
                      double_intensification=False,
                      constant_pipeline_steps=None,
                      variable_pipeline_steps=None,
                      cached_pipeline_steps=None,
                      seed=None,
                      intensification_instances=None,
                      num_marginalized_configurations_by_random_search=20,
                      num_configs_for_marginalization=40,
                      random_splitting_number=5,
                      random_splitting_enabled=False):

        # Build intensifier
        rng = np.random.RandomState(seed)
        traj_logger = TrajLogger(logging_directory, stats)
        intensifier = Intensifier(tae_runner=tae_runner,
                                  stats=stats,
                                  traj_logger=traj_logger,
                                  rng=rng,
                                  cutoff=scenario.cutoff,
                                  deterministic=scenario.deterministic,
                                  run_obj_time=scenario.run_obj == "runtime",
                                  run_limit=scenario.ta_run_limit,
                                  instances=intensification_instances,
                                  maxR=len(intensification_instances))

        # Build model
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        #types = get_types(scenario.cs)
        if len(model_target_names) > 1:
            # model_target_names = ['cost','time']
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                target_names=model_target_names, bounds=bounds, types=types)
            # UncorrelatedMultiObjectiveRandomForestWithInstances(target_names=model_target_names,
            #                                                    types=types)
        elif len(model_target_names) == 1:
            model = RandomForestWithInstances(types=types, bounds=bounds)
        else:
            model = RandomEPM(rng=rng)
            # model = RandomForestWithInstances(types=types)

        # Build acquisition function, runhistory2epm and local search
        num_params = len(scenario.cs.get_hyperparameters())
        if acq_func_name in ["ei", "pc-ei"]:
            acquisition_func = EI(model)
            acq_func_wrapper = PCAquisitionFunctionWrapper(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
            runhistory2epm = RunHistory2EPM4Cost(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            select_configuration = SelectConfigurations(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
        elif acq_func_name in ["m-ei", "pc-m-ei"]:
            #acquisition_func = MEI(model)
            acquisition_func = EI(model)
            acq_func_wrapper = PCAquisitionFunctionWrapper(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
            runhistory2epm = RunHistory2EPM4Cost(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            # TODO: num_configs_for_marginalization
            select_configuration = SelectConfigurationsWithMarginalization(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                num_marginalized_configurations_by_random_search=
                num_marginalized_configurations_by_random_search,
                num_configs_for_marginalization=num_configs_for_marginalization
            )
        elif acq_func_name in ['eips', 'pc-eips']:
            acquisition_func = EIPS(model)
            acq_func_wrapper = PCAquisitionFunctionWrapper(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
            runhistory2epm = RunHistory2EPM4EIPS(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            select_configuration = SelectConfigurations(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
        elif acq_func_name in ["m-eips", "pc-m-eips"]:
            acquisition_func = EIPS(model)
            acq_func_wrapper = PCAquisitionFunctionWrapper(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
            runhistory2epm = RunHistory2EPM4EIPS(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            # TODO: num_configs_for_marginalization
            select_configuration = SelectConfigurationsWithMarginalization(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                num_marginalized_configurations_by_random_search=
                num_marginalized_configurations_by_random_search,
                num_configs_for_marginalization=num_configs_for_marginalization
            )
        elif acq_func_name == 'pceips':
            acquisition_func = PCEIPS(model)
            acq_func_wrapper = PCAquisitionFunctionWrapperWithCachingReduction(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                cached_pipeline_steps=cached_pipeline_steps)
            runhistory2epm = RunHistory2EPM4EIPS(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            if constant_pipeline_steps == None or variable_pipeline_steps == None or cached_pipeline_steps == None:
                raise ValueError(
                    "Constant_pipeline_steps and variable pipeline steps should not be none\
                                    when using PCEIPS")
            select_configuration = SelectConfigurations(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps)
        elif acq_func_name == 'pc-m-pceips':
            acquisition_func = PCEIPS(model)
            acq_func_wrapper = PCAquisitionFunctionWrapperWithCachingReduction(
                acquisition_func=acquisition_func,
                config_space=scenario.cs,
                runhistory=runhistory,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                cached_pipeline_steps=cached_pipeline_steps)
            runhistory2epm = RunHistory2EPM4EIPS(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                                       config_space=scenario.cs)
            if constant_pipeline_steps == None or variable_pipeline_steps == None or cached_pipeline_steps == None:
                raise ValueError(
                    "Constant_pipeline_steps and variable pipeline steps should not be none\
                                    when using PCEIPS")
            select_configuration = SelectConfigurationsWithMarginalization(
                scenario=scenario,
                stats=stats,
                runhistory=runhistory,
                model=model,
                acq_optimizer=local_search,
                acquisition_func=acq_func_wrapper,
                rng=rng,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                num_marginalized_configurations_by_random_search=
                num_marginalized_configurations_by_random_search,
                num_configs_for_marginalization=num_configs_for_marginalization
            )
        elif acq_func_name == "roar":
            runhistory2epm = RunHistory2EPM4Cost(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            select_configuration = SelectConfigurationsRandom(
                scenario=scenario)
        elif acq_func_name == "pc-roar-mrs":
            runhistory2epm = RunHistory2EPM4Cost(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            select_configuration = SelectConfigurationsMRS(
                scenario=scenario,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                splitting_number=random_splitting_number,
                random_splitting_enabled=random_splitting_enabled)
        elif acq_func_name == "pc-roar-sigmoid-rs":
            runhistory2epm = RunHistory2EPM4Cost(
                scenario, num_params, success_states=[StatusType.SUCCESS])
            select_configuration = SelectConfigurationsSigmoidRS(
                scenario=scenario,
                constant_pipeline_steps=constant_pipeline_steps,
                variable_pipeline_steps=variable_pipeline_steps,
                fraction=random_splitting_number)
        else:
            # Not a valid acquisition function
            raise ValueError("The provided acquisition function is not valid")

        # Build initial design
        # initial_design = RandomConfiguration(tae_runner=tae_runner,
        #                                      scenario=scenario,
        #                                      stats=stats,
        #                                      traj_logger=traj_logger,
        #                                      rng=rng)
        initial_configs = scenario.cs.sample_configuration(size=2)
        for config in initial_configs:
            config._populate_values()
        initial_design = MultiConfigInitialDesign(
            tae_runner=tae_runner,
            scenario=scenario,
            stats=stats,
            traj_logger=traj_logger,
            runhistory=runhistory,
            rng=rng,
            configs=initial_configs,
            intensifier=intensifier,
            aggregate_func=aggregate_func)

        # run id
        num_run = rng.randint(1234567980)

        # Build pc_smbo
        if acq_func_name not in ['pc-roar-sigmoid-rs']:
            smbo = PCSMBO(scenario=scenario,
                          stats=stats,
                          initial_design=initial_design,
                          runhistory=runhistory,
                          runhistory2epm=runhistory2epm,
                          intensifier=intensifier,
                          aggregate_func=aggregate_func,
                          num_run=num_run,
                          model=model,
                          rng=rng,
                          select_configuration=select_configuration,
                          double_intensification=double_intensification)
        else:
            smbo = PCSMBOSigmoidRandomSearch(
                scenario=scenario,
                stats=stats,
                initial_design=initial_design,
                runhistory=runhistory,
                runhistory2epm=runhistory2epm,
                intensifier=intensifier,
                aggregate_func=aggregate_func,
                num_run=num_run,
                model=model,
                rng=rng,
                select_configuration=select_configuration)

        return smbo
示例#20
0
    def __init__(
            self,
            scenario: Scenario,
            # TODO: once we drop python3.4 add type hint
            # typing.Union[ExecuteTARun, callable]
            tae_runner=None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            rng: np.random.RandomState = None,
            run_id: int = 1):
        """Constructor"""
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost
        self.runhistory = None
        self.trajectory = None

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        local_search = LocalSearch(
            acquisition_function,
            scenario.cs,
            max_steps=scenario.sls_max_steps,
            n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.
                intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        self.solver = EPILS_Solver(scenario=scenario,
                                   stats=self.stats,
                                   initial_design=initial_design,
                                   runhistory=runhistory,
                                   runhistory2epm=runhistory2epm,
                                   intensifier=intensifier,
                                   aggregate_func=aggregate_func,
                                   num_run=num_run,
                                   model=model,
                                   acq_optimizer=local_search,
                                   acquisition_func=acquisition_function,
                                   rng=rng)
示例#21
0
    def __init__(self, api_config, config_space, parallel_setting="LS"):
        super(SMAC4EPMOpimizer, self).__init__(api_config)
        self.cs = config_space
        self.num_hps = len(self.cs.get_hyperparameters())

        if parallel_setting not in ["CL_min", "CL_max", "CL_mean", "KB", "LS"]:
            raise ValueError(
                "parallel_setting can only be one of the following: "
                "CL_min, CL_max, CL_mean, KB, LS")
        self.parallel_setting = parallel_setting

        rng = np.random.RandomState(seed=0)
        scenario = Scenario({
            "run_obj": "quality",  # we optimize quality (alt. to runtime)
            "runcount-limit": 128,
            "cs": self.cs,  # configuration space
            "deterministic": True,
            "limit_resources": False,
        })

        self.stats = Stats(scenario)
        # traj = TrajLogger(output_dir=None, stats=self.stats)

        self.runhistory = RunHistory()

        r2e_def_kwargs = {
            "scenario": scenario,
            "num_params": self.num_hps,
            "success_states": [
                StatusType.SUCCESS,
            ],
            "impute_censored_data": False,
            "scale_perc": 5,
        }

        self.random_chooser = ChooserProb(rng=rng, prob=0.0)

        types, bounds = get_types(self.cs, instance_features=None)
        model_kwargs = {
            "configspace": self.cs,
            "types": types,
            "bounds": bounds,
            "seed": rng.randint(MAXINT),
        }

        models = []

        cov_amp = ConstantKernel(
            2.0,
            constant_value_bounds=(np.exp(-10), np.exp(2)),
            prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
        )

        cont_dims = np.array(np.where(np.array(types) == 0)[0], dtype=np.int)
        cat_dims = np.where(np.array(types) != 0)[0]

        if len(cont_dims) > 0:
            exp_kernel = Matern(
                np.ones([len(cont_dims)]),
                [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
                 for _ in range(len(cont_dims))],
                nu=2.5,
                operate_on=cont_dims,
            )

        if len(cat_dims) > 0:
            ham_kernel = HammingKernel(
                np.ones([len(cat_dims)]),
                [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
                 for _ in range(len(cat_dims))],
                operate_on=cat_dims,
            )
        assert len(cont_dims) + len(cat_dims) == len(
            scenario.cs.get_hyperparameters())

        noise_kernel = WhiteKernel(
            noise_level=1e-8,
            noise_level_bounds=(np.exp(-25), np.exp(2)),
            prior=HorseshoePrior(scale=0.1, rng=rng),
        )

        if len(cont_dims) > 0 and len(cat_dims) > 0:
            # both
            kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
        elif len(cont_dims) > 0 and len(cat_dims) == 0:
            # only cont
            kernel = cov_amp * exp_kernel + noise_kernel
        elif len(cont_dims) == 0 and len(cat_dims) > 0:
            # only cont
            kernel = cov_amp * ham_kernel + noise_kernel
        else:
            raise ValueError()
        gp_kwargs = {"kernel": kernel}

        rf_kwargs = {}
        rf_kwargs["num_trees"] = model_kwargs.get("num_trees", 10)
        rf_kwargs["do_bootstrapping"] = model_kwargs.get(
            "do_bootstrapping", True)
        rf_kwargs["ratio_features"] = model_kwargs.get("ratio_features", 1.0)
        rf_kwargs["min_samples_split"] = model_kwargs.get(
            "min_samples_split", 2)
        rf_kwargs["min_samples_leaf"] = model_kwargs.get("min_samples_leaf", 1)
        rf_kwargs["log_y"] = model_kwargs.get("log_y", True)

        rf_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)

        rf_kwargs = copy.deepcopy(rf_kwargs)
        rf_kwargs["log_y"] = False
        rf_no_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)

        rh2epm_cost = RunHistory2EPM4Cost(**r2e_def_kwargs)
        rh2epm_log_cost = RunHistory2EPM4LogScaledCost(**r2e_def_kwargs)
        rh2epm_copula = RunHistory2EPM4GaussianCopulaCorrect(**r2e_def_kwargs)

        self.combinations = []

        # 2 models * 4 acquisition functions
        acq_funcs = [EI, PI, LogEI, LCB]
        acq_func_instances = []
        # acq_func_maximizer_instances = []

        n_sls_iterations = {
            1: 10,
            2: 10,
            3: 10,
            4: 10,
            5: 10,
            6: 10,
            7: 8,
            8: 6,
        }.get(len(self.cs.get_hyperparameters()), 5)

        acq_func_maximizer_kwargs = {
            "config_space": self.cs,
            "rng": rng,
            "max_steps": 5,
            "n_steps_plateau_walk": 5,
            "n_sls_iterations": n_sls_iterations,
        }
        self.idx_ei = 0

        self.num_models = len(models)
        self.num_acq_funcs = len(acq_funcs)

        no_transform_gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                                          **copy.deepcopy(gp_kwargs))
        ei = EI(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, ei, ei_opt, rh2epm_cost))

        pi = PI(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = pi
        pi_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, pi, pi_opt, rh2epm_cost))

        lcb = LCB(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = lcb
        lcb_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, lcb, lcb_opt, rh2epm_cost))

        gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                             **copy.deepcopy(gp_kwargs))
        ei = EI(model=gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((gp, ei, ei_opt, rh2epm_copula))

        gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                             **copy.deepcopy(gp_kwargs))
        ei = LogEI(model=gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((gp, ei, ei_opt, rh2epm_log_cost))

        ei = EI(model=rf_no_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_cost))

        ei = LogEI(model=rf_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_log, ei, ei_opt, rh2epm_log_cost))

        ei = EI(model=rf_no_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_copula))

        self.num_acq_instances = len(acq_func_instances)
        self.best_observation = np.inf

        self.next_evaluations = []
示例#22
0
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[BaseRunner], Callable]] = None,
        tae_runner_kwargs: Optional[Dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[Dict] = None,
        intensifier: Optional[Type[AbstractRacer]] = None,
        intensifier_kwargs: Optional[Dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[Dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[Dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[Dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[Dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[Dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[Type[SMBO]] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[Dict] = None,
        dask_client: Optional[dask.distributed.Client] = None,
        n_jobs: Optional[int] = 1,
    ):
        """
        Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.base.BaseRunner or callable
            Callable or implementation of
            :class:`~smac.tae.base.BaseRunner`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        tae_runner_kwargs: Optional[Dict]
            arguments passed to constructor of '~tae_runner'
        runhistory : RunHistory
            runhistory to store all algorithm runs
        runhistory_kwargs : Optional[Dict]
            arguments passed to constructor of runhistory.
            We strongly advise against changing the aggregation function,
            since it will break some code assumptions
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        intensifier_kwargs: Optional[Dict]
            arguments passed to the constructor of '~intensifier'
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Class or object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` or :class:`~smac.optimizer.acquisition.LogEI` if not set.
            `~acquisition_function_kwargs` is passed to the class constructor.
        acquisition_function_kwargs : Optional[Dict]
            dictionary to pass specific arguments to ~acquisition_function
        integrate_acquisition_function : bool, default=False
            Whether to integrate the acquisition function. Works only with models which can sample their
            hyperparameters (i.e. GaussianProcessMCMC).
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.LocalAndSortedRandomSearch` if not set.
        acquisition_function_optimizer_kwargs: Optional[Dict]
            Arguments passed to constructor of '~acquisition_function_optimizer'
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        model_kwargs : Optional[Dict]
            Arguments passed to constructor of '~model'
        runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        runhistory2epm_kwargs: Optional[Dict]
            Arguments passed to the constructor of '~runhistory2epm'
        initial_design : InitialDesign
            initial sampling design
        initial_design_kwargs: Optional[Dict]
            arguments passed to constructor of `~initial_design'
        initial_configurations : List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~smac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.
        random_configuration_chooser_kwargs : Optional[Dict]
            arguments of constructor for '~random_configuration_chooser'
        dask_client : dask.distributed.Client
            User-created dask client, can be used to start a dask cluster and then attach SMAC to it.
        n_jobs : int, optional
            Number of jobs. If > 1 or -1, this creates a dask client if ``dask_client`` is ``None``. Will
            be ignored if ``dask_client`` is not ``None``.
            If ``None``, this value will be set to 1, if ``-1``, this will be set to the number of cpu cores.
        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            self.output_dir = create_output_directory(scenario, run_id)
        elif scenario.output_dir is not None:  # type: ignore[attr-defined] # noqa F821
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = cast(str, scenario.output_dir_for_this_run
                                   )  # type: ignore[attr-defined] # noqa F821
        rng = cast(np.random.RandomState, rng)

        if (scenario.deterministic is
                True  # type: ignore[attr-defined] # noqa F821
                and getattr(scenario, 'tuner_timeout', None) is None
                and scenario.run_obj ==
                'quality'  # type: ignore[attr-defined] # noqa F821
            ):
            self.logger.info(
                'Optimizing a deterministic scenario for quality without a tuner timeout - will make '
                'SMAC deterministic and only evaluate one configuration per iteration!'
            )
            scenario.intensification_percentage = 1e-10  # type: ignore[attr-defined] # noqa F821
            scenario.min_chall = 1  # type: ignore[attr-defined] # noqa F821

        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"  # type: ignore[attr-defined] # noqa F821

        # initialize empty runhistory
        runhistory_def_kwargs = {}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(
                **runhistory_def_kwargs)  # type: ignore[operator] # noqa F821
        elif isinstance(runhistory, RunHistory):
            pass
        else:
            raise ValueError(
                'runhistory has to be a class or an object of RunHistory')

        rand_conf_chooser_kwargs = {'rng': rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if 'prob' not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs[
                    'prob'] = scenario.rand_prob  # type: ignore[attr-defined] # noqa F821
            random_configuration_chooser_instance = (
                ChooserProb(**rand_conf_chooser_kwargs
                            )  # type: ignore[arg-type] # noqa F821
            )  # type: RandomConfigurationChooser
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser_instance = random_configuration_chooser(
                **
                rand_conf_chooser_kwargs)  # type: ignore[arg-type] # noqa F821
        elif not isinstance(random_configuration_chooser,
                            RandomConfigurationChooser):
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(
            rng.randint(MAXINT))  # type: ignore[attr-defined] # noqa F821

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(
            scenario.cs,
            scenario.feature_array)  # type: ignore[attr-defined] # noqa F821
        model_def_kwargs = {
            'types': types,
            'bounds': bounds,
            'instance_features': scenario.feature_array,
            'seed': rng.randint(MAXINT),
            'pca_components': scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                    'log_y': scenario.transform_y
                    in ["LOG",
                        "LOGS"],  # type: ignore[attr-defined] # noqa F821
                    'num_trees': scenario.
                    rf_num_trees,  # type: ignore[attr-defined] # noqa F821
                    'do_bootstrapping': scenario.
                    rf_do_bootstrapping,  # type: ignore[attr-defined] # noqa F821
                    'ratio_features': scenario.
                    rf_ratio_features,  # type: ignore[attr-defined] # noqa F821
                    'min_samples_split': scenario.
                    rf_min_samples_split,  # type: ignore[attr-defined] # noqa F821
                    'min_samples_leaf': scenario.
                    rf_min_samples_leaf,  # type: ignore[attr-defined] # noqa F821
                    'max_depth': scenario.
                    rf_max_depth,  # type: ignore[attr-defined] # noqa F821
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs[
                'configspace'] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = (
                RandomForestWithInstances(
                    **model_def_kwargs)  # type: ignore[arg-type] # noqa F821
            )  # type: AbstractEPM
        elif inspect.isclass(model):
            model_def_kwargs[
                'configspace'] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = model(
                **model_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {'model': model_instance}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)
        if acquisition_function is None:
            if scenario.transform_y in [
                    "LOG", "LOGS"
            ]:  # type: ignore[attr-defined] # noqa F821
                acquisition_function_instance = (
                    LogEI(**
                          acq_def_kwargs)  # type: ignore[arg-type] # noqa F821
                )  # type: AbstractAcquisitionFunction
            else:
                acquisition_function_instance = EI(
                    **acq_def_kwargs)  # type: ignore[arg-type] # noqa F821
        elif inspect.isclass(acquisition_function):
            acquisition_function_instance = acquisition_function(
                **acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function_instance = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function_instance,
                **acq_def_kwargs)

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            'acquisition_function': acquisition_function_instance,
            'config_space':
            scenario.cs,  # type: ignore[attr-defined] # noqa F821
            'rng': rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                    'max_steps': scenario.
                    sls_max_steps,  # type: ignore[attr-defined] # noqa F821
                    'n_steps_plateau_walk': scenario.
                    sls_n_steps_plateau_walk,  # type: ignore[attr-defined] # noqa F821
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer_instance = (
                LocalAndSortedRandomSearch(
                    **
                    acq_func_opt_kwargs)  # type: ignore[arg-type] # noqa F821
            )  # type: AcquisitionFunctionMaximizer
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer_instance = acquisition_function_optimizer(
                **acq_func_opt_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            'stats': self.stats,
            'run_obj': scenario.run_obj,
            'par_factor':
            scenario.par_factor,  # type: ignore[attr-defined] # noqa F821
            'cost_for_crash':
            scenario.cost_for_crash,  # type: ignore[attr-defined] # noqa F821
            'abort_on_first_run_crash': scenario.
            abort_on_first_run_crash  # type: ignore[attr-defined] # noqa F821
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)

        if 'ta' not in tae_def_kwargs:
            tae_def_kwargs[
                'ta'] = scenario.ta  # type: ignore[attr-defined] # noqa F821
        if tae_runner is None:
            tae_def_kwargs[
                'ta'] = scenario.ta  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = (
                ExecuteTARunOld(
                    **tae_def_kwargs)  # type: ignore[arg-type] # noqa F821
            )  # type: BaseRunner
        elif inspect.isclass(tae_runner):
            tae_runner_instance = cast(BaseRunner, tae_runner(
                **tae_def_kwargs))  # type: ignore[arg-type] # noqa F821
        elif callable(tae_runner):
            tae_def_kwargs['ta'] = tae_runner
            tae_def_kwargs[
                'use_pynisher'] = scenario.limit_resources  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = ExecuteTAFuncDict(
                **tae_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "BaseRunner. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # In case of a parallel run, wrap the single worker in a parallel
        # runner
        if n_jobs is None or n_jobs == 1:
            _n_jobs = 1
        elif n_jobs == -1:
            _n_jobs = joblib.cpu_count()
        elif n_jobs > 0:
            _n_jobs = n_jobs
        else:
            raise ValueError(
                'Number of tasks must be positive, None or -1, but is %s' %
                str(n_jobs))
        if _n_jobs > 1 or dask_client is not None:
            tae_runner_instance = DaskParallelRunner(
                tae_runner_instance,
                n_workers=_n_jobs,
                output_directory=self.output_dir,
                dask_client=dask_client,
            )

        # Check that overall objective and tae objective are the same
        # TODO: remove these two ignores once the scenario object knows all its attributes!
        if tae_runner_instance.run_obj != scenario.run_obj:  # type: ignore[union-attr] # noqa F821
            raise ValueError(
                "Objective for the target algorithm runner and "
                "the scenario must be the same, but are '%s' and "
                "'%s'" %
                (tae_runner_instance.run_obj,
                 scenario.run_obj))  # type: ignore[union-attr] # noqa F821

        # initialize intensification
        intensifier_def_kwargs = {
            'stats': self.stats,
            'traj_logger': traj_logger,
            'rng': rng,
            'instances':
            scenario.train_insts,  # type: ignore[attr-defined] # noqa F821
            'cutoff':
            scenario.cutoff,  # type: ignore[attr-defined] # noqa F821
            'deterministic':
            scenario.deterministic,  # type: ignore[attr-defined] # noqa F821
            'run_obj_time': scenario.run_obj ==
            "runtime",  # type: ignore[attr-defined] # noqa F821
            'instance_specifics': scenario.
            instance_specific,  # type: ignore[attr-defined] # noqa F821
            'adaptive_capping_slackfactor': scenario.
            intens_adaptive_capping_slackfactor,  # type: ignore[attr-defined] # noqa F821
            'min_chall':
            scenario.intens_min_chall  # type: ignore[attr-defined] # noqa F821
        }

        if isinstance(intensifier, Intensifier) \
                or (intensifier is not None and inspect.isclass(intensifier) and issubclass(intensifier, Intensifier)):
            intensifier_def_kwargs[
                'always_race_against'] = scenario.cs.get_default_configuration(
                )  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs[
                'use_ta_time_bound'] = scenario.use_ta_time  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs[
                'minR'] = scenario.minR  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs[
                'maxR'] = scenario.maxR  # type: ignore[attr-defined] # noqa F821
        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)

        if intensifier is None:
            intensifier_instance = (
                Intensifier(**intensifier_def_kwargs
                            )  # type: ignore[arg-type] # noqa F821
            )  # type: AbstractRacer
        elif inspect.isclass(intensifier):
            intensifier_instance = intensifier(
                **intensifier_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the AbstractRacer, but is '%s'"
                % type(intensifier))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        init_design_def_kwargs = {
            'cs': scenario.cs,  # type: ignore[attr-defined] # noqa F821
            'traj_logger': traj_logger,
            'rng': rng,
            'ta_run_limit':
            scenario.ta_run_limit,  # type: ignore[attr-defined] # noqa F821
            'configs': initial_configurations,
            'n_configs_x_params': 0,
            'max_config_fracs': 0.0
        }
        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design_instance = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design_instance = DefaultConfiguration(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design_instance = RandomConfigurations(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent
                                 )  # type: ignore[attr-defined] # noqa F821
        elif inspect.isclass(initial_design):
            initial_design_instance = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in [
                "LOG", "LOGS"
        ]:  # type: ignore[attr-defined] # noqa F821
            cutoff = np.log(np.nanmin([
                np.inf, np.float_(scenario.cutoff)
            ]))  # type: ignore[attr-defined] # noqa F821
            threshold = cutoff + np.log(
                scenario.par_factor)  # type: ignore[attr-defined] # noqa F821
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)
                                ])  # type: ignore[attr-defined] # noqa F821
            threshold = cutoff * scenario.par_factor  # type: ignore[attr-defined] # noqa F821
        num_params = len(scenario.cs.get_hyperparameters()
                         )  # type: ignore[attr-defined] # noqa F821
        imputor = RFRImputator(rng=rng,
                               cutoff=cutoff,
                               threshold=threshold,
                               model=model_instance,
                               change_threshold=0.01,
                               max_iter=2)

        r2e_def_kwargs = {
            'scenario': scenario,
            'num_params': num_params,
            'success_states': [
                StatusType.SUCCESS,
            ],
            'impute_censored_data': True,
            'impute_state': [
                StatusType.CAPPED,
            ],
            'imputor': imputor,
            'scale_perc': 5
        }
        if scenario.run_obj == 'quality':
            r2e_def_kwargs.update({
                'success_states':
                [StatusType.SUCCESS, StatusType.CRASHED, StatusType.MEMOUT],
                'impute_censored_data':
                False,
                'impute_state':
                None,
            })

        if isinstance(
                intensifier_instance,
            (SuccessiveHalving, Hyperband)) and scenario.run_obj == "quality":
            r2e_def_kwargs.update({
                'success_states': [
                    StatusType.SUCCESS,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                    StatusType.DONOTADVANCE,
                ],
                'consider_for_higher_budgets_state': [
                    StatusType.DONOTADVANCE,
                    StatusType.TIMEOUT,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                ],
            })

        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == 'runtime':
                rh2epm = (
                    RunHistory2EPM4LogCost(
                        **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
                )  # type: AbstractRunHistory2EPM
            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4Cost(
                        **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
                elif scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogCost(
                        **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
                elif scenario.transform_y == "LOGS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
                elif scenario.transform_y == "INVS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)
        elif inspect.isclass(runhistory2epm):
            rh2epm = runhistory2epm(
                **r2e_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design_instance,
            'runhistory': runhistory,
            'runhistory2epm': rh2epm,
            'intensifier': intensifier_instance,
            'num_run': run_id,
            'model': model_instance,
            'acq_optimizer': acquisition_function_optimizer_instance,
            'acquisition_func': acquisition_function_instance,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser':
            random_configuration_chooser_instance,
            'tae_runner': tae_runner_instance,
        }  # type: Dict[str, Any]

        if smbo_class is None:
            self.solver = SMBO(**
                               smbo_args)  # type: ignore[arg-type] # noqa F821
        else:
            self.solver = smbo_class(
                **smbo_args)  # type: ignore[arg-type] # noqa F821
示例#23
0
class GPSampler:
    def __init__(
        self,
        configspace,
        random_fraction=1 / 2,
        logger=None,
        configs=None,
        losses=None,
    ):
        self.logger = logger

        self.random_fraction = random_fraction
        self.configspace = configspace
        self.min_points_in_model = len(self.configspace.get_hyperparameters())

        rng = np.random.RandomState(random.randint(0, 100))

        self.model = _construct_model(configspace, rng)
        self.acquisition_func = EI(model=self.model)
        self.acq_optimizer = LocalSearch(
            acquisition_function=self.acquisition_func,
            config_space=configspace,
            rng=rng)
        self.runhistory = RunHistory()

        self.configs = configs or list()
        self.losses = losses or list()
        if self.has_model:
            for config, cost in zip(self.configs, self.losses):
                self.runhistory.add(config, cost, 0, StatusType.SUCCESS)

            X = convert_configurations_to_array(self.configs)
            Y = np.array(self.losses, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses),
            )

    @property
    def has_model(self):
        return len(self.configs) >= self.min_points_in_model

    def get_config(self, budget):  # pylint: disable=unused-argument
        self.logger.debug("start sampling a new configuration.")

        is_random_fraction = np.random.rand() < self.random_fraction
        if is_random_fraction:
            sample = self.configspace.sample_configuration()
        elif self.has_model:
            # Use private _maximize to not return challenger list object
            sample = self.acq_optimizer._maximize(
                runhistory=self.runhistory,
                stats=None,
                num_points=1,
            )
            sample = sample[0][1]
        else:
            sample = self.configspace.sample_configuration()

        sample = sample.get_dictionary()
        info = {}
        self.logger.debug("done sampling a new configuration.")
        return sample, info

    def new_result(self, job, config_info):  # pylint: disable=unused-argument
        if job.exception is not None:
            self.logger.warning(
                f"job {job.id} failed with exception\n{job.exception}")

        if job.result is None:
            # One could skip crashed results, but we decided to
            # assign a +inf loss and count them as bad configurations
            loss = np.inf
        else:
            # same for non numeric losses.
            # Note that this means losses of minus infinity will count as bad!
            loss = job.result["loss"] if np.isfinite(
                job.result["loss"]) else np.inf

        config = ConfigSpace.Configuration(self.configspace,
                                           job.kwargs["config"])
        self.configs.append(config)
        self.losses.append(loss)
        self.runhistory.add(config, loss, 0, StatusType.SUCCESS)

        if self.has_model:
            X = convert_configurations_to_array(self.configs)
            Y = np.array(self.losses, dtype=np.float64)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=min(self.losses),
            )
示例#24
0
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: typing.Optional[typing.Union[ExecuteTARun,
                                                 typing.Callable]] = None,
        runhistory: typing.Optional[RunHistory] = None,
        intensifier: typing.Optional[Intensifier] = None,
        acquisition_function: typing.
        Optional[AbstractAcquisitionFunction] = None,
        acquisition_function_optimizer: typing.
        Optional[AcquisitionFunctionMaximizer] = None,
        model: typing.Optional[AbstractEPM] = None,
        runhistory2epm: typing.Optional[AbstractRunHistory2EPM] = None,
        initial_design: typing.Optional[InitialDesign] = None,
        initial_configurations: typing.Optional[
            typing.List[Configuration]] = None,
        stats: typing.Optional[Stats] = None,
        restore_incumbent: typing.Optional[Configuration] = None,
        rng: typing.Optional[typing.Union[np.random.RandomState, int]] = None,
        smbo_class: typing.Optional[SMBO] = None,
        run_id: typing.Optional[int] = None,
        random_configuration_chooser: typing.
        Optional[RandomConfigurationChooser] = None):
        """
        Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        runhistory : RunHistory
            runhistory to store all algorithm runs
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` if not set.
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        initial_design : InitialDesign
            initial sampling design
        initial_configurations : typing.List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~smac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            self.output_dir = create_output_directory(scenario, run_id)
        elif scenario.output_dir is not None:
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = scenario.output_dir_for_this_run

        if (scenario.deterministic is True
                and getattr(scenario, 'tuner_timeout', None) is None
                and scenario.run_obj == 'quality'):
            self.logger.info('Optimizing a deterministic scenario for '
                             'quality without a tuner timeout - will make '
                             'SMAC deterministic!')
            scenario.intensification_percentage = 1e-10
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
            self.logger.warn(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        if not random_configuration_chooser:
            random_configuration_chooser = ChooserProb(prob=scenario.rand_prob,
                                                       rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                log_y=scenario.transform_y in ["LOG", "LOGS"],
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)

        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        if acquisition_function_optimizer is None:
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                acquisition_function=acquisition_function,
                config_space=scenario.cs,
                rng=np.random.RandomState(seed=rng.randint(MAXINT)),
                max_steps=scenario.sls_max_steps,
                n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)
        elif not isinstance(
                acquisition_function_optimizer,
                AcquisitionFunctionMaximizer,
        ):
            raise ValueError(
                "Argument 'acquisition_function_optimizer' must be of type"
                "'AcquisitionFunctionMaximizer', but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash,
                abort_on_first_run_crash=scenario.abort_on_first_run_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash,
                abort_on_first_run_crash=scenario.abort_on_first_run_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                use_ta_time_bound=scenario.use_ta_time,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.
                intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            elif scenario.initial_incumbent == "LHD":
                initial_design = LHDesign(runhistory=runhistory,
                                          intensifier=intensifier,
                                          aggregate_func=aggregate_func,
                                          tae_runner=tae_runner,
                                          scenario=scenario,
                                          stats=self.stats,
                                          traj_logger=traj_logger,
                                          rng=rng)
            elif scenario.initial_incumbent == "FACTORIAL":
                initial_design = FactorialInitialDesign(
                    runhistory=runhistory,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    rng=rng)
            elif scenario.initial_incumbent == "SOBOL":
                initial_design = SobolDesign(runhistory=runhistory,
                                             intensifier=intensifier,
                                             aggregate_func=aggregate_func,
                                             tae_runner=tae_runner,
                                             scenario=scenario,
                                             stats=self.stats,
                                             traj_logger=traj_logger,
                                             rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == 'runtime':

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":
                    runhistory2epm = RunHistory2EPM4Cost(
                        scenario=scenario,
                        num_params=num_params,
                        success_states=[
                            StatusType.SUCCESS, StatusType.CRASHED
                        ],
                        impute_censored_data=False,
                        impute_state=None)
                elif scenario.transform_y == "LOG":
                    runhistory2epm = RunHistory2EPM4LogCost(
                        scenario=scenario,
                        num_params=num_params,
                        success_states=[
                            StatusType.SUCCESS, StatusType.CRASHED
                        ],
                        impute_censored_data=False,
                        impute_state=None)
                elif scenario.transform_y == "LOGS":
                    runhistory2epm = RunHistory2EPM4LogScaledCost(
                        scenario=scenario,
                        num_params=num_params,
                        success_states=[
                            StatusType.SUCCESS, StatusType.CRASHED
                        ],
                        impute_censored_data=False,
                        impute_state=None)
                elif scenario.transform_y == "INVS":
                    runhistory2epm = RunHistory2EPM4InvScaledCost(
                        scenario=scenario,
                        num_params=num_params,
                        success_states=[
                            StatusType.SUCCESS, StatusType.CRASHED
                        ],
                        impute_censored_data=False,
                        impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': run_id,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser': random_configuration_chooser
        }

        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
示例#25
0
    def _component_builder(self, conf:typing.Union[Configuration, dict]) \
        -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
        """
            builds new Acquisition function object
            and EPM object and returns these
            
            Parameters
            ----------
            conf: typing.Union[Configuration, dict]
                configuration specificing "model" and "acq_func"
                
            Returns
            -------
            typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]
            
        """
        types, bounds = get_types(self.config_space, instance_features=self.scenario.feature_array)
        
        if conf["model"] == "RF":
            model = RandomForestWithInstances(
                  types=types,
                  bounds=bounds,
                  instance_features=self.scenario.feature_array,
                  seed=self.rng.randint(MAXINT),
                  pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
                  log_y=conf.get("log_y", self.scenario.transform_y in ["LOG", "LOGS"]),
                  num_trees=conf.get("num_trees", self.scenario.rf_num_trees), 
                  do_bootstrapping=conf.get("do_bootstrapping", self.scenario.rf_do_bootstrapping),  
                  ratio_features=conf.get("ratio_features", self.scenario.rf_ratio_features),
                  min_samples_split=conf.get("min_samples_split", self.scenario.rf_min_samples_split),
                  min_samples_leaf=conf.get("min_samples_leaf", self.scenario.rf_min_samples_leaf),
                  max_depth=conf.get("max_depth", self.scenario.rf_max_depth))
            
        elif conf["model"] == "GP":
            cov_amp = 2
            n_dims = len(types)

            initial_ls = np.ones([n_dims])
            exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
            kernel = cov_amp * exp_kernel

            prior = DefaultPrior(len(kernel) + 1, rng=self.rng)

            n_hypers = 3 * len(kernel)
            if n_hypers % 2 == 1:
                n_hypers += 1

            model = GaussianProcessMCMC(
                types=types,
                bounds=bounds,
                kernel=kernel,
                prior=prior,
                n_hypers=n_hypers,
                chain_length=200,
                burnin_steps=100,
                normalize_input=True,
                normalize_output=True,
                rng=self.rng,
            )
            
        if conf["acq_func"] == "EI":
            acq = EI(model=model,
                     par=conf.get("par_ei", 0))
        elif conf["acq_func"] == "LCB":
            acq = LCB(model=model,
                par=conf.get("par_lcb", 0))
        elif conf["acq_func"] == "PI":
            acq = PI(model=model,
                     par=conf.get("par_pi", 0))
        elif conf["acq_func"] == "LogEI":
            # par value should be in log-space
            acq = LogEI(model=model,
                        par=conf.get("par_logei", 0))
        
        return acq, model
示例#26
0
    def __init__(
            self,
            scenario: Scenario,
            tae_runner: typing.Union[ExecuteTARun, typing.Callable] = None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            acquisition_function_optimizer: AcquisitionFunctionMaximizer = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            restore_incumbent: Configuration = None,
            rng: typing.Union[np.random.RandomState, int] = None,
            smbo_class: SMBO = None,
            run_id: int = 1):
        """Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        runhistory : RunHistory
            runhistory to store all algorithm runs
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` if not set.
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        initial_design : InitialDesign
            initial sampling design
        initial_configurations : typing.List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id: int, (default: 1)
            Run ID will be used as subfolder for output_dir.
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        if acquisition_function_optimizer is None:
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                acquisition_function, scenario.cs,
                np.random.RandomState(seed=rng.randint(MAXINT)))
        elif not isinstance(
                acquisition_function_optimizer,
                AcquisitionFunctionMaximizer,
        ):
            raise ValueError(
                "Argument 'acquisition_function_optimizer' must be of type"
                "'AcquisitionFunctionMaximizer', but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(tae_runner=tae_runner,
                                      stats=self.stats,
                                      traj_logger=traj_logger,
                                      rng=rng,
                                      instances=scenario.train_insts,
                                      cutoff=scenario.cutoff,
                                      deterministic=scenario.deterministic,
                                      run_obj_time=scenario.run_obj == "runtime",
                                      always_race_against=scenario.cs.get_default_configuration() \
                                        if scenario.always_race_default else None,
                                      instance_specifics=scenario.instance_specific,
                                      minR=scenario.minR,
                                      maxR=scenario.maxR)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log10(scenario.cutoff)
                threshold = np.log10(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[StatusType.SUCCESS, StatusType.CRASHED],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': num_run,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent
        }
        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
示例#27
0
 def setUp(self):
     self.model = MockModel()
     self.ei = EI(self.model)
示例#28
0
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[BaseRunner], Callable]] = None,
        tae_runner_kwargs: Optional[Dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[Dict] = None,
        intensifier: Optional[Type[AbstractRacer]] = None,
        intensifier_kwargs: Optional[Dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[Dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[Dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[Dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[Dict] = None,
        multi_objective_algorithm: Optional[
            Type[AbstractMultiObjectiveAlgorithm]] = None,
        multi_objective_kwargs: Optional[Dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[Dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[Type[SMBO]] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[Dict] = None,
        dask_client: Optional[dask.distributed.Client] = None,
        n_jobs: Optional[int] = 1,
    ):
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            self.output_dir = create_output_directory(scenario, run_id)
        elif scenario.output_dir is not None:  # type: ignore[attr-defined] # noqa F821
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = cast(str, scenario.output_dir_for_this_run
                                   )  # type: ignore[attr-defined] # noqa F821
        rng = cast(np.random.RandomState, rng)

        if (scenario.deterministic is
                True  # type: ignore[attr-defined] # noqa F821
                and getattr(scenario, "tuner_timeout", None) is None
                and scenario.run_obj ==
                "quality"  # type: ignore[attr-defined] # noqa F821
            ):
            self.logger.info(
                "Optimizing a deterministic scenario for quality without a tuner timeout - will make "
                "SMAC deterministic and only evaluate one configuration per iteration!"
            )
            scenario.intensification_percentage = 1e-10  # type: ignore[attr-defined] # noqa F821
            scenario.min_chall = 1  # type: ignore[attr-defined] # noqa F821

        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"  # type: ignore[attr-defined] # noqa F821

        # initialize empty runhistory
        num_obj = len(scenario.multi_objectives
                      )  # type: ignore[attr-defined] # noqa F821
        runhistory_def_kwargs = {}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(
                **runhistory_def_kwargs)  # type: ignore[operator] # noqa F821
        elif isinstance(runhistory, RunHistory):
            pass
        else:
            raise ValueError(
                "runhistory has to be a class or an object of RunHistory")

        rand_conf_chooser_kwargs = {"rng": rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if "prob" not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs[
                    "prob"] = scenario.rand_prob  # type: ignore[attr-defined] # noqa F821
            random_configuration_chooser_instance = ChooserProb(
                **
                rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821  # type: RandomConfigurationChooser
            )
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser_instance = random_configuration_chooser(  # type: ignore # noqa F821
                **
                rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif not isinstance(random_configuration_chooser,
                            RandomConfigurationChooser):
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(
            rng.randint(MAXINT))  # type: ignore[attr-defined] # noqa F821

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(
            scenario.cs,
            scenario.feature_array)  # type: ignore[attr-defined] # noqa F821
        model_def_kwargs = {
            "types": types,
            "bounds": bounds,
            "instance_features": scenario.feature_array,
            "seed": rng.randint(MAXINT),
            "pca_components": scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                    "log_y": scenario.transform_y
                    in ["LOG",
                        "LOGS"],  # type: ignore[attr-defined] # noqa F821
                    "num_trees": scenario.
                    rf_num_trees,  # type: ignore[attr-defined] # noqa F821
                    "do_bootstrapping": scenario.
                    rf_do_bootstrapping,  # type: ignore[attr-defined] # noqa F821
                    "ratio_features": scenario.
                    rf_ratio_features,  # type: ignore[attr-defined] # noqa F821
                    "min_samples_split": scenario.
                    rf_min_samples_split,  # type: ignore[attr-defined] # noqa F821
                    "min_samples_leaf": scenario.
                    rf_min_samples_leaf,  # type: ignore[attr-defined] # noqa F821
                    "max_depth": scenario.
                    rf_max_depth,  # type: ignore[attr-defined] # noqa F821
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs[
                "configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = RandomForestWithInstances(
                **
                model_def_kwargs  # type: ignore[arg-type] # noqa F821  # type: AbstractEPM
            )
        elif inspect.isclass(model):
            model_def_kwargs[
                "configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = model(
                **model_def_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {"model": model_instance}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)

        acquisition_function_instance = (
            None)  # type: Optional[AbstractAcquisitionFunction]
        if acquisition_function is None:
            if scenario.transform_y in [
                    "LOG", "LOGS"
            ]:  # type: ignore[attr-defined] # noqa F821
                acquisition_function_instance = LogEI(
                    **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
                )
            else:
                acquisition_function_instance = EI(
                    **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
                )
        elif inspect.isclass(acquisition_function):
            acquisition_function_instance = acquisition_function(
                **acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function_instance = IntegratedAcquisitionFunction(
                acquisition_function=
                acquisition_function_instance,  # type: ignore
                **acq_def_kwargs,
            )

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            "acquisition_function": acquisition_function_instance,
            "config_space":
            scenario.cs,  # type: ignore[attr-defined] # noqa F821
            "rng": rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                    "max_steps": scenario.
                    sls_max_steps,  # type: ignore[attr-defined] # noqa F821
                    "n_steps_plateau_walk": scenario.
                    sls_n_steps_plateau_walk,  # type: ignore[attr-defined] # noqa F821
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer_instance = LocalAndSortedRandomSearch(
                **acq_func_opt_kwargs  # type: ignore
            )
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer_instance = acquisition_function_optimizer(  # type: ignore # noqa F821
                **acq_func_opt_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            "stats": self.stats,
            "run_obj": scenario.run_obj,
            "par_factor":
            scenario.par_factor,  # type: ignore[attr-defined] # noqa F821
            "cost_for_crash":
            scenario.cost_for_crash,  # type: ignore[attr-defined] # noqa F821
            "abort_on_first_run_crash": scenario.
            abort_on_first_run_crash,  # type: ignore[attr-defined] # noqa F821
            "multi_objectives": scenario.
            multi_objectives,  # type: ignore[attr-defined] # noqa F821
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)

        if "ta" not in tae_def_kwargs:
            tae_def_kwargs[
                "ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
        if tae_runner is None:
            tae_def_kwargs[
                "ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = ExecuteTARunOld(
                **tae_def_kwargs
            )  # type: ignore[arg-type] # noqa F821  # type: BaseRunner
        elif inspect.isclass(tae_runner):
            tae_runner_instance = cast(
                BaseRunner, tae_runner(**tae_def_kwargs))  # type: ignore
        elif callable(tae_runner):
            tae_def_kwargs["ta"] = tae_runner
            tae_def_kwargs[
                "use_pynisher"] = scenario.limit_resources  # type: ignore[attr-defined] # noqa F821
            tae_def_kwargs[
                "memory_limit"] = scenario.memory_limit  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = ExecuteTAFuncDict(
                **tae_def_kwargs)  # type: ignore
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "BaseRunner. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # In case of a parallel run, wrap the single worker in a parallel
        # runner
        if n_jobs is None or n_jobs == 1:
            _n_jobs = 1
        elif n_jobs == -1:
            _n_jobs = joblib.cpu_count()
        elif n_jobs > 0:
            _n_jobs = n_jobs
        else:
            raise ValueError(
                "Number of tasks must be positive, None or -1, but is %s" %
                str(n_jobs))
        if _n_jobs > 1 or dask_client is not None:
            tae_runner_instance = DaskParallelRunner(  # type: ignore
                tae_runner_instance,
                n_workers=_n_jobs,
                output_directory=self.output_dir,
                dask_client=dask_client,
            )

        # Check that overall objective and tae objective are the same
        # TODO: remove these two ignores once the scenario object knows all its attributes!
        if tae_runner_instance.run_obj != scenario.run_obj:  # type: ignore[union-attr] # noqa F821
            raise ValueError(
                "Objective for the target algorithm runner and "
                "the scenario must be the same, but are '%s' and "
                "'%s'" %
                (tae_runner_instance.run_obj,
                 scenario.run_obj))  # type: ignore[union-attr] # noqa F821

        if intensifier is None:
            intensifier = Intensifier

        if isinstance(intensifier, AbstractRacer):
            intensifier_instance = intensifier
        elif inspect.isclass(intensifier):
            # initialize intensification
            intensifier_def_kwargs = {
                "stats": self.stats,
                "traj_logger": traj_logger,
                "rng": rng,
                "instances":
                scenario.train_insts,  # type: ignore[attr-defined] # noqa F821
                "cutoff":
                scenario.cutoff,  # type: ignore[attr-defined] # noqa F821
                "deterministic": scenario.
                deterministic,  # type: ignore[attr-defined] # noqa F821
                "run_obj_time": scenario.run_obj ==
                "runtime",  # type: ignore[attr-defined] # noqa F821
                "instance_specifics": scenario.
                instance_specific,  # type: ignore[attr-defined] # noqa F821
                "adaptive_capping_slackfactor": scenario.
                intens_adaptive_capping_slackfactor,  # type: ignore[attr-defined] # noqa F821
                "min_chall": scenario.
                intens_min_chall,  # type: ignore[attr-defined] # noqa F821
            }

            if issubclass(intensifier, Intensifier):
                intensifier_def_kwargs[
                    "always_race_against"] = scenario.cs.get_default_configuration(
                    )  # type: ignore[attr-defined] # noqa F821
                intensifier_def_kwargs[
                    "use_ta_time_bound"] = scenario.use_ta_time  # type: ignore[attr-defined] # noqa F821
                intensifier_def_kwargs[
                    "minR"] = scenario.minR  # type: ignore[attr-defined] # noqa F821
                intensifier_def_kwargs[
                    "maxR"] = scenario.maxR  # type: ignore[attr-defined] # noqa F821

            if intensifier_kwargs is not None:
                intensifier_def_kwargs.update(intensifier_kwargs)

            intensifier_instance = intensifier(
                **intensifier_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the AbstractRacer, but is '%s'"
                % type(intensifier))

        # initialize multi objective
        # the multi_objective_algorithm_instance will be passed to the runhistory2epm object
        multi_objective_algorithm_instance = (
            None)  # type: Optional[AbstractMultiObjectiveAlgorithm]

        if scenario.multi_objectives is not None and num_obj > 1:  # type: ignore[attr-defined] # noqa F821
            # define any defaults here
            _multi_objective_kwargs = {"rng": rng, "num_obj": num_obj}

            if multi_objective_kwargs is not None:
                _multi_objective_kwargs.update(multi_objective_kwargs)

            if multi_objective_algorithm is None:
                multi_objective_algorithm_instance = MeanAggregationStrategy(
                    **_multi_objective_kwargs
                )  # type: ignore[arg-type] # noqa F821
            elif inspect.isclass(multi_objective_algorithm):
                multi_objective_algorithm_instance = multi_objective_algorithm(
                    **_multi_objective_kwargs)
            else:
                raise TypeError(
                    "Multi-objective algorithm not recognized: %s" %
                    (type(multi_objective_algorithm)))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        init_design_def_kwargs = {
            "cs": scenario.cs,  # type: ignore[attr-defined] # noqa F821
            "traj_logger": traj_logger,
            "rng": rng,
            "ta_run_limit":
            scenario.ta_run_limit,  # type: ignore[attr-defined] # noqa F821
            "configs": initial_configurations,
            "n_configs_x_params": 0,
            "max_config_fracs": 0.0,
        }

        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design_instance = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs["max_config_fracs"] = 0.0
                initial_design_instance = DefaultConfiguration(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs["max_config_fracs"] = 0.0
                initial_design_instance = RandomConfigurations(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" %
                                 scenario.initial_incumbent  # type: ignore
                                 )  # type: ignore[attr-defined] # noqa F821
        elif inspect.isclass(initial_design):
            initial_design_instance = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in [
                "LOG", "LOGS"
        ]:  # type: ignore[attr-defined] # noqa F821
            cutoff = np.log(np.nanmin([
                np.inf, np.float_(scenario.cutoff)
            ]))  # type: ignore[attr-defined] # noqa F821
            threshold = cutoff + np.log(
                scenario.par_factor)  # type: ignore[attr-defined] # noqa F821
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)
                                ])  # type: ignore[attr-defined] # noqa F821
            threshold = cutoff * scenario.par_factor  # type: ignore[attr-defined] # noqa F821

        num_params = len(scenario.cs.get_hyperparameters()
                         )  # type: ignore[attr-defined] # noqa F821
        imputor = RFRImputator(
            rng=rng,
            cutoff=cutoff,
            threshold=threshold,
            model=model_instance,
            change_threshold=0.01,
            max_iter=2,
        )

        r2e_def_kwargs = {
            "scenario": scenario,
            "num_params": num_params,
            "success_states": [
                StatusType.SUCCESS,
            ],
            "impute_censored_data": True,
            "impute_state": [
                StatusType.CAPPED,
            ],
            "imputor": imputor,
            "scale_perc": 5,
        }

        # TODO: consider other sorts of multi-objective algorithms
        if isinstance(multi_objective_algorithm_instance, AggregationStrategy):
            r2e_def_kwargs.update({
                "multi_objective_algorithm":
                multi_objective_algorithm_instance
            })

        if scenario.run_obj == "quality":
            r2e_def_kwargs.update({
                "success_states": [
                    StatusType.SUCCESS,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                ],
                "impute_censored_data":
                False,
                "impute_state":
                None,
            })

        if (isinstance(intensifier_instance, (SuccessiveHalving, Hyperband))
                and scenario.run_obj == "quality"):
            r2e_def_kwargs.update({
                "success_states": [
                    StatusType.SUCCESS,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                    StatusType.DONOTADVANCE,
                ],
                "consider_for_higher_budgets_state": [
                    StatusType.DONOTADVANCE,
                    StatusType.TIMEOUT,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                ],
            })

        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == "runtime":
                rh2epm = RunHistory2EPM4LogCost(
                    **r2e_def_kwargs  # type: ignore
                )  # type: ignore[arg-type] # noqa F821  # type: AbstractRunHistory2EPM
            elif scenario.run_obj == "quality":
                if scenario.transform_y == "NONE":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4Cost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "LOGS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "INVS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
            else:
                raise ValueError(
                    "Unknown run objective: %s. Should be either "
                    "quality or runtime." %
                    self.scenario.run_obj  # type: ignore # noqa F821
                )
        elif inspect.isclass(runhistory2epm):
            rh2epm = runhistory2epm(**
                                    r2e_def_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            "scenario": scenario,
            "stats": self.stats,
            "initial_design": initial_design_instance,
            "runhistory": runhistory,
            "runhistory2epm": rh2epm,
            "intensifier": intensifier_instance,
            "num_run": run_id,
            "model": model_instance,
            "acq_optimizer": acquisition_function_optimizer_instance,
            "acquisition_func": acquisition_function_instance,
            "rng": rng,
            "restore_incumbent": restore_incumbent,
            "random_configuration_chooser":
            random_configuration_chooser_instance,
            "tae_runner": tae_runner_instance,
        }  # type: Dict[str, Any]

        if smbo_class is None:
            self.solver = SMBO(**
                               smbo_args)  # type: ignore[arg-type] # noqa F821
        else:
            self.solver = smbo_class(
                **smbo_args)  # type: ignore[arg-type] # noqa F821
示例#29
0
def run_experiment():
    pipeline_space = PipelineSpace()
    o_s = OneHotEncodingStep()
    i_s = ImputationStep()
    r_s = RescalingStep()
    b_s = BalancingStep()
    p_s = PreprocessingStep()
    c_s = ClassificationStep()
    pipeline_space.add_pipeline_steps([o_s, i_s, r_s, b_s, p_s, c_s])

    runhistory = PCRunHistory(average_cost)

    cs_builder = ConfigSpaceBuilder(pipeline_space)
    config_space = cs_builder.build_config_space()

    args = {
        'cs': config_space,
        'run_obj': "quality",
        'runcount_limit': 100,
        'wallclock_limit': 100,
        'memory_limit': 100,
        'cutoff_time': 100,
        'deterministic': "true"
    }
    scenario = Scenario(args)

    # Build stats
    stats = Stats(scenario, output_dir=None, stamp="")

    types, bounds = get_types(scenario.cs, scenario.feature_array)

    model = RandomForestWithInstances(types=types, bounds=bounds)

    constant_pipeline_steps = [
        "one_hot_encoder", "imputation", "rescaling", "balancing",
        "feature_preprocessor"
    ]
    variable_pipeline_steps = ["classifier"]
    rng = np.random.RandomState()
    num_params = len(scenario.cs.get_hyperparameters())

    acquisition_func = EI(model)
    acq_func_wrapper = PCAquisitionFunctionWrapper(
        acquisition_func=acquisition_func,
        config_space=scenario.cs,
        runhistory=runhistory,
        constant_pipeline_steps=constant_pipeline_steps,
        variable_pipeline_steps=variable_pipeline_steps)
    runhistory2epm = RunHistory2EPM4Cost(scenario,
                                         num_params,
                                         success_states=[StatusType.SUCCESS])
    local_search = LocalSearch(acquisition_function=acq_func_wrapper,
                               config_space=scenario.cs)
    select_configuration = SelectConfigurationsWithMarginalization(
        scenario=scenario,
        stats=stats,
        runhistory=runhistory,
        model=model,
        acq_optimizer=local_search,
        acquisition_func=acq_func_wrapper,
        rng=rng,
        constant_pipeline_steps=constant_pipeline_steps,
        variable_pipeline_steps=variable_pipeline_steps,
        num_marginalized_configurations_by_random_search=40,
        num_configs_for_marginalization=200)

    # sample configurations to fill runhistory
    sample_configs = config_space.sample_configuration(size=10)
    for config in sample_configs:
        runhistory.add(config, 1, 1, StatusType.SUCCESS)

    # test select_configurations procedure
    X, Y = runhistory2epm.transform(runhistory)
    challengers = select_configuration.run(
        X,
        Y,
        sample_configs[0],
        num_configurations_by_random_search_sorted=100,
        num_configurations_by_local_search=10,
        random_leaf_size=1)

    print(challengers[0])