Пример #1
0
 def test_predict(self):
     """Train on ten random rows and check the shapes returned by ``predict``."""
     rng = np.random.RandomState(1)
     features = rng.rand(20, 10)
     targets = rng.rand(10, 1)
     # One (0, 10) pair per input dimension, stored as an object array.
     bound_pairs = np.array([(0, 10) for _ in range(10)], dtype=object)
     model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint),
                                       bounds=bound_pairs)
     # First ten rows are used for training, the remaining ten for prediction.
     model.train(features[:10], targets[:10])
     m_hat, v_hat = model.predict(features[10:])
     expected_shape = (10, 1)
     self.assertEqual(m_hat.shape, expected_shape)
     self.assertEqual(v_hat.shape, expected_shape)
Пример #2
0
    def test_predict_marginalized_over_instances(self):
        """Check the output shapes of ``predict_marginalized_over_instances``.

        The forest is trained on 200 rows with 15 columns and queried with
        20 rows of 10 columns, using a (10, 5) instance-feature matrix.
        """
        rng = np.random.RandomState(1)
        # The arrays are drawn from the seeded generator in this fixed order.
        configs = rng.rand(20, 10)
        inst_feats = rng.rand(10, 5)
        targets = rng.rand(len(configs) * len(inst_feats), 1)
        train_rows = rng.rand(200, 15)

        model = RandomForestWithInstances(np.zeros((15, ), dtype=np.uint),
                                          instance_features=inst_feats)
        model.train(train_rows, targets)
        means, variances = model.predict_marginalized_over_instances(configs)
        expected_shape = (20, 1)
        self.assertEqual(means.shape, expected_shape)
        self.assertEqual(variances.shape, expected_shape)
Пример #3
0
    def test_with_ordinal(self):
        """Train a forest on a space mixing categorical, ordinal, float and integer parameters.

        Checks the bounds returned by ``get_types`` for the four
        hyperparameters and that, after training on ten stacked copies of four
        samples, every predicted mean is within 0.05 of its training target.
        """
        cs = smac.configspace.ConfigurationSpace()
        _ = cs.add_hyperparameter(
            CategoricalHyperparameter('a', [0, 1], default_value=0))
        _ = cs.add_hyperparameter(
            OrdinalHyperparameter('b', [0, 1], default_value=1))
        _ = cs.add_hyperparameter(
            UniformFloatHyperparameter('c',
                                       lower=0.,
                                       upper=1.,
                                       default_value=1))
        _ = cs.add_hyperparameter(
            UniformIntegerHyperparameter('d',
                                         lower=0,
                                         upper=10,
                                         default_value=1))
        cs.seed(1)

        # A single instance described by three features.
        feat_array = np.array([0, 0, 0]).reshape(1, -1)
        types, bounds = get_types(cs, feat_array)
        model = RandomForestWithInstances(
            configspace=cs,
            types=types,
            bounds=bounds,
            instance_features=feat_array,
            seed=1,
            ratio_features=1.0,
            pca_components=9,
        )
        self.assertEqual(bounds[0][0], 2)
        self.assertTrue(bounds[0][1] is np.nan)
        self.assertEqual(bounds[1][0], 0)
        self.assertEqual(bounds[1][1], 1)
        self.assertEqual(bounds[2][0], 0.)
        self.assertEqual(bounds[2][1], 1.)
        self.assertEqual(bounds[3][0], 0.)
        self.assertEqual(bounds[3][1], 1.)
        # Each row has seven columns; the last three are always zero, like feat_array.
        X = np.array(
            [[0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0.],
             [0., 1., 0., 9., 0., 0., 0.], [0., 1., 1., 4., 0., 0., 0.]],
            dtype=np.float64)
        y = np.array([0, 1, 2, 3], dtype=np.float64)

        # Ten stacked copies of the samples; the reshape below puts the
        # targets in the same repeated order as the rows of X_train.
        X_train = np.vstack((X, X, X, X, X, X, X, X, X, X))
        y_train = np.vstack((y, y, y, y, y, y, y, y, y, y))

        model.train(X_train, y_train.reshape((-1, 1)))
        mean, _ = model.predict(X)
        for idx, m in enumerate(mean):
            # The tolerance has to be passed as ``delta``: the third positional
            # argument of assertAlmostEqual is ``places`` (a number of decimal
            # places), not an absolute tolerance.
            self.assertAlmostEqual(y[idx], m, delta=0.05)
Пример #4
0
 def test_predict(self):
     """Train on ten random rows and check the shapes returned by ``predict``."""
     rng = np.random.RandomState(1)
     features = rng.rand(20, 10)
     targets = rng.rand(10, 1)
     n_dims = 10
     model = RandomForestWithInstances(
         configspace=self._get_cs(n_dims),
         types=np.zeros((n_dims, ), dtype=np.uint),
         # One (0, 10) pair per input dimension.
         bounds=[(0, 10) for _ in range(n_dims)],
         seed=1,
     )
     # First ten rows are used for training, the remaining ten for prediction.
     model.train(features[:10], targets[:10])
     m_hat, v_hat = model.predict(features[10:])
     expected_shape = (10, 1)
     self.assertEqual(m_hat.shape, expected_shape)
     self.assertEqual(v_hat.shape, expected_shape)
Пример #5
0
 def _refit_model(self, types, bounds, X, y):
     """
     Replace ``self.model`` with a newly trained random forest.

     The forest is created with bootstrapping enabled and with the
     ``compute_oob_error`` option switched on before training.

     Parameters
     ----------
     types: list
         SMAC EPM types
     bounds:
         passed unchanged as the second positional argument of the forest
     X:ndarray
         X matrix
     y:ndarray
         corresponding y vector
     """
     forest = RandomForestWithInstances(types, bounds, do_bootstrapping=True)
     self.model = forest
     # The option has to be set before train() is called.
     forest.rf_opts.compute_oob_error = True
     forest.train(X, y)
Пример #6
0
    def __init__(self,
                 target_names: List[str],
                 bounds: np.ndarray,
                 types: np.ndarray,
                 rf_kwargs: Optional[Dict[str, Any]] = None,
                 **kwargs):
        """Constructor

        Creates one random forest per entry of ``target_names``.

        Parameters
        ----------
        target_names : list
            List of str, each entry is the name of one target dimension. Length
            of the list will be ``n_objectives``.

        bounds : np.ndarray
            See :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` documentation.

        types : np.ndarray
            See :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` documentation.

        rf_kwargs : dict, optional
            Keyword arguments forwarded to every
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances`
            that is created; ``None`` means no extra arguments.

        kwargs
            Forwarded to the base class constructor.
        """
        super().__init__(**kwargs)
        forest_options = {} if rf_kwargs is None else rf_kwargs

        self.target_names = target_names
        self.num_targets = len(self.target_names)
        # One independent estimator per target dimension.
        self.estimators = [
            RandomForestWithInstances(types, bounds, **forest_options)
            for _ in range(self.num_targets)
        ]
    def __init__(
        self,
        target_names: List[str],
        configspace: ConfigurationSpace,
        types: List[int],
        bounds: List[Tuple[float, float]],
        seed: int,
        rf_kwargs: Optional[Dict[str, Any]] = None,
        instance_features: Optional[np.ndarray] = None,
        pca_components: Optional[int] = None,
    ) -> None:
        """Constructor

        Creates one ``RandomForestWithInstances`` per entry of ``target_names``.

        Parameters
        ----------
        target_names : list
            List of str, each entry is the name of one target dimension.
        configspace : ConfigurationSpace
            Passed to the base class and to every forest.
        types : List[int]
            Passed to the base class and to every forest.
        bounds : List[Tuple[float, float]]
            Passed to the base class and to every forest.
        seed : int
            Passed to the base class only; the forests receive a seed only if
            ``rf_kwargs`` contains one.
        rf_kwargs : dict, optional
            Keyword arguments forwarded to every forest; ``None`` means no
            extra arguments.
        instance_features : np.ndarray, optional
            Passed to the base class.
        pca_components : int, optional
            Passed to the base class.
        """
        super().__init__(
            configspace=configspace,
            bounds=bounds,
            types=types,
            seed=seed,
            instance_features=instance_features,
            pca_components=pca_components,
        )
        if rf_kwargs is None:
            rf_kwargs = {}

        self.target_names = target_names
        self.num_targets = len(self.target_names)
        # One independent estimator per target dimension.
        self.estimators = [
            RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
            for _ in range(self.num_targets)
        ]
Пример #8
0
    def setUp(self):
        """Create the configuration space, a 500-run history, the scenario and the forest."""
        logging.basicConfig(level=logging.DEBUG)

        # Configuration space with one categorical, one float and one integer parameter.
        self.cs = ConfigurationSpace()
        cat_param = CategoricalHyperparameter(
            name="cat_a_b", choices=["a", "b"], default_value="a")
        self.cs.add_hyperparameter(cat_param)
        float_param = UniformFloatHyperparameter(
            name="float_0_1", lower=0, upper=1, default_value=0.5)
        self.cs.add_hyperparameter(float_param)
        int_param = UniformIntegerHyperparameter(
            name='integer_0_100', lower=-10, upper=10, default_value=0)
        self.cs.add_hyperparameter(int_param)

        # Fill the run history with generated runs and count two kinds of them.
        self.rh = runhistory.RunHistory(aggregate_func=average_cost)
        rs = numpy.random.RandomState(1)
        n_timeouts = 0
        n_censored = 0
        for _ in range(500):
            sample = generate_config(cs=self.cs, rs=rs)
            config, seed, runtime, status, instance_id = sample
            if runtime == 40:
                n_timeouts += 1
            if runtime < 40 and status == StatusType.TIMEOUT:
                n_censored += 1
            self.rh.add(config=config, cost=runtime, time=runtime,
                        status=status, instance_id=instance_id,
                        seed=seed, additional_info=None)
        print("%d TIMEOUTs, %d censored" % (n_timeouts, n_censored))

        scen = Scen()
        self.scen = scen
        scen.run_obj = "runtime"
        scen.overall_obj = "par10"
        scen.cutoff = 40

        # Forest without instance features.
        types, bounds = get_types(self.cs, None)
        self.model = RandomForestWithInstances(
            types=types,
            bounds=bounds,
            instance_features=None,
            seed=1234567980,
        )
    def __init__(self, target_names, bounds, types, **kwargs):
        """Wrapper for the random forest to predict multiple targets.

        One random forest is created per target name. The same ``kwargs`` are
        passed both to the base class constructor and to every forest.
        Consult the documentation of the random forest for the
        hyperparameters and their meanings.

        Parameters
        ----------
        target_names : list
            List of str, each entry is the name of one target dimension.

        bounds : np.ndarray
            See RandomForestWithInstances documentation

        types : np.ndarray
            See RandomForestWithInstances documentation

        kwargs
            See RandomForestWithInstances documentation

        """
        super().__init__(**kwargs)

        self.target_names = target_names
        self.num_targets = len(self.target_names)
        forests = []
        for _ in range(self.num_targets):
            forests.append(RandomForestWithInstances(types, bounds, **kwargs))
        self.estimators = forests
Пример #10
0
 def model(self, model_short_name='urfi'):
     """Create ``self._model`` for the given short name and enable its OOB error option.

     Parameters
     ----------
     model_short_name : str
         ``'rfi'`` creates a ``RandomForestWithInstances``. ``'urfi'``
         creates an ``UnloggedEPARXrfi`` when ``self._preprocessed`` is
         falsy and an ``Unloggedrfwi`` without instance features otherwise.

     Raises
     ------
     ValueError
         If ``model_short_name`` is neither ``'urfi'`` nor ``'rfi'``.
     """
     # Guard clause: reject unknown names before anything is computed.
     if model_short_name not in ['urfi', 'rfi']:
         raise ValueError(
             'Specified model %s does not exist or not supported!' %
             model_short_name)

     if model_short_name == 'rfi':
         self.types, self.bounds = get_types(self.scenario.cs,
                                             self.scenario.feature_array)
         self._model = RandomForestWithInstances(
             self.types,
             self.bounds,
             instance_features=self.scenario.feature_array,
             seed=12345)
     elif not self._preprocessed:
         # 'urfi' on non-preprocessed data: instance features are used.
         self.types, self.bounds = get_types(
             self.scenario.cs, self.scenario.feature_array)
         self._model = UnloggedEPARXrfi(
             self.types,
             self.bounds,
             instance_features=self.scenario.feature_array,
             seed=12345,
             cutoff=self.cutoff,
             threshold=self.threshold)
     else:
         # 'urfi' on preprocessed data: no instance features.
         self.types, self.bounds = get_types(self.scenario.cs, None)
         self._model = Unloggedrfwi(self.types,
                                    self.bounds,
                                    instance_features=None,
                                    seed=12345)
     self._model.rf_opts.compute_oob_error = True
Пример #11
0
def optimize(scenario, run, forest=False, seed=8, ratio=0.8):
    """Run SMAC with a random-forest model and evaluate the resulting incumbent.

    Parameters
    ----------
    scenario
        Scenario object; its ``cs`` and ``feature_array`` are read and its
        ``output_dir`` is overwritten.
    run
        Passed to ``SMAC`` as ``tae_runner``.
    forest : bool
        If truthy a ``ForestSearch`` (with ``ratio``) optimizes the
        acquisition function, otherwise an ``InterleavedLocalAndRandomSearch``.
    seed : int
        Seed of the forest and of the ``RandomState`` handed to ``SMAC``.
    ratio : float
        Forwarded to ``ForestSearch``.

    Returns
    -------
    Element 1 of the result of running the incumbent with the TAE runner.
    """
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    rfr = RandomForestWithInstances(
        types=types,
        bounds=bounds,
        instance_features=scenario.feature_array,
        seed=seed,
    )
    ei = EI(model=rfr)
    optimizer = (ForestSearch(ei, scenario.cs, ratio=ratio) if forest
                 else InterleavedLocalAndRandomSearch(ei, scenario.cs))

    search_tag = "forest_" if forest else "random_"
    scenario.output_dir = "%s_%s_%d_%lf" % ("./logs/run_", search_tag, seed, time.time())
    facade = SMAC(
        scenario=scenario,
        rng=np.random.RandomState(seed),
        model=rfr,
        acquisition_function=ei,
        acquisition_function_optimizer=optimizer,
        tae_runner=run,
    )

    # The incumbent is always read from the solver; an exception raised by
    # optimize() still propagates after the ``finally`` clause has run.
    try:
        facade.optimize()
    finally:
        incumbent = facade.solver.incumbent

    outcome = facade.get_tae_runner().run(incumbent, 1)
    return outcome[1]
Пример #12
0
    def test_log_runtime_with_imputation(self):
        """Add runs to the run history step by step and check the transformed data.

        After each added run the history is transformed by a
        ``RunHistory2EPM4LogCost`` that imputes TIMEOUT runs, and the
        resulting ``X`` and ``y`` are compared with fixed expected values.
        """
        forest = RandomForestWithInstances(
            configspace=self.cs,
            types=self.types,
            bounds=self.bounds,
            instance_features=None,
            seed=12345,
            ratio_features=1.0,
        )
        # Cutoff and threshold are given on the natural-log scale.
        self.imputor = RFRImputator(
            rng=np.random.RandomState(seed=12345),
            cutoff=np.log(self.scen.cutoff),
            threshold=np.log(self.scen.cutoff * self.scen.par_factor),
            model=forest,
        )

        rh2epm = runhistory2epm.RunHistory2EPM4LogCost(
            num_params=2,
            scenario=self.scen,
            impute_censored_data=True,
            impute_state=[StatusType.TIMEOUT, ],
            success_states=[StatusType.SUCCESS, ],
            imputor=self.imputor,
        )

        # Step 1: a single successful run with cost 1.
        self.rh.add(config=self.config1, cost=1, time=1,
                    status=StatusType.SUCCESS, instance_id=23,
                    seed=None,
                    additional_info=None)

        X, y = rh2epm.transform(self.rh)
        expected_X = np.array([[0.005, 0.995]])
        expected_y = np.array([[0.]])  # log of cost 1 is 0
        self.assertTrue(np.allclose(X, expected_X, atol=0.001))
        self.assertTrue(np.allclose(y, expected_y))

        # Step 2: a TIMEOUT run; rh2epm should use time and not cost field later
        self.rh.add(config=self.config3, cost=200, time=20,
                    status=StatusType.TIMEOUT, instance_id=1,
                    seed=45,
                    additional_info={"start_time": 20})

        X, y = rh2epm.transform(self.rh)
        expected_X = np.array([[0.005, 0.995], [0.995, 0.995]])
        expected_y = np.array([[0.], [5.2983]])  # ln(20 * 10)
        self.assertTrue(np.allclose(X, expected_X, atol=0.001))
        self.assertTrue(np.allclose(y, expected_y, atol=0.001))

        # Step 3: a second TIMEOUT run.
        self.rh.add(config=self.config2, cost=100, time=10,
                    status=StatusType.TIMEOUT, instance_id=1,
                    seed=12354,
                    additional_info={"start_time": 10})

        X, y = rh2epm.transform(self.rh)
        expected_X = np.array([[0.005, 0.995],
                               [0.995, 0.005],
                               [0.995, 0.995]])
        expected_y = np.array([[0.], [2.727], [5.2983]])
        np.testing.assert_array_almost_equal(X, expected_X, decimal=3)
        np.testing.assert_array_almost_equal(y, expected_y, decimal=3)
Пример #13
0
    def testRandomImputation(self):
        """Impute censored targets on random data sets of growing size.

        For every data set the imputed values must be finite and larger than
        the censored values they replace; data sets for which the imputor
        returns ``None`` are skipped.
        """
        rs = numpy.random.RandomState(1)

        for step in range(0, 150, 15):
            # First random imputation sanity check
            num_samples = max(1, step * 10)
            num_feat = max(1, step)
            num_censored = int(num_samples * 0.1)
            X = rs.rand(num_samples, num_feat)
            y = numpy.sin(X[:, 0:1])

            # Cap the targets at 90% of the largest one.
            cutoff = max(y) * 0.9
            y[y > cutoff] = cutoff

            # The leading rows are treated as censored, the rest as uncensored.
            cen_X, uncen_X = X[:num_censored, :], X[num_censored:, :]
            cen_y, uncen_y = y[:num_censored], y[num_censored:]

            # Halve the censored targets in place.
            cen_y /= 2

            # One float hyperparameter in [0, 1] per feature column.
            cs = ConfigurationSpace()
            for feat_idx in range(num_feat):
                hyperparameter = UniformFloatHyperparameter(
                    name="a_%d" % feat_idx,
                    lower=0,
                    upper=1,
                    default_value=0.5)
                cs.add_hyperparameter(hyperparameter)

            types, bounds = get_types(cs, None)
            print(types)
            print(bounds)
            print('#' * 120)
            print(cen_X)
            print(uncen_X)
            print('~' * 120)
            self.model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=None,
                seed=1234567980,
            )
            imputor = rfr_imputator.RFRImputator(
                rng=rs,
                cutoff=cutoff,
                threshold=cutoff * 10,
                change_threshold=0.01,
                max_iter=5,
                model=self.model,
            )

            imp_y = imputor.impute(censored_X=cen_X,
                                   censored_y=cen_y,
                                   uncensored_X=uncen_X,
                                   uncensored_y=uncen_y)

            if imp_y is None:
                continue

            for row in range(cen_y.shape[0]):
                self.assertGreater(imp_y[row], cen_y[row])
            self.assertTrue(numpy.isfinite(imp_y).all())
Пример #14
0
    def test_train_with_pca(self):
        """Train with ``pca_components=2`` and check the attributes found on the model."""
        rng = np.random.RandomState(1)
        train_X = rng.rand(20, 20)
        inst_feats = rng.rand(10, 10)
        train_y = rng.rand(20, 1)
        forest_args = {
            'types': np.zeros((20, ), dtype=np.uint),
            'bounds': [(0, 10) for _ in range(10)],
            'pca_components': 2,
            'instance_features': inst_feats,
        }
        model = RandomForestWithInstances(**forest_args)
        model.train(train_X, train_y)

        for attribute in ('n_params', 'n_feats'):
            self.assertEqual(getattr(model, attribute), 10)
        self.assertIsNotNone(model.pca)
        self.assertIsNotNone(model.scaler)
Пример #15
0
    def test_predict_marginalized_over_instances(self):
        """Check the output shapes of ``predict_marginalized_over_instances``.

        The forest is trained on 200 rows with 15 columns and queried with
        20 rows of 10 columns, using a (10, 5) instance-feature matrix.
        """
        rng = np.random.RandomState(1)
        # The arrays are drawn from the seeded generator in this fixed order.
        configs = rng.rand(20, 10)
        inst_feats = rng.rand(10, 5)
        targets = rng.rand(len(configs) * len(inst_feats), 1)
        train_rows = rng.rand(200, 15)

        model = RandomForestWithInstances(
            configspace=self._get_cs(10),
            types=np.zeros((15, ), dtype=np.uint),
            instance_features=inst_feats,
            bounds=[(0, 10) for _ in range(10)],
            seed=1,
        )
        model.train(train_rows, targets)
        means, variances = model.predict_marginalized_over_instances(configs)
        expected_shape = (20, 1)
        self.assertEqual(means.shape, expected_shape)
        self.assertEqual(variances.shape, expected_shape)
Пример #16
0
    def _refit_model(self, types, bounds, X, y):
        """
        Replace ``self.model`` with a newly trained random forest.

        The forest is created with an empty configuration space, a fixed seed
        and bootstrapping enabled; its ``compute_oob_error`` option is
        switched on before training.

        Parameters
        ----------
        types: list
            SMAC EPM types
        bounds:
            passed unchanged as the third positional argument of the forest
        X:ndarray
            X matrix
        y:ndarray
            corresponding y vector
        """
        # We need to fake config-space bypass imputation of inactive values in random forest implementation
        placeholder_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")

        forest = RandomForestWithInstances(
            placeholder_cs,
            types,
            bounds,
            seed=12345,
            do_bootstrapping=True,
        )
        self.model = forest
        forest.rf_opts.compute_oob_error = True
        forest.train(X, y)
Пример #17
0
    def _refit_model(self, types, bounds, X, y):
        """
        Easily allows for refitting of the model.

        A new forest with bootstrapping is created, its ``compute_oob_error``
        option is switched on and it is trained on ``X`` and ``y``.

        Parameters
        ----------
        types: list
            SMAC EPM types
        bounds:
            passed unchanged to the forest as ``bounds``
        X:ndarray
            X matrix; rows are taken to be the data points
        y:ndarray
            corresponding y vector
        """
        # take at most 80% of the data per split to ensure enough data for oob error.
        # The number of data points is the number of rows, X.shape[0];
        # X.shape[1] is the number of columns and would not match this intent.
        self.model = RandomForestWithInstances(types=types,
                                               bounds=bounds,
                                               do_bootstrapping=True,
                                               n_points_per_tree=int(
                                                   X.shape[0] * 0.8))
        self.model.rf_opts.compute_oob_error = True
        self.model.train(X, y)
Пример #18
0
    def test_log_runtime_with_imputation(self):
        """Add runs to the run history step by step and check the transformed data.

        After each added run the history is transformed by a
        ``RunHistory2EPM4LogCost`` that imputes TIMEOUT runs, and the
        resulting ``X`` and ``y`` are compared with fixed expected values.
        """
        forest = RandomForestWithInstances(types=self.types,
                                           bounds=self.bounds,
                                           instance_features=None,
                                           seed=12345)
        # Cutoff and threshold are given on the log10 scale.
        self.imputor = RFRImputator(
            rs=np.random.RandomState(seed=12345),
            cutoff=np.log10(self.scen.cutoff),
            threshold=np.log10(self.scen.cutoff * self.scen.par_factor),
            model=forest,
        )

        rh2epm = runhistory2epm.RunHistory2EPM4LogCost(
            num_params=2,
            scenario=self.scen,
            impute_censored_data=True,
            impute_state=[StatusType.TIMEOUT],
            imputor=self.imputor,
        )

        # Step 1: a single successful run with cost 1.
        self.rh.add(config=self.config1, cost=1, time=1,
                    status=StatusType.SUCCESS, instance_id=23,
                    seed=None,
                    additional_info=None)

        X, y = rh2epm.transform(self.rh)
        expected_X = np.array([[0.005, 0.995]])
        expected_y = np.array([[0.]])  # 10^0 = 1
        self.assertTrue(np.allclose(X, expected_X, atol=0.001))
        self.assertTrue(np.allclose(y, expected_y))

        # Step 2: a TIMEOUT run; rh2epm should use time and not cost field later
        self.rh.add(config=self.config3, cost=200, time=20,
                    status=StatusType.TIMEOUT, instance_id=1,
                    seed=45,
                    additional_info={"start_time": 20})

        X, y = rh2epm.transform(self.rh)
        expected_X = np.array([[0.005, 0.995], [0.995, 0.995]])
        expected_y = np.array([[0.], [2.301]])  # log_10(20 * 10)
        self.assertTrue(np.allclose(X, expected_X, atol=0.001))
        self.assertTrue(np.allclose(y, expected_y, atol=0.001))

        # Step 3: a second TIMEOUT run.
        self.rh.add(config=self.config2, cost=100, time=10,
                    status=StatusType.TIMEOUT, instance_id=1,
                    seed=12354,
                    additional_info={"start_time": 10})

        X, y = rh2epm.transform(self.rh)
        print(y)
        expected_X = np.array([[0.005, 0.995], [0.995, 0.005], [0.995, 0.995]])
        # both timeouts should be imputed to a PAR10
        expected_y = np.array([[0.], [2.301], [2.301]])
        self.assertTrue(np.allclose(X, expected_X, atol=0.001))
        self.assertTrue(np.allclose(y, expected_y, atol=0.001))
    def test_rf_on_sklearn_data(self):
        """Compare the forest's two-fold cross-validation error on the Boston data with stored values.

        The comparison is done once on the raw targets and once on their
        natural logarithm (with ``log_y=True`` passed to the forest).
        """
        import sklearn.datasets
        X, y = sklearn.datasets.load_boston(return_X_y=True)
        rs = np.random.RandomState(1)

        n_columns = X.shape[1]
        types = np.zeros(n_columns)
        # The observed (min, max) of every column serve as bounds.
        bounds = [(np.min(X[:, col]), np.max(X[:, col]))
                  for col in range(n_columns)]

        # NOTE(review): only sklearn.datasets is imported here;
        # sklearn.model_selection is presumably imported elsewhere - confirm.
        cv = sklearn.model_selection.KFold(shuffle=True,
                                           random_state=rs,
                                           n_splits=2)

        for do_log in [False, True]:
            forest_kwargs = dict(
                configspace=self._get_cs(n_columns),
                types=types,
                bounds=bounds,
                seed=1,
                ratio_features=1.0,
                pca_components=100,
            )
            if do_log:
                targets = np.log(y)
                forest_kwargs['log_y'] = True
                maes = [0.43169704431695493156, 0.4267519520332511912]
            else:
                targets = y
                maes = [9.3298376833224042496, 9.348010654109179346]
            model = RandomForestWithInstances(**forest_kwargs)

            for fold, (train_split, test_split) in enumerate(cv.split(X,
                                                                      targets)):
                X_train, y_train = X[train_split], targets[train_split]
                X_test, y_test = X[test_split], targets[test_split]
                model.train(X_train, y_train)
                y_hat, _ = model.predict(X_test)
                # NOTE(review): if predict returns a column vector while y_test
                # is 1-d, the difference broadcasts to a matrix - confirm that
                # the stored values are meant to be computed this way.
                mae = np.mean(np.abs(y_hat - y_test), dtype=np.float128)
                self.assertAlmostEqual(
                    mae,
                    maes[fold],
                    msg=('Do log: %s, iteration %i' % (str(do_log), fold)),
                    # We observe a difference of around 0.00017
                    # in github actions if doing log
                    places=3 if do_log else 7)
Пример #20
0
    def test_predict_with_actual_values(self):
        """Train on eight stacked copies of eight samples and compare predictions with the targets."""
        # All eight combinations of three binary inputs.
        X = np.array([[0., 0., 0.], [0., 0., 1.], [0., 1., 0.], [0., 1., 1.],
                      [1., 0., 0.], [1., 0., 1.], [1., 1., 0.], [1., 1., 1.]],
                     dtype=np.float64)
        y = np.array(
            [[.1], [.2], [9], [9.2], [100.], [100.2], [109.], [109.2]],
            dtype=np.float64)
        model = RandomForestWithInstances(
            configspace=self._get_cs(3),
            types=np.array([0, 0, 0], dtype=np.uint),
            bounds=[(0, np.nan), (0, np.nan), (0, np.nan)],
            instance_features=None,
            seed=12345,
            ratio_features=1.0,
        )
        n_copies = 8
        model.train(np.vstack((X, ) * n_copies), np.vstack((y, ) * n_copies))

        y_hat, _ = model.predict(X)
        # Every prediction has to be within 0.1 of its target.
        for expected, predicted in zip(y.flatten(), y_hat.flatten()):
            self.assertAlmostEqual(expected, predicted, delta=0.1)
Пример #21
0
    def __init__(
        self,
        target_names: List[str],
        configspace: ConfigurationSpace,
        types: List[int],
        bounds: List[Tuple[float, float]],
        seed: int,
        rf_kwargs: Optional[Dict[str, Any]] = None,
        instance_features: Optional[np.ndarray] = None,
        pca_components: Optional[int] = None,
    ) -> None:
        """Constructor

        Creates one ``RandomForestWithInstances`` per entry of ``target_names``.

        Parameters
        ----------
        target_names : list
            List of str, each entry is the name of one target dimension. Length
            of the list will be ``n_objectives``.
        configspace : ConfigurationSpace
            Passed to the base class and to every forest.
        types : List[int]
            Specifies the number of categorical values of an input dimension where
            the i-th entry corresponds to the i-th input dimension. Let's say we
            have 2 dimensions where the first dimension consists of 3 different
            categorical choices and the second dimension is continuous, then we
            have to pass [3, 0]. Note that we count starting from 0.
        bounds : List[Tuple[float, float]]
            bounds of input dimensions: (lower, upper) for continuous dims; (n_cat, np.nan) for categorical dims
        seed : int
            Passed to the base class only; the forests receive a seed only if
            ``rf_kwargs`` contains one.
        rf_kwargs : dict, optional
            Keyword arguments forwarded to every forest; ``None`` means no
            extra arguments.
        instance_features : np.ndarray (I, K)
            Contains the K dimensional instance features of the I different instances
        pca_components : int, optional
            Number of components to keep when using PCA to reduce dimensionality of instance features. Requires to
            set n_feats (> pca_dims).
        """
        super().__init__(
            configspace=configspace,
            bounds=bounds,
            types=types,
            seed=seed,
            instance_features=instance_features,
            pca_components=pca_components,
        )
        if rf_kwargs is None:
            rf_kwargs = {}

        self.target_names = target_names
        self.num_targets = len(self.target_names)
        # One independent estimator per target dimension.
        self.estimators = [
            RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
            for _ in range(self.num_targets)
        ]
Пример #22
0
 def get_model(self, cs, instance_features=None):
     """Build a ``RandomForestWithInstances`` for the configuration space ``cs``.

     Parameters
     ----------
     cs
         Configuration space handed to ``get_types`` and to the forest.
     instance_features
         If truthy, its entries (looked up by iterating over it) are
         collected into a numpy array; otherwise it is passed on unchanged.

     Returns
     -------
     The newly created forest; no training happens here.
     """
     feature_matrix = instance_features
     if feature_matrix:
         feature_matrix = numpy.array(
             [instance_features[name] for name in instance_features])
     types, bounds = get_types(cs, feature_matrix)
     return RandomForestWithInstances(
         configspace=cs,
         types=types,
         bounds=bounds,
         instance_features=feature_matrix,
         seed=1234567980,
         pca_components=7,
     )
Пример #23
0
    def test_predict_marginalized_over_instances_wrong_X_dimensions(self):
        """``predict_marginalized_over_instances`` must raise ValueError for 1d and 3d input."""
        rs = np.random.RandomState(1)

        model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint),
                                          instance_features=rs.rand(10, 2),
                                          bounds=np.array(list(
                                              map(lambda x: (0, 10),
                                                  range(10))),
                                                          dtype=object))
        # assertRaisesRegex is used instead of the deprecated alias
        # assertRaisesRegexp, which was removed in Python 3.12.
        X = rs.rand(10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 1d array!",
                               model.predict_marginalized_over_instances, X)
        X = rs.rand(10, 10, 10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 3d array!",
                               model.predict_marginalized_over_instances, X)
Пример #24
0
    def test_predict_wrong_X_dimensions(self):
        """``predict`` must raise ValueError for 1d/3d input and for rows of the wrong length."""
        rs = np.random.RandomState(1)

        model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint))
        # assertRaisesRegex is used instead of the deprecated alias
        # assertRaisesRegexp, which was removed in Python 3.12.
        X = rs.rand(10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 1d array!",
                               model.predict, X)
        X = rs.rand(10, 10, 10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 3d array!",
                               model.predict, X)

        # 2d input whose rows have five entries instead of ten.
        X = rs.rand(10, 5)
        self.assertRaisesRegex(
            ValueError, "Rows in X should have 10 entries "
            "but have 5!", model.predict, X)
Пример #25
0
    def test_predict_marginalized_over_instances_mocked(self, rf_mock):
        """Use mock to count the number of calls to predict()"""
        def fake_predict(X):
            # Column vector 0 .. X.shape[0] - 1, returned as both mean and variance
            column = np.array(list(range(X.shape[0]))).reshape((-1, 1))
            return column, column

        rf_mock.side_effect = fake_predict

        rng = np.random.RandomState(1)
        inst_feats = rng.rand(10, 5)

        model = RandomForestWithInstances(np.zeros((15, ), dtype=np.uint),
                                          instance_features=inst_feats)
        queries = rng.rand(11, 10)
        means, variances = model.predict_marginalized_over_instances(queries)
        self.assertEqual(rf_mock.call_count, 11)
        self.assertEqual(means.shape, (11, 1))
        self.assertEqual(variances.shape, (11, 1))
        for row in range(11):
            self.assertEqual(means[row], 4.5)
            self.assertEqual(variances[row], 12.75)
Пример #26
0
    def test_predict_marginalized_over_instances_no_features(self, rf_mock):
        """The RF should fall back to the regular predict() method.

        NOTE(review): the body calls ``predict`` directly and not
        ``predict_marginalized_over_instances`` - confirm this is intended.
        """
        rng = np.random.RandomState(1)
        features = rng.rand(20, 10)
        targets = rng.rand(10, 1)
        model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint))
        # Train on the first ten rows, predict on the remaining ten.
        model.train(features[:10], targets[:10])
        model.predict(features[10:])
        self.assertEqual(rf_mock.call_count, 1)
Пример #27
0
    def test_predict_wrong_X_dimensions(self):
        """``predict`` must raise ValueError for 1d/3d input and for rows of the wrong length."""
        rng = np.random.RandomState(1)

        n_dims = 10
        model = RandomForestWithInstances(
            configspace=self._get_cs(n_dims),
            types=np.zeros((n_dims, ), dtype=np.uint),
            bounds=[(0, 10) for _ in range(n_dims)],
            seed=1,
        )
        one_dim = rng.rand(10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 1d array!",
                               model.predict, one_dim)
        three_dim = rng.rand(10, 10, 10)
        self.assertRaisesRegex(ValueError, "Expected 2d array, got 3d array!",
                               model.predict, three_dim)

        # 2d input whose rows have five entries instead of ten.
        too_narrow = rng.rand(10, 5)
        self.assertRaisesRegex(
            ValueError, "Rows in X should have 10 entries "
            "but have 5!", model.predict, too_narrow)
Пример #28
0
    def test_predict_marginalized_over_instances_no_features(self, rf_mock):
        """The RF should fall back to the regular predict() method.

        NOTE(review): the body calls ``predict`` directly and not
        ``predict_marginalized_over_instances`` - confirm this is intended.
        """
        rng = np.random.RandomState(1)
        features = rng.rand(20, 10)
        targets = rng.rand(10, 1)
        n_dims = 10
        model = RandomForestWithInstances(
            configspace=self._get_cs(n_dims),
            types=np.zeros((n_dims, ), dtype=np.uint),
            bounds=[(0, 10) for _ in range(n_dims)],
            seed=1,
        )
        # Train on the first ten rows, predict on the remaining ten.
        model.train(features[:10], targets[:10])
        model.predict(features[10:])
        self.assertEqual(rf_mock.call_count, 1)
Пример #29
0
 def model(self, model_short_name='urfi'):
     """Create ``self._model`` for the given short name and enable its OOB error option.

     ``self.types`` and ``self.bounds`` are recomputed first, even when the
     name turns out to be invalid.

     Parameters
     ----------
     model_short_name : str
         ``'rfi'`` creates a ``RandomForestWithInstances``, ``'urfi'`` an
         ``UnloggedRandomForestWithInstances``.

     Raises
     ------
     ValueError
         If ``model_short_name`` is neither ``'urfi'`` nor ``'rfi'``.
     """
     self.types, self.bounds = get_types(self.scenario.cs,
                                         self.scenario.feature_array)
     if model_short_name not in ['urfi', 'rfi']:
         raise ValueError(
             'Specified model %s does not exist or not supported!' %
             model_short_name)

     if model_short_name == 'rfi':
         self._model = RandomForestWithInstances(
             self.types,
             self.bounds,
             instance_features=self.scenario.feature_array,
             seed=self.rng.randint(99999))
     else:
         # Only 'urfi' is left here; the feature array is passed positionally.
         self._model = UnloggedRandomForestWithInstances(
             self.types,
             self.bounds,
             self.scenario.feature_array,
             seed=self.rng.randint(99999),
             cutoff=self.cutoff,
             threshold=self.threshold)
     self._model.rf_opts.compute_oob_error = True
Пример #30
0
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[BaseRunner], Callable]] = None,
        tae_runner_kwargs: Optional[Dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[Dict] = None,
        intensifier: Optional[Type[AbstractRacer]] = None,
        intensifier_kwargs: Optional[Dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[Dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[Dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[Dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[Dict] = None,
        multi_objective_algorithm: Optional[
            Type[AbstractMultiObjectiveAlgorithm]] = None,
        multi_objective_kwargs: Optional[Dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[Dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[Type[SMBO]] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[Dict] = None,
        dask_client: Optional[dask.distributed.Client] = None,
        n_jobs: Optional[int] = 1,
    ):
        """Assemble all components of an optimization run and create the solver.

        Every component is built from scenario-derived default keyword
        arguments; the matching ``*_kwargs`` dictionary, when given, is merged
        over those defaults. The finished solver is stored in ``self.solver``.
        Note that ``scenario`` is modified in place (``transform_y``,
        ``intensification_percentage``, ``min_chall`` and the seed of its
        configuration space may be set) and ``scenario.write()`` is called.

        Parameters
        ----------
        scenario
            Scenario object that supplies the defaults for all components.
        tae_runner
            ``None`` builds an ``ExecuteTARunOld`` around ``scenario.ta``; a
            class is instantiated with the runner kwargs; any other callable
            is wrapped in ``ExecuteTAFuncDict``.
        tae_runner_kwargs
            Overrides for the runner's default keyword arguments.
        runhistory
            ``None`` (a new ``RunHistory`` is created), a class to
            instantiate, or a ready ``RunHistory`` instance.
        runhistory_kwargs
            Keyword arguments used when a run history is instantiated.
        intensifier
            ``None`` (uses ``Intensifier``), a class to instantiate, or an
            ``AbstractRacer`` instance that is used as is.
        intensifier_kwargs
            Overrides for the intensifier's default keyword arguments.
        acquisition_function
            ``None`` selects ``LogEI`` when ``scenario.transform_y`` is
            ``"LOG"``/``"LOGS"`` and ``EI`` otherwise; a class is instantiated.
        acquisition_function_kwargs
            Overrides for the acquisition function's keyword arguments.
        integrate_acquisition_function
            If true, wrap the acquisition function in an
            ``IntegratedAcquisitionFunction``.
        acquisition_function_optimizer
            ``None`` (uses ``LocalAndSortedRandomSearch``) or a class.
        acquisition_function_optimizer_kwargs
            Overrides for the optimizer's default keyword arguments.
        model
            ``None`` (uses ``RandomForestWithInstances`` configured from the
            scenario's ``rf_*`` settings) or a class to instantiate.
        model_kwargs
            Overrides for the model's default keyword arguments.
        runhistory2epm
            ``None`` (chosen from ``scenario.run_obj`` and
            ``scenario.transform_y``) or a class to instantiate.
        runhistory2epm_kwargs
            Overrides for the run-history converter's keyword arguments.
        multi_objective_algorithm
            Only used when the scenario has more than one objective; ``None``
            then selects ``MeanAggregationStrategy``, a class is instantiated.
        multi_objective_kwargs
            Overrides for the multi-objective algorithm's keyword arguments.
        initial_design
            ``None`` (chosen from ``scenario.initial_incumbent``) or a class.
            Mutually exclusive with ``initial_configurations``.
        initial_design_kwargs
            Overrides for the initial design's default keyword arguments.
        initial_configurations
            Explicit start configurations; an ``InitialDesign`` is built
            around them.
        stats
            Existing ``Stats`` object; a new one is created when falsy.
        restore_incumbent
            Forwarded to the solver. When set, no new output directory is
            created.
        rng
            Random state or seed, resolved through ``get_rng``.
        smbo_class
            Solver class to instantiate; ``None`` uses ``SMBO``.
        run_id
            Run identifier, resolved through ``get_rng``.
        random_configuration_chooser
            ``None`` (uses ``ChooserProb``), a class to instantiate, or a
            ``RandomConfigurationChooser`` instance that is used as is.
        random_configuration_chooser_kwargs
            Overrides for the chooser's default keyword arguments.
        dask_client
            If given, the runner is wrapped in a ``DaskParallelRunner``.
        n_jobs
            ``None`` or ``1`` for a single worker, ``-1`` for
            ``joblib.cpu_count()`` workers, or any positive worker count.

        Raises
        ------
        ValueError
            For an unusable ``runhistory`` or ``random_configuration_chooser``,
            an invalid ``n_jobs``, a runner whose ``run_obj`` differs from the
            scenario's, both ``initial_design`` and ``initial_configurations``
            being given, or an unknown ``scenario.initial_incumbent``,
            ``scenario.run_obj`` or ``scenario.transform_y``.
        TypeError
            When ``model``, ``acquisition_function``,
            ``acquisition_function_optimizer``, ``tae_runner``,
            ``intensifier``, ``multi_objective_algorithm``, ``initial_design``
            or ``runhistory2epm`` is neither ``None`` nor of a supported kind.
        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI, which can restore a run
            # from an output directory. Not restoring is the default path:
            # set up the random number generator and a fresh output directory.
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            self.output_dir = create_output_directory(scenario, run_id)
        elif scenario.output_dir is not None:  # type: ignore[attr-defined] # noqa F821
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # When restoring from a folder the CLI has already created the
            # output directory. Creating it again here would produce two
            # folders (run_X and run_X.OLD), so only remember its path; the
            # trajectory is then written to that directory.
            self.output_dir = cast(
                str, scenario.output_dir_for_this_run  # type: ignore[attr-defined] # noqa F821
            )
        # NOTE(review): when restore_incumbent is set and scenario.output_dir
        # is None, get_rng() is never called, so `rng` is used exactly as it
        # was passed in -- confirm callers always pass a RandomState then.
        rng = cast(np.random.RandomState, rng)

        if (scenario.deterministic is True  # type: ignore[attr-defined] # noqa F821
                and getattr(scenario, "tuner_timeout", None) is None
                and scenario.run_obj == "quality"  # type: ignore[attr-defined] # noqa F821
            ):
            self.logger.info(
                "Optimizing a deterministic scenario for quality without a tuner timeout - will make "
                "SMAC deterministic and only evaluate one configuration per iteration!"
            )
            scenario.intensification_percentage = 1e-10  # type: ignore[attr-defined] # noqa F821
            # NOTE(review): the intensifier below reads
            # scenario.intens_min_chall, not scenario.min_chall -- confirm
            # this is the attribute that was meant to be set.
            scenario.min_chall = 1  # type: ignore[attr-defined] # noqa F821

        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"  # type: ignore[attr-defined] # noqa F821

        # Number of objectives. The multi-objective set-up further down
        # explicitly allows scenario.multi_objectives to be None, so guard the
        # len() call instead of failing with a TypeError here.
        objectives = scenario.multi_objectives  # type: ignore[attr-defined] # noqa F821
        num_obj = len(objectives) if objectives is not None else 0

        # initialize empty runhistory
        runhistory_def_kwargs = {}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(
                **runhistory_def_kwargs)  # type: ignore[operator] # noqa F821
        elif isinstance(runhistory, RunHistory):
            pass
        else:
            raise ValueError(
                "runhistory has to be a class or an object of RunHistory")

        # random configuration chooser
        rand_conf_chooser_kwargs = {"rng": rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if "prob" not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs["prob"] = scenario.rand_prob  # type: ignore[attr-defined] # noqa F821
            random_configuration_chooser_instance = ChooserProb(
                **rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser_instance = random_configuration_chooser(  # type: ignore # noqa F821
                **rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif isinstance(random_configuration_chooser,
                        RandomConfigurationChooser):
            # A ready-made chooser is accepted by the validation (and by the
            # error message below), so it must actually be bound; previously
            # this case left the variable undefined and failed later with an
            # UnboundLocalError.
            random_configuration_chooser_instance = random_configuration_chooser
        else:
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))  # type: ignore[attr-defined] # noqa F821

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(
            scenario.cs,  # type: ignore[attr-defined] # noqa F821
            scenario.feature_array,  # type: ignore[attr-defined] # noqa F821
        )
        model_def_kwargs = {
            "types": types,
            "bounds": bounds,
            "instance_features": scenario.feature_array,
            "seed": rng.randint(MAXINT),
            "pca_components": scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            # Random-forest defaults from the scenario; values supplied via
            # model_kwargs take precedence.
            rf_defaults = {
                "log_y": scenario.transform_y in ["LOG", "LOGS"],  # type: ignore[attr-defined] # noqa F821
                "num_trees": scenario.rf_num_trees,  # type: ignore[attr-defined] # noqa F821
                "do_bootstrapping": scenario.rf_do_bootstrapping,  # type: ignore[attr-defined] # noqa F821
                "ratio_features": scenario.rf_ratio_features,  # type: ignore[attr-defined] # noqa F821
                "min_samples_split": scenario.rf_min_samples_split,  # type: ignore[attr-defined] # noqa F821
                "min_samples_leaf": scenario.rf_min_samples_leaf,  # type: ignore[attr-defined] # noqa F821
                "max_depth": scenario.rf_max_depth,  # type: ignore[attr-defined] # noqa F821
            }
            for key, value in rf_defaults.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs["configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = RandomForestWithInstances(
                **model_def_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif inspect.isclass(model):
            model_def_kwargs["configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
            model_instance = model(
                **model_def_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {"model": model_instance}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)

        acquisition_function_instance = (
            None)  # type: Optional[AbstractAcquisitionFunction]
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:  # type: ignore[attr-defined] # noqa F821
                acquisition_function_instance = LogEI(
                    **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
                )
            else:
                acquisition_function_instance = EI(
                    **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
                )
        elif inspect.isclass(acquisition_function):
            acquisition_function_instance = acquisition_function(
                **acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function_instance = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function_instance,  # type: ignore
                **acq_def_kwargs,
            )

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            "acquisition_function": acquisition_function_instance,
            "config_space": scenario.cs,  # type: ignore[attr-defined] # noqa F821
            "rng": rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            # Local-search defaults from the scenario; explicitly supplied
            # optimizer kwargs take precedence.
            sls_defaults = {
                "max_steps": scenario.sls_max_steps,  # type: ignore[attr-defined] # noqa F821
                "n_steps_plateau_walk": scenario.sls_n_steps_plateau_walk,  # type: ignore[attr-defined] # noqa F821
            }
            for key, value in sls_defaults.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer_instance = LocalAndSortedRandomSearch(
                **acq_func_opt_kwargs  # type: ignore
            )
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer_instance = acquisition_function_optimizer(  # type: ignore # noqa F821
                **acq_func_opt_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            "stats": self.stats,
            "run_obj": scenario.run_obj,
            "par_factor": scenario.par_factor,  # type: ignore[attr-defined] # noqa F821
            "cost_for_crash": scenario.cost_for_crash,  # type: ignore[attr-defined] # noqa F821
            "abort_on_first_run_crash": scenario.abort_on_first_run_crash,  # type: ignore[attr-defined] # noqa F821
            "multi_objectives": scenario.multi_objectives,  # type: ignore[attr-defined] # noqa F821
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)

        if "ta" not in tae_def_kwargs:
            tae_def_kwargs["ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
        if tae_runner is None:
            # Without an explicit runner the scenario's call string always
            # wins, even over a "ta" entry passed in tae_runner_kwargs.
            tae_def_kwargs["ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = ExecuteTARunOld(
                **tae_def_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif inspect.isclass(tae_runner):
            tae_runner_instance = cast(
                BaseRunner, tae_runner(**tae_def_kwargs))  # type: ignore
        elif callable(tae_runner):
            tae_def_kwargs["ta"] = tae_runner
            tae_def_kwargs["use_pynisher"] = scenario.limit_resources  # type: ignore[attr-defined] # noqa F821
            tae_def_kwargs["memory_limit"] = scenario.memory_limit  # type: ignore[attr-defined] # noqa F821
            tae_runner_instance = ExecuteTAFuncDict(
                **tae_def_kwargs)  # type: ignore
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "BaseRunner. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # In case of a parallel run, wrap the single worker in a parallel
        # runner
        if n_jobs is None or n_jobs == 1:
            _n_jobs = 1
        elif n_jobs == -1:
            _n_jobs = joblib.cpu_count()
        elif n_jobs > 0:
            _n_jobs = n_jobs
        else:
            raise ValueError(
                "Number of tasks must be positive, None or -1, but is %s" %
                str(n_jobs))
        if _n_jobs > 1 or dask_client is not None:
            tae_runner_instance = DaskParallelRunner(  # type: ignore
                tae_runner_instance,
                n_workers=_n_jobs,
                output_directory=self.output_dir,
                dask_client=dask_client,
            )

        # Check that overall objective and tae objective are the same
        # TODO: remove these two ignores once the scenario object knows all its attributes!
        if tae_runner_instance.run_obj != scenario.run_obj:  # type: ignore[union-attr] # noqa F821
            raise ValueError(
                "Objective for the target algorithm runner and "
                "the scenario must be the same, but are '%s' and "
                "'%s'" %
                (tae_runner_instance.run_obj,  # type: ignore[union-attr] # noqa F821
                 scenario.run_obj))

        if intensifier is None:
            intensifier = Intensifier

        if isinstance(intensifier, AbstractRacer):
            intensifier_instance = intensifier
        elif inspect.isclass(intensifier):
            # initialize intensification
            intensifier_def_kwargs = {
                "stats": self.stats,
                "traj_logger": traj_logger,
                "rng": rng,
                "instances": scenario.train_insts,  # type: ignore[attr-defined] # noqa F821
                "cutoff": scenario.cutoff,  # type: ignore[attr-defined] # noqa F821
                "deterministic": scenario.deterministic,  # type: ignore[attr-defined] # noqa F821
                "run_obj_time": scenario.run_obj == "runtime",  # type: ignore[attr-defined] # noqa F821
                "instance_specifics": scenario.instance_specific,  # type: ignore[attr-defined] # noqa F821
                "adaptive_capping_slackfactor": scenario.intens_adaptive_capping_slackfactor,  # type: ignore[attr-defined] # noqa F821
                "min_chall": scenario.intens_min_chall,  # type: ignore[attr-defined] # noqa F821
            }

            # These arguments are only passed to Intensifier and its
            # subclasses.
            if issubclass(intensifier, Intensifier):
                intensifier_def_kwargs["always_race_against"] = (
                    scenario.cs.get_default_configuration()  # type: ignore[attr-defined] # noqa F821
                )
                intensifier_def_kwargs["use_ta_time_bound"] = scenario.use_ta_time  # type: ignore[attr-defined] # noqa F821
                intensifier_def_kwargs["minR"] = scenario.minR  # type: ignore[attr-defined] # noqa F821
                intensifier_def_kwargs["maxR"] = scenario.maxR  # type: ignore[attr-defined] # noqa F821

            if intensifier_kwargs is not None:
                intensifier_def_kwargs.update(intensifier_kwargs)

            intensifier_instance = intensifier(
                **intensifier_def_kwargs)  # type: ignore[arg-type] # noqa F821
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the AbstractRacer, but is '%s'"
                % type(intensifier))

        # initialize multi objective
        # the multi_objective_algorithm_instance will be passed to the runhistory2epm object
        multi_objective_algorithm_instance = (
            None)  # type: Optional[AbstractMultiObjectiveAlgorithm]

        if scenario.multi_objectives is not None and num_obj > 1:  # type: ignore[attr-defined] # noqa F821
            # define any defaults here
            _multi_objective_kwargs = {"rng": rng, "num_obj": num_obj}

            if multi_objective_kwargs is not None:
                _multi_objective_kwargs.update(multi_objective_kwargs)

            if multi_objective_algorithm is None:
                multi_objective_algorithm_instance = MeanAggregationStrategy(
                    **_multi_objective_kwargs  # type: ignore[arg-type] # noqa F821
                )
            elif inspect.isclass(multi_objective_algorithm):
                multi_objective_algorithm_instance = multi_objective_algorithm(
                    **_multi_objective_kwargs)
            else:
                raise TypeError(
                    "Multi-objective algorithm not recognized: %s" %
                    (type(multi_objective_algorithm)))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        init_design_def_kwargs = {
            "cs": scenario.cs,  # type: ignore[attr-defined] # noqa F821
            "traj_logger": traj_logger,
            "rng": rng,
            "ta_run_limit": scenario.ta_run_limit,  # type: ignore[attr-defined] # noqa F821
            "configs": initial_configurations,
            "n_configs_x_params": 0,
            "max_config_fracs": 0.0,
        }

        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design_instance = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs["max_config_fracs"] = 0.0
                initial_design_instance = DefaultConfiguration(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":  # type: ignore[attr-defined] # noqa F821
                init_design_def_kwargs["max_config_fracs"] = 0.0
                initial_design_instance = RandomConfigurations(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":  # type: ignore[attr-defined] # noqa F821
                initial_design_instance = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError(
                    "Don't know what kind of initial_incumbent "
                    "'%s' is" %
                    scenario.initial_incumbent  # type: ignore[attr-defined] # noqa F821
                )
        elif inspect.isclass(initial_design):
            initial_design_instance = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # If the performance data is log-transformed, the RFRImputator will
        # already get log-transformed data from the runhistory, so cutoff and
        # threshold are moved to log space as well.
        # np.float64 replaces the alias np.float_, which NumPy 2.0 removed.
        if scenario.transform_y in ["LOG", "LOGS"]:  # type: ignore[attr-defined] # noqa F821
            cutoff = np.log(
                np.nanmin([np.inf, np.float64(scenario.cutoff)])  # type: ignore[attr-defined] # noqa F821
            )
            threshold = cutoff + np.log(
                scenario.par_factor)  # type: ignore[attr-defined] # noqa F821
        else:
            cutoff = np.nanmin([np.inf, np.float64(scenario.cutoff)])  # type: ignore[attr-defined] # noqa F821
            threshold = cutoff * scenario.par_factor  # type: ignore[attr-defined] # noqa F821

        num_params = len(
            scenario.cs.get_hyperparameters())  # type: ignore[attr-defined] # noqa F821
        imputor = RFRImputator(
            rng=rng,
            cutoff=cutoff,
            threshold=threshold,
            model=model_instance,
            change_threshold=0.01,
            max_iter=2,
        )

        r2e_def_kwargs = {
            "scenario": scenario,
            "num_params": num_params,
            "success_states": [
                StatusType.SUCCESS,
            ],
            "impute_censored_data": True,
            "impute_state": [
                StatusType.CAPPED,
            ],
            "imputor": imputor,
            "scale_perc": 5,
        }

        # TODO: consider other sorts of multi-objective algorithms
        if isinstance(multi_objective_algorithm_instance, AggregationStrategy):
            r2e_def_kwargs.update({
                "multi_objective_algorithm":
                multi_objective_algorithm_instance
            })

        # For quality runs crashed and memout runs count as regular data
        # points and nothing is imputed.
        if scenario.run_obj == "quality":
            r2e_def_kwargs.update({
                "success_states": [
                    StatusType.SUCCESS,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                ],
                "impute_censored_data": False,
                "impute_state": None,
            })

        if (isinstance(intensifier_instance, (SuccessiveHalving, Hyperband))
                and scenario.run_obj == "quality"):
            r2e_def_kwargs.update({
                "success_states": [
                    StatusType.SUCCESS,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                    StatusType.DONOTADVANCE,
                ],
                "consider_for_higher_budgets_state": [
                    StatusType.DONOTADVANCE,
                    StatusType.TIMEOUT,
                    StatusType.CRASHED,
                    StatusType.MEMOUT,
                ],
            })

        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == "runtime":
                rh2epm = RunHistory2EPM4LogCost(
                    **r2e_def_kwargs  # type: ignore
                )
            elif scenario.run_obj == "quality":
                if scenario.transform_y == "NONE":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4Cost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "LOGS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                elif scenario.transform_y == "INVS":  # type: ignore[attr-defined] # noqa F821
                    rh2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)  # type: ignore # noqa F821
                else:
                    # Without this branch an unsupported transformation left
                    # rh2epm unbound and surfaced later as an
                    # UnboundLocalError.
                    raise ValueError(
                        "Unknown transform_y: %s. Should be one of "
                        "NONE, LOG, LOGS or INVS." %
                        scenario.transform_y  # type: ignore[attr-defined] # noqa F821
                    )
            else:
                raise ValueError(
                    "Unknown run objective: %s. Should be either "
                    "quality or runtime." %
                    self.scenario.run_obj  # type: ignore # noqa F821
                )
        elif inspect.isclass(runhistory2epm):
            rh2epm = runhistory2epm(
                **r2e_def_kwargs)  # type: ignore # noqa F821
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            "scenario": scenario,
            "stats": self.stats,
            "initial_design": initial_design_instance,
            "runhistory": runhistory,
            "runhistory2epm": rh2epm,
            "intensifier": intensifier_instance,
            "num_run": run_id,
            "model": model_instance,
            "acq_optimizer": acquisition_function_optimizer_instance,
            "acquisition_func": acquisition_function_instance,
            "rng": rng,
            "restore_incumbent": restore_incumbent,
            "random_configuration_chooser":
            random_configuration_chooser_instance,
            "tae_runner": tae_runner_instance,
        }  # type: Dict[str, Any]

        if smbo_class is None:
            self.solver = SMBO(
                **smbo_args)  # type: ignore[arg-type] # noqa F821
        else:
            self.solver = smbo_class(
                **smbo_args)  # type: ignore[arg-type] # noqa F821