def test_predict(self):
    """Fit the forest on ten random rows and verify the prediction shapes."""
    n_dims = 10
    rng = np.random.RandomState(1)
    X = rng.rand(20, n_dims)
    Y = rng.rand(10, 1)
    train_X, test_X = X[:10], X[10:]

    # One (lower, upper) pair per input dimension, stored as an object array.
    bounds = np.array([(0, 10) for _ in range(n_dims)], dtype=object)
    model = RandomForestWithInstances(np.zeros((n_dims, ), dtype=np.uint),
                                      bounds=bounds)
    model.train(train_X, Y[:10])

    m_hat, v_hat = model.predict(test_X)
    # Mean and variance are both expected as one column per test row.
    for prediction in (m_hat, v_hat):
        self.assertEqual(prediction.shape, (10, 1))
def test_predict_marginalized_over_instances(self):
    """Check the output shapes of ``predict_marginalized_over_instances``."""
    rng = np.random.RandomState(1)
    configs = rng.rand(20, 10)
    inst_feats = rng.rand(10, 5)
    # One target per (configuration, instance) pair.
    targets = rng.rand(len(configs) * len(inst_feats), 1)
    train_X = rng.rand(200, 15)

    model = RandomForestWithInstances(np.zeros((15, ), dtype=np.uint),
                                      instance_features=inst_feats)
    model.train(train_X, targets)

    means, variances = model.predict_marginalized_over_instances(configs)
    self.assertEqual(means.shape, (20, 1))
    self.assertEqual(variances.shape, (20, 1))
def test_with_ordinal(self):
    """Train the forest on a space containing an ordinal hyperparameter.

    Checks the bounds reported by ``get_types`` for a categorical, an
    ordinal, a float and an integer hyperparameter, then verifies that the
    model reproduces its (repeated) training targets to within 0.05.
    """
    cs = smac.configspace.ConfigurationSpace()
    cs.add_hyperparameter(
        CategoricalHyperparameter('a', [0, 1], default_value=0))
    cs.add_hyperparameter(
        OrdinalHyperparameter('b', [0, 1], default_value=1))
    cs.add_hyperparameter(
        UniformFloatHyperparameter('c', lower=0., upper=1., default_value=1))
    cs.add_hyperparameter(
        UniformIntegerHyperparameter('d', lower=0, upper=10, default_value=1))
    cs.seed(1)

    feat_array = np.array([0, 0, 0]).reshape(1, -1)
    types, bounds = get_types(cs, feat_array)
    model = RandomForestWithInstances(
        configspace=cs,
        types=types,
        bounds=bounds,
        instance_features=feat_array,
        seed=1,
        ratio_features=1.0,
        pca_components=9,
    )

    # Expected bounds, in the order the hyperparameters were added.
    self.assertEqual(bounds[0][0], 2)
    self.assertTrue(bounds[0][1] is np.nan)
    self.assertEqual(bounds[1][0], 0)
    self.assertEqual(bounds[1][1], 1)
    self.assertEqual(bounds[2][0], 0.)
    self.assertEqual(bounds[2][1], 1.)
    self.assertEqual(bounds[3][0], 0.)
    self.assertEqual(bounds[3][1], 1.)

    # Seven columns per row; presumably four hyperparameters followed by the
    # three instance features - TODO confirm against get_types.
    X = np.array(
        [[0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 1., 0., 9., 0., 0., 0.],
         [0., 1., 1., 4., 0., 0., 0.]],
        dtype=np.float64)
    y = np.array([0, 1, 2, 3], dtype=np.float64)

    # Repeat the four samples ten times; the row-major reshape below keeps
    # the targets aligned with the stacked rows of X.
    X_train = np.vstack((X, X, X, X, X, X, X, X, X, X))
    y_train = np.vstack((y, y, y, y, y, y, y, y, y, y))
    model.train(X_train, y_train.reshape((-1, 1)))

    mean, _ = model.predict(X)
    for idx, m in enumerate(mean):
        # The tolerance must be passed as ``delta``: the third positional
        # argument of assertAlmostEqual is ``places`` (a number of decimals).
        self.assertAlmostEqual(y[idx], m, delta=0.05)
def test_predict(self):
    """Train on ten random rows and check the shapes returned by ``predict``."""
    n_dims = 10
    rng = np.random.RandomState(1)
    X = rng.rand(20, n_dims)
    Y = rng.rand(10, 1)

    model = RandomForestWithInstances(
        configspace=self._get_cs(n_dims),
        types=np.zeros((n_dims, ), dtype=np.uint),
        bounds=[(0, 10) for _ in range(n_dims)],
        seed=1,
    )
    model.train(X[:10], Y[:10])

    m_hat, v_hat = model.predict(X[10:])
    expected_shape = (10, 1)
    self.assertEqual(m_hat.shape, expected_shape)
    self.assertEqual(v_hat.shape, expected_shape)
def _refit_model(self, types, bounds, X, y):
    """Replace ``self.model`` with a freshly trained bootstrapped forest.

    Parameters
    ----------
    types: list
        SMAC EPM types
    bounds: list
        bounds passed on to the forest together with ``types``
    X: ndarray
        X matrix
    y: ndarray
        corresponding y vector
    """
    forest = RandomForestWithInstances(types, bounds, do_bootstrapping=True)
    self.model = forest
    # The flag has to be set on the forest options before training.
    forest.rf_opts.compute_oob_error = True
    forest.train(X, y)
def __init__(self,
             target_names: List[str],
             bounds: np.ndarray,
             types: np.ndarray,
             rf_kwargs: Optional[Dict[str, Any]] = None,
             **kwargs):
    """Create one random forest per target dimension.

    Parameters
    ----------
    target_names : list
        List of str, each entry is the name of one target dimension. Length
        of the list will be ``n_objectives``.
    bounds : np.ndarray
        See :class:`~smac.epm.rf_with_instances.RandomForestWithInstances`
        documentation.
    types : np.ndarray
        See :class:`~smac.epm.rf_with_instances.RandomForestWithInstances`
        documentation.
    rf_kwargs : dict, optional
        Keyword arguments handed to every underlying forest; ``None`` is
        treated as an empty dict.
    kwargs
        Forwarded unchanged to the base-class constructor.
    """
    super().__init__(**kwargs)
    forest_options = {} if rf_kwargs is None else rf_kwargs

    self.target_names = target_names
    self.num_targets = len(self.target_names)

    # Every target gets its own, identically configured forest.
    self.estimators = [
        RandomForestWithInstances(types, bounds, **forest_options)
        for _ in range(self.num_targets)
    ]
def __init__(
    self,
    target_names: List[str],
    configspace: ConfigurationSpace,
    types: List[int],
    bounds: List[Tuple[float, float]],
    seed: int,
    rf_kwargs: Optional[Dict[str, Any]] = None,
    instance_features: Optional[np.ndarray] = None,
    pca_components: Optional[int] = None,
) -> None:
    """Create one random forest per target dimension.

    Parameters
    ----------
    target_names : List[str]
        Name of each target dimension; one estimator is built per entry.
    configspace : ConfigurationSpace
        Passed to the base class and to every underlying forest.
    types : List[int]
        Passed to the base class and to every underlying forest.
    bounds : List[Tuple[float, float]]
        Passed to the base class and to every underlying forest.
    seed : int
        Passed to the base class only. The forests receive just
        ``configspace``, ``types``, ``bounds`` and ``rf_kwargs``.
    rf_kwargs : Optional[Dict[str, Any]]
        Extra keyword arguments for every underlying forest; ``None`` is
        treated as an empty dict.
    instance_features : Optional[np.ndarray]
        Passed to the base class.
    pca_components : Optional[int]
        Passed to the base class.
    """
    super().__init__(
        configspace=configspace,
        bounds=bounds,
        types=types,
        seed=seed,
        instance_features=instance_features,
        pca_components=pca_components,
    )
    if rf_kwargs is None:
        rf_kwargs = {}

    self.target_names = target_names
    self.num_targets = len(self.target_names)
    # The former debugging ``print(seed, rf_kwargs)`` was dropped: a library
    # constructor should not write to stdout.
    self.estimators = [
        RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
        for _ in range(self.num_targets)
    ]
def setUp(self):
    """Build the configuration space, a filled run history and the model."""
    logging.basicConfig(level=logging.DEBUG)

    self.cs = ConfigurationSpace()
    hyperparameters = (
        CategoricalHyperparameter(
            name="cat_a_b", choices=["a", "b"], default_value="a"),
        UniformFloatHyperparameter(
            name="float_0_1", lower=0, upper=1, default_value=0.5),
        UniformIntegerHyperparameter(
            name='integer_0_100', lower=-10, upper=10, default_value=0),
    )
    for hyperparameter in hyperparameters:
        self.cs.add_hyperparameter(hyperparameter)

    self.rh = runhistory.RunHistory(aggregate_func=average_cost)
    rs = numpy.random.RandomState(1)

    # Count runs with runtime == 40 and TIMEOUT runs below 40, for the
    # summary line printed after the loop.
    n_timeouts = 0
    n_censored = 0
    for _ in range(500):
        config, seed, runtime, status, instance_id = generate_config(
            cs=self.cs, rs=rs)
        if runtime == 40:
            n_timeouts += 1
        if runtime < 40 and status == StatusType.TIMEOUT:
            n_censored += 1
        self.rh.add(config=config, cost=runtime, time=runtime,
                    status=status, instance_id=instance_id,
                    seed=seed, additional_info=None)
    print("%d TIMEOUTs, %d censored" % (n_timeouts, n_censored))

    self.scen = Scen()
    self.scen.run_obj = "runtime"
    self.scen.overall_obj = "par10"
    self.scen.cutoff = 40

    types, bounds = get_types(self.cs, None)
    self.model = RandomForestWithInstances(
        types=types,
        bounds=bounds,
        instance_features=None,
        seed=1234567980,
    )
def __init__(self, target_names, bounds, types, **kwargs):
    """Wrapper for the random forest to predict multiple targets.

    Only the list of target names and the bounds/types arrays for the
    underlying forest model are mandatory. All other hyperparameters of the
    random forest can be passed via kwargs. Consult the documentation of the
    random forest for the hyperparameters and their meanings.

    Parameters
    ----------
    target_names : list
        List of str, each entry is the name of one target dimension.
    bounds : np.ndarray
        See RandomForestWithInstances documentation
    types : np.ndarray
        See RandomForestWithInstances documentation
    kwargs
        See RandomForestWithInstances documentation; also forwarded to the
        base-class constructor.
    """
    super().__init__(**kwargs)
    self.target_names = target_names
    self.num_targets = len(self.target_names)

    # One independent forest per target dimension.
    forests = []
    for _ in range(self.num_targets):
        forests.append(RandomForestWithInstances(types, bounds, **kwargs))
    self.estimators = forests
def model(self, model_short_name='urfi'):
    """Instantiate ``self._model`` for the requested short model name.

    ``'rfi'`` builds a RandomForestWithInstances. ``'urfi'`` builds an
    UnloggedEPARXrfi while ``self._preprocessed`` is false and an
    Unloggedrfwi without instance features otherwise. Any other name raises
    a ValueError.
    """
    if model_short_name not in ['urfi', 'rfi']:
        raise ValueError(
            'Specified model %s does not exist or not supported!' %
            model_short_name)

    if model_short_name == 'rfi':
        self.types, self.bounds = get_types(self.scenario.cs,
                                            self.scenario.feature_array)
        self._model = RandomForestWithInstances(
            self.types,
            self.bounds,
            instance_features=self.scenario.feature_array,
            seed=12345)
    elif not self._preprocessed:
        # 'urfi' on data that still carries its instance features.
        self.types, self.bounds = get_types(self.scenario.cs,
                                            self.scenario.feature_array)
        self._model = UnloggedEPARXrfi(
            self.types,
            self.bounds,
            instance_features=self.scenario.feature_array,
            seed=12345,
            cutoff=self.cutoff,
            threshold=self.threshold)
    else:
        # 'urfi' on preprocessed data: no instance features are used.
        self.types, self.bounds = get_types(self.scenario.cs, None)
        self._model = Unloggedrfwi(self.types,
                                   self.bounds,
                                   instance_features=None,
                                   seed=12345)

    self._model.rf_opts.compute_oob_error = True
def optimize(scenario, run, forest=False, seed=8, ratio=0.8):
    """Run SMAC on ``scenario`` and evaluate the resulting incumbent once.

    ``forest`` selects ForestSearch (with ``ratio``) instead of
    InterleavedLocalAndRandomSearch as acquisition-function optimizer. The
    second element of ``run(incumbent, 1)`` on the TAE runner is returned.
    """
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    surrogate = RandomForestWithInstances(
        types=types,
        bounds=bounds,
        instance_features=scenario.feature_array,
        seed=seed,
    )
    acquisition = EI(model=surrogate)

    if forest:
        maximizer = ForestSearch(acquisition, scenario.cs, ratio=ratio)
    else:
        maximizer = InterleavedLocalAndRandomSearch(acquisition, scenario.cs)

    # The output directory encodes search type, seed and the current time.
    search_tag = "forest_" if forest else "random_"
    scenario.output_dir = "%s_%s_%d_%lf" % (
        "./logs/run_", search_tag, seed, time.time())

    facade = SMAC(
        scenario=scenario,
        rng=np.random.RandomState(seed),
        model=surrogate,
        acquisition_function=acquisition,
        acquisition_function_optimizer=maximizer,
        tae_runner=run,
    )
    try:
        incumbent = facade.optimize()
    finally:
        # The incumbent is always re-read from the solver afterwards.
        incumbent = facade.solver.incumbent
    return facade.get_tae_runner().run(incumbent, 1)[1]
def test_log_runtime_with_imputation(self):
    """Add run data to RunHistory2EPM4LogCost and impute censored data."""
    forest = RandomForestWithInstances(
        configspace=self.cs,
        types=self.types,
        bounds=self.bounds,
        instance_features=None,
        seed=12345,
        ratio_features=1.0,
    )
    self.imputor = RFRImputator(
        rng=np.random.RandomState(seed=12345),
        cutoff=np.log(self.scen.cutoff),
        threshold=np.log(self.scen.cutoff * self.scen.par_factor),
        model=forest,
    )

    rh2epm = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2,
        scenario=self.scen,
        impute_censored_data=True,
        impute_state=[StatusType.TIMEOUT, ],
        success_states=[StatusType.SUCCESS, ],
        imputor=self.imputor,
    )

    # A single successful run.
    self.rh.add(config=self.config1, cost=1, time=1,
                status=StatusType.SUCCESS, instance_id=23,
                seed=None,
                additional_info=None)
    X, y = rh2epm.transform(self.rh)
    expected_X = np.array([[0.005, 0.995]])
    expected_y = np.array([[0.]])  # ln(1) = 0
    self.assertTrue(np.allclose(X, expected_X, atol=0.001))
    self.assertTrue(np.allclose(y, expected_y))

    # rh2epm should use time and not cost field later
    self.rh.add(config=self.config3, cost=200, time=20,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=45,
                additional_info={"start_time": 20})
    X, y = rh2epm.transform(self.rh)
    expected_X = np.array([[0.005, 0.995], [0.995, 0.995]])
    expected_y = np.array([[0.], [5.2983]])  # ln(20 * 10)
    self.assertTrue(np.allclose(X, expected_X, atol=0.001))
    self.assertTrue(np.allclose(y, expected_y, atol=0.001))

    # A second timeout with a shorter runtime.
    self.rh.add(config=self.config2, cost=100, time=10,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=12354,
                additional_info={"start_time": 10})
    X, y = rh2epm.transform(self.rh)
    expected_X = np.array([[0.005, 0.995],
                           [0.995, 0.005],
                           [0.995, 0.995]])
    expected_y = np.array([[0.], [2.727], [5.2983]])
    np.testing.assert_array_almost_equal(X, expected_X, decimal=3)
    np.testing.assert_array_almost_equal(y, expected_y, decimal=3)
def testRandomImputation(self):
    """Impute randomly generated censored data for growing problem sizes."""
    rs = numpy.random.RandomState(1)

    for step in range(0, 150, 15):
        # First random imputation sanity check
        num_samples = max(1, step * 10)
        num_feat = max(1, step)
        num_censored = int(num_samples * 0.1)

        X = rs.rand(num_samples, num_feat)
        y = numpy.sin(X[:, 0:1])

        # Cap the targets at 90% of their maximum.
        cutoff = max(y) * 0.9
        y[y > cutoff] = cutoff

        # We have some cen data
        cen_X, uncen_X = X[:num_censored, :], X[num_censored:, :]
        cen_y, uncen_y = y[:num_censored], y[num_censored:]
        cen_y /= 2

        cs = ConfigurationSpace()
        for dim in range(num_feat):
            cs.add_hyperparameter(
                UniformFloatHyperparameter(name="a_%d" % dim,
                                           lower=0, upper=1,
                                           default_value=0.5))

        types, bounds = get_types(cs, None)
        for debug_item in (types, bounds, '#' * 120,
                           cen_X, uncen_X, '~' * 120):
            print(debug_item)

        self.model = RandomForestWithInstances(types=types, bounds=bounds,
                                               instance_features=None,
                                               seed=1234567980)
        imputor = rfr_imputator.RFRImputator(rng=rs,
                                             cutoff=cutoff,
                                             threshold=cutoff * 10,
                                             change_threshold=0.01,
                                             max_iter=5,
                                             model=self.model)
        imp_y = imputor.impute(censored_X=cen_X, censored_y=cen_y,
                               uncensored_X=uncen_X,
                               uncensored_y=uncen_y)

        # Nothing to verify when the imputor returned no values.
        if imp_y is not None:
            for idx in range(cen_y.shape[0]):
                self.assertGreater(imp_y[idx], cen_y[idx])
            self.assertTrue(numpy.isfinite(imp_y).all())
def test_train_with_pca(self):
    """Train with PCA enabled and inspect the resulting model attributes."""
    rng = np.random.RandomState(1)
    X = rng.rand(20, 20)
    inst_feats = rng.rand(10, 10)
    Y = rng.rand(20, 1)

    model = RandomForestWithInstances(
        types=np.zeros((20, ), dtype=np.uint),
        bounds=[(0, 10) for _ in range(10)],
        pca_components=2,
        instance_features=inst_feats,
    )
    model.train(X, Y)

    self.assertEqual(model.n_params, 10)
    self.assertEqual(model.n_feats, 10)
    # Both preprocessing objects must have been created.
    for fitted in (model.pca, model.scaler):
        self.assertIsNotNone(fitted)
def test_predict_marginalized_over_instances(self):
    """Check the output shapes of ``predict_marginalized_over_instances``."""
    rng = np.random.RandomState(1)
    configs = rng.rand(20, 10)
    inst_feats = rng.rand(10, 5)
    # One target per (configuration, instance) pair.
    targets = rng.rand(len(configs) * len(inst_feats), 1)
    train_X = rng.rand(200, 15)

    model = RandomForestWithInstances(
        configspace=self._get_cs(10),
        types=np.zeros((15, ), dtype=np.uint),
        instance_features=inst_feats,
        bounds=[(0, 10) for _ in range(10)],
        seed=1,
    )
    model.train(train_X, targets)

    means, variances = model.predict_marginalized_over_instances(configs)
    self.assertEqual(means.shape, (20, 1))
    self.assertEqual(variances.shape, (20, 1))
def _refit_model(self, types, bounds, X, y):
    """Replace ``self.model`` with a freshly trained bootstrapped forest.

    Parameters
    ----------
    types: list
        SMAC EPM types
    bounds: list
        bounds passed on to the forest together with ``types``
    X: ndarray
        X matrix
    y: ndarray
        corresponding y vector
    """
    # A stand-in configuration space without hyperparameters is handed over
    # to bypass the imputation of inactive values in the random forest
    # implementation.
    placeholder_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")
    forest = RandomForestWithInstances(placeholder_cs, types, bounds,
                                       seed=12345, do_bootstrapping=True)
    self.model = forest
    forest.rf_opts.compute_oob_error = True
    forest.train(X, y)
def _refit_model(self, types, bounds, X, y):
    """Easily allows for refitting of the model.

    A new bootstrapped forest is created, stored in ``self.model``,
    configured to compute the out-of-bag error and trained on ``X``/``y``.

    Parameters
    ----------
    types: list
        SMAC EPM types
    bounds: list
        bounds passed on to the forest together with ``types``
    X: ndarray
        X matrix, one row per data point
    y: ndarray
        corresponding y vector
    """
    # Take at most 80% of the data per split to ensure enough data for the
    # oob error. The number of data points is the number of rows
    # (X.shape[0]); X.shape[1] would be the number of columns.
    points_per_tree = int(X.shape[0] * 0.8)
    self.model = RandomForestWithInstances(types=types,
                                           bounds=bounds,
                                           do_bootstrapping=True,
                                           n_points_per_tree=points_per_tree)
    self.model.rf_opts.compute_oob_error = True
    self.model.train(X, y)
def test_log_runtime_with_imputation(self):
    """Add run data to RunHistory2EPM4LogCost and impute censored data."""
    forest = RandomForestWithInstances(types=self.types,
                                       bounds=self.bounds,
                                       instance_features=None,
                                       seed=12345)
    self.imputor = RFRImputator(
        rs=np.random.RandomState(seed=12345),
        cutoff=np.log10(self.scen.cutoff),
        threshold=np.log10(self.scen.cutoff * self.scen.par_factor),
        model=forest,
    )

    rh2epm = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2,
        scenario=self.scen,
        impute_censored_data=True,
        impute_state=[StatusType.TIMEOUT],
        imputor=self.imputor,
    )

    # A single successful run.
    self.rh.add(config=self.config1, cost=1, time=1,
                status=StatusType.SUCCESS, instance_id=23,
                seed=None,
                additional_info=None)
    X, y = rh2epm.transform(self.rh)
    expected_X = np.array([[0.005, 0.995]])
    expected_y = np.array([[0.]])  # 10^0 = 1
    self.assertTrue(np.allclose(X, expected_X, atol=0.001))
    self.assertTrue(np.allclose(y, expected_y))

    # rh2epm should use time and not cost field later
    self.rh.add(config=self.config3, cost=200, time=20,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=45,
                additional_info={"start_time": 20})
    X, y = rh2epm.transform(self.rh)
    expected_X = np.array([[0.005, 0.995], [0.995, 0.995]])
    expected_y = np.array([[0.], [2.301]])  # log_10(20 * 10)
    self.assertTrue(np.allclose(X, expected_X, atol=0.001))
    self.assertTrue(np.allclose(y, expected_y, atol=0.001))

    self.rh.add(config=self.config2, cost=100, time=10,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=12354,
                additional_info={"start_time": 10})
    X, y = rh2epm.transform(self.rh)
    print(y)
    expected_X = np.array([[0.005, 0.995],
                           [0.995, 0.005],
                           [0.995, 0.995]])
    self.assertTrue(np.allclose(X, expected_X, atol=0.001))
    # both timeouts should be imputed to a PAR10
    expected_y = np.array([[0.], [2.301], [2.301]])
    self.assertTrue(np.allclose(y, expected_y, atol=0.001))
def test_rf_on_sklearn_data(self):
    """Compare the forest's error on the Boston data with recorded values.

    The model is evaluated with two-fold cross-validation, once on the raw
    targets and once on log targets (with ``log_y=True``).
    """
    import sklearn.datasets

    X, y = sklearn.datasets.load_boston(return_X_y=True)
    rs = np.random.RandomState(1)
    n_features = X.shape[1]

    types = np.zeros(n_features)
    # Per-column (min, max) pairs.
    bounds = [(np.min(X[:, col]), np.max(X[:, col]))
              for col in range(n_features)]

    cv = sklearn.model_selection.KFold(shuffle=True, random_state=rs,
                                       n_splits=2)

    for do_log in [False, True]:
        if do_log:
            targets = np.log(y)
            extra_kwargs = {'log_y': True}
            maes = [0.43169704431695493156, 0.4267519520332511912]
        else:
            targets = y
            extra_kwargs = {}
            maes = [9.3298376833224042496, 9.348010654109179346]

        model = RandomForestWithInstances(
            configspace=self._get_cs(n_features),
            types=types,
            bounds=bounds,
            seed=1,
            ratio_features=1.0,
            pca_components=100,
            **extra_kwargs
        )

        # We observe a difference of around 0.00017
        # in github actions if doing log
        places = 3 if do_log else 7

        for i, (train_split, test_split) in enumerate(cv.split(X, targets)):
            model.train(X[train_split], targets[train_split])
            y_hat, _ = model.predict(X[test_split])

            # NOTE(review): the recorded values above depend on exactly this
            # expression - keep it unchanged.
            mae = np.mean(np.abs(y_hat - targets[test_split]),
                          dtype=np.float128)
            self.assertAlmostEqual(
                mae,
                maes[i],
                msg=('Do log: %s, iteration %i' % (str(do_log), i)),
                places=places)
def test_predict_with_actual_values(self):
    """The forest should reproduce its training targets to within 0.1."""
    X = np.array([
        [0., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 1.],
        [1., 0., 0.],
        [1., 0., 1.],
        [1., 1., 0.],
        [1., 1., 1.],
    ], dtype=np.float64)
    y = np.array([
        [.1],
        [.2],
        [9],
        [9.2],
        [100.],
        [100.2],
        [109.],
        [109.2],
    ], dtype=np.float64)

    model = RandomForestWithInstances(
        configspace=self._get_cs(3),
        types=np.array([0, 0, 0], dtype=np.uint),
        bounds=[(0, np.nan), (0, np.nan), (0, np.nan)],
        instance_features=None,
        seed=12345,
        ratio_features=1.0,
    )

    # Train on eight stacked copies of the data set.
    repeats = 8
    model.train(np.vstack([X] * repeats), np.vstack([y] * repeats))

    y_hat, _ = model.predict(X)
    for expected, predicted in zip(y.flatten(), y_hat.flatten()):
        self.assertAlmostEqual(expected, predicted, delta=0.1)
def __init__(
    self,
    target_names: List[str],
    configspace: ConfigurationSpace,
    types: List[int],
    bounds: List[Tuple[float, float]],
    seed: int,
    rf_kwargs: Optional[Dict[str, Any]] = None,
    instance_features: Optional[np.ndarray] = None,
    pca_components: Optional[int] = None,
) -> None:
    """Constructor

    Parameters
    ----------
    target_names : list
        List of str, each entry is the name of one target dimension. Length
        of the list will be ``n_objectives``.
    configspace : ConfigurationSpace
        Passed to the base class and to every underlying forest.
    types : List[int]
        Specifies the number of categorical values of an input dimension
        where the i-th entry corresponds to the i-th input dimension. Let's
        say we have 2 dimensions where the first dimension consists of 3
        different categorical choices and the second dimension is continuous
        then we have to pass [3, 0]. Note that we count starting from 0.
    bounds : List[Tuple[float, float]]
        bounds of input dimensions: (lower, upper) for continuous dims;
        (n_cat, np.nan) for categorical dims
    seed : int
        Passed to the base class only. The forests receive just
        ``configspace``, ``types``, ``bounds`` and ``rf_kwargs``.
    rf_kwargs : Optional[Dict[str, Any]]
        Extra keyword arguments for every underlying forest; ``None`` is
        treated as an empty dict.
    instance_features : np.ndarray (I, K)
        Contains the K dimensional instance features of the I different
        instances
    pca_components : Optional[int]
        Number of components to keep when using PCA to reduce dimensionality
        of instance features. Requires to set n_feats (> pca_dims).
    """
    super().__init__(
        configspace=configspace,
        bounds=bounds,
        types=types,
        seed=seed,
        instance_features=instance_features,
        pca_components=pca_components,
    )
    if rf_kwargs is None:
        rf_kwargs = {}

    self.target_names = target_names
    self.num_targets = len(self.target_names)
    # The former debugging ``print(seed, rf_kwargs)`` was dropped: a library
    # constructor should not write to stdout.
    self.estimators = [
        RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
        for _ in range(self.num_targets)
    ]
def get_model(self, cs, instance_features=None):
    """Return a RandomForestWithInstances built for ``cs``.

    A truthy ``instance_features`` is indexed by each of its keys and the
    resulting values are stacked into a numpy array before use.
    """
    if instance_features:
        feature_rows = [instance_features[name] for name in instance_features]
        instance_features = numpy.array(feature_rows)

    types, bounds = get_types(cs, instance_features)
    return RandomForestWithInstances(
        configspace=cs,
        types=types,
        bounds=bounds,
        instance_features=instance_features,
        seed=1234567980,
        pca_components=7,
    )
def test_predict_marginalized_over_instances_wrong_X_dimensions(self):
    """1-d and 3-d inputs must be rejected with a ValueError."""
    rs = np.random.RandomState(1)

    model = RandomForestWithInstances(
        np.zeros((10, ), dtype=np.uint),
        instance_features=rs.rand(10, 2),
        bounds=np.array(list(map(lambda x: (0, 10), range(10))),
                        dtype=object))

    # assertRaisesRegex replaces the deprecated alias assertRaisesRegexp,
    # which no longer exists in Python 3.12.
    X = rs.rand(10)
    self.assertRaisesRegex(ValueError,
                           "Expected 2d array, got 1d array!",
                           model.predict_marginalized_over_instances, X)

    X = rs.rand(10, 10, 10)
    self.assertRaisesRegex(ValueError,
                           "Expected 2d array, got 3d array!",
                           model.predict_marginalized_over_instances, X)
def test_predict_wrong_X_dimensions(self):
    """``predict`` must reject inputs of wrong rank or wrong row length."""
    rs = np.random.RandomState(1)

    model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint))

    # assertRaisesRegex replaces the deprecated alias assertRaisesRegexp,
    # which no longer exists in Python 3.12.
    X = rs.rand(10)
    self.assertRaisesRegex(ValueError,
                           "Expected 2d array, got 1d array!",
                           model.predict, X)

    X = rs.rand(10, 10, 10)
    self.assertRaisesRegex(ValueError,
                           "Expected 2d array, got 3d array!",
                           model.predict, X)

    X = rs.rand(10, 5)
    self.assertRaisesRegex(
        ValueError,
        "Rows in X should have 10 entries "
        "but have 5!", model.predict, X)
def test_predict_marginalized_over_instances_mocked(self, rf_mock):
    """Use mock to count the number of calls to predict()"""

    def fake_predict(X):
        # Column vector holding the numbers 0 .. X.shape[0] - 1
        rval = np.array(list(range(X.shape[0]))).reshape((-1, 1))
        # Return mean and variance
        return rval, rval

    rf_mock.side_effect = fake_predict

    rng = np.random.RandomState(1)
    inst_feats = rng.rand(10, 5)
    model = RandomForestWithInstances(np.zeros((15, ), dtype=np.uint),
                                      instance_features=inst_feats)

    n_configs = 11
    means, variances = model.predict_marginalized_over_instances(
        rng.rand(n_configs, 10))

    self.assertEqual(rf_mock.call_count, n_configs)
    self.assertEqual(means.shape, (n_configs, 1))
    self.assertEqual(variances.shape, (n_configs, 1))
    for mean_i, variance_i in zip(means, variances):
        self.assertEqual(mean_i, 4.5)
        self.assertEqual(variance_i, 12.75)
def test_predict_marginalized_over_instances_no_features(self, rf_mock):
    """The RF should fall back to the regular predict() method."""
    rng = np.random.RandomState(1)
    X = rng.rand(20, 10)
    Y = rng.rand(10, 1)
    train_X, test_X = X[:10], X[10:]

    model = RandomForestWithInstances(np.zeros((10, ), dtype=np.uint))
    model.train(train_X, Y[:10])
    model.predict(test_X)

    # The mocked callable must have been invoked exactly once.
    self.assertEqual(rf_mock.call_count, 1)
def test_predict_wrong_X_dimensions(self):
    """``predict`` must reject inputs of wrong rank or wrong row length."""
    rng = np.random.RandomState(1)

    model = RandomForestWithInstances(
        configspace=self._get_cs(10),
        types=np.zeros((10, ), dtype=np.uint),
        bounds=[(0, 10) for _ in range(10)],
        seed=1,
    )

    # 1-d input
    X = rng.rand(10)
    with self.assertRaisesRegex(ValueError,
                                "Expected 2d array, got 1d array!"):
        model.predict(X)

    # 3-d input
    X = rng.rand(10, 10, 10)
    with self.assertRaisesRegex(ValueError,
                                "Expected 2d array, got 3d array!"):
        model.predict(X)

    # 2-d input with too few columns
    X = rng.rand(10, 5)
    with self.assertRaisesRegex(
            ValueError, "Rows in X should have 10 entries but have 5!"):
        model.predict(X)
def test_predict_marginalized_over_instances_no_features(self, rf_mock):
    """The RF should fall back to the regular predict() method."""
    n_dims = 10
    rng = np.random.RandomState(1)
    X = rng.rand(20, n_dims)
    Y = rng.rand(10, 1)

    model = RandomForestWithInstances(
        configspace=self._get_cs(n_dims),
        types=np.zeros((n_dims, ), dtype=np.uint),
        bounds=[(0, 10) for _ in range(n_dims)],
        seed=1,
    )
    model.train(X[:10], Y[:10])
    model.predict(X[10:])

    # The mocked callable must have been invoked exactly once.
    self.assertEqual(rf_mock.call_count, 1)
def model(self, model_short_name='urfi'):
    """Instantiate ``self._model`` for the requested short model name.

    ``'rfi'`` builds a RandomForestWithInstances and ``'urfi'`` an
    UnloggedRandomForestWithInstances; any other name raises a ValueError.
    Types and bounds are derived from the scenario before the name is
    validated.
    """
    features = self.scenario.feature_array
    self.types, self.bounds = get_types(self.scenario.cs, features)

    if model_short_name not in ['urfi', 'rfi']:
        raise ValueError(
            'Specified model %s does not exist or not supported!' %
            model_short_name)

    if model_short_name == 'rfi':
        self._model = RandomForestWithInstances(
            self.types,
            self.bounds,
            instance_features=self.scenario.feature_array,
            seed=self.rng.randint(99999))
    else:
        # Only 'urfi' is left after the validation above.
        self._model = UnloggedRandomForestWithInstances(
            self.types,
            self.bounds,
            self.scenario.feature_array,
            seed=self.rng.randint(99999),
            cutoff=self.cutoff,
            threshold=self.threshold)

    self._model.rf_opts.compute_oob_error = True
def __init__(
    self,
    scenario: Scenario,
    tae_runner: Optional[Union[Type[BaseRunner], Callable]] = None,
    tae_runner_kwargs: Optional[Dict] = None,
    runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
    runhistory_kwargs: Optional[Dict] = None,
    intensifier: Optional[Type[AbstractRacer]] = None,
    intensifier_kwargs: Optional[Dict] = None,
    acquisition_function: Optional[
        Type[AbstractAcquisitionFunction]] = None,
    acquisition_function_kwargs: Optional[Dict] = None,
    integrate_acquisition_function: bool = False,
    acquisition_function_optimizer: Optional[
        Type[AcquisitionFunctionMaximizer]] = None,
    acquisition_function_optimizer_kwargs: Optional[Dict] = None,
    model: Optional[Type[AbstractEPM]] = None,
    model_kwargs: Optional[Dict] = None,
    runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
    runhistory2epm_kwargs: Optional[Dict] = None,
    multi_objective_algorithm: Optional[
        Type[AbstractMultiObjectiveAlgorithm]] = None,
    multi_objective_kwargs: Optional[Dict] = None,
    initial_design: Optional[Type[InitialDesign]] = None,
    initial_design_kwargs: Optional[Dict] = None,
    initial_configurations: Optional[List[Configuration]] = None,
    stats: Optional[Stats] = None,
    restore_incumbent: Optional[Configuration] = None,
    rng: Optional[Union[np.random.RandomState, int]] = None,
    smbo_class: Optional[Type[SMBO]] = None,
    run_id: Optional[int] = None,
    random_configuration_chooser: Optional[
        Type[RandomConfigurationChooser]] = None,
    random_configuration_chooser_kwargs: Optional[Dict] = None,
    dask_client: Optional[dask.distributed.Client] = None,
    n_jobs: Optional[int] = 1,
):
    """Wire up every component of the optimizer and create ``self.solver``.

    For each component the caller may pass ``None`` (a default is built from
    the scenario), a class (instantiated with default keyword arguments that
    are overridden by the matching ``*_kwargs`` dict) and, where noted, an
    already-constructed object.

    Parameters
    ----------
    scenario : Scenario
        Source of all default settings. Its configuration space is re-seeded
        and ``scenario.write()`` is called here.
    tae_runner : class, callable or None
        ``None`` builds an ``ExecuteTARunOld`` on ``scenario.ta``; a class is
        instantiated; any other callable is wrapped in ``ExecuteTAFuncDict``.
    tae_runner_kwargs : dict, optional
        Overrides for the runner's default keyword arguments.
    runhistory : RunHistory class, RunHistory object or None
        ``None`` creates a new ``RunHistory``.
    runhistory_kwargs : dict, optional
        Keyword arguments used when a run history is instantiated here.
    intensifier : AbstractRacer class, AbstractRacer object or None
        ``None`` selects ``Intensifier``.
    intensifier_kwargs : dict, optional
        Overrides for the intensifier's default keyword arguments.
    acquisition_function : class or None
        ``None`` selects ``LogEI`` when ``scenario.transform_y`` is ``"LOG"``
        or ``"LOGS"`` and ``EI`` otherwise.
    acquisition_function_kwargs : dict, optional
        Overrides for the acquisition function's default keyword arguments.
    integrate_acquisition_function : bool
        If true, the acquisition function is wrapped in an
        ``IntegratedAcquisitionFunction``.
    acquisition_function_optimizer : class or None
        ``None`` selects ``LocalAndSortedRandomSearch``.
    acquisition_function_optimizer_kwargs : dict, optional
        Overrides for the optimizer's default keyword arguments.
    model : class or None
        ``None`` selects ``RandomForestWithInstances`` configured from the
        scenario's ``rf_*`` settings.
    model_kwargs : dict, optional
        Overrides for the model's default keyword arguments.
    runhistory2epm : class or None
        ``None`` picks a converter from ``scenario.run_obj`` and
        ``scenario.transform_y``.
    runhistory2epm_kwargs : dict, optional
        Overrides for the converter's default keyword arguments.
    multi_objective_algorithm : class or None
        Only used when the scenario has more than one objective; ``None``
        then selects ``MeanAggregationStrategy``.
    multi_objective_kwargs : dict, optional
        Overrides for the multi-objective algorithm's keyword arguments.
    initial_design : class or None
        ``None`` picks a design from ``scenario.initial_incumbent``. Must not
        be combined with ``initial_configurations``.
    initial_design_kwargs : dict, optional
        Overrides for the initial design's default keyword arguments.
    initial_configurations : list of Configuration, optional
        If given, a plain ``InitialDesign`` over these configurations is used.
    stats : Stats, optional
        Used as ``self.stats`` when truthy; otherwise a new ``Stats`` is built.
    restore_incumbent : Configuration, optional
        When set, no new output directory is created; the scenario's existing
        run directory is reused if ``scenario.output_dir`` is set.
    rng : numpy.random.RandomState or int, optional
        Resolved together with ``run_id`` through ``get_rng``.
    smbo_class : class, optional
        Solver class to instantiate; ``None`` selects ``SMBO``.
    run_id : int, optional
        Resolved together with ``rng`` through ``get_rng``.
    random_configuration_chooser : class, RandomConfigurationChooser or None
        ``None`` selects ``ChooserProb`` (with ``scenario.rand_prob`` unless
        ``prob`` is overridden); an object is used as-is.
    random_configuration_chooser_kwargs : dict, optional
        Overrides for the chooser's default keyword arguments.
    dask_client : dask.distributed.Client, optional
        If given, the runner is wrapped in a ``DaskParallelRunner``.
    n_jobs : int, optional
        ``None`` or ``1`` means a single worker, ``-1`` means
        ``joblib.cpu_count()``; more than one worker wraps the runner in a
        ``DaskParallelRunner``.

    Raises
    ------
    ValueError
        On an invalid ``runhistory``, ``random_configuration_chooser``,
        ``n_jobs``, ``scenario.initial_incumbent``, run objective or
        ``transform_y``, when both ``initial_design`` and
        ``initial_configurations`` are given, or when the runner's objective
        differs from the scenario's.
    TypeError
        When ``model``, ``acquisition_function``,
        ``acquisition_function_optimizer``, ``tae_runner``, ``intensifier``,
        ``multi_objective_algorithm``, ``initial_design`` or
        ``runhistory2epm`` is neither ``None`` nor of a supported kind.
    """
    self.logger = logging.getLogger(self.__module__ + "." +
                                    self.__class__.__name__)

    self.scenario = scenario
    self.output_dir = ""
    if not restore_incumbent:
        # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
        # output directory. This is the default path.
        # initial random number generator
        run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
        self.output_dir = create_output_directory(scenario, run_id)
    elif scenario.output_dir is not None:  # type: ignore[attr-defined] # noqa F821
        run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
        # output-directory is created in CLI when restoring from a
        # folder. calling the function again in the facade results in two
        # folders being created: run_X and run_X.OLD. if we are
        # restoring, the output-folder exists already and we omit creating it,
        # but set the self-output_dir to the dir.
        # necessary because we want to write traj to new output-dir in CLI.
        self.output_dir = cast(str, scenario.output_dir_for_this_run)  # type: ignore[attr-defined] # noqa F821
    rng = cast(np.random.RandomState, rng)

    if (scenario.deterministic is True  # type: ignore[attr-defined] # noqa F821
            and getattr(scenario, "tuner_timeout", None) is None
            and scenario.run_obj == "quality"  # type: ignore[attr-defined] # noqa F821
            ):
        self.logger.info(
            "Optimizing a deterministic scenario for quality without a tuner timeout - will make "
            "SMAC deterministic and only evaluate one configuration per iteration!"
        )
        scenario.intensification_percentage = 1e-10  # type: ignore[attr-defined] # noqa F821
        scenario.min_chall = 1  # type: ignore[attr-defined] # noqa F821

    scenario.write()

    # initialize stats object
    if stats:
        self.stats = stats
    else:
        self.stats = Stats(scenario)

    if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
        self.logger.warning(
            "Runtime as objective automatically activates log(y) transformation"
        )
        self.scenario.transform_y = "LOG"  # type: ignore[attr-defined] # noqa F821

    # initialize empty runhistory
    num_obj = len(scenario.multi_objectives)  # type: ignore[attr-defined] # noqa F821
    runhistory_def_kwargs = {}
    if runhistory_kwargs is not None:
        runhistory_def_kwargs.update(runhistory_kwargs)
    if runhistory is None:
        runhistory = RunHistory(**runhistory_def_kwargs)
    elif inspect.isclass(runhistory):
        runhistory = runhistory(**runhistory_def_kwargs)  # type: ignore[operator] # noqa F821
    elif isinstance(runhistory, RunHistory):
        pass
    else:
        raise ValueError(
            "runhistory has to be a class or an object of RunHistory")

    rand_conf_chooser_kwargs = {"rng": rng}
    if random_configuration_chooser_kwargs is not None:
        rand_conf_chooser_kwargs.update(random_configuration_chooser_kwargs)
    if random_configuration_chooser is None:
        if "prob" not in rand_conf_chooser_kwargs:
            rand_conf_chooser_kwargs["prob"] = scenario.rand_prob  # type: ignore[attr-defined] # noqa F821
        random_configuration_chooser_instance = ChooserProb(
            **rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821
        )  # type: RandomConfigurationChooser
    elif inspect.isclass(random_configuration_chooser):
        random_configuration_chooser_instance = random_configuration_chooser(  # type: ignore # noqa F821
            **rand_conf_chooser_kwargs  # type: ignore[arg-type] # noqa F821
        )
    elif isinstance(random_configuration_chooser, RandomConfigurationChooser):
        # An already-constructed chooser is used as-is. Without this binding
        # the name would be undefined when the solver arguments are built.
        random_configuration_chooser_instance = random_configuration_chooser  # type: ignore # noqa F821
    else:
        raise ValueError(
            "random_configuration_chooser has to be"
            " a class or object of RandomConfigurationChooser")

    # reset random number generator in config space to draw different
    # random configurations with each seed given to SMAC
    scenario.cs.seed(rng.randint(MAXINT))  # type: ignore[attr-defined] # noqa F821

    # initial Trajectory Logger
    traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

    # initial EPM
    types, bounds = get_types(scenario.cs, scenario.feature_array)  # type: ignore[attr-defined] # noqa F821
    model_def_kwargs = {
        "types": types,
        "bounds": bounds,
        "instance_features": scenario.feature_array,
        "seed": rng.randint(MAXINT),
        "pca_components": scenario.PCA_DIM,
    }
    if model_kwargs is not None:
        model_def_kwargs.update(model_kwargs)
    if model is None:
        # Scenario-derived forest settings only fill in what the caller did
        # not already provide through model_kwargs.
        rf_defaults = {
            "log_y": scenario.transform_y in ["LOG", "LOGS"],  # type: ignore[attr-defined] # noqa F821
            "num_trees": scenario.rf_num_trees,  # type: ignore[attr-defined] # noqa F821
            "do_bootstrapping": scenario.rf_do_bootstrapping,  # type: ignore[attr-defined] # noqa F821
            "ratio_features": scenario.rf_ratio_features,  # type: ignore[attr-defined] # noqa F821
            "min_samples_split": scenario.rf_min_samples_split,  # type: ignore[attr-defined] # noqa F821
            "min_samples_leaf": scenario.rf_min_samples_leaf,  # type: ignore[attr-defined] # noqa F821
            "max_depth": scenario.rf_max_depth,  # type: ignore[attr-defined] # noqa F821
        }
        for key, value in rf_defaults.items():
            model_def_kwargs.setdefault(key, value)
        model_def_kwargs["configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
        model_instance = RandomForestWithInstances(
            **model_def_kwargs  # type: ignore[arg-type] # noqa F821
        )  # type: AbstractEPM
    elif inspect.isclass(model):
        model_def_kwargs["configspace"] = self.scenario.cs  # type: ignore[attr-defined] # noqa F821
        model_instance = model(**model_def_kwargs)  # type: ignore # noqa F821
    else:
        raise TypeError("Model not recognized: %s" % (type(model)))

    # initial acquisition function
    acq_def_kwargs = {"model": model_instance}
    if acquisition_function_kwargs is not None:
        acq_def_kwargs.update(acquisition_function_kwargs)

    acquisition_function_instance = None  # type: Optional[AbstractAcquisitionFunction]
    if acquisition_function is None:
        if scenario.transform_y in ["LOG", "LOGS"]:  # type: ignore[attr-defined] # noqa F821
            acquisition_function_instance = LogEI(
                **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
            )
        else:
            acquisition_function_instance = EI(
                **acq_def_kwargs  # type: ignore[arg-type] # noqa F821
            )
    elif inspect.isclass(acquisition_function):
        acquisition_function_instance = acquisition_function(
            **acq_def_kwargs)
    else:
        raise TypeError(
            "Argument acquisition_function must be None or an object implementing the "
            "AbstractAcquisitionFunction, not %s." %
            type(acquisition_function))
    if integrate_acquisition_function:
        acquisition_function_instance = IntegratedAcquisitionFunction(
            acquisition_function=acquisition_function_instance,  # type: ignore
            **acq_def_kwargs,
        )

    # initialize optimizer on acquisition function
    acq_func_opt_kwargs = {
        "acquisition_function": acquisition_function_instance,
        "config_space": scenario.cs,  # type: ignore[attr-defined] # noqa F821
        "rng": rng,
    }
    if acquisition_function_optimizer_kwargs is not None:
        acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
    if acquisition_function_optimizer is None:
        sls_defaults = {
            "max_steps": scenario.sls_max_steps,  # type: ignore[attr-defined] # noqa F821
            "n_steps_plateau_walk": scenario.sls_n_steps_plateau_walk,  # type: ignore[attr-defined] # noqa F821
        }
        for key, value in sls_defaults.items():
            acq_func_opt_kwargs.setdefault(key, value)
        acquisition_function_optimizer_instance = LocalAndSortedRandomSearch(
            **acq_func_opt_kwargs  # type: ignore
        )
    elif inspect.isclass(acquisition_function_optimizer):
        acquisition_function_optimizer_instance = acquisition_function_optimizer(  # type: ignore # noqa F821
            **acq_func_opt_kwargs)  # type: ignore # noqa F821
    else:
        raise TypeError(
            "Argument acquisition_function_optimizer must be None or an object implementing the "
            "AcquisitionFunctionMaximizer, but is '%s'" %
            type(acquisition_function_optimizer))

    # initialize tae_runner
    # First case, if tae_runner is None, the target algorithm is a call
    # string in the scenario file
    tae_def_kwargs = {
        "stats": self.stats,
        "run_obj": scenario.run_obj,
        "par_factor": scenario.par_factor,  # type: ignore[attr-defined] # noqa F821
        "cost_for_crash": scenario.cost_for_crash,  # type: ignore[attr-defined] # noqa F821
        "abort_on_first_run_crash": scenario.abort_on_first_run_crash,  # type: ignore[attr-defined] # noqa F821
        "multi_objectives": scenario.multi_objectives,  # type: ignore[attr-defined] # noqa F821
    }
    if tae_runner_kwargs is not None:
        tae_def_kwargs.update(tae_runner_kwargs)

    if "ta" not in tae_def_kwargs:
        tae_def_kwargs["ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
    if tae_runner is None:
        tae_def_kwargs["ta"] = scenario.ta  # type: ignore[attr-defined] # noqa F821
        tae_runner_instance = ExecuteTARunOld(
            **tae_def_kwargs  # type: ignore[arg-type] # noqa F821
        )  # type: BaseRunner
    elif inspect.isclass(tae_runner):
        tae_runner_instance = cast(
            BaseRunner, tae_runner(**tae_def_kwargs))  # type: ignore
    elif callable(tae_runner):
        tae_def_kwargs["ta"] = tae_runner
        tae_def_kwargs["use_pynisher"] = scenario.limit_resources  # type: ignore[attr-defined] # noqa F821
        tae_def_kwargs["memory_limit"] = scenario.memory_limit  # type: ignore[attr-defined] # noqa F821
        tae_runner_instance = ExecuteTAFuncDict(
            **tae_def_kwargs)  # type: ignore
    else:
        raise TypeError(
            "Argument 'tae_runner' is %s, but must be "
            "either None, a callable or an object implementing "
            "BaseRunner. Passing 'None' will result in the "
            "creation of target algorithm runner based on the "
            "call string in the scenario file." % type(tae_runner))

    # In case of a parallel run, wrap the single worker in a parallel
    # runner
    if n_jobs is None or n_jobs == 1:
        _n_jobs = 1
    elif n_jobs == -1:
        _n_jobs = joblib.cpu_count()
    elif n_jobs > 0:
        _n_jobs = n_jobs
    else:
        raise ValueError(
            "Number of tasks must be positive, None or -1, but is %s" %
            str(n_jobs))
    if _n_jobs > 1 or dask_client is not None:
        tae_runner_instance = DaskParallelRunner(  # type: ignore
            tae_runner_instance,
            n_workers=_n_jobs,
            output_directory=self.output_dir,
            dask_client=dask_client,
        )

    # Check that overall objective and tae objective are the same
    # TODO: remove these two ignores once the scenario object knows all its attributes!
    if tae_runner_instance.run_obj != scenario.run_obj:  # type: ignore[union-attr] # noqa F821
        raise ValueError(
            "Objective for the target algorithm runner and "
            "the scenario must be the same, but are '%s' and "
            "'%s'" % (tae_runner_instance.run_obj, scenario.run_obj))  # type: ignore[union-attr] # noqa F821

    if intensifier is None:
        intensifier = Intensifier

    if isinstance(intensifier, AbstractRacer):
        intensifier_instance = intensifier
    elif inspect.isclass(intensifier):
        # initialize intensification
        intensifier_def_kwargs = {
            "stats": self.stats,
            "traj_logger": traj_logger,
            "rng": rng,
            "instances": scenario.train_insts,  # type: ignore[attr-defined] # noqa F821
            "cutoff": scenario.cutoff,  # type: ignore[attr-defined] # noqa F821
            "deterministic": scenario.deterministic,  # type: ignore[attr-defined] # noqa F821
            "run_obj_time": scenario.run_obj == "runtime",  # type: ignore[attr-defined] # noqa F821
            "instance_specifics": scenario.instance_specific,  # type: ignore[attr-defined] # noqa F821
            "adaptive_capping_slackfactor": scenario.intens_adaptive_capping_slackfactor,  # type: ignore[attr-defined] # noqa F821
            "min_chall": scenario.intens_min_chall,  # type: ignore[attr-defined] # noqa F821
        }

        # These arguments are only passed to Intensifier and its subclasses.
        if issubclass(intensifier, Intensifier):
            intensifier_def_kwargs["always_race_against"] = scenario.cs.get_default_configuration()  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs["use_ta_time_bound"] = scenario.use_ta_time  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs["minR"] = scenario.minR  # type: ignore[attr-defined] # noqa F821
            intensifier_def_kwargs["maxR"] = scenario.maxR  # type: ignore[attr-defined] # noqa F821

        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)

        intensifier_instance = intensifier(
            **intensifier_def_kwargs)  # type: ignore[arg-type] # noqa F821
    else:
        raise TypeError(
            "Argument intensifier must be None or an object implementing the AbstractRacer, but is '%s'"
            % type(intensifier))

    # initialize multi objective
    # the multi_objective_algorithm_instance will be passed to the runhistory2epm object
    multi_objective_algorithm_instance = None  # type: Optional[AbstractMultiObjectiveAlgorithm]

    if scenario.multi_objectives is not None and num_obj > 1:  # type: ignore[attr-defined] # noqa F821
        # define any defaults here
        _multi_objective_kwargs = {"rng": rng, "num_obj": num_obj}

        if multi_objective_kwargs is not None:
            _multi_objective_kwargs.update(multi_objective_kwargs)

        if multi_objective_algorithm is None:
            multi_objective_algorithm_instance = MeanAggregationStrategy(
                **_multi_objective_kwargs  # type: ignore[arg-type] # noqa F821
            )
        elif inspect.isclass(multi_objective_algorithm):
            multi_objective_algorithm_instance = multi_objective_algorithm(
                **_multi_objective_kwargs)
        else:
            raise TypeError(
                "Multi-objective algorithm not recognized: %s" %
                (type(multi_objective_algorithm)))

    # initial design
    if initial_design is not None and initial_configurations is not None:
        raise ValueError(
            "Either use initial_design or initial_configurations; but not both"
        )

    init_design_def_kwargs = {
        "cs": scenario.cs,  # type: ignore[attr-defined] # noqa F821
        "traj_logger": traj_logger,
        "rng": rng,
        "ta_run_limit": scenario.ta_run_limit,  # type: ignore[attr-defined] # noqa F821
        "configs": initial_configurations,
        "n_configs_x_params": 0,
        "max_config_fracs": 0.0,
    }
    if initial_design_kwargs is not None:
        init_design_def_kwargs.update(initial_design_kwargs)
    if initial_configurations is not None:
        initial_design_instance = InitialDesign(**init_design_def_kwargs)
    elif initial_design is None:
        if scenario.initial_incumbent == "DEFAULT":  # type: ignore[attr-defined] # noqa F821
            init_design_def_kwargs["max_config_fracs"] = 0.0
            initial_design_instance = DefaultConfiguration(
                **init_design_def_kwargs)
        elif scenario.initial_incumbent == "RANDOM":  # type: ignore[attr-defined] # noqa F821
            init_design_def_kwargs["max_config_fracs"] = 0.0
            initial_design_instance = RandomConfigurations(
                **init_design_def_kwargs)
        elif scenario.initial_incumbent == "LHD":  # type: ignore[attr-defined] # noqa F821
            initial_design_instance = LHDesign(**init_design_def_kwargs)
        elif scenario.initial_incumbent == "FACTORIAL":  # type: ignore[attr-defined] # noqa F821
            initial_design_instance = FactorialInitialDesign(
                **init_design_def_kwargs)
        elif scenario.initial_incumbent == "SOBOL":  # type: ignore[attr-defined] # noqa F821
            initial_design_instance = SobolDesign(**init_design_def_kwargs)
        else:
            raise ValueError(
                "Don't know what kind of initial_incumbent "
                "'%s' is" % scenario.initial_incumbent  # type: ignore[attr-defined] # noqa F821
            )
    elif inspect.isclass(initial_design):
        initial_design_instance = initial_design(**init_design_def_kwargs)
    else:
        raise TypeError(
            "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
            % type(initial_design))

    # if we log the performance data,
    # the RFRImputator will already get
    # log transform data from the runhistory
    # np.float64 is used instead of the np.float_ alias, which NumPy 2.0
    # removed; nanmin with np.inf turns a NaN cutoff into "no cutoff".
    if scenario.transform_y in ["LOG", "LOGS"]:  # type: ignore[attr-defined] # noqa F821
        cutoff = np.log(np.nanmin([np.inf, np.float64(scenario.cutoff)]))  # type: ignore[attr-defined] # noqa F821
        threshold = cutoff + np.log(scenario.par_factor)  # type: ignore[attr-defined] # noqa F821
    else:
        cutoff = np.nanmin([np.inf, np.float64(scenario.cutoff)])  # type: ignore[attr-defined] # noqa F821
        threshold = cutoff * scenario.par_factor  # type: ignore[attr-defined] # noqa F821

    num_params = len(scenario.cs.get_hyperparameters())  # type: ignore[attr-defined] # noqa F821
    imputor = RFRImputator(
        rng=rng,
        cutoff=cutoff,
        threshold=threshold,
        model=model_instance,
        change_threshold=0.01,
        max_iter=2,
    )

    r2e_def_kwargs = {
        "scenario": scenario,
        "num_params": num_params,
        "success_states": [
            StatusType.SUCCESS,
        ],
        "impute_censored_data": True,
        "impute_state": [
            StatusType.CAPPED,
        ],
        "imputor": imputor,
        "scale_perc": 5,
    }

    # TODO: consider other sorts of multi-objective algorithms
    if isinstance(multi_objective_algorithm_instance, AggregationStrategy):
        r2e_def_kwargs.update({
            "multi_objective_algorithm": multi_objective_algorithm_instance
        })

    if scenario.run_obj == "quality":
        r2e_def_kwargs.update({
            "success_states": [
                StatusType.SUCCESS,
                StatusType.CRASHED,
                StatusType.MEMOUT,
            ],
            "impute_censored_data": False,
            "impute_state": None,
        })

    if (isinstance(intensifier_instance, (SuccessiveHalving, Hyperband))
            and scenario.run_obj == "quality"):
        r2e_def_kwargs.update({
            "success_states": [
                StatusType.SUCCESS,
                StatusType.CRASHED,
                StatusType.MEMOUT,
                StatusType.DONOTADVANCE,
            ],
            "consider_for_higher_budgets_state": [
                StatusType.DONOTADVANCE,
                StatusType.TIMEOUT,
                StatusType.CRASHED,
                StatusType.MEMOUT,
            ],
        })

    if runhistory2epm_kwargs is not None:
        r2e_def_kwargs.update(runhistory2epm_kwargs)
    if runhistory2epm is None:
        if scenario.run_obj == "runtime":
            rh2epm = RunHistory2EPM4LogCost(
                **r2e_def_kwargs  # type: ignore[arg-type] # noqa F821
            )  # type: AbstractRunHistory2EPM
        elif scenario.run_obj == "quality":
            if scenario.transform_y == "NONE":  # type: ignore[attr-defined] # noqa F821
                rh2epm = RunHistory2EPM4Cost(
                    **r2e_def_kwargs)  # type: ignore # noqa F821
            elif scenario.transform_y == "LOG":  # type: ignore[attr-defined] # noqa F821
                rh2epm = RunHistory2EPM4LogCost(
                    **r2e_def_kwargs)  # type: ignore # noqa F821
            elif scenario.transform_y == "LOGS":  # type: ignore[attr-defined] # noqa F821
                rh2epm = RunHistory2EPM4LogScaledCost(
                    **r2e_def_kwargs)  # type: ignore # noqa F821
            elif scenario.transform_y == "INVS":  # type: ignore[attr-defined] # noqa F821
                rh2epm = RunHistory2EPM4InvScaledCost(
                    **r2e_def_kwargs)  # type: ignore # noqa F821
            else:
                # Fail with a clear message instead of leaving rh2epm
                # unbound for an unsupported transformation.
                raise ValueError(
                    "Unknown transform_y: %s. Should be one of "
                    "NONE, LOG, LOGS or INVS." %
                    scenario.transform_y  # type: ignore[attr-defined] # noqa F821
                )
        else:
            raise ValueError(
                "Unknown run objective: %s. Should be either "
                "quality or runtime." %
                self.scenario.run_obj  # type: ignore # noqa F821
            )
    elif inspect.isclass(runhistory2epm):
        rh2epm = runhistory2epm(**r2e_def_kwargs)  # type: ignore # noqa F821
    else:
        raise TypeError(
            "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
            % type(runhistory2epm))

    smbo_args = {
        "scenario": scenario,
        "stats": self.stats,
        "initial_design": initial_design_instance,
        "runhistory": runhistory,
        "runhistory2epm": rh2epm,
        "intensifier": intensifier_instance,
        "num_run": run_id,
        "model": model_instance,
        "acq_optimizer": acquisition_function_optimizer_instance,
        "acquisition_func": acquisition_function_instance,
        "rng": rng,
        "restore_incumbent": restore_incumbent,
        "random_configuration_chooser": random_configuration_chooser_instance,
        "tae_runner": tae_runner_instance,
    }  # type: Dict[str, Any]

    if smbo_class is None:
        self.solver = SMBO(**smbo_args)  # type: ignore[arg-type] # noqa F821
    else:
        self.solver = smbo_class(**smbo_args)  # type: ignore[arg-type] # noqa F821