def test_log_runtime_with_imputation(self):
    '''
    Feed runs into RunHistory2EPM4LogCost one at a time, with imputation of
    TIMEOUT runs enabled, and check the transformed (X, y) after each step.

    The imputor receives the natural log (np.log) of the scenario cutoff and
    of cutoff * par_factor; the expected targets below are natural-log
    values as well. The imputor is also stored on ``self.imputor``.
    '''
    rng = np.random.RandomState(seed=12345)
    log_cutoff = np.log(self.scen.cutoff)
    log_threshold = np.log(self.scen.cutoff * self.scen.par_factor)
    forest = RandomForestWithInstances(
        configspace=self.cs,
        types=self.types,
        bounds=self.bounds,
        instance_features=None,
        seed=12345,
        ratio_features=1.0,
    )
    self.imputor = RFRImputator(
        rng=rng,
        cutoff=log_cutoff,
        threshold=log_threshold,
        model=forest,
    )

    converter = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2,
        scenario=self.scen,
        impute_censored_data=True,
        impute_state=[StatusType.TIMEOUT],
        success_states=[StatusType.SUCCESS],
        imputor=self.imputor,
    )

    # Step 1: a single successful run with cost == time == 1.
    first_run = dict(config=self.config1, cost=1, time=1,
                     status=StatusType.SUCCESS, instance_id=23,
                     seed=None, additional_info=None)
    self.rh.add(**first_run)
    features, targets = converter.transform(self.rh)
    expected_features = np.array([[0.005, 0.995]])
    self.assertTrue(np.allclose(features, expected_features, atol=0.001))
    # ln(1) == 0
    expected_targets = np.array([[0.]])
    self.assertTrue(np.allclose(targets, expected_targets))

    # Step 2: a TIMEOUT run.
    # Original author's note: rh2epm should use time and not cost field later.
    second_run = dict(config=self.config3, cost=200, time=20,
                      status=StatusType.TIMEOUT, instance_id=1,
                      seed=45, additional_info={"start_time": 20})
    self.rh.add(**second_run)
    features, targets = converter.transform(self.rh)
    expected_features = np.array([[0.005, 0.995], [0.995, 0.995]])
    self.assertTrue(np.allclose(features, expected_features, atol=0.001))
    # ln(20 * 10)
    expected_targets = np.array([[0.], [5.2983]])
    self.assertTrue(np.allclose(targets, expected_targets, atol=0.001))

    # Step 3: another TIMEOUT run; the expected output gains a row in the
    # middle ([0.995, 0.005]) whose target is 2.727.
    third_run = dict(config=self.config2, cost=100, time=10,
                     status=StatusType.TIMEOUT, instance_id=1,
                     seed=12354, additional_info={"start_time": 10})
    self.rh.add(**third_run)
    features, targets = converter.transform(self.rh)
    expected_features = np.array([[0.005, 0.995],
                                  [0.995, 0.005],
                                  [0.995, 0.995]])
    np.testing.assert_array_almost_equal(features, expected_features,
                                         decimal=3)
    expected_targets = np.array([[0.], [2.727], [5.2983]])
    np.testing.assert_array_almost_equal(targets, expected_targets,
                                         decimal=3)
def test_log_cost_without_imputation(self):
    '''
    Feed runs into RunHistory2EPM4LogCost one at a time with imputation of
    censored data switched off, and check the transformed (X, y) each time.

    The expected targets are natural-log values (ln(1) = 0, ln(200) ~ 5.2983).
    '''
    converter = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2,
        success_states=[StatusType.SUCCESS],
        impute_censored_data=False,
        scenario=self.scen,
    )

    # Step 1: a single successful run with cost == time == 1.
    first_run = dict(config=self.config1, cost=1, time=1,
                     status=StatusType.SUCCESS, instance_id=23,
                     seed=None, additional_info=None)
    self.rh.add(**first_run)
    features, targets = converter.transform(self.rh)
    one_row = np.array([[0.005, 0.995]])
    self.assertTrue(np.allclose(features, one_row, atol=0.001))
    # ln(1) == 0
    self.assertTrue(np.allclose(targets, np.array([[0.]])))

    # Step 2: a TIMEOUT run.
    # Original author's note: rh2epm should use time and not cost field later.
    second_run = dict(config=self.config3, cost=200, time=20,
                      status=StatusType.TIMEOUT, instance_id=1,
                      seed=45, additional_info={"start_time": 20})
    self.rh.add(**second_run)
    features, targets = converter.transform(self.rh)
    two_rows = np.array([[0.005, 0.995], [0.995, 0.995]])
    self.assertTrue(np.allclose(features, two_rows, atol=0.001))
    # ln(20 * 10)
    two_targets = np.array([[0.], [5.2983]])
    self.assertTrue(np.allclose(targets, two_targets, atol=0.001))

    # Step 3: a further TIMEOUT run.
    third_run = dict(config=self.config2, cost=100, time=10,
                     status=StatusType.TIMEOUT, instance_id=1,
                     seed=12354, additional_info={"start_time": 10})
    self.rh.add(**third_run)
    features, targets = converter.transform(self.rh)
    # last entry gets skipped since imputation is disabled, so the expected
    # output is the same as after step 2
    still_two_rows = np.array([[0.005, 0.995], [0.995, 0.995]])
    self.assertTrue(np.allclose(features, still_two_rows, atol=0.001))
    still_two_targets = np.array([[0.], [5.2983]])
    self.assertTrue(np.allclose(targets, still_two_targets, atol=0.001))
def test_log_runtime_with_imputation(self):
    '''
    Feed runs into RunHistory2EPM4LogCost one at a time, with imputation of
    TIMEOUT runs enabled, and check the transformed (X, y) after each step.

    This revision works in base-10 logs: the imputor receives np.log10 of
    the scenario cutoff and of cutoff * par_factor, and the expected targets
    are log10 values. The imputor is also stored on ``self.imputor``.
    '''
    # NOTE(review): this revision passes `rs=` to RFRImputator while other
    # revisions of this test pass `rng=` -- confirm which keyword the
    # RFRImputator under test accepts.
    self.imputor = RFRImputator(
        rs=np.random.RandomState(seed=12345),
        cutoff=np.log10(self.scen.cutoff),
        threshold=np.log10(self.scen.cutoff * self.scen.par_factor),
        model=RandomForestWithInstances(
            types=self.types,
            bounds=self.bounds,
            instance_features=None,
            seed=12345,
        ),
    )

    rh2epm = runhistory2epm.RunHistory2EPM4LogCost(
        num_params=2,
        scenario=self.scen,
        impute_censored_data=True,
        impute_state=[StatusType.TIMEOUT],
        imputor=self.imputor,
    )

    # Step 1: a single successful run with cost == time == 1.
    self.rh.add(config=self.config1, cost=1, time=1,
                status=StatusType.SUCCESS, instance_id=23,
                seed=None, additional_info=None)
    X, y = rh2epm.transform(self.rh)
    self.assertTrue(np.allclose(X, np.array([[0.005, 0.995]]), atol=0.001))
    # 10^0 = 1, i.e. log10(1) == 0
    self.assertTrue(np.allclose(y, np.array([[0.]])))

    # Step 2: a TIMEOUT run.
    # Original author's note: rh2epm should use time and not cost field later.
    self.rh.add(config=self.config3, cost=200, time=20,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=45, additional_info={"start_time": 20})
    X, y = rh2epm.transform(self.rh)
    self.assertTrue(
        np.allclose(X, np.array([[0.005, 0.995], [0.995, 0.995]]),
                    atol=0.001))
    # log_10(20 * 10)
    self.assertTrue(np.allclose(y, np.array([[0.], [2.301]]), atol=0.001))

    # Step 3: a second TIMEOUT run. The stray debug print of y that used to
    # follow this transform has been removed; the assertions below already
    # report a failure.
    self.rh.add(config=self.config2, cost=100, time=10,
                status=StatusType.TIMEOUT, instance_id=1,
                seed=12354, additional_info={"start_time": 10})
    X, y = rh2epm.transform(self.rh)
    self.assertTrue(
        np.allclose(X,
                    np.array([[0.005, 0.995],
                              [0.995, 0.005],
                              [0.995, 0.995]]),
                    atol=0.001))
    # both timeouts should be imputed to a PAR10
    self.assertTrue(
        np.allclose(y, np.array([[0.], [2.301], [2.301]]), atol=0.001))
def testRealImputation(self):
    '''
    Build an RFRImputator and a RunHistory2EPM4LogCost that share one
    RandomState, transform ``self.rh`` and print the first element of the
    result.
    '''
    # NOTE(review): this test makes no assertions -- it only fails if
    # construction or transform() raises. Consider asserting on the output.
    rs = numpy.random.RandomState(1)

    imputor = rfr_imputator.RFRImputator(
        rng=rs,
        cutoff=self.scen.cutoff,
        threshold=self.scen.cutoff * 10,
        change_threshold=0.01,
        max_iter=10,
        model=self.model,
    )

    converter = runhistory2epm.RunHistory2EPM4LogCost(
        scenario=self.scen,
        num_params=3,
        success_states=[StatusType.SUCCESS],
        impute_censored_data=True,
        impute_state=[StatusType.TIMEOUT],
        imputor=imputor,
        rng=rs,
    )

    transformed = converter.transform(self.rh)
    first_part = transformed[0]
    print("%s" % str(first_part))
def test_add(self):
    '''
    Add a few runs to a fresh RunHistory and push it through
    RunHistory2EPM4LogCost.

    Asserts that calling RunHistory2EPM4LogCost without arguments raises
    TypeError. The final transform() call is a smoke test only: its result
    is not inspected.
    '''
    rh = runhistory.RunHistory()
    cs = get_config_space()
    config1 = Configuration(cs, values={'a': 1, 'b': 2})
    config2 = Configuration(cs, values={'a': 1, 'b': 25})
    config3 = Configuration(cs, values={'a': 2, 'b': 2})

    # Two successful runs and one timeout, all with cost=10 and time=20.
    rh.add(config=config1, cost=10, time=20,
           status=StatusType.SUCCESS, instance_id=23,
           seed=None, additional_info=None)
    rh.add(config=config2, cost=10, time=20,
           status=StatusType.SUCCESS, instance_id=1,
           seed=12354, additional_info={"start_time": 10})
    rh.add(config=config3, cost=10, time=20,
           status=StatusType.TIMEOUT, instance_id=1,
           seed=45, additional_info={"start_time": 10})

    scen = Scenario({"cutoff_time": 10})

    # The converter cannot be built without its required arguments.
    self.assertRaises(TypeError, runhistory2epm.RunHistory2EPM4LogCost)

    rh2epm = runhistory2epm.RunHistory2EPM4LogCost(num_params=2,
                                                   scenario=scen)
    # Result intentionally discarded (it was previously bound to an unused
    # local); this only checks that transform() runs without raising.
    rh2epm.transform(rh)
def testRealImputation(self):
    '''
    Run RunHistory2EPM4LogCost with an RFRImputator on a generated run
    history, first without and then with instance features, checking the
    shapes of the transformed data and of the arrays passed to model.train.
    '''
    rs = numpy.random.RandomState(1)

    def make_converter(scenario, forest):
        # Build an imputor around `forest`, then the converter that uses it;
        # both share the RandomState created above.
        imputor = rfr_imputator.RFRImputator(
            rng=rs,
            cutoff=scenario.cutoff,
            threshold=scenario.cutoff * 10,
            change_threshold=0.01,
            max_iter=10,
            model=forest,
        )
        return runhistory2epm.RunHistory2EPM4LogCost(
            scenario=scenario,
            num_params=3,
            success_states=[StatusType.SUCCESS],
            impute_censored_data=True,
            impute_state=[StatusType.TIMEOUT],
            imputor=imputor,
            rng=rs,
        )

    # Without instance features
    cs, rh = self.get_runhistory(num_success=5, num_timeout=1,
                                 num_capped=2)
    scen = self.get_scenario()
    model = self.get_model(cs)
    r2e = make_converter(scen, model)

    # transform() is called twice on purpose; both results must have the
    # same target shape.
    for _ in range(2):
        self.assertEqual(r2e.transform(rh)[1].shape, (8, 1))

    # Now with instance features: ten random values per instance id found
    # in the run history.
    instance_features = {
        key.instance_id: numpy.random.rand(10) for key in rh.data
    }
    scen = self.get_scenario(instance_features)
    model = self.get_model(cs, instance_features)

    # Wrap model.train so the calls made during transform() are recorded
    # while still being forwarded to the real method.
    with unittest.mock.patch.object(model, attribute='train',
                                    wraps=model.train) as train_wrapper:

        def train_input_shape(call_index):
            # Shape of the first positional argument of the given train call.
            return train_wrapper.call_args_list[call_index][0][0].shape

        r2e = make_converter(scen, model)

        X, y = r2e.transform(rh)
        # presumably 13 columns = 3 parameters + 10 instance features
        self.assertEqual(X.shape, (8, 13))
        self.assertEqual(y.shape, (8, 1))
        num_calls = len(train_wrapper.call_args_list)
        self.assertGreater(num_calls, 1)
        self.assertEqual(train_input_shape(0), (5, 13))
        self.assertEqual(train_input_shape(1), (8, 13))

        # A second transform must train again, with the same input shapes
        # for its first two train calls.
        X, y = r2e.transform(rh)
        self.assertEqual(X.shape, (8, 13))
        self.assertEqual(y.shape, (8, 1))
        self.assertGreater(len(train_wrapper.call_args_list), num_calls + 1)
        self.assertEqual(train_input_shape(num_calls), (5, 13))
        self.assertEqual(train_input_shape(num_calls + 1), (8, 13))