Example #1
    def initialize(self, stamp, acq_func, double_intensification,
                   cache_directory, wallclock_limit, runcount_limit, cutoff,
                   memory_limit, downsampling, intensification_fold_size,
                   random_splitting_number, random_splitting_enabled):
        # Check if caching is enabled
        caching = acq_func[:2] == "pc"

        # Make a cache directory
        if cache_directory is None:
            current_directory = dirname(dirname(os.path.abspath(__file__)))
            self.cache_directory = os.path.join(current_directory, 'cache')
        else:
            self.cache_directory = cache_directory

        # Check if cache_directory exists
        try:
            if not os.path.exists(self.cache_directory):
                os.makedirs(self.cache_directory)
        except FileExistsError:
            pass

        # Load data
        self.data = self.data_loader.get_data()

        # Build runhistory
        # TODO Does this work correctly for non-caching?
        runhistory = PCRunHistory(average_cost)

        # Setup statistics
        info = {
            'stamp': stamp,
            'caching': caching,
            'acquisition_function': acq_func,
            'cache_directory': self.cache_directory,
            'wallclock_limit': wallclock_limit,
            'downsampling': downsampling
        }

        self.statistics = Statistics(stamp,
                                     self.output_dir,
                                     information=info,
                                     total_runtime=wallclock_limit)

        # The pipeline parts that get marginalized
        constant_pipeline_steps = [
            "one_hot_encoder", "imputation", "rescaling", "balancing",
            "feature_preprocessor"
        ]

        variable_pipeline_steps = ["classifier"]

        # The pipeline parts that can get cached
        cached_pipeline_steps = [["one_hot_encoder", "imputation"],
                                 [
                                     "one_hot_encoder", "imputation",
                                     "rescaling", "balancing",
                                     "feature_preprocessor"
                                 ]]

        # Set cache directory
        if caching:
            pr = CachedPipelineRunner(
                self.data,
                self.data_loader.info,
                self.pipeline_space,
                runhistory,
                self.statistics,
                cached_pipeline_steps=cached_pipeline_steps,
                cache_directory=self.cache_directory,
                downsampling=downsampling,
                num_cross_validation_folds=intensification_fold_size)
        else:
            pr = PipelineRunner(
                self.data,
                self.data_loader.info,
                self.pipeline_space,
                runhistory,
                self.statistics,
                downsampling=downsampling,
                num_cross_validation_folds=intensification_fold_size)

        # Choose acquisition function
        if acq_func in [
                "eips", "pc-eips", "m-eips", "pc-m-eips", "pceips",
                "pc-m-pceips"
        ]:
            model_target_names = ['cost', 'time']
        elif acq_func in ["ei", "pc-ei", "m-ei", "pc-m-ei"]:
            model_target_names = ['cost']
        elif acq_func in ["roar", "pc-roar-mrs", "pc-roar-sigmoid-rs"]:
            model_target_names = []
        else:
            # Not a valid acquisition function
            raise ValueError("The provided acquisition function is not valid")

        trajectory_path = self.output_dir + "/logging/" + stamp  # + self.data_path.split("/")[-1] + "/" + str(stamp)
        if not os.path.exists(trajectory_path):
            os.makedirs(trajectory_path)
        self.trajectory_path_json = trajectory_path + "/traj_aclib2.json"
        self.trajectory_path_csv = trajectory_path + "/traj_old.csv"

        # Build scenario
        intensification_instances = ([[1]] if intensification_fold_size is None
                                     else [[i] for i in range(intensification_fold_size)])
        args = {
            'cs': self.config_space,
            'run_obj': "quality",
            'runcount_limit': runcount_limit,
            'wallclock_limit': wallclock_limit,
            'memory_limit': memory_limit,
            'cutoff_time': cutoff,
            'deterministic': "true",
            'abort_on_first_run_crash': "false",
            'instances': intensification_instances
        }
        scenario = Scenario(args)

        # Build stats
        stats = Stats(scenario,
                      output_dir=self.output_dir + "/smac/",
                      stamp=stamp)

        # Build tae runner
        tae_runner = ExecuteTAFuncDict(ta=pr.run,
                                       stats=stats,
                                       runhistory=runhistory,
                                       run_obj=scenario.run_obj,
                                       memory_limit=scenario.memory_limit)

        # Build SMBO object
        intensification_instances = ([1] if intensification_fold_size is None
                                     else list(range(intensification_fold_size)))

        smbo_builder = SMBOBuilder()
        self.smbo = smbo_builder.build_pc_smbo(
            tae_runner=tae_runner,
            stats=stats,
            scenario=scenario,
            runhistory=runhistory,
            aggregate_func=average_cost,
            acq_func_name=acq_func,
            model_target_names=model_target_names,
            logging_directory=trajectory_path,
            double_intensification=double_intensification,
            constant_pipeline_steps=constant_pipeline_steps,
            variable_pipeline_steps=variable_pipeline_steps,
            cached_pipeline_steps=cached_pipeline_steps,
            intensification_instances=intensification_instances,
            num_marginalized_configurations_by_random_search=20,
            num_configs_for_marginalization=40,
            random_splitting_number=random_splitting_number,
            random_splitting_enabled=random_splitting_enabled)
Example #2
 def setUp(self):
     self.scenario = Scenario({'cs': test_helpers.get_branin_config_space(),
                               'run_obj': 'quality',
                               'output_dir': ''})
Example #3
def smac():
    # Build Configuration Space which defines all parameters and their ranges
    configuration_space = ConfigurationSpace()

    rate_of_learning = UniformFloatHyperparameter(
        "rate_of_learning",
        hyperparameter_values_dic['rate_of_learning'][0],
        hyperparameter_values_dic['rate_of_learning'][1],
        default_value=hyperparameter_values_dic['rate_of_learning'][0])
    # rate_of_decay = UniformFloatHyperparameter("rate_of_decay", hyperparameter_values_dic['rate_of_decay'][0],
    #                                               hyperparameter_values_dic['rate_of_decay'][1],
    #                                               default_value=hyperparameter_values_dic['rate_of_decay'][1])
    cell_dimension = UniformIntegerHyperparameter(
        "cell_dimension",
        hyperparameter_values_dic['cell_dimension'][0],
        hyperparameter_values_dic['cell_dimension'][1],
        default_value=hyperparameter_values_dic['cell_dimension'][0])
    no_hidden_layers = UniformIntegerHyperparameter(
        "num_hidden_layers",
        hyperparameter_values_dic['num_hidden_layers'][0],
        hyperparameter_values_dic['num_hidden_layers'][1],
        default_value=hyperparameter_values_dic['num_hidden_layers'][0])
    minibatch_size = UniformIntegerHyperparameter(
        "minibatch_size",
        hyperparameter_values_dic['minibatch_size'][0],
        hyperparameter_values_dic['minibatch_size'][1],
        default_value=hyperparameter_values_dic['minibatch_size'][0])
    max_epoch_size = UniformIntegerHyperparameter(
        "max_epoch_size",
        hyperparameter_values_dic['max_epoch_size'][0],
        hyperparameter_values_dic['max_epoch_size'][1],
        default_value=hyperparameter_values_dic['max_epoch_size'][0])
    max_num_of_epochs = UniformIntegerHyperparameter(
        "max_num_epochs",
        hyperparameter_values_dic['max_num_epochs'][0],
        hyperparameter_values_dic['max_num_epochs'][1],
        default_value=hyperparameter_values_dic['max_num_epochs'][0])
    l2_regularization = UniformFloatHyperparameter(
        "l2_regularization",
        hyperparameter_values_dic['l2_regularization'][0],
        hyperparameter_values_dic['l2_regularization'][1],
        default_value=hyperparameter_values_dic['l2_regularization'][0])
    gaussian_noise_stdev = UniformFloatHyperparameter(
        "gaussian_noise_stdev",
        hyperparameter_values_dic['gaussian_noise_stdev'][0],
        hyperparameter_values_dic['gaussian_noise_stdev'][1],
        default_value=hyperparameter_values_dic['gaussian_noise_stdev'][0])
    random_normal_initializer_stdev = UniformFloatHyperparameter(
        "random_normal_initializer_stdev",
        hyperparameter_values_dic['random_normal_initializer_stdev'][0],
        hyperparameter_values_dic['random_normal_initializer_stdev'][1],
        default_value=hyperparameter_values_dic[
            'random_normal_initializer_stdev'][0])

    # Add the learning-rate hyperparameter only if the optimizer is not cocob
    if optimizer == "cocob":
        configuration_space.add_hyperparameters([
            cell_dimension, no_hidden_layers, minibatch_size, max_epoch_size,
            max_num_of_epochs, l2_regularization, gaussian_noise_stdev,
            random_normal_initializer_stdev
        ])
    else:
        configuration_space.add_hyperparameters([
            rate_of_learning, cell_dimension, no_hidden_layers, minibatch_size,
            max_epoch_size, max_num_of_epochs, l2_regularization,
            gaussian_noise_stdev, random_normal_initializer_stdev
        ])

    # creating the scenario object
    scenario = Scenario({
        "run_obj": "quality",
        "runcount-limit": hyperparameter_tuning_configs.SMAC_RUNCOUNT_LIMIT,
        "cs": configuration_space,
        "deterministic": "true",
        "abort_on_first_run_crash": "false"
    })

    # optimize using an SMAC object
    smac = SMAC(scenario=scenario,
                rng=np.random.RandomState(seed),
                tae_runner=train_model_smac)

    incumbent = smac.optimize()
    smape_error = train_model_smac(incumbent)

    print("Optimized configuration: {}".format(incumbent))
    print("Optimized Value: {}\n".format(smape_error))
    return incumbent.get_dictionary()
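
The function above relies on several names defined elsewhere in that project (hyperparameter_values_dic, optimizer, seed, hyperparameter_tuning_configs, train_model_smac). A minimal sketch of what those assumptions might look like, purely for illustration and not taken from the original code:

# Hypothetical stand-ins for the globals the snippet assumes.
seed = 1
optimizer = "adam"  # any value other than "cocob" enables the learning-rate hyperparameter

hyperparameter_values_dic = {
    'rate_of_learning': [1e-4, 1e-1],
    'cell_dimension': [20, 50],
    'num_hidden_layers': [1, 5],
    'minibatch_size': [10, 30],
    'max_epoch_size': [1, 3],
    'max_num_epochs': [3, 20],
    'l2_regularization': [1e-4, 1e-3],
    'gaussian_noise_stdev': [1e-4, 1e-3],
    'random_normal_initializer_stdev': [1e-4, 1e-3],
}

class hyperparameter_tuning_configs:  # hypothetical constant holder
    SMAC_RUNCOUNT_LIMIT = 50

def train_model_smac(cfg):
    # Train the forecasting model with the sampled configuration and return
    # the SMAPE error that SMAC minimizes; stubbed out here.
    return 0.0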
Example #4
beta1 = UniformFloatHyperparameter("beta1",0.5,0.99,default_value=0.9)
cs.add_hyperparameters([lr,beta1])

def kmnist_from_cfg(cfg):
    cfg = {k : cfg[k] for k in cfg if cfg[k]}
    lr = cfg["lr"]
    beta1 = cfg["beta1"]
    model = CNN()
    val_accuracy = train(model, lr, beta1, trainDataloader, valDataloader, epochs)
    return 1 - val_accuracy  # Minimize


# Scenario object
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                     "runcount-limit": 200,  # maximum function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true"
                     })

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
            tae_runner=kmnist_from_cfg)
smac.solver.intensifier.tae_runner.use_pynisher = False

incumbent = smac.optimize()


inc_value = kmnist_from_cfg(incumbent)

print("Optimized Value: %.2f" % (inc_value))
Example #5
    def get_tuned_config(self,
                         scenario: ASlibScenario,
                         runcount_limit: int = 42,
                         wallclock_limit: int = 300,
                         autofolio_config: dict = dict(),
                         seed: int = 42):
        '''
            uses SMAC3 to determine a well-performing configuration in the configuration space self.cs on the given scenario

            Arguments
            ---------
            scenario: ASlibScenario
                ASlib Scenario at hand
            runcount_limit: int
                runcount_limit for SMAC scenario
            wallclock_limit: int
                wallclock limit in sec for SMAC scenario
                (overwritten by autofolio_config)
            autofolio_config: dict, or None
                An optional dictionary of configuration options
            seed: int
                random seed for SMAC

            Returns
            -------
            Configuration
                best incumbent configuration found by SMAC
        '''

        wallclock_limit = autofolio_config.get("wallclock_limit",
                                               wallclock_limit)
        runcount_limit = autofolio_config.get("runcount_limit", runcount_limit)

        taf = functools.partial(self.called_by_smac, scenario=scenario)
        max_fold = scenario.cv_data.max().max()
        max_fold = int(max_fold)

        ac_scenario = Scenario({
            "run_obj": "quality",  # we optimize quality
            "runcount-limit": runcount_limit,
            "cs": self.cs,  # configuration space
            "deterministic": "true",
            "instances": [[str(i)] for i in range(1, max_fold + 1)],
            "wallclock-limit": wallclock_limit,
            "output-dir": "" if not autofolio_config.get("output-dir", None)
                          else autofolio_config.get("output-dir"),
        })

        # necessary to use stats options related to scenario information
        AC_Stats.scenario = ac_scenario

        # Optimize
        self.logger.info(
            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
        )
        self.logger.info("Start Configuration")
        self.logger.info(
            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
        )
        smac = SMAC(scenario=ac_scenario,
                    tae_runner=taf,
                    rng=np.random.RandomState(seed))
        incumbent = smac.optimize()

        self.logger.info("Final Incumbent: %s" % (incumbent))

        return incumbent
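
A hedged usage sketch of the method above; af stands in for the surrounding AutoFolio-style object and aslib_scenario for an already loaded ASlibScenario (both names are assumptions):

best_config = af.get_tuned_config(
    scenario=aslib_scenario,
    runcount_limit=100,
    wallclock_limit=600,
    autofolio_config={},   # keep the limits passed explicitly above
    seed=12345,
)
print(best_config)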
Example #6
# Or we can add multiple hyperparameters at once:
num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default_value=10)
max_features = UniformIntegerHyperparameter("max_features", 1, boston.data.shape[1], default_value=1)
min_weight_frac_leaf = UniformFloatHyperparameter("min_weight_frac_leaf", 0.0, 0.5, default_value=0.0)
criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default_value="mse")
min_samples_to_split = UniformIntegerHyperparameter("min_samples_to_split", 2, 20, default_value=2)
min_samples_in_leaf = UniformIntegerHyperparameter("min_samples_in_leaf", 1, 20, default_value=1)
max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes", 10, 1000, default_value=100)

cs.add_hyperparameters([num_trees, min_weight_frac_leaf, criterion,
        max_features, min_samples_to_split, min_samples_in_leaf, max_leaf_nodes])

# SMAC scenario oject
scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternative runtime)
                     "runcount-limit": 50,  # maximum number of function evaluations
                     "cs": cs,               # configuration space
                     "deterministic": "true",
                     "memory_limit": 3072,   # adapt this to reasonable value for your hardware
                     })

# To optimize, we pass the function to the SMAC-object
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
            tae_runner=rf_from_cfg)

# Example call of the function with default values
# It returns: Status, Cost, Runtime, Additional Infos
def_value = smac.get_tae_runner().run(cs.get_default_configuration(), 1)[1]
print("Value for default configuration: %.2f" % (def_value))

# Start optimization
try:
    incumbent = smac.optimize()
finally:
    # keep the best configuration found so far, even if optimization is interrupted
    incumbent = smac.solver.incumbent
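
rf_from_cfg and boston are defined earlier in the original example. A hedged sketch of such a target function, assuming the (since removed) load_boston dataset and an older scikit-learn whose random-forest criteria are still named "mse"/"mae":

import numpy as np
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

boston = load_boston()

def rf_from_cfg(cfg, seed):
    # Hypothetical target function: build a random forest from the sampled
    # configuration and return a cost (cross-validated RMSE) for SMAC to minimize.
    rf = RandomForestRegressor(
        n_estimators=cfg["num_trees"],
        criterion=cfg["criterion"],
        min_samples_split=cfg["min_samples_to_split"],
        min_samples_leaf=cfg["min_samples_in_leaf"],
        min_weight_fraction_leaf=cfg["min_weight_frac_leaf"],
        max_features=cfg["max_features"],
        max_leaf_nodes=cfg["max_leaf_nodes"],
        random_state=seed,
    )
    scores = cross_val_score(rf, boston.data, boston.target,
                             scoring="neg_mean_squared_error", cv=5)
    return np.sqrt(-np.mean(scores))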
Example #7
          "Start this script with one of the following arguments in a suitable python-environment (that fulfills CAVE's requirements):\n"
          "'--generate' will generate suitable test-cases using SMAC-optimization \n"
          "'--cave'     will analyze the results of the generate-option using cave \n"
          "'--clean'    will delete previous results \n"
          "'--firefox'  will open all reports in firefox.")

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    #logging.basicConfig(level=logging.INFO)

    if len(sys.argv) < 2:
        print_help()
    elif sys.argv[1] == '--generate':
        generate_bohb_data()
        for scen in get_scenarios():
            scenario = Scenario(scen)
            smac = SMAC4AC(scenario=scenario, rng=np.random.RandomState(42))
            smac.optimize()
    elif sys.argv[1] == '--cave':
        failed = []
        for scen in get_scenarios():
            try:
                folder = [f for f in os.listdir(scen['output_dir']) if f.startswith('run')][0]
                cave = CAVE([os.path.join(scen['output_dir'], folder)],
                            os.path.join(scen['output_dir'], 'CAVE_RESULT'),
                            ta_exec_dir=['.'], validation_method='validation')
                cave.analyze({'fANOVA' : False, 'number_quantiles' : 2})
            except Exception as err:
                print("CAVE failed for %s: %s" % (scen['output_dir'], err))
                failed.append(scen['output_dir'])
        print("Failed: %s" % (str(failed)))
Example #8
 def test_Exception(self):
     with self.assertRaises(TypeError):
         _ = Scenario(['a', 'b'])
Example #9
def runhistory_builder(ta, scenario_dic, rng):

    tae_runner = ExecuteTARun(ta=ta)
    scenario = Scenario(scenario_dic)
    stats = Stats(scenario=scenario)
    traj_logger = TrajLogger(stats=stats, output_dir="/home/dfki/Desktop/temp")

    # if tae_runner.stats is None:
    #     new_smac =SMAC(scenario=scenario,tae_runner=tae_runner)
    #     tae_runner.stats = new_smac.stats

    stats.start_timing()
    default_config_builder = DefaultConfiguration(tae_runner, scenario, stats, traj_logger, rng)
    config_milad = default_config_builder._select_configuration()
    config_milad._values = None
    config_milad._values = {
        'balancing:strategy': 'none',
        'categorical_encoding:__choice__': 'one_hot_encoding',
        'classifier:__choice__': 'random_forest',
        'imputation:strategy': 'mean',
        'preprocessor:__choice__': 'no_preprocessing',
        'rescaling:__choice__': 'standardize',
        'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True',
        'classifier:random_forest:bootstrap': 'True',
        'classifier:random_forest:criterion': 'gini',
        'classifier:random_forest:max_depth': 10,
        'classifier:random_forest:max_features': 0.5,
        'classifier:random_forest:max_leaf_nodes': 'None',
        'classifier:random_forest:min_impurity_decrease': 0.0,
        'classifier:random_forest:min_samples_leaf': 1,
        'classifier:random_forest:min_samples_split': 2,
        'classifier:random_forest:min_weight_fraction_leaf': 0.0,
        'classifier:random_forest:n_estimators': 100,
        'categorical_encoding:one_hot_encoding:minimum_fraction': 0.01,
    }
    # config_milad._values = {'balancing:strategy': 'none',
    #  'categorical_encoding:__choice__': 'no_encoding',
    #  'classifier:__choice__': 'random_forest',
    #  'imputation:strategy': 'mean',
    #  'preprocessor:__choice__': 'pca',
    #  'preprocessor:copy':True,
    #  'preprocessor:iterated_power':'auto',
    #  'preprocessor:n_components':'None',
    #  'preprocessor:random_state':'None',
    #  'preprocessor:svd_solver':'auto',
    #  'preprocessor:tol':0.0,
    #  'preprocessor:whiten':'False',
    #  'rescaling:__choice__': 'None',
    #  'classifier:random_forest:bootstrap': 'True',
    #  'classifier:random_forest:class_weight': 'None',
    #  'classifier:random_forest:criterion': 'gini',
    #  'classifier:random_forest:max_depth': 'None',
    #  'classifier:random_forest:max_features': 'auto',
    #  'classifier:random_forest:max_leaf_nodes': 'None',
    #  'classifier:random_forest:min_impurity_decrease': 0.0,
    #  'classifier:random_forest:min_impurity_split': '1e-07',
    #  'classifier:random_forest:min_samples_leaf': 1,
    #  'classifier:random_forest:min_samples_split': 2,
    #  'classifier:random_forest:min_weight_fraction_leaf': 0.0,
    #  'classifier:random_forest:n_estimators': 10,
    #  'classifier:random_forest:n_jobs': 1,
    #  'classifier:random_forest:oob_score': 'False',
    #  'classifier:random_forest:random_state': 'None',
    #  'classifier:random_forest:verbose': 0,
    #  'classifier:random_forest:warm_start': 'False',
    # }
    # config_milad._vector =None


    status, cost, runtime, additional_info = tae_runner.start(config=config_milad, instance=None)

    print(status, cost, runtime, additional_info)

    runhistory = RunHistory(aggregate_func=average_cost)
    runhistory.add( config=config_milad,
                    cost=cost,
                    time=runtime,
                    status=status,
                    instance_id=None,
                    additional_info=additional_info)

    return runhistory
Example #10
    def test_write(self):
        """ Test whether a reloaded scenario still holds all the necessary
        information. A subset of parameters might change, such as the paths to
        pcs- or instance-files, so they are checked manually. """
        def check_scen_eq(scen1, scen2):
            print('check_scen_eq')
            """ Customized check for scenario-equality, ignoring file-paths """
            for name in scen1._arguments:
                dest = scen1._arguments[name]['dest']
                name = dest if dest else name  # if 'dest' is None, use 'name'
                if name in [
                        "pcs_fn", "train_inst_fn", "test_inst_fn",
                        "feature_fn", "output_dir"
                ]:
                    continue  # Those values are allowed to change when writing to disk
                elif name == 'cs':
                    # Using repr because of cs-bug
                    # (https://github.com/automl/ConfigSpace/issues/25)
                    self.assertEqual(repr(scen1.cs), repr(scen2.cs))
                elif name == 'feature_dict':
                    self.assertEqual(len(scen1.feature_dict),
                                     len(scen2.feature_dict))
                    for key in scen1.feature_dict:
                        self.assertTrue(
                            (scen1.feature_dict[key] == scen2.feature_dict[key]
                             ).all())
                else:
                    print(name, getattr(scen1, name), getattr(scen2, name))
                    self.assertEqual(getattr(scen1, name),
                                     getattr(scen2, name))

        # First check with file-paths defined
        feature_filename = 'test/test_files/scenario_test/features_multiple.txt'
        feature_filename = os.path.abspath(feature_filename)
        self.test_scenario_dict['feature_file'] = feature_filename
        scenario = Scenario(self.test_scenario_dict)
        # This injection would usually happen by the facade object!
        scenario.output_dir_for_this_run = scenario.output_dir
        scenario.write()
        path = os.path.join(scenario.output_dir, 'scenario.txt')
        scenario_reloaded = Scenario(path)
        check_scen_eq(scenario, scenario_reloaded)
        # Test whether json is the default pcs_fn
        self.assertTrue(
            os.path.exists(os.path.join(scenario.output_dir, 'param.pcs')))
        self.assertTrue(
            os.path.exists(os.path.join(scenario.output_dir, 'param.json')))
        self.assertEqual(scenario_reloaded.pcs_fn,
                         os.path.join(scenario.output_dir, 'param.json'))

        # Now create new scenario without filepaths
        self.test_scenario_dict.update({
            'paramfile': None,
            'cs': scenario.cs,
            'feature_file': None,
            'features': scenario.feature_dict,
            'feature_names': scenario.feature_names,
            'instance_file': None,
            'instances': scenario.train_insts,
            'test_instance_file': None,
            'test_instances': scenario.test_insts
        })
        logging.debug(scenario_reloaded)
        scenario_no_fn = Scenario(self.test_scenario_dict)
        scenario_reloaded = Scenario(path)
        check_scen_eq(scenario_no_fn, scenario_reloaded)
        # Test whether json is the default pcs_fn
        self.assertTrue(
            os.path.exists(os.path.join(scenario.output_dir, 'param.pcs')))
        self.assertTrue(
            os.path.exists(os.path.join(scenario.output_dir, 'param.json')))
        self.assertEqual(scenario_reloaded.pcs_fn,
                         os.path.join(scenario.output_dir, 'param.json'))
Example #11
 def test_no_output_dir(self):
     self.test_scenario_dict['output_dir'] = ""
     scenario = Scenario(self.test_scenario_dict)
     self.assertFalse(scenario.out_writer.write_scenario_file(scenario))
Example #12
    def test_merge_foreign_data(self):
        ''' test smac.utils.merge_foreign_data '''

        scenario = Scenario(self.test_scenario_dict)
        scenario_2 = Scenario(self.test_scenario_dict)
        scenario_2.feature_dict = {"inst_new": [4]}

        # init cs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(
            UniformIntegerHyperparameter(name='a', lower=0, upper=100))
        cs.add_hyperparameter(
            UniformIntegerHyperparameter(name='b', lower=0, upper=100))
        # build runhistory
        rh_merge = RunHistory()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        rh_merge.add(config=config,
                     instance_id="inst_new",
                     cost=10,
                     time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # "d" is an instance in <scenario>
        rh_merge.add(config=config,
                     instance_id="d",
                     cost=5,
                     time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # build empty rh
        rh_base = RunHistory()

        merge_foreign_data(scenario=scenario,
                           runhistory=rh_base,
                           in_scenario_list=[scenario_2],
                           in_runhistory_list=[rh_merge])

        # both runs should be in the runhistory
        # but we should not use the data to update the cost of config
        self.assertTrue(len(rh_base.data) == 2)
        self.assertTrue(np.isnan(rh_base.get_cost(config)))

        # we should not get direct access to external run data
        runs = rh_base.get_runs_for_config(config,
                                           only_max_observed_budget=True)
        self.assertTrue(len(runs) == 0)

        rh_merge.add(config=config,
                     instance_id="inst_new_2",
                     cost=10,
                     time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        self.assertRaises(
            ValueError, merge_foreign_data, **{
                "scenario": scenario,
                "runhistory": rh_base,
                "in_scenario_list": [scenario_2],
                "in_runhistory_list": [rh_merge]
            })
Example #13
    def fit(self, X_train, y_train, **fit_params):
        data_schema = lale.helpers.fold_schema(X_train, y_train, self.cv,
                                               self.estimator.is_classifier())
        self.search_space: ConfigurationSpace = get_smac_space(
            self.estimator,
            lale_num_grids=self.lale_num_grids,
            data_schema=data_schema)
        # Scenario object
        scenario_options = {
            "run_obj": "quality",  # optimize quality (alternatively runtime)
            "runcount-limit": self.max_evals,  # maximum function evaluations
            "cs": self.search_space,  # configuration space
            "deterministic": "true",
            "abort_on_first_run_crash": False,
        }
        if self.max_opt_time is not None:
            scenario_options["wallclock_limit"] = self.max_opt_time
        self.scenario = Scenario(scenario_options)

        self.cv = check_cv(self.cv,
                           y=y_train,
                           classifier=self.estimator.is_classifier())

        def smac_train_test(trainable, X_train, y_train):
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable,
                    X_train,
                    y_train,
                    cv=self.cv,
                    scoring=self.scoring)
                logger.debug("Successful trial of SMAC")
            except BaseException as e:
                # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    (
                        X_train_part,
                        X_validation,
                        y_train_part,
                        y_validation,
                    ) = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part,
                                            **fit_params)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(trained, X_validation, y_validation)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug("Error {} with pipeline:{}".format(
                        e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def f(trainable):
            return_dict = {}
            try:
                score, logloss, execution_time = smac_train_test(
                    trainable, X_train=X_train, y_train=y_train)
                return_dict = {
                    "loss": self.best_score - score,
                    "time": execution_time,
                    "log_loss": logloss,
                }
            except BaseException as e:
                logger.warning(
                    f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
                )
                raise e
            return return_dict["loss"]

        try:
            smac = orig_SMAC(
                scenario=self.scenario,
                rng=np.random.RandomState(42),
                tae_runner=lale_op_smac_tae(self.estimator, f),
            )
            incumbent = smac.optimize()
            self.trials = smac.get_runhistory()
            trainable = lale_trainable_op_from_config(self.estimator,
                                                      incumbent)
            # get the trainable corresponding to the best params and train it on the entire training dataset.
            trained = trainable.fit(X_train, y_train, **fit_params)
            self._best_estimator = trained
        except BudgetExhaustedException:
            logger.warning(
                "Maximum alloted optimization time exceeded. Optimization exited prematurely"
            )
        except BaseException as e:
            logger.warning("Error during optimization: {}".format(e))
            self._best_estimator = None

        return self
Example #14
                          default=logging.INFO,
                          choices=["INFO", "DEBUG"],
                          help="verbose level")

    args_, misc = parser.parse_known_args()

    # remove leading '-' in option names
    misc = dict(
        (k.lstrip("-"), v.strip("'")) for k, v in zip(misc[::2], misc[1::2]))

    if args_.verbose_level == "INFO":
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.DEBUG)

    scenario = Scenario(args_.scenario)
    traj_logger = TrajLogger(None, Stats(scenario))
    trajectory = traj_logger.read_traj_aclib_format(args_.trajectory,
                                                    scenario.cs)
    if args_.tae == "old":
        tae = ExecuteTARunOld(ta=scenario.ta,
                              run_obj=scenario.run_obj,
                              par_factor=scenario.par_factor,
                              cost_for_crash=scenario.cost_for_crash)
    if args_.tae == "aclib":
        tae = ExecuteTARunAClib(ta=scenario.ta,
                                run_obj=scenario.run_obj,
                                par_factor=scenario.par_factor,
                                cost_for_crash=scenario.cost_for_crash)

    validator = Validator(scenario, trajectory, args_.output, args_.seed)
Example #15
File: validate.py  Project: maxc01/SMAC3
    def validate(
        self,
        config_mode: Union[str, typing.List[Configuration]] = 'def',
        instance_mode: Union[str, typing.List[str]] = 'test',
        repetitions: int = 1,
        n_jobs: int = 1,
        backend: str = 'threading',
        runhistory: RunHistory = None,
        tae: ExecuteTARun = None,
        output_fn: typing.Optional[str] = None,
    ) -> RunHistory:
        """
        Validate configs on instances and save result in runhistory.
        If a runhistory is provided as input it is important that you run it on the same/comparable hardware.

        side effect: if output is specified, saves runhistory to specified
        output directory.

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration.
            string from [def, inc, def+inc, wallclock_time, cpu_time, all].
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of repetitions in nondeterministic algorithms
        n_jobs: int
            number of parallel processes used by joblib
        backend: str
            what backend joblib should use for parallel runs
        runhistory: RunHistory
            optional, RunHistory-object to reuse runs
        tae: ExecuteTARun
            tae to be used. if None, will initialize ExecuteTARunOld
        output_fn: str
            path to runhistory to be saved. if the suffix is not '.json', will
            be interpreted as directory and filename will be
            'validated_runhistory.json'

        Returns
        -------
        runhistory: RunHistory
            runhistory with validated runs
        """
        self.logger.debug(
            "Validating configs '%s' on instances '%s', repeating %d times"
            " with %d parallel runs on backend '%s'.", config_mode,
            instance_mode, repetitions, n_jobs, backend)

        # Get all runs to be evaluated as list
        runs, validated_rh = self._get_runs(config_mode, instance_mode,
                                            repetitions, runhistory)

        # Create new Stats without limits
        inf_scen = Scenario({
            'run_obj': self.scen.run_obj,
            'cutoff_time': self.scen.cutoff,  # type: ignore[attr-defined] # noqa F821
            'output_dir': ""
        })
        inf_stats = Stats(inf_scen)
        inf_stats.start_timing()

        # Create TAE
        if not tae:
            tae = ExecuteTARunOld(
                ta=self.scen.ta,  # type: ignore[attr-defined] # noqa F821
                runhistory=runhistory,
                stats=inf_stats,
                run_obj=self.scen.run_obj,
                par_factor=self.scen.par_factor,  # type: ignore[attr-defined] # noqa F821
                cost_for_crash=self.scen.cost_for_crash
            )  # type: ignore[attr-defined] # noqa F821
        else:
            # Inject endless-stats
            tae.stats = inf_stats

        # Validate!
        run_results = self._validate_parallel(tae, runs, n_jobs, backend)
        assert len(run_results) == len(runs), (run_results, runs)

        # tae returns (status, cost, runtime, additional_info)
        # Add runs to RunHistory
        for run, result in zip(runs, run_results):
            validated_rh.add(config=run.config,
                             cost=result[1],
                             time=result[2],
                             status=result[0],
                             instance_id=run.inst,
                             seed=run.seed,
                             additional_info=result[3])

        self._save_results(validated_rh,
                           output_fn,
                           backup_fn="validated_runhistory.json")
        return validated_rh
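
A hedged follow-up showing how the returned RunHistory could be inspected; validator is assumed to be an already constructed Validator instance for the scenario and trajectory at hand:

validated_rh = validator.validate(config_mode='def+inc',
                                  instance_mode='test',
                                  repetitions=1,
                                  n_jobs=1)
# RunHistory.data maps RunKey -> RunValue
for run_key, run_value in validated_rh.data.items():
    print(run_key.config_id, run_key.instance_id, run_value.cost, run_value.status)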
Example #16
def main():

    try:
        cmd_args, _ = get_common_cmd_args()

        output_basedir = cmd_args.output_basedir
        model_name = cmd_args.model_name
        if model_name == "resnet20":
            cfg2funcparams = cfg2funcparams_nas_resnet20
            get_cs = get_cs_nas_resnet20
        else:
            raise ValueError(f"model name {model_name} is wrong")

        logger = logging.getLogger(f"SMAC-NAS-{model_name}")
        logger.setLevel(logging.DEBUG)

        expid = get_experiment_id(6)
        output_dir = os.path.join(output_basedir, "SMAC", model_name, expid)
        os.makedirs(output_dir, exist_ok=True)
        log_path = os.path.join(output_dir, f"SMAC-NAS-{model_name}.log")
        setup_logger(logger, log_path)

        logger.info(f"Experiment {expid} starts...")
        logger.info("Experiment Configuration:")
        logger.info(vars(cmd_args))

        def obj_func(cfg):
            logger.info("Starting BO iteration")
            params = cfg2funcparams(cfg)
            obj_info = nas_train_test(cmd_args, params, logger, model_name=model_name)
            logger.info("Finishing BO iteration")
            logger.info(params)
            logger.info(obj_info)

            all_info = {
                "params": params,
                "obj_info": obj_info,
            }
            fn_path = os.path.join(output_dir, "smac_iter_hists.txt")
            with open(fn_path, "a") as f:
                json.dump(all_info, f)
                f.write("\n")

            return obj_info["value"]

        # smac default do minimize
        cs = get_cs()
        scenario = Scenario(
            {
                "run_obj": "quality",  # we optimize quality (alternatively runtime)
                "runcount_limit": 100,  # maximum function evaluations
                "cs": cs,  # configuration space
                "deterministic": "true",
                "initial_incumbent": "LHD",
            }
        )

        smac = SMAC4HPO(scenario=scenario, tae_runner=obj_func,)

        incumbent = smac.optimize()
        print(incumbent)

    except KeyboardInterrupt:
        print("Interrupted. You pressed Ctrl-C!!!")
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
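
The objective above appends one JSON record per evaluation to smac_iter_hists.txt. A small hedged sketch of reading that log back afterwards (the record layout comes from the snippet; everything else is an assumption):

import json
import os

# output_dir is assumed to be the same directory main() created above.
records = []
with open(os.path.join(output_dir, "smac_iter_hists.txt")) as f:
    for line in f:
        records.append(json.loads(line))

best = min(records, key=lambda r: r["obj_info"]["value"])
print(best["params"], best["obj_info"]["value"])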
Example #17
    def main_cli(
        self,
        commandline_arguments: typing.Optional[typing.List[str]] = None
    ) -> None:
        """Main function of SMAC for CLI interface"""
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        kwargs = {}
        if commandline_arguments:
            kwargs['commandline_arguments'] = commandline_arguments
        main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

        root_logger = logging.getLogger()
        root_logger.setLevel(main_args_.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter("%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:\t%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # remove default handler
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
        scen = Scenario(scenario=scenario)

        # Restore state
        if main_args_.restore_state:
            root_logger.debug("Restoring state from %s...",
                              main_args_.restore_state)
            restore_state = main_args_.restore_state
            rh, stats, traj_list_aclib, traj_list_old = self.restore_state(
                scen, restore_state)

            scen.output_dir_for_this_run = create_output_directory(
                scen,
                main_args_.seed,
                root_logger,
            )
            scen.write()
            incumbent = self.restore_state_after_output_dir(
                scen, stats, traj_list_aclib, traj_list_old)

        if main_args_.warmstart_runhistory:
            rh = RunHistory()

            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=main_args_.warmstart_scenario,
                in_runhistory_fn_list=main_args_.warmstart_runhistory,
                cs=scen.cs,  # type: ignore[attr-defined] # noqa F821
            )

        if main_args_.warmstart_incumbent:
            initial_configs = [scen.cs.get_default_configuration()]  # type: ignore[attr-defined] # noqa F821
            for traj_fn in main_args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(
                    fn=traj_fn,
                    cs=scen.cs,  # type: ignore[attr-defined] # noqa F821
                )
                initial_configs.append(trajectory[-1]["incumbent"])

        if main_args_.mode == "SMAC4AC":
            optimizer = SMAC4AC(scenario=scen,
                                rng=np.random.RandomState(main_args_.seed),
                                runhistory=rh,
                                initial_configurations=initial_configs,
                                stats=stats,
                                restore_incumbent=incumbent,
                                run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4HPO":
            optimizer = SMAC4HPO(scenario=scen,
                                 rng=np.random.RandomState(main_args_.seed),
                                 runhistory=rh,
                                 initial_configurations=initial_configs,
                                 stats=stats,
                                 restore_incumbent=incumbent,
                                 run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4BB":
            optimizer = SMAC4BB(scenario=scen,
                                rng=np.random.RandomState(main_args_.seed),
                                runhistory=rh,
                                initial_configurations=initial_configs,
                                stats=stats,
                                restore_incumbent=incumbent,
                                run_id=main_args_.seed)
        elif main_args_.mode == "ROAR":
            optimizer = ROAR(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             run_id=main_args_.seed)
        elif main_args_.mode == "Hydra":
            optimizer = Hydra(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed,
                random_configuration_chooser=main_args_.random_configuration_chooser,
                n_iterations=main_args_.hydra_iterations,
                val_set=main_args_.hydra_validation,
                incs_per_round=main_args_.hydra_incumbents_per_round,
                n_optimizers=main_args_.hydra_n_optimizers)
        elif main_args_.mode == "PSMAC":
            optimizer = PSMAC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                run_id=main_args_.seed,
                shared_model=smac_args_.shared_model,
                validate=main_args_.psmac_validate,
                n_optimizers=main_args_.hydra_n_optimizers,
                n_incs=main_args_.hydra_incumbents_per_round,
            )
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
Example #18
#     inc_value = mysmac_from_cfg(incumbent)

#     print("Optimized Value: %.2f" % (inc_value))

#     # We can also validate our results (though this makes a lot more sense with instances)
#     smac.validate(config_mode='inc',  # We can choose which configurations to evaluate
#                   # instance_mode='train+test',  # Defines what instances to validate
#                   repetitions=3,  # Ignored, unless you set "deterministic" to "false" in line 95
#                   n_jobs=1)  # How many cores to use in parallel for optimization
   ##########################SMAC------end---------------##############################
    # SMAC scenario object
    scenario = Scenario({"run_obj": "quality",  # we optimize quality (alternative to runtime)
                         "wallclock-limit": 40,  #100 max duration to run the optimization (in seconds)
                         "cs": cs,  # configuration space
                         "deterministic": "true",
                         "limit_resources": True,  # Uses pynisher to limit memory and runtime
                         # Alternatively, you can also disable this.
                         # Then you should handle runtime and memory yourself in the TA
                         "cutoff": 15,  #30 runtime limit for target algorithm
                         "memory_limit": 307,  # 3072adapt this to reasonable value for your hardware
                         })

    # max budget for hyperband can be anything. Here, we set it to maximum no. of epochs to train the MLP for
    max_iters = 15
    # intensifier parameters
    intensifier_kwargs = {'initial_budget': 5, 'max_budget': max_iters, 'eta': 3}
    # To optimize, we pass the function to the SMAC-object
    smac = BOHB4HPO(scenario=scenario, rng=np.random.RandomState(42),
                    tae_runner=mysmac_from_cfg,
                    intensifier_kwargs=intensifier_kwargs)  # all arguments related to intensifier can be passed like this
    
    # Example call of the function with default values
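
The listing breaks off here; judging by the commented block earlier in the same example, the continuation would optimize and then evaluate the incumbent roughly as follows (a hedged sketch, not the original code):

    incumbent = smac.optimize()
    inc_value = mysmac_from_cfg(incumbent)
    print("Optimized Value: %.2f" % inc_value)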
Example #19
 def setUp(self):
     self.output_dirs = []
     fn = os.path.join(os.path.dirname(__file__), '../test_files/spear_hydra_test_scenario.txt')
     self.scenario = Scenario(fn)
Example #20
def main():
    parser = argparse.ArgumentParser(description='Dump data of a log.')
    parser.add_argument('--dataset',
                        type=str,
                        default='labelme',
                        help='dataset to run smac on')
    parser.add_argument('--m',
                        type=int,
                        default=8,
                        help=' number of codebooks')

    args = parser.parse_args()

    # Fixed parameters
    dataset = CategoricalHyperparameter("dataset", [args.dataset],
                                        default_value=args.dataset)
    m = CategoricalHyperparameter("m", [str(args.m)],
                                  default_value=str(args.m))

    # Build Configuration Space which defines all parameters and their ranges
    ilsiter = UniformIntegerHyperparameter("ilsiter", 1, 16, default_value=8)
    npert = UniformIntegerHyperparameter("npert",
                                         0,
                                         args.m - 1,
                                         default_value=4)
    randord = CategoricalHyperparameter("randord", ["true", "false"],
                                        default_value="true")

    # SR parameters
    sr_method = CategoricalHyperparameter("SR_method", ["LSQ", "SR_C", "SR_D"],
                                          default_value="SR_D")
    schedule = CategoricalHyperparameter("schedule", ["1", "2", "3"],
                                         default_value="1")
    p = UniformFloatHyperparameter("p", 0.1, 1., default_value=0.5)

    # Schedule and p only make sense in SR
    use_schedule = InCondition(child=schedule,
                               parent=sr_method,
                               values=["SR_C", "SR_D"])
    use_p = InCondition(child=p, parent=sr_method, values=["SR_C", "SR_D"])

    cs = ConfigurationSpace()
    cs.add_hyperparameters(
        [dataset, m, ilsiter, npert, randord, sr_method, schedule, p])
    cs.add_conditions([use_schedule, use_p])

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 200,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "false"
    })

    # Optimize, using a SMAC-object
    thing_to_call = AbstractTAFunc(recall_from_cfg, use_pynisher=False)
    smac = SMAC(scenario=scenario,
                rng=np.random.RandomState(42),
                tae_runner=thing_to_call)

    print("Optimizing!")
    incumbent = smac.optimize()
    inc_value = recall_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
Example #21
def centroid(n_eval, random_seed_pair):
    name_tag = 'centroid_' + datetime.now().strftime("%Y-%m-%d-%H:%M:%S:%f")
    cs = ConfigurationSpace()
    for i in range(CENTROID_N_EDGES):
        car_var = CategoricalHyperparameter(
            'x' + str(i + 1).zfill(2),
            [str(elm) for elm in range(CENTROID_N_CHOICE)],
            default_value='0')
        cs.add_hyperparameter(car_var)

    init_points_numpy = sample_init_points([CENTROID_N_CHOICE] *
                                           CENTROID_N_EDGES, 20,
                                           random_seed_pair[1]).long().numpy()
    init_points = []
    for i in range(init_points_numpy.shape[0]):
        init_points.append(
            Configuration(
                cs, {
                    'x' + str(j + 1).zfill(2): str(init_points_numpy[i][j])
                    for j in range(CENTROID_N_EDGES)
                }))

    evaluator = Centroid(random_seed_pair)
    interaction_list = evaluator.interaction_list
    covariance_list = evaluator.covariance_list
    partition_original_list = evaluator.partition_original_list

    def evaluate(x):
        interaction_mixed = edge_choice(
            np.array([
                int(x['x' + str(j + 1).zfill(2)])
                for j in range(CENTROID_N_EDGES)
            ]), interaction_list)
        partition_mixed = partition(interaction_mixed, CENTROID_GRID)
        kld_sum = 0
        for i in range(evaluator.n_ising_models):
            kld = ising_dense(interaction_sparsified=interaction_mixed,
                              interaction_original=interaction_list[i],
                              covariance=covariance_list[i],
                              partition_sparsified=partition_mixed,
                              partition_original=partition_original_list[i],
                              grid_h=CENTROID_GRID[0])
            kld_sum += kld
        return kld_sum / float(evaluator.n_ising_models)

    print('Began    at ' + datetime.now().strftime("%H:%M:%S"))
    scenario = Scenario({
        "run_obj": "quality",
        "runcount-limit": n_eval,
        "cs": cs,
        "deterministic": "true",
        'output_dir': os.path.join(EXP_DIR, name_tag)
    })
    smac = SMAC(scenario=scenario,
                tae_runner=evaluate,
                initial_configurations=init_points)
    smac.optimize()

    evaluations, optimum = evaluations_from_smac(smac)
    print('Finished at ' + datetime.now().strftime("%H:%M:%S"))
    return optimum
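
evaluations_from_smac comes from the surrounding project; a hedged guess at what such a helper could look like, using only the public runhistory API:

def evaluations_from_smac(smac):
    # Collect every observed cost from the runhistory and report the best
    # (smallest) one as the optimum; not the project's actual implementation.
    runhistory = smac.get_runhistory()
    evaluations = [run_value.cost for run_value in runhistory.data.values()]
    return evaluations, min(evaluations)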
Example #22
def fmin_smac(func: typing.Callable,
              x0: typing.List[float],
              bounds: typing.List[typing.Iterable[float]],
              maxfun: int = -1,
              rng: typing.Optional[typing.Union[np.random.RandomState, int]] = None,
              scenario_args: typing.Optional[typing.Mapping[str, typing.Any]] = None,
              **kwargs):
    """
    Minimize a function func using the SMAC4HPO facade
    (i.e., a modified version of SMAC).
    This function is a convenience wrapper for the SMAC4HPO class.

    Parameters
    ----------
    func : typing.Callable
        Function to minimize.
    x0 : typing.List[float]
        Initial guess/default configuration.
    bounds : typing.List[typing.List[float]]
        ``(min, max)`` pairs for each element in ``x``, defining the bound on
        that parameters.
    maxfun : int, optional
        Maximum number of function evaluations.
    rng : np.random.RandomState, optional
            Random number generator used by SMAC.
    scenario_args: typing.Mapping[str,typing.Any]
        Arguments passed to the scenario
        See smac.scenario.scenario.Scenario
    **kwargs:
        Arguments passed to the optimizer class
        See ~smac.facade.smac_facade.SMAC

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.

    """
    # create configuration space
    cs = ConfigurationSpace()

    # Adjust zero padding
    tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}'

    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT",
    }

    if scenario_args is not None:
        scenario_dict.update(scenario_args)

    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    scenario = Scenario(scenario_dict)

    smac = SMAC4HPO(scenario=scenario,
                    tae_runner=ExecuteTAFuncArray,
                    tae_runner_kwargs={'ta': func},
                    rng=rng,
                    **kwargs)

    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]
    incumbent = np.array(
        [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))],
        dtype=float)
    return incumbent, incumbent_performance.cost, smac
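
A minimal usage sketch of the wrapper above; the Rosenbrock function and the bounds are illustrative choices, not taken from the original module:

def rosenbrock_2d(x):
    # classic 2-d test function; fmin_smac passes the parameters as an array
    return 100.0 * (x[1] - x[0] ** 2) ** 2 + (1.0 - x[0]) ** 2

x, cost, smac = fmin_smac(func=rosenbrock_2d,
                          x0=[-3.0, -4.0],
                          bounds=[(-5.0, 10.0), (-5.0, 10.0)],
                          maxfun=25,
                          rng=3)
print("Best x: %s with cost %.4f" % (x, cost))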
Example #23
else:
    memory_limit_factor = 2

print('Starting to validate configurations')
for i, entry in enumerate(trajectory):
    print('Starting to validate configuration %d/%d' %
          (i + 1, len(trajectory)))
    incumbent_id = entry.incumbent_id
    train_performance = entry.train_perf
    if incumbent_id not in incumbent_id_to_model:
        config = entry.incumbent

        logger = logging.getLogger('Testing:)')
        stats = Stats(
            Scenario({
                'cutoff_time': per_run_time_limit * 2,
                'run_obj': 'quality',
            }))
        stats.start_timing()
        # To avoid the output "first run crashed"...
        stats.submitted_ta_runs += 1
        stats.finished_ta_runs += 1
        memory_lim = memory_limit_factor * automl_arguments['memory_limit']
        ta = ExecuteTaFuncWithQueue(
            backend=automl.automl_._backend,
            autosklearn_seed=seed,
            resampling_strategy='test',
            memory_limit=memory_lim,
            disable_file_output=True,
            logger=logger,
            stats=stats,
            all_scoring_functions=True,
Example #24
0
    def __init__(
            self,
            scenario: Scenario,
            tae_runner: typing.Union[ExecuteTARun, typing.Callable] = None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            acquisition_function_optimizer: AcquisitionFunctionMaximizer = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            restore_incumbent: Configuration = None,
            rng: typing.Union[np.random.RandomState, int] = None,
            smbo_class: SMBO = None,
            run_id: int = 1,
            hoag: AbstractHOAG = None,
            #server: Server=None,
            bayesian_optimization: bool = False):
        """Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        runhistory : RunHistory
            runhistory to store all algorithm runs
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` if not set.
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        runhistory2epm : ~smac.runhistory.runhistory2epm.AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        initial_design : InitialDesign
            initial sampling design
        initial_configurations : typing.List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id: int, (default: 1)
            Run ID will be used as subfolder for output_dir.
        hoag : AbstractHOAG, optional
            HOAG component specific to this variant of the facade; passed
            through unchanged to the SMBO constructor.
        bayesian_optimization : bool, (default: False)
            Variant-specific flag; passed through unchanged to the SMBO
            constructor.
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        if acquisition_function_optimizer is None:
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                acquisition_function, scenario.cs,
                np.random.RandomState(seed=rng.randint(MAXINT)))
        elif not isinstance(
                acquisition_function_optimizer,
                AcquisitionFunctionMaximizer,
        ):
            raise ValueError(
                "Argument 'acquisition_function_optimizer' must be of type"
                "'AcquisitionFunctionMaximizer', but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(tae_runner=tae_runner,
                                      stats=self.stats,
                                      traj_logger=traj_logger,
                                      rng=rng,
                                      instances=scenario.train_insts,
                                      cutoff=scenario.cutoff,
                                      deterministic=scenario.deterministic,
                                      run_obj_time=scenario.run_obj == "runtime",
                                      always_race_against=scenario.cs.get_default_configuration() \
                                        if scenario.always_race_default else None,
                                      instance_specifics=scenario.instance_specific,
                                      minR=scenario.minR,
                                      maxR=scenario.maxR)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log10(scenario.cutoff)
                threshold = np.log10(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[StatusType.SUCCESS, StatusType.CRASHED],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': num_run,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'hoag': hoag,
            #'server': server,
            'bayesian_optimization': bayesian_optimization
        }
        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
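# Usage sketch for the constructor above. The class name `SMAC` and its import
# are assumptions (only the __init__ is shown in this example); the toy
# objective and the single hyperparameter are illustrative:
import numpy as np
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.configspace import ConfigurationSpace
from smac.scenario.scenario import Scenario

cs = ConfigurationSpace()
cs.add_hyperparameter(
    UniformFloatHyperparameter("x", -5.0, 5.0, default_value=0.0))

scenario = Scenario({"run_obj": "quality", "cs": cs,
                     "runcount-limit": 10, "deterministic": "true"})

def toy_objective(cfg):
    # cfg is a Configuration; plain callables get wrapped by ExecuteTAFuncDict
    return (cfg["x"] - 1.0) ** 2

# `SMAC` stands for whichever facade class defines the __init__ above;
# optimize() is provided by the upstream SMAC facade classes.
optimizer = SMAC(scenario=scenario, tae_runner=toy_objective,
                 rng=np.random.RandomState(42))
incumbent = optimizer.optimize()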
Example #25
0
    def get_smbo(intensification_perc):
        """ Return SMBO with intensification_percentage. """
        scen = Scenario({'cs': test_helpers.get_branin_config_space(),
                         'run_obj': 'quality', 'output_dir': '',
                         'intensification_percentage': intensification_perc})
        return SMAC(scen, tae_runner=target, rng=1).solver
Example #26
0
def fmin_smac(func: callable,
              x0: list,
              bounds: list,
              maxfun: int=-1,
              maxtime: int=-1,
              rng: np.random.RandomState=None):
    """ Minimize a function func using the SMAC algorithm.
    This function is a convenience wrapper for the SMAC class.

    Parameters
    ----------
    func : callable f(x)
        Function to minimize.
    x0 : list
        Initial guess/default configuration.
    bounds : list
        ``(min, max)`` pairs for each element in ``x``, defining the bounds on
        that parameter.
    maxtime : int, optional
        Maximum runtime in seconds.
    maxfun : int, optional
        Maximum number of function evaluations.
    rng : np.random.RandomState, optional
        Random number generator used by SMAC.

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_facade.SMAC`
        SMAC object which enables the user to get,
        e.g., the trajectory and runhistory.
    """
    # create configuration space
    cs = ConfigurationSpace()
    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name="x%d" % (idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # Create target algorithm runner
    ta = ExecuteTAFuncArray(ta=func)

    # create scenario
    scenario_dict = {"run_obj": "quality",
                     "cs": cs,
                     "deterministic": "true",
                     "initial_incumbent": "DEFAULT"
                     }
    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    if maxtime > 0:
        scenario_dict["wallclock_limit"] = maxtime
    scenario = Scenario(scenario_dict)

    smac = SMAC(scenario=scenario, tae_runner=ta, rng=rng)
    smac.logger = logging.getLogger(smac.__module__ + "." + smac.__class__.__name__)
    incumbent = smac.optimize()

    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]
    incumbent = np.array([incumbent['x%d' % (idx + 1)]
                          for idx in range(len(bounds))], dtype=float)
    return incumbent, incumbent_performance.cost, smac
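# Usage sketch for this older SMAC-facade variant of fmin_smac. Unlike the
# SMAC4HPO version in Example #22, it also accepts `maxtime`, which becomes
# the scenario's wallclock_limit. The objective below is illustrative:
def sphere(x):
    return float(x[0] ** 2 + x[1] ** 2)

x_min, cost, smac_obj = fmin_smac(func=sphere,
                                  x0=[1.0, 1.0],
                                  bounds=[(-2.0, 2.0), (-2.0, 2.0)],
                                  maxfun=20,
                                  maxtime=60,
                                  rng=1)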
Example #27
0
n_neighbors = UniformIntegerHyperparameter("n_neighbors",
                                           1,
                                           50,
                                           default_value=5)

weights = CategoricalHyperparameter("weights", ["uniform", "distance"],
                                    default_value="uniform")

p = UniformIntegerHyperparameter("p", 1, 5, default_value=2)

cs.add_hyperparameters([n_neighbors, weights, p])

# Scenario object
scenario = Scenario({
    "run_obj": "quality",  # we optimize quality (alternatively runtime)
    "runcount-limit":
    500,  # max. number of function evaluations; for this example set to a low number
    "cs": cs,  # configuration space
    "deterministic": "true"
})

# Example call of the function
# It returns: Status, Cost, Runtime, Additional Infos
def_value = kNN_from_cfg(cs.get_default_configuration())
print("Default Value: %.2f" % (def_value))

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC4HPO(scenario=scenario,
                rng=np.random.RandomState(42),
                tae_runner=kNN_from_cfg)
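# The example above appears truncated after constructing the SMAC4HPO object.
# Assuming kNN_from_cfg and cs are defined earlier in the original example,
# the usual continuation (mirroring the other examples in this collection) is:
incumbent = smac.optimize()
inc_value = kNN_from_cfg(incumbent)
print("Optimized Value: %.2f" % inc_value)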
Example #28
0
    def dont_test_car_smac(self):
        import numpy as np

        from lale.datasets.auto_weka import fetch_car
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.preprocessing import LabelEncoder
        import pandas as pd
        from lale.lib.weka import J48
        from lalegpl.lib.r import ArulesCBAClassifier 
        from lale.operators import make_pipeline
        from lale.lib.lale import HyperoptClassifier
        from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        from smac.configspace import ConfigurationSpace


        (X_train, y_train), (X_test, y_test) = fetch_car()
        y_name = y_train.name
        le = LabelEncoder()
        y_train = le.fit_transform(y_train)
        y_test = le.transform(y_test)

        y_train = pd.Series(y_train, name=y_name)
        y_test = pd.Series(y_test, name=y_name)

#        planned_pipeline = make_pipeline(J48() | ArulesCBAClassifier() | LogisticRegression() | KNeighborsClassifier())
        planned_pipeline = make_pipeline(ArulesCBAClassifier() | KNeighborsClassifier() | LogisticRegression())

        cs: ConfigurationSpace = get_smac_space(planned_pipeline)
        print(cs)
#        X_train = X_train[0:20]
#        y_train = y_train[0:20]
        # Scenario object
        run_count_limit = 1
        scenario = Scenario({"run_obj": "quality",   # we optimize quality (alternatively runtime)
                            "runcount-limit": run_count_limit,  # maximum function evaluations
                            "cs": cs,               # configuration space
                            "deterministic": "true",
                            "abort_on_first_run_crash": False
                            })

        # Optimize, using a SMAC-object
        def f_min(op): 
            return test_f_min(op, X_train, y_train, num_folds=2)
        tae = lale_op_smac_tae(planned_pipeline, f_min)

        print("Optimizing! Depending on your machine, this might take a few minutes.")
        smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                tae_runner=tae)

        incumbent = smac.optimize()
        trainable_pipeline = lale_trainable_op_from_config(planned_pipeline, incumbent)
        trained_pipeline = trainable_pipeline.fit(X_train, y_train)
        pred = trained_pipeline.predict(X_test)
        accuracy = accuracy_score(y_test, pred)
        print("Accuracy: %.2f" % (accuracy))
        inc_value = tae(incumbent)

        print("Optimized Value: %.2f" % (inc_value))
        print(f"Run count limit: {run_count_limit}")
Example #29
0
import numpy as np
import random
from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

scenario = Scenario({
    "run_obj": "quality",
    "runcount-limit": 200,
    "paramfile": "./searchspace.pcs",
    "deterministic": "true",
    "algo": "python SMACOptimizerClient.py",
    "wallclock-limit": 600
})

print("create smac object")
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42))
print("Start optimization process")
smac.optimize()
print("Optimization done")
Example #30
0
    def test_no_rh_epm(self):
        scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
        scen.feature_array = None
        validator = Validator(scen, self.trajectory)
        self.assertRaises(ValueError, validator.validate_epm)