def get_scenario(self):
    """Build a Scenario from ``self.folder``, preferring a json-configspace.

    Reads ``scenario.txt`` from ``self.folder``, suppresses the output-dir,
    swaps in ``configspace.json`` for the pcs-file when present, and
    constructs the Scenario from within ``self.ta_exec_dir``.

    Returns
    -------
    Scenario
        the constructed scenario-object
    """
    # Scenario-construction may create a spurious 'run_1'-dir; remember
    # whether it already existed so we only remove what we caused.
    had_run_1 = os.path.exists('run_1')
    reader = InputReader()
    # Create Scenario (disable output_dir to avoid cluttering)
    scenario_path = os.path.join(self.folder, 'scenario.txt')
    scenario_options = reader.read_scenario_file(scenario_path)
    scenario_options['output_dir'] = ""
    # We always prefer the less error-prone json-format if available:
    json_cs_path = os.path.join(self.folder, 'configspace.json')
    if os.path.exists(json_cs_path):
        self.logger.debug("Detected '%s'", json_cs_path)
        with open(json_cs_path, 'r') as json_fh:
            dropped_pcs = scenario_options.pop('pcs_fn', 'no pcs_fn in scenario')
            self.logger.debug("Ignoring %s", dropped_pcs)
            scenario_options['cs'] = pcs_json.read(json_fh.read())
    with changedir(self.ta_exec_dir):
        self.logger.debug("Creating scenario from \"%s\"", self.ta_exec_dir)
        scenario = Scenario(scenario_options)
    if not had_run_1 and os.path.exists('run_1'):
        shutil.rmtree('run_1')
    return scenario
def get_scenario(self):
    """Build a Scenario from ``self.folder``, resolving globbed paths.

    Falls back to glob-matching when ``scenario.txt`` is not directly in
    ``self.folder``. Inside ``self.ta_exec_dir``, a sibling
    ``configspace.json`` of the pcs-file is preferred over a non-json pcs.

    Returns
    -------
    Scenario
        the constructed scenario-object
    """
    # Only remove a 'run_1'-dir afterwards if we created it as a side effect.
    had_run_1 = os.path.exists('run_1')
    reader = InputReader()
    # Create Scenario (disable output_dir to avoid cluttering)
    scenario_path = os.path.join(self.folder, 'scenario.txt')
    if not os.path.isfile(scenario_path):
        scenario_path = self.get_glob_file(self.folder, 'scenario.txt')
    scenario_options = reader.read_scenario_file(scenario_path)
    scenario_options['output_dir'] = ""
    with changedir(self.ta_exec_dir):
        # We always prefer the less error-prone json-format if available:
        pcs_path = scenario_options.get('pcs_fn', 'no_pcs_fn')
        json_cs_path = os.path.join(os.path.dirname(pcs_path), 'configspace.json')
        if not pcs_path.endswith('.json') and os.path.exists(json_cs_path):
            self.logger.debug("Detected, '%s' ignoring '%s'", json_cs_path, pcs_path)
            with open(json_cs_path, 'r') as json_fh:
                scenario_options['cs'] = pcs_json.read(json_fh.read())
            scenario_options['pcs_fn'] = json_cs_path
        self.logger.debug("Creating scenario from '%s'", self.ta_exec_dir)
        scenario = Scenario(scenario_options)
    if not had_run_1 and os.path.exists('run_1'):
        shutil.rmtree('run_1')
    return scenario
def test_feature_input(self):
    """Instance features from a csv-file are read into (names, mapping)."""
    path = "test/test_files/features_example.csv"
    names, feature_map = InputReader().read_instance_features_file(fn=path)
    self.assertEqual(names, ["feature1", "feature2", "feature3"])
    expected = {"inst1": [1.0, 2.0, 3.0],
                "inst2": [1.5, 2.5, 3.5],
                "inst3": [1.7, 1.8, 1.9]}
    for instance, values in feature_map.items():
        self.assertEqual(expected[instance], list(values))
def __init__(self, folder: str, ta_exec_dir: Union[str, None] = None):
    """Initialize scenario, runhistory and incumbent from folder, execute
    init-method of SMAC facade (so you could simply use SMAC-instances instead)

    Parameters
    ----------
    folder: string
        output-dir of this run
    ta_exec_dir: string
        if the execution directory for the SMAC-run differs from the cwd,
        there might be problems loading instance-, feature- or PCS-files in
        the scenario-object. since instance- and PCS-files are necessary,
        specify the path to the execution-dir of SMAC here
    """
    # Remember whether 'run_1' existed before, so a 'run_1'-dir created as a
    # side effect of scenario-construction can be removed again at the end.
    run_1_existed = os.path.exists('run_1')
    self.logger = logging.getLogger("cave.SMACrun.{}".format(folder))
    in_reader = InputReader()
    self.folder = folder
    self.logger.debug("Loading from %s", folder)
    split_folder = os.path.split(folder)
    # NOTE(review): info-level log of the raw split-tuple looks like leftover
    # debugging output — confirm whether this should stay at info-level.
    self.logger.info(split_folder)
    if ta_exec_dir is None:
        ta_exec_dir = '.'
    # Paths of the files this run is restored from
    self.scen_fn = os.path.join(folder, 'scenario.txt')
    self.rh_fn = os.path.join(folder, 'runhistory.json')
    self.traj_fn = os.path.join(folder, 'traj_aclib2.json')
    self.traj_old_fn = os.path.join(folder, 'traj_old.csv')
    # Create Scenario (disable output_dir to avoid cluttering)
    scen_dict = in_reader.read_scenario_file(self.scen_fn)
    scen_dict['output_dir'] = ""
    # Construct the scenario from the TA-execution-dir so that relative
    # instance-/pcs-paths in the scenario-file resolve correctly.
    with changedir(ta_exec_dir):
        self.scen = Scenario(scen_dict)
    # Load runhistory and trajectory
    self.runhistory = RunHistory(average_cost)
    self.runhistory.update_from_json(self.rh_fn, self.scen.cs)
    self.traj = TrajLogger.read_traj_aclib_format(fn=self.traj_fn,
                                                  cs=self.scen.cs)
    # The last trajectory-entry holds the final incumbent of the run
    incumbent = self.traj[-1]['incumbent']
    self.train_inst = self.scen.train_insts
    self.test_inst = self.scen.test_insts
    # Initialize SMAC-object
    super().__init__(scenario=self.scen, runhistory=self.runhistory)
    #restore_incumbent=incumbent)
    # TODO use restore, delete next line
    self.solver.incumbent = incumbent
    # Clean up 'run_1' only if it did not exist before this constructor ran
    if (not run_1_existed) and os.path.exists('run_1'):
        shutil.rmtree('run_1')
def test_save_load_configspace(self):
    """Check if inputreader can load different config-spaces.

    Round-trips one configspace through the 'pcs_new', 'json' and plain
    'pcs' formats, reading each back both with and without an explicit
    logger. Fixes a copy-paste duplication in the original, where the
    pcs-section asserted the no-logger read twice.
    """
    cs = ConfigurationSpace()
    hyp = UniformFloatHyperparameter('A', 0.0, 1.0, default_value=0.5)
    cs.add_hyperparameters([hyp])
    output_writer = OutputWriter()
    input_reader = InputReader()

    def assert_restorable(fn):
        # Every format must restore identically, with and without a logger.
        self.assertEqual(cs, input_reader.read_pcs_file(fn))
        self.assertEqual(cs, input_reader.read_pcs_file(fn, self.logger))

    # pcs_new
    output_writer.save_configspace(cs, self.pcs_fn, 'pcs_new')
    assert_restorable(self.pcs_fn)
    # json
    output_writer.save_configspace(cs, self.json_fn, 'json')
    assert_restorable(self.json_fn)
    # pcs (written directly in the old pcs-format)
    with open(self.pcs_fn, 'w') as fh:
        fh.write(pcs.write(cs))
    assert_restorable(self.pcs_fn)
def get_scenario(self):
    """Construct the scenario from ``self.folder`` and cache it on ``self.scen``.

    Returns
    -------
    Scenario
        the constructed scenario-object (also stored as ``self.scen``)
    """
    # Only remove a 'run_1'-dir afterwards if we created it as a side effect.
    had_run_1 = os.path.exists('run_1')
    # Create Scenario (disable output_dir to avoid cluttering)
    scenario_options = InputReader().read_scenario_file(
        os.path.join(self.folder, 'scenario.txt'))
    scenario_options['output_dir'] = ""
    with changedir(self.ta_exec_dir):
        self.logger.debug("Creating scenario from \"%s\"", self.ta_exec_dir)
        scenario = Scenario(scenario_options)
    if not had_run_1 and os.path.exists('run_1'):
        shutil.rmtree('run_1')
    self.scen = scenario
    return scenario
def restore_state(self, args_, scen, root_logger):
    """Restore runhistory, stats and incumbent from ``args_.restore_state``.

    Parameters
    ----------
    args_ : Namespace
        parsed command-line arguments; ``args_.restore_state`` is the
        folder from which to restore
    scen : Scenario
        scenario of the current run (supplies ``cs`` and ``output_dir``)
    root_logger : logging.Logger
        logger for debug-output

    Returns
    -------
    rh : RunHistory
        restored runhistory
    stats : Stats
        restored statistics
    incumbent
        incumbent configuration from the last trajectory-entry

    Raises
    ------
    FileNotFoundError
        if ``args_.restore_state`` is not an existing directory
    """
    # Check for the folder first, before building on its contents
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path = os.path.join(args_.restore_state, "traj_aclib2.json")
    scen_path = os.path.join(args_.restore_state, "scenario.txt")
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, scen.cs)
    root_logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(scen)
    stats.load(stats_path)
    root_logger.debug("Restored stats from %s", stats_path)
    trajectory = TrajLogger.read_traj_aclib_format(fn=traj_path,
                                                   cs=scen.cs)
    incumbent = trajectory[-1]["incumbent"]
    root_logger.debug("Restored incumbent %s from %s", incumbent, traj_path)
    # Copy traj if output_dir of specified scenario-file is different than
    # the output_dir of the scenario-file in the folder from which to restore.
    if scen.output_dir != InputReader().read_scenario_file(
            scen_path)['output_dir']:
        new_traj_path = os.path.join(scen.output_dir, "traj_aclib2.json")
        shutil.copy(traj_path, new_traj_path)
        # BUG FIX: previously logged rh_path here instead of the path the
        # trajectory was actually copied to.
        root_logger.debug("Copied traj %s", new_traj_path)
    return rh, stats, incumbent
def get_scenario(self, path, ta_exec_dir=None, out_path=None):
    """Construct a Scenario from the scenario-file found under ``path``.

    Parameters
    ----------
    path : str
        folder containing 'scenario.txt'
    ta_exec_dir : str, optional
        directory to change into while constructing the scenario
        (defaults to the cwd)
    out_path : str, optional
        value to use as the scenario's output_dir

    Returns
    -------
    Scenario
        the constructed scenario-object
    """
    # Only remove a 'run_1'-dir afterwards if we created it as a side effect.
    had_run_1 = os.path.exists('run_1')
    if ta_exec_dir is None:
        ta_exec_dir = '.'
    # Create Scenario
    scenario_options = InputReader().read_scenario_file(
        os.path.join(path, 'scenario.txt'))
    scenario_options['output_dir'] = out_path
    with changedir(ta_exec_dir):
        self.logger.debug("Creating scenario from \"%s\"", ta_exec_dir)
        scenario = Scenario(scenario_options)
    if not had_run_1 and os.path.exists('run_1'):
        shutil.rmtree('run_1')
    return scenario
def restore_state_before_scen(self, args_):
    """Read in files for state-restoration: runhistory, stats, trajectory.

    Parameters
    ----------
    args_ : Namespace
        parsed command-line arguments; ``args_.restore_state`` is the
        folder from which to restore and ``args_.scenario_file`` the
        scenario used to build the dummy-scenario

    Returns
    -------
    rh : RunHistory
        restored runhistory
    stats : Stats
        restored statistics (actual scenario must be injected later)
    traj_list_aclib : list[str]
        raw lines of the aclib2-formatted trajectory
    traj_list_old : list[str]
        raw lines of the old-format trajectory

    Raises
    ------
    FileNotFoundError
        if ``args_.restore_state`` is not an existing directory
    """
    # Check for the folder first, before reading anything from it
    # (previously this check ran only after paths were built)
    if not os.path.isdir(args_.restore_state):
        raise FileNotFoundError(
            "Could not find folder from which to restore.")
    # Construct dummy-scenario for object-creation (mainly cs is needed)
    tmp_scen = InputReader().read_scenario_file(args_.scenario_file)
    tmp_scen = Scenario(tmp_scen, cmd_args={'output_dir': ''})
    rh_path = os.path.join(args_.restore_state, "runhistory.json")
    stats_path = os.path.join(args_.restore_state, "stats.json")
    traj_path_aclib = os.path.join(args_.restore_state, "traj_aclib2.json")
    traj_path_old = os.path.join(args_.restore_state, "traj_old.csv")
    # (removed unused local 'scen_path' — it was computed but never read)
    # Load runhistory and stats
    rh = RunHistory(aggregate_func=None)
    rh.load_json(rh_path, tmp_scen.cs)
    self.logger.debug("Restored runhistory from %s", rh_path)
    stats = Stats(
        tmp_scen)  # Need to inject actual scenario later for output_dir!
    stats.load(stats_path)
    self.logger.debug("Restored stats from %s", stats_path)
    with open(traj_path_aclib, 'r') as traj_fn:
        traj_list_aclib = traj_fn.readlines()
    with open(traj_path_old, 'r') as traj_fn:
        traj_list_old = traj_fn.readlines()
    return rh, stats, traj_list_aclib, traj_list_old
def _get_feature_names(self):
    """Return the names of the scenario's instance-features.

    Returns None (after logging) when no features are available or the
    feature-file cannot be found; otherwise returns a list of names,
    either deep-copied from ``scenario.feature_names`` or read from the
    feature-file.
    """
    # Guard: without a feature-dict there is nothing to analyze
    if not self.scenario.feature_dict:
        self.logger.info(
            "No features available. Skipping feature analysis.")
        return
    # If the names are already set on the scenario, use a private copy
    if self.scenario.feature_names:
        return copy.deepcopy(self.scenario.feature_names)
    feat_fn = self.scenario.feature_fn
    self.logger.debug(
        "`scenario.feature_names` is not set. Loading from '%s'", feat_fn)
    with _changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
        if not feat_fn or not os.path.exists(feat_fn):
            self.logger.warning(
                "Feature names are missing. Either provide valid feature_file in scenario "
                "(currently %s) or set `scenario.feature_names` manually." % feat_fn)
            self.logger.error("Skipping Feature Analysis.")
            return
        # Feature names are contained in feature-file and retrieved
        return InputReader().read_instance_features_file(feat_fn)[0]
def __init__(self, scenario, cmd_args=None):
    """Construct scenario object from file or dictionary.

    Parameters
    ----------
    scenario : str or dict
        if str, it will be interpreted as to a path a scenario file
        if dict, it will be directly to get all scenario related information
    cmd_args : dict
        command line arguments that were not processed by argparse
    """
    self.logger = logging.getLogger("scenario")
    self.in_reader = InputReader()
    # A string is interpreted as path to a scenario-file; a dict is used as-is
    if type(scenario) is str:
        scenario_fn = scenario
        self.logger.info("Reading scenario file: %s" % (scenario_fn))
        scenario = self.in_reader.read_scenario_file(scenario_fn)
    elif type(scenario) is dict:
        pass
    else:
        raise TypeError(
            "Wrong type of scenario (str or dict are supported)")
    # Command-line arguments override/extend the scenario-file entries
    if cmd_args:
        scenario.update(cmd_args)
    # Register all known scenario-arguments before parsing
    self._arguments = {}
    self._add_arguments()
    # Parse arguments
    parsed_arguments = {}
    for key, value in self._arguments.items():
        arg_name, arg_value = self._parse_argument(key, scenario, **value)
        parsed_arguments[arg_name] = arg_value
    # Any keys still left in `scenario` were not recognized as arguments
    if len(scenario) != 0:
        raise ValueError('Could not parse the following arguments: %s' %
                         str(list(scenario.keys())))
    # Expose every parsed argument as an attribute of this object
    for arg_name, arg_value in parsed_arguments.items():
        setattr(self, arg_name, arg_value)
    self._transform_arguments()
def __init__(
        self,
        scenario: Union[str, Dict, None] = None,
        cmd_options: Optional[Dict] = None,
):
    """Construct scenario object from file-path, dict or empty defaults.

    Parameters
    ----------
    scenario : str or dict or None
        if str, interpreted as a path to a scenario-file; if dict, used
        directly; if None, an empty options-dict is used
    cmd_options : dict, optional
        options that override/extend the scenario-entries before parsing
    """
    self.logger = logging.getLogger(self.__module__ + '.' +
                                    self.__class__.__name__)
    self.PCA_DIM = 7
    self.in_reader = InputReader()
    self.out_writer = OutputWriter()
    self.output_dir_for_this_run = None  # type: Optional[str]
    # Known scenario-options, taken from the CMDReader's registered actions
    self._arguments = {}  # type: Dict[str, Any]
    self._arguments.update(CMDReader().scen_cmd_actions)
    if scenario is None:
        scenario = {}
    if isinstance(scenario, str):
        # Path to a scenario-file: cmd_options are merged in first, then the
        # combined dict plus the file are run through the CMDReader
        scenario_fn = scenario
        scenario = {}
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        self.logger.info("Reading scenario file: %s", scenario_fn)
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario, scenario_fn)
        # Flatten the parsed namespaces back into one options-dict
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    elif isinstance(scenario, dict):
        # Copy so the caller's dict is not mutated by the updates below
        scenario = copy.copy(scenario)
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario)
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    else:
        raise TypeError(
            "Wrong type of scenario (str or dict are supported)")
    # Expose every parsed option as an attribute of this object
    for arg_name, arg_value in scenario.items():
        setattr(self, arg_name, arg_value)
    self._transform_arguments()
    # Log only simple scalar options that came from the command line
    self.logger.debug("SMAC and Scenario Options:")
    if cmd_options:
        for arg_name, arg_value in cmd_options.items():
            if isinstance(arg_value, (int, str, float)):
                self.logger.debug("%s = %s" % (arg_name, arg_value))
import re import shlex import sys import time import typing import numpy as np from smac.utils.constants import MAXINT, N_TREES from smac.utils.io.input_reader import InputReader, INSTANCE_TYPE, INSTANCE_FEATURES_TYPE __author__ = "Marius Lindauer" __copyright__ = "Copyright 2018, ML4AAD" __license__ = "3-clause BSD" in_reader = InputReader() PARSED_SCENARIO_ARGS_TYPE = typing.Dict[str, typing.Union[str, int, typing.Dict, INSTANCE_TYPE, INSTANCE_FEATURES_TYPE, np.ndarray, typing.List[str]]] parsed_scen_args = {} # type: PARSED_SCENARIO_ARGS_TYPE # Placeholder logger that will not be used in practice, but which will be replaced by # the logger onces the parsing logic is instantiated logger = logging.getLogger(__name__) def truthy(x: typing.Any) -> bool: """Convert x into its truth value""" if isinstance(x, bool):
def read_csv_to_rh(
        self,
        data,
        cs: Union[None, str, ConfigurationSpace] = None,
        id_to_config: Union[None, dict] = None,
        train_inst: Union[None, str, list] = None,
        test_inst: Union[None, str, list] = None,
        instance_features: Union[None, str, dict] = None,
):
    """Interpreting a .csv-file as runhistory.
    Valid values for the header of the csv-file/DataFrame are:
    ['seed', 'cost', 'time', 'status', 'budget', 'config_id', 'instance_id']
    or any parameter- or instance-feature-names.

    Parameters
    ----------
    data: str or pd.DataFrame
        either string to csv-formatted runhistory-file or DataFrame
        containing the same information
    cs: str or ConfigurationSpace
        config-space to use for this runhistory
    id_to_config: dict
        mapping ids to Configuration-objects
    train_inst: str or list[str]
        train instances or path to file
    test_inst: str or list[str]
        test instances or path to file
    instance_features: str or dict
        instance features as dict mapping instance-ids to feature-array
        or file to appropriately formatted instance-feature-file

    Returns:
    --------
    rh: RunHistory
        runhistory with all the runs from the csv-file
    """
    self.logger = logging.getLogger(self.__module__ + '.' +
                                    self.__class__.__name__)
    self.input_reader = InputReader()
    # String arguments are treated as file-paths and read in; anything
    # else is passed through unchanged
    self.train_inst = self.input_reader.read_instance_file(
        train_inst) if type(train_inst) == str else train_inst
    self.test_inst = self.input_reader.read_instance_file(
        test_inst) if type(test_inst) == str else test_inst
    feature_names = []  # names of instance-features
    if type(instance_features) == str:
        feature_names, instance_features = self.input_reader.read_instance_features_file(
            instance_features)
    # Read in data
    if isinstance(data, str):
        self.logger.debug("Detected path for csv-file (\'%s\')", data)
        data = load_csv_to_pandaframe(data, self.logger,
                                      apply_numeric=False)
    # Expecting header as described in docstring
    self.valid_values = [
        'seed', 'cost', 'time', 'status', 'budget', 'config_id',
        'instance_id'
    ]
    # Resolve the configspace: from a pcs-file path, from an existing
    # configuration, or inferred from the non-reserved columns
    if isinstance(cs, str):
        self.logger.debug("Reading PCS from %s", cs)
        with open(cs, 'r') as fh:
            cs = pcs.read(fh)
    elif not cs:
        self.logger.debug("No config-space provided, create from columns")
        if id_to_config:
            # All configs share one configspace; take it from any of them
            cs = np.random.choice(list(
                id_to_config.values())).configuration_space
        else:
            # Parameters = columns that are neither reserved nor features
            parameters = set(data.columns)
            parameters -= set(self.valid_values)
            parameters -= set(feature_names)
            parameters = list(parameters)
            cs = self.create_cs_from_pandaframe(data[parameters])
    parameters = cs.get_hyperparameter_names()
    # Without explicit features or an instance_id-column, remaining
    # columns are assumed to be feature-columns
    if not feature_names and not 'instance_id' in data.columns:
        feature_names = [
            c for c in data.columns
            if not c.lower() in self.valid_values and not c in parameters
        ]
    for c in set(self.valid_values).intersection(set(data.columns)):
        # Cast to numeric
        data[c] = data[c].apply(pd.to_numeric, errors='ignore')
    # Normalize configs and instances into id-columns + lookup-dicts
    data, id_to_config = self.extract_configs(data, cs, id_to_config)
    data, id_to_inst_feats = self.extract_instances(
        data, feature_names, instance_features)
    self.logger.debug(
        "Found: seed=%s, cost=%s, time=%s, status=%s, budget=%s",
        'seed' in data.columns, 'cost' in data.columns,
        'time' in data.columns, 'status' in data.columns,
        'budget' in data.columns)
    # Create RunHistory
    rh = RunHistory()

    def add_to_rh(row):
        # Add one csv-row as a run; missing optional columns fall back to
        # defaults (time=-1, seed=None, budget=0, status=SUCCESS)
        new_status = self._interpret_status(
            row['status']) if 'status' in row else StatusType.SUCCESS
        rh.add(config=id_to_config[row['config_id']],
               cost=row['cost'],
               time=row['time'] if 'time' in row else -1,
               status=new_status,
               instance_id=row['instance_id']
               if 'instance_id' in row else None,
               seed=row['seed'] if 'seed' in row else None,
               budget=row['budget'] if 'budget' in row else 0,
               additional_info=None,
               origin=DataOrigin.INTERNAL)

    data.apply(add_to_rh, axis=1)
    return rh
class CSV2RH(object):
    """Convert csv-formatted run-data into a RunHistory.

    Resolves configurations and instances from the csv-columns (or from
    provided mappings) and adds every row as a run.
    """

    def read_csv_to_rh(
            self,
            data,
            cs: Union[None, str, ConfigurationSpace] = None,
            id_to_config: Union[None, dict] = None,
            train_inst: Union[None, str, list] = None,
            test_inst: Union[None, str, list] = None,
            instance_features: Union[None, str, dict] = None,
    ):
        """Interpreting a .csv-file as runhistory.
        Valid values for the header of the csv-file/DataFrame are:
        ['seed', 'cost', 'time', 'status', 'budget', 'config_id', 'instance_id']
        or any parameter- or instance-feature-names.

        Parameters
        ----------
        data: str or pd.DataFrame
            either string to csv-formatted runhistory-file or DataFrame
            containing the same information
        cs: str or ConfigurationSpace
            config-space to use for this runhistory
        id_to_config: dict
            mapping ids to Configuration-objects
        train_inst: str or list[str]
            train instances or path to file
        test_inst: str or list[str]
            test instances or path to file
        instance_features: str or dict
            instance features as dict mapping instance-ids to feature-array
            or file to appropriately formatted instance-feature-file

        Returns:
        --------
        rh: RunHistory
            runhistory with all the runs from the csv-file
        """
        self.logger = logging.getLogger(self.__module__ + '.' +
                                        self.__class__.__name__)
        self.input_reader = InputReader()
        # String arguments are treated as file-paths and read in
        self.train_inst = self.input_reader.read_instance_file(
            train_inst) if type(train_inst) == str else train_inst
        self.test_inst = self.input_reader.read_instance_file(
            test_inst) if type(test_inst) == str else test_inst
        feature_names = []  # names of instance-features
        if type(instance_features) == str:
            feature_names, instance_features = self.input_reader.read_instance_features_file(
                instance_features)
        # Read in data
        if isinstance(data, str):
            self.logger.debug("Detected path for csv-file (\'%s\')", data)
            data = load_csv_to_pandaframe(data, self.logger,
                                          apply_numeric=False)
        # Expecting header as described in docstring
        self.valid_values = [
            'seed', 'cost', 'time', 'status', 'budget', 'config_id',
            'instance_id'
        ]
        # Resolve the configspace: pcs-file path, existing configuration's
        # space, or inferred from the non-reserved columns
        if isinstance(cs, str):
            self.logger.debug("Reading PCS from %s", cs)
            with open(cs, 'r') as fh:
                cs = pcs.read(fh)
        elif not cs:
            self.logger.debug("No config-space provided, create from columns")
            if id_to_config:
                # All configs share one configspace; take it from any of them
                cs = np.random.choice(list(
                    id_to_config.values())).configuration_space
            else:
                # Parameters = columns that are neither reserved nor features
                parameters = set(data.columns)
                parameters -= set(self.valid_values)
                parameters -= set(feature_names)
                parameters = list(parameters)
                cs = self.create_cs_from_pandaframe(data[parameters])
        parameters = cs.get_hyperparameter_names()
        # Without explicit features or an instance_id-column, remaining
        # columns are assumed to be feature-columns
        if not feature_names and not 'instance_id' in data.columns:
            feature_names = [
                c for c in data.columns
                if not c.lower() in self.valid_values and not c in parameters
            ]
        for c in set(self.valid_values).intersection(set(data.columns)):
            # Cast to numeric
            data[c] = data[c].apply(pd.to_numeric, errors='ignore')
        # Normalize configs and instances into id-columns + lookup-dicts
        data, id_to_config = self.extract_configs(data, cs, id_to_config)
        data, id_to_inst_feats = self.extract_instances(
            data, feature_names, instance_features)
        self.logger.debug(
            "Found: seed=%s, cost=%s, time=%s, status=%s, budget=%s",
            'seed' in data.columns, 'cost' in data.columns,
            'time' in data.columns, 'status' in data.columns,
            'budget' in data.columns)
        # Create RunHistory
        rh = RunHistory()

        def add_to_rh(row):
            # Add one csv-row as a run; missing optional columns fall back
            # to defaults (time=-1, seed=None, budget=0, status=SUCCESS)
            new_status = self._interpret_status(
                row['status']) if 'status' in row else StatusType.SUCCESS
            rh.add(config=id_to_config[row['config_id']],
                   cost=row['cost'],
                   time=row['time'] if 'time' in row else -1,
                   status=new_status,
                   instance_id=row['instance_id']
                   if 'instance_id' in row else None,
                   seed=row['seed'] if 'seed' in row else None,
                   budget=row['budget'] if 'budget' in row else 0,
                   additional_info=None,
                   origin=DataOrigin.INTERNAL)

        data.apply(add_to_rh, axis=1)
        return rh

    def create_cs_from_pandaframe(self, data):
        """Create a ConfigurationSpace treating every column as a float-
        hyperparameter ranging one unit beyond the observed min/max."""
        # TODO use from pyimp after https://github.com/automl/ParameterImportance/issues/72 is implemented
        warnings.warn(
            "No parameter configuration space (pcs) provided! "
            "Interpreting all parameters as floats. This might lead "
            "to suboptimal analysis.", RuntimeWarning)
        self.logger.debug("Interpreting as parameters: %s", data.columns)
        minima = data.min()  # to define ranges of hyperparameter
        maxima = data.max()
        cs = ConfigurationSpace(seed=42)
        for p in data.columns:
            # Widen the range by 1 on each side so observed values are interior
            cs.add_hyperparameter(
                UniformFloatHyperparameter(p,
                                           lower=minima[p] - 1,
                                           upper=maxima[p] + 1))
        return cs

    def _interpret_status(self, status, types=None):
        """Map a status-string onto a StatusType, defaulting to CRASHED.

        Parameters
        ----------
        status: str
            status-string
        types: dict[str:StatusType]
            optional, mapping to use

        Returns
        -------
        status: StatusType
            interpreted status-type
        """
        if not types:
            # Accept both plain names and their "STATUSTYPE."-prefixed reprs
            types = {
                "SAT": StatusType.SUCCESS,
                "UNSAT": StatusType.SUCCESS,
                "SUCCESS": StatusType.SUCCESS,
                "STATUSTYPE.SUCCESS": StatusType.SUCCESS,
                "TIMEOUT": StatusType.TIMEOUT,
                "STATUSTYPE.TIMEOUT": StatusType.TIMEOUT,
                "CRASHED": StatusType.CRASHED,
                "STATUSTYPE.CRASHED": StatusType.CRASHED,
                "MEMOUT": StatusType.MEMOUT,
                "STATUSTYPE.MEMOUT": StatusType.MEMOUT,
                "ABORT": StatusType.ABORT,
                "STATUSTYPE.ABORT": StatusType.ABORT,
            }
        # Comparison is case-/whitespace-insensitive
        status = status.strip().upper()
        if status in types:
            status = types[status]
        else:
            self.logger.warning(
                "Could not parse %s as a status. Valid values "
                "are: %s. Treating as CRASHED run.", status, types.keys())
            status = StatusType.CRASHED
        return status

    def extract_configs(self, data, cs: ConfigurationSpace,
                        id_to_config=None):
        """ After completion, every unique configuration in the data will
        have a corresponding id in the data-frame. The data-frame is expected
        to either contain a column for config-id OR columns for each
        individual hyperparameter. Parameter-names will be used from the
        provided configspace. If a mapping of ids to configurations already
        exists, it will be used.

        Parameters
        ----------
        data: pd.DataFrame
            pandas dataframe containing either a column called `config_id`
            or a column for every individual parameter
        cs: ConfigurationSpace
            optional, if provided the `parameters`-argument will be ignored
        id_to_config: dict[int:Configuration]
            optional, mapping ids to Configurations (necessary when using
            `config_id`-column)

        Returns
        -------
        data: pd.DataFrame
            if no config-id-columns was there before, there is one now.
        id_to_config: dict
            mapping every id to a configuration
        """
        if id_to_config:
            # Invert the given mapping so configs can be looked up by object
            config_to_id = {conf: name for name, conf in id_to_config.items()}
        else:
            id_to_config = {}
            config_to_id = {}
        parameters = cs.get_hyperparameter_names()
        if 'config_id' in data.columns and not id_to_config:
            raise ValueError("When defining configs with \"config_id\" "
                             "in header, you need to provide the argument "
                             "\"configurations\" to the CSV2RH-object - "
                             "either as a dict, mapping the id's to "
                             "Configurations or as a path to a csv-file "
                             "containing the necessary information.")
        if 'config_id' not in data.columns:
            # Map to configurations
            ids_in_order = []
            data['config_id'] = -1

            def add_config(row):
                # Build a Configuration from this row's parameter-columns;
                # empty cells are treated as inactive parameters
                values = {
                    name: row[name]
                    for name in parameters if row[name] != ''
                }
                config = deactivate_inactive_hyperparameters(
                    fix_types(values, cs), cs)
                if config not in config_to_id:
                    # First occurrence: assign the next free id
                    config_to_id[config] = len(config_to_id)
                row['config_id'] = config_to_id[config]
                return row

            data = data.apply(add_config, axis=1)
            # Invert back: despite the variable names, this yields {id: config}
            id_to_config = {conf: name for name,
                            conf in config_to_id.items()}
            data["config_id"] = pd.to_numeric(data["config_id"])
        # Check whether all config-ids are present
        if len(set(data['config_id']) - set(id_to_config.keys())) > 0:
            raise ValueError(
                "config id {} cannot be identified (is your configurations.csv complete? Or maybe "
                "this is a type-issue...".format(
                    set(data['config_id']) - set(id_to_config.keys())))
        return data, id_to_config

    def extract_instances(self, data, feature_names, features):
        """ After completion, every unique instance in the data will have a
        corresponding id in the data-frame. The data-frame is expected to
        either contain a column for instance-id OR columns for each
        individual instance-feature. Parameter-names will be used from the
        provided configspace. If a mapping of ids to configurations already
        exists, it will be used.

        Parameters
        ----------
        data: pd.DataFrame
            pandas dataframe containing either a column called `instance_id`
            or a column for every individual instance-features
        feature_names: list[str]
            optional, list of feature-names
        features: dict[int:np.array]
            optional, mapping ids to instance-feature vectors (necessary
            when using `instance_id`-column)

        Returns
        -------
        data: pd.DataFrame
            if no instance_id-columns was there before, there is one now.
        id_to_inst_feats: dict
            mapping every id to instance-features
        """
        id_to_inst_feats = {}
        inst_feats_to_id = {}
        if features:
            # Feature-vectors are stringified to make them hashable keys
            id_to_inst_feats = {
                i: tuple([str(f) for f in feat])
                for i, feat in features.items()
            }
            inst_feats_to_id = {
                feat: i
                for i, feat in id_to_inst_feats.items()
            }
        if 'instance_id' in data.columns and not features:
            raise ValueError(
                "Instances defined via \'instance_id\'-column, but no instance features available."
            )
        elif 'instance_id' not in data.columns and feature_names:
            # Add new column for instance-ids
            data['instance_id'] = -1
            # NOTE(review): self.old only ever records the last-seen feature
            # tuple and is never read here — looks like leftover debugging.
            self.old = None

            def add_instance(row):
                row_features = tuple([str(row[idx]) for idx in feature_names])
                if row_features not in inst_feats_to_id:
                    new_id = len(inst_feats_to_id)
                    inst_feats_to_id[row_features] = new_id
                    # NOTE(review): storing the whole `features`-mapping here
                    # looks wrong — presumably `row_features` was intended;
                    # confirm before relying on id_to_inst_feats for new ids.
                    id_to_inst_feats[new_id] = features
                row['instance_id'] = inst_feats_to_id[row_features]
                self.old = row_features
                return row

            data = data.apply(add_instance, axis=1)
        else:
            self.logger.info("No instances detected.")
        # Convert the stringified feature-tuples back into float-arrays
        id_to_inst_feats = {
            i: np.array(f).astype('float64')
            for i, f in id_to_inst_feats.items()
        }
        return data, id_to_inst_feats
def __init__(self, scenario, cmd_args: dict = None, run_id: int = 1):
    """Constructor

    Parameters
    ----------
    scenario : str or dict
        If str, it will be interpreted as to a path a scenario file
        If dict, it will be directly to get all scenario related information
    cmd_args : dict
        Command line arguments that were not processed by argparse
    run_id: int
        Run ID will be used as suffix for output_dir
    """
    self.logger = logging.getLogger(self.__module__ + '.' +
                                    self.__class__.__name__)
    self.PCA_DIM = 7
    self.in_reader = InputReader()
    self.out_writer = OutputWriter()
    # A string is interpreted as path to a scenario-file; a dict is copied
    # so the caller's dict is not mutated by the updates below
    if type(scenario) is str:
        scenario_fn = scenario
        self.logger.info("Reading scenario file: %s" % (scenario_fn))
        scenario = self.in_reader.read_scenario_file(scenario_fn)
    elif type(scenario) is dict:
        scenario = copy.copy(scenario)
    else:
        raise TypeError(
            "Wrong type of scenario (str or dict are supported)")
    # Command-line arguments override/extend the scenario-file entries
    if cmd_args:
        scenario.update(cmd_args)
    # Register known arguments and their mutually-exclusive groups
    self._arguments = {}
    self._groups = defaultdict(set)
    self._add_arguments()
    # Make cutoff mandatory if run_obj is runtime
    if scenario['run_obj'] == 'runtime':
        self._arguments['cutoff_time']['required'] = True
    # Parse arguments
    parsed_arguments = {}
    for key, value in self._arguments.items():
        arg_name, arg_value = self._parse_argument(key, scenario, **value)
        parsed_arguments[arg_name] = arg_value
    # Any keys still left in `scenario` were not recognized as arguments
    if len(scenario) != 0:
        raise ValueError('Could not parse the following arguments: %s' %
                         str(list(scenario.keys())))
    # Enforce that exactly one member of each mutually-exclusive group is set
    for group, potential_members in self._groups.items():
        n_members_in_scenario = 0
        for pm in potential_members:
            if pm in parsed_arguments:
                n_members_in_scenario += 1
        if n_members_in_scenario != 1:
            raise ValueError('Exactly one of the following arguments must '
                             'be specified in the scenario file: %s' %
                             str(potential_members))
    # Expose every parsed argument as an attribute of this object
    for arg_name, arg_value in parsed_arguments.items():
        setattr(self, arg_name, arg_value)
    self._transform_arguments()
    # Suffix output_dir with the run-id and persist the scenario-file
    if self.output_dir:
        self.output_dir += "_run%d" % (run_id)
        self.out_writer.write_scenario_file(self)
    self.logger.debug("Scenario Options:")
    # Log only simple scalar options
    for arg_name, arg_value in parsed_arguments.items():
        if isinstance(arg_value, (int, str, float)):
            self.logger.debug("%s = %s" % (arg_name, arg_value))
class Scenario(object):
    """
    Scenario contains the configuration of the optimization process and
    constructs a scenario object from a file or dictionary.

    All arguments set in the Scenario are set as attributes.
    """

    def __init__(self, scenario, cmd_args: dict = None, run_id: int = 1):
        """Constructor

        Parameters
        ----------
        scenario : str or dict
            If str, it will be interpreted as to a path a scenario file
            If dict, it will be directly to get all scenario related
            information
        cmd_args : dict
            Command line arguments that were not processed by argparse
        run_id: int
            Run ID will be used as suffix for output_dir
        """
        self.logger = logging.getLogger(
            self.__module__ + '.' + self.__class__.__name__)
        self.PCA_DIM = 7

        self.in_reader = InputReader()
        self.out_writer = OutputWriter()

        if type(scenario) is str:
            scenario_fn = scenario
            self.logger.info("Reading scenario file: %s" % (scenario_fn))
            scenario = self.in_reader.read_scenario_file(scenario_fn)
        elif type(scenario) is dict:
            # Copy so popping parsed keys does not mutate the caller's dict.
            scenario = copy.copy(scenario)
        else:
            raise TypeError(
                "Wrong type of scenario (str or dict are supported)")

        if cmd_args:
            scenario.update(cmd_args)

        self._arguments = {}
        self._groups = defaultdict(set)
        self._add_arguments()

        # Make cutoff mandatory if run_obj is runtime
        if scenario['run_obj'] == 'runtime':
            self._arguments['cutoff_time']['required'] = True

        # Parse arguments; _parse_argument pops every matched key from
        # `scenario`, so anything left afterwards is unknown.
        parsed_arguments = {}
        for key, value in self._arguments.items():
            arg_name, arg_value = self._parse_argument(key, scenario, **value)
            parsed_arguments[arg_name] = arg_value

        if len(scenario) != 0:
            raise ValueError('Could not parse the following arguments: %s'
                             % str(list(scenario.keys())))

        # NOTE(review): this check compares group member *names* against the
        # keys of `parsed_arguments`, which are *dest* names and are always
        # present (possibly with value None). For groups whose members use a
        # different dest (e.g. 'paramfile' -> 'pcs_fn') the check may be
        # vacuous. Kept as-is to avoid rejecting scenarios that currently
        # pass — TODO confirm intended semantics before tightening.
        for group, potential_members in self._groups.items():
            n_members_in_scenario = 0
            for pm in potential_members:
                if pm in parsed_arguments:
                    n_members_in_scenario += 1
            if n_members_in_scenario != 1:
                raise ValueError('Exactly one of the following arguments must '
                                 'be specified in the scenario file: %s'
                                 % str(potential_members))

        for arg_name, arg_value in parsed_arguments.items():
            setattr(self, arg_name, arg_value)

        self._transform_arguments()

        if self.output_dir:
            self.output_dir += "_run%d" % (run_id)
        self.out_writer.write_scenario_file(self)

        self.logger.debug("Scenario Options:")
        for arg_name, arg_value in parsed_arguments.items():
            if isinstance(arg_value, (int, str, float)):
                self.logger.debug("%s = %s" % (arg_name, arg_value))

    def add_argument(self, name: str, help: str, callback=None, default=None,
                     dest: str = None, required: bool = False,
                     mutually_exclusive_group: str = None, choice=None):
        """Add argument to the scenario object.

        Parameters
        ----------
        name : str
            Argument name
        help : str
            Help text which can be displayed in the documentation.
        callback : callable, optional
            If given, the callback will be called when the argument is parsed.
            Useful for custom casting/typechecking.
        default : object, optional
            Default value if the argument is not given. Default to ``None``.
        dest : str
            Assign the argument to scenario object by this name.
        required : bool
            If True, the scenario will raise an error if the argument is not
            given.
        mutually_exclusive_group : str
            Group arguments with similar behaviour by assigning the same
            string value. The scenario will ensure that exactly one of the
            arguments is given. Is used for example to ensure that either a
            configuration space object or a parameter file is passed to the
            scenario. Can not be used together with ``required``.
        choice: list/set/tuple
            List of possible string for this argument
        """
        if not isinstance(required, bool):
            raise TypeError("Argument 'required' must be of type 'bool'.")
        if required is not False and mutually_exclusive_group is not None:
            raise ValueError("Cannot make argument '%s' required and add it to"
                             " a group of mutually exclusive arguments."
                             % name)
        if choice is not None and not isinstance(choice, (list, set, tuple)):
            raise TypeError('Choice must be of type list/set/tuple.')

        self._arguments[name] = {
            'default': default,
            'required': required,
            'help': help,
            'dest': dest,
            'callback': callback,
            'choice': choice
        }
        if mutually_exclusive_group:
            self._groups[mutually_exclusive_group].add(name)

    def _parse_argument(self, name: str, scenario: dict, help: str,
                        callback=None, default=None, dest: str = None,
                        required: bool = False, choice=None):
        """Search the scenario dict for a single allowed argument and parse it.

        Side effect: the argument is removed from the scenario dict if found.

        name : str
            Argument name, as specified in the Scenario class.
        scenario : dict
            Scenario dict as provided by the user or as parsed by the cli
            interface.
        help : str
            Help string of the argument
        callback : callable, optional (default=None)
            If given, will be called to transform the given argument.
        default : object, optional (default=None)
            Will be used as default value if the argument is not given by the
            user.
        dest : str, optional (default=None)
            Will be used as member name of the scenario.
        required : bool (default=False)
            If ``True``, the scenario will raise an Exception if the argument
            is not given.
        choice : list, optional (default=None)
            If given, the scenario checks whether the argument is in the
            list. If not, it raises an Exception.

        Returns
        -------
        str
            Member name of the attribute.
        object
            Value of the attribute.
        """
        normalized_name = name.lower().replace('-', '').replace('_', '')
        value = None

        # Allows us to pop elements in order to remove all parsed elements
        # from the dictionary
        for key in list(scenario.keys()):
            # Check all possible ways to spell an argument
            normalized_key = key.lower().replace('-', '').replace('_', '')
            if normalized_key == normalized_name:
                value = scenario.pop(key)

        if dest is None:
            dest = name.lower().replace('-', '_')

        if required is True:
            if value is None:
                raise ValueError('Required scenario argument %s not given.'
                                 % name)

        if value is None:
            value = default

        if value is not None and callable(callback):
            value = callback(value)

        if value is not None and choice:
            # NOTE: assumes the value is a string at this point — TODO confirm
            # for arguments that combine `choice` with a casting callback.
            value = value.strip()
            if value not in choice:
                raise ValueError('Argument %s can only take a value in %s, '
                                 'but is %s' % (name, choice, value))

        return dest, value

    def _add_arguments(self):
        """Register every allowed scenario option with its default,
        destination attribute and casting callback."""
        # Add allowed arguments
        self.add_argument(
            name='abort_on_first_run_crash',
            help="If true, *SMAC* will abort if the first run of "
                 "the target algorithm crashes.",
            default=True, callback=_is_truthy)
        # Fix: this option was previously registered twice with identical
        # settings; the redundant second registration has been removed.
        self.add_argument(name='always_race_default', default=False,
                          help="Race new incumbents always against default "
                               "configuration.",
                          callback=_is_truthy, dest="always_race_default")
        self.add_argument(
            name='algo', dest='ta', callback=shlex.split,
            help="Specifies the target algorithm call that *SMAC* "
                 "will optimize. Interpreted as a bash-command.")
        self.add_argument(
            name='execdir', default='.',
            help="Specifies the path to the execution-directory.")
        self.add_argument(name='deterministic', default=False,
                          help="If true, the optimization process will be "
                               "repeatable.",
                          callback=_is_truthy)
        self.add_argument(name='intensification_percentage', default=0.5,
                          help="The fraction of time to be used on "
                               "intensification (versus choice of next "
                               "Configurations).",
                          callback=float)
        self.add_argument(name='paramfile',
                          help="Specifies the path to the "
                               "PCS-file.",
                          dest='pcs_fn', mutually_exclusive_group='cs')
        self.add_argument(name='run_obj',
                          help="Defines what metric to optimize. When "
                               "optimizing runtime, *cutoff_time* is "
                               "required as well.",
                          required=True, choice=['runtime', 'quality'])
        self.add_argument(name='overall_obj',
                          help="PARX, where X is an integer defining the "
                               "penalty imposed on timeouts (i.e. runtimes that "
                               "exceed the *cutoff-time*).",
                          default='par10')
        self.add_argument(name='cost_for_crash', default=float(MAXINT),
                          help="Defines the cost-value for crashed runs "
                               "on scenarios with quality as run-obj.",
                          callback=float)
        self.add_argument(name='cutoff_time',
                          help="Maximum runtime, after which the "
                               "target algorithm is cancelled. **Required "
                               "if *run_obj* is runtime.**",
                          default=None, dest='cutoff', callback=float)
        self.add_argument(name='memory_limit',
                          help="Maximum available memory the target algorithm "
                               "can occupy before being cancelled.")
        self.add_argument(
            name='tuner-timeout',
            help="Maximum amount of CPU-time used for optimization.",
            default=numpy.inf, dest='algo_runs_timelimit', callback=float)
        self.add_argument(
            name='wallclock_limit',
            help="Maximum amount of wallclock-time used for optimization.",
            default=numpy.inf, callback=float)
        self.add_argument(
            name='runcount_limit',
            help="Maximum number of algorithm-calls during optimization.",
            default=numpy.inf, callback=float, dest="ta_run_limit")
        self.add_argument(name='minR',
                          help="Minimum number of calls per configuration.",
                          default=1, callback=int, dest='minR')
        self.add_argument(name='maxR',
                          help="Maximum number of calls per configuration.",
                          default=2000, callback=int, dest='maxR')
        self.add_argument(
            name='instance_file',
            help="Specifies the file with the training-instances.",
            dest='train_inst_fn')
        self.add_argument(name='test_instance_file',
                          help="Specifies the file with the test-instances.",
                          dest='test_inst_fn')
        self.add_argument(
            name='feature_file',
            help="Specifies the file with the instance-features.",
            dest='feature_fn')
        self.add_argument(
            name='output_dir',
            help="Specifies the output-directory for all emerging "
                 "files, such as logging and results.",
            default="smac3-output_%s" % (datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d_%H:%M:%S_(%f)')))
        self.add_argument(
            name='input_psmac_dirs', default=None,
            help="For parallel SMAC, multiple output-directories "
                 "are used.")
        self.add_argument(name='shared_model',
                          help="Whether to run SMAC in parallel mode.",
                          default=False, callback=_is_truthy)
        self.add_argument(name='instances', default=[[None]], help=None,
                          dest='train_insts')
        self.add_argument(name='test_instances', default=[[None]], help=None,
                          dest='test_insts')
        self.add_argument(name='initial_incumbent', default="DEFAULT",
                          help="DEFAULT is the default from the PCS.",
                          dest='initial_incumbent',
                          choice=['DEFAULT', 'RANDOM'])
        # instance name -> feature vector
        self.add_argument(name='features', default={}, help=None,
                          dest='feature_dict')
        # ConfigSpace object
        self.add_argument(name='cs', help=None, mutually_exclusive_group='cs')

    def _transform_arguments(self):
        """Derive attributes from the parsed options: par-factor, instance
        lists (with instance-specifics split off), feature array, the
        ConfigSpace read from the pcs file, and the output directory."""
        self.n_features = len(self.feature_dict)
        self.feature_array = None

        # Fix: par_str was previously unbound when overall_obj matched
        # neither prefix, raising NameError below; default to "no par-factor".
        par_str = ''
        if self.overall_obj[:3] in ["PAR", "par"]:
            par_str = self.overall_obj[3:]
        elif self.overall_obj[:4] in ["mean", "MEAN"]:
            par_str = self.overall_obj[4:]
        # Check for par-value as in "par10"/ "mean5"
        if len(par_str) > 0:
            self.par_factor = int(par_str)
        else:
            self.logger.debug("No par-factor detected. Using 1 by default.")
            self.par_factor = 1

        # read instance files
        if self.train_inst_fn:
            if os.path.isfile(self.train_inst_fn):
                self.train_insts = self.in_reader.read_instance_file(
                    self.train_inst_fn)
            else:
                self.logger.error("Have not found instance file: %s"
                                  % (self.train_inst_fn))
                sys.exit(1)
        if self.test_inst_fn:
            if os.path.isfile(self.test_inst_fn):
                self.test_insts = self.in_reader.read_instance_file(
                    self.test_inst_fn)
            else:
                self.logger.error("Have not found test instance file: %s"
                                  % (self.test_inst_fn))
                sys.exit(1)

        self.instance_specific = {}

        def extract_instance_specific(instance_list):
            # First token is the instance name; the rest is recorded as
            # instance-specific information.
            insts = []
            for inst in instance_list:
                if len(inst) > 1:
                    self.instance_specific[inst[0]] = " ".join(inst[1:])
                insts.append(inst[0])
            return insts

        self.train_insts = extract_instance_specific(self.train_insts)
        if self.test_insts:
            self.test_insts = extract_instance_specific(self.test_insts)

        self.train_insts = self._to_str_and_warn(l=self.train_insts)
        self.test_insts = self._to_str_and_warn(l=self.test_insts)

        # read feature file
        if self.feature_fn:
            if os.path.isfile(self.feature_fn):
                self.feature_dict = self.in_reader.read_instance_features_file(
                    self.feature_fn)[1]

        if self.feature_dict:
            self.feature_array = []
            for inst_ in self.train_insts:
                self.feature_array.append(self.feature_dict[inst_])
            self.feature_array = numpy.array(self.feature_array)
            self.n_features = self.feature_array.shape[1]

        # read pcs file
        if self.pcs_fn and os.path.isfile(self.pcs_fn):
            with open(self.pcs_fn) as fp:
                pcs_str = fp.readlines()
                try:
                    self.cs = pcs.read(pcs_str)
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    self.logger.debug(
                        "Could not parse pcs file with old format; trying new format next"
                    )
                    self.cs = pcs_new.read(pcs_str)
                self.cs.seed(42)
        elif self.pcs_fn:
            self.logger.error("Have not found pcs file: %s" % (self.pcs_fn))
            sys.exit(1)

        # you cannot set output dir to None directly
        # because None is replaced by default always
        if self.output_dir == "":
            self.output_dir = None
            self.logger.debug("Deactivate output directory.")
        else:
            self.logger.info("Output to %s" % (self.output_dir))

        if self.shared_model and self.input_psmac_dirs is None:
            # per default, we assume that
            # all psmac runs write to the same directory
            self.input_psmac_dirs = [self.output_dir]

    def __getstate__(self):
        # Loggers are not picklable; drop and restore in __setstate__.
        d = dict(self.__dict__)
        del d['logger']
        return d

    def __setstate__(self, d):
        self.__dict__.update(d)
        self.logger = logging.getLogger(
            self.__module__ + '.' + self.__class__.__name__)

    def _to_str_and_warn(self, l: typing.List[typing.Any]):
        """Cast all non-None entries of ``l`` to str in place, warning once
        if any cast happened. Returns the (mutated) list."""
        warn_ = False
        for i, e in enumerate(l):
            if e is not None and not isinstance(e, str):
                warn_ = True
                try:
                    l[i] = str(e)
                except ValueError:
                    raise ValueError("Failed to cast all instances to str")
        if warn_:
            # Fix: logger.warn is a deprecated alias of logger.warning.
            self.logger.warning("All instances were casted to str.")
        return l

    def write_options_to_doc(self, path='scenario_options.rst'):
        """Writes the option-list to file for autogeneration in documentation.
        The list is created in doc/conf.py and read in doc/options.rst.

        Parameters
        ----------
        path: string
            Where to write to (relative to doc-folder since executed in
            conf.py)
        """
        exclude = ['cs', 'features', 'instances', 'test_instances']
        with open(path, 'w') as fh:
            for arg in sorted(self._arguments.keys()):
                if arg in exclude:
                    continue
                fh.write(":{}: ".format(arg))
                fh.write("{}".format(self._arguments[arg]['help']))
                if self._arguments[arg]['default']:
                    fh.write(" Default: {}.".format(
                        self._arguments[arg]['default']))
                if self._arguments[arg]['choice']:
                    fh.write(" Must be from: {}.".format(
                        self._arguments[arg]['choice']))
                fh.write("\n")
            fh.write("\n\n")
def __init__(self, scenario, cmd_args: dict = None):
    """ Creates a scenario-object. The output_dir will be
    "output_dir/run_id/" and if that exists already, the old folder and its
    content will be moved (without any checks on whether it's still used by
    another process) to "output_dir/run_id.OLD". If that exists, ".OLD"s
    will be appended until possible.

    Parameters
    ----------
    scenario : str or dict
        If str, it will be interpreted as to a path a scenario file
        If dict, it will be directly to get all scenario related information
    cmd_args : dict
        Command line arguments that were not processed by argparse
    """
    self.logger = logging.getLogger(
        self.__module__ + '.' + self.__class__.__name__)
    self.PCA_DIM = 7

    self.in_reader = InputReader()
    self.out_writer = OutputWriter()

    self.output_dir_for_this_run = None

    # A path means "read the file"; a dict is used as-is (copied so the
    # parsing below can pop keys without mutating the caller's dict).
    if type(scenario) is str:
        self.logger.info("Reading scenario file: %s" % scenario)
        scenario = self.in_reader.read_scenario_file(scenario)
    elif type(scenario) is dict:
        scenario = copy.copy(scenario)
    else:
        raise TypeError(
            "Wrong type of scenario (str or dict are supported)")

    if cmd_args:
        scenario.update(cmd_args)

    self._arguments = {}
    self._groups = defaultdict(set)
    self._add_arguments()

    # A runtime objective requires a cutoff.
    if scenario['run_obj'] == 'runtime':
        self._arguments['cutoff_time']['required'] = True

    # Recognised options are popped from `scenario` during parsing.
    options = {}
    for arg, spec in self._arguments.items():
        dest, val = self._parse_argument(arg, scenario, **spec)
        options[dest] = val

    if scenario:
        raise ValueError('Could not parse the following arguments: %s'
                         % str(list(scenario.keys())))

    # Mutually-exclusive groups: exactly one member must be present.
    for members in self._groups.values():
        given = sum(1 for member in members if member in options)
        if given != 1:
            raise ValueError('Exactly one of the following arguments must '
                             'be specified in the scenario file: %s'
                             % str(members))

    for dest, val in options.items():
        setattr(self, dest, val)
    self._transform_arguments()

    self.logger.debug("Scenario Options:")
    for dest, val in options.items():
        if isinstance(val, (int, str, float)):
            self.logger.debug("%s = %s" % (dest, val))
def __init__(self, scenario=None, cmd_options: dict = None):
    """ Creates a scenario-object. The output_dir will be
    "output_dir/run_id/" and if that exists already, the old folder and its
    content will be moved (without any checks on whether it's still used by
    another process) to "output_dir/run_id.OLD". If that exists, ".OLD"s
    will be appended until possible.

    Parameters
    ----------
    scenario : str or dict or None
        If str, it will be interpreted as to a path a scenario file
        If dict, it will be directly to get all scenario related information
        If None, only cmd_options will be used
    cmd_options : dict
        Options from parsed command line arguments
    """
    self.logger = logging.getLogger(
        self.__module__ + '.' + self.__class__.__name__)
    self.PCA_DIM = 7

    self.in_reader = InputReader()
    self.out_writer = OutputWriter()

    self.output_dir_for_this_run = None

    # Pre-register the scenario options known to the command-line reader so
    # they are available before any parsing happens.
    self._arguments = {}
    self._arguments.update(CMDReader().scen_cmd_actions)

    # None means "no scenario file/dict" — fall through to the dict branch
    # with only cmd_options.
    if scenario is None:
        scenario = {}
    if isinstance(scenario, str):
        scenario_fn = scenario
        scenario = {}
        # cmd_options take effect as overrides of the file contents; the
        # actual validation/casting is delegated to CMDReader.
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        self.logger.info("Reading scenario file: %s", scenario_fn)
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario, scenario_fn)
        # Rebuild `scenario` from the two parsed namespaces; scenario
        # options (scen_args_) intentionally win over smac options on key
        # collisions because they are applied last.
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    elif isinstance(scenario, dict):
        # Copy so the merging below does not mutate the caller's dict.
        scenario = copy.copy(scenario)
        if cmd_options:
            scenario.update(cmd_options)
        cmd_reader = CMDReader()
        smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
            scenario)
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
    else:
        raise TypeError(
            "Wrong type of scenario (str or dict are supported)")

    # Every parsed option becomes an attribute of the scenario object.
    for arg_name, arg_value in scenario.items():
        setattr(self, arg_name, arg_value)

    self._transform_arguments()

    self.logger.debug("SMAC and Scenario Options:")
    if cmd_options:
        for arg_name, arg_value in cmd_options.items():
            if isinstance(arg_value, (int, str, float)):
                self.logger.debug("%s = %s" % (arg_name, arg_value))
def feature_analysis(self,
                     box_violin=False, correlation=False,
                     clustering=False, importance=False):
    """Run the requested feature analyses (ASAPY) and register results under
    ``self.website["Feature Analysis"]``, then rebuild the website.

    Parameters
    ----------
    box_violin, correlation, clustering, importance : bool
        Toggles for the individual analyses; if all are False this is a
        no-op.
    """
    if not (box_violin or correlation or clustering or importance):
        self.logger.debug("No feature analysis.")
        return

    # FEATURE ANALYSIS (ASAPY)
    # TODO make the following line prettier
    # TODO feat-names from scenario?
    in_reader = InputReader()
    feat_fn = self.scenario.feature_fn

    if not self.scenario.feature_names:
        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            if not feat_fn or not os.path.exists(feat_fn):
                # Fix: the message previously contained a '{}' placeholder
                # that was never filled in; report the actual filename via
                # lazy %-formatting.
                self.logger.warning(
                    "Feature Analysis needs valid feature "
                    "file! Either %s is not a valid "
                    "filename or features are not saved in "
                    "the scenario.", feat_fn)
                self.logger.error("Skipping Feature Analysis.")
                return
            else:
                feat_names = in_reader.read_instance_features_file(
                    self.scenario.feature_fn)[0]
    else:
        feat_names = copy.deepcopy(self.scenario.feature_names)

    self.website["Feature Analysis"] = OrderedDict([])

    # feature importance using forward selection
    if importance:
        self.website["Feature Analysis"][
            "Feature Importance"] = OrderedDict()
        imp, plots = self.analyzer.feature_importance()
        imp = DataFrame(data=list(imp.values()),
                        index=list(imp.keys()),
                        columns=["Error"])
        imp = imp.to_html()  # this is a table with the values in html
        self.website["Feature Analysis"]["Feature Importance"]["Table"] = {
            "table": imp
        }
        for p in plots:
            name = os.path.splitext(os.path.basename(p))[0]
            self.website["Feature Analysis"]["Feature Importance"][name] = {
                "figure": p
            }

    # box and violin plots
    if box_violin:
        name_plots = self.analyzer.feature_analysis(
            'box_violin', feat_names)
        self.website["Feature Analysis"][
            "Violin and Box Plots"] = OrderedDict()
        for plot_tuple in name_plots:
            key = "%s" % (plot_tuple[0])
            self.website["Feature Analysis"]["Violin and Box Plots"][key] = {
                "figure": plot_tuple[1]
            }

    # correlation plot
    if correlation:
        correlation_plot = self.analyzer.feature_analysis(
            'correlation', feat_names)
        if correlation_plot:
            self.website["Feature Analysis"]["Correlation"] = {
                "figure": correlation_plot
            }

    # cluster instances in feature space
    if clustering:
        cluster_plot = self.analyzer.feature_analysis(
            'clustering', feat_names)
        self.website["Feature Analysis"]["Clustering"] = {
            "figure": cluster_plot
        }

    self.build_website()
def feature_analysis(self,
                     d,
                     box_violin=False, correlation=False,
                     clustering=False, importance=False):
    """Run the selected feature analyses and store the resulting tables and
    figures in the website-section *d*, then rebuild the website.

    Parameters
    ----------
    d : dict
        Website section that receives one entry per generated analysis.
    box_violin, correlation, clustering, importance : bool
        Toggles for the individual analyses.
    """
    if not self.scenario.feature_dict:
        self.logger.error(
            "No features available. Skipping feature analysis.")
        return

    feat_fn = self.scenario.feature_fn
    if self.scenario.feature_names:
        feat_names = copy.deepcopy(self.scenario.feature_names)
    else:
        self.logger.debug(
            "`scenario.feature_names` is not set. Loading from '%s'",
            feat_fn)
        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            if not feat_fn or not os.path.exists(feat_fn):
                self.logger.warning(
                    "Feature names are missing. Either provide valid "
                    "feature_file in scenario (currently %s) or set "
                    "`scenario.feature_names` manually." % feat_fn)
                self.logger.error("Skipping Feature Analysis.")
                return
            else:
                # Feature names come from the feature-file header.
                feat_names = InputReader().read_instance_features_file(
                    feat_fn)[0]

    # feature importance using forward selection
    if importance:
        section = OrderedDict()
        d["Feature Importance"] = section
        imp, importance_plots = self.analyzer.feature_importance(self.pimp)
        table_html = DataFrame(data=list(imp.values()),
                               index=list(imp.keys()),
                               columns=["Error"]).to_html()
        section["Table"] = {"table": table_html}
        for plot_path in importance_plots:
            plot_name = os.path.splitext(os.path.basename(plot_path))[0]
            section[plot_name] = {"figure": plot_path}

    # box and violin plots
    if box_violin:
        d["Violin and Box Plots"] = OrderedDict(
            ("%s" % pair[0], {"figure": pair[1]})
            for pair in self.analyzer.feature_analysis(
                'box_violin', feat_names))

    # correlation plot
    if correlation:
        corr_plot = self.analyzer.feature_analysis(
            'correlation', feat_names)
        if corr_plot:
            d["Correlation"] = {"figure": corr_plot}

    # cluster instances in feature space
    if clustering:
        d["Clustering"] = {
            "figure": self.analyzer.feature_analysis(
                'clustering', feat_names)
        }

    self.build_website()
class Scenario(object):
    ''' main class of SMAC '''

    def __init__(self, scenario, cmd_args=None):
        """Construct scenario object from file or dictionary.

        Parameters
        ----------
        scenario : str or dict
            if str, it will be interpreted as to a path a scenario file
            if dict, it will be directly to get all scenario related
            information
        cmd_args : dict
            command line arguments that were not processed by argparse
        """
        self.logger = logging.getLogger("scenario")
        self.in_reader = InputReader()

        if type(scenario) is str:
            scenario_fn = scenario
            self.logger.info("Reading scenario file: %s" % (scenario_fn))
            scenario = self.in_reader.read_scenario_file(scenario_fn)
        elif type(scenario) is dict:
            # NOTE(review): unlike later versions, the dict is NOT copied
            # here, so parsing below mutates the caller's dict.
            pass
        else:
            raise TypeError(
                "Wrong type of scenario (str or dict are supported)")

        if cmd_args:
            scenario.update(cmd_args)

        self._arguments = {}
        self._add_arguments()

        # Parse arguments; each recognised key is popped from `scenario`,
        # so leftovers afterwards are unknown options.
        parsed_arguments = {}
        for key, value in self._arguments.items():
            arg_name, arg_value = self._parse_argument(key, scenario, **value)
            parsed_arguments[arg_name] = arg_value

        if len(scenario) != 0:
            raise ValueError('Could not parse the following arguments: %s' %
                             str(list(scenario.keys())))

        # Every parsed option becomes an attribute of the scenario object.
        for arg_name, arg_value in parsed_arguments.items():
            setattr(self, arg_name, arg_value)

        self._transform_arguments()

    def add_argument(self, name, help, callback=None, default=None,
                     dest=None, required=False):
        """Register an allowed scenario option.

        Parameters
        ----------
        name : str
            Option name as it may appear in the scenario.
        help : str
            Help text (may be None).
        callback : callable, optional
            Cast/transform applied to the raw value when parsed.
        default : object, optional
            Value used when the option is absent.
        dest : str, optional
            Attribute name on the scenario object; derived from `name` if
            None.
        required : bool
            If True, parsing raises when the option is missing.
        """
        if not isinstance(required, bool):
            raise TypeError("Argument required must be of type 'bool'.")

        self._arguments[name] = {
            'default': default,
            'required': required,
            'help': help,
            'dest': dest,
            'callback': callback
        }

    def _parse_argument(self, name, scenario, help, callback=None,
                        default=None, dest=None, required=False):
        """Look up a single option in `scenario` (spelling-insensitively),
        pop it, apply default/callback, and return ``(dest, value)``.

        Side effect: the matched key is removed from the scenario dict.
        """
        # Compare names with case, '-' and '_' stripped so all spellings of
        # an option match.
        normalized_name = name.lower().replace('-', '').replace('_', '')
        value = None

        # Allows us to pop elements in order to remove all parsed elements
        # from the dictionary
        for key in list(scenario.keys()):
            # Check all possible ways to spell an argument
            normalized_key = key.lower().replace('-', '').replace('_', '')
            if normalized_key == normalized_name:
                value = scenario.pop(key)

        if dest is None:
            dest = name.lower().replace('-', '_')

        if required is True:
            if value is None:
                raise ValueError('Required argument %s not given.' % name)

        if value is None:
            value = default

        if value is not None and callable(callback):
            value = callback(value)

        return dest, value

    def _add_arguments(self):
        """Register all options this (legacy) scenario version understands."""
        # Add allowed arguments
        self.add_argument(name='algo', help=None, dest='ta',
                          callback=lambda arg: shlex.split(arg))
        self.add_argument(name='execdir', default='.', help=None)
        self.add_argument(name='deterministic', default="0", help=None,
                          callback=lambda arg: arg in ["1", "true", True])
        self.add_argument(name='paramfile', help=None, dest='pcs_fn')
        self.add_argument(name='run_obj', help=None, default='runtime')
        self.add_argument(name='overall_obj', help=None, default='par10')
        self.add_argument(name='cutoff_time', help=None, default=None,
                          dest='cutoff', callback=lambda arg: float(arg))
        self.add_argument(name='memory_limit', help=None)
        self.add_argument(name='tuner-timeout', help=None,
                          default=numpy.inf, dest='algo_runs_timelimit',
                          callback=lambda arg: float(arg))
        self.add_argument(name='wallclock_limit', help=None,
                          default=numpy.inf,
                          callback=lambda arg: float(arg))
        self.add_argument(name='runcount_limit', help=None,
                          default=numpy.inf,
                          callback=lambda arg: float(arg),
                          dest="ta_run_limit")
        self.add_argument(name='instance_file', help=None,
                          dest='train_inst_fn')
        self.add_argument(name='test_instance_file', help=None,
                          dest='test_inst_fn')
        self.add_argument(name='feature_file', help=None, dest='feature_fn')
        # NOTE: the default output dir is timestamped at construction time.
        self.add_argument(name='output_dir', help=None,
                          default="smac3-output_%s" % (
                              datetime.datetime.fromtimestamp(
                                  time.time()).strftime(
                                      '%Y-%m-%d_%H:%M:%S')))
        self.add_argument(name='shared_model', help=None, default='0',
                          callback=lambda arg: arg in ['1', 'true', True])
        self.add_argument(name='instances', default=[[None]], help=None,
                          dest='train_insts')
        self.add_argument(name='test_instances', default=[[None]],
                          help=None, dest='test_insts')
        # instance name -> feature vector
        self.add_argument(name='features', default={}, help=None,
                          dest='feature_dict')
        self.add_argument(name='cs', help=None)  # ConfigSpace object

    def _transform_arguments(self):
        """Derive attributes from the parsed options: par-factor, instance
        lists (with instance-specifics split off), feature array and the
        ConfigSpace read from the pcs file."""
        self.n_features = len(self.feature_dict)
        self.feature_array = None

        # par-factor from overall_obj, e.g. "par10" -> 10; defaults to 1.
        if self.overall_obj[:3] in ["PAR", "par"]:
            self.par_factor = int(self.overall_obj[3:])
        elif self.overall_obj[:4] in ["mean", "MEAN"]:
            self.par_factor = int(self.overall_obj[4:])
        else:
            self.par_factor = 1

        # read instance files
        if self.train_inst_fn:
            if os.path.isfile(self.train_inst_fn):
                self.train_insts = self.in_reader.read_instance_file(
                    self.train_inst_fn)
            else:
                self.logger.error("Have not found instance file: %s" %
                                  (self.train_inst_fn))
                sys.exit(1)
        if self.test_inst_fn:
            if os.path.isfile(self.test_inst_fn):
                self.test_insts = self.in_reader.read_instance_file(
                    self.test_inst_fn)
            else:
                self.logger.error("Have not found test instance file: %s" %
                                  (self.test_inst_fn))
                sys.exit(1)

        self.instance_specific = {}

        def extract_instance_specific(instance_list):
            # First token is the instance name; the remainder is stored as
            # instance-specific information.
            insts = []
            for inst in instance_list:
                if len(inst) > 1:
                    self.instance_specific[inst[0]] = " ".join(inst[1:])
                insts.append(inst[0])
            return insts

        self.train_insts = extract_instance_specific(self.train_insts)
        if self.test_insts:
            self.test_insts = extract_instance_specific(self.test_insts)

        # read feature file
        if self.feature_fn:
            if os.path.isfile(self.feature_fn):
                self.feature_dict = self.in_reader.read_instance_features_file(
                    self.feature_fn)[1]

        if self.feature_dict:
            # n_features is taken from the first instance's feature vector.
            self.n_features = len(self.feature_dict[list(
                self.feature_dict.keys())[0]])
            self.feature_array = []
            for inst_ in self.train_insts:
                self.feature_array.append(self.feature_dict[inst_])
            self.feature_array = numpy.array(self.feature_array)

        # read pcs file
        if self.pcs_fn and os.path.isfile(self.pcs_fn):
            with open(self.pcs_fn) as fp:
                pcs_str = fp.readlines()
                self.cs = pcs.read(pcs_str)
                # Fixed seed for reproducible configuration sampling.
                self.cs.seed(42)
        elif self.pcs_fn:
            self.logger.error("Have not found pcs file: %s" % (self.pcs_fn))
            sys.exit(1)

        self.logger.info("Output to %s" % (self.output_dir))