def _execute_experiment(self):
    """Instantiate and run a :class:`experiments.CrossValidationExperiment` after checking for
    duplicated keys

    Notes
    -----
    As described in the Notes of :meth:`BaseOptimizationProtocol.set_experiment_guidelines`, the
    `auto_start` kwarg of :meth:`experiments.CrossValidationExperiment.__init__` is set to False
    in order to check for duplicated keys"""
    self._update_current_hyperparameters()

    # `auto_start=False` defers execution so the experiment's hyperparameter key can be
    # inspected (via `preparation_workflow`) before anything is actually run
    self.current_experiment = CrossValidationExperiment(
        model_initializer=self.model_initializer,
        model_init_params=self.current_init_params,
        model_extra_params=self.current_extra_params,
        feature_selector=self.feature_selector,
        preprocessing_pipeline=self.preprocessing_pipeline,
        preprocessing_params=self.preprocessing_params,
        notes=self.notes,
        do_raise_repeated=self.do_raise_repeated,
        auto_start=False)

    self.current_experiment.preparation_workflow()

    # Future Hunter, if multi-cross_experiment_keys ever supported, this will be a problem.
    # Should've fixed it earlier, dummy
    if self.current_experiment.hyperparameter_key.key not in self.tested_keys:
        self.tested_keys.append(self.current_experiment.hyperparameter_key.key)

    self.current_experiment.experiment_workflow()
    # Pull the score for `target_metric` out of the nested evaluation-results dict
    self.current_score = get_path(self.current_experiment.last_evaluation_results, self.target_metric)
    self.successful_iterations += 1
    self._clean_up_experiment()
def get_scored_params(experiment_description_path, target_metric):
    """Read a completed Experiment's description file, returning its hyperparameters together
    with its `target_metric` evaluation

    Parameters
    ----------
    experiment_description_path: String
        The path to an Experiment's description .json file
    target_metric: Tuple
        A path denoting the metric to be used. If tuple, the first value should be one of
        ['oof', 'holdout', 'in_fold'], and the second value should be the name of a metric
        supplied in :attr:`environment.Environment.metrics_params`

    Returns
    -------
    all_hyperparameters: Dict
        A dict of the hyperparameters used by the Experiment
    evaluation: Float
        Value of the Experiment's `target_metric`"""
    desc = read_json(file_path=experiment_description_path)
    score = get_path(desc['final_evaluations'], target_metric)
    recorded_params = desc['hyperparameters']

    # Keras layer dicts are stored in an expanded form - collapse them back down so they can
    # be compared against other hyperparameter sets
    if desc['module_name'].lower() == 'keras':
        init_params = recorded_params['model_init_params']
        init_params['layers'] = consolidate_layers(
            init_params['layers'], class_name_key=False, separate_args=False)

    return (recorded_params, score)
def dimension_subset(hyperparameters, dimensions):
    """Return only the values of `hyperparameters` specified by `dimensions`, in the same
    order as `dimensions`

    Parameters
    ----------
    hyperparameters: Dict
        Dict of hyperparameters containing at least the following keys: ['model_init_params',
        'model_extra_params', 'preprocessing_pipeline', 'preprocessing_params',
        'feature_selector']
    dimensions: List of: (strings, or tuples)
        Locations and order of the values to return from `hyperparameters`. If a value is a
        string, it is assumed to belong to `model_init_params`, and its path will be adjusted
        accordingly

    Returns
    -------
    List of hyperparameter values"""
    # Bare strings are shorthand for paths inside `model_init_params`
    normalized = []
    for dim in dimensions:
        normalized.append(("model_init_params", dim) if isinstance(dim, str) else dim)

    if any(not isinstance(dim, tuple) for dim in normalized):
        raise TypeError(
            f"All dimensions should be strings or tuples. Received: {normalized}"
        )

    # FLAG: Might need to set `default`=<some sentinel str> in below `get_path` call - In case
    # `None` is an accepted value
    return [get_path(hyperparameters, dim, default=None) for dim in normalized]
def _get_initialized_exp(
        self, exp: Tuple[dict, Number, str]) -> Tuple[dict, Number, str]:
    """Initialize :class:`~hyperparameter_hunter.feature_engineering.EngineerStep`s for a
    single :attr:`similar_experiments` result entry

    Parameters
    ----------
    exp: Tuple[dict, Number, str]
        Tuple of (<parameters>, <evaluation>, <ID>), whose parameters dict will be searched
        for `EngineerStep`-like dicts

    Returns
    -------
    Dict
        Experiment parameters dict, in which any `EngineerStep`-like dicts have been
        initialized to `EngineerStep` instances. All other key/value pairs are unchanged
    Number
        Unchanged target evaluation result of `exp`
    String
        Unchanged experiment ID of `exp`"""
    params, score, experiment_id = exp
    steps = get_path(params, ("feature_engineer", "steps"))  # type: List[dict]

    # TODO: Requires consistent `EngineerStep` order - Update this if unordered steps added
    for step_index, step_dict in enumerate(steps):
        space_dim = self.space.get_by_name(
            ("feature_engineer", "steps", step_index), default=None)
        if space_dim is None:
            # No matching space dimension - leave this step dict untouched
            continue
        params["feature_engineer"]["steps"][step_index] = \
            EngineerStep.honorary_step_from_dict(step_dict, space_dim)

    return (params, score, experiment_id)
def _filter_by_guidelines_multi(self, location):
    """Helper to filter by guidelines when one of the guideline hyperparameters is directly
    affected by a hyperparameter that is given as a space choice

    Parameters
    ----------
    location: Tuple
        Location of the hyperparameter space choice that affects the acceptable guideline
        values of a particular hyperparameter. In other words, this is the path of a
        hyperparameter, which, if changed, would change the expected default value of another
        hyperparameter

    Raises
    ------
    ValueError
        If `location` is anything other than the Keras optimizer path handled below

    Notes
    -----
    This is used for Keras Experiments when the `optimizer` value in a model's
    `compile_params` is given as a hyperparameter space choice. Each possible value of
    `optimizer` prescribes different default values for the `optimizer_params` argument, so
    special measures need to be taken to ensure the correct Experiments are declared to fit
    within the constraints"""
    # Deep-copied so `remap` below builds modified copies without touching `self.model_params`
    _model_params = deepcopy(self.model_params)

    if location == ("model_init_params", "compile_params", "optimizer"):
        from keras.optimizers import get as k_opt_get
        # `update_location` = path of the hyperparameter whose default depends on `location`
        update_location = ("model_init_params", "compile_params", "optimizer_params")
        allowed_values = get_path(_model_params, location).bounds

        #################### Handle First Value (Dummy) ####################
        # The first choice value is already reflected in `_model_params`, so a plain
        # `_filter_by_guidelines` call covers it; remaining values are handled manually
        self._filter_by_guidelines()
        allowed_values = allowed_values[1:]

        #################### Handle Remaining Values ####################
        for allowed_val in allowed_values:
            # `get_config` yields the default `optimizer_params` for this optimizer choice
            updated_value = k_opt_get(allowed_val).get_config()

            # NOTE: `_visit` closes over the loop's current `updated_value`; it is only ever
            # called via the `remap` below within the same iteration, so late binding is safe
            def _visit(path, key, value):
                """If `path` + `key` == `update_location`, return default for this choice.
                Else, default_visit"""
                if path + (key,) == update_location:
                    return (key, updated_value)
                return (key, value)

            self._filter_by_guidelines(
                model_params=remap(_model_params, visit=_visit))

        # Each `_filter_by_guidelines` call may have appended matches - re-sort by score
        # (descending) so the best similar experiments come first
        self.similar_experiments = sorted(
            self.similar_experiments, key=lambda _: _[1], reverse=True)
    else:
        raise ValueError(
            "Received unhandled location: {}".format(location))
def dimension_subset(hyperparameters, dimensions):
    """Return only the values of `hyperparameters` specified by `dimensions`, in the same
    order as `dimensions`

    Parameters
    ----------
    hyperparameters: Dict
        Dict of hyperparameters containing at least the following keys: ['model_init_params',
        'model_extra_params', 'feature_engineer', 'feature_selector']
    dimensions: List of: (strings, or tuples)
        Locations and order of the values to return from `hyperparameters`. If a value is a
        string, it is assumed to belong to `model_init_params`, and its path will be adjusted
        accordingly

    Returns
    -------
    List of hyperparameter values"""
    # Expand shorthand string dimensions into full `model_init_params` paths
    paths = []
    for dim in dimensions:
        if isinstance(dim, str):
            paths.append(("model_init_params", dim))
        else:
            paths.append(dim)

    # `RejectedOptional` acts as the "missing value" sentinel, since `None` may be a
    # legitimate hyperparameter value
    return [get_path(hyperparameters, path, default=RejectedOptional()) for path in paths]
def get_scored_params(experiment_description_path, target_metric, get_description=False):
    """Retrieve the hyperparameters of a completed Experiment, along with its performance
    evaluation

    Parameters
    ----------
    experiment_description_path: String
        The path to an Experiment's description .json file
    target_metric: Tuple
        A path denoting the metric to be used. If tuple, the first value should be one of
        ['oof', 'holdout', 'in_fold'], and the second value should be the name of a metric
        supplied in :attr:`environment.Environment.metrics_params`
    get_description: Boolean, default=False
        If True, return a tuple of: ((`all_hyperparameters`, `evaluation`), `description`),
        in which `description` is the original description dict for the experiment. Else,
        return a tuple of: (`all_hyperparameters`, `evaluation`)

    Returns
    -------
    all_hyperparameters: Dict
        A dict of the hyperparameters used by the Experiment
    evaluation: Float
        Value of the Experiment's `target_metric`"""
    description = read_json(file_path=experiment_description_path)
    evaluation = get_path(description["final_evaluations"], target_metric)
    all_hyperparameters = description["hyperparameters"]

    # Collapse Keras layer dicts back into their canonical, comparable form
    if description["module_name"].lower() == "keras":
        init_params = all_hyperparameters["model_init_params"]
        init_params["layers"] = consolidate_layers(
            init_params["layers"], class_name_key=False)

    scored = (all_hyperparameters, evaluation)
    return (scored, description) if get_description else scored
def does_match_init_params_guidelines_multi(self, exp_id, params, score, location) -> bool:
    """Check candidate compatibility with `model_init_params` template guidelines when a
    guideline hyperparameter is directly affected by another hyperparameter that is given as
    a space choice

    Parameters
    ----------
    exp_id: String
        Candidate Experiment ID
    params: Dict
        Candidate "model_init_params" to compare to the template in :attr:`model_params`
    score: Number
        Value of the candidate Experiment's target metric
    location: Tuple
        Location of the hyperparameter space choice that affects the acceptable guideline
        values of a particular hyperparameter. In other words, this is the path of a
        hyperparameter, which, if changed, would change the expected default value of another
        hyperparameter

    Returns
    -------
    Boolean
        True if candidate `params` match `model_init_params` guidelines. Else, False

    Raises
    ------
    ValueError
        If `location` is anything other than the Keras optimizer path handled below

    Notes
    -----
    This is used for Keras Experiments when the `optimizer` value in a model's
    `compile_params` is given as a hyperparameter space choice. Each possible value of
    `optimizer` prescribes different default values for the `optimizer_params` argument, so
    special measures need to be taken to ensure the correct Experiments are declared to fit
    within the constraints"""
    # Deep-copied so the `remap` below can build per-choice templates without mutating
    # `self.model_params`
    _model_params = deepcopy(self.model_params["model_init_params"])

    if location == ("compile_params", "optimizer"):
        from keras.optimizers import get as k_opt_get
        update_location = ("compile_params", "optimizer_params")
        # `update_location` = Path to hyperparameter whose default value depends on `location`
        allowed_values = get_path(_model_params, location).bounds
        # `allowed_values` = Good `("model_init_params", "compile_params", "optimizer")` values

        #################### Handle First Value (Dummy) ####################
        is_match = self.does_match_init_params_guidelines(exp_id, params, score)
        # The first value gets handled separately from the rest because the value at
        # `update_location` is set according to `allowed_values[0]`. For the remaining
        # `allowed_values`, we need to manually set `update_location` for each
        # If the first value was a match, the below `while` loop will never be entered because
        # `is_match` is already True

        #################### Handle Remaining Values ####################
        allowed_val_index = 1
        while is_match is not True and allowed_val_index < len(allowed_values):
            allowed_val = allowed_values[allowed_val_index]

            # Determine current default value for the dependent hyperparameter
            updated_val = k_opt_get(allowed_val).get_config()

            # Set value at `update_location` to `updated_val`, then check if params match
            # NOTE: `_visit` closes over this iteration's `updated_val`; it is only invoked by
            # the `remap` call below in the same iteration, so late binding is not a problem
            def _visit(path, key, value):
                """If `path` + `key` == `update_location`, return default for this choice.
                Else, default_visit"""
                if path + (key,) == update_location:
                    return (key, updated_val)
                return (key, value)

            is_match = self.does_match_init_params_guidelines(
                exp_id, params, score,
                template_params=remap(_model_params, visit=_visit))
            # If `is_match` is True, the loop stops and :attr:`match_status`'s value at
            # `exp_id` for `does_match_init_params_guidelines` remains truthy
            allowed_val_index += 1

        return is_match
    else:
        raise ValueError(
            "Received unhandled location: {}".format(location))