def _filter_parameters_to_hash(self, parameters):
    """Produce a filtered version of `parameters` that does not include hyperparameters that
    should be ignored during hashing, such as those pertaining to verbosity, seeds, and random
    states, as they have no effect on HyperparameterHunter experiment results

    Parameters
    ----------
    parameters: Dict
        Full dictionary of initial parameters to be filtered

    Returns
    -------
    parameters: Dict
        Filtered version of the given `parameters`"""
    reject = ["verbose", "verbosity", "silent"]
    reject += ["random_state", "random_seed", "seed", "n_jobs", "nthread"]

    if self.is_task_keras:
        reject.append("build_fn")

    parameters["model_init_params"] = subdict(parameters["model_init_params"], drop=reject)
    parameters["model_extra_params"] = subdict(parameters["model_extra_params"], drop=reject)
    return parameters

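# A minimal sketch of the filtering above, assuming `subdict` here behaves like
# `boltons.iterutils.subdict`. The hyperparameter names below are illustrative,
# not taken from a real experiment
from boltons.iterutils import subdict as _subdict

_params = {"n_estimators": 200, "verbose": 1, "random_state": 42, "nthread": 4}
_reject = ["verbose", "verbosity", "silent", "random_state", "random_seed", "seed", "n_jobs", "nthread"]
# Only hash-relevant hyperparameters survive, so two experiments differing only
# in seeds/verbosity produce the same hyperparameter key
assert _subdict(_params, drop=_reject) == {"n_estimators": 200}
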
def merge_compile_params(compile_params, dummified_params):
    """Update `compile_params` to reflect those values that were given hyperparameter space
    choices, as specified by `dummified_params`

    Parameters
    ----------
    compile_params: Dict
        All the compile parameters provided to a dummy model's `compile` method, or their default
        values if they were not explicitly given. If the original value of one of the keys in
        `compile_params` was a hyperparameter space choice, its current value will be the dummy
        chosen for it, and this change will be reflected by the contents of `dummified_params`
    dummified_params: Dict
        A mapping of tuple paths to keys in `compile_params` (possibly nested keys) to the
        original hyperparameter space choices whose dummy values currently occupy those locations

    Returns
    -------
    merged_params: Dict
        A dictionary that mirrors `compile_params`, except where an element of `dummified_params`
        has the same path/key, in which case the hyperparameter space choice value in
        `dummified_params` is used"""
    # FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`="Adam" vs "adam"
    _dummy_params = subdict(dummified_params.copy(), key=lambda _: _[1:] if _[0] == "params" else _)

    def _visit(path, key, value):
        """If (`path` + `key`) is in `_dummy_params`, return its value instead. Else, default"""
        location = path + (key,)
        if len(_dummy_params) and location in _dummy_params:
            return (key, _dummy_params.pop(location))
        return (key, value)

    merged_params = remap(compile_params, visit=_visit)
    return merged_params

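# A hedged sketch of the path-based merge performed by `_visit`, assuming `remap`
# from `boltons.iterutils`. Keys of the dummy-params dict are tuple paths into the
# nested dict; the string standing in for a space choice is purely illustrative
from boltons.iterutils import remap as _remap

_compile_params = {"optimizer": "adam", "optimizer_params": {"lr": 0.001}}
_dummies = {("optimizer_params", "lr"): "<Real(low=0.0001, high=0.1)>"}

def _demo_visit(path, key, value):
    # Swap a dummy value back out for the original space choice at this tuple path
    return key, _dummies.get(path + (key,), value)

assert _remap(_compile_params, visit=_demo_visit) == {
    "optimizer": "adam", "optimizer_params": {"lr": "<Real(low=0.0001, high=0.1)>"}
}
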
def format_result(self):
    """Format an OrderedDict containing the Experiment's identifying attributes, results,
    hyperparameters used, and other stats or information that may be useful"""
    self.result = OrderedDict(
        [
            ("experiment_id", self.experiment_id),
            ("algorithm_name", self.algorithm_name),
            ("module_name", self.module_name),
            ("hyperparameter_key", self.hyperparameter_key.key),
            ("cross_experiment_key", self.cross_experiment_key.key),
            ("final_evaluations", self.last_evaluation_results),
            ("hyperparameters", self.hyperparameter_key.parameters),
            ("cross_experiment_parameters", self.cross_experiment_key.parameters),
            ("train_features", None),  # TODO: Record the column features in train df
            ("platform", node()),
            ("source_script", self.source_script),
            ("notes", self.notes or ""),
            ("aggregates", self.stat_aggregates),
        ]
    )

    #################### Filter Hyperparameters' model_init_params ####################
    self.result["hyperparameters"]["model_init_params"] = subdict(
        self.result["hyperparameters"]["model_init_params"], drop=["random_state", "seed"]
    )

def get_datasets_for_f(self, datasets: DFDict) -> DFDict:
    """Produce a dict of DataFrames containing only the merged datasets and standard datasets
    requested in :attr:`params`. In other words, add the requested merged datasets and remove
    unnecessary standard datasets

    Parameters
    ----------
    datasets: DFDict
        Original dict of datasets, containing all datasets provided to
        :meth:`EngineerStep.__call__`, some of which may be superfluous, or may require
        additional processing to resolve merged/coupled datasets

    Returns
    -------
    DFDict
        Updated version of `datasets`, in which unnecessary datasets have been filtered out, and
        the requested merged datasets have been added"""
    self.merged_datasets: List[str] = validate_dataset_names(self.params, self.stage)

    datasets_for_f = datasets
    for _dataset_name in self.merged_datasets:
        datasets_for_f[_dataset_name] = merge_dfs(_dataset_name, self.stage, datasets)

    return subdict(datasets_for_f, keep=self.params)

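# A minimal sketch of the final `subdict(..., keep=...)` filtering step, assuming
# `subdict` from `boltons.iterutils`. The dataset names are illustrative, and the
# string values stand in for DataFrames
from boltons.iterutils import subdict as _subdict

_datasets = {"train_inputs": "<df>", "validation_inputs": "<df>", "train_targets": "<df>"}
_requested = ["train_inputs", "train_targets"]  # Only the datasets the step's `f` asks for
assert _subdict(_datasets, keep=_requested) == {"train_inputs": "<df>", "train_targets": "<df>"}
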
def __init__(self, parameters, cross_experiment_key, **kwargs):
    """A KeyMaker class dedicated to creating hyperparameter keys, which determine when
    experiments were executed using identical hyperparameters. Two separate instances of
    :class:`experiments.CVExperiment` should produce identical `hyperparameter_key` s if their
    hyperparameters are the same (or close enough)

    Parameters
    ----------
    parameters: Dict
        All the parameters to be included when creating the key hash. Keys should correspond to
        parameter names, and values should be the values of the corresponding keys
    cross_experiment_key: Str
        The key produced by the active Environment via
        :class:`key_handler.CrossExperimentKeyMaker`, used for determining when a hyperparameter
        key has already been tested under the same cross-experiment parameters
    **kwargs: Dict
        Additional arguments supplied to :meth:`key_handler.KeyMaker.__init__`"""
    self.cross_experiment_key = cross_experiment_key
    self.is_task_keras = (
        hasattr(G.Env, "current_task")
        and G.Env.current_task
        and G.Env.current_task.module_name == "keras"
    )

    if self.is_task_keras:
        parameters = deepcopy(parameters)

        #################### Initialize and Parameterize Dummy Model ####################
        temp_model = initialize_dummy_model(
            parameters["model_initializer"],
            parameters["model_init_params"]["build_fn"],
            parameters["model_extra_params"],
        )
        temp_layers, temp_compile_params = parameterize_compiled_keras_model(temp_model)

        #################### Process Parameters ####################
        # noinspection PyUnusedLocal
        def _visit(path, key, value):
            """If `key` not in ('input_shape', 'input_dim'), return True. Else, return False"""
            return key not in ("input_shape", "input_dim")

        temp_layers = remap(temp_layers, visit=_visit)

        parameters["model_init_params"]["layers"] = temp_layers
        parameters["model_init_params"]["compile_params"] = temp_compile_params
        parameters["model_extra_params"] = subdict(parameters["model_extra_params"], drop=["params"])

    KeyMaker.__init__(self, parameters, **kwargs)

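# A hedged sketch of the `_visit` filter above, assuming `remap` from
# `boltons.iterutils`: returning False drops an item, so layer dicts lose their
# data-dependent shape keys before hashing. The layer dict is illustrative
from boltons.iterutils import remap as _remap

_layers = [{"class_name": "Dense", "units": 32, "input_shape": (10,)}]
_filtered = _remap(_layers, visit=lambda path, key, value: key not in ("input_shape", "input_dim"))
assert _filtered == [{"class_name": "Dense", "units": 32}]
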
def get_concise_params_dict(params, split_args=False):
    # TODO: Add docstring
    new_params = subdict(params, drop=HH_ARG_ATTRS)
    arg_vals = {}

    #################### Resolve Kwargs Used as Args ####################
    if len(params.get(U_ARGS, [])) > len(params.get(D_ARGS, [])):
        for i in range(len(params[D_ARGS]), len(params[U_ARGS])):
            # Find the kwarg key that probably should have been used
            target_kwarg = list(params[D_KWARGS])[i - len(params[D_ARGS])]

            if target_kwarg in params[U_KWARGS]:
                raise SyntaxError(f"Misplaced argument (i={i}/{target_kwarg}): {params[U_ARGS][i]}")
            else:
                params[U_KWARGS][target_kwarg] = params[U_ARGS][i]  # Move arg to kwargs

        params[U_ARGS] = params[U_ARGS][: len(params[D_ARGS])]  # Remove arg (now in kwargs)

    #################### Gather Args ####################
    for i, expected_arg in enumerate(params.get(D_ARGS, [])):
        try:
            arg_vals[expected_arg] = params[U_ARGS][i]
        except IndexError:
            if expected_arg in params[U_KWARGS]:
                arg_vals[expected_arg] = params[U_KWARGS][expected_arg]
            else:
                raise

    #################### Gather Kwargs ####################
    # Merge default and used kwargs with constraints: only include a key if it is in the defaults,
    # and give priority to used values. This means that args used as kwargs won't make it through
    # because they have no default values, and nonsensical kwargs won't make it through because
    # the defaults are the point of reference
    kwarg_vals = {k: params[U_KWARGS].get(k, v) for k, v in params.get(D_KWARGS, {}).items()}

    #################### Consolidate ####################
    if split_args:
        new_params = dict(**new_params, **dict(arg_vals=arg_vals, kwarg_vals=kwarg_vals))
    else:
        new_params = {**new_params, **arg_vals, **kwarg_vals}

    return new_params

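# A minimal illustration of the kwarg-merging constraint described in the comments
# above: default kwargs define which keys are legal, and used values take priority.
# The names are hypothetical, not HyperparameterHunter internals
_d_kwargs = {"max_depth": 3, "learning_rate": 0.1}  # Defaults (the point of reference)
_u_kwargs = {"learning_rate": 0.05, "bogus_kwarg": 1}  # Used values, one nonsensical
_kwarg_vals = {k: _u_kwargs.get(k, v) for k, v in _d_kwargs.items()}
assert _kwarg_vals == {"max_depth": 3, "learning_rate": 0.05}  # "bogus_kwarg" filtered out
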
def est_except(skip: str) -> list:
    """Return flattened list of all "est" values in `EST_OPT_PRO_PAIRS` that are not members of
    the dict named by `skip`

    Parameters
    ----------
    skip: String
        Key in `EST_OPT_PRO_PAIRS`, declaring the `base_estimator` values to exclude from the
        result

    Returns
    -------
    List
        Flat list of `base_estimator` values in `EST_OPT_PRO_PAIRS`, less those specified by
        `skip`

    Examples
    --------
    >>> est_except("gbrt")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    ['GP', GaussianProcessRegressor(...), 'RF', RandomForestRegressor(...), 'ET', ExtraTreesRegressor(...), 'DUMMY']
    """
    return flatten([_["est"] for _ in subdict(EST_OPT_PRO_PAIRS, drop=[skip]).values()])

def est_except(skip: str) -> list:
    """Like `est_except` above, but drawing from a local `scenario_pairs` dict"""
    return flatten([_["est"] for _ in subdict(scenario_pairs, drop=[skip]).values()])