def __call__(self, wrapped, instance, args, kwargs):
    """Call `wrapped`, filling in missing keyword arguments from a JSON parameters file

    The file path is :attr:`file` when that is set. Otherwise, if :attr:`key` is not None,
    the path is looked up in the call's own arguments: first `kwargs[self.key]`, then
    `args[self.key]`. If no path can be found, `wrapped` is called with its arguments unchanged

    Parameters
    ----------
    wrapped: Callable
        Callable to invoke once `kwargs` has been updated
    instance: Object
        Not used by this method
    args: Sequence
        Positional arguments forwarded to `wrapped`
    kwargs: Dict
        Keyword arguments forwarded to `wrapped`. Updated in place with file values for any
        key not already present

    Returns
    -------
    Object
        Whatever `wrapped(*args, **kwargs)` returns

    Raises
    ------
    TypeError
        If the contents of the parameters file are not a dict"""
    file = self.file
    file_params = {}

    #################### Locate Parameters File ####################
    if not file and self.key is not None:
        # TypeError: `key` is a str, so it cannot index the `args` sequence
        # IndexError: `key` is an int beyond the positional arguments actually given
        # Either way the path was simply not supplied, so continue without a file
        with suppress(TypeError, IndexError):
            file = kwargs.get(self.key, None) or args[self.key]

    if file:  # If `file=None`, continue with empty dict of `file_params`
        file_params = read_json(file)
        if not isinstance(file_params, dict):
            raise TypeError("{} must have dict, not {}".format(file, file_params))

    #################### Check Valid Parameters for `wrapped` ####################
    ok_keys = [
        name
        for name, param in signature(wrapped).parameters.items()
        if param.kind == param.KEYWORD_ONLY
    ]

    for k, v in file_params.items():
        # Unknown keys are reported (when verbose) but still forwarded below
        if k not in ok_keys and self.verbose:
            G.warn(f"Invalid key ({k}) in user parameters file: {file}")
        # Explicitly-given keyword arguments always win over file defaults
        if k not in kwargs:
            kwargs[k] = v
            if self.verbose:
                G.debug(f"Parameter `{k}` set to user default in parameters file: '{file}'")

    return wrapped(*args, **kwargs)
def update_custom_environment_params(self):
    """Fill in attributes that are still None, first from the JSON file at
    :attr:`environment_params_path`, then from :attr:`DEFAULT_PARAMS`

    Only keyword-only parameters of `Environment` are considered. Values already set on the
    instance are never overwritten

    Raises
    ------
    TypeError
        If `environment_params_path` is neither None nor a str, or if the file's contents are
        not a dict"""
    allowed_parameter_keys = [
        name
        for name, param in signature(Environment).parameters.items()
        if param.kind == param.KEYWORD_ONLY
    ]
    params_path = self.environment_params_path
    user_defaults = {}

    if params_path is not None and not isinstance(params_path, str):
        raise TypeError(
            'environment_params_path must be a str, not {}: {}'.format(*type_val(params_path))
        )

    try:
        user_defaults = read_json(params_path)
    except TypeError:
        # A path of None is allowed: `user_defaults` simply stays empty
        if params_path is not None:
            raise

    if not isinstance(user_defaults, dict):
        raise TypeError(
            'environment_params_path must contain a dict. Received {}: {}'.format(
                *type_val(user_defaults)
            )
        )

    #################### Check user_defaults ####################
    invalid_key_template = (
        'Invalid key ({}) in user-defined default Environment parameter file at "{}". '
        'If expected to do something,'
        '\n\t'
        'it really won\'t, so it should be removed or fixed. '
        'The following are valid default keys: {}'
    )
    for key, value in user_defaults.items():
        if key not in allowed_parameter_keys:
            G.warn(invalid_key_template.format(key, params_path, allowed_parameter_keys))
            continue
        if getattr(self, key) is None:
            setattr(self, key, value)
            G.debug(
                'Environment kwarg "{}" was set to user default at "{}"'.format(key, params_path)
            )

    #################### Check Module Default Environment Arguments ####################
    for key in allowed_parameter_keys:
        if getattr(self, key) is None:
            setattr(self, key, self.DEFAULT_PARAMS.get(key))
def _random_seed_initializer(self):
    """Populate `experiment_params['random_seeds']` when it is None

    Seeds are drawn with `np.random.randint` using `random_seed_bounds`, and stored as a nested
    list of shape (n_repeats, n_splits, runs). `n_repeats` defaults to 1 if absent"""
    params = self.experiment_params

    if params['random_seeds'] is None:
        bounds = params['random_seed_bounds']
        shape = (
            self.cross_validation_params.get('n_repeats', 1),
            self.cross_validation_params['n_splits'],
            params['runs'],
        )
        params['random_seeds'] = np.random.randint(*bounds, size=shape).tolist()

    G.debug('BaseExperiment._random_seed_initializer() done')
def preparation_workflow(self):
    """Run every task that must finish before the experiment itself starts

    In order: generate the experiment ID, create the script backup, validate parameters,
    generate the hyperparameter key, then run any additional preparation steps"""
    G.debug('Starting preparation_workflow...')

    # Order matters: later steps run only after the earlier ones have completed
    self._generate_experiment_id()
    self._create_script_backup()
    self._validate_parameters()
    self._generate_hyperparameter_key()
    self._additional_preparation_steps()

    G.debug('Completed preparation_workflow')
def _validate_parameters(self):
    """Normalize :attr:`target_metric`, and give :attr:`feature_selector` a default if None

    The default `feature_selector` is every column of :attr:`train_dataset` except
    :attr:`target_column` and :attr:`id_column` (whichever of those are not None)"""
    #################### target_metric ####################
    self.target_metric = get_formatted_target_metric(self.target_metric, self.metrics_map)

    #################### feature_selector ####################
    if self.feature_selector is None:
        excluded = [
            column for column in [self.target_column, self.id_column] if column is not None
        ]
        self.feature_selector = [
            column for column in self.train_dataset.columns.values if column not in excluded
        ]

    G.debug('Experiment parameters have been validated')
def _random_seed_initializer(self):
    """Populate `experiment_params["random_seeds"]` when it is None

    Seeds are drawn with `np.random.randint` using `random_seed_bounds`, and stored as a nested
    list of shape (n_repeats, n_splits, runs). `n_repeats` defaults to 1 if absent"""
    params = self.experiment_params

    if params["random_seeds"] is None:
        bounds = params["random_seed_bounds"]
        shape = (
            self.cv_params.get("n_repeats", 1),
            self.cv_params["n_splits"],
            params["runs"],
        )
        params["random_seeds"] = np.random.randint(*bounds, size=shape).tolist()

    G.debug("BaseExperiment._random_seed_initializer() done")
def on_repetition_end(self):
    """Log the repetition's evaluation results and elapsed time, then defer to the parent hook

    The message goes to `G.log` when verbosity is at least 2 and more than one repetition is
    configured; otherwise it goes to `G.debug`"""
    message = format_fold_run(rep=self._rep, fold="-", run="-")
    if not message.endswith(" "):
        message += self.log_separator

    message += format_evaluation(self.last_evaluation_results, float_format=self.float_format)
    if not message.endswith(" "):
        message += self.log_separator

    message += self.__elapsed_helper("reps")

    is_visible = G.Env.verbose >= 2 and G.Env.cv_params.get("n_repeats", 1) > 1
    emit = G.log if is_visible else G.debug
    # `f_back` is this method's caller, passed on as `previous_frame`
    emit(message, previous_frame=inspect.currentframe().f_back)

    super().on_repetition_end()
def on_run_end(self):
    """Log the run's evaluation results and elapsed time, then defer to the parent hook

    The message goes to `G.log` when verbosity is at least 3 and more than one run is
    configured; otherwise it goes to `G.debug`"""
    position = format_fold_run(rep=self._rep, fold=self._fold, run=self._run)
    evaluation = format_evaluation(self.last_evaluation_results, float_format=self.float_format)
    elapsed = self.__elapsed_helper("runs")
    pieces = [position, evaluation, elapsed]

    is_visible = G.Env.verbose >= 3 and G.Env.runs > 1
    emit = G.log if is_visible else G.debug
    # `f_back` is this method's caller, passed on as `previous_frame`
    emit(self.log_separator.join(pieces), previous_frame=inspect.currentframe().f_back)

    super().on_run_end()
def _generate_hyperparameter_key(self):
    """Build :attr:`hyperparameter_key` from the experiment's model and feature settings

    The key is made by `HyperparameterKeyMaker` from the collected parameters together with
    :attr:`cross_experiment_key`. The key is logged, and its raw parameters are debug-logged"""
    parameters = {
        "model_initializer": self.model_initializer,
        "model_init_params": self.model_init_params,
        "model_extra_params": self.model_extra_params,
        "feature_engineer": self.feature_engineer,
        "feature_selector": self.feature_selector,
        # FLAG: Should probably add :attr:`target_metric` to key - With option to ignore it?
    }

    self.hyperparameter_key = HyperparameterKeyMaker(parameters, self.cross_experiment_key)

    G.log(f"Hyperparameter Key: '{self.hyperparameter_key}'")
    G.debug("Raw hyperparameters...")
    G.debug(self.hyperparameter_key.parameters)
def update_custom_environment_params(self):
    """Fill in attributes that are still None, first from the JSON file at
    :attr:`environment_params_path`, then from :attr:`DEFAULT_PARAMS`

    Only keyword-only parameters of `Environment` are considered. Values already set on the
    instance are never overwritten

    Raises
    ------
    TypeError
        If `environment_params_path` is neither None nor a str, or if the file's contents are
        not a dict"""
    allowed_parameter_keys = [
        name
        for name, param in signature(Environment).parameters.items()
        if param.kind == param.KEYWORD_ONLY
    ]
    params_path = self.environment_params_path
    user_defaults = {}

    if params_path is not None and not isinstance(params_path, str):
        raise TypeError(f"Non-str `environment_params_path`: {params_path}")

    try:
        user_defaults = read_json(params_path)
    except TypeError:
        # If `environment_params_path=None`, no error raised - `user_defaults` continues as {}
        if params_path is not None:
            raise

    if not isinstance(user_defaults, dict):
        raise TypeError("environment_params_path must have dict, not {}".format(user_defaults))

    #################### Check user_defaults ####################
    invalid_key_template = (
        "Invalid key ({}) in user-defined default Environment parameter file at '{}'. "
        "If expected to do something,"
        "\n\t"
        "it really won't, so it should be removed or fixed. "
        "The following are valid default keys: {}"
    )
    for key, value in user_defaults.items():
        if key not in allowed_parameter_keys:
            G.warn(invalid_key_template.format(key, params_path, allowed_parameter_keys))
            continue
        if getattr(self, key) is None:
            setattr(self, key, value)
            G.debug(f"Environment.`{key}` set to user default: '{params_path}'")

    #################### Check Module Default Environment Arguments ####################
    for key in allowed_parameter_keys:
        if getattr(self, key) is None:
            setattr(self, key, self.DEFAULT_PARAMS.get(key))
def on_fold_end(self):
    """Log the fold's average evaluation results and elapsed time, then defer to the parent hook

    The message goes to `G.log` when verbosity is at least 2 and more than one split is
    configured; otherwise it goes to `G.debug`. Either way `add_time=False` is passed"""
    message = f"F{self._rep}.{self._fold} AVG: "  # TODO: Prepend rep count
    message += format_evaluation(self.last_evaluation_results, float_format=self.float_format)
    if not message.endswith(" "):
        message += self.log_separator
    message += self.__elapsed_helper("folds")

    is_visible = G.Env.verbose >= 2 and G.Env.cross_validation_params["n_splits"] > 1
    emit = G.log if is_visible else G.debug
    # `f_back` is this method's caller, passed on as `previous_frame`
    emit(message, previous_frame=inspect.currentframe().f_back, add_time=False)

    super().on_fold_end()
def on_fold_end(self):
    """Log the fold's evaluation results and elapsed time, then defer to the parent hook

    The message goes to `G.log` when verbosity is at least 2 and more than one split is
    configured; otherwise it goes to `G.debug`. Either way `add_time=False` is passed"""
    message = format_fold_run(rep=self._rep, fold=self._fold, run="-")
    if not message.endswith(" "):
        message += self.log_separator

    message += format_evaluation(self.last_evaluation_results, float_format=self.float_format)
    if not message.endswith(" "):
        message += self.log_separator

    message += self.__elapsed_helper("folds")

    is_visible = G.Env.verbose >= 2 and G.Env.cv_params["n_splits"] > 1
    emit = G.log if is_visible else G.debug
    # `f_back` is this method's caller, passed on as `previous_frame`
    emit(message, previous_frame=inspect.currentframe().f_back, add_time=False)

    super().on_fold_end()
def on_run_start(self):
    """Log the run's position and its random seed (if any), then defer to the parent hook

    The message is the formatted rep/fold/run, followed by "Seed: <current_seed>" whenever
    :attr:`current_seed` is not None. The two parts are joined by :attr:`log_separator` when
    the first part is non-empty. The message goes to `G.log` when verbosity is at least 4 and
    more than one run is configured; otherwise it goes to `G.debug`"""
    content = format_fold_run(rep=self._rep, fold=self._fold, run=self._run)

    # Compare against None rather than testing truthiness, so a seed of 0 is still reported
    if self.current_seed is not None:
        if content != "":
            content += self.log_separator
        content += "Seed: {}".format(self.current_seed)

    is_visible = G.Env.verbose >= 4 and G.Env.runs > 1
    emit = G.log if is_visible else G.debug
    # `f_back` is this method's caller, passed on as `previous_frame`
    emit(content, previous_frame=inspect.currentframe().f_back, add_time=True)

    super().on_run_start()
def experiment_workflow(self):
    """Run the experiment itself: execute it, record its results, and clean up

    If :attr:`hyperparameter_key` reports that it already exists, the experiment is a repeat.
    A repeat is either rejected (when :attr:`do_raise_repeated` is True) or allowed to proceed
    after a warning

    Raises
    ------
    RepeatedExperimentError
        If the experiment is a repeat and `do_raise_repeated` is True. Cleanup runs first"""
    if self.hyperparameter_key.exists is True:
        repeat_message = f"{self!r} has already been run"

        if self.do_raise_repeated is True:
            self._clean_up()
            raise RepeatedExperimentError(repeat_message)

        G.debug(repeat_message)
        G.warn("WARNING: Duplicate experiment!")

    self._initialize_random_seeds()
    self.execute()

    #################### Save Experiment Results ####################
    recorders = RecorderList(
        file_blacklist=G.Env.file_blacklist,
        extra_recorders=G.Env.experiment_recorders,
    )
    recorders.format_result()
    G.log(f"Saving results for Experiment: '{self.experiment_id}'")
    recorders.save_result()

    self._clean_up()
def _update_model_params(self):
    """Set the random-state entry of :attr:`model_init_params` to :attr:`current_seed`

    The first of "random_state" or "seed" found in `model_init_params` is overwritten. If
    neither is present, a debug message is logged. Any exception raised along the way is
    caught and logged rather than propagated"""
    # TODO: Add this to some workflow in Experiment class. For now it is never used, unless the subclass decides to...
    # `model_init_params` initialized to all algorithm hyperparameters - Works even if 'random_state' not explicitly given
    try:
        for seed_key in ("random_state", "seed"):
            if seed_key in self.model_init_params:
                self.model_init_params[seed_key] = self.current_seed
                break
        else:
            G.debug("WARNING: Model has no random_state/seed parameter to update")
            # FLAG: HIGH PRIORITY BELOW
            # TODO: BELOW IS NOT THE CASE IF MODEL IS NN - SETTING THE GLOBAL RANDOM SEED DOES SOMETHING
            # TODO: If this is logged, there is no reason to execute multiple-run-averaging, so don't
            # TODO: ... Either 1) Set `runs` = 1 (this would mess with the environment key), or...
            # TODO: ... 2) Set the results of all subsequent runs to the results of the first run (this could be difficult)
            # FLAG: HIGH PRIORITY ABOVE
    except Exception as _ex:
        G.log(f"WARNING: Failed to update model's random_state {_ex!r}")
def update_custom_environment_params(self):
    """Fill in attributes that are still None, first from the JSON file at
    :attr:`environment_params_path`, then from :attr:`DEFAULT_PARAMS`

    Only keyword-only parameters of `Environment` are considered. Values already set on the
    instance are never overwritten

    Raises
    ------
    TypeError
        If the file's contents are not a dict. A TypeError or OSError raised while reading is
        also re-raised, unless `environment_params_path` is None"""
    allowed_parameter_keys = [
        name
        for name, param in signature(Environment).parameters.items()
        if param.kind == param.KEYWORD_ONLY
    ]
    params_path = self.environment_params_path
    user_defaults = {}

    try:
        user_defaults = read_json(params_path)
    except (TypeError, OSError):
        # If `environment_params_path=None`, no error raised - `user_defaults` continues as {}
        if params_path is not None:
            raise

    if not isinstance(user_defaults, dict):
        raise TypeError("environment_params_path must have dict, not {}".format(user_defaults))

    #################### Check user_defaults ####################
    for key, value in user_defaults.items():
        if key not in allowed_parameter_keys:
            G.warn(f"Invalid key ({key}) in user Environment parameters: {params_path}")
            continue
        if getattr(self, key) is None:
            setattr(self, key, value)
            G.debug(f"Environment.`{key}` set to user default: '{params_path}'")

    #################### Check Module Default Environment Arguments ####################
    for key in allowed_parameter_keys:
        if getattr(self, key) is None:
            setattr(self, key, self.DEFAULT_PARAMS.get(key))
def on_fold_start(self):
    """Hook invoked when a fold starts in an Experiment's cross-validation scheme

    This implementation only emits a debug message"""
    G.debug('BaseCallback.on_fold_start()')
def on_run_start(self):
    """Hook invoked when a run starts in an Experiment's multiple-run-averaging phase

    This implementation only emits a debug message"""
    G.debug("BaseCallback.on_run_start()")
def _initialize_random_seeds(self):
    """Seed the global random number generators, then generate per-stage seeds if needed

    Both NumPy's and the standard library's global generators are seeded with
    `experiment_params["global_random_seed"]` before :meth:`_random_seed_initializer` runs"""
    global_seed = self.experiment_params["global_random_seed"]

    np.random.seed(global_seed)
    random.seed(global_seed)

    self._random_seed_initializer()
    G.debug("Initialized random seeds for experiment")
def on_repetition_end(self):
    """Hook invoked when a repetition ends in an Experiment's repeated cross-validation scheme

    This implementation only emits a debug message"""
    G.debug("BaseCallback.on_repetition_end()")
def on_fold_end(self):
    """Hook invoked when a fold ends in an Experiment's cross-validation scheme

    This implementation only emits a debug message"""
    G.debug("BaseCallback.on_fold_end()")
def on_experiment_start(self):
    """Hook invoked when an Experiment starts

    This implementation only emits a debug message"""
    G.debug("BaseCallback.on_experiment_start()")
def on_experiment_end(self):
    """Hook invoked when an Experiment ends

    This implementation only emits a debug message"""
    G.debug("BaseCallback.on_experiment_end()")
def on_run_end(self):
    """Hook invoked when a run ends in an Experiment's multiple-run-averaging phase

    This implementation only emits a debug message"""
    G.debug('BaseCallback.on_run_end()')
def on_run_start(self):
    """Hook invoked when a run starts in an Experiment's multiple-run-averaging phase

    This implementation only emits a debug message"""
    G.debug('BaseCallback.on_run_start()')
def on_fold_end(self):
    """Hook invoked when a fold ends in an Experiment's cross-validation scheme

    This implementation only emits a debug message"""
    G.debug('BaseCallback.on_fold_end()')
def _initialize_random_seeds(self):
    """Seed the global random number generators, then generate per-fold/run seeds if needed

    Both NumPy's and the standard library's global generators are seeded with
    `experiment_params['global_random_seed']` before :meth:`_random_seed_initializer` runs"""
    global_seed = self.experiment_params['global_random_seed']

    np.random.seed(global_seed)
    random.seed(global_seed)

    self._random_seed_initializer()
    G.debug('Initialized random seeds for experiment')
def on_repetition_end(self):
    """Hook invoked when a repetition ends in an Experiment's repeated cross-validation scheme

    This implementation only emits a debug message"""
    G.debug('BaseCallback.on_repetition_end()')