def test_invalid_environment(monkeypatch, env_fixture_0):
    """Test that initializing an Experiment when there is an active `Environment` -- but
    :attr:`hyperparameter_hunter.environment.Environment.current_task` is not None -- raises
    `EnvironmentInvalidError`"""
    # Currently have a valid `settings.G.Env` (`env_fixture_0`), so give it a fake `current_task`
    monkeypatch.setattr(settings.G.Env, "current_task", "some other task")
    with pytest.raises(EnvironmentInvalidError, match="Current experiment must finish before .*"):
        CVExperiment(LogisticRegression, dict())
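

# Companion sketch (not part of the original suite): the inverse case, in which no `Environment`
# is active at all. This assumes `EnvironmentInactiveError` is importable from
# `hyperparameter_hunter.exceptions` and that `CVExperiment` raises it when `settings.G.Env` is
# None, mirroring `BaseOptimizationProtocol._validate_environment`
def test_inactive_environment(monkeypatch):
    """Sketch: initializing an Experiment with no active `Environment` should raise
    `EnvironmentInactiveError`"""
    from hyperparameter_hunter.exceptions import EnvironmentInactiveError

    # Clear any active `Environment` so the Experiment has nothing to attach to
    monkeypatch.setattr(settings.G, "Env", None)
    with pytest.raises(EnvironmentInactiveError):
        CVExperiment(LogisticRegression, dict())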
class BaseOptimizationProtocol(metaclass=MergedOptimizationMeta):
    def __init__(
        self,
        target_metric=None,
        iterations=1,
        verbose=1,
        read_experiments=True,
        reporter_parameters=None,
    ):
        """Base class for intermediate base optimization protocol classes

        Parameters
        ----------
        target_metric: Tuple, default=('oof', <first key in :attr:`environment.Environment.metrics`>)
            A path denoting the metric to be used to compare completed Experiments within the
            Optimization Protocol. The first value should be one of ['oof', 'holdout', 'in_fold'].
            The second value should be the name of a metric being recorded according to the values
            supplied in :attr:`environment.Environment.metrics_params`. See the documentation for
            :func:`metrics.get_formatted_target_metric` for more info. Any values returned by, or
            given as the `target_metric` input to, :func:`metrics.get_formatted_target_metric` are
            acceptable values for :attr:`BaseOptimizationProtocol.target_metric`
        iterations: Int, default=1
            The number of distinct experiments to execute
        verbose: Int 0, 1, or 2, default=1
            Verbosity mode for console logging. 0: Silent. 1: Show only logs from the Optimization
            Protocol. 2: In addition to logs shown when verbose=1, also show the logs from individual
            Experiments
        read_experiments: Boolean, default=True
            If True, all Experiment records that fit in the current :attr:`space` and guidelines,
            and match :attr:`algorithm_name`, will be read in and used to fit any optimizers
        reporter_parameters: Dict, or None, default=None
            Additional parameters passed to :meth:`reporting.OptimizationReporter.__init__`. Note:
            Unless provided explicitly, the key "do_maximize" will be added by default to
            `reporter_parameters`, with a value inferred from the `direction` of :attr:`target_metric`
            in `G.Env.metrics`. In nearly all cases, the "do_maximize" key should be ignored,
            as there are very few reasons to explicitly include it

        Notes
        -----
        By default, 'script_backup' for Experiments is blacklisted when executed within
        :class:`BaseOptimizationProtocol` since it would just repeatedly create copies of the same,
        unchanged file (this file). So don't expect any script_backup files for Experiments executed
        during optimization rounds"""
        #################### Optimization Protocol Parameters ####################
        self.target_metric = target_metric
        self.iterations = iterations
        self.verbose = verbose
        self.read_experiments = read_experiments
        self.reporter_parameters = reporter_parameters or {}

        #################### Experiment Guidelines ####################
        self.model_initializer = None
        self.model_init_params = None
        self.model_extra_params = None
        self.feature_selector = None
        self.preprocessing_pipeline = None
        self.preprocessing_params = None
        self.notes = None
        self.do_raise_repeated = True

        #################### Search Parameters ####################
        self.dimensions = []
        self.search_bounds = dict()

        self.space = None
        self.similar_experiments = []
        self.best_experiment = None
        self.best_score = None
        self.successful_iterations = 0
        self.skipped_iterations = 0
        self.tested_keys = []
        self._search_space_size = None

        self.current_init_params = None
        self.current_extra_params = None

        #################### Identification Attributes ####################
        self.algorithm_name = None
        self.module_name = None
        self.current_experiment = None
        self.current_score = None

        #################### Keras-Specific Attributes ####################
        self.dummy_layers = []
        self.dummy_compile_params = dict()
        self.init_iter_attrs = []
        self.extra_iter_attrs = []

        self.logger = None
        self._preparation_workflow()
        self.do_maximize = G.Env.metrics[self.target_metric[-1]].direction == "max"
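
    # Illustrative sketch of accepted `target_metric` forms (example metric names are assumptions,
    # not defaults):
    #   * ("oof", "roc_auc_score") -- fully-specified (dataset type, metric name) pair
    #   * "roc_auc_score"          -- bare metric name; `metrics.get_formatted_target_metric` is
    #                                 expected to expand it to ("oof", "roc_auc_score")
    #   * None                     -- falls back to ("oof", <first key in `Env.metrics`>)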

    ##################################################
    # Core Methods:
    ##################################################
    # TODO: Add `model` here, with a `TranslateTrace` decorator, and document it below
    def set_experiment_guidelines(
        self,
        model_initializer,
        model_init_params,
        model_extra_params=None,
        feature_selector=None,
        preprocessing_pipeline=None,
        preprocessing_params=None,
        notes=None,
        do_raise_repeated=True,
    ):
        """Provide arguments necessary to instantiate :class:`experiments.CVExperiment`. This method
        has the same signature as :meth:`experiments.BaseExperiment.__init__` except where noted

        Parameters
        ----------
        model_initializer: Class, or functools.partial, or class instance
            The algorithm class being used to initialize a model
        model_init_params: Dict, or object
            The dictionary of arguments given when creating a model instance with
            `model_initializer` via the `__init__` method of :class:`models.Model`. Any kwargs that
            are considered valid by the `__init__` method of `model_initializer` are
            valid in `model_init_params`
        model_extra_params: Dict, or None, default=None
            A dictionary of extra parameters passed to :class:`models.Model`. This is used to
            provide parameters to models' non-initialization methods (like `fit`, `predict`,
            `predict_proba`, etc.), and for neural networks
        feature_selector: List of str, callable, list of booleans, default=None
            The value provided when splitting apart the input data for all provided DataFrames.
            `feature_selector` is provided as the second argument for calls to
            `pandas.DataFrame.loc` in :meth:`BaseExperiment._initial_preprocessing`. If None,
            `feature_selector` is set to all columns in :attr:`train_dataset`, less
            :attr:`target_column`, and :attr:`id_column`
        preprocessing_pipeline: ...
            ... Experimental...
        preprocessing_params: ...
            ... Experimental...
        notes: String, or None, default=None
            Additional information about the Experiment that will be saved with the Experiment's
            description result file. This serves no purpose other than to facilitate saving
            Experiment details in a more readable format
        do_raise_repeated: Boolean, default=True
            If True and this Experiment locates a previous Experiment's results with matching
            Environment and Hyperparameter Keys, a RepeatedExperimentError will be raised. Else, a
            warning will be logged

        Notes
        -----
        The `auto_start` kwarg is not available here because
        :meth:`BaseOptimizationProtocol._execute_experiment` sets it to False in order to check for
        duplicated keys before running the whole Experiment. This is the most notable difference
        between calling :meth:`set_experiment_guidelines` and instantiating
        :class:`experiments.CVExperiment`"""
        self.model_initializer = model_initializer

        self.model_init_params = identify_algorithm_hyperparameters(self.model_initializer)
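        # Start from the hyperparameters identified for `model_initializer`, then overlay the given
        # values. If `model_init_params` is not a dict (e.g. a Keras `build_fn` callable), the
        # `update` call raises TypeError, and the value is wrapped as `build_fn` instead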
        try:
            self.model_init_params.update(model_init_params)
        except TypeError:
            self.model_init_params.update(dict(build_fn=model_init_params))

        self.model_extra_params = model_extra_params if model_extra_params is not None else {}
        self.feature_selector = feature_selector if feature_selector is not None else []
        self.preprocessing_pipeline = preprocessing_pipeline or {}
        self.preprocessing_params = preprocessing_params if preprocessing_params is not None else {}

        self.notes = notes
        self.do_raise_repeated = do_raise_repeated

        if self.do_raise_repeated is False:
            G.warn_("WARNING: Setting `do_raise_repeated`=False allows duplicated Experiments")

        self.algorithm_name, self.module_name = identify_algorithm(self.model_initializer)
        self._validate_guidelines()

        #################### Deal with Keras ####################
        if self.module_name == "keras":
            reusable_build_fn, reusable_wrapper_params, dummy_layers, dummy_compile_params = keras_prep_workflow(
                self.model_initializer,
                self.model_init_params["build_fn"],
                self.model_extra_params,
                self.source_script,
            )
            self.model_init_params = dict(build_fn=reusable_build_fn)
            self.model_extra_params = reusable_wrapper_params
            self.dummy_layers = dummy_layers
            self.dummy_compile_params = dummy_compile_params
            # FLAG: Deal with capitalization conflicts when comparing similar experiments: `optimizer`='Adam' vs 'adam'

        self.set_dimensions()

    def set_dimensions(self):
        """Locate given hyperparameters that are `space` choice declarations and add them to
        :attr:`dimensions`"""
        all_dimension_choices = []

        #################### Remap Extra Objects ####################
        if self.module_name == "keras":
            from keras.initializers import Initializer as KerasInitializer
            from keras.callbacks import Callback as KerasCB

            self.init_iter_attrs.append(lambda _p, _k, _v: isinstance(_v, KerasInitializer))
            self.extra_iter_attrs.append(lambda _p, _k, _v: isinstance(_v, KerasCB))

        #################### Collect Choice Dimensions ####################
        init_dim_choices = get_choice_dimensions(self.model_init_params, self.init_iter_attrs)
        extra_dim_choices = get_choice_dimensions(self.model_extra_params, self.extra_iter_attrs)

        for (path, choice) in init_dim_choices:
            choice._name = ("model_init_params",) + path
            all_dimension_choices.append(choice)

        for (path, choice) in extra_dim_choices:
            choice._name = ("model_extra_params",) + path
            all_dimension_choices.append(choice)

        self.dimensions = all_dimension_choices

        if self.module_name == "keras":
            self.model_extra_params = link_choice_ids(
                self.dummy_layers,
                self.dummy_compile_params,
                self.model_extra_params,
                self.dimensions,
            )
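
    # Illustrative sketch of how choice declarations become dimensions (the names below are
    # assumptions for the example, not part of this module): given guidelines such as
    #
    #     opt.set_experiment_guidelines(
    #         model_initializer=XGBClassifier,
    #         model_init_params=dict(
    #             max_depth=Integer(2, 10),
    #             learning_rate=Real(0.0001, 0.5),
    #             booster=Categorical(["gbtree", "dart"]),
    #         ),
    #     )
    #
    # `set_dimensions` would collect three dimensions, named ("model_init_params", "max_depth"),
    # ("model_init_params", "learning_rate"), and ("model_init_params", "booster"). `Integer`,
    # `Real`, and `Categorical` refer to the library's space choice classes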

    def go(self):
        """Begin hyperparameter optimization process after experiment guidelines have been set and
        search dimensions are in place. This process includes the following: setting the
        hyperparameter space; locating similar experiments to be used as learning material for
        :class:`SKOptimizationProtocol` subclasses; and executing :meth:`_optimization_loop`, which
        actually sets off the Experiment execution process"""
        if self.model_initializer is None:
            raise ValueError("Experiment guidelines must be set before starting optimization")

        _reporter_params = dict(dict(do_maximize=self.do_maximize), **self.reporter_parameters)
        self.logger = OptimizationReporter([_.name for _ in self.dimensions], **_reporter_params)

        self.tested_keys = []
        self._set_hyperparameter_space()
        self._find_similar_experiments()

        loop_start_time = datetime.now()
        self._optimization_loop()
        loop_end_time = datetime.now()
        G.log_(f"Optimization loop completed in {loop_end_time - loop_start_time}")
        G.log_(f'Best score was {self.best_score} from Experiment "{self.best_experiment}"')
        self._clean_up_optimization()

    ##################################################
    # Helper Methods:
    ##################################################
    def _optimization_loop(self, iteration=0):
        """Perform Experiment execution loop while `iteration` < `iterations`. At each iteration, an
        Experiment will be executed, its results will be logged, and it will be compared to the
        current best experiment

        Parameters
        ----------
        iteration: Int, default=0
            The current iteration in the optimization loop"""
        self.logger.print_optimization_header()

        while iteration < self.iterations:
            try:
                self._execute_experiment()
            except RepeatedExperimentError:
                # G.debug_(F'Skipping repeated Experiment: {_ex!s}\n')
                if len(self.similar_experiments) + len(self.tested_keys) >= self.search_space_size:
                    G.log_(f"Hyperparameter search space has been exhausted")
                    break
                self.skipped_iterations += 1
                continue
            except StopIteration:
                if len(self.similar_experiments) + len(self.tested_keys) >= self.search_space_size:
                    G.log_(f"Hyperparameter search space has been exhausted")
                    break
                # G.debug_(f'Re-initializing hyperparameter grid after testing {len(self.tested_keys)} keys')
                self._set_hyperparameter_space()
                continue

            self.logger.print_result(
                self.current_hyperparameters_list,
                self.current_score,
                experiment_id=self.current_experiment.experiment_id,
            )

            if (
                (self.best_experiment is None)  # First evaluation
                or (self.do_maximize and (self.best_score < self.current_score))  # New best max
                or (not self.do_maximize and (self.best_score > self.current_score))  # New best min
            ):
                self.best_experiment = self.current_experiment.experiment_id
                self.best_score = self.current_score

            iteration += 1

    def _execute_experiment(self):
        """Instantiate and run a :class:`experiments.CVExperiment` after checking for duplicate keys

        Notes
        -----
        As described in the Notes of :meth:`BaseOptimizationProtocol.set_experiment_guidelines`, the
        `auto_start` kwarg of :meth:`experiments.CVExperiment.__init__` is set to False in order to
        check for duplicated keys"""
        self._update_current_hyperparameters()

        self.current_experiment = CVExperiment(
            # model=None,  # TODO: May need to pass `model` from `set_experiment_guidelines`
            model_initializer=self.model_initializer,
            model_init_params=self.current_init_params,
            model_extra_params=self.current_extra_params,
            feature_selector=self.feature_selector,
            preprocessing_pipeline=self.preprocessing_pipeline,
            preprocessing_params=self.preprocessing_params,
            notes=self.notes,
            do_raise_repeated=self.do_raise_repeated,
            auto_start=False,
        )

        self.current_experiment.preparation_workflow()

        # Future Hunter, if multiple cross_experiment_keys are ever supported, this will be a problem. Should've fixed it earlier, dummy
        if self.current_experiment.hyperparameter_key.key not in self.tested_keys:
            self.tested_keys.append(self.current_experiment.hyperparameter_key.key)

        self.current_experiment.experiment_workflow()
        self.current_score = get_path(
            self.current_experiment.last_evaluation_results, self.target_metric
        )
        self.successful_iterations += 1
        self._clean_up_experiment()

    @staticmethod
    def _clean_up_optimization():
        """Perform any cleanup necessary after completion of the optimization loop. Most notably,
        this handles removal of temporary model files created for Keras optimization"""
        for (root, dirs, files) in walk(TEMP_MODULES_DIR_PATH, topdown=False):
            for file in files:
                if file.startswith("__temp_"):
                    remove(f"{root}/{file}")
            try:
                rmdir(root)
            except OSError:
                G.warn_(f"Unidentified file found in temporary directory: {root}")

    def _clean_up_experiment(self):
        """Perform any cleanup necessary after completion of an Experiment"""
        if self.module_name == "keras":
            K.clear_session()

    def _update_current_hyperparameters(self):
        """Update :attr:`current_init_params`, and :attr:`current_extra_params` according to the
        upcoming set of hyperparameters to be searched"""
        current_hyperparameters = self._get_current_hyperparameters().items()

        init_params = {
            _k[1:]: _v for _k, _v in current_hyperparameters if _k[0] == "model_init_params"
        }
        extra_params = {
            _k[1:]: _v for _k, _v in current_hyperparameters if _k[0] == "model_extra_params"
        }
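        # For example (hypothetical values): a searched entry {("model_init_params", "max_depth"): 5}
        # lands in `init_params` as {("max_depth",): 5}, while
        # {("model_extra_params", "fit", "eval_metric"): "logloss"} lands in `extra_params` as
        # {("fit", "eval_metric"): "logloss"}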
        # TODO: Replace above two with `general_utils.subdict` call that modifies key to a slice

        # FLAG: At this point, `dummy_layers` shows "kernel_initializer" as `orthogonal` instance with "__hh" attrs
        # FLAG: HOWEVER, the `orthogonal` instance does have `gain` set to the correct dummy value, ...
        # FLAG: ... so it might be ok, as long as experiment matching can still work with that

        self.current_init_params = deep_restricted_update(
            self.model_init_params, init_params, iter_attrs=self.init_iter_attrs
        )
        self.current_extra_params = deep_restricted_update(
            self.model_extra_params, extra_params, iter_attrs=self.extra_iter_attrs
        )

        if (self.module_name == "keras") and ("callbacks" in self.current_extra_params):
            self.current_extra_params["callbacks"] = reinitialize_callbacks(
                self.current_extra_params["callbacks"]
            )

        # No need to reinitialize Keras `initializers` - Their values are passed to `build_fn` via extra `params`

    ##################################################
    # Abstract Methods:
    ##################################################
    @abstractmethod
    def _set_hyperparameter_space(self):
        """Initialize :attr:`space` according to the provided hyperparameter search dimensions"""
        raise NotImplementedError()

    @abstractmethod
    def _get_current_hyperparameters(self):
        """Retrieve the upcoming set of hyperparameters to be searched

        Returns
        -------
        current_hyperparameters: Dict
            The next set of hyperparameters that will be searched"""
        raise NotImplementedError()

    @property
    @abstractmethod
    def search_space_size(self):
        """The number of different hyperparameter permutations possible given the current
        hyperparameter search space"""
        raise NotImplementedError()

    ##################################################
    # Utility Methods:
    ##################################################
    def _preparation_workflow(self):
        """Perform housekeeping tasks to prepare for core functionality like validating the
        `Environment` and parameters, and updating the verbosity of individual Experiments"""
        self._validate_environment()
        self._validate_parameters()
        self._update_verbosity()

    @staticmethod
    def _validate_environment():
        """Check that there is a currently active and unoccupied Environment instance"""
        if G.Env is None:
            raise EnvironmentInactiveError()
        if G.Env.current_task is None:
            G.log_(f'Validated Environment with key: "{G.Env.cross_experiment_key}"')
        else:
            raise EnvironmentInvalidError("Must finish current task before starting a new one")

    def _validate_parameters(self):
        """Ensure provided input parameters are properly formatted"""
        self.target_metric = get_formatted_target_metric(
            self.target_metric, G.Env.metrics, default_dataset="oof"
        )

    def _validate_guidelines(self):
        """Ensure provided Experiment guideline parameters are properly formatted"""
        target_column = G.Env.target_column
        id_column = G.Env.id_column
        train_dataset = G.Env.train_dataset.copy()

        self.feature_selector = self.feature_selector or train_dataset.columns.values
        restricted_cols = [_ for _ in target_column + [id_column] if _ is not None]
        self.feature_selector = [_ for _ in self.feature_selector if _ not in restricted_cols]
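        # For example (hypothetical columns): with `target_column=["diagnosis"]`, `id_column="id"`,
        # and no explicit `feature_selector`, every other column of `train_dataset` is selected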

    def _find_similar_experiments(self):
        """Look for Experiments that were performed under similar conditions (algorithm and
        cross-experiment parameters)"""
        if self.read_experiments is False:
            return

        self.logger.print_saved_results_header()

        model_params = dict(
            model_init_params=self.model_init_params,
            model_extra_params=self.model_extra_params,
            preprocessing_pipeline=self.preprocessing_pipeline,
            preprocessing_params=self.preprocessing_params,
            feature_selector=self.feature_selector,
        )

        if self.module_name == "keras":
            model_params["model_init_params"]["layers"] = self.dummy_layers
            model_params["model_init_params"]["compile_params"] = self.dummy_compile_params

        experiment_finder = finder_selector(self.module_name)(
            self.algorithm_name,
            self.module_name,
            G.Env.cross_experiment_key,
            self.target_metric,
            self.space,
            G.Env.result_paths["global_leaderboard"],
            G.Env.result_paths["description"],
            model_params,
        )
        experiment_finder.find()
        self.similar_experiments = experiment_finder.similar_experiments

    def _update_verbosity(self):
        """Update :attr:`environment.Environment.reporting_params` if required by :attr:`verbose`"""
        #################### Mute non-critical console logging for Experiments ####################
        if self.verbose in [0, 1]:
            G.Env.reporting_params.setdefault("console_params", {})["level"] = "CRITICAL"

        #################### Blacklist 'script_backup' ####################
        G.Env.result_paths["script_backup"] = None