def __init__(self,
                 domain: Domain,
                 transform: Transform = None,
                 pretrained_model_config_path=None,
                 model_size="standard",
                 **kwargs):
        """Initialise the strategy and create its per-run working directory.

        Parameters
        ----------
        domain : Domain
            Passed straight through to ``Strategy.__init__``.
        transform : Transform, optional
            Passed straight through to ``Strategy.__init__``.
        pretrained_model_config_path : optional
            Stored unchanged on ``self._pretrained_model_config_path``.
        model_size : optional
            Stored unchanged on ``self._model_size``. Defaults to "standard".
        **kwargs
            Accepted but not used by this initialiser.
        """
        Strategy.__init__(self, domain, transform)

        # Create directories to store temporary files. Each instance gets its
        # own ``<summit config>/dro/<uuid>`` directory.
        summit_config_path = get_summit_config_path()
        self.uuid_val = uuid.uuid4()  # Unique identifier for this run
        tmp_dir = summit_config_path / "dro" / str(self.uuid_val)
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(tmp_dir, exist_ok=True)

        self._pretrained_model_config_path = pretrained_model_config_path
        self._infer_model_path = tmp_dir
        self._model_size = model_size
        self.prev_param = None
Пример #2
0
    def run(self, **kwargs):
        """Run the closed loop experiment cycle and report progress to Neptune.

        Every iteration asks ``self.strategy`` for suggestions, runs them with
        ``self.experiment``, sends the best value observed so far for each
        objective (plus the hypervolume when there is more than one objective)
        to Neptune, optionally checkpoints the runner, and finally applies the
        stall / restart stopping rule.

        Parameters
        ----------
        save_freq : int, optional
            The frequency with which to checkpoint the state of the optimization. Defaults to None.
        save_at_end : bool, optional
            Save the state of the optimization at the end of a run, even if it is stopped early.
            Default is True.
        save_dir : str, optional
            Base directory for checkpoints. Files are written to
            ``<save_dir>/runner/<uuid>``. Defaults to the path returned by
            ``get_summit_config_path()``.
        """
        # Set parameters
        prev_res = None
        self.restarts = 0
        n_objs = len(self.experiment.domain.output_variables)
        fbest_old = np.zeros(n_objs)
        fbest = np.zeros(n_objs)

        # Serialization
        save_freq = kwargs.get("save_freq")
        save_dir = kwargs.get("save_dir", str(get_summit_config_path()))
        self.uuid_val = uuid.uuid4()
        save_dir = pathlib.Path(save_dir) / "runner" / str(self.uuid_val)
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(save_dir, exist_ok=True)
        save_at_end = kwargs.get("save_at_end", True)

        # Create neptune experiment, unless one was supplied on the instance
        if self.neptune_exp is None:
            session = Session(backend=HostedNeptuneBackend())
            proj = session.get_project(self.neptune_project)
            neptune_exp = proj.create_experiment(
                name=self.neptune_experiment_name,
                description=self.neptune_description,
                upload_source_files=self.neptune_files,
                logger=self.logger,
                tags=self.neptune_tags,
            )
        else:
            neptune_exp = self.neptune_exp

        # Pre-bind the loop state so the final checkpoint below still has a
        # well-defined name when max_iterations is 0 and the loop never runs.
        i = 0
        nstop = 0

        # Run optimization loop
        for i in progress_bar(range(self.max_iterations)):
            # Get experiment suggestions
            if i == 0:
                k = self.n_init if self.n_init is not None else self.batch_size
                next_experiments = self.strategy.suggest_experiments(
                    num_experiments=k)
            else:
                next_experiments = self.strategy.suggest_experiments(
                    num_experiments=self.batch_size, prev_res=prev_res)
            prev_res = self.experiment.run_experiments(next_experiments)

            # Send best objective values to Neptune
            for j, v in enumerate(self.experiment.domain.output_variables):
                if i > 0:
                    fbest_old[j] = fbest[j]
                if v.maximize:
                    fbest[j] = self.experiment.data[v.name].max()
                else:
                    fbest[j] = self.experiment.data[v.name].min()

                neptune_exp.send_metric(v.name + "_best", fbest[j])

            # Send hypervolume for multiobjective experiments
            if n_objs > 1:
                output_names = [
                    v.name for v in self.experiment.domain.output_variables
                ]
                data = self.experiment.data[output_names].copy()
                # Negate maximized objectives so everything is a minimization
                for v in self.experiment.domain.output_variables:
                    if v.maximize:
                        data[(v.name, "DATA")] = -1.0 * data[v.name]
                y_pareto, _ = pareto_efficient(data.to_numpy(), maximize=False)
                hv = hypervolume(y_pareto, self.ref)
                neptune_exp.send_metric("hypervolume", hv)

            # Save state every ``save_freq`` iterations.
            # (The former ``if not save_dir: os.remove(file)`` branch was
            # unreachable because save_dir is always a truthy Path.)
            if save_freq is not None and i % save_freq == 0:
                checkpoint = save_dir / f"iteration_{i}.json"
                self.save(checkpoint)
                neptune_exp.send_artifact(str(checkpoint))

            # Stop if no improvement: the stall counter is reset whenever every
            # objective moved by more than f_tol, and during the first two
            # iterations; otherwise it is incremented.
            compare = np.abs(fbest - fbest_old) > self.f_tol
            if all(compare) or i <= 1:
                nstop = 0
            else:
                nstop += 1

            if self.max_same is not None:
                if nstop >= self.max_same and self.restarts >= self.max_restarts:
                    self.logger.info(
                        f"{self.strategy.__class__.__name__} stopped after {i+1} iterations and {self.restarts} restarts."
                    )
                    break
                elif nstop >= self.max_same:
                    # Stalled but restarts remain: reset the strategy and
                    # start again without previous results.
                    nstop = 0
                    prev_res = None
                    self.strategy.reset()
                    self.restarts += 1

        # Save at end
        if save_at_end:
            checkpoint = save_dir / f"iteration_{i}.json"
            self.save(checkpoint)
            neptune_exp.send_artifact(str(checkpoint))

        # Stop the neptune experiment
        neptune_exp.stop()
Пример #3
0
    def run(self, **kwargs):
        """Run the closed loop experiment cycle

        Every iteration asks ``self.strategy`` for suggestions, runs them with
        ``self.experiment``, tracks the best value observed so far for each
        objective, optionally checkpoints the runner, and finally applies the
        stall / restart stopping rule.

        Parameters
        ----------
        save_freq : int, optional
            The frequency with which to checkpoint the state of the optimization. Defaults to None.
        save_at_end : bool, optional
            Save the state of the optimization at the end of a run, even if it is stopped early.
            Default is True.
        save_dir : str, optional
            Base directory for checkpoints. Files are written to
            ``<save_dir>/runner/<uuid>``. Defaults to the path returned by
            ``get_summit_config_path()``.
        """
        # Serialization settings
        save_freq = kwargs.get("save_freq")
        save_dir = kwargs.get("save_dir", str(get_summit_config_path()))
        self.uuid_val = uuid.uuid4()
        save_dir = pathlib.Path(save_dir) / "runner" / str(self.uuid_val)
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(save_dir, exist_ok=True)
        save_at_end = kwargs.get("save_at_end", True)

        n_objs = len(self.experiment.domain.output_variables)
        fbest_old = np.zeros(n_objs)
        fbest = np.zeros(n_objs)
        prev_res = None
        self.restarts = 0

        # Pre-bind the loop state so the final checkpoint below still has a
        # well-defined name when max_iterations is 0 and the loop never runs.
        i = 0
        nstop = 0

        for i in progress_bar(range(self.max_iterations)):
            # Get experiment suggestions
            if i == 0:
                k = self.n_init if self.n_init is not None else self.batch_size
                next_experiments = self.strategy.suggest_experiments(
                    num_experiments=k)
            else:
                next_experiments = self.strategy.suggest_experiments(
                    num_experiments=self.batch_size, prev_res=prev_res)
            prev_res = self.experiment.run_experiments(next_experiments)

            # Track the best value seen so far for every objective
            for j, v in enumerate(self.experiment.domain.output_variables):
                if i > 0:
                    fbest_old[j] = fbest[j]
                if v.maximize:
                    fbest[j] = self.experiment.data[v.name].max()
                else:
                    fbest[j] = self.experiment.data[v.name].min()

            # Save state every ``save_freq`` iterations
            if save_freq is not None and i % save_freq == 0:
                self.save(save_dir / f"iteration_{i}.json")

            # The stall counter is reset whenever every objective moved by
            # more than f_tol, and during the first two iterations; otherwise
            # it is incremented.
            compare = np.abs(fbest - fbest_old) > self.f_tol
            if all(compare) or i <= 1:
                nstop = 0
            else:
                nstop += 1

            if self.max_same is not None:
                if nstop >= self.max_same and self.restarts >= self.max_restarts:
                    self.logger.info(
                        f"{self.strategy.__class__.__name__} stopped after {i+1} iterations and {self.restarts} restarts."
                    )
                    break
                elif nstop >= self.max_same:
                    # Stalled but restarts remain: reset the strategy and
                    # start again without previous results.
                    nstop = 0
                    prev_res = None
                    self.strategy.reset()
                    self.restarts += 1

        # Save at end
        if save_at_end:
            self.save(save_dir / f"iteration_{i}.json")
Пример #4
0
    def suggest_experiments(self,
                            num_experiments,
                            prev_res: DataSet = None,
                            **kwargs):
        """Suggest experiments using TSEMO

        Parameters
        ----------
        num_experiments : int
            The number of experiments (i.e., samples) to generate
        prev_res : :class:`~summit.utils.data.DataSet`, optional
            Dataset with data from previous experiments.
            If no data is passed, then latin hypercube sampling will
            be used to suggest an initial design.
        num_restarts : int, optional
            Keyword-only debugging knob: number of optimization restarts used
            when fitting each model. Defaults to 100.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments, or None when the
            internal NSGAII optimisation produces no candidates.

        Raises
        ------
        TypeError
            If a model kernel is not Exponential, Matern32, Matern52 or RBF.
        RuntimeError
            If spectral sampling fails on every one of ``self.n_retries``
            attempts.
        """
        # No previous data: suggest an LHS initial design (at least 2 points)
        if prev_res is None:
            lhs = LHS(self.domain)
            self.iterations += 1
            k = num_experiments if num_experiments > 1 else 2
            return lhs.suggest_experiments(k, criterion="maximin")

        # Only a single result after the first iteration: store it and fall
        # back to LHS again instead of fitting models.
        if self.iterations == 1 and len(prev_res) == 1:
            lhs = LHS(self.domain)
            self.iterations += 1
            self.all_experiments = prev_res
            return lhs.suggest_experiments(num_experiments)

        # Otherwise accumulate the new results with everything seen so far
        if self.all_experiments is None:
            self.all_experiments = prev_res
        else:
            self.all_experiments = self.all_experiments.append(prev_res)

        # Get inputs (decision variables) and outputs (objectives)
        inputs, outputs = self.transform.transform_inputs_outputs(
            self.all_experiments, transform_descriptors=True)
        if inputs.shape[0] < self.domain.num_continuous_dimensions():
            self.logger.warning(
                f"The number of examples ({inputs.shape[0]}) is less than the number of input dimensions ({self.domain.num_continuous_dimensions()})."
            )

        # Scale decision variables [0,1]
        inputs_scaled = (inputs - self.inputs_min.to_numpy()) / (
            self.inputs_max.to_numpy() - self.inputs_min.to_numpy())

        # Standardize objectives; the std is floored at 1e-5 to avoid dividing
        # by (near) zero
        self.output_mean = outputs.mean()
        std = outputs.std()
        std[std < 1e-5] = 1e-5
        self.output_std = std
        outputs_scaled = (outputs - self.output_mean.to_numpy()
                          ) / self.output_std.to_numpy()

        # Set up models: one per output variable
        input_dim = self.kern_dim
        self.models = {
            v.name: gpr(
                inputs_scaled.to_numpy(),
                outputs_scaled[[v.name]].to_numpy(),
                kernel=self.kernel(input_dim=input_dim, ARD=True),
            )
            for v in self.domain.output_variables
        }

        output_dim = len(self.domain.output_variables)
        rmse_train = np.zeros(output_dim)
        rmse_train_spectral = np.zeros(output_dim)
        lengthscales = [None] * output_dim
        variances = [None] * output_dim
        noises = [None] * output_dim
        rffs = [None] * output_dim
        num_restarts = kwargs.get("num_restarts",
                                  100)  # This is a kwarg solely for debugging
        self.logger.debug(
            f"Fitting models (number of optimization restarts={num_restarts})\n"
        )
        for i, (name, model) in enumerate(self.models.items()):
            # Constrain hyperparameters
            model.kern.lengthscale.constrain_bounded(np.sqrt(1e-3),
                                                     np.sqrt(1e3),
                                                     warning=False)
            model.kern.lengthscale.set_prior(GPy.priors.LogGaussian(0, 10),
                                             warning=False)
            model.kern.variance.constrain_bounded(np.sqrt(1e-3),
                                                  np.sqrt(1e3),
                                                  warning=False)
            model.kern.variance.set_prior(GPy.priors.LogGaussian(-6, 10),
                                          warning=False)
            model.Gaussian_noise.constrain_bounded(np.exp(-6),
                                                   1,
                                                   warning=False)

            # Train model
            model.optimize_restarts(num_restarts=num_restarts,
                                    max_iters=10000,
                                    parallel=True,
                                    verbose=False)

            # Log model hyperparameters
            lengthscales[i] = model.kern.lengthscale.values
            variances[i] = model.kern.variance.values[0]
            noises[i] = model.Gaussian_noise.variance.values[0]
            self.logger.debug(f"Model {name} lengthscales: {lengthscales[i]}")
            self.logger.debug(f"Model {name} variance: {variances[i]}")
            self.logger.debug(f"Model {name} noise: {noises[i]}")

            # Model validation
            rmse_train[i] = rmse(
                model.predict(inputs_scaled.to_numpy())[0],
                outputs_scaled[[name]].to_numpy(),
                mean=self.output_mean[name].values[0],
                std=self.output_std[name].values[0],
            )
            self.logger.debug(f"RMSE train {name} = {rmse_train[i].round(2)}")

            # Spectral sampling
            self.logger.debug(
                f"Spectral sampling {name} with {self.n_spectral_points} spectral points."
            )
            # Exact type match on purpose: only these four kernel classes map
            # to a known smoothness value.
            kernel_type = type(model.kern)
            if kernel_type is GPy.kern.Exponential:
                matern_nu = 1
            elif kernel_type is GPy.kern.Matern32:
                matern_nu = 3
            elif kernel_type is GPy.kern.Matern52:
                matern_nu = 5
            elif kernel_type is GPy.kern.RBF:
                matern_nu = np.inf
            else:
                raise TypeError(
                    "Spectral sample currently only works with Matern type kernels, including RBF."
                )

            # Sampling can fail numerically; log the error and retry
            for _ in range(self.n_retries):
                try:
                    rffs[i] = pyrff.sample_rff(
                        lengthscales=lengthscales[i],
                        scaling=np.sqrt(variances[i]),
                        noise=noises[i],
                        kernel_nu=matern_nu,
                        X=inputs_scaled.to_numpy(),
                        Y=outputs_scaled[[name]].to_numpy()[:, 0],
                        M=self.n_spectral_points,
                    )
                    break
                except (np.linalg.LinAlgError, ValueError) as e:
                    self.logger.error(e)
            if rffs[i] is None:
                raise RuntimeError(
                    f"Spectral sampling failed after {self.n_retries} retries."
                )

            # Evaluate the spectral sample on the training inputs as a column
            spectral_prediction = np.atleast_2d(
                rffs[i](inputs_scaled.to_numpy())).T
            rmse_train_spectral[i] = rmse(
                spectral_prediction,
                outputs_scaled[[name]].to_numpy(),
                mean=self.output_mean[name].values[0],
                std=self.output_std[name].values[0],
            )
            self.logger.debug(
                f"RMSE train spectral {name} = {rmse_train_spectral[i].round(2)}"
            )

        # Save spectral samples
        dp_results = get_summit_config_path() / "tsemo" / str(self.uuid_val)
        os.makedirs(dp_results, exist_ok=True)
        models_path = pathlib.Path(dp_results, "models.h5")
        pyrff.save_rffs(rffs, models_path)

        # NSGAII internal optimisation
        self.logger.info("Optimizing models using NSGAII.")
        optimizer = NSGA2(pop_size=self.pop_size)
        problem = TSEMOInternalWrapper(models_path,
                                       self.domain,
                                       n_var=self.kern_dim)
        termination = get_termination("n_gen", self.generations)
        self.internal_res = minimize(problem,
                                     optimizer,
                                     termination,
                                     seed=1,
                                     verbose=False)
        X = DataSet(self.internal_res.X, columns=self.columns)
        y = DataSet(self.internal_res.F,
                    columns=[v.name for v in self.domain.output_variables])

        # Without any candidates there is nothing to select from
        if X.shape[0] == 0 or y.shape[0] == 0:
            self.logger.warning("No suggestions found.")
            self.iterations += 1
            return None

        # Select points that give maximum hypervolume improvement
        self.hv_imp, indices = self._select_max_hvi(
            outputs_scaled, y, num_experiments)

        # Unscale data
        X = (X *
             (self.inputs_max.to_numpy() - self.inputs_min.to_numpy()) +
             self.inputs_min.to_numpy())
        y = y * self.output_std.to_numpy() + self.output_mean.to_numpy()

        # Join to get single dataset with inputs and outputs
        result = X.join(y)
        result = result.iloc[indices, :]

        # Do any necessary transformations back
        result[("strategy", "METADATA")] = "TSEMO"
        result = self.transform.un_transform(result,
                                             transform_descriptors=True)

        # Add model hyperparameters as metadata columns
        self.iterations += 1
        for i, name in enumerate(self.models):
            result[(f"{name}_variance", "METADATA")] = variances[i]
            result[(f"{name}_noise", "METADATA")] = noises[i]
            for var, lengthscale in zip(self.domain.input_variables,
                                        lengthscales[i]):
                result[(f"{name}_{var.name}_lengthscale",
                        "METADATA")] = lengthscale
            # Kept inside the loop so the column is created in the same
            # position as before (right after the first model's columns).
            result[("iterations", "METADATA")] = self.iterations
        return result
Пример #5
0
 def load(self, filepath=None):
     """Load the first random Fourier feature sample into ``self.rff``.

     Parameters
     ----------
     filepath : optional
         File to read with ``pyrff.load_rffs``. When omitted, the file
         ``models.h5`` inside ``<summit config>/tsemo/<uuid>`` is used, and
         that directory is created if it does not exist yet.
     """
     if filepath is not None:
         source = filepath
     else:
         model_dir = get_summit_config_path() / "tsemo" / str(self.uuid_val)
         os.makedirs(model_dir, exist_ok=True)
         source = model_dir / "models.h5"
     loaded = pyrff.load_rffs(source)
     self.rff = loaded[0]
Пример #6
0
 def save(self, filepath=None):
     """Write ``self.rff`` to disk with ``pyrff.save_rffs``.

     Parameters
     ----------
     filepath : optional
         Destination file. When omitted, the file ``models.h5`` inside
         ``<summit config>/tsemo/<uuid>`` is used, and that directory is
         created if it does not exist yet.
     """
     if filepath is not None:
         destination = filepath
     else:
         model_dir = get_summit_config_path() / "tsemo" / str(self.uuid_val)
         os.makedirs(model_dir, exist_ok=True)
         destination = model_dir / "models.h5"
     pyrff.save_rffs([self.rff], destination)