def __init__(self, mutable_params: Union[dict, list], abort_reward: int, kernel: Kern, gp_params: Dict[str, Any],
             ctrls: List[Controller],
             obs_template: Mapping[str, List[Union[List[str], np.ndarray]]],
             obs_varnames: List[str] = None,
             **kwargs):
    """
    Create an agent that tunes controller parameters with the SafeOpt algorithm
    (https://arxiv.org/abs/1509.01066).

    Auxiliary controllers act on the environment while a Gaussian process is used to adapt
    the controller parameters (mutable_params) so that the performance increases safely.

    :param mutable_params: safe initial controller parameters to adapt; for a dict only its values are used
    :param abort_reward: factor multiplied with the initial performance to obtain the penalty that is used
     when a limit was exceeded
    :param kernel: GPy kernel of the Gaussian process
    :param gp_params: Gaussian process settings; the keys 'bounds', 'noise_var', 'prior_mean',
     'safe_threshold' and 'explore_threshold' are required
    :param ctrls: controllers that are fed with the observations and exert actions on the environment
    :param obs_template:
        Template describing how the observation array is transformed and passed to the internal controllers.
        The key must match the name field of an internal controller.
        The values are a list of:
            - list of strings
                - matching variable names of the state
                - must match self.obs_varnames
                - will be substituted by the values at runtime
                - will be passed as an np.array of floats to the controller
            - np.ndarray of floats (passed statically to the controller)
            - a mixture of static and dynamic values in one parameter is not supported for performance reasons.
        The values will be passed as parameters to the controller's step function.
    :param obs_varnames: list of variable names that match the values of the observations passed in the
     act function. Will be automatically set by the Runner class
    :param kwargs: forwarded unchanged to the superclass constructor
    """
    # A dict only contributes its values; any other input is handed over as it is.
    if isinstance(mutable_params, dict):
        initial_values = list(mutable_params.values())
    else:
        initial_values = mutable_params
    self.params = MutableParams(initial_values)

    # Gaussian process configuration
    self.kernel = kernel
    self.bounds = gp_params['bounds']
    self.noise_var = gp_params['noise_var']
    self.prior_mean = gp_params['prior_mean']
    self.safe_threshold = gp_params['safe_threshold']
    self.explore_threshold = gp_params['explore_threshold']

    self.abort_reward = abort_reward

    # Nothing has been measured or optimised yet.
    self.episode_reward = None
    self.optimizer = None
    self.inital_performance = None
    self.last_best_performance = None
    self.performance = None
    self._iterations = 0

    super().__init__(ctrls, obs_template, obs_varnames, **kwargs)

    # columns of the per-episode history: performance and the parameters that produced it
    self.history.cols = ['J', 'Params']
def __init__(self, mutable_params: Union[dict, list], abort_reward: int, kernel: Kern, gp_params: Dict[str, Any],
             ctrls: Dict[str, Controller], observation_action_mapping: dict, history=None):
    """
    Agent to execute the SafeOpt algorithm (https://arxiv.org/abs/1509.01066) to control the environment by
    using auxiliary controllers and a Gaussian process to adapt the controller parameters (mutable_params)
    to safely increase the performance.

    :param mutable_params: safe initial controller parameters to adapt; for a dict only its values are used
    :param abort_reward: factor multiplied with the initial performance to obtain the penalty that is used
     when a limit was exceeded
    :param kernel: GPy kernel of the Gaussian process
    :param gp_params: Gaussian process settings; the keys 'bounds', 'noise_var', 'prior_mean',
     'safe_threshold' and 'explore_threshold' are required
    :param ctrls: controllers that are fed with the observations and exert actions on the environment
    :param observation_action_mapping: mapping from controller keys to observation keys, whose observation
     values will be passed to the controller
    :param history: storage of internal data; a new EmptyHistory is created when omitted
    """
    # A default of ``EmptyHistory()`` in the signature would be created only once and
    # then shared (and mutated through ``history.cols`` below) by every agent.
    if history is None:
        history = EmptyHistory()

    self.params = MutableParams(
        list(mutable_params.values()) if isinstance(mutable_params, dict) else mutable_params)

    # Gaussian process configuration
    self.kernel = kernel
    self.bounds = gp_params['bounds']
    self.noise_var = gp_params['noise_var']
    self.prior_mean = gp_params['prior_mean']
    self.safe_threshold = gp_params['safe_threshold']
    self.explore_threshold = gp_params['explore_threshold']

    self.abort_reward = abort_reward

    # Nothing has been measured or optimised yet.
    self.episode_reward = None
    self.optimizer = None
    self.inital_Performance = None
    self._iterations = 0

    super().__init__(ctrls, observation_action_mapping, history)

    # columns of the per-episode history: performance and the parameters that produced it
    self.history.cols = ['J', 'Params']
class SafeOptAgent(StaticControlAgent):
    """
    Agent that adapts controller parameters between episodes using SafeOpt
    (https://arxiv.org/abs/1509.01066) on top of a GPy Gaussian process.
    """

    def __init__(self, mutable_params: Union[dict, list], abort_reward: int, kernel: Kern, gp_params: Dict[str, Any],
                 ctrls: Dict[str, Controller], observation_action_mapping: dict, history=None):
        """
        Agent to execute the SafeOpt algorithm (https://arxiv.org/abs/1509.01066) to control the environment by
        using auxiliary controllers and a Gaussian process to adapt the controller parameters (mutable_params)
        to safely increase the performance.

        :param mutable_params: safe initial controller parameters to adapt; for a dict only its values are used
        :param abort_reward: factor multiplied with the initial performance to obtain the penalty that is used
         when a limit was exceeded
        :param kernel: GPy kernel of the Gaussian process
        :param gp_params: Gaussian process settings; the keys 'bounds', 'noise_var', 'prior_mean',
         'safe_threshold' and 'explore_threshold' are required
        :param ctrls: controllers that are fed with the observations and exert actions on the environment
        :param observation_action_mapping: mapping from controller keys to observation keys, whose observation
         values will be passed to the controller
        :param history: storage of internal data; a new EmptyHistory is created when omitted
        """
        # A default of ``EmptyHistory()`` in the signature would be created only once and
        # then shared (and mutated through ``history.cols`` below) by every agent.
        if history is None:
            history = EmptyHistory()

        self.params = MutableParams(
            list(mutable_params.values()) if isinstance(mutable_params, dict) else mutable_params)
        self.kernel = kernel
        self.bounds = gp_params['bounds']
        self.noise_var = gp_params['noise_var']
        self.prior_mean = gp_params['prior_mean']
        self.safe_threshold = gp_params['safe_threshold']
        self.explore_threshold = gp_params['explore_threshold']
        self.abort_reward = abort_reward
        self.episode_reward = None
        self.optimizer = None
        self.inital_Performance = None
        self._iterations = 0

        super().__init__(ctrls, observation_action_mapping, history)
        self.history.cols = ['J', 'Params']

    def reset(self):
        """
        Resets the kernel, the parameters, the episodic reward and the optimizer.

        :return: whatever the superclass reset returns
        """
        # Re-instantiate the kernel from its own description so that no fitted state survives.
        kernel_params = self.kernel.to_dict()
        module_name, _, class_name = kernel_params['class'].rpartition('.')
        kernel_cls = getattr(importlib.import_module(module_name), class_name)
        # 'class' only names the type and 'useGPU' is excluded as well; the rest are constructor arguments
        remaining_params = {k: v for k, v in kernel_params.items() if k not in {'class', 'useGPU'}}
        self.kernel = kernel_cls(**remaining_params)

        self.params.reset()
        self.optimizer = None
        self.episode_reward = 0
        self.inital_Performance = None
        self._iterations = 0
        return super().reset()

    def observe(self, reward, terminated):
        """
        Makes an observation of the environment.
        If terminated, calculates the performance and the next parameter values using SafeOpt.

        :param reward: reward of the simulation step; a falsy value (e.g. None) counts as 0
        :param terminated: True if the episode is over or aborted
        :return: None
        """
        self._iterations += 1
        self.episode_reward += reward or 0
        if terminated:
            # performance of the episode: summed reward divided by the number of observed steps
            self.episode_reward = self.episode_reward / self._iterations
            # safeopt update step
            self.update_params()
            # reset for new episode
            self.prepare_episode()
        # on other steps we don't need to do anything

    def update_params(self):
        """
        Sets up the Gaussian process after the first episode and updates the parameters after every
        following episode.
        """
        if self.optimizer is None:
            # First episode: its performance is the reference for the prior mean and both thresholds.
            self.inital_Performance = self.episode_reward
            J = self.inital_Performance

            # Constant prior mean of the GP: prior_mean times the initial performance.
            mf = GPy.core.Mapping(len(self.bounds), 1)
            mf.f = lambda x: self.prior_mean * J
            # the mean has no trainable parameters and does not depend on the input
            mf.update_gradients = lambda a, b: 0
            mf.gradients_X = lambda a, b: 0

            gp = GPy.models.GPRegression(np.array([self.params[:]]), np.array([[J]]), self.kernel,
                                         noise_var=self.noise_var, mean_function=mf)
            self.optimizer = SafeOptSwarm(gp, self.safe_threshold * J, bounds=self.bounds,
                                          threshold=self.explore_threshold * J)
        else:
            if np.isnan(self.episode_reward):
                # The episode was aborted: penalise it with abort_reward times the initial performance.
                self.episode_reward = self.abort_reward * self.inital_Performance
                # TODO: set reward to -inf and stop agent?
                logger.warning(
                    'UNSAFE! Limit exceeded, epsiode abort, give a reward of {} times the '
                    'initial reward'.format(self.abort_reward))

            J = self.episode_reward
            self.optimizer.add_new_data_point(self.params[:], J)

        self.history.append([J, self.params[:]])
        # parameters to be evaluated in the next episode
        self.params[:] = self.optimizer.optimize()

    def render(self):
        """
        Renders the results for the performance
        """
        plt.figure()
        self.optimizer.plot(1000)
        plt.show()

    def prepare_episode(self):
        """
        Prepares the next episode: resets the step counter and the accumulated reward and calls the
        superclass to reset the controllers.
        """
        self._iterations = 0
        # Without this the mean of the finished episode (or a NaN from an aborted one) would be
        # carried into the sum of the next episode.
        self.episode_reward = 0
        super().prepare_episode()
class SafeOptAgent(StaticControlAgent):
    """
    Agent that adapts controller parameters between episodes using SafeOpt
    (https://arxiv.org/abs/1509.01066) on top of a GPy Gaussian process.
    """

    def __init__(self, mutable_params: Union[dict, list], abort_reward: int, kernel: Kern, gp_params: Dict[str, Any],
                 ctrls: List[Controller],
                 obs_template: Mapping[str, List[Union[List[str], np.ndarray]]],
                 obs_varnames: List[str] = None,
                 **kwargs):
        """
        Agent to execute the SafeOpt algorithm (https://arxiv.org/abs/1509.01066) to control the environment by
        using auxiliary controllers and a Gaussian process to adapt the controller parameters (mutable_params)
        to safely increase the performance.

        :param mutable_params: safe initial controller parameters to adapt; for a dict only its values are used
        :param abort_reward: factor multiplied with the initial performance to obtain the penalty that is used
         when a limit was exceeded
        :param kernel: GPy kernel of the Gaussian process
        :param gp_params: Gaussian process settings; the keys 'bounds', 'noise_var', 'prior_mean',
         'safe_threshold' and 'explore_threshold' are required
        :param ctrls: controllers that are fed with the observations and exert actions on the environment
        :param obs_template:
            Template describing how the observation array is transformed and passed to the internal controllers.
            The key must match the name field of an internal controller.
            The values are a list of:
                - list of strings
                    - matching variable names of the state
                    - must match self.obs_varnames
                    - will be substituted by the values at runtime
                    - will be passed as an np.array of floats to the controller
                - np.ndarray of floats (passed statically to the controller)
                - a mixture of static and dynamic values in one parameter is not supported for performance reasons.
            The values will be passed as parameters to the controller's step function.
        :param obs_varnames: list of variable names that match the values of the observations passed in the
         act function. Will be automatically set by the Runner class
        :param kwargs: forwarded unchanged to the superclass constructor
        """
        self.params = MutableParams(
            list(mutable_params.values()) if isinstance(mutable_params, dict) else mutable_params)
        self.kernel = kernel
        self.bounds = gp_params['bounds']
        self.noise_var = gp_params['noise_var']
        self.prior_mean = gp_params['prior_mean']
        self.safe_threshold = gp_params['safe_threshold']
        self.explore_threshold = gp_params['explore_threshold']
        self.abort_reward = abort_reward
        self.episode_reward = None
        self.optimizer = None
        self.inital_performance = None
        self.last_best_performance = None
        self.performance = None
        # history index of the best episode so far; read by render(), written by update_params()
        self.best_episode = None
        self._iterations = 0

        super().__init__(ctrls, obs_template, obs_varnames, **kwargs)
        self.history.cols = ['J', 'Params']

    def reset(self):
        """
        Resets the kernel, the parameters, the episodic reward, the performance values and the optimizer.

        :return: whatever the superclass reset returns
        """
        # Re-instantiate the kernel from its own description so that no fitted state survives.
        kernel_params = self.kernel.to_dict()
        module_name, _, class_name = kernel_params['class'].rpartition('.')
        kernel_cls = getattr(importlib.import_module(module_name), class_name)
        # 'class' only names the type and 'useGPU' is excluded as well; the rest are constructor arguments
        remaining_params = {k: v for k, v in kernel_params.items() if k not in {'class', 'useGPU'}}
        self.kernel = kernel_cls(**remaining_params)

        self.params.reset()
        self.optimizer = None
        self.episode_reward = 0
        self.inital_performance = None
        self.last_best_performance = None
        self.performance = None
        self._iterations = 0
        return super().reset()

    def observe(self, reward, terminated):
        """
        Makes an observation of the environment.
        If terminated, calculates the performance and the next parameter values using SafeOpt.

        :param reward: reward of the simulation step; a falsy value (e.g. None) counts as 0
        :param terminated: True if the episode is over or aborted
        :return: None
        """
        self._iterations += 1
        self.episode_reward += reward or 0
        if terminated:
            # performance of the episode: summed reward divided by the number of observed steps
            self.performance = self.episode_reward / self._iterations
            # safeopt update step
            self.update_params()
            # reset for new episode
            self.prepare_episode()
        # on other steps we don't need to do anything

    def update_params(self):
        """
        Sets up the Gaussian process after the first episode and updates the parameters after every
        following episode. Also records the episode in the history and tracks the best episode.
        """
        if self.optimizer is None:
            # First episode: its performance is the reference for the prior mean and both thresholds.
            self.inital_performance = self.performance
            self.last_best_performance = self.performance

            # Freeze the value for the mean function. Reading self.performance inside the lambda
            # would make the prior mean follow the performance of every later episode.
            initial_performance = self.performance

            # Constant prior mean of the GP: prior_mean times the initial performance.
            mf = GPy.core.Mapping(len(self.bounds), 1)
            mf.f = lambda x: self.prior_mean * initial_performance
            # the mean has no trainable parameters and does not depend on the input
            mf.update_gradients = lambda a, b: 0
            mf.gradients_X = lambda a, b: 0

            gp = GPy.models.GPRegression(np.array([self.params[:]]),  # noqa
                                         np.array([[initial_performance]]), self.kernel,
                                         noise_var=self.noise_var, mean_function=mf)
            self.optimizer = SafeOptSwarm(gp, self.safe_threshold * initial_performance, bounds=self.bounds,
                                          threshold=self.explore_threshold * initial_performance)
        else:
            if np.isnan(self.episode_reward):
                # The episode was aborted: penalise it with abort_reward times the initial performance.
                self.performance = self.abort_reward * self.inital_performance
                # TODO: set reward to -inf and stop agent?
                logger.warning(
                    'UNSAFE! Limit exceeded, epsiode abort, give a reward of {} times the '
                    'initial reward'.format(self.abort_reward))

            self.optimizer.add_new_data_point(self.params[:], self.performance)

        self.history.append([self.performance, self.params[:]])
        # parameters to be evaluated in the next episode
        self.params[:] = self.optimizer.optimize()

        if self.has_improved:
            # if performance has improved store the current last index of the df
            self.best_episode = self.history.df.shape[0] - 1
            self.last_best_performance = self.performance

    def render(self) -> Figure:
        """
        Renders the results for the performance and marks the best episode in green.

        :return: the figure that was drawn on (left empty if the optimizer has more than 3 values in x)
        """
        figure, ax = plt.subplots()
        if self.optimizer.x.size > 3:
            # the GP landscape is only plotted for up to 3 dimensions
            logger.info('Plotting of GP landscape not possible for more than 3 dimensions')
            return figure
        self.optimizer.plot(1000, figure=figure)

        if self.best_episode is not None:
            # mark best performance in green
            y, x = self.history.df.loc[self.best_episode, ['J', 'Params']]

            if len(x) == 1:
                ax.scatter([x], [y], s=20 * 10, marker='x', linewidths=3, color='g')
            elif len(x) == 2:
                ax.plot(x[0], x[1], 'og')
            else:
                logger.warning('Choose appropriate number of control parameters')

        plt.show()
        return figure

    def prepare_episode(self):
        """
        Prepares the next episode: resets the step counter and the accumulated reward and calls the
        superclass to reset the controllers.
        """
        self._iterations = 0
        # Without this the reward sum of the finished episode (or a NaN from an aborted one) would be
        # carried into the next episode.
        self.episode_reward = 0
        super().prepare_episode()

    @property
    def has_improved(self) -> bool:
        """
        Defines if the performance increased or stayed constant

        :return: True, if the performance is greater than or equal to the last best performance, else False
        """
        return self.performance >= self.last_best_performance
# For 1D example, if Ki should be adjusted elif adjust == 'Ki': mutable_params = dict(currentI=MutableFloat(5)) current_dqp_iparams = PI_params(kP=0.005, kI=mutable_params['currentI'], limits=(-1, 1)) # For 2D example, choose Kp and Ki as mutable parameters elif adjust == 'Kpi': mutable_params = dict(currentP=MutableFloat(0.04), currentI=MutableFloat(11.8)) current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=mutable_params['currentI'], limits=(-1, 1)) # Define a current sourcing inverter as master inverter using the pi and droop parameters from above ctrl = MultiPhaseDQCurrentSourcingController(current_dqp_iparams, ts_sim=delta_t, ts_ctrl=undersample * delta_t, name='master', f_nom=net.freq_nom) i_ref = MutableParams([MutableFloat(f) for f in i_ref1]) ##################################### # Definition of the optimization agent # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example # Arguments described above # History is used to store results agent = SafeOptAgent(mutable_params, abort_reward, j_min, kernel, dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, safe_threshold=safe_threshold, explore_threshold=explore_threshold), [ctrl], dict(master=[[f'lc.inductor{k}.i' for k in '123'], i_ref]), history=FullHistory() ) #####################################
def run_experiment(len_kp, len_ki):
    """
    Run one SafeOpt experiment and report whether the agent became unsafe.

    The result is cached in ``{save_folder}/{len_kp:.4f},{len_ki:.4f}.txt``; if that file already exists
    its content is returned and nothing is simulated.

    NOTE(review): len_kp and len_ki are only used to build the file name here. The length scales given to
    the kernel are the hard-coded values below - confirm whether they were meant to be used instead.

    :param len_kp: value used as first part of the result file name
    :param len_ki: value used as second part of the result file name
    :return: truth value read from the result file if it exists, otherwise ``agent.unsafe`` after the run
     (None if ``include_simulate`` is false)
    :raises ValueError: if the module-level ``adjust`` is none of 'Kp', 'Ki' or 'Kpi'
    """
    result_file = f'{save_folder}/{len_kp:.4f},{len_ki:.4f}.txt'

    if isfile(result_file):
        with open(result_file, 'r') as f:
            # strtobool yields 0/1; convert so that both return paths give a truth value of the same kind
            return bool(strtobool(f.read().strip()))

    rew = Reward(i_limit=iLimit, i_nominal=iNominal, mu_c=mu, max_episode_steps=max_episode_steps,
                 obs_dict=[[f'lc.inductor{k}.i' for k in '123'], 'master.phase',
                           [f'master.SPI{k}' for k in 'dq0']])

    #####################################
    # Definitions for the GP
    prior_mean = 0  # factor of the GP prior mean which is multiplied with the first performance of the initial set
    noise_var = 0.001  # measurement noise sigma_omega
    prior_var = 2  # prior variance of the GP

    bounds = None
    lengthscale = None
    if adjust == 'Kp':
        bounds = [(0.0001, 0.1)]  # bounds on the input variable Kp
        lengthscale = [.025]  # length scale for the parameter variation [Kp] for the GP

    # For 1D example, if Ki should be adjusted
    if adjust == 'Ki':
        bounds = [(0, 20)]  # bounds on the input variable Ki
        lengthscale = [10]  # length scale for the parameter variation [Ki] for the GP

    # For 2D example, choose Kp and Ki as mutable parameters (below) and define bounds and lengthscale for both
    if adjust == 'Kpi':
        bounds = [(0.001, 0.07), (2, 150)]
        lengthscale = [0.012, 30.]

    if bounds is None:
        # fail here with a clear message instead of a TypeError at len(bounds) further down
        raise ValueError(f"adjust must be one of 'Kp', 'Ki' or 'Kpi', got {adjust!r}")

    # NOTE(review): df_len is not used anywhere else in this function.
    df_len = pd.DataFrame({'lengthscale': lengthscale,
                           'bounds': bounds,
                           'balanced_load': balanced_load,
                           'barrier_param_mu': mu})

    # The performance should not drop below the safe threshold, which is safe_threshold times the initial
    # performance of the initial safe (!) parameter set. Measurements below it are regarded as unsafe.
    safe_threshold = 0
    j_min = cal_j_min(phase_shift, amp_dev)  # Used for normalization

    # The algorithm will not try to expand any points that are below this threshold. This makes the algorithm
    # stop expanding points eventually.
    # The following variable is multiplied with the first performance of the initial set:
    explore_threshold = 0

    # Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in
    # case of limit exceeded
    # has to be negative due to normalized performance (regarding J_init = 1)
    abort_reward = 100 * j_min

    # Definition of the kernel
    kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True)

    #####################################
    # Definition of the controllers
    mutable_params = None
    current_dqp_iparams = None
    if adjust == 'Kp':
        # mutable_params = parameter (Kp gain of the current controller of the inverter) to be optimized using
        # the SafeOpt algorithm
        mutable_params = dict(currentP=MutableFloat(0.04))

        # Define the PI parameters for the current controller of the inverter
        current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=12, limits=(-1, 1))

    # For 1D example, if Ki should be adjusted
    elif adjust == 'Ki':
        mutable_params = dict(currentI=MutableFloat(5))
        current_dqp_iparams = PI_params(kP=0.005, kI=mutable_params['currentI'], limits=(-1, 1))

    # For 2D example, choose Kp and Ki as mutable parameters
    elif adjust == 'Kpi':
        mutable_params = dict(currentP=MutableFloat(0.04), currentI=MutableFloat(11.8))
        current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=mutable_params['currentI'],
                                        limits=(-1, 1))

    # Define a current sourcing inverter as master inverter using the PI parameters from above
    ctrl = MultiPhaseDQCurrentSourcingController(current_dqp_iparams, delta_t,
                                                 undersampling=undersample, name='master', f_nom=net.freq_nom)

    # current reference; mutable so that it can be switched while the simulation is running
    i_ref = MutableParams([MutableFloat(f) for f in i_ref1])

    #####################################
    # Definition of the optimization agent
    # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example
    # Arguments described above
    # History is used to store results
    agent = SafeOptAgent(
        mutable_params,
        abort_reward,
        j_min,
        kernel,
        dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, safe_threshold=safe_threshold,
             explore_threshold=explore_threshold),
        [ctrl],
        dict(master=[[f'lc.inductor{k}.i' for k in '123'], i_ref]),
        history=FullHistory(),
    )

    #####################################
    # Definition of the environment using a FMU created by OpenModelica
    # (https://www.openmodelica.org/)
    # Using an inverter supplying a load
    # - using the reward function described above as callable in the env
    # - viz_cols used to choose which measurement values should be displayed.
    #   Labels and grid are adjusted using the PlotTmpl (for more information, see UserGuide)
    # - inputs to the models are the connection points to the inverters (see user guide for more details)
    # - model outputs are the 3 currents through the inductors and the 3 voltages across the capacitors
    #
    # NOTE(review): everything below is assumed to belong to the include_simulate branch - confirm.
    if include_simulate:

        # Defining unbalanced loads sampling from Gaussian distribution with sdt = 0.2*mean
        # r_load = Load(R, 0.1 * R, balanced=balanced_load, tolerance=0.1)
        # l_load = Load(L, 0.1 * L, balanced=balanced_load, tolerance=0.1)
        # i_noise = Noise([0, 0, 0], [0.0023, 0.0015, 0.0018], 0.0005, 0.32)

        # if no noise should be included:
        r_load = Load(R, 0 * R, balanced=balanced_load)
        l_load = Load(L, 0 * L, balanced=balanced_load)

        def reset_loads():
            """Reset both loads; handed to the runner as prepare_mc_experiment."""
            r_load.reset()
            l_load.reset()

        plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder,
                              show_plots=show_plots)

        def switch_reference_and_get_l3(t):
            """Switch the current reference at t = 0.05 as a side effect and return the third inductance."""
            if t >= .05:
                i_ref[:] = i_ref2
            else:
                i_ref[:] = i_ref1

            return l_load.give_value(t, n=2)

        env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
                       # reward_fun=Reward().rew_fun,
                       reward_fun=rew.rew_fun_c,
                       # time_step=delta_t,
                       viz_cols=[
                           PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']],
                                    callback=plotter.xylables_i_abc,
                                    color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                                    style=[[None], ['--']]
                                    ),
                           PlotTmpl([[f'master.m{i}' for i in 'abc']],
                                    callback=lambda fig: plotter.update_axes(
                                        fig, title='Simulation',
                                        # raw string: '\m' and '\,' are LaTeX, not Python escape sequences
                                        ylabel=r'$m_{\mathrm{abc}}\,/\,\mathrm{}$')
                                    ),
                           PlotTmpl([[f'master.CVI{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']],
                                    callback=plotter.xylables_i_dq0,
                                    color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                                    style=[[None], ['--']]
                                    )
                       ],
                       log_level=logging.INFO,
                       viz_mode='episode',
                       max_episode_steps=max_episode_steps,
                       model_params={'lc.resistor1.R': partial(r_load.give_value, n=0),
                                     'lc.resistor2.R': partial(r_load.give_value, n=1),
                                     'lc.resistor3.R': partial(r_load.give_value, n=2),
                                     'lc.inductor1.L': partial(l_load.give_value, n=0),
                                     'lc.inductor2.L': partial(l_load.give_value, n=1),
                                     'lc.inductor3.L': switch_reference_and_get_l3},
                       model_path='../../omg_grid/grid.paper.fmu',
                       # model_path='../omg_grid/omg_grid.Grids.Paper_SC.fmu',
                       net=net,
                       history=FullHistory(),
                       action_time_delay=1 * undersample
                       )

        runner = MonteCarloRunner(agent, env)

        runner.run(num_episodes, n_mc=n_MC, visualise=True, prepare_mc_experiment=reset_loads)

        # cache the outcome so that a repeated call with the same arguments skips the simulation
        with open(result_file, 'w') as f:
            print(f'{agent.unsafe}', file=f)

        return agent.unsafe