def __init__(self, gridobj, env, rewardClass=None, observationClass=CompleteObservation, with_forecast=True):
    """
    Build the observation space attached to ``env``.

    Requires ``env.parameters`` and ``env.backend`` to be valid, since the
    forecast ("simulate") environment is built from a copy of the backend.

    Parameters
    ----------
    gridobj:
        Grid description object forwarded to the serializable base class.
    env:
        The environment this observation space is attached to.
    rewardClass:
        Reward used by the simulated environment; defaults to the
        environment's own reward class when ``None``.
    observationClass:
        Type of observation produced by this space.
    with_forecast: bool
        Whether forecasts ("simulate") are enabled.
    """
    # TODO DOCUMENTATION !!!
    SerializableObservationSpace.__init__(self, gridobj, observationClass=observationClass)
    self.with_forecast = with_forecast

    # The simulated environment runs with the forecast parameters: switch
    # ENV_DC to FORECAST_DC on a deep copy so the real env is untouched.
    self.parameters = copy.deepcopy(env.parameters)
    self.parameters.ENV_DC = self.parameters.FORECAST_DC

    # Fall back on the environment's reward when none was given.
    self.rewardClass = env.rewardClass if rewardClass is None else rewardClass

    # helpers
    self.action_helper_env = env.helper_action_env
    self.reward_helper = RewardHelper(rewardClass=self.rewardClass)
    self.reward_helper.initialize(env)

    extra_reward_classes = {nm: helper.rewardClass
                            for nm, helper in env.other_rewards.items()}

    # TODO here: have another backend maybe
    self._backend_obs = env.backend.copy()
    obs_env_cls = _ObsEnv.init_grid(self._backend_obs)
    self.obs_env = obs_env_cls(backend_instanciated=self._backend_obs,
                               obsClass=self.observationClass,
                               parameters=env.parameters,
                               reward_helper=self.reward_helper,
                               action_helper=self.action_helper_env,
                               thermal_limit_a=env._thermal_limit_a,
                               legalActClass=env.legalActClass,
                               donothing_act=env.helper_action_player(),
                               other_rewards=extra_reward_classes,
                               completeActionClass=env.helper_action_env.actionClass,
                               helper_action_class=env.helper_action_class,
                               helper_action_env=env.helper_action_env)
    # The secondary rewards of the simulated env are initialized against
    # the *real* environment.
    for helper in self.obs_env.other_rewards.values():
        helper.initialize(env)

    self._empty_obs = self.observationClass(obs_env=self.obs_env,
                                            action_helper=self.action_helper_env)
    self._update_env_time = 0.
def __init__(self, gridobj, env, rewardClass=None, observationClass=CompleteObservation, actionClass=None, with_forecast=True):
    """
    INTERNAL

    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

    Build the observation space attached to ``env``. Requires
    ``env.parameters`` and ``env.backend`` to be valid, since the forecast
    ("simulate") environment is built from a copy of the backend.
    """
    if actionClass is None:
        # NOTE(review): actionClass is resolved here but not used in this
        # visible body — presumably consumed elsewhere; confirm before removing.
        from grid2op.Action import CompleteAction
        actionClass = CompleteAction

    SerializableObservationSpace.__init__(self, gridobj, observationClass=observationClass)
    self.with_forecast = with_forecast

    # Deep copy so the simulated env's parameters can diverge from the real env's.
    self._simulate_parameters = copy.deepcopy(env.parameters)

    # Fall back on the environment's reward when none was given.
    if rewardClass is not None:
        self._reward_func = rewardClass
    else:
        self._reward_func = env._reward_helper.template_reward

    # helpers
    self.action_helper_env = env._helper_action_env
    self.reward_helper = RewardHelper(reward_func=self._reward_func)
    self.reward_helper.initialize(env)

    extra_reward_classes = {nm: helper.rewardClass
                            for nm, helper in env.other_rewards.items()}

    # TODO here: have another backend maybe
    self._backend_obs = env.backend.copy()
    obs_env_cls = _ObsEnv.init_grid(type(env.backend), force_module=_ObsEnv.__module__)
    # Register the generated class on the _ObsEnv module so it can be
    # found again (e.g. by pickling / copy machinery).
    setattr(sys.modules[_ObsEnv.__module__], obs_env_cls.__name__, obs_env_cls)
    self.obs_env = obs_env_cls(backend_instanciated=self._backend_obs,
                               # do not put self.observationClass otherwise it's initialized twice
                               obsClass=observationClass,
                               parameters=self._simulate_parameters,
                               reward_helper=self.reward_helper,
                               action_helper=self.action_helper_env,
                               thermal_limit_a=env.get_thermal_limit(),
                               legalActClass=copy.deepcopy(env._legalActClass),
                               other_rewards=extra_reward_classes,
                               helper_action_class=env._helper_action_class,
                               helper_action_env=env._helper_action_env,
                               epsilon_poly=env._epsilon_poly,
                               tol_poly=env._tol_poly,
                               )
    # The secondary rewards of the simulated env are initialized against
    # the *real* environment.
    for helper in self.obs_env.other_rewards.values():
        helper.initialize(env)

    self._empty_obs = self._template_obj
    self._update_env_time = 0.