示例#1
0
 def test_issue_187(self):
     """Check that the reward of one step lies inside ``env.reward_range``.

     Every environment name returned by
     ``grid2op.list_available_test_env()``, except "blank", is created with
     ``RedispReward`` as reward class, reset, and stepped once with the
     action returned by ``env.action_space()``.
     """
     env_names = [name for name in grid2op.list_available_test_env()
                  if name != "blank"]
     for env_name in env_names:
         with warnings.catch_warnings():
             # every warning raised inside this context is ignored
             warnings.filterwarnings("ignore")
             env_kwargs = dict(test=True, reward_class=RedispReward)
             with grid2op.make(env_name, **env_kwargs) as env:
                 env.reset()
                 # only the reward of the step is of interest here
                 _, reward, _, _ = env.step(env.action_space())

                 upper = env.reward_range[1]
                 assert reward <= upper, f"error for reward_max for {env_name}"
                 lower = env.reward_range[0]
                 assert reward >= lower, f"error for reward_min for {env_name}"
示例#2
0
    def test_custom_reward(self):
        """Check a ``RedispReward`` class generated with custom parameters.

        For every available test environment except "blank", the test
        verifies that:

        * the reward of one step lies inside ``env.reward_range``;
        * ``env.reward_range`` matches, within ``self.tol``, the bounds
          recomputed below from the custom parameters.
        """
        # single source of truth for the custom parameters: used both to
        # build the reward class and to recompute the expected bounds
        custom_params = {
            "alpha_redisph": 2,
            "min_load_ratio": 0.15,
            "worst_losses_ratio": 0.05,
            "min_reward": -10.,
            "reward_illegal_ambiguous": 0.,
            "least_losses_ratio": 0.015,
        }
        reward_cls = RedispReward.generate_class_custom_params(**custom_params)
        expected = {key: dt_float(val) for key, val in custom_params.items()}

        env_names = [name for name in grid2op.list_available_test_env()
                     if name != "blank"]
        for env_name in env_names:
            with warnings.catch_warnings():
                # every warning raised inside this context is ignored
                warnings.filterwarnings("ignore")
                with grid2op.make(env_name, test=True,
                                  reward_class=reward_cls) as env:
                    env.reset()
                    _, reward, _, _ = env.step(env.action_space())

                    # the reward must fall inside the advertised range
                    range_max = env.reward_range[1]
                    assert reward <= range_max, \
                        f"error reward > reward_max for {env_name}"
                    range_min = env.reward_range[0]
                    assert reward >= range_min, \
                        f"error reward < reward_min for {env_name}"

                    # the custom parameters must drive the range: recompute
                    # the bounds that are expected from them
                    gen_costs = env.gen_cost_per_MW
                    total_pmax = np.sum(env.gen_pmax)

                    # upper side: not realistic values, only upper bounds
                    worst_load = dt_float(total_pmax)
                    worst_losses = expected["worst_losses_ratio"] * worst_load
                    worst_redisp = expected["alpha_redisph"] * total_pmax
                    worst_marginal_cost = np.max(gen_costs)
                    max_regret = (worst_losses +
                                  worst_redisp) * worst_marginal_cost

                    # lower side: `min_load_ratio` of the summed gen_pmax
                    least_loads = dt_float(
                        worst_load * expected["min_load_ratio"])
                    # `least_losses_ratio` of that load
                    least_losses = dt_float(
                        expected["least_losses_ratio"] * least_loads)
                    least_redisp = dt_float(0.0)  # lower bound is 0
                    # smallest strictly positive generation cost
                    base_marginal_cost = np.min(gen_costs[gen_costs > 0.])
                    min_regret = (least_losses +
                                  least_redisp) * base_marginal_cost

                    reward_max = dt_float(
                        (max_regret - min_regret) / least_loads)
                    reward_min = expected["min_reward"]

                    assert abs(env.reward_range[1] - reward_max) <= self.tol, \
                        f"wrong reward max computed for {env_name}"
                    assert abs(env.reward_range[0] - reward_min) <= self.tol, \
                        f"wrong reward min computed for {env_name}"
示例#3
0
 def get_list_env(self):
     """Return the available test environments plus ``ENV_WITH_ALARM_NAME``.

     The list returned by ``grid2op.list_available_test_env()`` is extended
     in place with ``ENV_WITH_ALARM_NAME`` and then returned.
     """
     env_names = grid2op.list_available_test_env()
     env_names.append(ENV_WITH_ALARM_NAME)
     return env_names
示例#4
0
 def get_list_env(self):
     """Return the list given by ``grid2op.list_available_test_env()``."""
     # NOTE(review): appending the "l2rpn_neurips_2020_track1_with_alert"
     # environment (located under PATH_DATA_TEST) was left commented out in
     # this version, so it is not part of the returned list.
     return grid2op.list_available_test_env()