Example #1
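This excerpt omits its imports and the enclosing test class. A likely preamble, assuming the `train_dqn`/`eval_dqn` aliases refer to the `train`/`evaluate` functions of the DeepQSimple baseline (an inference from the call names below, not stated in the snippet):

import tempfile
import warnings

import grid2op
from l2rpn_baselines.utils import TrainingParam, NNParam, make_multi_env
from l2rpn_baselines.DeepQSimple import train as train_dqn
from l2rpn_baselines.DeepQSimple import evaluate as eval_dqn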
    def test_train_eval_multiprocess(self):
        # this test is only done for this baseline because the feature is implemented in the DeepQAgent base class
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env_init = grid2op.make("rte_case5_example", test=True)
            env = make_multi_env(env_init=env_init, nb_env=2)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes = [100, 50, 10]  # size of each hidden layer
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],  # relu activation for every layer
                "list_attr_obs": li_attr_obs_X
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_dqn(env,
                      name=nm_,
                      iterations=100,
                      save_path=tmp_dir,
                      load_path=None,
                      logs_dir=tmp_dir,
                      training_param=tp,
                      verbose=False,
                      kwargs_converters=kwargs_converters,
                      kwargs_archi=kwargs_archi)

            baseline_2 = eval_dqn(env_init,
                                  name=nm_,
                                  load_path=tmp_dir,
                                  logs_path=tmp_dir,
                                  nb_episode=1,
                                  nb_process=1,
                                  max_steps=30,
                                  verbose=False,
                                  save_gif=False)
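
The snippet leaves the temporary directory behind; a typical cleanup at the end of such a test (my addition, not part of the original) would be:

        import shutil
        shutil.rmtree(tmp_dir, ignore_errors=True)  # delete the temporary save/log directory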
Example #2
               # (excerpt begins mid-expression: presumably the closing arguments
               # of a grid2op.make(...) call selecting MultifolderWithCache)
               chronics_class=MultifolderWithCache
               )

    if env.name == "l2rpn_wcci_2020":
        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*Scenario_february_.*$", x) is not None)
        env.chronics_handler.real_data.reset()
    elif env.name == "l2rpn_case14_sandbox":
        # the dataset is small enough that all chronics fit in memory; no filter needed
        # env.chronics_handler.real_data.set_filter(lambda x: True)
        env.chronics_handler.real_data.reset()
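    # For example, the February filter above keeps only chronics whose folder name
    # contains "Scenario_february_" (e.g. "Scenario_february_012"), so
    # MultifolderWithCache caches just those scenarios in memory.
    # (Note: the filter relies on `import re`, elided from this fragment.)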

    env_init = env
    if args.nb_env > 1:
        from l2rpn_baselines.utils import make_multi_env
        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

    tp = TrainingParam()
    # NN training
    tp.lr = 1e-5
    tp.lr_decay_steps = 300000
    tp.minibatch_size = 32 * int(args.nb_env)
    tp.update_freq = tp.minibatch_size // 2  # integer division: update every (minibatch_size / 2) steps

    # limit the number of time steps played per scenario
    tp.step_increase_nb_iter = None  # None deactivates the progressive increase
    tp.min_iter = None
    tp.update_nb_iter = None  # e.g. 100: every 100 solved scenarios, allow "step_increase_nb_iter" more steps

    # oversampling hard scenarios
    tp.oversampling_rate = None  # None to deactivate it
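
The fragment stops after the TrainingParam setup. Presumably `tp` is then passed to the baseline's train function together with converter and architecture kwargs, much as in Example #1. A hypothetical continuation (the CLI arguments and the kwargs dicts are assumptions, not shown in this fragment):

    # hypothetical continuation, mirroring the call in Example #1
    train_dqn(env,
              name=args.name,                        # assumed CLI argument
              iterations=int(args.num_train_steps),  # assumed CLI argument
              save_path=args.save_path,              # assumed CLI argument
              load_path=None,
              training_param=tp,
              kwargs_converters=kwargs_converters,   # assumed: built as in Example #1
              kwargs_archi=kwargs_archi)             # assumed: built as in Example #1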