Example #1
0
        # Restrict the chronics (time-series scenarios) to those whose name
        # contains "Scenario_february_" — presumably because the full dataset
        # is too large to load at once for this environment; verify against
        # the branch condition above (outside this view).
        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*Scenario_february_.*$", x) is not None)
        # Apply the filter by reloading the chronics data.
        env.chronics_handler.real_data.reset()
    elif env.name == "l2rpn_case14_sandbox":
        # all data can be loaded into memory
        # env.chronics_handler.real_data.set_filter(lambda x: True)
        env.chronics_handler.real_data.reset()

    # env.chronics_handler.real_data.
    # Keep a handle on the single-process environment before (possibly)
    # wrapping it into a multi-process one below.
    env_init = env
    if args.nb_env > 1:
        from l2rpn_baselines.utils import make_multi_env
        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

    # Hyper-parameters controlling the training loop.
    tp = TrainingParam()
    # NN training
    tp.lr = 1e-5
    tp.lr_decay_steps = 300000
    # Scale the mini-batch size with the number of parallel environments.
    tp.minibatch_size = 32 * int(args.nb_env)
    # NOTE(review): "/" is true division, so update_freq becomes a float
    # (e.g. 16.0). Confirm TrainingParam tolerates that; integer division
    # ("//") may have been intended.
    tp.update_freq = tp.minibatch_size / 2

    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = None  # None to deactivate it
    tp.min_iter = None
    tp.update_nb_iter = None  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = None  # None to deactivate it

    # experience replay
    tp.buffer_size = 1000000
Example #2
0
        # Dead code kept for reference: the former manual MultiEnvironment
        # wiring, superseded by make_multi_env below.
        # from grid2op.Environment import MultiEnvironment
        # env = MultiEnvironment(int(args.nb_env), env)
        # # TODO hack i'll fix in 1.0.0
        # env.action_space = env_init.action_space
        # env.observation_space = env_init.observation_space
        # env.fast_forward_chronics = lambda x: None
        # env.chronics_handler = env_init.chronics_handler
        # env.current_obs = env_init.current_obs
        # env.set_ff()
        # Wrap the base environment into nb_env parallel workers.
        from l2rpn_baselines.utils import make_multi_env
        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

    # Hyper-parameters controlling the training loop.
    tp = TrainingParam()

    # NN training
    tp.lr = 1e-4
    tp.lr_decay_steps = 30000
    tp.minibatch_size = 32
    tp.update_freq = 16

    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = 100  # None to deactivate it
    tp.min_iter = 10
    tp.update_nb_iter = 100  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = 3  # None to deactivate it

    # experience replay
    tp.buffer_size = 1000000