Example No. 1
    def test_loadback(self):
        tp = TrainingParam()
        tmp_dir = tempfile.mkdtemp()
        tp.save_as_json(tmp_dir, "test.json")

        tp2 = TrainingParam.from_json(os.path.join(tmp_dir, "test.json"))
        assert tp2 == tp
Example No. 2
    def test_train_eval(self):
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            li_attr_obs_X = [
                "day_of_week", "hour_of_day", "minute_of_hour", "prod_p",
                "prod_v", "load_p", "load_q", "actual_dispatch",
                "target_dispatch", "topo_vect", "time_before_cooldown_line",
                "time_before_cooldown_sub", "rho", "timestep_overflow",
                "line_status"
            ]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes = [100, 50, 10]  # sizes of each hidden layer
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],  # all relu activation functions
                "list_attr_obs": li_attr_obs_X
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_d3qn(env,
                       name=nm_,
                       iterations=100,
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
                       nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
                       kwargs_archi=kwargs_archi)

            baseline_2 = eval_d3qn(env,
                                   name=nm_,
                                   load_path=tmp_dir,
                                   logs_path=tmp_dir,
                                   nb_episode=1,
                                   nb_process=1,
                                   max_steps=30,
                                   verbose=False,
                                   save_gif=False)
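
One thing to keep in mind when adapting such a test into a standalone script: tempfile.mkdtemp does not delete the directory it creates, so the temporary save/log folder would normally be cleaned up once done, for example with the standard library call below.

import shutil
shutil.rmtree(tmp_dir, ignore_errors=True)  # remove the temporary save/log directory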
Example No. 3
 def _aux_test_attr(self, attr, val):
     """
     Test that I can modify an attribute and then load the training parameters back correctly.
     """
     tp = TrainingParam()
     setattr(tp, attr, val)
     tmp_dir = tempfile.mkdtemp()
     tp.save_as_json(tmp_dir, "test.json")
     tp2 = TrainingParam.from_json(os.path.join(tmp_dir, "test.json"))
     assert tp2 == tp, "error for attribute {}".format(attr)
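
A helper like this is typically driven by one small test per attribute. The sketch below is illustrative only: the attribute names are taken from the other examples on this page, and the values are arbitrary.

 def test_buffer_size(self):
     self._aux_test_attr("buffer_size", 100)

 def test_minibatch_size(self):
     self._aux_test_attr("minibatch_size", 8)

 def test_update_freq(self):
     self._aux_test_attr("update_freq", 32)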
Example No. 4
 def test_get_epsilon(self):
     tp = TrainingParam()
     tp.final_epsilon = None
     eps = tp.get_next_epsilon(1)
     assert eps == 0.
     tp.final_epsilon = 0.01
     tp.initial_epsilon = None
     eps = tp.get_next_epsilon(1)
     assert eps == 0.
     tp.initial_epsilon = 0.01
     tp.final_epsilon = 0.01
     eps = tp.get_next_epsilon(1)
     assert eps == 0.01
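
The assertions above pin down the edge cases: the schedule falls back to 0 when either bound is missing, and returns the common value when both bounds coincide. Below is a minimal sketch of how such a schedule is usually consumed in an epsilon-greedy loop; the loop itself is illustrative, only get_next_epsilon and the two epsilon attributes come from the example above.

import numpy as np
from l2rpn_baselines.utils import TrainingParam

tp = TrainingParam()
tp.initial_epsilon = 0.4   # exploration rate at the start of training
tp.final_epsilon = 0.01    # floor towards which the schedule decays

rng = np.random.default_rng(0)
for step in range(0, 100000, 10000):
    eps = tp.get_next_epsilon(step)  # same call as in the test above
    explore = rng.random() < eps     # epsilon-greedy: act randomly with probability eps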
Example No. 5
    def __init__(self,
                 action_size,
                 observation_size,
                 lr=1e-5,
                 learning_rate_decay_steps=1000,
                 learning_rate_decay_rate=0.95,
                 training_param=TrainingParam()):
        BaseDeepQ.__init__(self, action_size, observation_size, lr,
                           learning_rate_decay_steps, learning_rate_decay_rate,
                           training_param)
        # TODO add as meta param the number of "Q" you want to use (here 2)
        # TODO add as meta param size and types of the networks
        self.average_reward = 0
        self.life_spent = 1
        self.qvalue_evolution = np.zeros((0, ))
        self.Is_nan = False

        self.model_value_target = None
        self.model_value = None
        self.model_Q = None
        self.model_Q2 = None
        self.model_policy = None

        self.construct_q_network()
        self.previous_size = 0
        self.previous_eyes = None
        self.previous_arange = None
        self.previous_size_train = 0
        self.previous_eyes_train = None
Example No. 6
    def __init__(self, nn_params, training_param=None, verbose=False):
        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, nn_params, training_param, verbose=verbose)

        # TODO add as meta param the number of "Q" you want to use (here 2)
        # TODO add as meta param size and types of the networks
        self.average_reward = 0
        self.life_spent = 1
        self.qvalue_evolution = np.zeros((0, ))
        self.Is_nan = False

        self.model_value_target = None
        self.model_value = None
        self.model_Q = None
        self.model_Q2 = None
        self.model_policy = None

        self.construct_q_network()
        self.previous_size = 0
        self.previous_eyes = None
        self.previous_arange = None
        self.previous_size_train = 0
        self.previous_eyes_train = None

        # optimizers and learning rate
        self.schedule_lr_policy = None
        self.optimizer_policy = None
        self.schedule_lr_Q = None
        self.optimizer_Q = None
        self.schedule_lr_Q2 = None
        self.optimizer_Q2 = None
        self.schedule_lr_value = None
        self.optimizer_value = None
Example No. 7
 def __init__(self, nn_params, training_param=None):
     if training_param is None:
         training_param = TrainingParam()
     BaseDeepQ.__init__(self, nn_params, training_param)
     self._custom_objects = {"LtauBis": LtauBis}
     self.construct_q_network()
     self._max_global_norm_grad = training_param.max_global_norm_grad
     self._max_value_grad = training_param.max_value_grad
     self._max_loss = training_param.max_loss
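
The three limits stored here are typically enforced inside the optimization step: gradients clipped by global norm and by value, and an oversized loss treated as divergence. The sketch below is an assumption about how such limits are applied, written with standard TensorFlow calls rather than the repository's exact training loop.

import tensorflow as tf

def clipped_train_step(model, optimizer, loss_fn, batch, training_param):
    # compute the loss under a gradient tape
    with tf.GradientTape() as tape:
        loss = loss_fn(model, batch)
    if training_param.max_loss is not None and float(loss) > training_param.max_loss:
        # assumed safeguard: skip the update if the loss has exploded
        return loss
    grads = tape.gradient(loss, model.trainable_variables)
    if training_param.max_global_norm_grad is not None:
        grads, _ = tf.clip_by_global_norm(grads, training_param.max_global_norm_grad)
    if training_param.max_value_grad is not None:
        grads = [tf.clip_by_value(g,
                                  -training_param.max_value_grad,
                                  training_param.max_value_grad) for g in grads]
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss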
Example No. 8
 def __init__(self,
              nn_params,
              training_param=None):
     if training_param is None:
         training_param = TrainingParam()
     BaseDeepQ.__init__(self,
                        nn_params,
                        training_param)
     self.schedule_lr_model = None
     self.construct_q_network()
Example No. 9
 def __init__(self,
              action_size,
              observation_size,
              lr=1e-5,
              learning_rate_decay_steps=1000,
              learning_rate_decay_rate=0.95,
              training_param=TrainingParam()):
     BaseDeepQ.__init__(self, action_size, observation_size, lr,
                        learning_rate_decay_steps, learning_rate_decay_rate,
                        training_param)
     self.construct_q_network()
Example No. 10
    def test_train_eval(self):
        if has_SACOld is not None:
            raise ImportError(
                f"TestSACOld is not available with error:\n{has_SACOld}")
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes_q = [100, 50, 10]  # sizes of each hidden layer
            sizes_v = [100, 100]  # sizes of each hidden layer
            sizes_pol = [100, 10]  # sizes of each hidden layer
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes_q,
                'activs': ["relu" for _ in range(len(sizes_q))],
                "list_attr_obs": li_attr_obs_X,
                "sizes_value": sizes_v,
                "activs_value": ["relu" for _ in range(len(sizes_v))],
                "sizes_policy": sizes_pol,
                "activs_policy": ["relu" for _ in range(len(sizes_pol))]
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_sacold(env,
                         name=nm_,
                         iterations=100,
                         save_path=tmp_dir,
                         load_path=None,
                         logs_dir=tmp_dir,
                         training_param=tp,
                         verbose=False,
                         kwargs_converters=kwargs_converters,
                         kwargs_archi=kwargs_archi)

            baseline_2 = eval_sacold(env,
                                     name=nm_,
                                     load_path=tmp_dir,
                                     logs_path=tmp_dir,
                                     nb_episode=1,
                                     nb_process=1,
                                     max_steps=30,
                                     verbose=False,
                                     save_gif=False)
Example No. 11
    def test_train_eval(self):
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            # neural network architecture
            li_attr_obs_X = ["prod_p", "load_p", "rho"]
            li_attr_obs_Tau = ["line_status"]
            sizes = [100, 50, 10]

            x_dim = NNParam.get_obs_size(env, li_attr_obs_X)
            tau_dims = [
                NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau
            ]

            kwargs_archi = {
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],
                'x_dim': x_dim,
                'tau_dims': tau_dims,
                'tau_adds': [0.0 for _ in range(len(tau_dims))],
                'tau_mults': [1.0 for _ in range(len(tau_dims))],
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_leap(env,
                       name=nm_,
                       iterations=100,
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
                       kwargs_archi=kwargs_archi)

            baseline_2 = eval_leap(env,
                                   name=nm_,
                                   load_path=tmp_dir,
                                   logs_path=tmp_dir,
                                   nb_episode=1,
                                   nb_process=1,
                                   max_steps=30,
                                   verbose=False,
                                   save_gif=False)
Example No. 12
    def test_train_eval_multiprocess(self):
        # test only done for this baseline because the feature is coded in the base class DeepQAgent
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env_init = grid2op.make("rte_case5_example", test=True)
            env = make_multi_env(env_init=env_init, nb_env=2)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes = [100, 50, 10]  # sizes of each hidden layer
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],  # all relu activation functions
                "list_attr_obs": li_attr_obs_X
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_dqn(env,
                      name=nm_,
                      iterations=100,
                      save_path=tmp_dir,
                      load_path=None,
                      logs_dir=tmp_dir,
                      training_param=tp,
                      verbose=False,
                      kwargs_converters=kwargs_converters,
                      kwargs_archi=kwargs_archi)

            baseline_2 = eval_dqn(env_init,
                                  name=nm_,
                                  load_path=tmp_dir,
                                  logs_path=tmp_dir,
                                  nb_episode=1,
                                  nb_process=1,
                                  max_steps=30,
                                  verbose=False,
                                  save_gif=False)
Example No. 13
    def __init__(self, nn_params, training_param=None):
        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, nn_params, training_param)
        self._custom_objects = {"LtauBis": LtauBis}
        self._max_global_norm_grad = training_param.max_global_norm_grad
        self._max_value_grad = training_param.max_value_grad
        self._max_loss = training_param.max_loss

        self.train_lr = 1.0

        # added
        self.encoded_state = None
        self.grid_model = None
        self._schedule_grid_model = None
        self._optimizer_grid_model = None
        self._qnet_variables = []
        self.grid_model_losses_npy = None

        self.construct_q_network()
Example No. 14
 def __init__(self,
              action_size,
              observation_size,
              tau_dim_start,
              tau_dim_end,
              add_tau,
              lr=0.00001,
              learning_rate_decay_steps=1000,
              learning_rate_decay_rate=0.95,
              training_param=TrainingParam()):
     BaseDeepQ.__init__(self,
                        action_size,
                        observation_size,
                        lr,
                        learning_rate_decay_steps=learning_rate_decay_steps,
                        learning_rate_decay_rate=learning_rate_decay_rate,
                        training_param=training_param)
     self.tau_dim_start = tau_dim_start
     self.tau_dim_end = tau_dim_end
     self.add_tau = add_tau
     self.custom_objects = {"Ltau": Ltau}
     self.construct_q_network()
Example No. 15
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters={},
          kwargs_archi={}):
    """
    This function implements the "training" part of the baseline "DuelQLeapNet".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        The number of iterations (steps) for which to train your agent. NB these are steps, not episodes.

    save_path: ``str``
        Where to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the arguments provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard logs generated during training. ``None`` if you don't want to log them.

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    verbose: ``bool``
        Whether to print information to the terminal (a better logging strategy will be added at some point).

    kwargs_converters: ``dict``
        A dictionary containing the keyword arguments passed at the initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent.

    kwargs_archi: ``dict``
        Keyword arguments used for making the :class:`LeapNet_NNParam` object that will be used to build the baseline.

    Returns
    -------

    baseline: :class:`DuelQLeapNet`
        The trained baseline.


    .. _Example-leapnet:

    Examples
    ---------
    Here is an example of how to train a DuelQLeapNet baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam
        from l2rpn_baselines.DuelQLeapNet import train, LeapNet_NNParam

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # this will be the list of what part of the observation I want to keep
        # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes
        li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                         "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                         "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"]

        # neural network architecture
        # compared to the other baselines, the inputs are split in two groups: the "X" attributes above
        # and the "tau" attributes below, which are fed to the leap layers
        li_attr_obs_Tau = ["rho", "line_status"]
        sizes = [800, 800, 800, 494, 494, 494]

        # nn architecture
        x_dim = LeapNet_NNParam.get_obs_size(env, li_attr_obs_X)
        tau_dims = [LeapNet_NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau]

        kwargs_archi = {'sizes': sizes,
                        'activs': ["relu" for _ in sizes],
                        'x_dim': x_dim,
                        'tau_dims': tau_dims,
                        'tau_adds': [0.0 for _ in range(len(tau_dims))],  # add some value to taus
                        'tau_mults': [1.0 for _ in range(len(tau_dims))],  # scale tau by some value (after adding)
                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau
                        }

        # select some part of the action
        # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }
        # define the name of the model
        nm_ = "AnneOnymous"
        save_path = "/WHERE/I/SAVED/THE/MODEL"
        logs_dir = "/WHERE/I/SAVED/THE/LOGS"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path=save_path,
                  load_path=None,
                  logs_dir=logs_dir,
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices(
                'GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(
                    physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # get the size of the action space
    kwargs_archi["action_size"] = DuelQLeapNet.get_action_size(
        env.action_space, filter_action_fun, kwargs_converters)
    kwargs_archi["observation_size"] = 0  # this is not used anyway
    if load_path is not None:
        # TODO test that
        path_model, path_target_model = DuelQLeapNet_NN.get_path_model(
            load_path, name)
        print(
            "INFO: Reloading a model, the architecture parameters will be ignored"
        )
        nn_archi = LeapNet_NNParam.from_json(
            os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = LeapNet_NNParam(**kwargs_archi)

    baseline = DuelQLeapNet(action_space=env.action_space,
                            nn_archi=nn_archi,
                            name=name,
                            istraining=True,
                            filter_action_fun=filter_action_fun,
                            verbose=verbose,
                            **kwargs_converters)

    if load_path is not None:
        print("INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    return baseline
Example No. 16
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters={},
          kwargs_archi={}):
    """
    This function implements the "training" part of the baseline "LeapNetEncoded".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        The number of iterations (steps) for which to train your agent. NB these are steps, not episodes.

    save_path: ``str``
        Where to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the arguments provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard logs generated during training. ``None`` if you don't want to log them.

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    verbose: ``bool``
        Whether to print information to the terminal (a better logging strategy will be added at some point).

    kwargs_converters: ``dict``
        A dictionary containing the keyword arguments passed at the initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent.

    kwargs_archi: ``dict``
        Keyword arguments used for making the :class:`LeapNetEncoded_NNParam` object that will be used to build the baseline.

    Returns
    -------

    baseline: :class:`LeapNetEncoded`
        The trained baseline.


    .. _Example-leapnetenc:

    Examples
    ---------
    Here is an example of how to train a LeapNetEncoded baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam
        from l2rpn_baselines.LeapNetEncoded import train

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # nn architecture
        li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
        li_attr_obs_input_q = ["time_before_cooldown_line",
                               "time_before_cooldown_sub",
                               "actual_dispatch",
                               "target_dispatch",
                               "day_of_week",
                               "hour_of_day",
                               "minute_of_hour",
                               "rho"]
        li_attr_obs_Tau = ["line_status", "timestep_overflow"]
        list_attr_gm_out = ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q", "load_v"] + li_attr_obs_X

        kwargs_archi = {'sizes': [],
                        'activs': [],
                        'x_dim': -1,

                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau,
                        "list_attr_obs_x": li_attr_obs_X,
                        "list_attr_obs_input_q": li_attr_obs_input_q,
                        "list_attr_obs_gm_out": list_attr_gm_out,

                        'dim_topo': env.dim_topo,

                        "sizes_enc": (50, 50, 50, 50),
                        "sizes_main": (300, 300, 300),
                        "sizes_out_gm": (100, ),
                        "sizes_Qnet": (200, 200, 200)
                        }

        # select the part of the action space (same converter settings as in the other examples)
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }
        # define the name of the model
        nm_ = "AnneOnymous"
        save_path = "/WHERE/I/SAVED/THE/MODEL"
        logs_dir = "/WHERE/I/SAVED/THE/LOGS"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path=save_path,
                  load_path=None,
                  logs_dir=logs_dir,
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi,
                  verbose=True)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices('GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # get the size of the action space
    kwargs_archi["action_size"] = LeapNetEncoded.get_action_size(env.action_space, filter_action_fun, kwargs_converters)
    kwargs_archi["observation_size"] = 0  # this is not used anyway
    if load_path is not None:
        # TODO test that
        path_model, path_target_model = LeapNetEncoded_NN.get_path_model(load_path, name)
        print("INFO: Reloading a model, the architecture parameters will be ignored")
        nn_archi = LeapNetEncoded_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = LeapNetEncoded_NNParam(**kwargs_archi)
        # compute the remaining dimensions automatically rather than specifying them all by hand
        nn_archi.compute_dims(env)
        # center / reduce the data so that the learning process is smoother
        nn_archi.center_reduce(env)

    baseline = LeapNetEncoded(action_space=env.action_space,
                              nn_archi=nn_archi,
                              name=name,
                              istraining=True,
                              filter_action_fun=filter_action_fun,
                              verbose=verbose,
                              **kwargs_converters)

    if load_path is not None:
        print("INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    return baseline
Example No. 17
    if env.name == "l2rpn_wcci_2020":
        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*Scenario_february_.*$", x) is not None)
        env.chronics_handler.real_data.reset()
    elif env.name == "l2rpn_case14_sandbox":
        # all data can be loaded into memory
        # env.chronics_handler.real_data.set_filter(lambda x: True)
        env.chronics_handler.real_data.reset()

    # env.chronics_handler.real_data.
    env_init = env
    if args.nb_env > 1:
        from l2rpn_baselines.utils import make_multi_env
        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

    tp = TrainingParam()
    # NN training
    tp.lr = 1e-5
    tp.lr_decay_steps = 300000
    tp.minibatch_size = 32 * int(args.nb_env)
    tp.update_freq = tp.minibatch_size / 2

    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = None  # None to deactivate it
    tp.min_iter = None
    tp.update_nb_iter = None  # once 100 scenarios are solved, increase by "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = None  # None to deactivate it

    # experience replay
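
The snippet breaks off at its experience-replay section; based on the attributes exercised in the tests earlier on this page, that part would typically set the replay-buffer size, for example (value purely illustrative):

    tp.buffer_size = 1000000  # number of transitions kept in the experience replay buffer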
Example No. 18
    env.chronics_handler.real_data.set_filter(
        lambda x: re.match(".*((0003)|(0072)|(0057))$", x) is not None)
    env.chronics_handler.real_data.reset_cache()
    # env.chronics_handler.real_data.
    env_init = env
    if args.nb_env > 1:
        from grid2op.Environment import MultiEnvironment
        env = MultiEnvironment(int(args.nb_env), env)
        # TODO hack i'll fix in 0.9.0
        env.action_space = env_init.action_space
        env.observation_space = env_init.observation_space
        env.fast_forward_chronics = lambda x: None
        env.chronics_handler = env_init.chronics_handler
        env.current_obs = env_init.current_obs
        env.set_ff()

    tp = TrainingParam()

    # NN training
    tp.lr = 1e-4
    tp.lr_decay_steps = 30000
    tp.minibatch_size = 256
    tp.update_freq = 128

    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = 2
    tp.min_iter = 10
    tp.update_nb_iter(2)

    # oversampling hard scenarios
    tp.oversampling_rate = 3
Example No. 19
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          nb_env=1,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters={},
          kwargs_archi={}):
    """
    This function implements the "training" part of the baseline "SAC".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        The number of iterations (steps) for which to train your agent. NB these are steps, not episodes.

    save_path: ``str``
        Where to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the arguments provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard logs generated during training. ``None`` if you don't want to log them.

    nb_env: ``int``
        Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.

    verbose: ``bool``
        Whether to print information to the terminal (a better logging strategy will be added at some point).

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    kwargs_converters: ``dict``
        A dictionary containing the keyword arguments passed at the initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent.

    kwargs_archi: ``dict``
        Keyword arguments used for making the :class:`SAC_NNParam` object that will be used to build the baseline.

    Returns
    -------

    baseline: :class:`SAC`
        The trained baseline.

    Examples
    ---------
    Here is an example of how to train a SAC baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam, NNParam
        from l2rpn_baselines.SAC import train

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # this will be the list of what part of the observation I want to keep
        # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes
        li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                         "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                         "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]

        # neural network architecture
        observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
        sizes_q = [800, 800, 800, 494, 494, 494]  # sizes of each hidden layer
        sizes_v = [800, 800]  # sizes of each hidden layer
        sizes_pol = [800, 800, 800, 494, 494, 494]  # sizes of each hidden layer
        kwargs_archi = {'observation_size': observation_size,
                        'sizes': sizes_q,
                        'activs': ["relu" for _ in range(len(sizes_q))],
                        "list_attr_obs": li_attr_obs_X,
                        "sizes_value": sizes_v,
                        "activs_value": ["relu" for _ in range(len(sizes_v))],
                        "sizes_policy": sizes_pol,
                        "activs_policy": ["relu" for _ in range(len(sizes_pol))]
                        }

        # select some part of the action
        # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }
        # define the name of the model
        nm_ = "AnneOnymous"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path="/WHERE/I/SAVED/THE/MODEL",
                  load_path=None,
                  logs_dir="/WHERE/I/SAVED/THE/LOGS",
                  nb_env=1,
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices(
                'GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(
                    physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # compute the proper size for the converter
    kwargs_archi["action_size"] = SAC.get_action_size(env.action_space,
                                                      filter_action_fun,
                                                      kwargs_converters)

    if load_path is not None:
        path_model, path_target_model = SAC_NN.get_path_model(load_path, name)
        if verbose:
            print(
                "INFO: Reloading a model, the architecture parameters provided will be ignored"
            )
        nn_archi = SAC_NNParam.from_json(
            os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = SAC_NNParam(**kwargs_archi)

    baseline = SAC(action_space=env.action_space,
                   nn_archi=nn_archi,
                   name=name,
                   istraining=True,
                   nb_env=nb_env,
                   verbose=verbose,
                   **kwargs_converters)

    if load_path is not None:
        if verbose:
            print(
                "INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    return baseline
Example No. 20
 def test_save(self):
     tp = TrainingParam()
     tmp_dir = tempfile.mkdtemp()
     tp.save_as_json(tmp_dir, "test.json")
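
A natural extra check here (not part of the original test) is to assert that the JSON file was actually written:

     assert os.path.exists(os.path.join(tmp_dir, "test.json"))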
Example No. 21
    def test_train_eval(self):
        tp = TrainingParam()
        tp.buffer_size = 100
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32
        tmp_dir = tempfile.mkdtemp()
        if has_LeapNetEncoded is not None:
            raise ImportError(
                f"TestLeapNetEncoded is not available with error:\n{has_LeapNetEncoded}"
            )
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_line_status": True,
                "change_bus_vect": False,
                "set_topo_vect": False,
                "redispacth": False
            }

            # nn architecture
            li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
            li_attr_obs_input_q = [
                "time_before_cooldown_line", "time_before_cooldown_sub",
                "actual_dispatch", "target_dispatch", "day_of_week",
                "hour_of_day", "minute_of_hour", "rho"
            ]
            li_attr_obs_Tau = ["line_status", "timestep_overflow"]
            list_attr_gm_out = [
                "a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q",
                "load_v"
            ] + li_attr_obs_X

            kwargs_archi = {
                'sizes': [],
                'activs': [],
                'x_dim': -1,
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau,
                "list_attr_obs_x": li_attr_obs_X,
                "list_attr_obs_input_q": li_attr_obs_input_q,
                "list_attr_obs_gm_out": list_attr_gm_out,
                'dim_topo': env.dim_topo,
                "sizes_enc": (10, 10, 10, 10),
                "sizes_main": (50, ),
                "sizes_out_gm": (50, ),
                "sizes_Qnet": (
                    50,
                    50,
                )
            }
            nm_ = "AnneOnymous"
            train_leapenc(env,
                          name=nm_,
                          iterations=100,
                          save_path=tmp_dir,
                          load_path=None,
                          logs_dir=tmp_dir,
                          training_param=tp,
                          verbose=False,
                          kwargs_converters=kwargs_converters,
                          kwargs_archi=kwargs_archi)

            baseline_2 = eval_leapenc(env,
                                      name=nm_,
                                      load_path=tmp_dir,
                                      logs_path=tmp_dir,
                                      nb_episode=1,
                                      nb_process=1,
                                      max_steps=30,
                                      verbose=False,
                                      save_gif=False)