def evaluate(env,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             verbose=DEFAULT_VERBOSE,
             save_gif=False):
    """
    Evaluate a saved ``RDQNAgent`` on ``env`` through a ``Runner``.

    Parameters
    ----------
    env:
        Environment to evaluate on. It must expose ``get_params_for_runner()``,
        ``observation_space`` and ``action_space``.

    load_path: ``str``
        Location handed to ``agent.load`` to restore the agent.

    logs_path: ``str``
        Directory where the runner stores its results (created if missing).

    nb_episode: ``int``
        Number of episodes forwarded to the runner.

    nb_process: ``int``
        Number of processes forwarded to the runner.

    max_steps: ``int``
        Forwarded to the runner as ``max_iter``.

    verbose: ``bool``
        Forwarded to the runner as its verbosity; when truthy it also enables
        the model summary, the progress bar and the final evaluation summary.

    save_gif: ``bool``
        When truthy, ``save_log_gif`` is called on the logs once the run is
        over.

    Returns
    -------
    res: ``list``
        Whatever ``runner.run`` returned (one entry per episode).
    """
    # Ask TensorFlow to grow GPU memory on demand (first GPU only).
    gpus = tf.config.list_physical_devices('GPU')
    if len(gpus) > 0:
        tf.config.experimental.set_memory_growth(gpus[0], True)

    runner_kwargs = env.get_params_for_runner()
    runner_kwargs["verbose"] = verbose

    # Agent in inference mode, restored from disk.
    agent = RDQNAgent(env.observation_space,
                      env.action_space,
                      is_training=False)
    agent.load(load_path)

    runner = Runner(**runner_kwargs,
                    agentClass=None,
                    agentInstance=agent)

    if verbose:
        # Collect the summary lines first so they are printed in one go.
        summary_lines = []
        agent.Qmain.model.summary(
            print_fn=lambda line: summary_lines.append(line))
        print("\n".join(summary_lines))

    os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=verbose)

    if verbose:
        print("Evaluation summary:")
        for _, chron_name, cum_reward, nb_time_step, max_ts in res:
            pieces = (
                "chronics at: {}".format(chron_name),
                "\ttotal reward: {:.6f}".format(cum_reward),
                "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts),
            )
            print("".join(pieces))

    if save_gif:
        save_log_gif(logs_path, res)

    return res
def evaluate(env,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             verbose=False,
             save_gif=False):
    """
    Evaluate the ``DoNothingAgent`` on ``env`` through a ``Runner``.

    Parameters
    ----------
    env:
        Environment to evaluate on. It must expose ``get_params_for_runner()``.

    load_path: ``str``
        Not used by this function (there is nothing to load here).

    logs_path: ``str``
        Directory where the runner stores its results (created if missing).

    nb_episode: ``int``
        Number of episodes forwarded to the runner.

    nb_process: ``int``
        Number of processes forwarded to the runner.

    max_steps: ``int``
        Forwarded to the runner as ``max_iter``.

    verbose: ``bool``
        Forwarded to the runner as its verbosity. The progress bar and the
        evaluation summary are always displayed, whatever this value is.

    save_gif: ``bool``
        When truthy, ``save_log_gif`` is called on the logs once the run is
        over.

    Returns
    -------
    ``None``
    """
    runner_params = env.get_params_for_runner()
    # Use the function argument: the previous code read ``args.verbose``,
    # a name that only exists when the module is run as a script, so calling
    # ``evaluate`` from another module raised a NameError.
    runner_params["verbose"] = verbose

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=DoNothingAgent)

    # Run
    os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=True)

    # Print summary
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "chronics at: {}".format(chron_name)
        msg_tmp += "\ttotal reward: {:.6f}".format(cum_reward)
        msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
        print(msg_tmp)

    if save_gif:
        save_log_gif(logs_path, res)
def evaluate(env,
             name=DEFAULT_NAME,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             verbose=False,
             save_gif=False):
    """
    How to evaluate the performances of the trained SAC agent (old
    implementation).

    Please use the new implementation instead.

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you evaluate your agent.

    name: ``str``
        The name of the trained baseline

    load_path: ``str``
        Path where the agent has been stored. It is mandatory: a
        ``RuntimeError`` is raised when it is ``None``.

    logs_path: ``str``
        Where to write the results of the assessment. When ``None`` no
        directory is created and ``None`` is forwarded to the runner as
        ``path_save``.

    nb_episode: ``int``
        How many episodes to run during the assessment of the performances

    nb_process: ``int``
        On how many process the assessment will be made. (setting this > 1
        can lead to some speed ups but can be unstable on some platform).
        The agent only stores the actions it played when this is equal to 1.

    max_steps: ``int``
        How many steps at maximum your agent will be assessed

    verbose: ``bool``
        Forwarded to the runner as its verbosity. When truthy, the value
        model summary, the progress bar, the evaluation summary and the
        actions played are also printed.

    save_gif: ``bool``
        Whether or not you want to save, as a gif, the performance of your
        agent. It might cause memory issues (might take a lot of ram) and
        drastically increase computation time.

    Returns
    -------
    agent: :class:`l2rpn_baselines.utils.DeepQAgent`
        The loaded agent that has been evaluated thanks to the runner.

    res: ``list``
        The results of the Runner on which the agent was tested.

    Raises
    ------
    RuntimeError
        If ``load_path`` is ``None``.

    Examples
    --------
    You can evaluate a SACOld agent this way:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward
        from l2rpn_baselines.SACOld import evaluate

        # Create dataset env
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNSandBoxScore,
                           other_rewards={
                               "reward": L2RPNReward
                           })

        # Call evaluation interface
        evaluate(env,
                 name="MyAwesomeAgent",
                 load_path="/WHERE/I/SAVED/THE/MODEL",
                 logs_path=None,
                 nb_episode=10,
                 nb_process=1,
                 max_steps=-1,
                 verbose=False,
                 save_gif=False)

    """
    # Limit gpu usage
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices):
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    if load_path is None:
        raise RuntimeError(
            "Cannot evaluate a model if there is nothing to be loaded.")

    # Only the main model directory is needed to read the architecture.
    path_model, _ = SACOld_NN.get_path_model(load_path, name)
    nn_archi = SACOld_NNParam.from_json(
        os.path.join(path_model, "nn_architecture.json"))

    # Create agent
    agent = SACOld(action_space=env.action_space,
                   name=name,
                   store_action=nb_process == 1,
                   nn_archi=nn_archi,
                   observation_space=env.observation_space)

    # Load weights from file
    agent.load(load_path)

    # Print model summary (only built when it is actually displayed)
    if verbose:
        stringlist = []
        agent.deep_q.model_value.summary(
            print_fn=lambda x: stringlist.append(x))
        short_model_summary = "\n".join(stringlist)
        print("Value model: {}".format(short_model_summary))

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=agent)

    # Run
    # ``os.makedirs(None)`` raises a TypeError, while the runner accepts
    # ``path_save=None``: only create the directory when one is given.
    if logs_path is not None:
        os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=verbose)

    # Print summary
    if verbose:
        print("Evaluation summary:")
        for _, chron_name, cum_reward, nb_time_step, max_ts in res:
            msg_tmp = "chronics at: {}".format(chron_name)
            msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
            msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(
                nb_time_step, max_ts)
            print(msg_tmp)

        if len(agent.dict_action):
            # I output some of the actions played
            print("The agent played {} different action".format(
                len(agent.dict_action)))
            for id_, (nb, act, types) in agent.dict_action.items():
                print("Action with ID {} was played {} times".format(id_, nb))
                print("{}".format(act))
                print("-----------")

    if save_gif:
        if verbose:
            print("Saving the gif of the episodes")
        save_log_gif(logs_path, res)

    return agent, res
def evaluate(env,
             load_path=".",
             logs_path=None,
             nb_episode=1,
             nb_process=1,
             max_steps=-1,
             verbose=False,
             save_gif=False,
             **kwargs):
    """
    In order to submit a valid baseline, it is mandatory to provide an
    "evaluate" function with the same signature as this one.

    This variant evaluates a ``PandapowerOPFAgent`` built from the
    environment's action space and initial grid.

    Parameters
    ----------
    env: :class:`grid2op.Environment.Environment`
        The environment on which the baseline will be evaluated.

    load_path: ``str``
        The path where the model is stored. It is not used by this function:
        the agent is built from ``env.init_grid_path`` and nothing is loaded.

    logs_path: ``str``
        The path where the agents results will be stored.

    nb_episode: ``int``
        Number of episodes to run for the assessment of the performance.
        By default it's 1.

    nb_process: ``int``
        Number of process to be used for the assessment of the performance.
        Should be an integer greater than or equal to 1. By default it's 1.

    max_steps: ``int``
        Maximum number of timestep each episode can last. It should be a
        positive integer or -1. -1 means that the entire episode is run
        (until the chronics is out of data or until a game over).
        By default it's -1.

    verbose: ``bool``
        verbosity of the output

    save_gif: ``bool``
        Whether or not to save a gif into each episode folder corresponding
        to the representation of the said episode.

    kwargs:
        Other key words arguments that you are free to use for either
        building the agent, saving it, etc. They are not used here.

    Returns
    -------
    ``None``
    """
    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    # Instantiate agent
    agent = PandapowerOPFAgent(
        env.action_space,
        env.init_grid_path,  # load initial pandapower grid
        acceptable_loading_pct=98.0,  # which transformer and line loading is acceptable
        min_loss_reduction_mwt=10.,  # how big should a loss reduction be to justify an action
        opf_type="pypower"
    )  # if you have PowerModels.jl installed, use "powermodels"

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=agent)

    # start the runner
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=False)

    # Print summary
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "\tFor chronics located at {}\n".format(chron_name)
        msg_tmp += "\t\t - cumulative reward: {:.6f}\n".format(cum_reward)
        msg_tmp += "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(
            nb_time_step, max_ts)
        print(msg_tmp)

    if save_gif:
        # The episodes were written under ``logs_path`` by the runner; the
        # previous code passed ``load_path`` (the model directory) instead.
        save_log_gif(logs_path, res)
def evaluate(env,
             name=DEFAULT_NAME,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             verbose=False,
             save_gif=False):
    """
    Evaluate a saved ``SAC`` agent on ``env`` through a ``Runner``.

    Parameters
    ----------
    env:
        Environment to evaluate on. It must expose ``get_params_for_runner()``,
        ``action_space`` and ``reset()``.

    name: ``str``
        Name given to the agent when it is created.

    load_path: ``str``
        Location handed to ``agent.load`` to restore the agent.

    logs_path: ``str``
        Directory where the runner stores its results (created if missing).

    nb_episode: ``int``
        Number of episodes forwarded to the runner.

    nb_process: ``int``
        Number of processes forwarded to the runner. The agent only stores
        the actions it played when this is equal to 1.

    max_steps: ``int``
        Forwarded to the runner as ``max_iter``.

    verbose: ``bool``
        Forwarded to the runner as its verbosity. The model summary, the
        progress bar and the evaluation summary are always displayed,
        whatever this value is.

    save_gif: ``bool``
        When truthy, ``save_log_gif`` is called on the logs once the run is
        over.

    Returns
    -------
    ``None``
    """
    # Limit gpu usage
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices):
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    runner_params = env.get_params_for_runner()
    # Use the function argument: the previous code read ``args.verbose``,
    # a name that only exists when the module is run as a script, so calling
    # ``evaluate`` from another module raised a NameError.
    runner_params["verbose"] = verbose

    # Create agent
    agent = SAC(action_space=env.action_space,
                name=name,
                store_action=nb_process == 1)

    # force creation of the neural networks
    obs = env.reset()
    _ = agent.act(obs, 0., False)

    # Load weights from file
    agent.load(load_path)

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=agent)

    # Print model summary
    stringlist = []
    agent.deep_q.model.summary(print_fn=lambda x: stringlist.append(x))
    short_model_summary = "\n".join(stringlist)
    print(short_model_summary)

    # Run
    os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=True)

    if len(agent.dict_action):
        # I output some of the actions played
        print("The agent played {} different action".format(
            len(agent.dict_action)))
        for id_, (nb, act) in agent.dict_action.items():
            print("Action with ID {} was played {} times".format(id_, nb))
            print("{}".format(act))
            print("-----------")

    # Print summary
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "chronics at: {}".format(chron_name)
        msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
        msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
        print(msg_tmp)

    if save_gif:
        print("Saving the gif of the episodes")
        save_log_gif(logs_path, res)
def evaluate(env,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             grid="IEEE14",  # IEEE14, IEEE118_3 (WCCI or Neurips Track Robustness), IEEE118
             seed=None,
             verbose=False,
             save_gif=False):
    """
    Evaluate an ``ExpertAgent`` on ``env`` through a ``Runner``.

    Parameters
    ----------
    env: :class:`grid2op.Environment.Environment`
        The environment on which the baseline will be evaluated.

    load_path: ``str``
        Not used by this function (nothing is loaded).

    logs_path: ``str``
        Directory where the runner stores its results (created if missing).

    nb_episode: ``int``
        Number of episodes forwarded to the runner. It is also the number of
        environment seeds and agent seeds drawn when ``seed`` is given.

    nb_process: ``int``
        Number of processes forwarded to the runner.

    max_steps: ``int``
        Forwarded to the runner as ``max_iter``.

    grid: ``str``
        Name identifier of the environment grid, handed to the agent.

    seed: ``int``
        When not ``None``, seeds numpy's global random generator, which is
        then used to draw one environment seed and one agent seed per
        episode. When ``None`` no seeds are given to the runner.

    verbose: ``bool``
        Forwarded to the runner as its verbosity. The progress bar is always
        displayed.

    save_gif: ``bool``
        When truthy, ``save_log_gif`` is called on the logs once the run is
        over.

    Returns
    -------
    res: ``list``
        Whatever ``runner.run`` returned (one entry per episode).
    """
    runner_kwargs = env.get_params_for_runner()
    runner_kwargs["verbose"] = verbose

    agent = ExpertAgent(env.action_space,
                        env.observation_space,
                        "Template",
                        grid)
    runner = Runner(**runner_kwargs,
                    agentClass=None,
                    agentInstance=agent)

    if seed is None:
        env_seeds = None
        agent_seeds = None
    else:
        # Environment seeds are drawn first, then agent seeds: the order
        # matters for reproducibility.
        np.random.seed(seed)
        upper_bound = np.iinfo(dt_int).max
        nb_seeds = int(nb_episode)
        env_seeds = list(np.random.randint(upper_bound, size=nb_seeds))
        agent_seeds = list(np.random.randint(upper_bound, size=nb_seeds))

    os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     env_seeds=env_seeds,
                     agent_seeds=agent_seeds,
                     pbar=True)

    # The summary goes through the logging module rather than stdout.
    logging.info("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        pieces = (
            "chronics at: {}".format(chron_name),
            "\ttotal reward: {:.6f}".format(cum_reward),
            "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts),
        )
        logging.info("".join(pieces))

    if save_gif:
        save_log_gif(logs_path, res)

    return res
def evaluate(env,
             load_path=".",
             logs_path=None,
             nb_episode=1,
             nb_process=1,
             max_steps=-1,
             verbose=False,
             save_gif=False,
             **kwargs):
    """
    Template of the "evaluate" function every baseline has to provide, with
    this exact signature.

    Parameters
    ----------
    env: :class:`grid2op.Environment.Environment`
        The environment on which the baseline will be evaluated.

    load_path: ``str``
        The path where the model is stored, handed to ``agent.load``.

    logs_path: ``str``
        The path where the agent results will be stored (forwarded to the
        runner as ``path_save``).

    nb_episode: ``int``
        Number of episodes to run for the assessment of the performance.
        By default it's 1.

    nb_process: ``int``
        Number of processes to be used for the assessment of the
        performance. By default it's 1.

    max_steps: ``int``
        Maximum number of time steps each episode can last, forwarded to the
        runner as ``max_iter``. By default it's -1.

    verbose: ``bool``
        Forwarded to the runner as its verbosity. The evaluation summary is
        always printed.

    save_gif: ``bool``
        When truthy, ``save_log_gif`` is called on the logs once the run is
        over.

    kwargs:
        Extra keyword arguments, free to use when adapting this template.
        They are not used here.

    Returns
    -------
    ``None``
    """
    runner_kwargs = env.get_params_for_runner()
    runner_kwargs["verbose"] = verbose

    # Build the agent and restore it (this part is meant to be adapted).
    agent = Template(env.action_space,
                     env.observation_space,
                     "Template")
    agent.load(load_path)

    runner = Runner(**runner_kwargs,
                    agentClass=None,
                    agentInstance=agent)

    # Anything specific to your model can be done here, before the run.

    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=False)

    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        pieces = (
            "\tFor chronics located at {}\n".format(chron_name),
            "\t\t - cumulative reward: {:.6f}\n".format(cum_reward),
            "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(
                nb_time_step, max_ts),
        )
        print("".join(pieces))

    if save_gif:
        save_log_gif(logs_path, res)