Example #1
    def update_action(n_clicks, action_dict, agent, scenario):
        # Nothing to update until an action string has been submitted
        if action_dict is None:
            raise PreventUpdate
        episode = make_episode(agent, scenario)
        # Replace parenthesised tuples with brackets so the string parses as JSON
        action_dict = json.loads(
            action_dict.replace("(", "[").replace(")", "]"))

        # Temporary implementation for testing purposes
        p = Parameters()
        p.NO_OVERFLOW_DISCONNECTION = False
        env = make(
            r"D:\Projects\RTE-Grid2Viz\Grid2Op\grid2op\data\rte_case14_realistic",
            test=True,
            param=p,
        )
        env.seed(0)

        params_for_runner = env.get_params_for_runner()
        params_to_fetch = ["init_grid_path"]
        params_for_reboot = {
            key: value
            for key, value in params_for_runner.items()
            if key in params_to_fetch
        }
        params_for_reboot["parameters"] = p

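        # Reload the stored episode into the environment so it can be replayed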
        episode_reboot = EpisodeReboot.EpisodeReboot()
        agent_path = r"D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/do-nothing-baseline"
        episode_reboot.load(
            env.backend,
            data=episode,
            agent_path=agent_path,
            name=episode.episode_name,
            env_kwargs=params_for_reboot,
        )
        # Replay the episode up to the first time step, then apply the
        # user-submitted action on top of a do-nothing baseline action
        obs, reward, *_ = episode_reboot.go_to(1)
        agent = DoNothingAgent(action_space=episode_reboot.env.action_space)
        act = agent.act(obs, reward)
        act.update(action_dict)
        # Simulate the resulting action on the current observation
        obs, *_ = obs.simulate(action=act, time_step=0)
        network_graph = make_network(episode).plot_obs(observation=obs)
        return json.dumps(action_dict, indent=1), network_graph
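The function above is the body of a Dash callback; the decorator that binds it to the page components is not part of the snippet. Below is a minimal sketch of how such a callback could be registered, assuming hypothetical component ids (`simulate-button`, `action-textarea`, `agent-store`, `scenario-store`, `action-json`, `network-graph`) and a Dash `app` object.

    # Hypothetical wiring only: the component ids and the `app` object are assumptions.
    import dash
    from dash.dependencies import Input, Output, State

    app = dash.Dash(__name__)

    @app.callback(
        [Output("action-json", "children"),
         Output("network-graph", "figure")],
        [Input("simulate-button", "n_clicks")],
        [State("action-textarea", "value"),
         State("agent-store", "data"),
         State("scenario-store", "data")],
    )
    def update_action(n_clicks, action_dict, agent, scenario):
        ...  # body as in the example above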
Example #2
    nb_env = 8  # change that to adapt to your system
    NB_STEP = 1000  # number of step for each environment

    agent = DoNothingAgent(env.action_space)
    multi_envs = MultiEnvironment(env=env, nb_env=nb_env)

    obs = multi_envs.reset()
    rews = [env.reward_range[0] for i in range(nb_env)]
    dones = [False for i in range(nb_env)]

    total_reward = 0.
    for i in tqdm(range(NB_STEP)):
        acts = [None for _ in range(nb_env)]
        for env_act_id in range(nb_env):
            acts[env_act_id] = agent.act(obs[env_act_id], rews[env_act_id],
                                         dones[env_act_id])
        obs, rews, dones, infos = multi_envs.step(acts)
        total_reward += np.sum(rews)

    multi_envs.close()

    ob = env.reset()
    rew = env.reward_range[0]
    done = False
    total_reward_single = 0
    for i in tqdm(range(NB_STEP)):
        act = agent.act(ob, rew, done)
        ob, rew, done, info = env.step(act)
        total_reward_single += rew
        if done:
            ob = env.reset()
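The loops above assume that `env` has already been created and that the usual imports are in scope. A minimal setup sketch is given below; the dataset name ("rte_case14_realistic") and the import location of `MultiEnvironment` are assumptions that depend on the installed grid2op version (newer releases expose the same idea under a different name, e.g. SingleEnvMultiProcess).

    # Setup sketch only: the dataset name and MultiEnvironment import path are assumptions.
    import numpy as np
    from tqdm import tqdm

    import grid2op
    from grid2op.Agent import DoNothingAgent
    from grid2op.Environment import MultiEnvironment

    env = grid2op.make("rte_case14_realistic", test=True)

With this setup, `total_reward` (parallel environments) and `total_reward_single` (single environment) can be compared once both loops finish.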
Example #3
def study(env,
          name=DEFAULT_NAME,
          load_path=None,
          logs_path=DEFAULT_LOGS_DIR,
          nb_episode=DEFAULT_NB_EPISODE,
          nb_process=DEFAULT_NB_PROCESS,
          max_steps=DEFAULT_MAX_STEPS,
          verbose=False,
          save_gif=False):
    """study the prediction of the grid_model"""

    # Limit gpu usage
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices):
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    if load_path is None:
        raise RuntimeError(
            "Cannot evaluate a model if there is nothing to be loaded.")
    path_model, path_target_model = LeapNetEncoded_NN.get_path_model(
        load_path, name)
    nn_archi = LeapNetEncoded_NNParam.from_json(
        os.path.join(path_model, "nn_architecture.json"))

    # Run
    # Create agent
    agent = LeapNetEncoded(action_space=env.action_space,
                           name=name,
                           store_action=nb_process == 1,
                           nn_archi=nn_archi,
                           observation_space=env.observation_space)

    # Load weights from file
    agent.load(load_path)

    # Print model summary
    stringlist = []
    agent.deep_q._model.summary(print_fn=lambda x: stringlist.append(x))
    short_model_summary = "\n".join(stringlist)
    if verbose:
        print(short_model_summary)

    # A do-nothing agent drives the environment while the grid model is evaluated
    from grid2op.Agent import DoNothingAgent
    policy_agent = DoNothingAgent(env.action_space)
    policy_agent.seed(0)

    env.set_id(0)
    res = {k: ([], []) for k in nn_archi.list_attr_obs_gm_out}
    with tqdm(desc="step") as pbar:
        for _ in range(nb_episode):
            obs = env.reset()
            reward = env.reward_range[0]
            done = False
            while not done:
                obs_converted = agent.convert_obs(obs)
                data_nn, true_output_grid = agent.deep_q._make_x_tau(
                    obs_converted)

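                # Rescale the stored grid-model outputs with the (add, mult) constants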
                for i, (var_n, add, mult) in enumerate(
                        zip(nn_archi.list_attr_obs_gm_out,
                            nn_archi.gm_out_adds, nn_archi.gm_out_mults)):
                    tmp = true_output_grid[i]
                    tmp = tmp / mult - add
                    true_output_grid[i] = tmp

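                # Predict with the grid model and rescale its outputs the same way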
                pred = agent.deep_q.grid_model.predict(data_nn, batch_size=1)
                real_pred = []
                for i, (var_n, add, mult) in enumerate(
                        zip(nn_archi.list_attr_obs_gm_out,
                            nn_archi.gm_out_adds, nn_archi.gm_out_mults)):
                    tmp = pred[i]
                    tmp = tmp / mult - add
                    real_pred.append(tmp)

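                # Collect flattened predictions and targets for each output variable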
                for i, var_n in enumerate(nn_archi.list_attr_obs_gm_out):
                    res[var_n][0].append(real_pred[i].reshape(-1))
                    res[var_n][1].append(true_output_grid[i].reshape(-1))

                obs, reward, done, info = env.step(
                    policy_agent.act(obs, reward, done))
                pbar.update(1)

    print("Results")
    from sklearn.metrics import mean_squared_error
    for var_n, (pred, true) in res.items():
        true = np.array(true)
        pred = np.array(pred)
        RMSE = mean_squared_error(y_true=true,
                                  y_pred=pred,
                                  multioutput="raw_values",
                                  squared=False)
        print("RMSE for {}: {:.2f} % variance".format(
            var_n, 100. * np.mean(RMSE / np.std(true))))
    return agent
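A hypothetical call to study() is sketched below; the dataset name and the path to a trained LeapNetEncoded model are placeholders, not part of the original example.

    # Placeholder dataset name and model path; adapt them to your own training output.
    import grid2op

    env = grid2op.make("l2rpn_case14_sandbox")
    agent = study(env,
                  name="LeapNetEncoded",
                  load_path="./saved_models",
                  nb_episode=1,
                  verbose=True)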