def test_calibrated_agent(self):
    """Does the calibrated agent work as expected?"""

    # Initialize engine
    eng = engines.ContinuousTwoZoneEngine(
        nsteps=100,
        use_qdot=True,
        fuel="PRF100",
        rxnmech="llnl_gasoline_surrogate_323.xml",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
    )

    # Initialize the agent
    env = DummyVecEnv([lambda: eng])
    agent = agents.CalibratedAgent(env)
    agent.learn()

    # Evaluate the agent
    t0 = time.time()
    df, total_reward = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=0, name="calibrated")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002195212151)
    npt.assert_allclose(np.linalg.norm(df.p), 22012100.17143623)
    npt.assert_allclose(np.linalg.norm(df["T"]), 14210.47662980)
    npt.assert_allclose(np.linalg.norm(df.rewards), 104.47362155)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.04144044)
    npt.assert_allclose(np.linalg.norm(df.qdot), 97686.91574242)

    print(f"Wall time for CalibratedAgent = {elapsed} seconds")
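
# A small sketch (not part of the original suite) factoring out the golden-norm
# pattern these tests share: take the L2 norm of a trajectory column and compare
# it against a stored reference value. The helper name and the rtol default are
# illustrative assumptions, not repo API.
def _assert_golden_norms(df, golden, rtol=1e-7):
    """Check that the L2 norm of each named dataframe column matches its golden value."""
    # Local imports keep this sketch self-contained
    import numpy as np
    import numpy.testing as npt

    for col, val in golden.items():
        npt.assert_allclose(np.linalg.norm(df[col]), val, rtol=rtol)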
def generate_expert_traj(self, fname):
    """Evaluate this agent and save its trajectory to fname (.npz) in the expert-data format."""
    df, total_reward = utilities.evaluate_agent(self.env, self)
    episode_starts = [False for _ in range(len(df))]
    episode_starts[-1] = True

    numpy_dict = {
        "actions": df[self.eng.action.actions].values,
        "obs": df[self.env.get_attr("observables", indices=0)[0]].values,
        "rewards": df.rewards.values,
        "episode_returns": np.array([total_reward]),
        "episode_starts": episode_starts,
    }

    np.savez(fname, **numpy_dict)

    return numpy_dict
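
# A minimal usage sketch, assuming a file written by generate_expert_traj above.
# The helper name and default filename are hypothetical; the keys are exactly
# those of numpy_dict above.
def _inspect_expert_traj(fname="expert_traj.npz"):
    """Load a saved expert trajectory and sanity-check its shapes."""
    import numpy as np

    data = np.load(fname)
    # One row per recorded step, consistent across arrays
    assert len(data["actions"]) == len(data["obs"]) == len(data["rewards"])
    # A single evaluated episode, hence a single return
    assert data["episode_returns"].shape == (1,)
    return data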
def test_exhaustive_agent(self):
    """Does the exhaustive agent work as expected?"""

    # Initialize engine
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        mdot=0.1,
        max_minj=2.5e-5,
        fuel="dodecane",
        rxnmech="dodecane_lu_nox.cti",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])

    # Initialize the agent
    agent = agents.ExhaustiveAgent(env)
    agent.learn()

    # Evaluate the agent
    t0 = time.time()
    df, total_reward = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=1, name="exhaustive")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 25431213.9403193)
    npt.assert_allclose(np.linalg.norm(df["T"]), 13611.58370927)
    npt.assert_allclose(np.linalg.norm(df.rewards), 101.41373957)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.1)

    print(f"Wall time for ExhaustiveAgent = {elapsed} seconds")
        tensorboard_log=logdir,
    )
    agent.learn(
        total_timesteps=agent_params["number_episodes"].value
        * (eng_params["nsteps"].value - 1)
        * agent_params["nranks"].value,
        callback=callback,
    )
elif agent_params["agent"].value == "manual":
    env = DummyVecEnv([lambda: eng])
    agent = agents.ManualAgent(env)
    agent.learn(agent_params["injection_cas"].value, agent_params["qdot_cas"].value)

# Save, evaluate, and plot the agent
pfx = os.path.join(logdir, "agent")
agent.save(pfx)
env = DummyVecEnv([lambda: eng])
df, total_reward = utilities.evaluate_agent(env, agent)
df.to_csv(pfx + ".csv", index=False)
utilities.plot_df(env, df, idx=0, name=agent_params["agent"].value)
utilities.save_plots(pfx + ".pdf")

# Plot the training history
logs = pd.read_csv(logname)
utilities.plot_training(logs, os.path.join(logdir, "logger.pdf"))

# Output timer
end = time.time() - start
print(f"Elapsed time {timedelta(seconds=end)} (or {end} seconds)")