def test_calibrated_agent(self):
    """Does the calibrated agent work as expected?"""

    # Initialize engine
    eng = engines.ContinuousTwoZoneEngine(
        nsteps=100,
        use_qdot=True,
        fuel="PRF100",
        rxnmech="llnl_gasoline_surrogate_323.xml",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
    )

    # Initialize the agent
    env = DummyVecEnv([lambda: eng])
    agent = agents.CalibratedAgent(env)
    agent.learn()

    # Evaluate the agent
    t0 = time.time()
    df, total_reward = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=0, name="calibrated")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002195212151)
    npt.assert_allclose(np.linalg.norm(df.p), 22012100.17143623)
    npt.assert_allclose(np.linalg.norm(df["T"]), 14210.47662980)
    npt.assert_allclose(np.linalg.norm(df.rewards), 104.47362155)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.04144044)
    npt.assert_allclose(np.linalg.norm(df.qdot), 97686.91574242)

    print(f"Wall time for CalibratedAgent = {elapsed} seconds")
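
# A small sketch (not part of the original suite) factoring out the golden-norm
# pattern these tests share: take the L2 norm of a trajectory column and compare
# it against a stored reference value. The helper name and the rtol default are
# illustrative assumptions, not repo API.
def _assert_golden_norms(df, golden, rtol=1e-7):
    """Check that the L2 norm of each named dataframe column matches its golden value."""
    # Local imports keep this sketch self-contained
    import numpy as np
    import numpy.testing as npt

    for col, val in golden.items():
        npt.assert_allclose(np.linalg.norm(df[col]), val, rtol=rtol)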
def generate_expert_traj(self, fname):
    """Evaluate this agent and save its trajectory to fname (.npz) in the expert-data format."""
    df, total_reward = utilities.evaluate_agent(self.env, self)
    episode_starts = [False for _ in range(len(df))]
    episode_starts[-1] = True

    numpy_dict = {
        "actions": df[self.eng.action.actions].values,
        "obs": df[self.env.get_attr("observables", indices=0)[0]].values,
        "rewards": df.rewards.values,
        "episode_returns": np.array([total_reward]),
        "episode_starts": episode_starts,
    }

    np.savez(fname, **numpy_dict)

    return numpy_dict
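
# A minimal usage sketch, assuming a file written by generate_expert_traj above.
# The helper name and default filename are hypothetical; the keys are exactly
# those of numpy_dict above.
def _inspect_expert_traj(fname="expert_traj.npz"):
    """Load a saved expert trajectory and sanity-check its shapes."""
    import numpy as np

    data = np.load(fname)
    # One row per recorded step, consistent across arrays
    assert len(data["actions"]) == len(data["obs"]) == len(data["rewards"])
    # A single evaluated episode, hence a single return
    assert data["episode_returns"].shape == (1,)
    return data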
def test_exhaustive_agent(self):
    """Does the exhaustive agent work as expected?"""

    # Initialize engine
    eng = engines.DiscreteTwoZoneEngine(
        nsteps=101,
        mdot=0.1,
        max_minj=2.5e-5,
        fuel="dodecane",
        rxnmech="dodecane_lu_nox.cti",
        ename="Isooctane_MBT_DI_50C_Summ.xlsx",
        reward=rw.Reward(negative_reward=-0.05),
    )
    env = DummyVecEnv([lambda: eng])

    # Initialize the agent
    agent = agents.ExhaustiveAgent(env)
    agent.learn()

    # Evaluate the agent
    t0 = time.time()
    df, total_reward = utilities.evaluate_agent(env, agent)
    elapsed = time.time() - t0
    utilities.plot_df(env, df, idx=1, name="exhaustive")

    # Test
    npt.assert_allclose(np.linalg.norm(df.V), 0.002205916821815495)
    npt.assert_allclose(np.linalg.norm(df.p), 25431213.9403193)
    npt.assert_allclose(np.linalg.norm(df["T"]), 13611.58370927)
    npt.assert_allclose(np.linalg.norm(df.rewards), 101.41373957)
    npt.assert_allclose(np.linalg.norm(df.mdot), 0.1)

    print(f"Wall time for ExhaustiveAgent = {elapsed} seconds")
        tensorboard_log=logdir,
    )
    agent.learn(
        total_timesteps=agent_params["number_episodes"].value
        * (eng_params["nsteps"].value - 1)
        * agent_params["nranks"].value,
        callback=callback,
    )
elif agent_params["agent"].value == "manual":
    env = DummyVecEnv([lambda: eng])
    agent = agents.ManualAgent(env)
    agent.learn(agent_params["injection_cas"].value, agent_params["qdot_cas"].value)

# Save, evaluate, and plot the agent
pfx = os.path.join(logdir, "agent")
agent.save(pfx)
env = DummyVecEnv([lambda: eng])
df, total_reward = utilities.evaluate_agent(env, agent)
df.to_csv(pfx + ".csv", index=False)
utilities.plot_df(env, df, idx=0, name=agent_params["agent"].value)
utilities.save_plots(pfx + ".pdf")

# Plot the training history
logs = pd.read_csv(logname)
utilities.plot_training(logs, os.path.join(logdir, "logger.pdf"))

# Output timer
end = time.time() - start
print(f"Elapsed time {timedelta(seconds=end)} (or {end} seconds)")