def setUp(self) -> None:
    """Run a short random-agent experiment on the Sigmoid benchmark into a temp dir.

    Drives 80 episodes per seed while logging per-step values, then stores the
    resulting log file path on ``self.log_file`` for the tests to inspect.
    """
    self.temp_dir = tempfile.TemporaryDirectory()
    n_episodes = 80
    seed_values = [0, 1, 3, 4, 5]
    logger = Logger(
        output_path=Path(self.temp_dir.name),
        experiment_name="test_env",
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    env = SigmoidBenchmark().get_benchmark()
    agent = RandomAgent(env)
    logger.set_env(env)
    env_logger = logger.add_module(env)

    for seed_value in seed_values:
        env.seed(seed_value)
        logger.set_additional_info(seed=seed_value)
        logger.reset_episode()
        for ep in range(n_episodes):
            state = env.reset()
            done, reward, t = False, 0, 0
            while not done:
                action = agent.act(state, reward)
                # Log the step/episode counters before stepping the env.
                env_logger.log("logged_step", t)
                env_logger.log("logged_episode", ep)
                next_state, reward, done, _ = env.step(action)
                env_logger.log("reward", reward)
                env_logger.log("done", done)
                agent.train(next_state, reward)
                state = next_state
                logger.next_step()
                t += 1
            agent.end_episode(state, reward)
            logger.next_episode()

    env.close()
    logger.close()
    self.log_file = env_logger.log_file.name
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    """Evaluate a random agent on one benchmark across several seeds.

    For each seed a fresh logger and environment are created, the env is
    wrapped in a PerformanceTrackingWrapper, and ``run_benchmark`` is invoked;
    ``fixed > 1`` switches to a fixed-interval random agent and is reflected
    in the experiment name.
    """
    bench = getattr(benchmarks, benchmark_name)()
    for seed in seeds:
        name = f"random_fixed{fixed}_{seed}" if fixed > 1 else f"random_{seed}"
        logger = Logger(
            experiment_name=name,
            output_path=results_path / benchmark_name,
        )
        env = PerformanceTrackingWrapper(
            bench.get_benchmark(seed=seed),
            logger=logger.add_module(PerformanceTrackingWrapper),
        )
        agent = DynamicRandomAgent(env, fixed)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=seed)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_optimal(results_path, benchmark_name, num_episodes, seeds=None):
    """Run the known comparison policy (optimal, or CSA for CMA-ES) on a benchmark.

    Parameters
    ----------
    results_path : pathlib.Path
        Directory under which per-benchmark results are written.
    benchmark_name : str
        Name of a benchmark class in the ``benchmarks`` module.
    num_episodes : int
        Number of episodes to run per seed.
    seeds : iterable of int, optional
        Seeds to evaluate; defaults to ``np.arange(10)``.
    """
    # Avoid the mutable-default-argument pitfall (an ndarray default is
    # created once at definition time); build the default per call instead.
    if seeds is None:
        seeds = np.arange(10)

    # Dispatch table instead of an if/elif chain; also lets us bail out
    # before instantiating the benchmark when no policy is available.
    policies = {
        "LubyBenchmark": optimal_luby,
        "SigmoidBenchmark": optimal_sigmoid,
        "FastDownwardBenchmark": optimal_fd,
        "CMAESBenchmark": csa,
    }
    if benchmark_name not in policies:
        print("No comparison policy found for this benchmark")
        return
    policy = policies[benchmark_name]

    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = GenericAgent(env, policy)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_dacbench(results_path, agent_method, num_episodes):
    """Run every registered benchmark for 10 seeds and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    for bench_class in map(benchmarks.__dict__.get, benchmarks.__all__):
        print(f"Evaluating {bench_class.__name__}")
        for seed in range(10):
            print(f"Seed {seed}/10")
            bench = bench_class()
            env = bench.get_benchmark(seed=seed)
            logger = Logger(
                experiment_name=f"seed_{seed}",
                output_path=Path(results_path) / f"{bench_class.__name__}",
            )
            # Register metadata before wrapping the env for perf tracking.
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=seed)
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)
            run_benchmark(env, agent, num_episodes, logger)
            logger.close()
def run_static(results_path, benchmark_name, action, num_episodes, seeds=None):
    """Run a static (constant-action) agent on a benchmark for each seed.

    Parameters
    ----------
    results_path : pathlib.Path
        Directory under which per-benchmark results are written.
    benchmark_name : str
        Name of a benchmark class in the ``benchmarks`` module.
    action :
        The single action the StaticAgent replays every step.
    num_episodes : int
        Number of episodes to run per seed.
    seeds : iterable of int, optional
        Seeds to evaluate; defaults to ``np.arange(10)``.
    """
    # Avoid the mutable-default-argument pitfall (an ndarray default is
    # created once at definition time); build the default per call instead.
    if seeds is None:
        seeds = np.arange(10)
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        logger = Logger(
            experiment_name=f"static_{action}_{s}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = StaticAgent(env, action)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s, action=action)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
# CLI options for the PPO runs; `parser` and `args.outdir` come from earlier
# in this file.
parser.add_argument(
    "--benchmarks", nargs="+", type=str, default=None, help="Benchmarks to run PPO for"
)
parser.add_argument(
    "--timesteps", type=int, default=1000000, help="Number of timesteps to run"
)
parser.add_argument(
    "--seeds",
    nargs="+",
    type=int,
    default=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
    help="Seeds for evaluation",
)
args = parser.parse_args()

# Fail with a clear usage error instead of a TypeError: the --benchmarks
# default is None, which the loop below cannot iterate.
if args.benchmarks is None:
    parser.error("--benchmarks is required")

for b in args.benchmarks:
    for s in args.seeds:
        logger = Logger(
            experiment_name=f"PPO_{b}_s{s}", output_path=Path(args.outdir)
        )
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        logger.set_additional_info(seed=s)
        config = {"seed": s, "logger": perf_logger, "benchmark": b}
        env = make_benchmark(config)
        model = PPO2("MlpPolicy", env)
        # Renamed from "logging" so the stdlib logging module is not shadowed.
        callback = LoggerCallback(logger)
        model.learn(total_timesteps=args.timesteps, callback=callback)
        logger.close()