Example #1
0
    def setUp(self) -> None:
        """Build a complete experiment log for the tests to read.

        Runs a RandomAgent on the Sigmoid benchmark for several seeds and
        episodes, logging step index, episode index, reward and done flag
        for every environment step into a temporary directory, then stores
        the resulting log file path in ``self.log_file``.
        """
        # Temporary output directory for the logger; presumably cleaned up
        # in tearDown — TODO confirm it is released elsewhere.
        self.temp_dir = tempfile.TemporaryDirectory()

        episodes = 80
        # NOTE(review): seed 2 is skipped — confirm this is intentional.
        seeds = [0, 1, 3, 4, 5]
        experiment_name = "test_env"
        # Write frequencies of None — presumably disables periodic flushing
        # so data is written on close; verify against the Logger API.
        logger = Logger(
            output_path=Path(self.temp_dir.name),
            experiment_name=experiment_name,
            step_write_frequency=None,
            episode_write_frequency=None,
        )

        benchmark = SigmoidBenchmark()
        env = benchmark.get_benchmark()
        agent = RandomAgent(env)
        logger.set_env(env)

        # Module-specific logger used for the per-step log entries below.
        env_logger = logger.add_module(env)
        for seed in seeds:
            env.seed(seed)
            # Tag all subsequent log entries with the current seed.
            logger.set_additional_info(seed=seed)
            logger.reset_episode()

            for episode in range(episodes):
                state = env.reset()
                done = False
                reward = 0
                step = 0
                while not done:
                    action = agent.act(state, reward)
                    # Record the step/episode counters before stepping the env...
                    env_logger.log(
                        "logged_step",
                        step,
                    )
                    env_logger.log(
                        "logged_episode",
                        episode,
                    )
                    next_state, reward, done, _ = env.step(action)
                    # ...and the resulting reward/done after the transition.
                    env_logger.log(
                        "reward",
                        reward,
                    )
                    env_logger.log(
                        "done",
                        done,
                    )
                    agent.train(next_state, reward)
                    state = next_state
                    # Advance the logger's internal step counter only after
                    # everything for this step has been logged.
                    logger.next_step()

                    step += 1
                agent.end_episode(state, reward)
                logger.next_episode()

        env.close()
        logger.close()

        # Path of the finished log file that the tests will parse.
        self.log_file = env_logger.log_file.name
Example #2
0
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    """Run a random agent on one benchmark, once per seed.

    Each seed gets its own experiment directory under
    ``results_path / benchmark_name``; performance is recorded through a
    PerformanceTrackingWrapper attached to the logger. ``fixed > 1`` makes
    the agent repeat each sampled action and is encoded in the experiment
    name.
    """
    bench = getattr(benchmarks, benchmark_name)()
    for seed in seeds:
        # Encode the fixed-action repetition factor in the name when active.
        suffix = f"fixed{fixed}_{seed}" if fixed > 1 else f"{seed}"
        logger = Logger(
            experiment_name=f"random_{suffix}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=seed)
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(env, logger=perf_logger)
        agent = DynamicRandomAgent(env, fixed)

        # Register everything with the logger before running.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=seed)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #3
0
def run_optimal(results_path,
                benchmark_name,
                num_episodes,
                seeds=np.arange(10)):
    """Run the known comparison policy for one benchmark, once per seed.

    Only benchmarks with a registered comparison policy are supported;
    anything else prints a notice and returns without running. Results go
    to one experiment directory per seed under
    ``results_path / benchmark_name``.
    """
    bench = getattr(benchmarks, benchmark_name)()
    # Dispatch table: benchmark name -> comparison policy.
    comparison_policies = {
        "LubyBenchmark": optimal_luby,
        "SigmoidBenchmark": optimal_sigmoid,
        "FastDownwardBenchmark": optimal_fd,
        "CMAESBenchmark": csa,
    }
    if benchmark_name not in comparison_policies:
        print("No comparison policy found for this benchmark")
        return
    policy = comparison_policies[benchmark_name]

    # CMA-ES uses CSA rather than a true optimal policy, hence the name.
    prefix = "csa" if benchmark_name == "CMAESBenchmark" else "optimal"
    for seed in seeds:
        logger = Logger(experiment_name=f"{prefix}_{seed}",
                        output_path=results_path / benchmark_name)

        env = bench.get_benchmark(seed=seed)
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(env, logger=perf_logger)
        agent = GenericAgent(env, policy)

        # Register everything with the logger before running.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=seed)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #4
0
File: runner.py  Project: mwever/DACBench
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run every registered benchmark for seeds 0-9 and save the results.

    Parameters
    -------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """

    # benchmarks.__all__ lists the benchmark class names; resolve each one.
    for bench_cls in map(benchmarks.__dict__.get, benchmarks.__all__):
        print(f"Evaluating {bench_cls.__name__}")
        for seed in range(10):
            print(f"Seed {seed}/10")
            bench = bench_cls()
            env = bench.get_benchmark(seed=seed)

            logger = Logger(
                experiment_name=f"seed_{seed}",
                output_path=Path(results_path) / f"{bench_cls.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=seed)

            # Wrap the env so per-episode performance is recorded.
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)

            logger.close()
Example #5
0
def run_static(results_path,
               benchmark_name,
               action,
               num_episodes,
               seeds=np.arange(10)):
    """Run an agent that always plays ``action`` on one benchmark, per seed.

    Results land in one experiment directory per seed under
    ``results_path / benchmark_name``; the constant action is recorded both
    in the experiment name and in the logger's additional info.
    """
    bench = getattr(benchmarks, benchmark_name)()
    for seed in seeds:
        logger = Logger(
            experiment_name=f"static_{action}_{seed}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=seed)
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(env, logger=perf_logger)
        agent = StaticAgent(env, action)

        # Register everything with the logger before running.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=seed, action=action)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #6
0
parser.add_argument("--benchmarks",
                    nargs="+",
                    type=str,
                    default=None,
                    help="Benchmarks to run PPO for")
parser.add_argument("--timesteps",
                    type=int,
                    default=1000000,
                    help="Number of timesteps to run")
parser.add_argument(
    "--seeds",
    nargs="+",
    type=int,
    default=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
    help="Seeds for evaluation",
)
args = parser.parse_args()

# --benchmarks defaults to None; iterating it unguarded would crash with a
# TypeError traceback. Fail fast with a proper usage error instead.
if not args.benchmarks:
    parser.error("--benchmarks is required: specify at least one benchmark")

# Train one PPO model per (benchmark, seed) pair, recording performance
# through the DACBench Logger / PerformanceTrackingWrapper.
for b in args.benchmarks:
    for s in args.seeds:
        logger = Logger(experiment_name=f"PPO_{b}_s{s}",
                        output_path=Path(args.outdir))
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        logger.set_additional_info(seed=s)
        config = {"seed": s, "logger": perf_logger, "benchmark": b}
        env = make_benchmark(config)
        model = PPO2("MlpPolicy", env)
        # Renamed from `logging` to avoid shadowing the stdlib logging module.
        logger_callback = LoggerCallback(logger)
        model.learn(total_timesteps=args.timesteps, callback=logger_callback)
        logger.close()