Python load_environment示例，rl_agents.agents.common.factory.load_environment Python示例

示例#1

0

显示文件

文件： experiments.py 项目： chenpei-ws/rl-agents

def evaluate(environment_config, agent_config, options):
    """
        Run an agent in an environment, in training or testing mode.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options
    :return: the path of the evaluation monitor directory, relative to the current directory
    """
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally name the run directory after the agent configuration file (without its suffix)
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name
    else:
        run_directory = None

    # Convert the seed to an integer when one was given, and store it back in the options
    seed = options['--seed']
    if seed is not None:
        seed = int(seed)
    options['--seed'] = seed

    # A single flag drives the three display switches
    display = not options['--no-display']
    evaluation = Evaluation(
        env,
        agent,
        run_directory=run_directory,
        num_episodes=int(options['--episodes']),
        sim_seed=seed,
        recover=options['--recover'] or options['--recover-from'],
        display_env=display,
        display_agent=display,
        display_rewards=display,
    )

    # Train, test, or simply close the evaluation when neither mode was requested
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()
    return os.path.relpath(evaluation.monitor.directory)

示例#2

0

显示文件

def evaluate(environment_config, agent_config, options):
    """
        Run an agent in an environment, then optionally analyze the run.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options
    :return: the path of the evaluation monitor directory, relative to the current directory
    """
    if options['--verbose']:
        gym.logger.set_level(gym.logger.DEBUG)
    else:
        gym.logger.set_level(gym.logger.INFO)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally name the run directory after the agent configuration file (without its suffix)
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name
    else:
        run_directory = None

    # Convert the seed to an integer when one was given, and store it back in the options
    seed = options['--seed']
    if seed is not None:
        seed = int(seed)
    options['--seed'] = seed

    # A single flag drives the three display switches
    display = not options['--no-display']
    evaluation = Evaluation(
        env,
        agent,
        run_directory=run_directory,
        num_episodes=int(options['--episodes']),
        sim_seed=seed,
        recover=options['--recover'],
        display_env=display,
        display_agent=display,
        display_rewards=display,
    )

    # Train, test, or simply close the evaluation when neither mode was requested
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    # The analysis of a single run is skipped when a benchmark is given
    if options['--analyze'] and not options['<benchmark>']:
        RunAnalyzer([evaluation.monitor.directory])
    return os.path.relpath(evaluation.monitor.directory)

示例#3

0

显示文件

def make_configure_env(**kwargs):
    """
        Load the environment described by "exp_merge_complex_sa.json", configure it and reset it.

    The keyword arguments are accepted but never read by this function.
    NOTE(review): `env_kwargs` is not defined here -- presumably a module-level name;
    confirm that it was not meant to be `kwargs`.

    :return: the configured environment, after a reset
    """
    # Previously tried alternatives: gym.make(kwargs["id"]) and "exp_merge_complex_base_ma.json"
    config_file = "exp_merge_complex_sa.json"
    configured_env = load_environment(config_file)
    configured_env.configure(env_kwargs["config"])
    configured_env.reset()
    return configured_env

示例#4

0

显示文件

文件： planners_evaluation.py 项目： amarildolikmeta/alphazero_singleplayer

def evaluate(experiment):
    """
        Run one test episode of an agent and append its statistics to a CSV file.

    :param experiment: a tuple (seed, budget, (agent_name, agent_config), (env_name, env_config), path)
    """
    # Prepare workspace
    seed, budget, agent_spec, env_spec, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env_name, env_config = env_spec
    env = load_environment(env_config)

    # Make agent, with the requested budget written into its configuration
    agent_name, agent_config = agent_spec
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)

    # Evaluate on a single episode, with every display disabled
    print("Evaluating agent {} with budget {} on seed {}".format(
        agent_name, budget, seed))
    evaluation = Evaluation(
        env,
        agent,
        directory=Path("out") / "planners" / agent_name,
        num_episodes=1,
        sim_seed=seed,
        display_env=False,
        display_agent=False,
        display_rewards=False,
    )
    evaluation.test()

    # Statistics of the first (and only) recorded episode
    recorder = evaluation.monitor.stats_recorder
    rewards = recorder.episode_rewards_[0]
    length = recorder.episode_lengths[0]
    total_reward = np.sum(rewards)
    # Discounted sum of rewards; `gamma` is read from the enclosing module
    return_ = np.sum([gamma**t * reward for t, reward in enumerate(rewards)])

    # Save results
    result = {
        "env": env_name,
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "length": length,
    }
    if race_strategy:
        result["pit_count"] = evaluation.pits

    # Append one row; the header is only written when the file is still empty
    frame = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        write_header = f.tell() == 0
        frame.to_csv(f,
                     sep=',',
                     encoding='utf-8',
                     header=write_header,
                     index=False)

示例#5

0

显示文件

    def collect_samples(environment_config, agent_config, count, start_time,
                        seed, model_path, batch):
        """
            Gather a fixed number of transitions from an agent acting in an environment.

            The transitions are grouped by episode. The last episode is returned even if it
            did not terminate before the requested number of samples was reached.

        :param dict environment_config: the environment configuration
        :param dict agent_config: the agent configuration (modified in place: device and, for the
                                  first batch, the final exploration temperature)
        :param int count: number of samples to collect
        :param start_time: the initial local time of the agent
        :param seed: the env/agent seed
        :param model_path: the path to load the agent model from
        :param batch: index of the current batch
        :return: a list of trajectories, i.e. lists of Transitions
        """
        env = load_environment(environment_config)
        env.seed(seed)

        # Force pure exploration during first batch
        if batch == 0:
            agent_config["exploration"]["final_temperature"] = 1
        agent_config["device"] = "cpu"
        agent = load_agent(agent_config, env)
        agent.load(model_path)
        agent.seed(seed)
        agent.set_time(start_time)

        finished, current = [], []
        state = env.reset()
        for _ in range(count):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            current.append(
                Transition(state, action, reward, next_state, done, info))
            if not done:
                state = next_state
                continue
            # Episode over: restart the environment and open a new trajectory
            state = env.reset()
            finished.append(current)
            current = []

        # Keep the unfinished episode, if any
        if current:
            finished.append(current)
        env.close()
        return finished

示例#6

0

显示文件

文件： planners_evaluation_confidence.py 项目： eleurent/planning-gap-complexity

def evaluate(experiment):
    """
        Measure the simple regret of an agent run with a target accuracy, and append it to a CSV file.

    The agent's first action is compared to the one chosen by a value-iteration agent
    built on the same environment.

    :param experiment: a tuple (seed, accuracy, (agent_name, agent_config), env_config, path)
    """
    # Prepare workspace
    seed, accuracy, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent_config["accuracy"] = float(accuracy)
    agent_config["budget"] = 10**9
    agent = agent_factory(env, agent_config)

    # The experiment carries an accuracy, not a budget: `budget` is not known yet at this
    # point, so logging it here raised an UnboundLocalError on every call.
    logger.debug("Evaluating agent {} with accuracy {} on seed {}".format(agent_name, accuracy, seed))

    # Compute true value
    env.seed(seed)
    observation = env.reset()
    vi = agent_factory(env, agent_configs()["value_iteration"])
    best_action = vi.act(observation)
    action = agent.act(observation)
    q = vi.state_action_value
    simple_regret = q[vi.mdp.state, best_action] - q[vi.mdp.state, action]
    # Difference between the best action value and the second largest one
    gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]

    # Budget reported by the planner after acting; fall back to the configured budget
    # when the planner does not expose `budget_used`, so that `budget` is always defined.
    budget = getattr(agent.planner, "budget_used", agent_config["budget"])

    # Save results
    result = {
        "agent": agent_name,
        "budget": budget,
        "accuracy": agent.planner.config["accuracy"],
        "horizon": agent.planner.config["horizon"],
        "seed": seed,
        "simple_regret": simple_regret,
        "gap": gap
    }

    # Append one row; the header is only written when the file is still empty
    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        df.to_csv(f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)

示例#7

0

显示文件

文件： planners_visualization.py 项目： wwxFromTju/rl-agents

                 max_depth=100).plot(out / "{}.svg".format(agent_name),
                                     title=agent_name)
        plt.show()


def compare_trajs(env, seed=0):
    """
        Plot, on a single figure, the trajectories obtained from the planner tree of every agent.

    Each agent gets its own colour. The figure is saved as "trajectories.png" in `out`, then shown.

    :param env: the environment, re-seeded and reset before each agent is evaluated
    :param seed: the seed used for the environment and passed to `evaluate`
    """
    per_agent = {}
    for name in agents:
        env.seed(seed)
        env.reset()
        planner_agent = evaluate(env, name, seed=seed)
        per_agent[name] = get_trajs(planner_agent.planner.root, env)

    # One colour per agent, cycling through the seaborn palette
    colours = itertools.cycle(sns.color_palette())
    for agent_trajs, colour in zip(per_agent.values(), colours):
        for traj in agent_trajs:
            xs, ys = zip(*traj)
            plt.plot(xs, ys, color=colour, linestyle='dotted', linewidth=0.5)
    plt.savefig(out / "trajectories.png")
    plt.show()


# Script entry point: enable gym debug logging, load the environment from
# `env_zero_one` and run the tree comparison with a fixed seed.
if __name__ == "__main__":
    gym.logger.set_level(gym.logger.DEBUG)

    # Alternative environments, kept for reference:
    # env = DynamicsEnv()
    # env = gym.make("highway-v0")
    env = load_environment(env_zero_one)
    # NOTE(review): only compare_trees is run here; compare_trajs is defined but not called.
    compare_trees(env, seed=5)

示例#8

0

显示文件

文件： socialattentiondqn.py 项目： aarjunsrinivasan/Multi-Agent-Reinforcement-Learning

Prepare environment, agent, and evaluation process.

We use a policy architecture based on social attention, see [[Leurent and Mercat, 2019]](https://arxiv.org/abs/1911.12250).
"""

# Commented out IPython magic to ensure Python compatibility.
from rl_agents.trainer.evaluation import Evaluation
from rl_agents.agents.common.factory import load_agent, load_environment

# Get the environment and agent configurations from the rl-agents repository
# %cd /content/rl-agents/scripts/
# NOTE(review): the two paths below are relative -- presumably to the scripts/
# directory targeted by the commented-out %cd above; confirm the working directory.
env_config = 'configs/IntersectionEnv/env.json'
agent_config = 'configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json'

# Build the environment first, since the agent is loaded against it, then wrap
# both in an Evaluation of 3000 episodes with the environment display disabled.
env = load_environment(env_config)
agent = load_agent(agent_config, env)
evaluation = Evaluation(env, agent, num_episodes=3000, display_env=False)
print(f"Ready to train {agent} on {env}")
"""Run tensorboard locally to visualize training."""

# Commented out IPython magic to ensure Python compatibility.
# %tensorboard --logdir "{evaluation.directory}"
"""Start training. This should take about an hour."""

evaluation.train()
"""Progress can be visualised in the tensorboard cell above, which should update every 30s (or manually). You may need to click the *Fit domain to data* buttons below each graph.

## Testing

Run the learned policy for a few episodes.

示例#9

0

显示文件

文件： planners_visualization.py 项目： amarildolikmeta/alphazero_singleplayer

    if not axes:
        fig, axes = plt.subplots()
        for trajectory in trajectories:
            x, y = zip(*trajectory)
            plt.plot(x,
                     y,
                     linestyle='dotted',
                     linewidth=0.5,
                     label=agent_name,
                     color=color)
    return axes


# Script entry point: configure logging, load the "gridenv" environment and
# compare the selected agents on it.
if __name__ == "__main__":
    configure("configs/verbose.json", gym_level=gym.logger.DEBUG)
    selected_env = load_environment(envs["gridenv"])
    # Names of the agents to compare; commented entries are disabled
    selected_agents = [
        # "deterministic",
        "state_aware",
        # "kl-olop"
    ]
    # Rebind the list of names to a {name: config} dict filtered from `agents`
    selected_agents = {k: v for k, v in agents.items() if k in selected_agents}
    # 4 + 4**2 + ... + 4**6 = 5460, as a float because of the true division
    # NOTE(review): presumably the node count of a depth-6 tree with 4 actions -- confirm
    budget = 4 * (4**6 - 1) / (4 - 1)
    # budget = 200
    compare_agents(selected_env,
                   selected_agents,
                   budget=budget,
                   show_tree=True,
                   show_states=True,
                   show_trajs=False)

示例#10

0

显示文件

def evaluate(experiment):
    """
        Evaluate a planner for a given budget and seed, and append the result to a CSV file.

    Two measurements are available, selected by local flags: the simple regret of the first
    action with respect to a value-iteration agent (enabled), and the return of one test
    episode (disabled). A disabled measurement is reported as zeros.

    :param experiment: a tuple (seed, budget, (agent_name, agent_config), env_config, path)
    """
    # Prepare workspace
    seed, budget, agent_spec, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent, with the requested budget written into its configuration
    agent_name, agent_config = agent_spec
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)

    logger.debug("Evaluating agent {} with budget {} on seed {}".format(
        agent_name, budget, seed))

    compute_regret = True
    compute_return = False

    # Compute true value
    simple_regret, gap = 0, 0
    if compute_regret:
        env.seed(seed)
        observation = env.reset()
        vi = agent_factory(env, agent_configs()["value_iteration"])
        best_action = vi.act(observation)
        action = agent.act(observation)
        q = vi.state_action_value
        best_value = q[vi.mdp.state, best_action]
        simple_regret = best_value - q[vi.mdp.state, action]
        # Difference between the best action value and the second largest one
        gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]

    length, total_reward, return_, mean_return = 0, 0, 0, 0
    if compute_return:
        # Evaluate on a single episode, with every display disabled
        evaluation = Evaluation(
            env,
            agent,
            directory=Path("out") / "planners" / agent_name,
            num_episodes=1,
            sim_seed=seed,
            display_env=False,
            display_agent=False,
            display_rewards=False,
        )
        evaluation.test()
        recorder = evaluation.monitor.stats_recorder
        rewards = recorder.episode_rewards_[0]
        length = recorder.episode_lengths[0]
        total_reward = np.sum(rewards)

        def cum_discount(signal):
            # Discounted sum of a signal; `gamma` is read from the enclosing module
            return np.sum([gamma**t * signal[t] for t in range(len(signal))])

        return_ = cum_discount(rewards)
        # Average of the discounted returns computed from every time step onwards
        mean_return = np.mean(
            [cum_discount(rewards[start:]) for start in range(len(rewards))])

    # Save results
    result = {
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "mean_return": mean_return,
        "length": length,
        "simple_regret": simple_regret,
        "gap": gap,
    }

    # Append one row; the header is only written when the file is still empty
    frame = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        write_header = f.tell() == 0
        frame.to_csv(f,
                     sep=',',
                     encoding='utf-8',
                     header=write_header,
                     index=False)

示例#11

0

显示文件

def evaluate(experiment):
    """
        Evaluate an agent on one seeded episode and append the results to a CSV file.

    With the local flag `estimate_value` disabled (as written), a single test episode is run
    and one summary row is saved. When enabled, the episode is stepped manually and, before
    each step, an "oracle" agent is rolled out on a copy of the agent's environment to
    estimate the state value; one row per time step is then saved.

    :param experiment: a tuple (seed, (agent_name, agent_config), env_config, path)
    """
    # Prepare workspace
    seed, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent = load_agent(agent_config, env)

    # Evaluate
    print("Evaluating agent {} on seed {}".format(agent_name, seed))
    evaluation = Evaluation(env,
                            agent,
                            directory=path.parent / agent_name,
                            num_episodes=1,
                            sim_seed=seed,
                            display_env=True,
                            display_agent=True,
                            display_rewards=False)
    # Hard-coded switch between per-step value estimation and a plain test episode
    estimate_value = False
    if estimate_value:
        rewards, values, terminal = [], [], False
        evaluation.seed(episode=0)
        evaluation.reset()
        evaluation.training = False
        # NOTE(review): `0.99 or x` always evaluates to 0.99, so the agent's configured
        # gamma is never read -- confirm whether this override is intentional.
        gamma = 0.99 or agent.config["gamma"]
        while not terminal:
            # Estimate state value
            # Roll the oracle out on a copy so that the evaluated environment is not advanced
            oracle_env = safe_deepcopy_env(agent.env)
            oracle = load_agent(agent_configs()["oracle"], oracle_env)
            oracle_done, oracle_rewards = False, []
            while not oracle_done:
                action = oracle.act(None)
                _, oracle_reward, oracle_done, _ = oracle_env.step(action)
                oracle_rewards.append(oracle_reward)
            # Discounted sum of the rewards collected by the oracle rollout
            value = np.sum([
                gamma**t * oracle_rewards[t]
                for t in range(len(oracle_rewards))
            ])
            values.append(value)

            # Advance the real evaluation by one step
            reward, terminal = evaluation.step()
            rewards.append(reward)
        evaluation.close()

        # Discounted return of the evaluated agent from every time step k onwards
        returns = [
            np.sum(
                [gamma**t * rewards[k + t] for t in range(len(rewards[k:]))])
            for k in range(len(rewards))
        ]

        # Save intermediate results
        df = pd.DataFrame({
            "agent": agent_name,
            "time": range(len(rewards)),
            "seed": [seed] * len(rewards),
            "reward": rewards,
            "return": returns,
            "value": values
        })
    else:
        evaluation.test()
        # Statistics of the first (and only) recorded episode
        rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
        length = evaluation.monitor.stats_recorder.episode_lengths[0]
        total_reward = np.sum(rewards)

        # Discounted sum of a signal for a given discount factor
        cum_discount = lambda signal, gamma: np.sum(
            [gamma**t * signal[t] for t in range(len(signal))])
        return_ = cum_discount(rewards, 0.9)
        # NOTE(review): despite its name, this is still discounted (gamma = 0.99) -- confirm intent
        return_undisc = cum_discount(rewards, 0.99)
        result = {
            "agent": agent_name,
            "seed": seed,
            "total_reward": total_reward,
            "return": return_,
            "return_undisc": return_undisc,
            "length": length,
        }
        df = pd.DataFrame.from_records([result])
    # Append to the results file; the header is only written when the file is still empty
    with open(path, 'a') as f:
        df.to_csv(f,
                  sep=',',
                  encoding='utf-8',
                  header=f.tell() == 0,
                  index=False)

示例#12

0

显示文件

文件： experiments.py 项目： rvalienter90/rl-agents

def evaluate(environment_config, agent_config, options):
    """
        Evaluate an agent interacting with an environment.

    A training evaluation is built first and run when '--train' is set (it is closed
    otherwise). When '--test' is set, a second evaluation is then run in test mode, reusing
    the trained agent if training took place and a freshly loaded agent otherwise.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file, or the string "None" to
                         read the agent configuration from the environment configuration
    :param options: the evaluation options
    """
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    # Optionally name the run directory after the agent configuration, a timestamp and the pid
    run_directory = None
    if options['--name-from-config']:
        run_directory = "{}_{}_{}".format(
            Path(agent_config).with_suffix('').name,
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S'), os.getpid())
    options['--seed'] = int(
        options['--seed']) if options['--seed'] is not None else None

    env = load_environment(environment_config)
    if agent_config == "None":
        agent_config = env.config["agent_config"]
        if "auto_tau" in agent_config["exploration"] and (
                agent_config["exploration"]["auto_tau"]):
            # The option is converted to an integer BEFORE the multiplication: it is a
            # string, and multiplying a string by an integer repeats it ("100" * 2 ==
            # "100100") instead of scaling the number of episodes.
            total_episodes = int(
                int(options['--episodes']) * env.config["controlled_vehicles"])
            agent_config["exploration"]["tau"] = env.config[
                "policy_frequency"] * env.config["duration"] * total_episodes / 50
    agent = load_agent(agent_config, env)
    # TODO different display options for agent, env, rewards
    if options['--offscreen_rendering']:
        env.config['offscreen_rendering'] = True

    evaluation_train = Evaluation(env,
                                  agent,
                                  run_directory=run_directory,
                                  num_episodes=int(options['--episodes']),
                                  sim_seed=options['--seed'],
                                  recover=options['--recover']
                                  or options['--recover-from'],
                                  display_env=not options['--no-display'],
                                  display_agent=not options['--no-display'],
                                  display_rewards=not options['--no-display'],
                                  training=options['--train'],
                                  options=options)

    if options['--train']:
        evaluation_train.train()
    else:
        evaluation_train.close()

    if options['--test']:
        agent_test = load_agent(agent_config, env)
        # Test the agent that was just trained, when there is one
        if options['--train']:
            agent_test = evaluation_train.agent
        evaluation_test = Evaluation(
            env,
            agent_test,
            run_directory=run_directory,
            num_episodes=int(options['--episodes_test']),
            sim_seed=options['--seed'],
            recover=options['--recover'] or options['--recover-from'],
            display_env=not options['--no-display'],
            display_agent=not options['--no-display'],
            display_rewards=not options['--no-display'],
            training=False,
            options=options)

        evaluation_test.test()