Пример #1
0
def test_random_agent():
    """Smoke-test ``RandomAgent`` on a freshly reset ``CartPoleEnv``.

    Takes one step through ``agent.advance()``, asserts that the step does
    not report completion, then passes the agent to ``run_agent``.
    """
    from agentos.agents import RandomAgent
    from gym.envs.classic_control import CartPoleEnv

    cart_pole = CartPoleEnv()
    cart_pole.reset()
    agent = RandomAgent(environment=cart_pole)

    finished = agent.advance()
    assert not finished, "CartPole never finishes after one random step."

    # ``run_agent`` is not imported here; it is resolved from module scope.
    run_agent(agent)
Пример #2
0
if __name__ == "__main__":
    # Create a mouse agent and see what it learns as its best guess of the
    # size of cookies it is seeing.
    import argparse

    cli_description = (
        "Run a MouseAgent that learns by looking at cookies "
        "using Friston's Free Energy principle. This agent "
        "is an implementation of the tutorial by Rafal Bogacz at "
        "https://sciencedirect.com/science/article/pii/S0022249615000759"
    )
    parser = argparse.ArgumentParser(description=cli_description)
    parser.add_argument("--max-iters", type=int, default=150)
    parser.add_argument("-p", "--plot-results", action="store_true")
    args = parser.parse_args()

    print(f"Running mouse agent  for {args.max_iters} steps...")
    print("------------------------------------------------")

    mouse = Mouse(CookieSensorEnv())
    agentos.run_agent(mouse, max_iters=args.max_iters)

    if args.plot_results:
        plt.figure(figsize=(15, 10))

        # These two mouse series are deliberately left out of the plot.
        skipped = ("belief_light_var", "belief_size_var")
        for name, series in mouse_stats.items():
            if name not in skipped:
                plt.plot(series, label=name)

        # Every environment series is plotted.
        for name, series in env_stats.items():
            plt.plot(series, label=name)

        plt.legend()
        plt.title("Mouse beliefs over time")
        plt.show()
Пример #3
0
        return int(
            max(0, round(self.nn(np.array(obs)[np.newaxis]).numpy()[0][0])))


class RandomTFAgent(agentos.Agent):
    """Agent that runs one rollout per ``advance()`` and logs its return."""

    def __init__(self, environment, policy):
        super().__init__(environment=environment, policy=policy)
        # One entry per advance() call: the summed rewards of that rollout.
        self.ret_vals = []

    def advance(self):
        """Roll out the policy (at most 2000 steps) and record total reward."""
        rollout_result = agentos.rollout(
            self.policy,
            self.environment,
            max_steps=2000,
        )
        episode_return = sum(rollout_result.rewards)
        self.ret_vals.append(episode_return)


if __name__ == "__main__":
    from gym.envs.classic_control import CartPoleEnv

    # The agent receives the env class itself; two throwaway instances are
    # created only to read the action and observation spaces for the policy.
    nn_policy = SingleLayerTFPolicy(
        CartPoleEnv().action_space,
        CartPoleEnv().observation_space,
    )
    random_nn_agent = RandomTFAgent(environment=CartPoleEnv, policy=nn_policy)

    agentos.run_agent(random_nn_agent, max_iters=10)

    # Summary statistics over the returns collected by the agent.
    summary = (
        f"Agent done!\n"
        f"Num rollouts: {len(random_nn_agent.ret_vals)}\n"
        f"Avg return: {np.mean(random_nn_agent.ret_vals)}\n"
        f"Max return: {max(random_nn_agent.ret_vals)}\n"
        f"Median return: {np.median(random_nn_agent.ret_vals)}\n"
    )
    print(summary)
Пример #4
0
    def __init__(self, environment, policy):
        """Keep references to the environment and policy, then reset the env.

        The observation returned by the reset is stored as ``first_obs``.
        """
        super().__init__(env=environment, policy=policy)
        self.policy, self.environment = policy, environment
        # NOTE(review): ``self.env`` is not assigned in this method;
        # presumably the base initializer sets it from ``env=`` -- confirm.
        initial_observation = self.env.reset()
        self.first_obs = initial_observation

    def advance(self):
        """Train via ``learn()``, then evaluate the policy with one rollout.

        The evaluation rollout is capped at 200 steps and its summed reward
        is printed.
        """
        print("Training")
        self.learn()

        print("Evaluating")
        # The rollout is given the environment's class, not the instance.
        t = agentos.rollout(
            self.policy,
            self.environment.__class__,
            max_steps=200,
        )
        print(f"Finished evaluating policy, return: {sum(t.rewards)}")

    def learn(self):
        """Ask the policy to improve itself using this agent's environment."""
        policy = self.policy
        policy.improve(self.environment)


if __name__ == "__main__":
    from gym.envs.classic_control import CartPoleEnv

    env_class = CartPoleEnv

    # Three separate env instances are created: one handed to the agent and
    # one each to read the action and observation spaces for the policy.
    agent_env = env_class()
    greedy_policy = EpsilonGreedyTFPolicy(
        env_class().action_space,
        env_class().observation_space,
    )
    my_agent = OnlineBatchAgent(environment=agent_env, policy=greedy_policy)

    agentos.run_agent(my_agent, max_iters=100)
Пример #5
0
from collections import deque
from env import MultiChatEnv
from env_utils import CommandLineClient
from numpy import random as np_random


class ChatBot(agentos.Agent):
    """A simple chatbot that speaks by parroting back things it has heard."""

    def __init__(self, env):
        super().__init__(env)
        # Bounded history of observations (at most 2048 entries are kept).
        self.memory = deque(maxlen=2048)
        # Set after a non-empty observation; cleared once a reply is sent.
        self.reply_flag = False

    def advance(self):
        """Step the env with a message and remember any observation.

        The message is a random entry from memory when a reply is pending,
        otherwise the empty string.
        """
        if self.reply_flag:
            outgoing = np_random.choice(self.memory)
            self.reply_flag = False
        else:
            outgoing = ""

        heard, _reward, _done, _info = self.env.step(outgoing)
        if not heard:
            return

        self.memory.append(heard)
        self.reply_flag = True


if __name__ == "__main__":
    env_generator = MultiChatEnv()

    # The ChatBot class (not an instance) is passed, with as_thread=True.
    agentos.run_agent(ChatBot, env_generator, 1, as_thread=True)

    # A command-line client is started on an env from the same generator.
    client = CommandLineClient(env_generator())
    client.start()
Пример #6
0
        description="Run reinforce with a simple TF policy on gym CartPole. "
        "One rollout per call to agent.advance(), "
        "200 steps per rollout.", )
    # Required positional argument: number of advance() calls to make.
    parser.add_argument(
        "max_iters",
        type=int,
        metavar="MAX_ITERS",
        help="How many times to call advance() on agent.",
    )
    # Optional settings, forwarded to ReinforceAgent below.
    parser.add_argument("--rollouts_per_iter", type=int, default=1)
    parser.add_argument("--max_steps_per_rollout", type=int, default=200)
    parser.add_argument("--discount_rate", type=float, default=0.9)
    args = parser.parse_args()
    # Build the agent from a new CartPole env and a default-constructed
    # policy, configured by the parsed command-line options.
    reinforce_agent = ReinforceAgent(
        CartPoleEnv(),
        TwoLayerTFPolicy(),
        rollouts_per_iter=args.rollouts_per_iter,
        max_steps_per_rollout=args.max_steps_per_rollout,
        discount_rate=args.discount_rate,
    )
    agentos.run_agent(
        reinforce_agent,
        max_iters=args.max_iters,
    )
    print("Agent done!")
    # Statistics are printed only when at least one return was recorded;
    # max() would raise on an empty sequence.
    if reinforce_agent.ret_vals:
        print(f"Num rollouts: {len(reinforce_agent.ret_vals)}\n"
              f"Avg return: {np.mean(reinforce_agent.ret_vals)}\n"
              f"Max return: {max(reinforce_agent.ret_vals)}\n"
              f"Median return: {np.median(reinforce_agent.ret_vals)}\n")
Пример #7
0
def run(agent_file, hz, max_iters):
    """Run an agent by calling advance() on it until it returns True

    The agent is loaded from ``agent_file`` with ``load_agent_from_path``;
    ``hz`` and ``max_iters`` are forwarded to ``agentos.run_agent``.
    """
    loaded_agent = load_agent_from_path(agent_file)
    agentos.run_agent(loaded_agent, hz=hz, max_iters=max_iters)
Пример #8
0
    # Command-line entry point: parse the env and algorithm, then run an
    # RLlibAgent on them.
    #
    # The summary is passed as ``description``. ArgumentParser's first
    # positional parameter is ``prog``, so passing the sentence positionally
    # made it appear as the program name in the usage line.
    parser = argparse.ArgumentParser(description="Run an RLlibAgent.")
    parser.add_argument(
        "env_module",
        metavar="ENV_MODULE",
        type=str,
        help="The python module of env, will be imported. "
        "Must be on pythonpath. If this is empty string, "
        "ENV_CLASSNAME is assumed to be a Gym Env id "
        "instead of a classname (e.g., CartPole-v1)",
    )
    parser.add_argument(
        "env_classname",
        metavar="ENV_CLASSNAME",
        type=str,
        help="The env class for agent to use.",
    )
    parser.add_argument(
        "algorithm",
        metavar="ALGO",
        type=str,
        help="The name of an RLlib algo. For list of algos, "
        "see https://github.com/ray-project/ray/blob/"
        "master/rllib/agents/registry.py",
    )
    args = parser.parse_args()

    # With an empty ENV_MODULE the name is passed through as a plain string;
    # otherwise the module is imported and the named attribute looked up.
    env = args.env_classname
    if args.env_module:
        module = importlib.import_module(args.env_module)
        env = getattr(module, args.env_classname)
    run_agent(RLlibAgent, env, algo_name=args.algorithm)