def main():
    parser = argparse.ArgumentParser(description="Watch a continuous agent.")
    parser.add_argument("env", help="ID of the Environment")
    parser.add_argument("filename", help="File where the model was saved.")
    parser.add_argument(
        "--device",
        default="cuda",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=120,
        help="Playback speed",
    )
    args = parser.parse_args()

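    # select the environment implementation based on the requested env id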
    if args.env in ENVS:
        env = GymEnvironment(args.env, device=args.device)
    elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names:
        env = PybulletEnvironment(args.env, device=args.device)
    else:
        env = GymEnvironment(args.env, device=args.device)

    load_and_watch(args.filename, env, fps=args.fps)
Example #2
def main():
    # run on gpu
    device = 'cuda'

    def get_agents(preset):
        agents = [
            getattr(preset, agent_name)
            for agent_name in preset.__all__
        ]
        return [agent(device=device) for agent in agents]

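    # submit Slurm jobs running each preset suite against a representative environment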
    SlurmExperiment(get_agents(atari),
                    AtariEnvironment('Breakout', device=device),
                    2e7,
                    sbatch_args={'partition': '1080ti-long'})

    SlurmExperiment(get_agents(classic_control),
                    GymEnvironment('CartPole-v0', device=device),
                    100000,
                    sbatch_args={'partition': '1080ti-short'})

    SlurmExperiment(get_agents(continuous),
                    GymEnvironment('LunarLanderContinuous-v2', device=device),
                    500000,
                    sbatch_args={'partition': '1080ti-short'})
Example #3
 def setUp(self):
     self.env = GymEnvironment('LunarLanderContinuous-v2')
     self.env.reset()
     self.parallel_env = DuplicateEnvironment([
         GymEnvironment('LunarLanderContinuous-v2'),
         GymEnvironment('LunarLanderContinuous-v2'),
     ])
     self.parallel_env.reset()
def main():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (e.g. 'lander', 'cheetah')")
    parser.add_argument(
        "agent", help="Name of the agent (e.g. ddpg). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--frames", type=int, default=2e6, help="The number of training frames."
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Render the environment."
    )
    parser.add_argument(
        "--logdir", default='runs', help="The base logging directory."
    )
    parser.add_argument("--writer", default='tensorboard', help="The backend used for tracking experiment metrics.")
    parser.add_argument(
        '--hyperparameters',
        default=[],
        nargs='*',
        help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc."
    )
    args = parser.parse_args()

    if args.env in ENVS:
        env = GymEnvironment(args.env, device=args.device)
    elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names:
        env = PybulletEnvironment(args.env, device=args.device)
    else:
        env = GymEnvironment(args.env, device=args.device)

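    # look up the preset builder by name and bind it to the requested device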
    agent_name = args.agent
    agent = getattr(continuous, agent_name)
    agent = agent.device(args.device)

    # parse hyperparameters
    hyperparameters = {}
    for hp in args.hyperparameters:
        key, value = hp.split('=')
        hyperparameters[key] = type(agent.default_hyperparameters[key])(value)
    agent = agent.hyperparameters(**hyperparameters)

    run_experiment(
        agent,
        env,
        frames=args.frames,
        render=args.render,
        logdir=args.logdir,
        writer=args.writer,
    )
def main():
    device = 'cpu'
    timesteps = 40000
    run_experiment(
        [dqn(), a2c()],
        [
            GymEnvironment('CartPole-v0', device),
            GymEnvironment('Acrobot-v1', device)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
 def setUp(self):
     np.random.seed(0)
     torch.manual_seed(0)
     self.env = GymEnvironment('CartPole-v0')
     self.experiment = MockExperiment(a2c(), self.env, quiet=True)
     for i, env in enumerate(self.experiment._envs):
         env.seed(i)
Example #7
def main():
    parser = argparse.ArgumentParser(description="Watch a continuous agent.")
    parser.add_argument("env", help="ID of the Environment")
    parser.add_argument("dir",
                        help="Directory where the agent's model was saved.")
    parser.add_argument(
        "--device",
        default="cpu",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=120,
        help="Playback speed",
    )
    args = parser.parse_args()

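    # resolve short environment names to full Gym ids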
    if args.env in ENVS:
        env_id = ENVS[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent = TimeFeature(GreedyAgent.load(args.dir, env))
    watch(agent, env, fps=args.fps)
def main():
    parser = argparse.ArgumentParser(
        description="Run a classic control benchmark.")
    parser.add_argument("env", help="Name of the env (e.g. CartPole-v1).")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. dqn). See presets for available agents.")
    parser.add_argument(
        "--device",
        default="cuda",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument("--frames",
                        type=int,
                        default=20000,
                        help="The number of training frames.")
    parser.add_argument("--render",
                        type=bool,
                        default=False,
                        help="Render the environment.")
    args = parser.parse_args()

    env = GymEnvironment(args.env, device=args.device)
    agent_name = args.agent
    agent = getattr(classic_control, agent_name)

    run_experiment(agent(device=args.device),
                   env,
                   args.frames,
                   render=args.render)
def run_atari():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. actor_critic). See presets for available agents.",
    )
    parser.add_argument(
        "--frames", type=int, default=2e6, help="The number of training frames"
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Whether to render the environment."
    )
    args = parser.parse_args()

    if args.env in envs:
        env_id = envs[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent_name = args.agent
    agent = getattr(continuous, agent_name)

    experiment = Experiment(
        agent(device=args.device), env, frames=args.frames, render=args.render
    )
 def setUp(self):
     self.agent = MockAgent()
     self.env = GymEnvironment('PongNoFrameskip-v4')
     self.body = DeepmindAtariBody(ToLegacyBody(self.agent),
                                   self.env,
                                   noop_max=0,
                                   frame_stack=1)
 def test_reset(self):
     env = GymEnvironment('CartPole-v0')
     state = env.reset()
     self.assertEqual(state.observation.shape, (4, ))
     self.assertEqual(state.reward, 0)
     self.assertFalse(state.done)
     self.assertEqual(state.mask, 1)
 def test_reset_preconstructed_env(self):
     env = GymEnvironment(gym.make('CartPole-v0'))
     state = env.reset()
     self.assertEqual(state.observation.shape, (4, ))
     self.assertEqual(state.reward, 0)
     self.assertFalse(state.done)
     self.assertEqual(state.mask, 1)
Example #13
 def setUp(self):
     np.random.seed(0)
     torch.manual_seed(0)
     self.env = GymEnvironment('CartPole-v0')
     self.env.seed(0)
     self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True)
     self.experiment._env.seed(0)
Example #14
    def __init__(self):
        super().__init__()

        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None
 def test_step_until_done(self):
     env = GymEnvironment('CartPole-v0')
     env.reset()
     for _ in range(100):
         state = env.step(1)
         if state.done:
             break
     self.assertEqual(state.observation.shape, (4, ))
     self.assertEqual(state.reward, 1.)
     self.assertTrue(state.done)
     self.assertEqual(state.mask, 0)
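
A minimal sketch of the same reset/step cycle exercised by the tests above; the random_rollout helper is illustrative and not part of the library:

def random_rollout(env_id='CartPole-v0', max_steps=200):
    # illustrative helper; assumes the all.environments.GymEnvironment API shown in the tests above
    from all.environments import GymEnvironment

    env = GymEnvironment(env_id)
    state = env.reset()
    total_reward = 0.
    for _ in range(max_steps):
        # sample a random action from the environment's action space
        action = env.action_space.sample()
        state = env.step(action)
        total_reward += state.reward
        if state.done:
            break
    return total_reward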
Example #16
def main():
    DEVICE = 'cpu'
    # DEVICE = 'cuda' # uncomment for gpu support
    timesteps = 40000
    run_experiment(
        [
            # DQN with default hyperparameters
            dqn.device(DEVICE),
            # DQN with custom hyperparameters and a custom name
            dqn.device(DEVICE).hyperparameters(replay_buffer_size=100).name('dqn-small-buffer'),
            # A2C with a custom name
            a2c.device(DEVICE).name('not-dqn')
        ],
        [
            GymEnvironment('CartPole-v0', DEVICE),
            GymEnvironment('Acrobot-v1', DEVICE)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
Example #17
def run():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument("agent", help="Name of the agent (e.g. cacla). See presets for available agents")

    parser.add_argument(
        "--episodes", type=int, default=2000, help="The number of training episodes"
    )
    parser.add_argument(
        "--frames", type=int, default=6e10, help="The number of training frames"
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Whether to render the environment."
    )
    args = parser.parse_args()

    # create the environment
    env = GymEnvironment(args.env, device=args.device)

    agent_name = args.agent
    agent = getattr(presets, agent_name)

    # configure desired baseline (run sequentially)
    run_baseline = False
    baseline_agent_name = "cacla"
    baseline_agent = getattr(presets, baseline_agent_name)

    result_dir = create_result_dir(agent_name, args.env)

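    # repeat the experiment so results can be compared across independent runs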
    num_repeats = 20
    for i in range(num_repeats):
        # run the experiment
        OptimisationExperiment(
            agent(device=args.device),
            env,
            episodes=args.episodes,
            frames=args.frames,
            render=args.render,
            writer=_make_writer(agent_name, env.name, True, result_dir),
            write_episode_return=True
        )

        if run_baseline:
            # run the baseline agent for comparison
            OptimisationExperiment(
                baseline_agent(device=args.device), env, episodes=args.episodes, frames=args.frames, render=args.render
            )
Example #18
def watch():
    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
    parser.add_argument(
        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1).")
    parser.add_argument("dir",
                        help="Directory where the agent's model was saved.")
    parser.add_argument(
        "--device",
        default="cpu",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    args = parser.parse_args()
    env = GymEnvironment(args.env, device=args.device)
    load_and_watch(args.dir, env)
 def __init__(self, env, frames=None, episodes=None):
     if frames is None:
         frames = np.inf
     if episodes is None:
         episodes = np.inf
     if isinstance(env, str):
         self.env = GymEnvironment(env)
     else:
         self.env = env
     self._max_frames = frames
     self._max_episodes = episodes
     self._agent = None
     self._episode = None
     self._frames = None
     self._writer = None
     self._render = None
     self._console = None
Example #20
def make_env(using_lander_reward_shaping=False):
    env = gym.make('LunarLanderContinuous-v2')
    env.action_space = spaces.Discrete(n_act_dim)

    # Override the step function: before calling the original step function,
    # run disc_to_cont to convert the discrete action to a continuous action.
    env.unwrapped._step_orig = env.unwrapped.step

    def _step(self, action):
        obs, r, done, info = self._step_orig(disc_to_cont(action))
        return obs, r, done, info

    env.unwrapped.step = types.MethodType(_step, env.unwrapped)
    env.unwrapped.using_lander_reward_shaping = using_lander_reward_shaping

    env = GymEnvironment(env, device="cuda")
    return env
Example #21
def evaluate_dqn_all(num_test_episodes):
    from all.experiments.watch import GreedyAgent
    from all.environments import GymEnvironment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    agent = GreedyAgent.load('models', env)
    num_crashed = 0
    num_arrived = 0
    action = None
    iteration = 0

    rlstats = StatsAggregator()
    episode_reward = 0

    def add_reward(state):
        return {"reward": episode_reward}

    rlstats.add_custom_stat_callback(add_reward)
    rewards = []

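    # run greedy-policy episodes until the requested number of test episodes has been collected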
    while iteration < num_test_episodes:
        if env.done:
            actualEnv = env.env
            stats = actualEnv.get_stats()
            if len(stats["position_history"]) != 0:
                rlstats.add_episode_stats(stats)
                num_crashed += stats["crashed"]
                num_arrived += stats["merged"]
                iteration += 1
                print(iteration)
                rewards.append(episode_reward)
                episode_reward = 0
            env.reset()
        else:
            env.step(action)
        action = agent.eval(env.state, env.reward)
        episode_reward += env.reward

    logging.info("Rewards: {}".format(rewards))
    rlstats.print_stats()
def main():
    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
    parser.add_argument(
        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1).")
    parser.add_argument("filename", help="File where the model was saved.")
    parser.add_argument(
        "--device",
        default="cuda",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=60,
        help="Playback speed",
    )
    args = parser.parse_args()
    env = GymEnvironment(args.env, device=args.device)
    load_and_watch(args.filename, env, fps=args.fps)
Example #23
def train_dqn_all():
    from all.environments import GymEnvironment
    from all.presets.classic_control import ddqn
    from all.experiments import SingleEnvExperiment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    preset = ddqn(device=device,
                  lr=Settings.LEARNING_RATE,
                  initial_exploration=Settings.EPS_START,
                  final_exploration=Settings.EPS_END)
    experiment = SingleEnvExperiment(preset, env)
    experiment.train(1E6)
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
def main():
    parser = argparse.ArgumentParser(
        description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. ddpg). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument("--frames",
                        type=int,
                        default=2e6,
                        help="The number of training frames.")
    parser.add_argument("--render",
                        type=bool,
                        default=False,
                        help="Render the environment.")
    parser.add_argument("--logdir",
                        default='runs',
                        help="The base logging directory.")
    args = parser.parse_args()

    if args.env in ENVS:
        env_id = ENVS[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent_name = args.agent
    agent = getattr(continuous, agent_name)

    run_experiment(agent(device=args.device),
                   env,
                   frames=args.frames,
                   render=args.render,
                   logdir=args.logdir)
Example #25
    def fitness(self, individual):
        print("Running individual: {0}".format(individual))

        returns = []
        # TODO - average past fitness values
        # TODO - loop env
        run_idxs = []
        for i in range(2):
            # create the environment and agent
            env = GymEnvironment(self.args.env, device=self.args.device)
            experiment = OptimisationExperiment(
                self.agent(device=self.args.device,
                           lr_v=individual[0],
                           lr_pi=individual[1],
                           trace_decay=individual[2],
                           log=self.args.log), env,
                episodes=self.args.episodes,
                frames=self.args.frames,
                render=self.args.render,
                log=self.args.log,
                quiet=True,
                write_loss=False,
                write_episode_return=self.args.log,
                writer=self._make_writer(self.agent_name, env.name, self._write_loss, self.result_dir),
            )
            episodes_returns = np.array(experiment.runner.rewards)     # returns against episodes
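            # fitness: total absolute deviation of the episode returns from the solved return of 100 (lower is better)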
            solved_return_value = np.array([100.0 for x in range(len(episodes_returns))])
            fitness = sum(abs(solved_return_value - episodes_returns))
            returns.append(fitness)
            run_idxs.append(copy(self.run_count))
            self.run_count += 1
        print("runs fitnesses: {0}".format(returns))
        avg_fitness = sum(returns) / len(returns)

        for idx in run_idxs:
            self.f_fitness_run_map.write("{0}\t{1}\t{2}\n".format(self.individual_id, idx, avg_fitness))
            self.f_fitness_run_map.flush()

        self.individual_id += 1
        return avg_fitness
Example #26
def run_classic():
    parser = argparse.ArgumentParser(description="Run a classic control benchmark.")
    parser.add_argument("env", help="Name of the env (e.g. CartPole-v1)")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. sarsa). See presets for available agents.",
    )
    parser.add_argument(
        "--episodes", type=int, default=2000, help="The number of training episodes"
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    args = parser.parse_args()

    env = GymEnvironment(args.env, device=args.device)
    agent_name = args.agent
    agent = getattr(classic_control, agent_name)

    Experiment(agent(device=args.device), env, episodes=args.episodes)
 def test_same_as_duplicate(self):
     n_envs = 3
     torch.manual_seed(42)
     env1 = DuplicateEnvironment([GymEnvironment('CartPole-v0') for i in range(n_envs)])
     env2 = GymVectorEnvironment(make_vec_env(n_envs), "CartPole-v0")
     env1.seed(42)
     env2.seed(42)
     state1 = env1.reset()
     state2 = env2.reset()
     self.assertEqual(env1.name, env2.name)
     self.assertEqual(env1.action_space.n, env2.action_space.n)
     self.assertEqual(env1.observation_space.shape, env2.observation_space.shape)
     self.assertEqual(env1.num_envs, 3)
     self.assertEqual(env2.num_envs, 3)
     act_space = env1.action_space
     for i in range(2):
         self.assertTrue(torch.all(torch.eq(state1.observation, state2.observation)))
         self.assertTrue(torch.all(torch.eq(state1.reward, state2.reward)))
         self.assertTrue(torch.all(torch.eq(state1.done, state2.done)))
         self.assertTrue(torch.all(torch.eq(state1.mask, state2.mask)))
         actions = torch.tensor([act_space.sample() for i in range(n_envs)])
         state1 = env1.step(actions)
         state2 = env2.step(actions)
def main():
    device = 'cuda'

    frames = int(1e7)

    agents = [
        ddpg(last_frame=frames),
        ppo(last_frame=frames),
        sac(last_frame=frames)
    ]

    envs = [
        GymEnvironment(env, device) for env in [
            'AntBulletEnv-v0', 'HalfCheetahBulletEnv-v0',
            'HumanoidBulletEnv-v0', 'HopperBulletEnv-v0',
            'Walker2DBulletEnv-v0'
        ]
    ]

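    # launch the benchmark sweep on Slurm with the given sbatch options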
    SlurmExperiment(agents,
                    envs,
                    frames,
                    sbatch_args={'partition': '1080ti-long'})
Example #29
def run_atari():
    parser = argparse.ArgumentParser(
        description='Run a classic control benchmark.')
    parser.add_argument('env', help='Name of the env (e.g. CartPole-v1)')
    parser.add_argument(
        'agent', help="Name of the agent (e.g. sarsa). See presets for available agents.")
    parser.add_argument('--episodes', type=int, default=1000,
                        help='The number of training episodes')
    parser.add_argument(
        '--device', default='cuda',
        help='The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)'
    )
    args = parser.parse_args()

    env = GymEnvironment(args.env, device=args.device)
    agent_name = args.agent
    agent = getattr(classic_control, agent_name)

    experiment = Experiment(
        env,
        episodes=args.episodes
    )
    experiment.run(agent(device=args.device), label=agent_name)
Example #30
def resume_dqn_all():
    from all.presets.classic_control import ddqn
    from all.environments import GymEnvironment
    from all.experiments import SingleEnvExperiment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment('sumo-jerk-v0', device=device)
    lr = 1e-5
    agent = ddqn(device=device, lr=lr)
    q_module = torch.load(os.path.join('models', "q.pt"),
                          map_location='cpu').to(device)

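    # rebuild the experiment, then copy the saved Q-network weights into the new agent's model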
    experiment = SingleEnvExperiment(agent, env)
    agent = experiment._agent
    old_q = agent.q
    old_q.model.load_state_dict(q_module.state_dict())
    experiment.train(frames=1e6)
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)