Example #1
File: ddqn.py  Project: ericl/yarlp
def main():
    # Atari setup (the agent construction below is commented out); env is rebound to CartPole-v0 further down.
    env = NormalizedGymEnv('BeamRiderNoFrameskip-v4', is_atari=True)

    # agent = DDQNAgent(
    #     env=env,
    #     discount_factor=0.99,
    #     policy_network_params={'dueling': True},
    #     double_q=True,
    #     prioritized_replay=True,
    #     max_timesteps=int(10e6),
    #     seed=0)
    # agent.train()

    env = NormalizedGymEnv('CartPole-v0')
    net = partial(mlp, hidden_units=[64])
    agent = DDQNAgent(env=env,
                      policy_network=net,
                      policy_network_params={},
                      policy_learning_rate=1e-3,
                      max_timesteps=100000,
                      buffer_size=50000,
                      checkpoint_freq=1000,
                      exploration_fraction=0.3,
                      exploration_final_eps=0.05,
                      prioritized_replay=True,
                      double_q=True,
                      train_freq=1,
                      target_network_update_freq=500,
                      learning_start_timestep=1000,
                      seed=65)
    agent.train()
Example #2
def test_continuous_action_space_norm():
    env = NormalizedGymEnv('MountainCarContinuous-v0')
    env.reset()
    env.step([0.1])
    env.close()
    assert GymEnv.env_action_space_is_discrete(env) is False
    assert GymEnv.get_env_action_space_dim(env) == 1
Example #3
def test_discrete_action_space_norm():
    env = NormalizedGymEnv('CartPole-v0')
    env.reset()
    env.step(1)
    env.close()
    assert GymEnv.env_action_space_is_discrete(env) is True
    assert GymEnv.get_env_action_space_dim(env) == 2
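
The two tests above exercise the only GymEnv helpers needed to branch on an environment's action-space type. Below is a minimal sketch of that pattern; the helper name describe_action_space is hypothetical, and it assumes GymEnv is importable from yarlp.utils.env_utils alongside NormalizedGymEnv (the path shown in Example #8), which may differ in the actual package.

from yarlp.utils.env_utils import GymEnv, NormalizedGymEnv

def describe_action_space(env_name):
    # Wrap the env, query the same static helpers the tests assert on, then clean up.
    env = NormalizedGymEnv(env_name)
    is_discrete = GymEnv.env_action_space_is_discrete(env)
    action_dim = GymEnv.get_env_action_space_dim(env)
    env.close()
    return is_discrete, action_dim

# Per the assertions above:
#   describe_action_space('CartPole-v0')              -> (True, 2)
#   describe_action_space('MountainCarContinuous-v0') -> (False, 1)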
Example #4
File: job.py  Project: ericl/yarlp
def _get_env(self, job_dir):
    env_name = self._spec_dict['env']['name']
    kwargs = {
        k: v
        for k, v in self._spec_dict['env'].items()
        if k != 'name' and k != 'is_parallel'
    }
    kwargs['video'] = self._video
    if not self._spec_dict['env'].get('is_parallel', False):
        env = NormalizedGymEnv(env_name,
                               log_dir=job_dir,
                               force_reset=True,
                               **kwargs)
    else:
        env = ParallelEnvs(env_name,
                           log_dir=job_dir,
                           force_reset=True,
                           **kwargs)
    if 'timestep_limit' in self._spec_dict['env']:
        env.spec.timestep_limit = self._spec_dict['env']['timestep_limit']
    return env
Example #5
File: cem.py  Project: ericl/yarlp
def main():
    env = NormalizedGymEnv(
        # 'MountainCarContinuous-v0'
        'CartPole-v1'
        # 'Acrobot-v1'
        # 'Pendulum-v0'
        # 'HalfCheetah-v1'
    )
    agent = CEMAgent(env,
                     discount_factor=0.99,
                     n_weight_samples=100,
                     init_var=1.,
                     best_pct=0.2,
                     policy_network=None,
                     policy_network_params={},
                     model_file_path=None,
                     min_std=1e-6,
                     init_std=1.0,
                     adaptive_std=False,
                     seed=5)
    agent.train(100)
Example #6
File: trpo.py  Project: ericl/yarlp
def main():
    env = NormalizedGymEnv(
        'MountainCarContinuous-v0',
        # 'Walker2d-v1',
        # 'Swimmer-v1',
        # 'CartPole-v1',
        # 'Acrobot-v1',
        # 'Pendulum-v0',
        # 'PongNoFrameskip-v4',
        # normalize_obs=True,
        # is_atari=True
    )

    print(env.action_space)

    agent = TRPOAgent(env,
                      policy_network=mlp,
                      seed=123,
                      baseline_train_iters=5,
                      baseline_model_learning_rate=1e-3,
                      baseline_network=mlp)
    agent.train(max_timesteps=1000000)
Example #7
File: reinforce.py  Project: ericl/yarlp
def main():
    env = NormalizedGymEnv(
        'MountainCarContinuous-v0'
        # 'CartPole-v1'
        # 'Walker2d-v1'
        # 'Acrobot-v1'
        # 'Pendulum-v0'
        # 'HalfCheetah-v1'
        # normalize_obs=True
    )

    print(env.action_space)

    agent = REINFORCEAgent(
        env=env, discount_factor=0.99,
        policy_network=mlp,
        policy_learning_rate=0.01,
        entropy_weight=0,
        baseline_train_iters=5,
        baseline_model_learning_rate=1e-3,
        baseline_network=mlp,
        # baseline_network=None,
        seed=5)
    agent.train(10000, n_steps=1024)
Example #8
"""
    Regression tests for the DDQN agent on OpenAI gym environments
"""

import pytest
import numpy as np
import shutil
from yarlp.utils.env_utils import NormalizedGymEnv
from yarlp.agent.ddqn_agent import DDQNAgent

env = NormalizedGymEnv('PongNoFrameskip-v4', is_atari=True)


def test_ddqn():
    agent = DDQNAgent(env,
                      max_timesteps=10,
                      learning_start_timestep=1,
                      train_freq=5,
                      batch_size=1)
    agent.train()


def test_seed():
    agent = DDQNAgent(env, seed=143, max_timesteps=2)
    agent.train()
    ob, *_ = agent.replay_buffer.sample(1)

    agent = DDQNAgent(env, seed=143, max_timesteps=2)
    agent.train()
    ob2, *_ = agent.replay_buffer.sample(1)

    # Identical seeds should reproduce the same collected transitions.
    np.testing.assert_array_equal(ob, ob2)
Example #9
def test_norm_reward():
    env = NormalizedGymEnv('MountainCarContinuous-v0', normalize_rewards=True)
    env.reset()
    for _ in range(env.spec.timestep_limit + 1):
        env.step([0.01])