Example #1
def ppo_continuous(game, tag=""):
    config = PPOConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=False)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: Adam(params, 3e-4, eps=1e-5)
    config.network_fn = lambda: GaussianActorCriticNet(
        config.state_dim,
        config.action_dim,
        actor_body=FCBody(config.state_dim, gate=F.tanh),
        critic_body=FCBody(config.state_dim, gate=F.tanh))

    # config.state_normalizer = MeanStdNormalizer()

    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.01
    config.rollout_length = 2048
    config.gradient_clip = 0.5
    config.optimization_epochs = 10
    config.mini_batch_size = 64
    config.ppo_ratio_clip = 0.2
    config.log_interval = 2048
    config.max_steps = int(1e6)
    PPOAgent(config).run_steps(tag=f'{tag}{ppo_continuous.__name__}-{game}')
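
All of these examples assume the DeepRL framework's symbols (Task, FCBody, the agent and config classes, and so on) are already in scope, typically via `from deep_rl import *`. Below is a rough sketch of how ppo_continuous might be driven from a script; the entry-point helpers (mkdir, random_seed, select_device) are the library's usual setup calls as far as I can tell, and the environment id is purely illustrative.

from deep_rl import *

if __name__ == '__main__':
    mkdir('log')              # directory the library's logger writes to
    random_seed()             # seed numpy / torch / random
    select_device(-1)         # -1 = CPU; a non-negative index selects a GPU
    ppo_continuous('HalfCheetah-v2', tag='demo-')  # env id is illustrative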
Example #2
def categorical_dqn_cart_pole():
    game = 'CartPole-v0'
    config = CategoricalDQNConfig()
    config.task_fn = lambda: Task(game)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: CategoricalNet(config.action_dim,
                                               config.categorical_n_atoms,
                                               FCBody(config.state_dim))

    config.batch_size = 10
    config.replay_fn = lambda: ReplayBuffer(config.eval_env,
                                            memory_size=int(1e4))
    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)

    config.discount = 0.99
    config.target_network_update_freq = 200
    config.exploration_steps = 100
    config.categorical_v_max = 100
    config.categorical_v_min = -100
    config.categorical_n_atoms = 50
    config.rollout_length = 4
    config.gradient_clip = 5

    config.max_steps = 1e6
    CategoricalDQNAgent(config).run_steps(
        tag=f'{categorical_dqn_cart_pole.__name__}-{game}')
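
Note that network_fn is a lambda, so config.categorical_n_atoms is only read when the network is actually built, after the distributional settings below it are assigned. Those settings define the fixed support of the C51 return distribution; a minimal sketch of the standard atom layout they imply (illustrative, not this repo's code):

import numpy as np

v_min, v_max, n_atoms = -100, 100, 50
delta_z = (v_max - v_min) / (n_atoms - 1)    # spacing between adjacent atoms
atoms = np.linspace(v_min, v_max, n_atoms)   # support of the return distribution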
Example #3
def ddpg_continuous(game, tag=""):
    config = DDPGConfig()
    config.task_fn = lambda: Task(game)
    config.eval_env = Task(game)

    config.network_fn = lambda: DeterministicActorCriticNet(
        config.state_dim,
        config.action_dim,
        actor_body=FCBody(config.state_dim, (400, 300), gate=F.relu),
        critic_body=TwoLayerFCBodyWithAction(
            config.state_dim, config.action_dim, (400, 300), gate=F.relu),
        actor_opt_fn=lambda params: Adam(params, lr=1e-4),
        critic_opt_fn=lambda params: Adam(params, lr=1e-3))

    config.batch_size = 64
    config.replay_fn = lambda: ReplayBuffer(config.eval_env,
                                            memory_size=int(1e6))
    config.random_process_fn = lambda: OrnsteinUhlenbeckProcess(
        size=(config.action_dim, ), std=LinearSchedule(0.2))

    config.discount = 0.99
    config.min_memory_size = 64
    config.target_network_mix = 1e-3
    config.max_steps = int(1e6)
    DDPGAgent(config).run_steps(tag=f'{tag}{ddpg_continuous.__name__}-{game}')
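
config.target_network_mix (1e-3 here) is the coefficient of DDPG's soft target-network update; a minimal sketch of the standard Polyak averaging it corresponds to (illustrative, not the library's exact code):

def soft_update(target_net, online_net, mix=1e-3):
    # Polyak averaging: target <- mix * online + (1 - mix) * target, applied
    # after every learning step instead of a hard periodic copy.
    for target_param, param in zip(target_net.parameters(),
                                   online_net.parameters()):
        target_param.data.copy_(mix * param.data +
                                (1 - mix) * target_param.data)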
Example #4
def a2c_continuous(game, tag=""):
    config = A2CConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, lr=0.0007)
    config.network_fn = lambda: GaussianActorCriticNet(
        config.state_dim,
        config.action_dim,
        actor_body=FCBody(config.state_dim),
        critic_body=FCBody(config.state_dim))

    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 1.0
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 5
    config.max_steps = int(1e6)
    A2CAgent(config).run_steps(tag=f'{tag}{a2c_continuous.__name__}-{game}')
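
With use_gae = True and gae_tau = 1.0, the advantage estimator reduces to the full discounted return minus the value baseline; smaller gae_tau values (e.g. 0.95 in the PPO examples) trade variance for bias. A generic sketch of Generalized Advantage Estimation over one rollout (standard formula, not this repo's implementation):

def gae_advantages(rewards, values, last_value, dones, gamma=0.99, tau=1.0):
    # delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    # A_t     = delta_t + gamma * tau * (1 - done_t) * A_{t+1}
    advantages = [0.0] * len(rewards)
    gae, next_value = 0.0, last_value
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        gae = delta + gamma * tau * (1 - dones[t]) * gae
        advantages[t] = gae
        next_value = values[t]
    return advantages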
Example #5
def vpg_cart_pole(game):
    config = VPGConfig()
    config.num_workers = 5
    config.task_fn = lambda: Task(game, num_envs=config.num_workers)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: Adam(params, lr=1e-3)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))

    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.97
    config.entropy_weight = 0.001
    config.rollout_length = 4000
    config.gradient_clip = 5
    config.logger = get_logger(tag=vpg_cart_pole.__name__)
    run_steps(VPGAgent(config))
Example #6
def option_critic_cart_pole():
    game = 'CartPole-v0'
    config = OptionCriticConfig()
    config.num_workers = 8
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: OptionCriticNet(
        FCBody(config.state_dim), config.action_dim, num_options=2)
    config.random_option_prob = LinearSchedule(1.0, 0.01, 1e4)
    
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.rollout_length = 5
    config.termination_regularizer = 0.01
    config.entropy_weight = 0.01
    config.gradient_clip = 5
    config.max_steps = 1e6
    OptionCriticAgent(config).run_steps(
        tag=f'{option_critic_cart_pole.__name__}-{game}')
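
random_option_prob anneals the probability of picking a random option from 1.0 down to 0.01 over 1e4 steps. A minimal sketch of what such a linear schedule evaluates to (illustrative; it only mirrors how LinearSchedule(start, end, steps) is used in these examples):

def linear_schedule(start, end, steps, t):
    # Linearly anneal from `start` to `end` over `steps` steps, then hold at `end`.
    frac = min(t / steps, 1.0)
    return start + frac * (end - start)

# e.g. linear_schedule(1.0, 0.01, 1e4, 5000) -> 0.505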
Example #7
def a2c_cart_pole():
    game = 'CartPole-v0'
    config = A2CConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: Adam(params, lr=1e-3)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))

    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 0.5
    config.max_steps = 1e6
    A2CAgent(config).run_steps(tag=f'{a2c_cart_pole.__name__}-{game}')
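
entropy_weight scales an entropy bonus added to the standard A2C objective, which combines a policy-gradient term and a value-regression term; a minimal sketch of that composition (illustrative, assuming the per-rollout tensors have already been computed):

import torch

def a2c_loss(log_prob, advantage, returns, value, entropy, entropy_weight=0.01):
    # Policy gradient + value regression - entropy bonus.
    policy_loss = -(log_prob * advantage.detach()).mean()
    value_loss = 0.5 * (returns - value).pow(2).mean()
    return policy_loss + value_loss - entropy_weight * entropy.mean()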
Example #8
def nstepdqn_cart_pole():
    game = 'CartPole-v0'
    config = NStepDQNConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: VanillaNet(config.action_dim,
                                           FCBody(config.state_dim))
    # config.network_fn = lambda: DuelingNet(config.action_dim, FCBody(config.state_dim))

    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.double_q = True
    config.rollout_length = 1
    config.gradient_clip = 5
    config.max_steps = 1e6
    NStepDQNAgent(config).run_steps(
        tag=f'{nstepdqn_cart_pole.__name__}-{game}')
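
With double_q = True and rollout_length = 1 this is essentially double DQN with synchronous workers: the online network selects the greedy action and the target network evaluates it. A minimal sketch of that target (standard double Q-learning, not this repo's exact code):

import torch

def double_q_target(reward, next_q_online, next_q_target, done, gamma=0.99):
    # Online net picks the argmax action; target net supplies its value.
    best_action = next_q_online.argmax(dim=-1, keepdim=True)
    next_value = next_q_target.gather(-1, best_action).squeeze(-1)
    return reward + gamma * (1 - done) * next_value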
Example #9
def dqn_cart_pole():
    game = 'CartPole-v0'
    config = DQNConfig()
    config.task_fn = lambda: Task(game)
    config.eval_env = Task(game)
    
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: VanillaNet(config.action_dim,
                                           FCBody(config.state_dim))
    # config.network_fn = lambda: DuelingNet(config.action_dim, FCBody(config.state_dim))
    
    config.batch_size = 10
    config.replay_fn = lambda: ReplayBuffer(config.eval_env,
                                            memory_size=int(1e4))
    
    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.exploration_steps = 1000
    config.double_q = True
    config.rollout_length = 4
    config.gradient_clip = 5
    config.eval_interval = int(5e3)
    config.max_steps = 1e6
    DQNAgent(config).run_steps(tag=f'{dqn_cart_pole.__name__}-{game}')
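
random_action_prob drives ε-greedy exploration, annealed from 1.0 to 0.1 over 1e4 steps after the initial exploration_steps of purely random actions; a minimal sketch of the selection rule it parameterizes (illustrative):

import numpy as np

def epsilon_greedy(q_values, epsilon):
    # With probability epsilon take a uniformly random action, else the greedy one.
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))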
Example #10
def ppo_cart_pole():
    game = 'CartPole-v0'
    config = PPOConfig()
    config.num_workers = 8
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))

    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.001
    config.gradient_clip = 5
    config.rollout_length = 128
    config.optimization_epochs = 10
    config.mini_batch_size = 32 * 5
    config.ppo_ratio_clip = 0.2
    config.max_steps = 1e6
    PPOAgent(config).run_steps(tag=f'{ppo_cart_pole.__name__}-{game}')
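
ppo_ratio_clip = 0.2 is the ε of PPO's clipped surrogate objective, which each of the optimization_epochs passes minimizes over mini-batches of the rollout; a minimal sketch of that objective (standard PPO, not this repo's exact implementation):

import torch

def ppo_clipped_loss(log_prob, old_log_prob, advantage, ratio_clip=0.2):
    # min(r * A, clip(r, 1 - eps, 1 + eps) * A), negated for gradient descent.
    ratio = (log_prob - old_log_prob).exp()
    surrogate = ratio * advantage
    clipped = ratio.clamp(1.0 - ratio_clip, 1.0 + ratio_clip) * advantage
    return -torch.min(surrogate, clipped).mean()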