def ppo_continuous(game, tag=""): config = PPOConfig() config.num_workers = 16 config.task_fn = lambda: Task( game, num_envs=config.num_workers, single_process=False) config.eval_env = Task(game) config.optimizer_fn = lambda params: Adam(params, 3e-4, eps=1e-5) config.network_fn = lambda: GaussianActorCriticNet( config.state_dim, config.action_dim, actor_body=FCBody(config.state_dim, gate=F.tanh), critic_body=FCBody(config.state_dim, gate=F.tanh)) # config.state_normalizer =Mea config.discount = 0.99 config.use_gae = True config.gae_tau = 0.95 config.entropy_weight = 0.01 config.rollout_length = 2048 config.gradient_clip = 0.5 config.optimization_epochs = 10 config.mini_batch_size = 64 config.ppo_ratio_clip = 0.2 config.log_interval = 2048 config.max_steps = int(1e6) PPOAgent(config).run_steps(tag=f'{tag}{ppo_continuous.__name__}-{game}')


def categorical_dqn_cart_pole():
    game = 'CartPole-v0'
    config = CategoricalDQNConfig()
    config.task_fn = lambda: Task(game)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: CategoricalNet(
        config.action_dim, config.categorical_n_atoms, FCBody(config.state_dim))
    config.batch_size = 10
    config.replay_fn = lambda: ReplayBuffer(config.eval_env, memory_size=int(1e4))
    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.exploration_steps = 100
    config.categorical_v_max = 100
    config.categorical_v_min = -100
    config.categorical_n_atoms = 50
    config.rollout_length = 4
    config.gradient_clip = 5
    config.max_steps = 1e6
    CategoricalDQNAgent(config).run_steps(
        tag=f'{categorical_dqn_cart_pole.__name__}-{game}')
def ddpg_continuous(game, tag=""): config = DDPGConfig() config.task_fn = lambda: Task(game) config.eval_env = Task(game) config.network_fn = lambda: DeterministicActorCriticNet( config.state_dim, config.action_dim, actor_body=FCBody(config.state_dim, (400, 300), gate=F.relu), critic_body=TwoLayerFCBodyWithAction( config.state_dim, config.action_dim, (400, 300), gate=F.relu), actor_opt_fn=lambda params: Adam(params, lr=1e-4), critic_opt_fn=lambda params: Adam(params, lr=1e-3)) config.batch_size = 64 config.replay_fn = lambda: ReplayBuffer(config.eval_env, memory_size=int(1e6)) config.random_process_fn = lambda: OrnsteinUhlenbeckProcess( size=(config.action_dim, ), std=LinearSchedule(0.2)) config.discount = 0.99 config.min_memory_size = 64 config.target_network_mix = 1e-3 config.max_steps = int(1e6) DDPGAgent(config).run_steps(tag=f'{tag}{ddpg_continuous.__name__}-{game}')
def a2c_continuous(game, tag=""): config = A2CConfig() config.num_workers = 16 config.task_fn = lambda: Task( game, num_envs=config.num_workers, single_process=True) config.eval_env = Task(game) config.optimizer_fn = lambda params: RMSprop(params, lr=0.0007) config.network_fn = lambda: GaussianActorCriticNet( config.state_dim, config.action_dim, actor_body=FCBody(config.state_dim), critic_body=FCBody(config.state_dim)) config.discount = 0.99 config.use_gae = True config.gae_tau = 1.0 config.entropy_weight = 0.01 config.rollout_length = 5 config.gradient_clip = 5 config.max_steps = int(1e6) A2CAgent(config).run_steps(tag=f'{tag}{a2c_continuous.__name__}-{game}')


def vpg_cart_pole(game):
    config = VPGConfig()
    config.num_workers = 5
    config.task_fn = lambda: Task(game, num_envs=config.num_workers)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: Adam(params, lr=1e-3)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.97
    config.entropy_weight = 0.001
    config.rollout_length = 4000
    config.gradient_clip = 5
    config.logger = get_logger(tag=vpg_cart_pole.__name__)
    run_steps(VPGAgent(config))


def option_critic_cart_pole():
    game = 'CartPole-v0'
    config = OptionCriticConfig()
    config.num_workers = 8
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: OptionCriticNet(
        FCBody(config.state_dim), config.action_dim, num_options=2)
    config.random_option_prob = LinearSchedule(1.0, 0.01, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.rollout_length = 5
    config.termination_regularizer = 0.01
    config.entropy_weight = 0.01
    config.gradient_clip = 5
    config.max_steps = 1e6
    OptionCriticAgent(config).run_steps(
        tag=f'{option_critic_cart_pole.__name__}-{game}')


def a2c_cart_pole():
    game = 'CartPole-v0'
    config = A2CConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: Adam(params, lr=1e-3)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 0.5
    config.max_steps = 1e6
    A2CAgent(config).run_steps(tag=f'{a2c_cart_pole.__name__}-{game}')


def nstepdqn_cart_pole():
    game = 'CartPole-v0'
    config = NStepDQNConfig()
    config.num_workers = 16
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: VanillaNet(config.action_dim, FCBody(config.state_dim))
    # config.network_fn = lambda: DuelingNet(config.action_dim, FCBody(config.state_dim))
    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.double_q = True
    config.rollout_length = 1
    config.gradient_clip = 5
    config.max_steps = 1e6
    NStepDQNAgent(config).run_steps(
        tag=f'{nstepdqn_cart_pole.__name__}-{game}')


def dqn_cart_pole():
    game = 'CartPole-v0'
    config = DQNConfig()
    config.task_fn = lambda: Task(game)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: VanillaNet(config.action_dim, FCBody(config.state_dim))
    # config.network_fn = lambda: DuelingNet(config.action_dim, FCBody(config.state_dim))
    config.batch_size = 10
    config.replay_fn = lambda: ReplayBuffer(config.eval_env, memory_size=int(1e4))
    config.random_action_prob = LinearSchedule(1.0, 0.1, 1e4)
    config.discount = 0.99
    config.target_network_update_freq = 200
    config.exploration_steps = 1000
    config.double_q = True
    config.rollout_length = 4
    config.gradient_clip = 5
    config.eval_interval = int(5e3)
    config.max_steps = 1e6
    DQNAgent(config).run_steps(tag=f'{dqn_cart_pole.__name__}-{game}')


def ppo_cart_pole():
    game = 'CartPole-v0'
    config = PPOConfig()
    config.num_workers = 8
    config.task_fn = lambda: Task(
        game, num_envs=config.num_workers, single_process=True)
    config.eval_env = Task(game)
    config.optimizer_fn = lambda params: RMSprop(params, 0.001)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.001
    config.gradient_clip = 5
    config.rollout_length = 128
    config.optimization_epochs = 10
    config.mini_batch_size = 32 * 5
    config.ppo_ratio_clip = 0.2
    config.max_steps = 1e6
    PPOAgent(config).run_steps(tag=f'{ppo_cart_pole.__name__}-{game}')
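

# Example entry point: a minimal sketch of how these configurations might be launched.
# The helpers mkdir, set_one_thread, random_seed, and select_device are assumed to be
# the utilities shipped with this library; adjust the names if your copy differs.
if __name__ == '__main__':
    mkdir('log')
    mkdir('tf_log')
    set_one_thread()
    random_seed()
    select_device(-1)  # -1 runs on CPU; pass a GPU index (e.g. 0) to use CUDA

    dqn_cart_pole()
    # a2c_cart_pole()
    # ppo_cart_pole()
    # ppo_continuous('HalfCheetah-v2')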