# NOTE(review): this chunk begins mid-function — the enclosing `def` (which
# binds `config`, `game`, and `tag`) is outside the visible source. The 4-space
# body indent below is assumed from that context; TODO confirm against the
# full file.
    config.batch_size = 32
    # Replay buffer sized to 1M transitions; observations are stacked to
    # `history_length` frames (pixel/Atari-style input, per ImageNormalizer below).
    config.replay_fn = lambda: ReplayBuffer(
        config.eval_env, memory_size=int(1e6), stack=config.history_length)
    # Random-action probability annealed linearly from 1.0 to 0.01 over 1e6 steps.
    config.random_action_prob = LinearSchedule(1.0, 0.01, 1e6)
    config.state_normalizer = ImageNormalizer()
    config.reward_normalizer = SignNormalizer()
    config.discount = 0.99
    config.target_network_update_freq = 10000
    config.exploration_steps = 50000
    # Categorical (distributional) value support: 51 atoms spanning [-10, 10].
    config.categorical_v_max = 10
    config.categorical_v_min = -10
    config.categorical_n_atoms = 51
    config.rollout_length = 4
    config.gradient_clip = 0.5
    config.max_steps = 2e7
    # Run tag is "<tag><function name>-<game>", e.g. "bench-categorical_dqn_pixel_atari-BreakoutNoFrameskip-v4".
    CategoricalDQNAgent(config).run_steps(
        tag=f'{tag}{categorical_dqn_pixel_atari.__name__}-{game}')


if __name__ == '__main__':
    # NOTE(review): `random_seed()` is called with no argument here, unlike the
    # seeded `random_seed(0)` used by the sibling VPG script — presumably a
    # non-deterministic seed is intended; confirm.
    random_seed()
    select_device(0)
    # game = 'MountainCar-v0'
    game = 'BreakoutNoFrameskip-v4'
    # categorical_dqn_cart_pole()
    categorical_dqn_pixel_atari(game, "bench-")
from drl.util.torch_utils import random_seed, select_device


def vpg_cart_pole(game):
    """Train a vanilla policy-gradient (VPG) agent with GAE on `game`."""
    config = VPGConfig()
    config.num_workers = 5

    # Environment factories: a vectorized training task plus one eval env.
    def make_task():
        return Task(game, num_envs=config.num_workers)

    config.task_fn = make_task
    config.eval_env = Task(game)

    # Optimizer and network factories: Adam over a categorical actor-critic
    # with a fully-connected body. These close over `config` so that
    # `state_dim`/`action_dim` are read at call time, not at definition time.
    def make_optimizer(params):
        return Adam(params, lr=1e-3)

    def make_network():
        return CategoricalActorCriticNet(
            config.state_dim, config.action_dim, FCBody(config.state_dim))

    config.optimizer_fn = make_optimizer
    config.network_fn = make_network

    # Discounting and advantage estimation.
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.97

    # Exploration / rollout / stability knobs.
    config.entropy_weight = 0.001
    config.rollout_length = 4000
    config.gradient_clip = 5

    config.logger = get_logger(tag=vpg_cart_pole.__name__)
    run_steps(VPGAgent(config))


if __name__ == '__main__':
    random_seed(0)
    select_device(0)
    # game = 'MountainCar-v0'
    game = 'CartPole-v0'
    # game = 'BreakoutNoFrameskip-v4'
    vpg_cart_pole(game)