Example No. 1
def categorical_dqn_pixel_atari(game, tag=''):
    # NOTE: the listing is truncated; the signature above is reconstructed from
    # the call site below, and the preceding config setup (task_fn, optimizer_fn,
    # network_fn, ...) is missing from the source.
    config.batch_size = 32
    config.replay_fn = lambda: ReplayBuffer(
        config.eval_env, memory_size=int(1e6), stack=config.history_length)
    # epsilon-greedy exploration, annealed linearly from 1.0 to 0.01 over 1e6 steps
    config.random_action_prob = LinearSchedule(1.0, 0.01, 1e6)

    config.state_normalizer = ImageNormalizer()   # scale raw pixels into [0, 1]
    config.reward_normalizer = SignNormalizer()   # clip rewards to {-1, 0, +1}

    config.discount = 0.99
    config.target_network_update_freq = 10000
    config.exploration_steps = 50000
    # C51: return distribution over 51 atoms spanning the support [-10, 10]
    config.categorical_v_max = 10
    config.categorical_v_min = -10
    config.categorical_n_atoms = 51
    config.rollout_length = 4
    config.gradient_clip = 0.5
    config.max_steps = 2e7
    CategoricalDQNAgent(config).run_steps(
        tag=f'{tag}{categorical_dqn_pixel_atari.__name__}-{game}')


if __name__ == '__main__':
    random_seed()
    select_device(0)
    # game = 'MountainCar-v0'

    game = 'BreakoutNoFrameskip-v4'
    # categorical_dqn_cart_pole()
    categorical_dqn_pixel_atari(game, "bench-")
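For context, the categorical_v_min / categorical_v_max / categorical_n_atoms settings above configure C51: the return distribution is represented by 51 atoms on a fixed support spanning [-10, 10]. Below is a minimal standalone sketch (plain NumPy; project_to_support is a hypothetical helper, not this repo's code) of the projection step that maps a Bellman-updated target distribution back onto that fixed support (Bellemare et al., 2017):

import numpy as np

def project_to_support(target_atoms, target_probs,
                       v_min=-10.0, v_max=10.0, n_atoms=51):
    # Distribute each target atom's probability mass onto the two
    # nearest atoms of the fixed support.
    delta_z = (v_max - v_min) / (n_atoms - 1)
    projected = np.zeros(n_atoms)
    b = (np.clip(target_atoms, v_min, v_max) - v_min) / delta_z
    lower = np.floor(b).astype(int)
    upper = np.ceil(b).astype(int)
    # if b lands exactly on an atom, floor == ceil; nudge so no mass is lost
    lower[(upper > 0) & (lower == upper)] -= 1
    upper[(lower < n_atoms - 1) & (lower == upper)] += 1
    for j in range(n_atoms):
        projected[lower[j]] += target_probs[j] * (upper[j] - b[j])
        projected[upper[j]] += target_probs[j] * (b[j] - lower[j])
    return projected

# usage: one transition with reward 1.0 and a dummy next-state distribution
support = np.linspace(-10.0, 10.0, 51)
p_next = np.full(51, 1.0 / 51)
target = project_to_support(1.0 + 0.99 * support, p_next)
assert abs(target.sum() - 1.0) < 1e-9   # probability mass is preserved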
Example No. 2
# NOTE: only this import survives in the listing; the remaining names
# (VPGConfig, Task, Adam, CategoricalActorCriticNet, FCBody, get_logger,
# run_steps, VPGAgent) are presumably imported from the same drl package
# and torch.optim, but those lines are missing from the source.
from drl.util.torch_utils import random_seed, select_device


def vpg_cart_pole(game):
    config = VPGConfig()
    config.num_workers = 5
    config.task_fn = lambda: Task(game, num_envs=config.num_workers)
    config.eval_env = Task(game)

    config.optimizer_fn = lambda params: Adam(params, lr=1e-3)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, FCBody(config.state_dim))

    config.discount = 0.99         # gamma
    config.use_gae = True
    config.gae_tau = 0.97          # lambda for GAE; see the sketch after this example
    config.entropy_weight = 0.001  # small entropy bonus to keep the policy exploratory
    config.rollout_length = 4000   # environment steps collected per update
    config.gradient_clip = 5
    config.logger = get_logger(tag=vpg_cart_pole.__name__)
    run_steps(VPGAgent(config))


if __name__ == '__main__':
    random_seed(0)
    select_device(0)
    # game = 'MountainCar-v0'
    game = 'CartPole-v0'
    # game = 'BreakoutNoFrameskip-v4'
    vpg_cart_pole(game)
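The use_gae / gae_tau settings in vpg_cart_pole enable Generalized Advantage Estimation (Schulman et al., 2016) with lambda = 0.97. Below is a minimal standalone sketch of that computation (plain NumPy; gae_advantages is a hypothetical helper, not this repo's code, and episode terminations are ignored for brevity):

import numpy as np

def gae_advantages(rewards, values, last_value, gamma=0.99, tau=0.97):
    # Backward recursion of GAE; terminal masks omitted for brevity.
    advantages = np.zeros(len(rewards))
    gae, next_value = 0.0, last_value
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * next_value - values[t]  # one-step TD error
        gae = delta + gamma * tau * gae  # lambda-discounted sum of TD errors
        advantages[t] = gae
        next_value = values[t]
    return advantages

# usage with a toy 4-step rollout
adv = gae_advantages(rewards=np.array([0.0, 0.0, 0.0, 1.0]),
                     values=np.array([0.1, 0.2, 0.3, 0.4]),
                     last_value=0.0)
print(adv)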