Example #1
0
def test_sanity_env_v6():
    """DQN sanity check on SanityEnvV6.

    Trains a small (64,) value network with MSE loss + Adam for 500
    episodes and asserts the worst training loss observed stays below
    1e-5, i.e. the agent fully fits this trivial environment.
    """
    seed_everything()
    mse_loss = loss_functions['mse']
    adam_opt = optimisers['adam']
    sanity_env = SanityEnvV6(max_num_steps=3, correct_timestep=2)

    # Agent dimensions are taken directly from the env's spaces.
    agent = DQNAgent(
        learning_rate=1e-2,
        discount_rate=0.99,
        final_layer_neurons=64,
        action_space_dim=sanity_env.action_space.n,
        observation_space_dim=sanity_env.observation_space.shape[0],
        value_net_layer_spec=(64, ),
        random_seed=RANDOM_SEED,
        loss_fn=mse_loss,
        optimiser=adam_opt,
        cuda=False,
        target_update_steps=10,
    )

    trainer = Trainer(
        env=sanity_env,
        agent=agent,
        memory_buffer=MemoryBuffer(buffer_length=100),
        timestep_to_start_learning=20,
        batch_size=16,
        random_seed=RANDOM_SEED,
        max_num_steps=1000000,
        train_every_n_steps=2,
        write_to_tensorboard=False,
    )
    trainer.run(num_episodes=500)

    assert trainer.loss_values.max() < 1e-5, f'Loss is too high on {sanity_env.name}'
Example #2
0
def test_sanity_env_v2():
    """DQN sanity check on SanityEnvV2.

    Trains a tiny (8,) value network with MSE loss + Adam and gradient
    clipping enabled, then asserts the maximum training loss over the
    whole run is below 1e-5.
    """
    seed_everything()
    mse_loss = loss_functions['mse']
    adam_opt = optimisers['adam']
    sanity_env = SanityEnvV2()

    agent = DQNAgent(
        learning_rate=5e-3,
        discount_rate=0.99,
        final_layer_neurons=8,
        action_space_dim=sanity_env.action_space.n,
        observation_space_dim=sanity_env.observation_space.shape[0],
        value_net_layer_spec=(8, ),
        random_seed=RANDOM_SEED,
        loss_fn=mse_loss,
        optimiser=adam_opt,
        cuda=False,
        # NOTE(review): True here reads like an enable flag, while the
        # threshold below carries the magnitude — confirm against the
        # DQNAgent signature that this parameter is indeed a bool toggle.
        gradient_clipping_value=True,
        gradient_clipping_threshold=1,
        target_update_steps=10,
    )

    trainer = Trainer(
        env=sanity_env,
        agent=agent,
        memory_buffer=MemoryBuffer(buffer_length=100),
        timestep_to_start_learning=20,
        batch_size=16,
        random_seed=RANDOM_SEED,
        max_num_steps=1000000,
        train_every_n_steps=1,
        write_to_tensorboard=False,
    )
    trainer.run(num_episodes=350)

    assert trainer.loss_values.max() < 1e-5, 'Loss is too high on sanity env 2'
Example #3
0
# Seed every RNG the training run touches (torch, stdlib random, numpy)
# so results are reproducible; skipped entirely when no seed is given.
if args.seed is not None:
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

# Hidden-layer sizes arrive as an underscore-separated CLI string,
# e.g. "64_64" -> (64, 64).
layers_spec = tuple(int(x) for x in args.layers_spec.split('_'))

# Build the agent selected on the command line; dimensions come from
# the environment's action/observation spaces.
if args.agent == 'DQN':
    agent = DQNAgent(
        learning_rate=args.lr,
        discount_rate=args.discount,
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        value_net_layer_spec=layers_spec,
        final_layer_neurons=args.final_layer_neurons,
        # NOTE(review): num_outputs duplicates action_space_dim here —
        # presumably both must equal the number of actions; confirm.
        num_outputs=env.action_space.n,
        random_seed=args.seed,
        loss_fn=loss_fn,
        optimiser=optimiser,
        cuda=args.cuda,
        target_update_steps=args.target_update_steps,
    )
elif args.agent == 'AC':
    agent = ActorCriticAgent(
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        policy_learning_rate=args.lr,
        value_learning_rate=args.lr,
        discount_rate=args.discount,
        policy_net_layers_spec=layers_spec,
        value_net_layers_spec=layers_spec,