def test_sanity_env_v6():
    """DQN on the deterministic SanityEnvV6: every recorded loss must stay below 1e-5."""
    seed_everything()
    loss_fn = loss_functions['mse']
    optimiser = optimisers['adam']
    env = SanityEnvV6(max_num_steps=3, correct_timestep=2)
    dqn_agent = DQNAgent(
        learning_rate=1e-2,
        discount_rate=0.99,
        final_layer_neurons=64,
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        value_net_layer_spec=(64, ),
        random_seed=RANDOM_SEED,
        loss_fn=loss_fn,
        optimiser=optimiser,
        cuda=False,
        target_update_steps=10,
    )
    trainer = Trainer(env=env,
                      agent=dqn_agent,
                      memory_buffer=MemoryBuffer(buffer_length=100),
                      timestep_to_start_learning=20,
                      batch_size=16,
                      random_seed=RANDOM_SEED,
                      max_num_steps=1000000,
                      train_every_n_steps=2,
                      write_to_tensorboard=False)
    trainer.run(num_episodes=500)
    assert trainer.loss_values.max() < 1e-5, f'Loss is too high on {env.name}'
def test_sanity_env_v2():
    """DQN on SanityEnvV2, with gradient clipping enabled: loss must stay below 1e-5."""
    seed_everything()
    loss_fn = loss_functions['mse']
    optimiser = optimisers['adam']
    env = SanityEnvV2()
    dqn_agent = DQNAgent(
        learning_rate=5e-3,
        discount_rate=0.99,
        final_layer_neurons=8,
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        value_net_layer_spec=(8, ),
        random_seed=RANDOM_SEED,
        loss_fn=loss_fn,
        optimiser=optimiser,
        cuda=False,
        gradient_clipping_value=True,
        gradient_clipping_threshold=1,
        target_update_steps=10,
    )
    trainer = Trainer(env=env,
                      agent=dqn_agent,
                      memory_buffer=MemoryBuffer(buffer_length=100),
                      timestep_to_start_learning=20,
                      batch_size=16,
                      random_seed=RANDOM_SEED,
                      max_num_steps=1000000,
                      train_every_n_steps=1,
                      write_to_tensorboard=False)
    trainer.run(num_episodes=350)
    assert trainer.loss_values.max() < 1e-5, 'Loss is too high on sanity env 2'
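# The sanity environments above are defined elsewhere in the repo; the sketch
# below is an assumed minimal shape, not the actual implementation. It shows
# the kind of deterministic credit-assignment check these tests rely on:
# reward 1 only when the agent takes action 1 at `correct_timestep`. The gym
# spaces and the one-hot observation encoding are assumptions.
import numpy as np
from gym import spaces


class SanityEnvSketch:
    """Deterministic env: reward depends only on acting at the right step."""
    name = 'sanity_env_sketch'

    def __init__(self, max_num_steps=3, correct_timestep=2):
        self.max_num_steps = max_num_steps
        self.correct_timestep = correct_timestep
        self.action_space = spaces.Discrete(2)
        # Observation is the one-hot encoded current timestep.
        self.observation_space = spaces.Box(0, 1, shape=(max_num_steps, ))
        self.t = 0

    def reset(self):
        self.t = 0
        return self._obs()

    def _obs(self):
        obs = np.zeros(self.max_num_steps, dtype=np.float32)
        obs[min(self.t, self.max_num_steps - 1)] = 1.0
        return obs

    def step(self, action):
        # Reward is fully determined by (timestep, action), so a correct
        # value function drives the TD loss to essentially zero.
        reward = 1.0 if (self.t == self.correct_timestep and action == 1) else 0.0
        self.t += 1
        done = self.t >= self.max_num_steps
        return self._obs(), reward, done, {}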
import random

import numpy as np
import torch

# set seeds
if args.seed is not None:
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

# parse the underscore-separated layer spec, e.g. '64_64' -> (64, 64)
layers_spec = tuple(int(x) for x in args.layers_spec.split('_'))

if args.agent == 'DQN':
    agent = DQNAgent(
        learning_rate=args.lr,
        discount_rate=args.discount,
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        value_net_layer_spec=layers_spec,
        final_layer_neurons=args.final_layer_neurons,
        random_seed=args.seed,
        loss_fn=loss_fn,
        optimiser=optimiser,
        cuda=args.cuda,
        target_update_steps=args.target_update_steps,
    )
elif args.agent == 'AC':
    agent = ActorCriticAgent(
        action_space_dim=env.action_space.n,
        observation_space_dim=env.observation_space.shape[0],
        policy_learning_rate=args.lr,
        value_learning_rate=args.lr,
        discount_rate=args.discount,
        policy_net_layers_spec=layers_spec,
        value_net_layers_spec=layers_spec,
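# The `args` namespace above comes from the script's own argument parser,
# which is not shown in this excerpt. A minimal parser consistent with the
# attributes accessed here might look like the sketch below; the flag names
# follow the attribute accesses, but defaults and choices are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--agent', choices=['DQN', 'AC'], default='DQN')
parser.add_argument('--seed', type=int, default=None)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--discount', type=float, default=0.99)
parser.add_argument('--layers_spec', type=str, default='64_64',
                    help='underscore-separated hidden layer widths, e.g. 64_64')
parser.add_argument('--final_layer_neurons', type=int, default=64)
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--target_update_steps', type=int, default=10)
args = parser.parse_args()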