Example #1
# Here we combine the same improvements as Rainbow, but use quantile regression (QR) instead of C51
# We still use a DistributionalQNetwork, but n is now the number of quantiles rather than the number of atoms
# (a sketch of the quantile Huber loss follows this example)
q_func = nn.DistributionalQNetwork([64],
                                   env.action_space.n,
                                   n=75,
                                   noisy_net=True,
                                   dueling=[32])
# As in Rainbow, exploration comes from the noisy network, so epsilon stays constant at 0
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)
loss = dqn.algorithms.QuantileRegressionLoss()
update_target = dqn.algorithms.HardUpdate()
# Prioritized (proportional) replay: alpha controls how strongly priorities bias sampling,
# beta controls the strength of the importance-sampling correction
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=100)

agent.train(env, num_timesteps=num_steps, render=False)
agent.save('save/qr_dqn')
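
For reference, the QuantileRegressionLoss above is the quantile Huber loss of QR-DQN, in which the n=75 network outputs are trained as quantile estimates of the return distribution. Below is a minimal NumPy sketch of that loss for a single state-action pair; the function name, shapes, and kappa default are illustrative and not part of the dqn library's API.

import numpy as np

def quantile_huber_loss(pred_quantiles, target_samples, kappa=1.0):
    """Illustrative quantile Huber loss in the style of QR-DQN.

    pred_quantiles:  (n,) predicted quantile values for one state-action pair
    target_samples:  (m,) samples of the target return distribution (e.g. the
                     target network's quantiles shifted by reward and discount)
    """
    n = pred_quantiles.shape[0]
    # Quantile midpoints tau_hat_i = (2i + 1) / (2n)
    tau_hat = (np.arange(n) + 0.5) / n

    # Pairwise TD errors u[i, j] = target_j - pred_i
    u = target_samples[np.newaxis, :] - pred_quantiles[:, np.newaxis]

    # Huber loss applied elementwise to the pairwise errors
    huber = np.where(np.abs(u) <= kappa,
                     0.5 * u ** 2,
                     kappa * (np.abs(u) - 0.5 * kappa))

    # Asymmetric quantile weighting |tau_hat_i - 1{u < 0}|
    weight = np.abs(tau_hat[:, np.newaxis] - (u < 0).astype(np.float64))

    # Sum over quantiles, average over target samples
    return (weight * huber / kappa).sum(axis=0).mean()

The asymmetric weight is what pushes each output toward its own quantile of the target distribution rather than toward its mean.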
Example #2
File: train.py Project: zalzala/rl-intro
        action = agent.get_action(state)

        # Interact with the environment and observe new state and reward
        next_state, reward, terminated, info = environment.step(action)

        # Huge negative reward if failed
        if terminated:
            reward = -100

        # Remember agent's experience: state / action / reward / next state
        next_state = np.reshape(next_state, [1, n_state_features])
        agent.remember(state, action, reward, next_state, terminated)

        # Change the current state
        state = next_state

        # Print statistics if agent failed and quit inner loop
        if terminated:
            print(
                f'Episode: {episode} of {EPISODES} (score: {t}s, exploration rate: {agent.epsilon:.4})'
            )
            break

    # Retrain the value-function approximation model once there are enough examples in memory
    # (a minimal replay-buffer sketch follows this example)
    if len(agent.memory) >= BATCH_SIZE:
        agent.experience_replay(BATCH_SIZE)

    # Save trained agent every once in a while
    if episode % 100 == 0:
        agent.save(f'./models/{environment_name}.h5')
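
The remember / experience_replay calls above follow the usual replay-buffer pattern: store every transition, then train on random mini-batches once at least BATCH_SIZE transitions are available. The class below is a minimal sketch of such a buffer; the class and method names are illustrative and are not taken from the zalzala/rl-intro repository.

import random
from collections import deque

class ReplayMemory:
    """Minimal uniform replay buffer: store transitions, sample random mini-batches."""

    def __init__(self, capacity=2000):
        # Oldest transitions are dropped automatically once capacity is reached
        self.buffer = deque(maxlen=capacity)

    def remember(self, state, action, reward, next_state, terminated):
        self.buffer.append((state, action, reward, next_state, terminated))

    def sample(self, batch_size):
        # Uniform random sampling breaks the temporal correlation
        # between consecutive transitions
        return random.sample(list(self.buffer), batch_size)

    def __len__(self):
        return len(self.buffer)

The length check in the training loop (len(agent.memory) >= BATCH_SIZE) simply guards against sampling a mini-batch from a buffer that is still too small.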
Example #3
# Rainbow uses the C51 distributional network, so n here is the number of atoms
q_func = nn.DistributionalQNetwork([64],
                                   env.action_space.n,
                                   n=51,
                                   noisy_net=True,
                                   dueling=[32])

# Action selection in Rainbow is done using noisy nets with no epsilon (see the noisy-linear sketch after this example)
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)

loss = dqn.algorithms.CategoricalAlgorithm(double_q=True)
update_target = dqn.algorithms.HardUpdate()

alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=200)

agent.train(env, num_timesteps=num_steps, render=False)
agent.save('save_test/rainbow')
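
Since noisy_net=True is what lets epsilon stay at 0 in these examples, here is a minimal PyTorch sketch of the factorised-Gaussian noisy linear layer from Fortunato et al. (2018) that such networks substitute for ordinary fully connected layers. It illustrates the idea only; the class name, initialisation constants, and reset_noise interface are assumptions, not the dqn library's implementation.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class NoisyLinear(nn.Module):
    """Noisy linear layer: y = (W_mu + W_sigma * eps_W) x + (b_mu + b_sigma * eps_b)."""

    def __init__(self, in_features, out_features, sigma0=0.5):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Learnable means and noise scales for weights and biases
        self.weight_mu = nn.Parameter(torch.empty(out_features, in_features))
        self.weight_sigma = nn.Parameter(torch.empty(out_features, in_features))
        self.bias_mu = nn.Parameter(torch.empty(out_features))
        self.bias_sigma = nn.Parameter(torch.empty(out_features))
        # Noise buffers (not trained, resampled via reset_noise)
        self.register_buffer('eps_in', torch.zeros(in_features))
        self.register_buffer('eps_out', torch.zeros(out_features))
        bound = 1.0 / math.sqrt(in_features)
        nn.init.uniform_(self.weight_mu, -bound, bound)
        nn.init.uniform_(self.bias_mu, -bound, bound)
        nn.init.constant_(self.weight_sigma, sigma0 / math.sqrt(in_features))
        nn.init.constant_(self.bias_sigma, sigma0 / math.sqrt(in_features))
        self.reset_noise()

    @staticmethod
    def _scaled_noise(size):
        x = torch.randn(size)
        return x.sign() * x.abs().sqrt()  # f(x) = sign(x) * sqrt(|x|)

    def reset_noise(self):
        # Factorised noise: one noise vector per input unit and one per output unit
        self.eps_in.copy_(self._scaled_noise(self.in_features))
        self.eps_out.copy_(self._scaled_noise(self.out_features))

    def forward(self, x):
        weight = self.weight_mu + self.weight_sigma * torch.outer(self.eps_out, self.eps_in)
        bias = self.bias_mu + self.bias_sigma * self.eps_out
        return F.linear(x, weight, bias)

Because the sigma parameters are learned, the amount of exploration noise adapts per weight during training instead of being annealed by an external epsilon schedule.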