Example #1
# Computes epsilon for epsilon greedy policy given the training step
epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.01, # initial ε
    decay_steps=epsilon_decay_steps, 
    end_learning_rate=epsilon_final) # final ε

agent = DqnAgent(tf_env.time_step_spec(),
                 tf_env.action_spec(),
                 q_network=q_net,
                 optimizer=optimizer,
                 target_update_period=target_update_period, 
                 td_errors_loss_fn=keras.losses.Huber(reduction="none"),
                 gamma=discount_factor, # discount factor
                 train_step_counter=train_step,
                 epsilon_greedy=lambda: epsilon_fn(train_step))
# Restore a previously trained policy saved on disk (II is an index defined elsewhere)
agent.policy = tf.compat.v2.saved_model.load('../DATA/policy_{}'.format(II))
agent.initialize()

# Speed up training by wrapping agent.train as a TensorFlow function
agent.train = function(agent.train)

## ------------------------------------------------------------------------------
## ------------------------------------------------------------------------------
## ------------------------------------------------------------------------------

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    # Determines the data spec type
    data_spec=agent.collect_data_spec,
    # The number of trajectories added at each step
    batch_size=tf_env.batch_size,
    # This can store 4 million trajectories (note: requires a lot of RAM)
    max_length=4000000)
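
# --- Illustrative continuation (not part of the original example): a common way
# --- to consume this buffer is to expose it as a tf.data.Dataset of sampled
# --- sub-trajectories; sample_batch_size and num_steps below are assumed values.
dataset = replay_buffer.as_dataset(
    sample_batch_size=64,    # trajectories per training batch (assumed)
    num_steps=2,             # two consecutive steps, as needed for one-step TD targets
    num_parallel_calls=3).prefetch(3)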
Example #2
epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=1.0, decay_steps=250000, end_learning_rate=0.01)
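
# --- Illustrative check (not part of the original example): with the default
# --- power=1.0 this schedule decays ε linearly from 1.0 to 0.01 over 250,000 steps.
for step in (0, 125_000, 250_000):
    print(step, float(epsilon_fn(step)))   # -> 1.0, ~0.505, 0.01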
print("Before Agent")
agent = DqnAgent(tf_env.time_step_spec(),
                 tf_env.action_spec(),
                 q_network=q_net,
                 optimizer=optimizer,
                 target_update_period=2000,
                 td_errors_loss_fn=keras.losses.Huber(reduction="none"),
                 gamma=0.99,
                 train_step_counter=train_step,
                 epsilon_greedy=lambda: epsilon_fn(train_step))

agent.initialize()
# Reuse a policy passed in by the caller, if one was provided
if policy is not None:
    agent.policy = policy

print("After  Agent.initialize()")

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agent.collect_data_spec,
    batch_size=tf_env.batch_size,
    max_length=100000)
"""
replay_buffer = PyHashedReplayBuffer(
    data_spec = agent.collect_data_spec,
    #batch_size = tf_env.batch_size,
    capacity = 1000000
)
"""
print("After  replay_buffer")