# NOTE(review): this chunk was collapsed onto a single physical line in the
# source; the inline '#' comments were commenting out every statement after
# the first. Reconstructed into valid multi-line Python with the original
# statements and comments preserved.

# Computes epsilon for the epsilon-greedy policy given the training step.
epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.01,        # initial epsilon
    decay_steps=epsilon_decay_steps,
    end_learning_rate=epsilon_final)   # final epsilon

agent = DqnAgent(
    tf_env.time_step_spec(),
    tf_env.action_spec(),
    q_network=q_net,
    optimizer=optimizer,
    target_update_period=target_update_period,
    # reduction="none" keeps per-sample TD errors; the agent reduces them itself.
    td_errors_loss_fn=keras.losses.Huber(reduction="none"),
    gamma=discount_factor,             # discount factor
    train_step_counter=train_step,
    # Wrapped in a lambda so the *current* train_step is read on every call.
    epsilon_greedy=lambda: epsilon_fn(train_step))

# Restore a previously exported policy before initializing the agent.
# NOTE(review): overwriting agent.policy with a SavedModel policy assumes
# DqnAgent allows this attribute to be reassigned — confirm against the
# tf_agents version in use.
agent.policy = tf.compat.v2.saved_model.load('../DATA/policy_{}'.format(II))
agent.initialize()

# Speed up training by compiling agent.train as a TensorFlow function.
agent.train = function(agent.train)

## ------------------------------------------------------------------------------
## ------------------------------------------------------------------------------
## ------------------------------------------------------------------------------

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    # Determines the data spec type
    data_spec=agent.collect_data_spec,
    # The number of trajectories added at each step
    batch_size=tf_env.batch_size,
    # This can store 4 million trajectories (note: requires a lot of RAM)
    # NOTE(review): max_length was truncated in the mangled source; 4_000_000
    # is inferred from the comment above — confirm against the original file.
    max_length=4000000)
# NOTE(review): this chunk was collapsed onto a single physical line in the
# source. Reconstructed into valid multi-line Python.

# Epsilon schedule for the epsilon-greedy collect policy: decays linearly
# from 1.0 to 0.01 over 250k training steps.
epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=1.0,     # initial epsilon
    decay_steps=250000,
    end_learning_rate=0.01)        # final epsilon

print("Before Agent")
agent = DqnAgent(
    tf_env.time_step_spec(),
    tf_env.action_spec(),
    q_network=q_net,
    optimizer=optimizer,
    target_update_period=2000,
    # reduction="none" keeps per-sample TD errors; the agent reduces them itself.
    td_errors_loss_fn=keras.losses.Huber(reduction="none"),
    gamma=0.99,                    # discount factor
    train_step_counter=train_step,
    # Wrapped in a lambda so the *current* train_step is read on every call.
    epsilon_greedy=lambda: epsilon_fn(train_step))
agent.initialize()

# Optionally resume from an externally supplied policy.
# Fixed: `policy != None` -> `policy is not None` (identity check is the
# idiomatic — and correct — way to test for None).
if policy is not None:
    agent.policy = policy
print("After Agent.initialize()")

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agent.collect_data_spec,
    batch_size=tf_env.batch_size,
    max_length=100000)

# Retained alternative (was a dead triple-quoted string, i.e. a no-op
# expression statement; kept here as comments for reference):
# replay_buffer = PyHashedReplayBuffer(
#     data_spec=agent.collect_data_spec,
#     # batch_size=tf_env.batch_size,
#     capacity=1000000,
# )

print("After replay_buffer")