示例#1
0
  def callback(info):
    """Record the latest parameters and per-episode statistics.

    Reads ``episode``, ``optimizer``, ``tracking_params``, ``reward`` and
    ``elapsed`` from ``info``. The optimizer value and the tracking params are
    written into slot 0 of ``params`` and ``tracking_params`` (both defined
    outside this function), the policy is evaluated once, and the reward,
    policy value and elapsed figure are appended to their history lists.

    Args:
      info: Mapping describing the finished episode. The schema is inferred
        from the keys read here -- confirm against the code that invokes
        this callback.
    """
    episode_idx = info['episode']
    optimizer = info["optimizer"]

    # Slot 0 of these outer containers holds the most recent snapshot.
    params[0] = optimizer.value
    tracking_params[0] = info["tracking_params"]

    # NOTE(review): element 0 of the optimizer value is presumably the actor
    # parameters -- confirm against run_ddpg.eval_policy.
    eval_rng = callback_rngs[episode_idx]
    evaluated_value = run_ddpg.eval_policy(eval_rng, optimizer.value[0])

    episode_reward = info["reward"]
    train_reward_per_episode.append(episode_reward)
    policy_value_per_episode.append(evaluated_value)
    episode_elapsed = info["elapsed"]
    elapsed_per_episode.append(episode_elapsed)
示例#2
0
  def callback(info):
    """Evaluate the policy, print an episode summary, and record statistics.

    Reads ``episode``, ``reward``, ``optimizer``, ``episode_length`` and
    ``elapsed`` from ``info``. The policy is evaluated with element 0 of the
    optimizer value, a one-line summary is printed, and the reward, policy
    value and episode length are appended to history lists defined outside
    this function.

    Args:
      info: Mapping describing the finished episode. The schema is inferred
        from the keys read here -- confirm against the code that invokes
        this callback.
    """
    episode_idx = info['episode']
    episode_reward = info['reward']

    # Element 0 of the optimizer value is used as the actor parameters.
    actor_params = info["optimizer"].value[0]
    eval_rng = callback_rngs[episode_idx]
    evaluated_value = run_ddpg.eval_policy(eval_rng, actor_params)

    length = info['episode_length']
    elapsed = info['elapsed']
    summary = (
        f"Episode {episode_idx}, "
        f"episode_length = {length}, "
        f"reward = {episode_reward}, "
        f"policy_value = {evaluated_value}, "
        f"elapsed = {elapsed}"
    )
    print(summary)

    train_reward_per_episode.append(episode_reward)
    policy_value_per_episode.append(evaluated_value)
    episode_lengths.append(length)