def testTrainWithRNN(self):
  # Emits trajectories shaped (batch=1, time=6, ...)
  traj, time_step_spec, action_spec = (
      driver_test_utils.make_random_trajectory())
  cloning_net = q_rnn_network.QRnnNetwork(
      time_step_spec.observation, action_spec)
  agent = behavioral_cloning_agent.BehavioralCloningAgent(
      time_step_spec,
      action_spec,
      cloning_network=cloning_net,
      optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01),
      num_outer_dims=2)
  # Disable clipping to make sure we can see the difference in behavior.
  agent.policy._clip = False
  # Remove policy_info, as BehavioralCloningAgent expects none.
  traj = traj.replace(policy_info=())
  # TODO(b/123883319)
  if tf.executing_eagerly():
    train_and_loss = lambda: agent.train(traj)
  else:
    train_and_loss = agent.train(traj)
  replay = trajectory_replay.TrajectoryReplay(agent.policy)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  initial_actions = self.evaluate(replay.run(traj)[0])
  for _ in range(TRAIN_ITERATIONS):
    self.evaluate(train_and_loss)
  post_training_actions = self.evaluate(replay.run(traj)[0])
  # We don't necessarily converge to the same actions as in the trajectory
  # after 10 steps of an untuned optimizer, but the policy does change.
  self.assertFalse(np.all(initial_actions == post_training_actions))
def testTrain(self):
  # Emits trajectories shaped (batch=1, time=6, ...)
  traj, time_step_spec, action_spec = (
      driver_test_utils.make_random_trajectory())
  # Convert to shapes (batch=6, 1, ...) so this works with a non-RNN model.
  traj = tf.nest.map_structure(common.transpose_batch_time, traj)
  cloning_net = q_network.QNetwork(time_step_spec.observation, action_spec)
  agent = behavioral_cloning_agent.BehavioralCloningAgent(
      time_step_spec,
      action_spec,
      cloning_network=cloning_net,
      optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01))
  # Remove policy_info, as BehavioralCloningAgent expects none.
  traj = traj.replace(policy_info=())
  # TODO(b/123883319)
  if tf.executing_eagerly():
    train_and_loss = lambda: agent.train(traj)
  else:
    train_and_loss = agent.train(traj)
  replay = trajectory_replay.TrajectoryReplay(agent.policy)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  initial_actions = self.evaluate(replay.run(traj)[0])
  for _ in range(TRAIN_ITERATIONS):
    self.evaluate(train_and_loss)
  post_training_actions = self.evaluate(replay.run(traj)[0])
  # We don't necessarily converge to the same actions as in the trajectory
  # after 10 steps of an untuned optimizer, but the policy does change.
  self.assertFalse(np.all(initial_actions == post_training_actions))
def testTrainWithSingleOuterDimension(self):
  # Emits trajectories shaped (batch=1, time=6, ...)
  traj, time_step_spec, action_spec = (
      driver_test_utils.make_random_trajectory())
  # Convert to shapes (batch=6, 1, ...) so this works with a non-RNN model.
  traj = tf.nest.map_structure(common.transpose_batch_time, traj)
  # Remove the time dimension so there is only one outer dimension.
  traj = tf.nest.map_structure(lambda x: tf.squeeze(x, axis=1), traj)
  cloning_net = q_network.QNetwork(
      time_step_spec.observation, action_spec)
  agent = behavioral_cloning_agent.BehavioralCloningAgent(
      time_step_spec,
      action_spec,
      cloning_network=cloning_net,
      optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01))
  # Disable clipping to make sure we can see the difference in behavior.
  agent.policy._clip = False
  # Remove policy_info, as BehavioralCloningAgent expects none.
  traj = traj.replace(policy_info=())
  # TODO(b/123883319)
  if tf.executing_eagerly():
    train_and_loss = lambda: agent.train(traj)
  else:
    train_and_loss = agent.train(traj)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  for _ in range(TRAIN_ITERATIONS):
    self.evaluate(train_and_loss)
def testReplayBufferObserversWithInitialState(self):
  traj, time_step_spec, action_spec = (
      driver_test_utils.make_random_trajectory())
  policy = driver_test_utils.TFPolicyMock(time_step_spec, action_spec)
  policy_state = policy.get_initial_state(1)
  replay = trajectory_replay.TrajectoryReplay(policy)
  output_actions, output_policy_info, _ = replay.run(
      traj, policy_state=policy_state)
  new_traj = traj._replace(
      action=output_actions, policy_info=output_policy_info)
  repeat_output_actions, repeat_output_policy_info, _ = replay.run(
      new_traj, policy_state=policy_state)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  (output_actions, output_policy_info, traj,
   repeat_output_actions, repeat_output_policy_info) = self.evaluate(
       (output_actions, output_policy_info, traj,
        repeat_output_actions, repeat_output_policy_info))
  # Ensure output actions & policy info don't match the original trajectory.
  self._compare_to_original(output_actions, output_policy_info, traj)
  # Ensure a repeated run with the same deterministic policy recreates the
  # same actions & policy info.
  tf.nest.map_structure(self.assertAllEqual,
                        output_actions, repeat_output_actions)
  tf.nest.map_structure(self.assertAllEqual,
                        output_policy_info, repeat_output_policy_info)