Exemplo n.º 1
0
  def test_update(self):
    """Syncing via the variable client makes two identical MLPs agree."""
    # Two independently-initialized instances of the same architecture.
    actor_model = snt.nets.MLP([50, 30])
    learner_model = snt.nets.MLP([50, 30])

    # Materialize variables for both models from a fixed input spec.
    input_spec = tf.TensorSpec(shape=(28,), dtype=tf.float32)
    for model in (actor_model, learner_model):
      tf2_utils.create_variables(model, [input_spec])

    # The learner's variables (as numpy) act as the source; the actor's
    # variables are registered with the client under the 'policy' key.
    np_learner_variables = [
        tf2_utils.to_numpy(v) for v in learner_model.variables
    ]
    variable_source = fakes.VariableSource(np_learner_variables)
    variable_client = tf2_variable_utils.VariableClient(
        variable_source, {'policy': actor_model.variables})

    # A random batch of test input.
    x = tf.random.normal(shape=(8, 28))

    # Different initializations -> different outputs before syncing.
    self.assertFalse(
        np.allclose(actor_model(x).numpy(), learner_model(x).numpy()))

    # Blocking update copies source variables into the actor model.
    variable_client.update_and_wait()

    # After the copy both models compute identical outputs.
    self.assertTrue(
        np.allclose(actor_model(x).numpy(), learner_model(x).numpy()))
Exemplo n.º 2
0
 def broadcast_variables(weights):
     """Insert `weights` into the broadcaster table.

     When `weights` is None, a numpy snapshot of the learner's policy
     network variables is broadcast instead.
     """
     if weights is None:
         weights = list(
             map(tf2_utils.to_numpy, learner.learner._policy_network.variables))
     learner.client.insert(weights, {args.broadcaster_table_name: 1.0})
Exemplo n.º 3
0
    def observe(
        self,
        action: types.NestedArray,
        next_timestep: dm_env.TimeStep,
    ):
        """Record one transition, attaching cached logits and core state."""
        # Without a queue there is nothing to record.
        if self._queue is None:
            return

        # Convert the cached network outputs to numpy before enqueueing.
        numpy_extras = tf2_utils.to_numpy({
            'logits': self._prev_logits,
            'core_state': self._prev_state,
        })
        self._queue.add(action, next_timestep, numpy_extras)
Exemplo n.º 4
0
  def observe(
      self,
      action: types.NestedArray,
      next_timestep: dm_env.TimeStep,
  ):
    """Record one (batched, single-worker) transition into the queue."""
    assert next_timestep.step_type.shape[0] == 1, \
        'Currently only supports single worker.'

    # A new episode invalidates the recurrent state; drop it.
    if next_timestep.first():
      self._state = None

    # Without a queue there is nothing to record.
    if self._queue is None:
      return

    # Convert the cached network outputs to numpy before enqueueing.
    numpy_extras = tf2_utils.to_numpy(
        {'logits': self._prev_logits, 'core_state': self._prev_state})
    self._queue.add(action, next_timestep, numpy_extras)
Exemplo n.º 5
0
    def get_variables(
            self, names: Sequence[str]) -> Dict[str, Dict[str, np.ndarray]]:
        """Fetch the named networks' variables as numpy arrays.

        Args:
            names (Sequence[str]): network names

        Returns:
            Dict[str, Dict[str, np.ndarray]]: per-network, per-agent variables
        """
        variables: Dict[str, Dict[str, np.ndarray]] = {}
        for network_type in names:
            per_agent: Dict[str, np.ndarray] = {}
            for agent in self.unique_net_keys:
                per_agent[agent] = tf2_utils.to_numpy(
                    self._system_network_variables[network_type][agent])
            variables[network_type] = per_agent
        return variables
Exemplo n.º 6
0
  def test_update_and_wait(self):
    """A blocking update copies learner variables into the actor model."""
    # The variable source emulates the learner side.
    variable_source = fakes.VariableSource(
        tf2_utils.to_numpy(self._learner_model.variables))

    # The variable client emulates the actor side.
    variable_client = tf2_variable_utils.VariableClient(
        variable_source, {'policy': self._actor_model.variables})

    # A random batch of test input.
    x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))

    # Independently-initialized models disagree before the copy...
    self.assertNotAllClose(self._actor_model(x), self._learner_model(x))

    variable_client.update_and_wait()

    # ...and agree once the variables have been copied over.
    self.assertAllClose(self._actor_model(x), self._learner_model(x))
Exemplo n.º 7
0
    def get_variables(
            self, names: Sequence[str]) -> Dict[str, Dict[str, np.ndarray]]:
        """get network variables

        Args:
            names (Sequence[str]): network names

        Returns:
            Dict[str, Dict[str, np.ndarray]]: network variables
        """
        # Fix: dropped a redundant second `variables = {}` that immediately
        # clobbered the annotated initialization below.
        variables: Dict[str, Dict[str, np.ndarray]] = {}
        for network_type in names:
            if network_type == "mixing":
                # Includes the hypernet variables.
                # NOTE(review): these are returned as-is (no to_numpy), which
                # disagrees with the declared np.ndarray return type — confirm
                # whether callers expect raw framework variables here.
                variables[network_type] = self._mixing_network.variables
            else:  # Collect variables for each agent network
                variables[network_type] = {
                    key: tf2_utils.to_numpy(
                        self._system_network_variables[network_type][key])
                    for key in self.unique_net_keys
                }
        return variables
Exemplo n.º 8
0
 def get_variables(self, names: List[str]) -> List[np.ndarray]:
     """Return the full variable collection as numpy; `names` is ignored."""
     del names  # Single collection only; name-based lookup unsupported.
     return tf2_utils.to_numpy(self._variables)
Exemplo n.º 9
0
 def get_variables(self, names: List[str]) -> List[List[np.ndarray]]:
     """Look up each named variable collection and convert it to numpy."""
     collections = (self._variables[key] for key in names)
     return [tf2_utils.to_numpy(c) for c in collections]
Exemplo n.º 10
0
 def add(self, variables):
   """Convert `variables` to numpy and insert them into the variable server."""
   np_variables = list(map(tf2_utils.to_numpy, variables))
   self._variable_client[0].insert(
       np_variables, priorities={self._variable_server_name: 1.0})
Exemplo n.º 11
0
    def test_update(self):
        """Non-blocking update() fetches variables only every _UPDATE_PERIOD calls.

        A threading.Barrier makes the fake variable source block inside its
        get_variables() call, so the test can observe the in-flight request
        (variable_client._future) before the copy actually completes.
        """
        # Create a barrier to be shared between the test body and the variable
        # source. The barrier will block until, in this case, two threads call
        # wait(). Note that the (fake) variable source will call it within its
        # get_variables() call.
        barrier = threading.Barrier(2)

        # Create a variable source (emulating the learner).
        np_learner_variables = tf2_utils.to_numpy(
            self._learner_model.variables)
        variable_source = fakes.VariableSource(np_learner_variables, barrier)

        # Create a variable client (emulating the actor).
        variable_client = tf2_variable_utils.VariableClient(
            variable_source, {'policy': self._actor_model.variables},
            update_period=_UPDATE_PERIOD)

        # Create some random batch of test input:
        x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))

        # Create variables by doing the computation once.
        learner_output = self._learner_model(x)
        actor_output = self._actor_model(x)
        del learner_output, actor_output

        for _ in range(_UPDATE_PERIOD):
            # Before the update period is reached, the models have different outputs.
            self.assertNotAllClose(self._actor_model.variables,
                                   self._learner_model.variables)

            # Before the update period is reached, the variable client should not make
            # any requests for variables.
            self.assertIsNone(variable_client._future)

            variable_client.update()

        # Make sure the last call created a request for variables and reset the
        # internal call counter.
        self.assertIsNotNone(variable_client._future)
        self.assertEqual(variable_client._call_counter, 0)
        future = variable_client._future

        for _ in range(_UPDATE_PERIOD):
            # Before the barrier allows the variables to be released, the models have
            # different outputs.
            self.assertNotAllClose(self._actor_model.variables,
                                   self._learner_model.variables)

            variable_client.update()

            # Make sure no new requests are made.
            self.assertEqual(variable_client._future, future)

        # Calling wait() on the barrier will now allow the variables to be copied
        # over from source to client.
        barrier.wait()

        # Update once more to ensure the variables are copied over.
        # (The future is cleared by the client once the copy has landed.)
        while variable_client._future is not None:
            variable_client.update()

        # After a number of update calls, the variables should be the same.
        self.assertAllClose(self._actor_model.variables,
                            self._learner_model.variables)
Exemplo n.º 12
0
 def get_variables(self, names: List[str]) -> List[Variables]:
   """Exposes the variables for actors to update from."""
   # `names` is unused: the full collection is always returned as numpy.
   return tf2_utils.to_numpy(self._variables)
Exemplo n.º 13
0
 def get_variables(self, names: List[str]) -> List[Variables]:
   """Return the variable collection, as numpy, wrapped in a one-element list."""
   numpy_variables = tf2_utils.to_numpy(self._variables)
   return [numpy_variables]