def test_update(self):
    """Checks that updating the client copies learner variables to the actor."""
    # Two structurally identical MLPs whose weights start out different.
    actor_model = snt.nets.MLP([50, 30])
    learner_model = snt.nets.MLP([50, 30])

    # Force variable creation before wiring up the client/source pair.
    input_spec = tf.TensorSpec(shape=(28,), dtype=tf.float32)
    tf2_utils.create_variables(actor_model, [input_spec])
    tf2_utils.create_variables(learner_model, [input_spec])

    # The learner side is served as numpy arrays through a fake source; the
    # actor side hands its live variables to the client.
    actor_variables = actor_model.variables
    np_learner_variables = [
        tf2_utils.to_numpy(v) for v in learner_model.variables
    ]
    variable_source = fakes.VariableSource(np_learner_variables)
    variable_client = tf2_variable_utils.VariableClient(
        variable_source, {'policy': actor_variables})

    # A random probe batch to compare the two models on.
    x = tf.random.normal(shape=(8, 28))

    # Different weights -> different outputs before any sync.
    self.assertFalse(
        np.allclose(actor_model(x).numpy(), learner_model(x).numpy()))

    # Blocking update: copies source variables into the client's variables.
    variable_client.update_and_wait()

    # After the copy, both models compute identical outputs.
    self.assertTrue(
        np.allclose(actor_model(x).numpy(), learner_model(x).numpy()))
def broadcast_variables(weights):
    """Inserts policy weights into the broadcaster table.

    When `weights` is None, the current policy-network variables are read
    from the learner and converted to numpy before being broadcast.
    """
    if weights is not None:
        payload = weights
    else:
        # NOTE(review): reaches into learner.learner._policy_network — a
        # private attribute of the wrapped learner; confirm this coupling.
        payload = [
            tf2_utils.to_numpy(v)
            for v in learner.learner._policy_network.variables
        ]
    learner.client.insert(payload, {args.broadcaster_table_name: 1.0})
def observe(
    self,
    action: types.NestedArray,
    next_timestep: dm_env.TimeStep,
):
    """Records a transition into the queue, with logits/core-state extras."""
    # Without an attached queue there is nowhere to record observations.
    if self._queue is None:
        return
    # Bundle the cached policy outputs from the previous step and convert
    # them to numpy before enqueueing.
    side_info = tf2_utils.to_numpy(
        {'logits': self._prev_logits, 'core_state': self._prev_state})
    self._queue.add(action, next_timestep, side_info)
def observe(
    self,
    action: types.NestedArray,
    next_timestep: dm_env.TimeStep,
):
    """Records a transition, resetting the recurrent state on episode start.

    Args:
      action: the action taken in response to the previous timestep.
      next_timestep: the timestep produced by taking `action`.

    Raises:
      ValueError: if the timestep's leading (batch) dimension is not 1.
    """
    # `assert` is stripped under `python -O`, so enforce the single-worker
    # restriction with an explicit check instead.
    if next_timestep.step_type.shape[0] != 1:
        raise ValueError('Currently only supports single worker.')

    # Reset hidden state every new episode.
    if next_timestep.first():
        self._state = None

    # Nothing to record when no queue is attached.
    if self._queue is None:
        return

    # Attach the previous step's logits and recurrent core state as extras.
    extras = {'logits': self._prev_logits, 'core_state': self._prev_state}
    extras = tf2_utils.to_numpy(extras)
    self._queue.add(action, next_timestep, extras)
def get_variables(
    self, names: Sequence[str]) -> Dict[str, Dict[str, np.ndarray]]:
    """Returns network variables keyed by network name, then agent key.

    Args:
      names: names of the networks to look up.

    Returns:
      A mapping from each requested network name to a dict of per-agent
      numpy variable values.
    """
    result: Dict[str, Dict[str, np.ndarray]] = {}
    for net_name in names:
        per_net = self._system_network_variables[net_name]
        result[net_name] = {
            agent: tf2_utils.to_numpy(per_net[agent])
            for agent in self.unique_net_keys
        }
    return result
def test_update_and_wait(self):
    """A blocking update must copy learner weights into the actor model."""
    # Fake source (emulating the learner) serving numpy weights.
    source = fakes.VariableSource(
        tf2_utils.to_numpy(self._learner_model.variables))

    # Client (emulating the actor) owning the actor's live variables.
    client = tf2_variable_utils.VariableClient(
        source, {'policy': self._actor_model.variables})

    # Random probe batch.
    x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))

    # Distinct initializations -> distinct outputs before syncing.
    self.assertNotAllClose(self._actor_model(x), self._learner_model(x))

    # Blocking update copies the source variables into the client.
    client.update_and_wait()

    # After the copy, the two models agree.
    self.assertAllClose(self._actor_model(x), self._learner_model(x))
def get_variables(
    self, names: Sequence[str]) -> Dict[str, Dict[str, np.ndarray]]:
    """Returns network variables for each requested network name.

    Args:
      names: network names to look up (agent networks or "mixing").

    Returns:
      A dict mapping each name to either the mixing network's variables or
      a per-agent-key dict of numpy variable values.
    """
    # Fix: the original immediately re-assigned `variables = {}` after the
    # annotated initializer; the redundant assignment is removed.
    variables: Dict[str, Dict[str, np.ndarray]] = {}
    for network_type in names:
        if network_type == "mixing":
            # Includes the hypernet variables.
            # NOTE(review): unlike the per-agent branch, these are returned
            # as live tf variables rather than numpy arrays — confirm that
            # callers expect this asymmetry.
            variables[network_type] = self._mixing_network.variables
        else:
            # Collect variables for each agent network.
            variables[network_type] = {
                key: tf2_utils.to_numpy(
                    self._system_network_variables[network_type][key])
                for key in self.unique_net_keys
            }
    return variables
def get_variables(self, names: List[str]) -> List[np.ndarray]:
    """Returns the stored variables as numpy arrays.

    The `names` argument is accepted for interface compatibility but is not
    used: this source serves a single flat variable collection.
    """
    del names  # Unused; one collection is served regardless of the query.
    return tf2_utils.to_numpy(self._variables)
def get_variables(self, names: List[str]) -> List[List[np.ndarray]]:
    """Looks up each named variable collection and converts it to numpy."""
    stored = self._variables
    return [tf2_utils.to_numpy(stored[key]) for key in names]
def add(self, variables):
    """Converts `variables` to numpy and inserts them into the server table."""
    payload = [tf2_utils.to_numpy(item) for item in variables]
    priorities = {self._variable_server_name: 1.0}
    self._variable_client[0].insert(payload, priorities=priorities)
def test_update(self):
    """Verifies rate-limited, asynchronous variable updates."""
    # A two-party barrier shared with the fake variable source: the source
    # calls wait() inside its get_variables(), so the variables are only
    # released once the test body calls wait() as well.
    barrier = threading.Barrier(2)

    # Learner-side source and actor-side client with a fixed update period.
    learner_weights = tf2_utils.to_numpy(self._learner_model.variables)
    source = fakes.VariableSource(learner_weights, barrier)
    client = tf2_variable_utils.VariableClient(
        source, {'policy': self._actor_model.variables},
        update_period=_UPDATE_PERIOD)

    # Random probe batch; one forward pass per model creates the variables.
    x = tf.random.normal(shape=(_BATCH_SIZE, _INPUT_SIZE))
    self._learner_model(x)
    self._actor_model(x)

    for _ in range(_UPDATE_PERIOD):
        # Until the update period elapses, the two models keep different
        # weights and the client issues no variable requests.
        self.assertNotAllClose(self._actor_model.variables,
                               self._learner_model.variables)
        self.assertIsNone(client._future)
        client.update()

    # The last update() call should have fired off a request for variables
    # and reset the internal call counter.
    self.assertIsNotNone(client._future)
    self.assertEqual(client._call_counter, 0)

    pending_request = client._future
    for _ in range(_UPDATE_PERIOD):
        # The barrier still blocks the source: weights stay different and
        # no additional request is created.
        self.assertNotAllClose(self._actor_model.variables,
                               self._learner_model.variables)
        client.update()
        self.assertEqual(client._future, pending_request)

    # Release the source so the pending request can complete.
    barrier.wait()

    # Keep updating until the in-flight request has been consumed.
    while client._future is not None:
        client.update()

    # The actor's variables now match the learner's.
    self.assertAllClose(self._actor_model.variables,
                        self._learner_model.variables)
def get_variables(self, names: List[str]) -> List[List[np.ndarray]]:
    """Exposes the variables for actors to update from.

    `names` is accepted for interface compatibility but not consulted; the
    single stored collection is always returned.
    """
    del names  # Unused; one collection is served regardless of the query.
    return tf2_utils.to_numpy(self._variables)
def get_variables(self, names: List[str]) -> List[Variables]:
    """Returns the stored variable collection (as numpy) wrapped in a list."""
    converted = tf2_utils.to_numpy(self._variables)
    return [converted]