def test_update(self):
    """Checks that a client update copies source variables into the actor model."""
    # Two independently-initialised instances of the same architecture.
    actor_model = snt.nets.MLP([50, 30])
    learner_model = snt.nets.MLP([50, 30])

    # Build each model's variables by tracing with a concrete input spec.
    input_spec = tf.TensorSpec(shape=(28,), dtype=tf.float32)
    tf2_utils.create_variables(actor_model, [input_spec])
    tf2_utils.create_variables(learner_model, [input_spec])

    # The learner's variables (as numpy) back a fake source, while the
    # actor's variables are registered with the client under 'policy'.
    actor_variables = actor_model.variables
    np_learner_variables = [
        tf2_utils.to_numpy(v) for v in learner_model.variables
    ]
    variable_source = fakes.VariableSource(np_learner_variables)
    variable_client = tf2_variable_utils.VariableClient(
        variable_source, {'policy': actor_variables})

    # A random batch of test input.
    batch = tf.random.normal(shape=(8, 28))

    # Before copying variables, the two models disagree on this batch...
    self.assertFalse(
        np.allclose(actor_model(batch).numpy(), learner_model(batch).numpy()))

    # ...and after a blocking update through the client, they agree.
    variable_client.update_and_wait()
    self.assertTrue(
        np.allclose(actor_model(batch).numpy(), learner_model(batch).numpy()))
def get_variables(self, names: List[str]) -> List[List[np.ndarray]]:
    """Returns the numpy values of the stored variables requested by name."""
    result = []
    for name in names:
        # Convert each requested variable collection to numpy on the way out.
        result.append(tf2_utils.to_numpy(self._variables[name]))
    return result
def get_variables(self, names: List[str]) -> List[List[np.ndarray]]:
    """Exposes the variables for actors to update from."""
    # NOTE(review): `names` is ignored — the entire stored collection is
    # converted and returned, presumably because this source holds a single
    # set of variables. Confirm callers never request a subset.
    # NOTE(review): the annotated List[List[np.ndarray]] return depends on
    # what tf2_utils.to_numpy yields for self._variables — TODO confirm.
    return tf2_utils.to_numpy(self._variables)
def get_variables(self, names: List[str]) -> List[Variables]:
    """Returns all stored variables as a single-element list of numpy values.

    NOTE(review): `names` is ignored here; the full variable collection is
    converted with tf2_utils.to_numpy and wrapped in a one-element list —
    presumably the caller expects exactly one named collection. Confirm.
    """
    return [tf2_utils.to_numpy(self._variables)]
def get_variables(self, names: List[str]) -> List[np.ndarray]:
    """Returns numpy copies of all stored variables.

    NOTE(review): the `names` argument is ignored — everything in
    `self._variables` is converted regardless of the request. Verify this
    matches the variable-source interface the actors rely on.
    """
    return tf2_utils.to_numpy(self._variables)