def _setup_normalizer(self, beta, size):
     """Builds an EMA-based input normalizer under a two-replica strategy.

     Args:
       beta: EMA decay rate forwarded to `running_statistics.EMAMeanStd`.
       size: Shape argument passed to the normalizer's `init()`.

     Returns:
       A `(normalizer, strategy)` tuple; `init` has already run inside the
       strategy's scope so variables live on the strategy's devices.
     """
     use_tpu = self.primary_device == 'TPU'
     strategy = test_utils.create_distribution_strategy(use_tpu=use_tpu)
     # The tests assume exactly two replicas.
     self.assertEqual(strategy.num_replicas_in_sync, 2)
     normalizer = running_statistics.EMAMeanStd(beta=beta)
     with strategy.scope():
         normalizer.init(size)
     return normalizer, strategy
# Example 2
 def _setup(self, beta):
     """Builds a PopArt reward normalizer under a two-replica strategy.

     Args:
       beta: EMA decay rate for the underlying `EMAMeanStd` statistics.

     Returns:
       A `(reward_normalizer, strategy)` tuple; `init()` has already run
       inside the strategy's scope.
     """
     use_tpu = self.primary_device == 'TPU'
     strategy = test_utils.create_distribution_strategy(use_tpu=use_tpu)
     # The tests assume exactly two replicas.
     self.assertEqual(strategy.num_replicas_in_sync, 2)
     ema_stats = running_statistics.EMAMeanStd(beta)
     reward_normalizer = popart.PopArt(ema_stats)
     with strategy.scope():
         reward_normalizer.init()
     return reward_normalizer, strategy
 def _setup_agent(self, kwargs):
     """Builds a continuous-control agent and traces one call under a strategy.

     Args:
       kwargs: Extra keyword arguments forwarded to `ContinuousControlAgent`.

     Returns:
       An `(agent, strategy)` tuple. The agent has been invoked once (via a
       `tf.function`-wrapped call on dummy input) inside the strategy scope
       so its variables are created before the caller uses it.
     """
     action_distribution = parametric_distribution.normal_tanh_distribution(20)
     agent = continuous_control_agent.ContinuousControlAgent(
         action_distribution, **kwargs)
     use_tpu = self.primary_device == 'TPU'
     strategy = test_utils.create_distribution_strategy(use_tpu=use_tpu)
     # The tests assume exactly two replicas.
     self.assertEqual(strategy.num_replicas_in_sync, 2)
     with strategy.scope():
         traced_call = tf.function(agent.__call__)
         traced_call(*_dummy_input(False), unroll=False, is_training=True)
     return agent, strategy
 def _setup_normalizer(self, size, std_min_value=None):
     """Builds an AverageMeanStd input normalizer under a two-replica strategy.

     Args:
       size: Shape argument passed to the normalizer's `init()`.
       std_min_value: Optional lower bound on the standard deviation; only
         forwarded to `AverageMeanStd` when not None, so the class default
         applies otherwise.

     Returns:
       A `(normalizer, strategy)` tuple; `init` has already run inside the
       strategy's scope.
     """
     ctor_kwargs = (
         {} if std_min_value is None else {'std_min_value': std_min_value})
     normalizer = running_statistics.AverageMeanStd(**ctor_kwargs)
     use_tpu = self.primary_device == 'TPU'
     strategy = test_utils.create_distribution_strategy(use_tpu=use_tpu)
     # The tests assume exactly two replicas.
     self.assertEqual(strategy.num_replicas_in_sync, 2)
     with strategy.scope():
         normalizer.init(size)
     return normalizer, strategy