def _setup_normalizer(self, beta, size):
  """Sets up the input normalizer and a distribution strategy to run."""
  normalizer = running_statistics.EMAMeanStd(beta=beta)
  strategy = test_utils.create_distribution_strategy(
      use_tpu=self.primary_device == 'TPU')
  self.assertEqual(strategy.num_replicas_in_sync, 2)
  with strategy.scope():
    # Create the normalizer variables under the strategy scope so they are
    # mirrored across both replicas.
    normalizer.init(size)
  return normalizer, strategy
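# A minimal usage sketch (not part of the original tests): driving the
# EMAMeanStd normalizer set up above on every replica. The `update` method
# name is an assumption; only `init` is exercised in this file.
def test_ema_normalizer_update_sketch(self):
  normalizer, strategy = self._setup_normalizer(beta=0.99, size=[3])

  @tf.function
  def update_fn():
    # Assumed API: fold a batch of observations into the running statistics.
    normalizer.update(tf.ones([8, 3]))

  # The variables were created under strategy.scope(), so both replicas
  # update the same mirrored statistics.
  strategy.run(update_fn)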
def _setup(self, beta):
  """Sets up the reward normalizer and a distribution strategy to run."""
  reward_normalizer = popart.PopArt(running_statistics.EMAMeanStd(beta))
  strategy = test_utils.create_distribution_strategy(
      use_tpu=self.primary_device == 'TPU')
  self.assertEqual(strategy.num_replicas_in_sync, 2)
  with strategy.scope():
    reward_normalizer.init()
  return reward_normalizer, strategy
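# A minimal usage sketch (not part of the original tests): updating the PopArt
# reward normalizer from a batch of returns. The `update` method name and its
# argument are assumptions; only `init` appears in this file.
def test_popart_update_sketch(self):
  reward_normalizer, strategy = self._setup(beta=0.99)

  @tf.function
  def update_fn():
    # Assumed API: refresh the return statistics that PopArt uses to rescale
    # the value head while preserving its outputs.
    reward_normalizer.update(tf.random.normal([16]))

  strategy.run(update_fn)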
def _setup_agent(self, kwargs):
  """Sets up the agent and a distribution strategy to run."""
  agent = continuous_control_agent.ContinuousControlAgent(
      parametric_distribution.normal_tanh_distribution(20), **kwargs)
  strategy = test_utils.create_distribution_strategy(
      use_tpu=self.primary_device == 'TPU')
  self.assertEqual(strategy.num_replicas_in_sync, 2)
  with strategy.scope():
    # Call the agent once on dummy input so its variables are created under
    # the strategy scope.
    wrapped_f = tf.function(agent.__call__)
    wrapped_f(*_dummy_input(False), unroll=False, is_training=True)
  return agent, strategy
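# A minimal usage sketch (not part of the original tests): running the
# initialized agent on every replica with the same call signature used in
# _setup_agent above. `_dummy_input` is the helper defined elsewhere in this
# file; the empty kwargs dict is a placeholder.
def test_agent_call_per_replica_sketch(self):
  agent, strategy = self._setup_agent({})

  @tf.function
  def inference_fn():
    return agent(*_dummy_input(False), unroll=False, is_training=False)

  strategy.run(inference_fn)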
def _setup_normalizer(self, size, std_min_value=None):
  """Sets up the input normalizer and a distribution strategy to run."""
  args = {}
  if std_min_value is not None:
    args.update(std_min_value=std_min_value)
  normalizer = running_statistics.AverageMeanStd(**args)
  strategy = test_utils.create_distribution_strategy(
      use_tpu=self.primary_device == 'TPU')
  self.assertEqual(strategy.num_replicas_in_sync, 2)
  with strategy.scope():
    normalizer.init(size)
  return normalizer, strategy
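# A minimal usage sketch (not part of the original tests): with std_min_value
# set, normalization should stay finite even for constant inputs. The `update`
# and `normalize` method names are assumptions; only `init` appears here.
def test_std_min_value_sketch(self):
  normalizer, strategy = self._setup_normalizer(size=[2], std_min_value=0.5)

  @tf.function
  def step_fn():
    # Constant data would otherwise drive the estimated std toward zero; the
    # assumed floor keeps the division well defined.
    normalizer.update(tf.ones([4, 2]))
    return normalizer.normalize(tf.ones([2]))

  out = strategy.experimental_local_results(strategy.run(step_fn))[0]
  self.assertAllEqual(tf.math.is_finite(out), [True, True])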