def _infer(self, **kwargs):
    """Evaluate the policy graph for the observation given as ``obs_t``.

    The value of ``kwargs['obs_t']`` is fed to ``self.step_obs_ph`` as-is
    (no extra batch dimension is added here), and the ``action``,
    ``log_policy`` and ``value`` ops are fetched in a single ``run`` call
    on the current default TensorFlow session.

    Returns:
        An ``ActionOutput`` built positionally from the three fetched
        results, in the order action, log-policy, value.

    Raises:
        KeyError: if ``obs_t`` was not passed as a keyword argument.
    """
    observation_feed = {self.step_obs_ph: kwargs['obs_t']}
    # NOTE(review): relies on a default session being active in the caller's
    # context; tf.get_default_session() yields None otherwise -- confirm.
    session = tf.get_default_session()
    fetches = [self.action, self.log_policy, self.value]
    results = session.run(fetches, feed_dict=observation_feed)
    return ActionOutput(*results)
def _infer(self, **kwargs):
    """Evaluate the policy graph for the single observation ``obs_t``.

    The observation is wrapped in a one-element array before being fed to
    ``self.obs_t_ph``, and the ``action``, ``log_prob`` and ``value`` ops
    are fetched together from the current default TensorFlow session.

    Returns:
        An ``ActionOutput`` built positionally from the fetched results
        (action, log_prob, value). The results are passed through exactly
        as returned by ``run``; no leading dimension is stripped here.

    Raises:
        KeyError: if ``obs_t`` was not passed as a keyword argument.
    """
    # NOTE(review): relies on a default session being active in the caller's
    # context; tf.get_default_session() yields None otherwise -- confirm.
    session = tf.get_default_session()
    # Wrapping in a list adds a leading axis of length one.
    observation_feed = {self.obs_t_ph: np.array([kwargs['obs_t']])}
    fetches = [self.action, self.log_prob, self.value]
    results = session.run(fetches, feed_dict=observation_feed)
    return ActionOutput(*results)
def _infer(self, **kwargs):
    """Evaluate the action and value ops for the single observation ``obs_t``.

    The observation is wrapped in a one-element array before being fed to
    ``self.obs_t_ph``. Only ``self.action`` and ``self.value`` are fetched;
    this variant does not produce a log-probability.

    Returns:
        An ``ActionOutput`` whose ``action`` and ``value`` are the first
        entries of the fetched results and whose ``log_prob`` is ``None``.

    Raises:
        KeyError: if ``obs_t`` was not passed as a keyword argument.
    """
    # Wrapping in a list adds a leading axis of length one.
    observation_feed = {self.obs_t_ph: np.array([kwargs['obs_t']])}
    # NOTE(review): relies on a default session being active in the caller's
    # context; tf.get_default_session() yields None otherwise -- confirm.
    session = tf.get_default_session()
    fetched_action, fetched_value = session.run(
        [self.action, self.value],
        feed_dict=observation_feed,
    )
    # Index 0 undoes the length-one leading axis added to the input above.
    return ActionOutput(
        action=fetched_action[0],
        log_prob=None,
        value=fetched_value[0],
    )
def make_output(num_actions=4, batch_size=1, batch=False):
    """Build an ``ActionOutput`` filled with random values.

    Args:
        num_actions: length of the action vector (per sample).
        batch_size: number of samples; only used when ``batch`` is truthy.
        batch: when truthy, produce batched arrays; otherwise produce a
            single unbatched sample.

    Returns:
        ``ActionOutput(action, log_prob, value)`` where, in batched mode,
        ``action`` has shape ``(batch_size, num_actions)`` and ``log_prob``
        / ``value`` have shape ``(batch_size,)``; in unbatched mode
        ``action`` has shape ``(num_actions,)`` and ``log_prob`` / ``value``
        are plain floats.
    """
    # Draw order (action, log_prob, value) is kept fixed in both branches so
    # results under a seeded global NumPy RNG stay reproducible.
    if not batch:
        single_action = np.random.random((num_actions,))
        single_log_prob = np.random.random()
        single_value = np.random.random()
        return ActionOutput(single_action, single_log_prob, single_value)

    per_sample_shape = (batch_size,)
    batched_action = np.random.random((batch_size, num_actions))
    batched_log_prob = np.random.random(per_sample_shape)
    batched_value = np.random.random(per_sample_shape)
    return ActionOutput(batched_action, batched_log_prob, batched_value)
def test_properties(self):
    """Check that ``ActionOutput`` exposes its constructor arguments.

    Builds an ``ActionOutput`` from three positional marker strings and
    asserts that the ``action``, ``value`` and ``log_prob`` attributes
    each return the marker passed in the corresponding position.
    """
    result = ActionOutput('action', 'log_prob', 'value')
    # Each marker string doubles as the attribute name it should come back
    # from, so one tuple drives both the lookup and the expectation.
    for field_name in ('action', 'value', 'log_prob'):
        self.assertEqual(getattr(result, field_name), field_name)