def create_input_placeholders(self): with self.graph.as_default(): ( self.global_step, self.increment_step_op, self.steps_to_increment, ) = ModelUtils.create_global_steps() self.visual_in = ModelUtils.create_visual_input_placeholders( self.brain.camera_resolutions ) self.vector_in = ModelUtils.create_vector_input(self.vec_obs_size) if self.normalize: normalization_tensors = ModelUtils.create_normalizer(self.vector_in) self.update_normalization_op = normalization_tensors.update_op self.normalization_steps = normalization_tensors.steps self.running_mean = normalization_tensors.running_mean self.running_variance = normalization_tensors.running_variance self.processed_vector_in = ModelUtils.normalize_vector_obs( self.vector_in, self.running_mean, self.running_variance, self.normalization_steps, ) else: self.processed_vector_in = self.vector_in self.update_normalization_op = None self.batch_size_ph = tf.placeholder( shape=None, dtype=tf.int32, name="batch_size" ) self.sequence_length_ph = tf.placeholder( shape=None, dtype=tf.int32, name="sequence_length" ) self.mask_input = tf.placeholder( shape=[None], dtype=tf.float32, name="masks" ) # Only needed for PPO, but needed for BC module self.epsilon = tf.placeholder( shape=[None, self.act_size[0]], dtype=tf.float32, name="epsilon" ) self.mask = tf.cast(self.mask_input, tf.int32) tf.Variable( int(self.brain.vector_action_space_type == "continuous"), name="is_continuous_control", trainable=False, dtype=tf.int32, ) tf.Variable( self._version_number_, name="version_number", trainable=False, dtype=tf.int32, ) tf.Variable( self.m_size, name="memory_size", trainable=False, dtype=tf.int32 ) if self.brain.vector_action_space_type == "continuous": tf.Variable( self.act_size[0], name="action_output_shape", trainable=False, dtype=tf.int32, ) else: tf.Variable( sum(self.act_size), name="action_output_shape", trainable=False, dtype=tf.int32, )
def __init__( self, policy, m_size=None, h_size=128, normalize=False, use_recurrent=False, num_layers=2, stream_names=None, vis_encode_type=EncoderType.SIMPLE, ): super().__init__( policy, m_size, h_size, normalize, use_recurrent, num_layers, stream_names, vis_encode_type, ) with tf.variable_scope(TARGET_SCOPE): self.visual_in = ModelUtils.create_visual_input_placeholders( policy.brain.camera_resolutions ) self.vector_in = ModelUtils.create_vector_input(policy.vec_obs_size) if self.policy.normalize: normalization_tensors = ModelUtils.create_normalizer(self.vector_in) self.update_normalization_op = normalization_tensors.update_op self.normalization_steps = normalization_tensors.steps self.running_mean = normalization_tensors.running_mean self.running_variance = normalization_tensors.running_variance self.processed_vector_in = ModelUtils.normalize_vector_obs( self.vector_in, self.running_mean, self.running_variance, self.normalization_steps, ) else: self.processed_vector_in = self.vector_in self.update_normalization_op = None if self.policy.use_recurrent: self.memory_in = tf.placeholder( shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in" ) self.value_memory_in = self.memory_in hidden_streams = ModelUtils.create_observation_streams( self.visual_in, self.processed_vector_in, 1, self.h_size, 0, vis_encode_type=vis_encode_type, stream_scopes=["critic/value/"], ) if self.policy.use_continuous_act: self._create_cc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False) else: self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False) if self.use_recurrent: self.memory_out = tf.concat( self.value_memory_out, axis=1 ) # Needed for Barracuda to work