Exemplo n.º 1
0
def CnnMlpFeatureExtractor(obs, image_height=291, image_width=150):
    """Split a flat observation into a grid-map image and extra features.

    The first ``image_height * image_width`` columns of ``obs`` are a
    flattened single-channel grid map that is run through ``nature_cnn``;
    the remaining columns are passed through untouched and concatenated
    with the CNN output.

    Args:
        obs (tf.Tensor): 2-D tensor of shape ``(batch, image_size + k)``
            where ``k >= 1`` extra feature columns follow the image.
        image_height (int): Height of the grid map.
        image_width (int): Width of the grid map.

    Returns:
        tf.Tensor: ``(batch, cnn_features + k)`` concatenated features.
    """
    image_size = image_height * image_width
    assert len(obs.shape) == 2 and obs.shape[1] > image_size
    feature_num = obs.shape[1] - image_size
    # Size -1 keeps the whole batch dimension; the previous hard-coded 1
    # silently dropped every observation after the first in a batch.
    grid_map_flat = tf.slice(obs, [0, 0], [-1, image_size])
    grid_map = tf.reshape(grid_map_flat, [-1, image_height, image_width, 1])
    extracted_features = nature_cnn(grid_map)
    extracted_features = tf.layers.flatten(extracted_features)
    features = tf.slice(obs, [0, image_size], [-1, feature_num])
    return tf.concat([extracted_features, features], 1)
Exemplo n.º 2
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 **kwargs):
        """Actor-critic policy that runs a grid-map image through a CNN and
        concatenates the result with trailing scalar features.

        Each observation is laid out as ``h * w`` flattened grid-map values
        followed by ``feature_num`` extra scalar features.

        Args:
            sess: TensorFlow session the graph is built in.
            ob_space: Observation space of the environment.
            ac_space: Action space of the environment.
            n_env: Number of parallel environments.
            n_steps: Number of steps per rollout segment.
            n_batch: Total batch size.
            reuse (bool): Whether to reuse variables in the "model" scope.
            **kwargs: Forwarded to ``nature_cnn``.
        """
        super(CnnMlpPolicy, self).__init__(sess,
                                           ob_space,
                                           ac_space,
                                           n_env,
                                           n_steps,
                                           n_batch,
                                           reuse=reuse,
                                           scale=True)

        with tf.variable_scope("model", reuse=reuse):
            activ = tf.nn.relu

            feature_num = 7
            h = 291
            w = 150
            # Derive the flattened image length from h and w instead of
            # repeating the magic constant 43650 (= 291 * 150) in both
            # slice calls below.
            image_size = h * w
            proc_obs_float = tf.cast(self.processed_obs, dtype=tf.float32)
            # NOTE(review): slice size and reshape hard-code batch size 1;
            # confirm this policy is only ever run with n_batch == 1.
            grid_map_flat = tf.slice(proc_obs_float, [0, 0], [1, image_size])
            grid_map = tf.reshape(grid_map_flat, [1, h, w, 1])
            extracted_features = nature_cnn(grid_map, **kwargs)
            extracted_features = tf.layers.flatten(extracted_features)
            features = tf.slice(proc_obs_float, [0, image_size],
                                [1, feature_num])
            pi_h = tf.concat([extracted_features, features], 1)
            for i, layer_size in enumerate([128, 128, 128]):
                pi_h = activ(
                    tf.layers.dense(pi_h, layer_size, name='pi_fc' + str(i)))
            pi_latent = pi_h

            vf_h = tf.concat([extracted_features, features], 1)
            for i, layer_size in enumerate([32, 32]):
                vf_h = activ(
                    tf.layers.dense(vf_h, layer_size, name='vf_fc' + str(i)))
            value_fn = tf.layers.dense(vf_h, 1, name='vf')
            vf_latent = vf_h

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._value_fn = value_fn
        self._setup_init()
Exemplo n.º 3
0
def image_and_pose_network(observation, **kwargs):
    """ Network to process image and pose data.
    
    Use the stable baselines nature_cnn to process images. The resulting
    feature vector is then combined with the pose estimate and given to an
    LSTM (LSTM defined in PPO2 below).
    
    Args:
        observation (tf.Tensor): Tensor containing encoded image and
            pose data; split apart by ``decode_tensor_observations``.
        **kwargs: Accepted for interface compatibility only — NOT
            forwarded to ``nature_cnn``.
        
    Returns:
        tf.Tensor: Image features concatenated with the pose estimate
        along the last axis.
    """
    imgs, pose = decode_tensor_observations(observation)
    image_features = nature_cnn(imgs)
    return tf.concat((image_features, pose), axis=-1)
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 **kwargs):
        """CNN actor-critic policy: a shared ``nature_cnn`` feature
        extractor followed by separate fully-connected heads for the
        policy and the value function.

        Args:
            sess: TensorFlow session the graph is built in.
            ob_space: Observation space of the environment.
            ac_space: Action space of the environment.
            n_env: Number of parallel environments.
            n_steps: Number of steps per rollout segment.
            n_batch: Total batch size.
            reuse (bool): Whether to reuse variables in the "model" scope.
            **kwargs: Forwarded to ``nature_cnn``.
        """
        super(CommunicationPolicy, self).__init__(sess,
                                                  ob_space,
                                                  ac_space,
                                                  n_env,
                                                  n_steps,
                                                  n_batch,
                                                  reuse=reuse,
                                                  scale=True)

        with tf.variable_scope("model", reuse=reuse):

            def _mlp(inputs, widths, prefix):
                # Stack of ReLU-activated dense layers named
                # <prefix>0 .. <prefix>(len(widths)-1).
                out = inputs
                for idx, width in enumerate(widths):
                    dense = tf.layers.dense(out, width,
                                            name=prefix + str(idx))
                    out = tf.nn.relu(dense)
                return out

            # Shared convolutional trunk, flattened to a feature vector.
            shared = tf.layers.flatten(
                nature_cnn(self.processed_obs, **kwargs))

            pi_latent = _mlp(shared, [128, 128, 128], 'pi_fc')
            vf_latent = _mlp(shared, [32, 32], 'vf_fc')
            value_fn = tf.layers.dense(vf_latent, 1, name='vf')

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._value_fn = value_fn
        self._setup_init()