def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean = tf.get_variable('mean',
                                         initializer=np.zeros(
                                             shape, 'float64'),
                                         dtype=tf.float64)
            self._var = tf.get_variable('std',
                                        initializer=np.ones(shape, 'float64'),
                                        dtype=tf.float64)
            self._count = tf.get_variable('count',
                                          initializer=np.full((), epsilon,
                                                              'float64'),
                                          dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var,
                                           self._count]))
        self.sess = sess
        self._set_mean_var_count()
Exemplo n.º 2
0
def dense(x, size, name, weight_init=None, bias_init=0, weight_loss_dict=None, reuse=None):
    with tf.variable_scope(name, reuse=reuse):
        assert (len(tf.get_variable_scope().name.split('/')) == 2)

        w = tf.get_variable("w", [x.get_shape()[1], size], initializer=weight_init)
        b = tf.get_variable("b", [size], initializer=tf.constant_initializer(bias_init))
        weight_decay_fc = 3e-4

        if weight_loss_dict is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(w), weight_decay_fc, name='weight_decay_loss')
            if weight_loss_dict is not None:
                weight_loss_dict[w] = weight_decay_fc
                weight_loss_dict[b] = 0.0

            tf.add_to_collection(tf.get_variable_scope().name.split('/')[0] + '_' + 'losses', weight_decay)

        return tf.nn.bias_add(tf.matmul(x, w), b)
Exemplo n.º 3
0
 def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
     mean = fc(latent_vector,
               'pi',
               self.size,
               init_scale=init_scale,
               init_bias=init_bias)
     logstd = tf.get_variable(name='pi/logstd',
                              shape=[1, self.size],
                              initializer=tf.zeros_initializer())
     pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
     return self.pdfromflat(pdparam), mean
Exemplo n.º 4
0
def init_weights(shape, name=None):
    return tf.get_variable(name, initializer=tf.random_normal(shape, stddev=0.01))
Exemplo n.º 5
0
def init_bias(shape, name=None):
    return tf.get_variable(name, initializer=tf.zeros(shape, dtype='float'))
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0,
                               5.0)
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                dense(last_out,
                      hid_size,
                      "vffc%i" % (i + 1),
                      weight_init=U.normc_initializer(1.0)))
        self.vpred = dense(last_out,
                           1,
                           "vffinal",
                           weight_init=U.normc_initializer(1.0))[:, 0]

        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                dense(last_out,
                      hid_size,
                      "polfc%i" % (i + 1),
                      weight_init=U.normc_initializer(1.0)))

        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            mean = dense(last_out,
                         pdtype.param_shape()[0] // 2, "polfinal",
                         U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = dense(last_out,
                            pdtype.param_shape()[0], "polfinal",
                            U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        # change for BC
        stochastic = U.get_placeholder(name="stochastic",
                                       dtype=tf.bool,
                                       shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self.ac = ac
        self._act = U.function([stochastic, ob], [ac, self.vpred])
    def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
        """A normalizer that ensures that observations are approximately distributed according to
        a standard Normal distribution (i.e. have mean zero and variance one).

        Args:
            size (int): the size of the observation to be normalized
            eps (float): a small constant that avoids underflows
            default_clip_range (float): normalized observations are clipped to be in
                [-default_clip_range, default_clip_range]
            sess (object): the TensorFlow session to be used
        """
        self.size = size
        self.eps = eps
        self.default_clip_range = default_clip_range
        self.sess = sess if sess is not None else tf.get_default_session()

        self.local_sum = np.zeros(self.size, np.float32)
        self.local_sumsq = np.zeros(self.size, np.float32)
        self.local_count = np.zeros(1, np.float32)

        self.sum_tf = tf.get_variable(initializer=tf.zeros_initializer(),
                                      shape=self.local_sum.shape,
                                      name='sum',
                                      trainable=False,
                                      dtype=tf.float32)
        self.sumsq_tf = tf.get_variable(initializer=tf.zeros_initializer(),
                                        shape=self.local_sumsq.shape,
                                        name='sumsq',
                                        trainable=False,
                                        dtype=tf.float32)
        self.count_tf = tf.get_variable(initializer=tf.ones_initializer(),
                                        shape=self.local_count.shape,
                                        name='count',
                                        trainable=False,
                                        dtype=tf.float32)
        self.mean = tf.get_variable(initializer=tf.zeros_initializer(),
                                    shape=(self.size, ),
                                    name='mean',
                                    trainable=False,
                                    dtype=tf.float32)
        self.std = tf.get_variable(initializer=tf.ones_initializer(),
                                   shape=(self.size, ),
                                   name='std',
                                   trainable=False,
                                   dtype=tf.float32)
        self.count_pl = tf.placeholder(name='count_pl',
                                       shape=(1, ),
                                       dtype=tf.float32)
        self.sum_pl = tf.placeholder(name='sum_pl',
                                     shape=(self.size, ),
                                     dtype=tf.float32)
        self.sumsq_pl = tf.placeholder(name='sumsq_pl',
                                       shape=(self.size, ),
                                       dtype=tf.float32)

        self.update_op = tf.group(self.count_tf.assign_add(self.count_pl),
                                  self.sum_tf.assign_add(self.sum_pl),
                                  self.sumsq_tf.assign_add(self.sumsq_pl))
        self.recompute_op = tf.group(
            tf.assign(self.mean, self.sum_tf / self.count_tf),
            tf.assign(
                self.std,
                tf.sqrt(
                    tf.maximum(
                        tf.square(self.eps), self.sumsq_tf / self.count_tf -
                        tf.square(self.sum_tf / self.count_tf)))),
        )
        self.lock = threading.Lock()