Exemplo n.º 1
0
def model(img_in, num_actions, scope, noisy=False, reuse=False):
    """Build a small fully connected Q-value network.

    The previous docstring pointed at the DQN Nature paper
    (https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf).
    Note that the body below only flattens the input and stacks dense
    layers; no convolution is applied in this function.

    Args:
        img_in: Input tensor; flattened before the first dense layer.
        num_actions: Number of units in the final layer.
        scope: Name of the variable scope wrapping every layer built here.
        noisy: If true, the "action_value" head is built with ``noisy_dense``
            (noisy networks, https://arxiv.org/abs/1706.10295) instead of
            ``layers.fully_connected``.
        reuse: Forwarded to ``tf.variable_scope`` for ``scope``.

    Returns:
        The output tensor of the last layer of the "action_value" head.
    """
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("convnet"):
            # Despite the scope name, this stage is flatten + one dense layer.
            features = layers.flatten(img_in)
            features = layers.fully_connected(features,
                                              num_outputs=16,
                                              activation_fn=tf.nn.relu)

        with tf.variable_scope("action_value"):
            if not noisy:
                hidden = layers.fully_connected(features,
                                                num_outputs=16,
                                                activation_fn=tf.nn.relu)
                q_values = layers.fully_connected(hidden,
                                                  num_outputs=num_actions,
                                                  activation_fn=None)
            else:
                # Noisy variant of the same two-layer head.
                hidden = noisy_dense(features,
                                     name='noisy_fc1',
                                     size=16,
                                     activation_fn=tf.nn.relu)
                q_values = noisy_dense(hidden,
                                       name='noisy_fc2',
                                       size=num_actions)

    return q_values
Exemplo n.º 2
0
    def _init(self, ob_space, ac_space, hid_size, num_hid_layers, noisy_nets=False, gaussian_fixed_var=True):
        """Build the value and policy networks and the callable wrappers.

        Two separate stacks of ``num_hid_layers`` SELU layers of width
        ``hid_size`` are built on top of the normalized, clipped observation:
        one feeding the value prediction (``self.vpred``) and one feeding the
        parameters of the action distribution (``self.pdparam`` / ``self.pd``).

        Args:
            ob_space: Observation space; must be a ``gym.spaces.Box``.
            ac_space: Action space, passed to ``make_pdtype``.
            hid_size: Width of each hidden layer.
            num_hid_layers: Number of hidden layers in each of the two stacks.
            noisy_nets: If true, policy layers are built with
                ``U.noisy_dense`` instead of ``U.dense``. The value stack
                always uses ``U.dense``.
            gaussian_fixed_var: If true and ``ac_space`` is a Box, the policy
                outputs only the mean and a separate ``logstd`` variable is
                broadcast alongside it. This path asserts that
                ``noisy_nets`` is False.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        pdtype = make_pdtype(ac_space)
        self.pdtype = pdtype

        # Leading dimension is left unspecified (None).
        ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[None] + list(ob_space.shape))
        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        # Normalize with the running statistics, then clip to [-5, 5].
        normalized_ob = (ob - self.ob_rms.mean) / self.ob_rms.std
        obz = tf.clip_by_value(normalized_ob, -5.0, 5.0)

        # Layer names are 1-based: vffc1, vffc2, ...
        layer_ids = range(1, num_hid_layers + 1)

        # Value-function stack.
        value_hidden = obz
        for layer_id in layer_ids:
            pre_activation = U.dense(value_hidden, hid_size, "vffc%i" % layer_id, weight_init=U.normc_initializer(1.0))
            value_hidden = tf.nn.selu(pre_activation)
        value_out = U.dense(value_hidden, 1, "vffinal", weight_init=U.normc_initializer(1.0))
        self.vpred = value_out[:, 0]

        # Policy stack, starting again from the normalized observation.
        policy_hidden = obz
        for layer_id in layer_ids:
            if noisy_nets:
                pre_activation = U.noisy_dense(policy_hidden, hid_size, "noisy_polfc%i" % layer_id, weight_init=U.normc_initializer(1.0))
            else:
                pre_activation = U.dense(policy_hidden, hid_size, "polfc%i" % layer_id, weight_init=U.normc_initializer(1.0))
            policy_hidden = tf.nn.selu(pre_activation)

        use_fixed_var = gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box)
        if use_fixed_var:
            assert noisy_nets is False
            mean = U.dense(policy_hidden, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
            # `mean * 0.0 + logstd` broadcasts logstd to the shape of mean.
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        elif noisy_nets:
            pdparam = U.noisy_dense(policy_hidden, pdtype.param_shape()[0], "polfinal", U.normc_initializer(0.01))
        else:
            pdparam = U.dense(policy_hidden, pdtype.param_shape()[0], "polfinal", U.normc_initializer(0.01))

        self.pdparam = pdparam
        self.pd = pdtype.pdfromflat(pdparam)

        # No recurrent state is defined by this method.
        self.state_in = []
        self.state_out = []

        # `stochastic` chooses between a sample and the mode of the distribution.
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])

        self._vpred_pdparam = U.function([ob], [self.vpred, self.pdparam])
        self.ob = ob
Exemplo n.º 3
0
def _mlp(hiddens,
         inpt,
         num_actions,
         scope,
         reuse=False,
         noisy=False,
         bootstrap=False,
         num_heads=10):
    """Build a multi-layer perceptron Q-network.

    Args:
        hiddens: Iterable of hidden-layer widths; one ReLU
            ``layers.fully_connected`` layer is created per entry, in order.
        inpt: Input tensor fed to the first hidden layer.
        num_actions: Number of units in each output layer.
        scope: Name of the variable scope wrapping every layer built here.
        reuse: Forwarded to ``tf.variable_scope`` for ``scope``.
        noisy: If true (and ``bootstrap`` is false), the output layer is built
            with ``noisy_dense`` instead of ``layers.fully_connected``.
            Ignored when ``bootstrap`` is true.
        bootstrap: If true, build ``num_heads`` independent linear output
            heads on top of the shared hidden layers.
        num_heads: Number of heads created when ``bootstrap`` is true.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A list of ``num_heads`` output tensors when ``bootstrap`` is true,
        otherwise a single output tensor.
    """
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out,
                                         num_outputs=hidden,
                                         activation_fn=tf.nn.relu)

        if bootstrap:
            q_out = []
            with tf.variable_scope("heads"):
                for head_idx in range(num_heads):
                    # Each head gets its own scope so its variables are
                    # distinct: heads/action_value_head_<idx>/...
                    with tf.variable_scope("action_value_head_" + str(head_idx)):
                        head_out = layers.fully_connected(
                            out,
                            num_outputs=num_actions,
                            activation_fn=None)
                    q_out.append(head_out)
        elif noisy:
            q_out = noisy_dense(out, name='noisy_out', size=num_actions)
        else:
            q_out = layers.fully_connected(out,
                                           num_outputs=num_actions,
                                           activation_fn=None)
        return q_out
Exemplo n.º 4
0
def dueling_model(img_in, num_actions, scope, noisy=False, reuse=False):
    """Build a dueling Q-network (https://arxiv.org/abs/1511.06581).

    Three ReLU convolution layers are followed by a flatten, then two
    separate two-layer heads: "state_value" (one output unit) and
    "action_value" (``num_actions`` output units). The action scores have
    their mean over axis 1 subtracted before being added to the state score.

    Args:
        img_in: Input tensor fed to the first convolution.
        num_actions: Number of units in the action-value head's last layer.
        scope: Name of the variable scope wrapping every layer built here.
        noisy: If true, both heads are built with ``noisy_dense``
            (noisy networks, https://arxiv.org/abs/1706.10295) instead of
            ``layers.fully_connected``.
        reuse: Forwarded to ``tf.variable_scope`` for ``scope``.

    Returns:
        ``state_score + (action_scores - mean(action_scores, axis=1))``.
    """
    # (num_outputs, kernel_size, stride) for each convolution, in order.
    conv_specs = (
        (32, 8, 4),
        (64, 4, 2),
        (64, 3, 1),
    )

    with tf.variable_scope(scope, reuse=reuse):
        features = img_in
        with tf.variable_scope("convnet"):
            for filters, kernel, stride in conv_specs:
                features = layers.convolution2d(features,
                                                num_outputs=filters,
                                                kernel_size=kernel,
                                                stride=stride,
                                                activation_fn=tf.nn.relu)
        features = layers.flatten(features)

        with tf.variable_scope("state_value"):
            if not noisy:
                value_hidden = layers.fully_connected(features,
                                                      num_outputs=512,
                                                      activation_fn=tf.nn.relu)
                value = layers.fully_connected(value_hidden,
                                               num_outputs=1,
                                               activation_fn=None)
            else:
                # Noisy variant of the state-value head.
                value_hidden = noisy_dense(features,
                                           name='noisy_fc1',
                                           size=512,
                                           activation_fn=tf.nn.relu)
                value = noisy_dense(value_hidden,
                                    name='noisy_fc2',
                                    size=1)

        with tf.variable_scope("action_value"):
            if not noisy:
                advantage_hidden = layers.fully_connected(
                    features, num_outputs=512, activation_fn=tf.nn.relu)
                advantages = layers.fully_connected(advantage_hidden,
                                                    num_outputs=num_actions,
                                                    activation_fn=None)
            else:
                # Noisy variant of the action-value head.
                advantage_hidden = noisy_dense(features,
                                               name='noisy_fc1',
                                               size=512,
                                               activation_fn=tf.nn.relu)
                advantages = noisy_dense(advantage_hidden,
                                         name='noisy_fc2',
                                         size=num_actions)
            # Center the action scores by subtracting their mean over axis 1.
            advantage_mean = tf.reduce_mean(advantages, 1)
            centered_advantages = advantages - tf.expand_dims(
                advantage_mean, 1)

        return value + centered_advantages