예제 #1
0
def build_simple_fc(states):
    """Build separate two-layer fully connected actor and critic trunks.

    Each trunk reads ``states`` and stacks a 128-unit and then a 256-unit
    ReLU dense layer whose kernels are initialised with
    ``ortho_init(np.sqrt(2))``.  The layers are created, in this order, under
    the variable scopes ``actor_l1``, ``actor_l2``, ``critic_l1`` and
    ``critic_l2``.

    Args:
        states: input tensor fed to the first layer of both trunks.

    Returns:
        A tuple ``(actor_features, critic_features)`` holding the output of
        the second layer of each trunk.
    """
    layer_widths = (128, 256)

    def _tower(prefix):
        # Stack the dense layers of one trunk; scope names are
        # "<prefix>_l1", "<prefix>_l2".
        hidden = states
        for depth, width in enumerate(layer_widths, start=1):
            with tf.variable_scope('{}_l{}'.format(prefix, depth)):
                hidden = tf.layers.dense(
                    inputs=hidden,
                    units=width,
                    activation=tf.nn.relu,
                    kernel_initializer=ortho_init(np.sqrt(2)),
                )
        return hidden

    # The actor trunk is built first so variable creation order is unchanged.
    actor_features = _tower('actor')
    critic_features = _tower('critic')
    return actor_features, critic_features
예제 #2
0
def _policy_value_heads(actor_features, critic_features, ac_space, v_space,
                        orthogonal):
    """Attach the policy ("Logits") and value ("V_fc") dense heads.

    Args:
        actor_features: tensor fed to the policy head.
        critic_features: tensor fed to the value head.
        ac_space: number of units of the policy head.
        v_space: number of units of the value head.
        orthogonal: when true, the policy head kernel uses ``ortho_init(0.01)``
            and the value head kernel uses ``ortho_init()``; otherwise no
            kernel initializer is passed to ``tf.layers.dense``.

    Returns:
        Tuple ``(pi, quality)``: softmax of the policy head output and the raw
        value head output.
    """
    with tf.variable_scope("Logits"):
        scores = tf.layers.dense(
            inputs=actor_features,
            units=ac_space,
            kernel_initializer=ortho_init(0.01) if orthogonal else None,
        )
    with tf.variable_scope("V_fc"):
        quality = tf.layers.dense(
            inputs=critic_features,
            units=v_space,
            kernel_initializer=ortho_init() if orthogonal else None,
        )
    pi = tf.nn.softmax(scores)
    return pi, quality


def create_network(states, ac_space, network_type, v_space):
    """Build a policy/value network of the requested type on top of ``states``.

    Supported ``network_type`` values:

    * ``"cnn"``  - shared trunk from ``build_cnn``; heads use the default
      dense kernel initializer.
    * ``"ff"``   - separate actor/critic trunks from ``build_simple_fc``;
      heads use orthogonal initialisation.
    * ``"mnih"`` - shared trunk from ``build_mnih_A3C``; heads use orthogonal
      initialisation.

    Args:
        states: input tensor passed to the trunk builder.
        ac_space: number of units of the policy head.
        network_type: one of ``"cnn"``, ``"ff"``, ``"mnih"``.
        v_space: number of units of the value head.

    Returns:
        Tuple ``(pi, quality)``: softmax policy output and value head output.

    Raises:
        ValueError: if ``network_type`` is not one of the supported names.
            (Previously the function silently returned ``None``.)
    """
    if network_type == "cnn":
        shared = build_cnn(states)
        return _policy_value_heads(shared, shared, ac_space, v_space,
                                   orthogonal=False)

    if network_type == "ff":
        actor_features, critic_features = build_simple_fc(states)
        return _policy_value_heads(actor_features, critic_features,
                                   ac_space, v_space, orthogonal=True)

    if network_type == "mnih":
        shared = build_mnih_A3C(states)
        return _policy_value_heads(shared, shared, ac_space, v_space,
                                   orthogonal=True)

    raise ValueError(
        "unknown network_type {!r}; expected 'cnn', 'ff' or 'mnih'".format(
            network_type))
예제 #3
0
def lnlstmbase(xs, s, scope, nh, init_scale=1.0):
    """Unroll an LSTM whose pre-activations pass through ``_ln``.

    Args:
        xs: list of per-step input tensors; the static shape of ``xs[0]`` must
            have exactly two dimensions, the second being the input width.
            The list is overwritten in place with the per-step hidden outputs.
        s: state tensor that is split in half along axis 1 into the cell
            state and the hidden state.
        scope: variable scope under which all weights are created.
        nh: number of hidden units.
        init_scale: scale passed to ``utils.ortho_init`` for ``wx`` and ``wh``.

    Returns:
        Tuple ``(xs, s)``: the (mutated) list of hidden outputs and the final
        state, i.e. the cell and hidden states concatenated along axis 1.
    """
    _, nin = (dim.value for dim in xs[0].get_shape())
    gate_width = nh * 4

    def _ortho_matrix(name, rows):
        return tf.get_variable(name, [rows, gate_width],
                               initializer=utils.ortho_init(init_scale))

    def _constant_vector(name, size, fill):
        return tf.get_variable(name, [size],
                               initializer=tf.constant_initializer(fill))

    # Variables are created in the same order as before: input projection,
    # recurrent projection, shared gate bias, then cell normalisation params.
    with tf.variable_scope(scope):
        wx = _ortho_matrix("wx", nin)
        gx = _constant_vector("gx", gate_width, 1.0)
        bx = _constant_vector("bx", gate_width, 0.0)

        wh = _ortho_matrix("wh", nh)
        gh = _constant_vector("gh", gate_width, 1.0)
        bh = _constant_vector("bh", gate_width, 0.0)

        b = _constant_vector("b", gate_width, 0.0)

        gc = _constant_vector("gc", nh, 1.0)
        bc = _constant_vector("bc", nh, 0.0)

    cell, hidden = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, inp in enumerate(xs):
        input_term = _ln(tf.matmul(inp, wx), gx, bx)
        recurrent_term = _ln(tf.matmul(hidden, wh), gh, bh)
        gates = input_term + recurrent_term + b
        in_gate, forget_gate, out_gate, candidate = tf.split(
            axis=1, num_or_size_splits=4, value=gates)
        in_gate = tf.nn.sigmoid(in_gate)
        forget_gate = tf.nn.sigmoid(forget_gate)
        out_gate = tf.nn.sigmoid(out_gate)
        candidate = tf.tanh(candidate)
        cell = forget_gate * cell + in_gate * candidate
        hidden = out_gate * tf.tanh(_ln(cell, gc, bc))
        xs[step] = hidden
    s = tf.concat(axis=1, values=[cell, hidden])
    return xs, s
예제 #4
0
    def __init__(self,
                 Vo,
                 Eo,
                 Ho,
                 Ha,
                 Hi,
                 Hl,
                 attn_cls=AttentionModule,
                 init_orth=False,
                 cell_type=rnn_cells.LSTMCell,
                 is_multitarget=False):
        """Assemble the decoder's sub-links and its BOS embedding parameter.

        Args:
            Vo: first argument of the ``EmbedID`` link and output size of the
                final ``Linear`` link.
            Eo: embedding size; also the width of the ``bos_embeding`` param.
            Ho: second argument passed to the recurrent cell constructor.
            Ha: second argument passed to ``attn_cls``.
            Hi: size added to ``Eo`` to form the recurrent cell's first
                argument; also the first argument of ``attn_cls``.
            Hl: output size of the ``Maxout`` link / input of the final
                ``Linear`` link.
            attn_cls: callable building the attention module.
            init_orth: when true, ``ortho_init`` is applied to the recurrent
                cell, the output linear layer and the maxout layer.
            cell_type: either a plain function called as
                ``cell_type(Eo + Hi, Ho)``, or any other value, which is
                resolved through ``rnn_cells.create_cell_model_from_string``.
            is_multitarget: stored on the instance as-is.
        """
        cell_in_size = Eo + Hi
        if isinstance(cell_type, types.FunctionType):
            cell_factory = cell_type
        else:
            cell_factory = rnn_cells.create_cell_model_from_string(cell_type)
        gru = cell_factory(cell_in_size, Ho)

        log.info("constructing decoder [%r]" % (cell_type, ))

        # Links are built in the same order as before (emb, gru, maxo, lin_o,
        # attn_module) so any random initialisation is drawn in that order.
        links = dict(
            emb=L.EmbedID(Vo, Eo),
            gru=gru,
            maxo=L.Maxout(cell_in_size + Ho, Hl, 2),
            lin_o=L.Linear(Hl, Vo, nobias=False),
            attn_module=attn_cls(Hi, Ha, Ho, init_orth=init_orth),
        )
        super(Decoder, self).__init__(**links)
        self.add_param("bos_embeding", (1, Eo))

        self.Hi = Hi
        self.Ho = Ho
        self.Eo = Eo
        self.is_multitarget = is_multitarget
        # Random normal values fill the (1, Eo) beginning-of-sequence embedding.
        self.bos_embeding.data[...] = np.random.randn(Eo)

        if init_orth:
            for link in (self.gru, self.lin_o, self.maxo):
                ortho_init(link)
def _fcnobias(x, scope, nh, *, init_scale=1.0):
    """Project ``x`` to ``nh`` features with a bias-free linear layer.

    Args:
        x: 2-D tensor; its second static dimension is the input width.
        scope: variable scope under which the weight ``w`` is created.
        nh: number of output features.
        init_scale: scale passed to ``ortho_init`` for the weight matrix.

    Returns:
        The tensor ``tf.matmul(x, w)``.
    """
    n_inputs = x.get_shape()[1].value
    with tf.variable_scope(scope):
        weight = tf.get_variable(
            "w", [n_inputs, nh], initializer=ortho_init(init_scale))
        projected = tf.matmul(x, weight)
    return projected
예제 #6
0
파일: policies.py 프로젝트: guoyijie/DTSIL
    def __init__(self, sess, ob_space, loc_space, ac_space, nbatch, nsteps, max_timesteps, reuse=False, seed=0):
        """Build the policy graph and expose ``step`` / ``value`` closures.

        The graph is created under the variable scope ``"model"``.  A 128-unit
        GRU cell is run over the embedded goal sequence ``G``; for every time
        step the same ``GRUCell`` object advances a decoder state from the
        embedded location ``Y``, an additive attention score over the encoder
        outputs is computed, and the attention-weighted sum (context vector)
        is concatenated with the decoder state.  That concatenation feeds a
        scalar value head (``fc(..., 'v', 1)``) and the action distribution
        obtained from ``make_pdtype(ac_space).pdfromlatent``.

        Args:
            sess: session used by ``step`` and ``value`` to run the graph.
            ob_space: only ``ob_space.shape`` is read, to size placeholder X.
            loc_space: used as an integer size (last dimension of G and Y).
            ac_space: passed to ``make_pdtype``.
            nbatch: leading dimension of the G, X, Y and M placeholders.
            nsteps: number of unrolled steps; ``nenv = nbatch // nsteps``.
            max_timesteps: length of the second axis of G.
            reuse: forwarded to ``tf.variable_scope``.
            seed: passed to ``tf.set_random_seed``, ``ortho_init``, ``fc`` and
                ``pdfromlatent``.
        """
        nenv = nbatch // nsteps
        self.pdtype = make_pdtype(ac_space)
        with tf.variable_scope("model", reuse=reuse):
            # Placeholders: G = goal sequence, X = observations, Y = locations,
            # M = per-sample mask, S = recurrent state (one row per env).
            # NOTE(review): X is not consumed by any op built in this method;
            # it is only fed in step()/value() - confirm this is intended.
            G = tf.placeholder(tf.float32, [nbatch, max_timesteps, loc_space])
            X = tf.placeholder(tf.float32, (nbatch, )+ob_space.shape)
            Y = tf.placeholder(tf.float32, [nbatch, loc_space])
            M = tf.placeholder(tf.float32, [nbatch])
            S = tf.placeholder(tf.float32, [nenv, 128])
            # Split the flat batch into per-step sequences (presumably nsteps
            # tensors with nenv rows each - verify against batch_to_seq).
            ys = batch_to_seq(Y, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)

            # The graph-level seed is set before any variable or random op is
            # created in this scope.
            tf.set_random_seed(seed)
            # Linear embedding loc_space -> 64, shared by goals and locations.
            self.embed_W = tf.get_variable("embed_w", [loc_space, 64], initializer=ortho_init(1.0, seed))
            self.embed_b = tf.get_variable("embed_b", [64,])
            # Attention parameters: wa/wb project the decoder state, ua/ub
            # project the encoder outputs, va reduces the result to a score.
            self.wa = tf.get_variable("wa", [128, 128], initializer=ortho_init(1.0, seed))
            self.wb = tf.get_variable("wb", [128,])
            self.ua = tf.get_variable("ua", [128, 128], initializer=ortho_init(1.0, seed))
            self.ub = tf.get_variable("ub", [128,])
            self.va = tf.get_variable("va", [128])
            # One GRUCell object is used for both the encoder pass and every
            # decoder step below.
            self.rnn = tf.nn.rnn_cell.GRUCell(128, kernel_initializer=ortho_init(1.0, seed))
            # NOTE(review): enc_hidden is never read in this method.  Removing
            # it would change the sequence of ops created after
            # tf.set_random_seed - check seeding before deleting.
            enc_hidden = tf.zeros((nbatch, 128))
            # Embed every goal location, then restore the time axis.
            embed_G = tf.matmul(tf.reshape(G, (-1, loc_space)),self.embed_W)+self.embed_b
            embed_G = tf.reshape(embed_G, (nbatch, max_timesteps, -1))
            # Encoder pass over the embedded goal sequence (final state unused).
            enc_output, _ = tf.nn.dynamic_rnn(cell=self.rnn, inputs=embed_G, dtype=tf.float32)
            gs = batch_to_seq(enc_output, nenv, nsteps)
            # Decoder state starts from the fed-in recurrent state S.
            dec_hidden = S
            h = []
            # idx is unused; it is kept only because the code must stay identical.
            for idx, (y, m, g) in enumerate(zip(ys, ms, gs)):
                # Zero the carried state wherever the mask m is 1.
                dec_hidden = dec_hidden*(1-m)
                embed_y = tf.matmul(y,self.embed_W)+self.embed_b
                # Single-step GRU update; only the new state is used afterwards.
                dec_output, dec_hidden = tf.nn.dynamic_rnn(cell=self.rnn, inputs=tf.expand_dims(embed_y,axis=1), initial_state=dec_hidden)

                # Additive attention: tanh(wa*dec_hidden + ua*g) reduced with va
                # to one score per goal time step, softmax-normalised over axis 1.
                tmp = tf.reshape(tf.matmul(tf.reshape(g, (-1, 128)), self.ua)+self.ub,(nenv, max_timesteps, 128))
                tmp = tf.tanh(tf.expand_dims(tf.matmul(dec_hidden, self.wa)+self.wb,axis=1) + tmp)
                score = tf.reduce_sum(tmp*tf.expand_dims(tf.expand_dims(self.va, axis=0), axis=1), axis=2, keepdims=True)
                attention_weights = tf.nn.softmax(score, axis=1)
                # Context vector = attention-weighted sum of g over axis 1.
                context_vector = attention_weights * g
                context_vector = tf.reduce_sum(context_vector, axis=1)
                # Per-step latent: context vector concatenated with decoder state.
                x = tf.concat([context_vector, dec_hidden], axis=-1)
                h.append(x)
            h = seq_to_batch(h)
            # Scalar value head; [:,0] drops the trailing unit dimension.
            vf = fc(h, 'v', 1, seed=seed)[:,0]
            self.pd, self.pi = self.pdtype.pdfromlatent(h, seed=seed, init_scale=0.01)
        # Sampled action and its negative log-probability under self.pd.
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv,128))

        def step(ob, loc, goal, state, mask):
            # Returns sampled action, value estimate, the decoder state after the
            # last unrolled step, and the action's negative log-probability.
            a, v, state, neglogp = sess.run([a0, vf, dec_hidden, neglogp0], {X:ob, Y:loc, G:goal, M:mask, S:state})
            return a, v, state, neglogp

        def value(ob, loc, goal, state, mask):
            # Value estimate only, for the same feeds as step().
            return sess.run(vf, {X:ob, Y:loc, G:goal, M:mask, S:state})

        # Expose placeholders, the value tensor and the closures on the instance.
        self.G = G
        self.X = X
        self.Y = Y
        self.S = S
        self.M = M
        self.vf = vf
        self.step = step
        self.value = value