Exemplos de sample em Python, exemplos de a2c.a2c.utils.sample em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: policies.py Projeto: unghee/learning-from-human-preferences

    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 nlstm=256,
                 reuse=False):
        """Build a recurrent policy/value graph (conv stack -> fc -> lnlstm).

        All layers are created inside the variable scope "model".

        Args:
            sess: Object whose ``run(fetches, feed_dict)`` evaluates the graph.
            ob_space: Observation space; ``ob_space.shape`` must unpack into
                three values (used as height, width, channels).
            ac_space: Action space; ``ac_space.n`` is the width of the 'pi'
                output layer.
            nenv: Number of environments; first dimension of the state
                placeholder.
            nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
            nstack: Multiplier applied to the channel dimension of the
                observation placeholder (presumably the number of stacked
                frames -- confirm against the caller).
            nlstm: Hidden size passed to ``lnlstm``; the state placeholder is
                ``2 * nlstm`` wide.
            reuse: Forwarded to ``tf.variable_scope``.
        """
        batch_size = nenv * nsteps
        height, width, channels = ob_space.shape
        n_actions = ac_space.n
        state_width = nlstm * 2
        gain = np.sqrt(2)

        def identity(t):
            # Passed as the activation of the output heads.
            return t

        # Placeholders are created in the order obs, mask, state.
        X = tf.placeholder(
            tf.uint8, (batch_size, height, width, channels * nstack))  # obs
        M = tf.placeholder(tf.float32, [batch_size])  # mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, state_width])  # states

        with tf.variable_scope("model", reuse=reuse):
            # Scale the uint8 observations by 1/255 before the conv stack.
            features = tf.cast(X, tf.float32) / 255.
            for scope, filters, kernel, step_size in (('c1', 32, 8, 4),
                                                      ('c2', 64, 4, 2),
                                                      ('c3', 64, 3, 1)):
                features = conv(features,
                                scope,
                                nf=filters,
                                rf=kernel,
                                stride=step_size,
                                init_scale=gain)
            dense = fc(conv_to_fc(features), 'fc1', nh=512, init_scale=gain)

            # Convert the batch (and the mask) to sequence form for the
            # recurrent layer, then convert its output back to a batch.
            seq_inputs = batch_to_seq(dense, nenv, nsteps)
            seq_masks = batch_to_seq(M, nenv, nsteps)
            seq_outputs, snew = lnlstm(
                seq_inputs, seq_masks, S, 'lstm1', nh=nlstm)
            recurrent = seq_to_batch(seq_outputs)

            pi = fc(recurrent, 'pi', n_actions, act=identity)
            vf = fc(recurrent, 'v', 1, act=identity)

        v0 = vf[:, 0]  # first (only) column of the value head
        a0 = sample(pi)

        def step(ob, state, mask):
            """Return (action, value, new state) for the given inputs."""
            action, val, next_state = sess.run(
                [a0, v0, snew], {X: ob, S: state, M: mask})
            return action, val, next_state

        def value(ob, state, mask):
            """Return the value estimate for the given inputs."""
            return sess.run(v0, {X: ob, S: state, M: mask})

        self.initial_state = np.zeros((nenv, state_width), dtype=np.float32)
        self.X, self.M, self.S = X, M, S
        self.pi, self.vf = pi, vf
        self.step, self.value = step, value

Exemplo n.º 2

0

Exibir arquivo

Arquivo: policies.py Projeto: unghee/learning-from-human-preferences

    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a fully-connected policy/value graph on the last channel.

        All layers are created inside the variable scope "model".

        Args:
            sess: Object whose ``run(fetches, feed_dict)`` evaluates the graph.
            ob_space: Observation space; ``ob_space.shape`` must unpack into
                three values (used as height, width, channels).
            ac_space: Action space; ``ac_space.n`` is the width of the 'pi'
                output layer.
            nenv: Number of environments.
            nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
            nstack: Multiplier applied to the channel dimension of the
                observation placeholder (presumably the number of stacked
                frames -- confirm against the caller).
            reuse: Forwarded to ``tf.variable_scope``.
        """
        batch_size = nenv * nsteps
        height, width, channels = ob_space.shape
        n_actions = ac_space.n
        gain = np.sqrt(2)

        def identity(t):
            # Passed as the activation of the output heads.
            return t

        X = tf.placeholder(
            tf.uint8, (batch_size, height, width, channels * nstack))  # obs

        with tf.variable_scope("model", reuse=reuse):
            scaled = tf.cast(X, tf.float32) / 255.

            # Keep only the last channel (per the original author: the most
            # recent frame).
            frame = scaled[:, :, :, -1]

            # Flatten the two remaining spatial dimensions into one.
            dim_a, dim_b = frame.get_shape()[1:]
            hidden = tf.reshape(frame, [-1, int(dim_a * dim_b)])

            for scope, units in (('fc1', 2048), ('fc2', 1024), ('fc3', 512)):
                hidden = fc(hidden, scope, nh=units, init_scale=gain)

            pi = fc(hidden, 'pi', n_actions, act=identity)
            vf = fc(hidden, 'v', 1, act=identity)

        v0 = vf[:, 0]  # first (only) column of the value head
        a0 = sample(pi)

        def step(ob, *_args, **_kwargs):
            """Return (action, value, []) -- extra arguments are ignored."""
            action, val = sess.run([a0, v0], {X: ob})
            return action, val, []  # dummy state

        def value(ob, *_args, **_kwargs):
            """Return the value estimate -- extra arguments are ignored."""
            return sess.run(v0, {X: ob})

        self.initial_state = []  # not stateful
        self.X = X
        self.pi, self.vf = pi, vf
        self.step, self.value = step, value

Exemplo n.º 3

0

Exibir arquivo

Arquivo: policies.py Projeto: unghee/learning-from-human-preferences

    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a feed-forward policy/value graph (conv stack -> fc).

        All layers are created inside the variable scope "model".

        Args:
            sess: Object whose ``run(fetches, feed_dict)`` evaluates the graph.
            ob_space: Observation space; ``ob_space.shape`` must unpack into
                three values (used as height, width, channels).
            ac_space: Action space; ``ac_space.n`` is the width of the 'pi'
                output layer.
            nenv: Number of environments.
            nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
            nstack: Multiplier applied to the channel dimension of the
                observation placeholder (presumably the number of stacked
                frames -- confirm against the caller).
            reuse: Forwarded to ``tf.variable_scope``.
        """
        batch_size = nenv * nsteps
        height, width, channels = ob_space.shape
        n_actions = ac_space.n
        gain = np.sqrt(2)

        def identity(t):
            # Passed as the activation of the output heads.
            return t

        X = tf.placeholder(
            tf.uint8, (batch_size, height, width, channels * nstack))  # obs

        with tf.variable_scope("model", reuse=reuse):
            # Scale the uint8 observations by 1/255 before the conv stack.
            features = tf.cast(X, tf.float32) / 255.
            for scope, filters, kernel, step_size in (('c1', 32, 8, 4),
                                                      ('c2', 64, 4, 2),
                                                      ('c3', 64, 3, 1)):
                features = conv(features,
                                scope,
                                nf=filters,
                                rf=kernel,
                                stride=step_size,
                                init_scale=gain)
            dense = fc(conv_to_fc(features), 'fc1', nh=512, init_scale=gain)
            pi = fc(dense, 'pi', n_actions, act=identity)
            vf = fc(dense, 'v', 1, act=identity)

        v0 = vf[:, 0]  # first (only) column of the value head
        a0 = sample(pi)

        def step(ob, *_args, **_kwargs):
            """Return (action, value, []) -- extra arguments are ignored."""
            action, val = sess.run([a0, v0], {X: ob})
            return action, val, []  #dummy state

        def value(ob, *_args, **_kwargs):
            """Return the value estimate -- extra arguments are ignored."""
            return sess.run(v0, {X: ob})

        self.initial_state = []  #not stateful
        self.X = X
        self.pi, self.vf = pi, vf
        self.step, self.value = step, value

Exemplo n.º 4

0

Exibir arquivo

Arquivo: policies.py Projeto: unghee/learning-from-human-preferences

    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        """Build a conv policy/value graph preceded by a random conv filter.

        The observation passes through a non-trainable convolution
        ('randcnn') everywhere except a top-left box, which is copied through
        unchanged.  The result feeds the conv stack -> fc -> heads, all
        created inside the variable scope "model".

        Args:
            sess: Object whose ``run(fetches, feed_dict)`` evaluates the graph.
            ob_space: Observation space; ``ob_space.shape`` must unpack into
                three values (used as height, width, channels).
            ac_space: Action space; ``ac_space.n`` is the width of the 'pi'
                output layer.
            nenv: Number of environments.
            nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
            nstack: Multiplier applied to the channel dimension of the
                observation placeholder (presumably the number of stacked
                frames -- confirm against the caller).
            reuse: Forwarded to ``tf.variable_scope``.
        """
        nbatch = nenv * nsteps
        nh, nw, nc = ob_space.shape
        ob_shape = (nbatch, nh, nw, nc * nstack)
        nact = ac_space.n
        # float32 input: the observations go straight into conv2d and are
        # mixed with its float output by tf.where below.
        X = tf.placeholder(tf.float32, ob_shape)  #obs

        with tf.variable_scope("model", reuse=reuse):
            images = X

            # add random filter
            # NOTE(review): tf.where below combines `images` with the conv
            # output, so nc * nstack presumably has to equal randcnn_depth
            # -- confirm.
            randcnn_depth = 4

            mask_vbox = tf.Variable(tf.zeros_like(images, dtype=bool),
                                    trainable=False)
            mask_shape = tf.shape(images)

            rh = .2  # hard-coded velocity box size
            mh = tf.cast(tf.cast(mask_shape[1], dtype=tf.float32) * rh,
                         dtype=tf.int32)
            mw = mh * 2
            # Mark the top-left (mh x mw) box as True in the mask.
            mask_vbox = mask_vbox[:, :mh, :mw].assign(
                tf.ones([mask_shape[0], mh, mw, mask_shape[3]], dtype=bool))

            # Zero the box before the random convolution sees the image.
            img = tf.where(mask_vbox, x=tf.zeros_like(images), y=images)
            rand_img = tf.layers.conv2d(
                img,
                randcnn_depth,
                4,
                padding='same',
                kernel_initializer=tf.keras.initializers.glorot_normal(),
                trainable=False,
                name='randcnn')
            # Debug output kept from the original implementation.
            print("img**************", img)
            print("img**************", rand_img)

            # Original pixels inside the box, random-filtered pixels elsewhere.
            # This gets its own name instead of rebinding `X`: `step`, `value`
            # and `self.X` must refer to the input placeholder.  Feeding
            # observations into this tensor would override the whole random
            # filter stage.
            randout = tf.where(mask_vbox, x=images, y=rand_img, name='randout')

            h = conv(tf.cast(randout, tf.float32) / 255.,
                     'c1',
                     nf=32,
                     rf=8,
                     stride=4,
                     init_scale=np.sqrt(2))
            h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
            h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
            h3 = conv_to_fc(h3)
            h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
            pi = fc(h4, 'pi', nact, act=lambda x: x)
            vf = fc(h4, 'v', 1, act=lambda x: x)

        v0 = vf[:, 0]  # first (only) column of the value head
        a0 = sample(pi)
        self.initial_state = []  #not stateful

        def step(ob, *_args, **_kwargs):
            """Return (action, value, []) -- extra arguments are ignored."""
            a, v = sess.run([a0, v0], {X: ob})
            return a, v, []  #dummy state

        def value(ob, *_args, **_kwargs):
            """Return the value estimate -- extra arguments are ignored."""
            return sess.run(v0, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value