def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
    """Build a convolutional, recurrent actor-critic graph and bind run helpers.

    The graph is: three ``conv`` layers -> ``conv_to_fc`` -> ``fc`` (512) ->
    ``lnlstm`` -> separate ``fc`` heads for the policy (``nact`` outputs) and
    the value (1 output).

    Args:
        sess: Session object whose ``run`` is used by ``step`` and ``value``.
        ob_space: Observation space; ``shape`` must unpack into three values.
        ac_space: Action space; ``n`` gives the number of policy outputs.
        nenv: Number of environments (first dimension of the state input).
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the observation's last dimension.
        nlstm: Hidden size passed to ``lnlstm``; the state is ``2 * nlstm`` wide.
        reuse: Forwarded to ``tf.variable_scope("model", ...)``.

    Attributes set on ``self``: ``X``, ``M``, ``S`` (input placeholders),
    ``pi``, ``vf`` (head outputs), ``initial_state`` (zeros of shape
    ``(nenv, 2 * nlstm)``), and the callables ``step`` / ``value``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    ob_shape = (nbatch, height, width, channels * nstack)
    nact = ac_space.n
    state_width = nlstm * 2

    obs_ph = tf.placeholder(tf.uint8, ob_shape)  # obs
    mask_ph = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
    state_ph = tf.placeholder(tf.float32, [nenv, state_width])  # states

    gain = np.sqrt(2)

    def identity(t):
        # Heads are linear: the activation passed to fc returns its input.
        return t

    with tf.variable_scope("model", reuse=reuse):
        # uint8 observations are cast to float and divided by 255.
        scaled = tf.cast(obs_ph, tf.float32) / 255.
        c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
        c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        c3 = conv(c2, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
        flat = conv_to_fc(c3)
        features = fc(flat, 'fc1', nh=512, init_scale=gain)
        # Regroup the flat batch by (nenv, nsteps) for the recurrent layer.
        feature_seq = batch_to_seq(features, nenv, nsteps)
        mask_seq = batch_to_seq(mask_ph, nenv, nsteps)
        lstm_seq, snew = lnlstm(feature_seq, mask_seq, state_ph, 'lstm1', nh=nlstm)
        lstm_out = seq_to_batch(lstm_seq)
        pi = fc(lstm_out, 'pi', nact, act=identity)
        vf = fc(lstm_out, 'v', 1, act=identity)

    v0 = vf[:, 0]  # drop the trailing size-1 axis of the value head
    a0 = sample(pi)
    self.initial_state = np.zeros((nenv, state_width), dtype=np.float32)

    def step(ob, state, mask):
        """Return (action, value, new_state) for the given inputs."""
        feed = {obs_ph: ob, state_ph: state, mask_ph: mask}
        return tuple(sess.run([a0, v0, snew], feed))

    def value(ob, state, mask):
        """Return only the value estimate for the given inputs."""
        feed = {obs_ph: ob, state_ph: state, mask_ph: mask}
        return sess.run(v0, feed)

    self.X = obs_ph
    self.M = mask_ph
    self.S = state_ph
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """Build a fully connected actor-critic graph over the last input channel.

    The graph is: scale input by 1/255 -> keep only index ``-1`` of the last
    axis -> flatten -> ``fc`` 2048 -> ``fc`` 1024 -> ``fc`` 512 -> separate
    ``fc`` heads for the policy (``nact`` outputs) and the value (1 output).

    Args:
        sess: Session object whose ``run`` is used by ``step`` and ``value``.
        ob_space: Observation space; ``shape`` must unpack into three values.
        ac_space: Action space; ``n`` gives the number of policy outputs.
        nenv: Number of environments.
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the observation's last dimension.
        reuse: Forwarded to ``tf.variable_scope("model", ...)``.

    Attributes set on ``self``: ``X`` (input placeholder), ``pi``, ``vf``
    (head outputs), ``initial_state`` (empty list), and the callables
    ``step`` / ``value``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    ob_shape = (nbatch, height, width, channels * nstack)
    nact = ac_space.n

    obs_ph = tf.placeholder(tf.uint8, ob_shape)  # obs

    gain = np.sqrt(2)

    def identity(t):
        # Heads are linear: the activation passed to fc returns its input.
        return t

    with tf.variable_scope("model", reuse=reuse):
        scaled = tf.cast(obs_ph, tf.float32) / 255.
        # Only look at the most recent frame (last index of the channel axis).
        latest = scaled[:, :, :, -1]
        dim_a, dim_b = latest.get_shape()[1:]
        flat = tf.reshape(latest, [-1, int(dim_a * dim_b)])
        hidden = fc(flat, 'fc1', nh=2048, init_scale=gain)
        hidden = fc(hidden, 'fc2', nh=1024, init_scale=gain)
        hidden = fc(hidden, 'fc3', nh=512, init_scale=gain)
        pi = fc(hidden, 'pi', nact, act=identity)
        vf = fc(hidden, 'v', 1, act=identity)

    v0 = vf[:, 0]  # drop the trailing size-1 axis of the value head
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (action, value, []); extra arguments are ignored."""
        action, estimate = sess.run([a0, v0], {obs_ph: ob})
        return action, estimate, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value estimate; extra arguments are ignored."""
        return sess.run(v0, {obs_ph: ob})

    self.X = obs_ph
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """Build a convolutional feed-forward actor-critic graph and run helpers.

    The graph is: three ``conv`` layers -> ``conv_to_fc`` -> ``fc`` (512) ->
    separate ``fc`` heads for the policy (``nact`` outputs) and the value
    (1 output).

    Args:
        sess: Session object whose ``run`` is used by ``step`` and ``value``.
        ob_space: Observation space; ``shape`` must unpack into three values.
        ac_space: Action space; ``n`` gives the number of policy outputs.
        nenv: Number of environments.
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the observation's last dimension.
        reuse: Forwarded to ``tf.variable_scope("model", ...)``.

    Attributes set on ``self``: ``X`` (input placeholder), ``pi``, ``vf``
    (head outputs), ``initial_state`` (empty list), and the callables
    ``step`` / ``value``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    ob_shape = (nbatch, height, width, channels * nstack)
    nact = ac_space.n

    obs_ph = tf.placeholder(tf.uint8, ob_shape)  # obs

    gain = np.sqrt(2)

    def identity(t):
        # Heads are linear: the activation passed to fc returns its input.
        return t

    with tf.variable_scope("model", reuse=reuse):
        # uint8 observations are cast to float and divided by 255.
        scaled = tf.cast(obs_ph, tf.float32) / 255.
        c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
        c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        c3 = conv(c2, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
        flat = conv_to_fc(c3)
        features = fc(flat, 'fc1', nh=512, init_scale=gain)
        pi = fc(features, 'pi', nact, act=identity)
        vf = fc(features, 'v', 1, act=identity)

    v0 = vf[:, 0]  # drop the trailing size-1 axis of the value head
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (action, value, []); extra arguments are ignored."""
        action, estimate = sess.run([a0, v0], {obs_ph: ob})
        return action, estimate, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value estimate; extra arguments are ignored."""
        return sess.run(v0, {obs_ph: ob})

    self.X = obs_ph
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """Build a conv actor-critic graph whose input passes a fixed random filter.

    Pipeline inside the ``"model"`` scope:

    1. A boolean mask marks a top-left box of the image (rows ``[:mh]``,
       columns ``[:mw]`` with ``mh = int(0.2 * height)`` and ``mw = 2 * mh``).
       The original code calls this the "velocity box".
    2. The box is zeroed, and the result goes through a non-trainable
       ``tf.layers.conv2d`` (``randcnn``, 4 filters, 4x4 kernel, 'same'
       padding, Glorot-normal initialised).
    3. The original pixels are restored inside the box; everywhere else the
       random-filter output is used (``randout``).
    4. ``randout`` is scaled by 1/255 and fed to three ``conv`` layers ->
       ``conv_to_fc`` -> ``fc`` (512) -> policy and value ``fc`` heads.

    ``tf.where`` combines the input with the 4-channel filter output, so the
    input's last dimension (``nc * nstack``) has to be 4 as well.

    Args:
        sess: Session object whose ``run`` is used by ``step`` and ``value``.
        ob_space: Observation space; ``shape`` must unpack into three values.
        ac_space: Action space; ``n`` gives the number of policy outputs.
        nenv: Number of environments.
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the observation's last dimension.
        reuse: Forwarded to ``tf.variable_scope("model", ...)``.

    Attributes set on ``self``: ``X`` (the input placeholder), ``pi``, ``vf``
    (head outputs), ``initial_state`` (empty list), and the callables
    ``step`` / ``value``.
    """
    nbatch = nenv * nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc * nstack)
    nact = ac_space.n
    # float32 (not uint8) because the input goes straight into tf.where and
    # tf.layers.conv2d together with float tensors.
    X = tf.placeholder(tf.float32, ob_shape)  # obs

    with tf.variable_scope("model", reuse=reuse):
        images = X

        # add random filter
        randcnn_depth = 4

        mask_vbox = tf.Variable(tf.zeros_like(images, dtype=bool), trainable=False)
        mask_shape = tf.shape(images)
        rh = .2  # hard-coded velocity box size
        mh = tf.cast(tf.cast(mask_shape[1], dtype=tf.float32) * rh, dtype=tf.int32)
        mw = mh * 2
        # Sliced assign: the resulting tensor is the mask with the box set True.
        mask_vbox = mask_vbox[:, :mh, :mw].assign(
            tf.ones([mask_shape[0], mh, mw, mask_shape[3]], dtype=bool))

        # Blank out the box before filtering.
        img = tf.where(mask_vbox, x=tf.zeros_like(images), y=images)
        rand_img = tf.layers.conv2d(
            img, randcnn_depth, 4, padding='same',
            kernel_initializer=tf.keras.initializers.glorot_normal(),
            trainable=False, name='randcnn')
        print("img**************", img)
        print("img**************", rand_img)

        # Fix: this tensor used to be assigned back to ``X``. The closures
        # below and ``self.X`` then fed observations into ``randout`` itself,
        # which overrides the tensor and skips the mask and random conv.
        # Keeping a separate name leaves ``X`` as the real input placeholder.
        filtered = tf.where(mask_vbox, x=images, y=rand_img, name='randout')

        h = conv(tf.cast(filtered, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4,
                 init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        pi = fc(h4, 'pi', nact, act=lambda x: x)
        vf = fc(h4, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]  # drop the trailing size-1 axis of the value head
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        """Return (action, value, []); extra arguments are ignored."""
        a, v = sess.run([a0, v0], {X: ob})
        return a, v, []  # dummy state

    def value(ob, *_args, **_kwargs):
        """Return only the value estimate; extra arguments are ignored."""
        return sess.run(v0, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value