def discriminator(self, inp, cond, stages, t, reuse=False):
    """Build the conditional discriminator graph under the "d_net" scope.

    The input is mapped to features with ``from_rgb``, passed through one
    conv-conv-pool block per stage (highest stage first) and finally merged
    with a compressed copy of ``cond`` ahead of a single-unit ``fc`` layer.

    Args:
        inp: Input tensor handed to ``from_rgb`` (and to ``pool`` when ``t``
            is truthy).
        cond: Conditioning tensor; compressed to 128 units with ``fc``.
        stages: Number of stages. Blocks ``stages - 1`` down to ``1`` are
            built, followed by the stage-0 head.
        t: When truthy, the output of the top block is mixed with a pooled
            copy of the input, weighted by ``self.alpha_tra``.
        reuse: Forwarded to every ``tf.variable_scope`` opened here.

    Returns:
        The output of the final ``fc(..., units=1)`` call (no ``act`` is
        passed to it here).
    """
    fade = self.alpha_tra
    top = stages - 1

    with tf.variable_scope("d_net", reuse=reuse):
        # Shortcut path: pooled input pushed through from_rgb of stage `stages - 2`.
        shortcut = self.from_rgb(pool(inp, 2), stages - 2) if t else None

        feat = self.from_rgb(inp, top)

        # Walk the stages from the highest index down to 1.
        for stage in reversed(range(1, stages)):
            with tf.variable_scope(self.get_conv_scope_name(stage), reuse=reuse):
                feat = conv2d(feat, f=self.get_dnf(stage), ks=(3, 3), s=(1, 1),
                              act=lrelu_act())
                feat = conv2d(feat, f=self.get_dnf(stage - 1), ks=(3, 3), s=(1, 1),
                              act=lrelu_act())
                feat = pool(feat, 2)

            # Only the top block is blended with the shortcut path.
            if stage == top and t:
                kept = tf.multiply(fade, feat)
                carried = tf.multiply(tf.subtract(1., fade), shortcut)
                feat = kept + carried

        with tf.variable_scope(self.get_conv_scope_name(0), reuse=reuse):
            # Real/fake branch: fuse image features with the compressed condition.
            cond_small = fc(cond, units=128, act=lrelu_act())
            merged = self.concat_cond4(feat, cond_small)
            head = conv2d(merged, f=self.get_dnf(0), ks=(3, 3), s=(1, 1),
                          act=lrelu_act())
            head = conv2d(head, f=self.get_dnf(0), ks=(4, 4), s=(1, 1),
                          padding='VALID', act=lrelu_act())
            return fc(head, units=1)
def discriminator(self, image, is_training, reuse=False):
    """Build the convolutional discriminator under the "discriminator" scope.

    One conv + lrelu layer is followed by three conv + batch_norm + lrelu
    layers whose filter counts are 2x, 4x and 8x ``self.discriminator_dim``.
    The result is reshaped to ``[self.batch_size, -1]`` and fed to a
    single-unit ``fc`` layer.

    Args:
        image: Input tensor for the first convolution.
        is_training: Forwarded to every ``batch_norm`` call.
        reuse: When truthy, the current variable scope is switched to
            reuse mode before any layer is built.

    Returns:
        Tuple ``(tf.sigmoid(logits), logits)`` where ``logits`` is the
        output of the final ``fc`` layer.
    """
    # (filter multiplier, conv scope, batch-norm scope, extra conv2d kwargs)
    bn_blocks = (
        (2, "d_h1_conv", "d_bn_1", {}),
        (4, "d_h2_conv", "d_bn_2", {}),
        (8, "d_h3_conv", "d_bn_3", {"sh": 1, "sw": 1}),
    )

    with tf.variable_scope("discriminator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Shape notes carried over from the original author (not verifiable
        # from this block alone -- TODO confirm against conv2d's defaults):
        #   input            [batch,256,256,1]
        #   after d_h0_conv  [batch,128,128,64]
        #   after d_h1_conv  [batch,64,64,64*2]
        #   after d_h2_conv  [batch,32,32,64*4]
        #   after d_h3_conv  [batch,31,31,64*8]
        # The first layer has no batch norm.
        feat = lrelu(conv2d(image, self.discriminator_dim, scope="d_h0_conv"))

        for mult, conv_scope, bn_scope, extra in bn_blocks:
            conv_out = conv2d(feat, self.discriminator_dim * mult,
                              scope=conv_scope, **extra)
            normed = batch_norm(conv_out, is_training, scope=bn_scope)
            feat = lrelu(normed)

        # Real-or-fake binary output.
        flat = tf.reshape(feat, [self.batch_size, -1])
        logits = fc(flat, 1, scope="d_fc1")
        return tf.sigmoid(logits), logits
def build_model(self, traj, reuse=False):
    """Build a three-hidden-layer MLP discriminator under ``self.name``.

    Args:
        traj: Input tensor fed to the first fully connected layer.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(logit, prob)``: the output of the single-unit ``fc`` layer
        and its sigmoid.
    """
    with tf.variable_scope(self.name, reuse=reuse):
        hidden = traj
        # Three 128-unit fc layers, each followed by leaky ReLU.
        # Per the original notes each output is (mb_size, 128).
        for layer_name in ("dis_fc1", "dis_fc2", "dis_fc3"):
            hidden = tf.nn.leaky_relu(ops.fc(hidden, 128, name=layer_name))

        # logit and prob: (mb_size, 1) per the original notes.
        logit = ops.fc(hidden, 1, name="dis_fc_logit")
        return logit, tf.nn.sigmoid(logit)
def __init__(self, sess, img_height, img_width, c_dim, a_dim, reuse=False):
    """Build a convolutional actor-critic graph under the "policy" scope.

    A shared trunk (three conv layers and one 512-unit fc layer, all with
    ReLU) feeds two linear heads: ``a_dim`` action logits and a scalar value.

    Args:
        sess: Stored on ``self.sess``; not otherwise used in this block.
        img_height: Second dimension of the observation placeholder.
        img_width: Third dimension of the observation placeholder.
        c_dim: Fourth dimension of the observation placeholder.
        a_dim: Number of units of the policy head.
        reuse: Forwarded to ``tf.variable_scope``.

    Attributes set:
        ob_ph: uint8 placeholder of shape ``[None, img_height, img_width, c_dim]``.
        value: First column of the value head output.
        cat_dist: ``tf.distributions.Categorical`` built from the policy head.
        action: First draw of ``cat_dist.sample(1)``.
        pi: Raw output of the policy head.
    """
    self.sess = sess

    # (layer name, positional arguments forwarded unchanged to ops.conv2d).
    # Presumably: filters, kernel h, kernel w, stride h, stride w -- confirm
    # against ops.conv2d.
    conv_specs = (
        ("conv1", (32, 8, 8, 4, 4)),
        ("conv2", (64, 4, 4, 2, 2)),
        ("conv3", (64, 3, 3, 1, 1)),
    )

    with tf.variable_scope("policy", reuse=reuse):
        self.ob_ph = tf.placeholder(
            tf.uint8, [None, img_height, img_width, c_dim], name="observation")

        # Cast to float and divide by 255 before the first convolution.
        trunk = tf.cast(self.ob_ph, tf.float32) / 255.0

        for layer_name, conv_args in conv_specs:
            trunk = tf.nn.relu(ops.conv2d(trunk, *conv_args, name=layer_name))

        # Flatten every non-batch dimension for the dense layer.
        flat_dim = trunk.shape[1] * trunk.shape[2] * trunk.shape[3]
        flat = tf.reshape(trunk, [-1, flat_dim])
        trunk = tf.nn.relu(ops.fc(flat, 512, name="fc1"))

        # Two linear heads on top of the shared 512-unit feature.
        policy_out = ops.fc(trunk, a_dim, name="fc_pi")
        value_out = ops.fc(trunk, 1, name="fc_value")

    # Drop the trailing unit dimension of the value head.
    self.value = value_out[:, 0]
    # Passed positionally, i.e. as the first parameter of Categorical.
    self.cat_dist = tf.distributions.Categorical(policy_out)
    self.action = self.cat_dist.sample(1)[0]
    self.pi = policy_out
def __init__(self, sess, img_height, img_width, c_dim, a_dim, name="policy", reuse=False):
    """Build a convolutional actor-critic graph under the scope ``name``.

    A shared trunk (three conv layers and one 512-unit fc layer, all with
    ReLU) feeds an "actor" head with ``a_dim`` units and a "critic" head
    with one unit.

    Args:
        sess: Stored on ``self.sess``; not otherwise used in this block.
        img_height: Second dimension of the observation placeholder.
        img_width: Third dimension of the observation placeholder.
        c_dim: Fourth dimension of the observation placeholder.
        a_dim: Number of units of the actor head.
        name: Name of the enclosing variable scope.
        reuse: Forwarded to every ``tf.variable_scope`` opened here.

    Attributes set:
        ob_ph: uint8 placeholder of shape ``[None, img_height, img_width, c_dim]``.
        value: First column of the critic head output.
        distrib: ``distribs.CategoricalDistrib`` built from the actor head.
        action: Result of ``distrib.sample()``.
        neg_logprob: Result of ``distrib.neg_logp(action)``.
    """
    self.sess = sess

    # (layer name, positional arguments forwarded unchanged to ops.conv2d).
    # Presumably: filters, kernel h, kernel w, stride h, stride w -- confirm
    # against ops.conv2d.
    conv_specs = (
        ("conv1", (32, 8, 8, 4, 4)),
        ("conv2", (64, 4, 4, 2, 2)),
        ("conv3", (64, 3, 3, 1, 1)),
    )

    with tf.variable_scope(name, reuse=reuse):
        # ob_ph: (mb_size, img_height, img_width, c_dim)
        self.ob_ph = tf.placeholder(
            tf.uint8, [None, img_height, img_width, c_dim], name="observation")

        # Cast to float and divide by 255 before the first convolution.
        trunk = tf.cast(self.ob_ph, tf.float32) / 255.0

        for layer_name, conv_args in conv_specs:
            trunk = tf.nn.relu(ops.conv2d(trunk, *conv_args, name=layer_name))

        # Flatten every non-batch dimension for the dense layer.
        flat_dim = trunk.shape[1] * trunk.shape[2] * trunk.shape[3]
        flat = tf.reshape(trunk, [-1, flat_dim])
        trunk = tf.nn.relu(ops.fc(flat, 512, name="fc1"))

        with tf.variable_scope("actor", reuse=reuse):
            actor_out = ops.fc(trunk, a_dim, name="a_fc_logits")

        with tf.variable_scope("critic", reuse=reuse):
            critic_out = ops.fc(trunk, 1, name="c_fc_value")

    # Drop the trailing unit dimension of the critic head.
    self.value = critic_out[:, 0]
    self.distrib = distribs.CategoricalDistrib(actor_out)
    self.action = self.distrib.sample()
    self.neg_logprob = self.distrib.neg_logp(self.action)
def __init__(self, sess, s_dim, a_dim, reuse=False):
    """Build separate actor and critic MLPs under the "policy" scope.

    Both networks read the same observation placeholder and use three ReLU
    hidden layers of 64, 128 and 128 units. The actor ends in ``a_dim``
    units, the critic in one unit.

    Args:
        sess: Stored on ``self.sess``; not otherwise used in this block.
        s_dim: Second dimension of the observation placeholder.
        a_dim: Number of units of the actor output layer.
        reuse: Forwarded to every ``tf.variable_scope`` opened here.

    Attributes set:
        ob_ph: float32 placeholder of shape ``[None, s_dim]``.
        value: First column of the critic output.
        cat_dist: ``tf.distributions.Categorical`` built from the actor output.
        action: First draw of ``cat_dist.sample(1)``.
        pi: Raw output of the actor network.
    """
    self.sess = sess

    # (layer name, units) for the hidden layers of each network.
    actor_hidden = (("a_fc1", 64), ("a_fc2", 128), ("a_fc3", 128))
    critic_hidden = (("c_fc1", 64), ("c_fc2", 128), ("c_fc3", 128))

    with tf.variable_scope("policy", reuse=reuse):
        # ob_ph: (mb_size, s_dim)
        self.ob_ph = tf.placeholder(tf.float32, [None, s_dim], name="observation")

        with tf.variable_scope("actor", reuse=reuse):
            hidden = self.ob_ph
            for layer_name, width in actor_hidden:
                hidden = tf.nn.relu(ops.fc(hidden, width, name=layer_name))
            actor_out = ops.fc(hidden, a_dim, name="a_fc_pi")

        with tf.variable_scope("critic", reuse=reuse):
            hidden = self.ob_ph
            for layer_name, width in critic_hidden:
                hidden = tf.nn.relu(ops.fc(hidden, width, name=layer_name))
            critic_out = ops.fc(hidden, 1, name="c_fc_value")

    # Drop the trailing unit dimension of the critic output.
    self.value = critic_out[:, 0]
    # Passed positionally, i.e. as the first parameter of Categorical.
    self.cat_dist = tf.distributions.Categorical(actor_out)
    self.action = self.cat_dist.sample(1)[0]
    self.pi = actor_out
def generator(self, z_var, cond_inp, stages, t, reuse=False, cond_noise=True):
    """Build the conditional generator graph under the 'g_net' scope.

    Stage 0 concatenates ``z_var`` with a sampled conditioning vector,
    projects it to a 4x4 feature map and applies two conv + layer_norm
    layers. Every later stage upscales by 2 and applies two more
    conv + layer_norm layers. The result is converted with ``to_rgb``.

    Args:
        z_var: Tensor concatenated with the conditioning vector on axis 1.
        cond_inp: Input to ``self.generate_conditionals``.
        stages: Number of stages; stages ``1 .. stages - 1`` are built on
            top of stage 0.
        t: When truthy, the final output is mixed with an upscaled
            ``to_rgb`` of the features entering the last stage, weighted
            by ``self.alpha_tra``.
        reuse: Forwarded to every ``tf.variable_scope`` opened here.
        cond_noise: Forwarded to ``self.sample_normal_conditional``.

    Returns:
        Tuple ``(x, mean_lr, log_sigma_lr)``: the generated tensor and the
        two values returned by ``self.generate_conditionals``.
    """
    fade = self.alpha_tra
    last = stages - 1

    with tf.variable_scope('g_net', reuse=reuse):
        with tf.variable_scope(self.get_conv_scope_name(0), reuse=reuse):
            mean_lr, log_sigma_lr = self.generate_conditionals(cond_inp)
            cond = self.sample_normal_conditional(mean_lr, log_sigma_lr, cond_noise)

            # Project the concatenated input to a 4x4 feature map.
            feat = tf.concat([z_var, cond], axis=1)
            feat = layer_norm(fc(feat, units=4 * 4 * self.get_nf(0)))
            feat = tf.reshape(feat, [-1, 4, 4, self.get_nf(0)])

            for _ in range(2):
                feat = conv2d(feat, f=self.get_nf(0), ks=(3, 3), s=(1, 1))
                feat = layer_norm(feat, act=tf.nn.relu)

        # NOTE(review): when t is truthy but stages < 2 this loop never
        # runs, so `shortcut` stays None and the blend below receives it.
        shortcut = None
        for stage in range(1, stages):
            if stage == last and t:
                # Shortcut path taken from the features entering the last stage.
                shortcut = upscale(self.to_rgb(feat, stages - 2), 2)

            with tf.variable_scope(self.get_conv_scope_name(stage), reuse=reuse):
                feat = upscale(feat, 2)
                for _ in range(2):
                    feat = conv2d(feat, f=self.get_nf(stage), ks=(3, 3), s=(1, 1))
                    feat = layer_norm(feat, act=tf.nn.relu)

        image = self.to_rgb(feat, last)

        if t:
            carried = tf.multiply(tf.subtract(1., fade), shortcut)
            fresh = tf.multiply(fade, image)
            image = carried + fresh

        return image, mean_lr, log_sigma_lr
def __init__(self, sess, s_dim, a_dim, a_low, a_high, name="policy", reuse=False):
    """Build separate actor and critic MLPs with a diagonal Gaussian policy.

    Both networks read the same observation placeholder and use two tanh
    hidden layers of 64 units. The actor outputs an ``a_dim``-unit mean and
    owns a zero-initialised ``a_logstd`` variable of shape ``[1, a_dim]``.
    The critic ends in one unit.

    Args:
        sess: Stored on ``self.sess``; not otherwise used in this block.
        s_dim: Second dimension of the observation placeholder.
        a_dim: Number of units of the mean layer and width of ``a_logstd``.
        a_low: Accepted but not used anywhere in this block.
        a_high: Accepted but not used anywhere in this block.
        name: Name of the enclosing variable scope.
        reuse: Forwarded to every ``tf.variable_scope`` opened here.

    Attributes set:
        ob_ph: float32 placeholder of shape ``[None, s_dim]``.
        value: First column of the critic output.
        distrib: ``distribs.DiagGaussianDistrib(mean, logstd)``.
        action: Result of ``distrib.sample()``.
        neg_logprob: Result of ``distrib.neg_logp(action)``.
    """
    self.sess = sess

    with tf.variable_scope(name, reuse=reuse):
        # ob_ph: (mb_size, s_dim)
        self.ob_ph = tf.placeholder(tf.float32, [None, s_dim], name="observation")

        with tf.variable_scope("actor", reuse=reuse):
            hidden = self.ob_ph
            for layer_name in ("a_fc1", "a_fc2"):
                hidden = tf.nn.tanh(ops.fc(hidden, 64, name=layer_name))

            mean = ops.fc(hidden, a_dim, name="a_fc_mean")
            # The log-std is a free variable, not a function of the observation.
            logstd = tf.get_variable(
                name="a_logstd",
                shape=[1, a_dim],
                initializer=tf.zeros_initializer())

        with tf.variable_scope("critic", reuse=reuse):
            hidden = self.ob_ph
            for layer_name in ("c_fc1", "c_fc2"):
                hidden = tf.nn.tanh(ops.fc(hidden, 64, name=layer_name))

            critic_out = ops.fc(hidden, 1, name="c_fc_value")

    # Drop the trailing unit dimension of the critic output.
    self.value = critic_out[:, 0]
    self.distrib = distribs.DiagGaussianDistrib(mean, logstd)
    self.action = self.distrib.sample()
    self.neg_logprob = self.distrib.neg_logp(self.action)
def generate_conditionals(self, embeddings, units=128):
    """Compress the embeddings into two ``units``-wide statistics tensors.

    Two independent ``fc`` layers, each with a fresh ``lrelu_act()``
    activation, are applied to the same ``embeddings`` input. Callers
    treat the pair as the mean and log-sigma of a normal distribution.

    Args:
        embeddings: Input tensor shared by both layers.
        units: Number of units of each layer.

    Returns:
        Tuple ``(mean, log_sigma)`` in the order the layers are created.
    """
    # The mean layer is built first so layer creation order is unchanged.
    mu_head = fc(embeddings, units, act=lrelu_act())
    log_sigma_head = fc(embeddings, units, act=lrelu_act())
    return mu_head, log_sigma_head