def encode(self, inputs, labels, condition=None):
    """Build the forward (data -> latent) graph and the training objective.

    The inputs are dequantized with uniform noise, squeezed, passed through
    ``codec`` in the forward direction and scored under the prior.  The
    negated objective is stored on ``self.objective``; when ``hps.ycond`` is
    set, classifier logits computed from the latent are stored on
    ``self.logits``.

    Args:
        inputs: Rank-4 tensor; its three trailing static dimensions are
            recorded as ``self.height``, ``self.width`` and ``self.channels``.
        labels: Class indices, converted to a one-hot tensor of depth
            ``self.num_classes`` and stored on ``self.y``.
        condition: Optional conditioning value forwarded to ``codec``.  When
            it is ``None`` and ``hps.conditioning`` is truthy, the one-hot
            labels are used instead.

    Returns:
        The ``eps`` list returned by ``codec`` with the prior's
        ``get_eps(z)`` appended.
    """
    hps = self.hps

    # NOTE(review): the extent of the two variable scopes below was
    # reconstructed from whitespace-collapsed source -- confirm that the
    # classifier layer is meant to live outside the "prior" scope.
    with tf.variable_scope("preprocess"):
        self.y = tf.one_hot(labels, depth=self.num_classes, dtype=tf.float32)

        inputs = tf.cast(inputs, 'float32')
        static_dims = inputs.get_shape().as_list()
        self.height, self.width, self.channels = static_dims[1:]

        # Reduce the bit depth when fewer than 8 bits are requested.
        if hps.num_bits_x < 8:
            inputs = tf.floor(inputs / 2**(8 - hps.num_bits_x))
        inputs = inputs / self.num_bins - 0.5

        # Dequantization: spread each discrete level over one bin width.
        noise = tf.random_uniform(tf.shape(inputs), 0, 1. / self.num_bins)
        inputs = inputs + noise

        # One objective entry per example, offset by
        # -log(num_bins) * (number of elements per example).
        objective = tf.zeros(tf.shape(inputs)[0])
        elements_per_example = np.prod(ops.shape(inputs)[1:])
        objective += -np.log(self.num_bins) * elements_per_example

        inputs = squeeze2d(inputs)

    # Encoder
    if hps.conditioning and condition is None:
        condition = self.y
        # with tf.variable_scope("cond_preprocess"):
        #     condition = tf.layers.dense(condition, units=10, use_bias=False)
    z, objective, eps = codec(
        inputs,
        cond=condition,
        objective=objective,
        hps=hps,
        reverse=False,
    )

    # Prior
    with tf.variable_scope("prior"):
        # prior() reads the latent shape from hps.top_shape.
        hps.top_shape = z.get_shape().as_list()[1:]
        log_prob, _, latent_to_eps = prior(self.y, hps)
        prior_term = log_prob(z)
        eps.append(latent_to_eps(z))
        objective += prior_term
        self.objective = -objective

    # Class label prediction from the latent representation.
    if hps.ycond:
        pooled = tf.reduce_mean(z, axis=[1, 2])
        self.logits = linear_zeros(pooled, self.num_classes,
                                   name="classifier")
    return eps
def prior(y_onehot, hps, name=None):
    """Build the diagonal-Gaussian prior over the top-level latent.

    An all-zero tensor is passed through ``conv2d_zeros`` to produce ``2 * n_z``
    channels.  When ``hps.ycond`` is set, a ``linear_zeros`` embedding of
    ``y_onehot`` is added to every spatial position.  The result is split in
    half along the last axis into a mean and a raw log-std; the latter is
    squashed with ``tanh`` and then scaled and shifted by the scalar variables
    ``rescale`` and ``scale_shift``.

    Args:
        y_onehot: Tensor whose leading dimension supplies the batch size; also
            the input of the label embedding when ``hps.ycond`` is set.
        hps: Hyper-parameter object; ``top_shape`` and ``ycond`` are read.
        name: Unused.

    Returns:
        A tuple ``(logp, sample, eps)`` of callables that forward to
        ``pz.logp``, ``pz.sample`` and ``pz.get_eps`` of the Gaussian.
    """
    n_z = hps.top_shape[-1]
    n_params = 2 * n_z

    batch_size = tf.shape(y_onehot)[0]
    zeros_shape = [batch_size] + hps.top_shape[:2] + [n_params]
    h = conv2d_zeros(tf.zeros(zeros_shape), n_params, name="p")

    if hps.ycond:
        label_shift = linear_zeros(y_onehot, n_params, name="y_emb")
        # Broadcast the per-example embedding over the spatial axes.
        h = h + tf.reshape(label_shift, [-1, 1, 1, n_params])

    mean, raw_logsd = tf.split(h, 2, axis=-1)

    rescale = tf.get_variable(
        "rescale", [], initializer=tf.constant_initializer(1.))
    scale_shift = tf.get_variable(
        "scale_shift", [], initializer=tf.constant_initializer(0.))
    logsd = tf.tanh(raw_logsd) * rescale + scale_shift

    pz = gaussian_diag(mean, logsd)

    def logp(z1):
        return pz.logp(z1)

    def get_eps(z1):
        return pz.get_eps(z1)

    def sample(eps):
        return pz.sample(eps)

    return logp, sample, get_eps
def prior(y_onehot, hps, name=None):
    """Construct the prior distribution for the top-level latent.

    The Gaussian parameters come from ``conv2d_zeros`` applied to a zero
    tensor of shape ``[batch] + hps.top_shape[:2] + [2 * n_z]``, with an
    optional ``linear_zeros`` label embedding added when ``hps.ycond`` is set.
    The first half of the channels is the mean; the second half, after
    ``tanh`` and an affine map with the scalar variables ``rescale`` and
    ``scale_shift``, is the log-std.

    Args:
        y_onehot: Tensor providing the batch size (leading dimension) and,
            when ``hps.ycond`` is set, the input of the label embedding.
        hps: Hyper-parameter object providing ``top_shape`` and ``ycond``.
        name: Unused.

    Returns:
        ``(logp, sample, eps)``: three single-argument callables delegating to
        ``logp``, ``sample`` and ``get_eps`` of the ``gaussian_diag`` object.
    """
    n_z = hps.top_shape[-1]
    width = 2 * n_z  # mean channels + log-std channels

    spatial = hps.top_shape[:2]
    h = tf.zeros([tf.shape(y_onehot)[0]] + spatial + [width])
    h = conv2d_zeros(h, width, name="p")

    if hps.ycond:
        embedded = linear_zeros(y_onehot, width, name="y_emb")
        embedded = tf.reshape(embedded, [-1, 1, 1, width])
        h = h + embedded

    mean, logsd = tf.split(h, 2, axis=-1)

    one_init = tf.constant_initializer(1.)
    rescale = tf.get_variable("rescale", [], initializer=one_init)
    zero_init = tf.constant_initializer(0.)
    scale_shift = tf.get_variable("scale_shift", [], initializer=zero_init)

    # Bound the raw log-std with tanh before the affine map.
    logsd = tf.tanh(logsd) * rescale + scale_shift
    pz = gaussian_diag(mean, logsd)

    def logp(z1):
        return pz.logp(z1)

    def to_eps(z1):
        return pz.get_eps(z1)

    def sample(eps):
        return pz.sample(eps)

    return logp, sample, to_eps
def encode(self, inputs, labels, condition=None):
    """Encode a batch into latents and assemble the objective.

    Steps: one-hot the labels, dequantize the inputs by adding uniform noise,
    squeeze, run ``codec`` with ``reverse=False``, add the prior
    log-probability of the latent to the objective, and store its negation
    on ``self.objective``.  With ``hps.ycond`` set, ``self.logits`` is also
    created from the spatial mean of the latent.

    Args:
        inputs: Rank-4 tensor; the static sizes of its last three axes are
            saved as ``self.height``, ``self.width`` and ``self.channels``.
        labels: Class indices for ``tf.one_hot`` (depth ``self.num_classes``);
            the one-hot result is saved as ``self.y``.
        condition: Optional value passed to ``codec`` as ``cond``.  Falls back
            to ``self.y`` when ``None`` and ``hps.conditioning`` is truthy.

    Returns:
        The ``eps`` list from ``codec``, extended with ``get_eps(z)`` from
        the prior.
    """
    # NOTE(review): scope boundaries were reconstructed from source whose
    # indentation was lost -- confirm which statements belong in
    # "preprocess" and "prior".
    with tf.variable_scope("preprocess"):
        self.y = tf.one_hot(labels, depth=self.num_classes, dtype=tf.float32)

        x = tf.cast(inputs, 'float32')
        _, self.height, self.width, self.channels = x.get_shape().as_list()

        if self.hps.num_bits_x < 8:
            # Keep only the requested number of most significant bits.
            x = tf.floor(x / 2**(8 - self.hps.num_bits_x))
        x = x / self.num_bins - 0.5

        # Dequantization by adding uniform noise of one bin width.
        x = x + tf.random_uniform(tf.shape(x), 0, 1. / self.num_bins)

        objective = tf.zeros(tf.shape(x)[0])
        per_example_size = np.prod(ops.shape(x)[1:])
        objective += -np.log(self.num_bins) * per_example_size

        x = squeeze2d(x)

    # Encoder
    if self.hps.conditioning and condition is None:
        condition = self.y
        # with tf.variable_scope("cond_preprocess"):
        #     condition = tf.layers.dense(condition, units=10, use_bias=False)
    codec_kwargs = dict(
        cond=condition,
        objective=objective,
        hps=self.hps,
        reverse=False,
    )
    z, objective, eps = codec(x, **codec_kwargs)

    # Prior
    with tf.variable_scope("prior"):
        # The prior sizes its parameters from hps.top_shape.
        self.hps.top_shape = z.get_shape().as_list()[1:]
        prior_logp, _, prior_eps = prior(self.y, self.hps)
        logp_z = prior_logp(z)
        eps.append(prior_eps(z))
        objective += logp_z
        self.objective = -objective

    # Predict the class label from the latent representation.
    if self.hps.ycond:
        latent_mean = tf.reduce_mean(z, axis=[1, 2])
        self.logits = linear_zeros(
            latent_mean, self.num_classes, name="classifier")
    return eps
def encode(self, inputs, labels, condition=None):
    """Build the forward (data -> latent) graph and the training objective.

    The inputs are dequantized by adding uniform noise, squeezed, passed
    through ``codec`` with ``reverse=False`` and scored under the prior.  The
    negated objective is stored on ``self.objective``; when ``hps.ycond`` is
    set, classifier logits are stored on ``self.logits``.  The static shape
    of ``inputs`` is printed before the squeeze and before the codec call.

    Args:
        inputs: Rank-4 tensor; its three trailing static dimensions are
            recorded as ``self.height``, ``self.width`` and ``self.channels``.
        labels: Class indices, one-hot encoded with depth
            ``self.num_classes`` and stored on ``self.y``.
        condition: Optional conditioning value forwarded to ``codec``.  When
            ``None`` and ``hps.conditioning`` is truthy, ``self.y`` is used.

    Returns:
        The ``eps`` list returned by ``codec`` with the prior's
        ``get_eps(z)`` appended.
    """
    # Dequantization by adding uniform noise.
    # NOTE(review): the extent of the variable scopes was reconstructed from
    # whitespace-collapsed source -- confirm the scope boundaries.
    with tf.variable_scope("preprocess"):
        # One-hot encode the labels.
        self.y = tf.one_hot(labels, depth=self.num_classes, dtype=tf.float32)

        inputs = tf.cast(inputs, 'float32')  # dtype conversion
        self.height, self.width, self.channels = inputs.get_shape(
        ).as_list()[1:]

        # Reduce the bit depth when fewer than 8 bits are requested, then
        # rescale by the number of bins and shift by -0.5.
        if self.hps.num_bits_x < 8:
            inputs = tf.floor(inputs / 2**(8 - self.hps.num_bits_x))
        inputs = inputs / self.num_bins - 0.5

        # Add uniform (not normal) noise in [0, 1 / num_bins).
        inputs = inputs + tf.random_uniform(tf.shape(inputs), 0,
                                            1. / self.num_bins)

        # One objective entry per example, offset by
        # -log(num_bins) * (product of the per-example dimensions).
        objective = tf.zeros(tf.shape(inputs)[0])
        objective += -np.log(self.num_bins) * np.prod(
            ops.shape(inputs)[1:])

        # Tensor.shape is a property, not a method, and must be converted to
        # str before concatenation; the previous `inputs.shape()` raised
        # TypeError at graph-construction time.
        print("before inter squeeze2d, input.shape=" +
              str(inputs.get_shape()))
        inputs = squeeze2d(inputs)
        # presumably inputs is [batch, height, width, channels] -- confirm

    # Encoder
    if self.hps.conditioning and condition is None:
        condition = self.y
        # with tf.variable_scope("cond_preprocess"):
        #     condition = tf.layers.dense(condition, units=10, use_bias=False)
    print("before inter model.codec, inputs.shape=" +
          str(inputs.get_shape()))
    z, objective, eps = codec(inputs,
                              cond=condition,
                              objective=objective,
                              hps=self.hps,
                              reverse=False)

    # Prior
    with tf.variable_scope("prior"):
        # prior() reads the latent shape from hps.top_shape.
        self.hps.top_shape = z.get_shape().as_list()[1:]
        logp, sample, get_eps = prior(self.y, self.hps)
        obj = logp(z)
        eps.append(get_eps(z))
        objective += obj
        self.objective = -objective

    # Class label prediction from the latent representation.
    if self.hps.ycond:
        z_y = tf.reduce_mean(z, axis=[1, 2])
        self.logits = linear_zeros(z_y, self.num_classes,
                                   name="classifier")
    return eps