def resnet(x, is_train):
    """Build a small network of three concatenated bottleneck stages.

    A 1x1 convolution maps ``x`` to 64 channels.  Each of the three stages
    then runs ``bottleneck`` on the current features, concatenates the result
    with those features along axis 3 and applies ReLU.  Two final 1x1
    convolutions reduce the features to 64 channels and then to 1 channel.

    Args:
        x: Input tensor.  Its channel count is read from static dimension 3,
            so a 4-D channels-last tensor is assumed -- TODO confirm.
        is_train: Forwarded unchanged to ``bottleneck``.

    Returns:
        The single-channel output of the last convolution (no activation).
    """
    W1 = utils.weight_variable([1, 1, x.get_shape()[3].value, 64], name='W1')
    b1 = utils.bias_variable([64], 'bias1')
    features = tf.nn.relu(utils.conv2d(x, W1, b1))

    # Each concat gets its own name (C1, C2, C3); the third stage previously
    # reused 'C2', which TensorFlow silently renamed to 'C2_1'.
    for stage in (1, 2, 3):
        stage_out = bottleneck(features, stage, is_train)
        merged = tf.concat([stage_out, features], axis=3, name='C%d' % stage)
        features = tf.nn.relu(merged)

    W2 = utils.weight_variable(
        [1, 1, features.get_shape()[3].value, 64], name='W2')
    b2 = utils.bias_variable([64], 'bias2')
    conv2 = tf.nn.relu(utils.conv2d(features, W2, b2))

    W3 = utils.weight_variable(
        [1, 1, conv2.get_shape()[3].value, 1], name='W3')
    b3 = utils.bias_variable([1], 'bias3')
    return utils.conv2d(conv2, W3, b3)
def _discriminator(self, input_images, dims, train_phase, activation=tf.nn.relu,
                   scope_name="discriminator", scope_reuse=False):
    """Build the convolutional discriminator and return its last conv output.

    Args:
        input_images: Tensor fed to the first strided convolution.
        dims: Channel sizes; ``dims[i] -> dims[i + 1]`` for each layer, the
            last pair being the prediction layer.
        train_phase: Passed to the batch-norm callable as ``train``.
        activation: Activation applied after every hidden layer.
        scope_name: Variable scope holding the discriminator variables.
        scope_reuse: When true, variables in the scope are reused.

    Returns:
        ``(None, h_pred, None)``.  The ``None`` entries keep the return shape
        compatible with the discriminators of the other GAN variants.
    """
    num_dims = len(dims)
    with tf.variable_scope(scope_name) as scope:
        if scope_reuse:
            scope.reuse_variables()

        h = input_images
        for index in range(num_dims - 2):
            in_ch, out_ch = dims[index], dims[index + 1]
            W = utils.weight_variable([4, 4, in_ch, out_ch],
                                      name="W_%d" % index)
            b = tf.zeros([out_ch])
            h_conv = utils.conv2d_strided(h, W, b)
            if index == 0:
                # The first discriminator layer skips batch norm.
                h_bn = h_conv
            else:
                # NOTE(review): `d_bn` is not defined in this function (its
                # definition is commented out upstream); unless it exists at
                # module level this raises NameError -- confirm.
                h_bn = d_bn(h_conv, train=train_phase)
            h = activation(h_bn, name="h_%d" % index)
            utils.add_activation_summary(h)

        W_pred = utils.weight_variable([4, 4, dims[-2], dims[-1]],
                                       name="W_pred")
        b = tf.zeros([dims[-1]])
        h_pred = utils.conv2d_strided(h, W_pred, b)

    return None, h_pred, None
def build_graph():
    """Build the convolutional denoising auto-encoder graph.

    Reads the module-level tensors ``x`` and ``x_noise``, reshapes both to
    ``[-1, 3, 11, 1]``, encodes the noisy input with two conv+ReLU layers and
    decodes the code with two transposed-conv+ReLU layers.

    Returns:
        Tuple ``(x_origin, code_layer, x_reconstruct)``.
    """
    # tf.pack was renamed to tf.stack and removed in TensorFlow 1.0; use
    # whichever one this TensorFlow build provides.
    stack = tf.stack if hasattr(tf, 'stack') else tf.pack

    x_origin = tf.reshape(x, [-1, 3, 11, 1])
    x_origin_noise = tf.reshape(x_noise, [-1, 3, 11, 1])

    # Encoder layer 1.  The convolution is built once and reused for the
    # shape print so the debug output does not add a second conv op.
    W_e_conv1 = weight_variable([5, 5, 1, 16], "w_e_conv1")
    b_e_conv1 = bias_variable([16], "b_e_conv1")
    conv1_pre = conv2d(x_origin_noise, W_e_conv1)
    print(conv1_pre.get_shape())
    h_e_conv1 = tf.nn.relu(tf.add(conv1_pre, b_e_conv1))

    # Encoder layer 2 (its output is the code layer).
    W_e_conv2 = weight_variable([5, 5, 16, 32], "w_e_conv2")
    b_e_conv2 = bias_variable([32], "b_e_conv2")
    h_e_conv2 = tf.nn.relu(tf.add(conv2d(h_e_conv1, W_e_conv2), b_e_conv2))
    code_layer = h_e_conv2
    print("code layer shape : %s" % h_e_conv2.get_shape())

    batch = tf.shape(x)[0]

    # Decoder layer 1.
    # NOTE(review): the filter is [5, 5, 16, 32] while the requested output
    # has 32 channels; whether that is consistent depends on `deconv2d`,
    # which is not visible here -- confirm.
    W_d_conv1 = weight_variable([5, 5, 16, 32], "w_d_conv1")
    output_shape_d_conv1 = stack([batch, 1, 3, 32])
    h_d_conv1 = tf.nn.relu(
        deconv2d(h_e_conv2, W_d_conv1, output_shape_d_conv1))

    # Decoder layer 2.  `b_d_conv2` is created but never added to the output;
    # it is kept so the set of graph variables stays unchanged.
    W_d_conv2 = weight_variable([5, 5, 1, 16], "w_d_conv2")
    b_d_conv2 = bias_variable([16], "b_d_conv2")
    output_shape_d_conv2 = stack([batch, 2, 6, 16])
    h_d_conv2 = tf.nn.relu(
        deconv2d(h_d_conv1, W_d_conv2, output_shape_d_conv2))

    x_reconstruct = h_d_conv2
    print("reconstruct layer shape : %s" % x_reconstruct.get_shape())
    return x_origin, code_layer, x_reconstruct
def add_prediction_op(self):
    """Build the prediction graph: two 1-D conv layers and two dense layers.

    Reads ``self.x`` (network input), ``self.config.strlen`` (used to size
    the flattened conv output) and ``self.keep_prob`` (dropout keep
    probability).

    Returns:
        The un-activated output of the final dense layer, with 3 units.
    """
    fs = [5, 5]  # filter sizes
    # cs[i] is the number of channels leaving layer i (layer 0 is the input).
    cs = [4, 40, 80]
    num_outputs = 3

    # First conv layer.
    W_conv1 = utils.weight_variable([fs[0], cs[0], cs[1]])
    b_conv1 = utils.bias_variable([cs[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    # Second conv layer.
    W_conv2 = utils.weight_variable([fs[1], cs[1], cs[2]])
    b_conv2 = utils.bias_variable([cs[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # First fully connected layer; the conv output is flattened per example.
    flat_units = self.config.strlen * cs[2]
    W_fc1 = utils.weight_variable([flat_units, 1024])
    b_fc1 = utils.bias_variable([1024])
    h_conv2_flat = tf.reshape(h_conv2, [-1, flat_units])
    h_fc1 = utils.lrelu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout (only applied before the final layer).
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully connected layer.  The bias has one entry per output unit;
    # it was previously shape [1], i.e. a single value broadcast over all
    # three outputs.
    W_fc2 = utils.weight_variable([1024, num_outputs])
    b_fc2 = utils.bias_variable([num_outputs])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv
def add_prediction_op(self):
    """Build the prediction graph ending in a masked sigmoid output.

    Two 1-D conv layers are followed by a 1024-unit dense layer, dropout and
    a single-unit dense layer whose sigmoid is multiplied by an input mask.
    Reads ``self.x``, ``self.config.strlen`` and ``self.keep_prob``.

    Returns:
        The sigmoid output multiplied by the input mask.
    """
    filter_sizes = [5, 5]
    # channels[i] is the channel count leaving layer i (layer 0 = input).
    channels = [4, 40, 80]

    # Convolutional stack.
    W_conv1 = utils.weight_variable(
        [filter_sizes[0], channels[0], channels[1]])
    b_conv1 = utils.bias_variable([channels[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    W_conv2 = utils.weight_variable(
        [filter_sizes[1], channels[1], channels[2]])
    b_conv2 = utils.bias_variable([channels[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # Dense layer on the flattened conv output.
    flat_units = self.config.strlen * channels[2]
    W_fc1 = utils.weight_variable([flat_units, 1024])
    b_fc1 = utils.bias_variable([1024])
    flat = tf.reshape(h_conv2, [-1, flat_units])
    h_fc1 = utils.lrelu(tf.matmul(flat, W_fc1) + b_fc1)

    # Dropout (only applied before the final layer).
    dropped = tf.nn.dropout(h_fc1, self.keep_prob)

    # Single-unit output layer followed by a sigmoid.
    W_fc2 = utils.weight_variable([1024, 1])
    b_fc2 = utils.bias_variable([1])
    logits = tf.matmul(dropped, W_fc2) + b_fc2
    probs = tf.sigmoid(logits)

    # Sum of every entry of x, clipped into [0, 1].
    # NOTE(review): the sum runs over the whole batch, so this is one scalar
    # shared by all examples, not a per-example mask -- confirm the intent.
    input_mask = tf.clip_by_value(tf.reduce_sum(self.x), 0, 1)
    return tf.multiply(probs, input_mask)
def _generator(self, z, dims, train_phase, activation=tf.nn.relu,
               scope_name="generator"):
    """Build the transposed-convolution generator.

    ``z`` is projected by a dense layer, reshaped to a square feature map,
    and upsampled by strided transposed convolutions (doubling the side
    length each time) until ``self.resized_image_size`` is reached.

    Args:
        z: Latent input, multiplied by a ``[self.z_dim, ...]`` weight matrix.
        dims: Channel sizes from the first feature map to the output image.
        train_phase: Passed to ``utils.batch_norm``.
        activation: Activation used after each batch-normalised layer.
        scope_name: Variable scope holding the generator variables.

    Returns:
        The ``tanh``-activated generated image tensor.
    """
    depth = len(dims)
    image_size = self.resized_image_size // (2 ** (depth - 1))

    with tf.variable_scope(scope_name):
        # Dense projection of z, reshaped to the smallest feature map.
        flat_units = dims[0] * image_size * image_size
        W_z = utils.weight_variable([self.z_dim, flat_units], name="W_z")
        b_z = utils.bias_variable([flat_units], name="b_z")
        projected = tf.matmul(z, W_z) + b_z
        projected = tf.reshape(projected,
                               [-1, image_size, image_size, dims[0]])
        projected_bn = utils.batch_norm(projected, dims[0], train_phase,
                                        scope="gen_bnz")
        h = activation(projected_bn, name='h_z')
        utils.add_activation_summary(h)

        # Hidden upsampling layers.
        for index in range(depth - 2):
            image_size *= 2
            in_ch, out_ch = dims[index], dims[index + 1]
            W = utils.weight_variable([5, 5, out_ch, in_ch],
                                      name="W_%d" % index)
            b = utils.bias_variable([out_ch], name="b_%d" % index)
            target_shape = tf.stack(
                [tf.shape(h)[0], image_size, image_size, out_ch])
            upsampled = utils.conv2d_transpose_strided(
                h, W, b, output_shape=target_shape)
            upsampled_bn = utils.batch_norm(upsampled, out_ch, train_phase,
                                            scope="gen_bn%d" % index)
            h = activation(upsampled_bn, name='h_%d' % index)
            utils.add_activation_summary(h)

        # Output layer: no batch norm, tanh activation.
        image_size *= 2
        W_pred = utils.weight_variable([5, 5, dims[-1], dims[-2]],
                                       name="W_pred")
        b_pred = utils.bias_variable([dims[-1]], name="b_pred")
        target_shape = tf.stack(
            [tf.shape(h)[0], image_size, image_size, dims[-1]])
        upsampled = utils.conv2d_transpose_strided(
            h, W_pred, b_pred, output_shape=target_shape)
        pred_image = tf.nn.tanh(upsampled, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
def _discriminator(self, input_images, dims, train_phase, activation=tf.nn.relu,
                   scope_name="discriminator", scope_reuse=False):
    """Build the convolutional discriminator with a dense prediction layer.

    Args:
        input_images: Tensor fed to the first strided convolution.
        dims: Channel sizes; the last entry is the prediction width.
        train_phase: Passed to ``utils.batch_norm``.
        activation: Activation applied after every conv layer.
        scope_name: Variable scope holding the discriminator variables.
        scope_reuse: When true, variables in the scope are reused.

    Returns:
        Tuple ``(sigmoid(h_pred), h_pred, h)`` where ``h`` is the last conv
        feature map and ``h_pred`` the dense layer output.
    """
    depth = len(dims)
    with tf.variable_scope(scope_name) as scope:
        if scope_reuse:
            scope.reuse_variables()

        h = input_images
        for index in range(depth - 2):
            out_ch = dims[index + 1]
            W = utils.weight_variable([5, 5, dims[index], out_ch],
                                      name="W_%d" % index)
            b = utils.bias_variable([out_ch], name="b_%d" % index)
            h_conv = utils.conv2d_strided(h, W, b)
            if index == 0:
                # The first discriminator layer skips batch norm.
                h_bn = h_conv
            else:
                h_bn = utils.batch_norm(h_conv, out_ch, train_phase,
                                        scope="disc_bn%d" % index)
            h = activation(h_bn, name="h_%d" % index)
            utils.add_activation_summary(h)

        channels = h.get_shape().as_list()[3]
        # dims holds both the input and the output dim, hence N - 2 halvings.
        image_size = self.resized_image_size // (2 ** (depth - 2))
        flat_units = image_size * image_size * channels

        h_reshaped = tf.reshape(h, [self.batch_size, flat_units])
        W_pred = utils.weight_variable([flat_units, dims[-1]], name="W_pred")
        b_pred = utils.bias_variable([dims[-1]], name="b_pred")
        h_pred = tf.matmul(h_reshaped, W_pred) + b_pred

    return tf.nn.sigmoid(h_pred), h_pred, h
def build(self, input, is_dropout=False):
    """Build a LeNet-style network: two conv/avg-pool stages, three dense layers.

    Args:
        input: Image tensor fed to the first convolution; its last dimension
            is taken as the channel count.
        is_dropout: When true, dropout with keep probability 0.5 is applied
            after each of the first two dense layers.

    Returns:
        The ReLU-activated output of the final 10-unit dense layer.
    """
    # Variable shapes need a Python int, so use the static channel count;
    # `tf.shape(input)[-1]` is a tensor and leaves the variable shape
    # undefined.  Fall back to it only when the static count is unknown.
    in_channels = input.get_shape().as_list()[-1]
    if in_channels is None:
        in_channels = tf.shape(input)[-1]

    # Conv layer 1: 5x5 kernel, 6 filters.
    W_conv1 = weight_variable([5, 5, in_channels, 6])
    b_conv1 = bias_variable([6])
    h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1)
    h_pool1 = avg_pooling(h_conv1)

    # Conv layer 2: 5x5 kernel, 6 filters.
    W_conv2 = weight_variable([5, 5, 6, 6])
    b_conv2 = bias_variable([6])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = avg_pooling(h_conv2)

    # Dense layer 1: 120 units.  The pooled map is flattened to 7*7*6 values
    # per example (presumably 28x28 inputs halved twice -- TODO confirm).
    W_fc1 = weight_variable([7 * 7 * 6, 120])
    b_fc1 = bias_variable([120])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 6])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    if is_dropout:
        h_fc1 = tf.nn.dropout(h_fc1, 0.5)

    # Dense layer 2: 84 units.
    W_fc2 = weight_variable([120, 84])
    b_fc2 = bias_variable([84])
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
    if is_dropout:
        h_fc2 = tf.nn.dropout(h_fc2, 0.5)

    # Output layer: 10 units.
    # NOTE(review): ReLU is applied to the final outputs; confirm this is
    # what the loss expects.
    W_fc3 = weight_variable([84, 10])
    b_fc3 = bias_variable([10])
    h_fc3 = tf.nn.relu(tf.matmul(h_fc2, W_fc3) + b_fc3)
    return h_fc3
def create_network(self):
    """Build the 46-class CNN, its training ops and summaries, then init variables.

    Creates the placeholders ``self._x`` (flattened 32x32 images),
    ``self._y`` (46-way targets) and ``self._keep_prob``; three
    ``le_net_conv_pool`` stages with dropout; a 256-unit dense layer and a
    softmax readout (``self._y_conv``).  Also defines ``self._train_step``
    and ``self._accuracy`` and runs the variable initialiser on
    ``self.sess``.
    """
    with tf.name_scope('Input'):
        self._x = tf.placeholder(tf.float32, shape=[None, 1024], name='X')
        self._y = tf.placeholder(tf.float32, shape=[None, 46], name='Y')
        self._keep_prob = tf.placeholder(tf.float32)

    # Stage k uses conv_count=k and widens the feature map to the k-th entry.
    stage_channels = (16, 32, 64)
    features = None
    in_channels = 1
    for stage_no, out_channels in enumerate(stage_channels, start=1):
        with tf.name_scope('LeNetConvPool_%d' % stage_no):
            if stage_no == 1:
                features = tf.reshape(self._x, [-1, 32, 32, 1])
            pooled = utils.le_net_conv_pool(
                features,
                input_channels=in_channels,
                output_channels=out_channels,
                conv_count=stage_no)
            features = tf.nn.dropout(pooled, self._keep_prob)
        in_channels = out_channels

    flat_units = 4 * 4 * stage_channels[-1]
    with tf.name_scope('FullConnect'):
        W_fc1 = utils.weight_variable([flat_units, 256], 'W_fc1')
        b_fc1 = utils.bias_variable([256], 'b_fc1')
        flat = tf.reshape(features, [-1, flat_units])
        h_fc1 = tf.nn.relu(tf.matmul(flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, self._keep_prob)

    with tf.name_scope('ReadoutLayer'):
        W_fc2 = utils.weight_variable([256, 46], 'W_fc2')
        b_fc2 = utils.bias_variable([46], 'b_fc2')
        self._y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    with tf.name_scope('Train'):
        # Probabilities are clipped away from zero before taking the log.
        log_probs = tf.log(tf.clip_by_value(self._y_conv, 1e-10, 1.0))
        per_example = -tf.reduce_sum(self._y * log_probs,
                                     reduction_indices=[1])
        cross_entropy = tf.reduce_mean(per_example)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        self._train_step = optimizer.minimize(cross_entropy)
        tf.scalar_summary('Cross Entropy', cross_entropy)

    with tf.name_scope('Accuracy'):
        predicted = tf.argmax(self._y_conv, 1)
        expected = tf.argmax(self._y, 1)
        correct_prediction = tf.equal(predicted, expected)
        self._accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
        tf.scalar_summary('Accuracy', self._accuracy)

    self.sess.run(tf.initialize_all_variables())
def __init__(self, input_dim, hidden_dim, n_layers=1, stddev=1., bias_value=0.0):
    """Create the masked weights of an MLP with one hidden block per input pair.

    Every unordered pair ``(u, v)`` of input dimensions owns ``hidden_dim``
    hidden units.  The input/output weights are masked so a block is only
    connected to its own two inputs, and hidden-to-hidden weights are masked
    to be block diagonal.

    Args:
        input_dim: Number of input (and output) dimensions.
        hidden_dim: Hidden units per pair.
        n_layers: Number of hidden layers; ``n_layers - 1`` extra
            hidden-to-hidden layers are created.
        stddev: Weights are created with ``stddev / sqrt(hidden_dim)``.
        bias_value: Initial value, applied to the hidden-layer biases only.
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.hidden_layers = []

    # Enumerate all unordered pairs (u, v) with u < v.
    pairs = [(u, v)
             for u in range(input_dim)
             for v in range(u + 1, input_dim)]
    num_pairs = int((input_dim * (input_dim - 1)) / 2)
    total_hidden = num_pairs * hidden_dim
    scaled_stddev = stddev / np.sqrt(hidden_dim)

    with tf.variable_scope('pairs_mlp'):
        self.w_in_var = weight_variable(
            (input_dim, total_hidden), scaled_stddev, name='w_in')
        self.w_out_var = weight_variable(
            (total_hidden, input_dim), scaled_stddev, name='w_out')
        self.bias_hid = bias_variable(
            (total_hidden,), value=bias_value, name='bias_first_hid')
        self.bias_out = bias_variable((input_dim,), name='bias_out')

        # Input mask: block j is connected to the two inputs of pair j.
        mask = np.zeros((input_dim, total_hidden), dtype='float32')
        for j, (u, v) in enumerate(pairs):
            block = slice(j * hidden_dim, (j + 1) * hidden_dim)
            mask[u, block] = 1.0
            mask[v, block] = 1.0

        # Hidden-to-hidden mask: block diagonal, one block per pair.
        hid_to_hid_mask = np.zeros((total_hidden, total_hidden),
                                   dtype='float32')
        for start in range(0, total_hidden, hidden_dim):
            stop = start + hidden_dim
            hid_to_hid_mask[start:stop, start:stop] = 1.0

        self.hid_to_hid_mask = tf.convert_to_tensor(hid_to_hid_mask)
        self.in_out_mask = tf.convert_to_tensor(mask)

        # Element-wise masking of the dense weight matrices.
        self.w_in = self.w_in_var * self.in_out_mask
        self.w_out = self.w_out_var * tf.transpose(self.in_out_mask)

        for i in range(n_layers - 1):
            with tf.variable_scope('layer_' + str(i)):
                w_hid = weight_variable((total_hidden, total_hidden),
                                        scaled_stddev)
                b_hid = bias_variable((total_hidden,), value=bias_value)
                self.hidden_layers.append(
                    (w_hid * self.hid_to_hid_mask, b_hid))
def add_prediction_op(self):
    """Build the prediction graph that combines sequence and coverage features.

    ``self.x`` goes through two 1-D conv layers; ``self.e`` gets a trailing
    channel axis and goes through one 1-D conv layer.  Both feature maps are
    concatenated on the last axis, flattened, and fed through dropout, a
    1024-unit dense layer, dropout again and a single-unit sigmoid output.
    Reads ``self.keep_prob`` for both dropout layers.

    Returns:
        The sigmoid output of the final dense layer.
    """
    filter_sizes = [5, 5]
    # channels[i] is the channel count leaving layer i (layer 0 = input).
    channels = [4, 40, 80]

    # Two conv layers over the sequence input.
    W_conv1 = utils.weight_variable(
        [filter_sizes[0], channels[0], channels[1]])
    b_conv1 = utils.bias_variable([channels[1]])
    h_conv1 = utils.lrelu(utils.conv1d(self.x, W_conv1) + b_conv1)

    W_conv2 = utils.weight_variable(
        [filter_sizes[1], channels[1], channels[2]])
    b_conv2 = utils.bias_variable([channels[2]])
    h_conv2 = utils.lrelu(utils.conv1d(h_conv1, W_conv2) + b_conv2)

    # One conv layer over the coverage input (single input channel).
    W_conv_coverage = utils.weight_variable(
        [filter_sizes[0], 1, channels[2]])
    b_conv_coverage = utils.bias_variable([channels[2]])
    coverage = tf.expand_dims(self.e, -1)
    h_conv_coverage = utils.lrelu(
        utils.conv1d(coverage, W_conv_coverage) + b_conv_coverage)

    merged = tf.concat([h_conv2, h_conv_coverage], axis=-1)

    # Flatten everything but the batch axis, then apply dropout.
    flat_units = np.prod(merged.get_shape().as_list()[1:])
    merged_flat = tf.reshape(merged, [-1, flat_units])
    merged_drop = tf.nn.dropout(merged_flat, self.keep_prob)

    # First fully connected layer.
    fc1_in = merged_flat.get_shape().as_list()[-1]
    W_fc1 = utils.weight_variable([fc1_in, 1024])
    b_fc1 = utils.bias_variable([1024])
    h_fc1 = utils.lrelu(tf.matmul(merged_drop, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

    # Final fully connected layer with sigmoid output.
    W_fc2 = utils.weight_variable([1024, 1])
    b_fc2 = utils.bias_variable([1])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return tf.sigmoid(logits)
def init_weights(self):
    """Initialize all the trainable weights.

    Creates two weight/bias pairs for the sensor path
    (``sensor_size -> hg_size -> g_size``) and two for the location path
    (``loc_dim -> hl_size -> g_size``).
    """
    # First layer of each path.
    self.w_g0 = weight_variable((self.sensor_size, self.hg_size))
    self.b_g0 = bias_variable((self.hg_size, ))
    self.w_l0 = weight_variable((self.loc_dim, self.hl_size))
    self.b_l0 = bias_variable((self.hl_size, ))

    # Second layer of each path; both project to g_size.
    self.w_g1 = weight_variable((self.hg_size, self.g_size))
    self.b_g1 = bias_variable((self.g_size, ))
    self.w_l1 = weight_variable((self.hl_size, self.g_size))
    # Created with bias_variable like every other bias here; it was
    # previously created with weight_variable.
    self.b_l1 = bias_variable((self.g_size, ))
def init_weights(self):
    """Initialize all the trainable weights.

    Creates three conv layers (5x5, 3x3, 3x3 with 64, 64 and 128 filters)
    followed by one dense layer mapping the flattened conv output to
    ``self.output_dim``.  Also stores the flattened size in ``self.flat_dim``.
    """
    n_filt0, n_filt1, n_filt2 = 64, 64, 128

    # Convolutional weights and biases.
    self.w_c0 = weight_variable((5, 5, self.num_channels, n_filt0))
    self.b_c0 = bias_variable((n_filt0, ))
    self.w_c1 = weight_variable((3, 3, n_filt0, n_filt1))
    self.b_c1 = bias_variable((n_filt1, ))
    self.w_c2 = weight_variable((3, 3, n_filt1, n_filt2))
    self.b_c2 = bias_variable((n_filt2, ))

    # Flattened conv output size; the side length is coarse_size - 8
    # (presumably the shrinkage from unpadded convolutions -- TODO confirm).
    side = self.coarse_size - 8
    self.flat_dim = side**2 * n_filt2

    # Dense output layer.
    self.w_x0 = weight_variable((self.flat_dim, self.output_dim))
    self.b_x0 = bias_variable((self.output_dim, ))
def _generator(self, z, dims, train_phase, activation=tf.nn.relu,
               scope_name="generator"):
    """Build the transposed-convolution generator (4x4 kernels, zero biases).

    ``z`` is projected by a bias-free dense layer, reshaped to a square
    feature map, batch-normalised and then upsampled by strided transposed
    convolutions until ``self.resized_image_size`` is reached.

    Args:
        z: Latent input, multiplied by a ``[self.z_dim, ...]`` weight matrix.
        dims: Channel sizes from the first feature map to the output image.
        train_phase: Passed as the last argument of ``utils.batch_norm``.
        activation: Activation used after each batch-normalised layer.
        scope_name: Variable scope holding the generator variables.

    Returns:
        The ``tanh``-activated generated image tensor.
    """
    depth = len(dims)
    image_size = self.resized_image_size // (2**(depth - 1))

    with tf.variable_scope(scope_name):
        # Dense projection of z (no bias), reshaped to the smallest map.
        W_z = utils.weight_variable(
            [self.z_dim, dims[0] * image_size * image_size], name="W_z")
        projected = tf.reshape(tf.matmul(z, W_z),
                               [-1, image_size, image_size, dims[0]])
        projected_bn = utils.batch_norm('gen_bnz', projected, True, 'NHWC',
                                        train_phase)
        h = activation(projected_bn, name='h_z')
        utils.add_activation_summary(h)

        # Hidden upsampling layers; each doubles the side length.
        for index in range(depth - 2):
            image_size *= 2
            in_ch, out_ch = dims[index], dims[index + 1]
            W = utils.weight_variable([4, 4, out_ch, in_ch],
                                      name="W_%d" % index)
            b = tf.zeros([out_ch])
            target_shape = tf.stack(
                [tf.shape(h)[0], image_size, image_size, out_ch])
            upsampled = utils.conv2d_transpose_strided(
                h, W, b, output_shape=target_shape)
            upsampled_bn = utils.batch_norm("gen_bn%d" % index, upsampled,
                                            True, 'NHWC', train_phase)
            h = activation(upsampled_bn, name='h_%d' % index)
            utils.add_activation_summary(h)

        # Output layer: no batch norm, tanh activation.
        image_size *= 2
        W_pred = utils.weight_variable([4, 4, dims[-1], dims[-2]],
                                       name="W_pred")
        b = tf.zeros([dims[-1]])
        target_shape = tf.stack(
            [tf.shape(h)[0], image_size, image_size, dims[-1]])
        upsampled = utils.conv2d_transpose_strided(
            h, W_pred, b, output_shape=target_shape)
        pred_image = tf.nn.tanh(upsampled, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
def UNet(x, keep_probability):
    """Build a U-Net-style encoder/decoder ending in a 1-channel 1x1 conv.

    Three ``conv_block`` + max-pool stages lead down to a fourth conv block;
    three ``upsampling_bolck`` + ``conv_block`` stages lead back up, each
    combining the decoder features with the matching encoder output.

    Args:
        x: Input tensor for the first conv block.
        keep_probability: Accepted for interface compatibility; not used in
            this function.

    Returns:
        Output of the final 1x1 convolution (``'conv_6'``).
    """
    # Contracting path: keep every pre-pool output for the skip connections.
    enc1 = conv_block(x, 1)
    enc2 = conv_block(utils.max_pool(enc1, 2), 2)
    enc3 = conv_block(utils.max_pool(enc2, 2), 3)
    bottom = conv_block(utils.max_pool(enc3, 2), 4)

    # Expanding path.
    dec5 = conv_block(upsampling_bolck(enc3, bottom, 5), 5)
    dec6 = conv_block(upsampling_bolck(enc2, dec5, 6), 6)
    dec7 = conv_block(upsampling_bolck(enc1, dec6, 7), 7)

    # 1x1 convolution down to a single channel.
    W6 = utils.weight_variable([1, 1, dec7.get_shape()[3].value, 1],
                               name='W6')
    b6 = utils.bias_variable([1], 'bias6')
    return utils.conv2d(dec7, W6, b6, 'conv_6')
def LearningRegularizationOmitting(cv_left, cv_right, batch_size=1, F=32,
                                   D=192, H=256, W=512, SHARE=None):
    """Apply one shared transposed 3-D convolution to both cost volumes.

    Args:
        cv_left: Left cost volume.
        cv_right: Right cost volume.
        batch_size: Batch dimension of the requested output shape.
        F: Feature count; the kernel has ``2 * F`` as its last dimension.
        D, H, W: Remaining dimensions of the requested output shape.
        SHARE: Passed as ``reuse`` to the ``'params'`` variable scope.

    Returns:
        Tuple ``(left, right)`` of outputs with requested shape
        ``[batch_size, D, H, W, 1]``.
    """
    with tf.name_scope('Conv3d37'):
        with tf.variable_scope('params', reuse=SHARE):
            kernel = weight_variable((3, 3, 3, 1, 2 * F))
        target_shape = [batch_size, D, H, W, 1]
        # The same kernel is used for both volumes.
        out_left = conv3dt(cv_left, kernel,
                           outputshape=target_shape, stride=2)
        out_right = conv3dt(cv_right, kernel,
                            outputshape=target_shape, stride=2)
    return out_left, out_right
def conv_layer(input, r_field, input_c, out_c, nr):
    """Create one square conv layer followed by ReLU.

    Args:
        input: Tensor to convolve.
        r_field: Side length of the square kernel.
        input_c: Number of input channels.
        out_c: Number of output channels.
        nr: Layer number, appended to the variable and op names.

    Returns:
        The ReLU-activated convolution output.
    """
    suffix = str(nr)
    kernel = utils.weight_variable([r_field, r_field, input_c, out_c],
                                   name="W" + suffix)
    bias = utils.bias_variable([out_c], name="b" + suffix)
    pre_activation = utils.conv2d_basic(input, kernel, bias,
                                        name="conv" + suffix)
    return tf.nn.relu(pre_activation, name="relu" + suffix)
def init_weights(self):
    """Initialize all the trainable weights.

    Image path: three conv layers (5x5, 3x3, 3x3 with 64, 64 and 128
    filters) and one dense layer to ``self.hg_size``.  Location path: one
    dense layer from ``self.loc_dim`` to ``self.hl_size``.  Also stores the
    flattened conv output size in ``self.flat_dim``.
    """
    n_filt0, n_filt1, n_filt2 = 64, 64, 128

    # Image path: convolutional weights and biases.
    self.w_c0 = weight_variable((5, 5, self.depth, n_filt0))
    self.b_c0 = bias_variable((n_filt0, ))
    self.w_c1 = weight_variable((3, 3, n_filt0, n_filt1))
    self.b_c1 = bias_variable((n_filt1, ))
    self.w_c2 = weight_variable((3, 3, n_filt1, n_filt2))
    self.b_c2 = bias_variable((n_filt2, ))

    # Flattened conv output size; the side length is win_size - 8
    # (presumably the shrinkage from unpadded convolutions -- TODO confirm).
    side = self.win_size - 8
    self.flat_dim = side**2 * n_filt2

    # Image path: dense layer.
    self.w_x0 = weight_variable((self.flat_dim, self.hg_size))
    self.b_x0 = bias_variable((self.hg_size, ))

    # Location path.
    self.w_l0 = weight_variable((self.loc_dim, self.hl_size))
    self.b_l0 = bias_variable((self.hl_size, ))
def fcnLayer(x, keep_probability):
    """Stack five 3x3 ``singleLayer`` blocks and a 1-channel 1x1 output conv.

    Args:
        x: Input tensor for the first layer.
        keep_probability: Forwarded to every ``singleLayer`` call.

    Returns:
        Output of the final 1x1 convolution (``'conv_out'``).
    """
    kernel_w = 3
    kernel_h = 3
    # Output channels of layers 1..5.
    layer_channels = (128, 128, 64, 64, 64)

    features = x
    for layer_no, out_channels in enumerate(layer_channels, start=1):
        features = singleLayer(features, kernel_w, kernel_h, out_channels,
                               layer_no, keep_probability)

    # 1x1 convolution down to a single channel.
    W_out = utils.weight_variable(
        [1, 1, features.get_shape()[3].value, 1], name='W_out')
    b_out = utils.bias_variable([1], 'bias_out')
    return utils.conv2d(features, W_out, b_out, name='conv_out')
def inference(self, flag):
    """Build a three-stage conv net with two dense layers on ``self.ph_image``.

    Args:
        flag: Passed as ``reuse`` to the outer ``'layers'`` variable scope.

    Returns:
        Output of the second dense layer (10 units, no activation).
    """
    pool_ksize = [1, 3, 3, 1]
    pool_strides = [1, 2, 2, 1]

    with tf.variable_scope('layers', reuse=flag):
        # Stage 1: conv -> max-pool -> ReLU.
        with tf.variable_scope('stage1') as scope:
            kernel = utils.weight_variable([5, 5, 3, 32], name='weights')
            biases = utils.bias_variable([32], name='biases')
            conv1 = utils.conv2d(self.ph_image, kernel, biases)
            pool1 = tf.nn.max_pool(conv1, ksize=pool_ksize,
                                   strides=pool_strides, padding='SAME',
                                   name='pool1')
            stage1_out = tf.nn.relu(pool1, name=scope.name)

        # Stage 2: conv -> ReLU -> avg-pool.
        with tf.variable_scope('stage2') as scope:
            kernel = utils.weight_variable([5, 5, 32, 32], name='weights')
            biases = utils.bias_variable([32], name='biases')
            conv2 = utils.conv2d(stage1_out, kernel, biases)
            relu2 = tf.nn.relu(conv2, name='relu2')
            stage2_out = tf.nn.avg_pool(relu2, ksize=pool_ksize,
                                        strides=pool_strides, padding='SAME',
                                        name=scope.name)

        # Stage 3: conv -> ReLU -> avg-pool.
        with tf.variable_scope('stage3') as scope:
            kernel = utils.weight_variable([5, 5, 32, 64], name='weights')
            biases = utils.bias_variable([64], name='biases')
            conv3 = utils.conv2d(stage2_out, kernel, biases)
            relu3 = tf.nn.relu(conv3, name='relu3')
            stage3_out = tf.nn.avg_pool(relu3, ksize=pool_ksize,
                                        strides=pool_strides, padding='SAME',
                                        name=scope.name)

        # Flatten everything but the batch axis.
        flat_dim = 1
        for size in stage3_out.get_shape()[1:].as_list():
            flat_dim *= size
        flat = tf.reshape(stage3_out, [-1, flat_dim])

        with tf.variable_scope('fc1'):
            weights = utils.weight_variable([flat_dim, 64], name='weights')
            biases = utils.bias_variable([64], name='biases')
            fc1 = tf.matmul(flat, weights) + biases

        with tf.variable_scope('fc2'):
            weights = utils.weight_variable([64, 10], name='weights')
            biases = utils.bias_variable([10], name='biases')
            fc2 = tf.matmul(fc1, weights) + biases

    return fc2
def __init__(self, input_dim, hidden_dim, n_layers=1, stddev=1., bias_value=0.0):
    """Create the masked weights of an MLP with one hidden block per input.

    Every input dimension owns ``hidden_dim`` hidden units.  Input/output
    weights are masked so each block is only connected to its own input, and
    hidden-to-hidden weights are masked to be block diagonal.

    Args:
        input_dim: Number of input (and output) dimensions.
        hidden_dim: Hidden units per input dimension.
        n_layers: Number of hidden layers; ``n_layers - 1`` extra
            hidden-to-hidden layers are created.
        stddev: Weights are created with ``stddev / sqrt(hidden_dim)``.
        bias_value: Initial value, applied to the hidden-layer biases only.
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.hidden_layers = []

    total_hidden = input_dim * hidden_dim
    scaled_stddev = stddev / np.sqrt(hidden_dim)

    with tf.variable_scope('block_mlp'):
        self.w_in_var = weight_variable(
            (input_dim, total_hidden), scaled_stddev, name='w_in')
        self.w_out_var = weight_variable(
            (total_hidden, input_dim), scaled_stddev, name='w_out')
        self.bias_hid = bias_variable(
            (total_hidden, ), value=bias_value, name='bias_first_hid')
        self.bias_out = bias_variable((input_dim, ), name='bias_out')

        # Input mask: input i feeds only hidden block i.
        mask = np.zeros((input_dim, total_hidden), dtype='float32')
        for i in range(input_dim):
            mask[i, i * hidden_dim:(i + 1) * hidden_dim] = 1.0

        # Hidden-to-hidden mask: block diagonal, one block per input.
        hid_to_hid_mask = np.zeros((total_hidden, total_hidden),
                                   dtype='float32')
        for start in range(0, total_hidden, hidden_dim):
            stop = start + hidden_dim
            hid_to_hid_mask[start:stop, start:stop] = 1.0

        self.hid_to_hid_mask = tf.convert_to_tensor(hid_to_hid_mask)
        self.in_out_mask = tf.convert_to_tensor(mask)

        # Element-wise masking of the dense weight matrices.
        self.w_in = self.w_in_var * self.in_out_mask
        self.w_out = self.w_out_var * tf.transpose(self.in_out_mask)

        for i in range(n_layers - 1):
            with tf.variable_scope('layer_' + str(i)):
                w_hid = weight_variable((total_hidden, total_hidden),
                                        scaled_stddev)
                b_hid = bias_variable((total_hidden, ), value=bias_value)
                self.hidden_layers.append(
                    (w_hid * self.hid_to_hid_mask, b_hid))
def convLayer(x, layer):
    """Apply a 3x3 convolution with 64 output channels followed by ReLU.

    Args:
        x: Input tensor; its channel count is read from static dimension 3.
        layer: Identifier formatted into the variable names.

    Returns:
        The ReLU-activated convolution output.
    """
    in_channels = x.get_shape()[3].value
    kernel = utils.weight_variable([3, 3, in_channels, 64],
                                   name='W%s' % layer)
    bias = utils.bias_variable([64], 'bias%s' % layer)
    return tf.nn.relu(utils.conv2d(x, kernel, bias))
def _init_discriminator_variables(self):
    """Create the discriminator's conv and dense weights and biases.

    Three 5x5 conv layers (3 -> 128 -> 256 -> 512 channels) are always
    created; a fourth (512 -> 512) is added when ``self.discr_whole_image``
    is true.  A dense layer maps ``4 * 4 * 512`` features to one output.
    Variables are stored as ``self._W_discr<k>`` / ``self._b_discr<k>`` and
    ``self._W_dfc`` / ``self._b_dfc``.
    """
    # (input channels, output channels) of each conv layer, in order.
    conv_channels = [(3, 128), (128, 256), (256, 512)]
    if self.discr_whole_image:
        conv_channels.append((512, 512))

    with tf.name_scope('discriminator'):
        with tf.name_scope('weights'):
            for layer_no, (c_in, c_out) in enumerate(conv_channels, start=1):
                setattr(self, '_W_discr%d' % layer_no,
                        weight_variable([5, 5, c_in, c_out]))
            self._W_dfc = weight_variable([4 * 4 * 512, 1])

        with tf.name_scope('biases'):
            for layer_no, (_, c_out) in enumerate(conv_channels, start=1):
                setattr(self, '_b_discr%d' % layer_no,
                        bias_variable([c_out]))
            self._b_dfc = bias_variable([1])
def _fully_connected_layer(self):
    """Fully connected layer with 1024 neurons over the whole pooled image.

    Returns:
        The ReLU-activated dense layer output.
    """
    flat_units = 7 * 7 * 64
    weights = utils.weight_variable([flat_units, 1024])
    bias = utils.bias_variable([1024])
    # NOTE(review): `self._pool_layer_2` is used as a tensor here, so it is
    # presumably exposed through a property-style decorator that is not
    # visible in this block -- confirm.
    pooled_flat = tf.reshape(self._pool_layer_2, [-1, flat_units])
    return tf.nn.relu(tf.matmul(pooled_flat, weights) + bias)
def _pool_layer_2(self):
    """Second conv + pooling layer (5x5 kernel, 32 -> 64 channels).

    Returns:
        The 2x2 max-pooled, ReLU-activated convolution output.
    """
    kernel = utils.weight_variable([5, 5, 32, 64])
    bias = utils.bias_variable([64])
    # NOTE(review): `self._pool_layer_1` is used as a tensor here, so it is
    # presumably exposed through a property-style decorator that is not
    # visible in this block -- confirm.
    activated = tf.nn.relu(utils.conv2d(self._pool_layer_1, kernel) + bias)
    # Per the original author's note, the image is 7 x 7 after this pool.
    return utils.max_pool_2x2(activated)
def classifier(self, x, phase_train, is_training=True, reuse=False):
    '''
    The end part of the deep network which takes in the extracted features
    and classifies them.

    Args:
        x: Feature tensor multiplied by a ``[256, 128]`` weight matrix.
        phase_train: Accepted for interface compatibility; not used here.
        is_training: Accepted for interface compatibility; not used here.
        reuse: Passed as ``reuse`` to the ``'classifier'`` variable scope.

    Returns:
        The un-activated output of the final dense layer (4 units).
    '''
    with tf.variable_scope('classifier', reuse=reuse):
        with tf.name_scope('fc4'):
            W_fc2 = ut.weight_variable([256, 128], 'fc4_wt')
            b_fc2 = ut.bias_variable([128], 'fc4_bias')
            h_fc2 = tf.nn.relu(tf.matmul(x, W_fc2) + b_fc2)

        with tf.name_scope('fc5'):
            W_fc3 = ut.weight_variable([128, 4], 'fc5_wt')
            # The name was 'fc5 _bias'; a space is not a valid character in
            # a TensorFlow node name, and every sibling uses '<layer>_bias'.
            b_fc3 = ut.bias_variable([4], 'fc5_bias')
            out = tf.matmul(h_fc2, W_fc3) + b_fc3

    return out
def _value_network(self, state):
    """Builds the value network.

    Two 3x3 conv + ReLU layers are followed by a flatten and a single-unit
    dense layer.

    Args:
        state: Input tensor for the first convolution (1 input channel).

    Returns:
        The un-activated scalar value output per example.
    """
    n_filters_1 = self.NUM_CONV_1_FILTERS
    n_filters_2 = self.NUM_CONV_2_FILTERS

    # First conv layer.
    w1 = weight_variable([3, 3, 1, n_filters_1], name='w1')
    b1 = bias_variable([n_filters_1])
    hidden1 = tf.nn.relu(conv2d(state, w1) + b1)

    # Second conv layer.
    w2 = weight_variable([3, 3, n_filters_1, n_filters_2], name='w2')
    b2 = bias_variable([n_filters_2])
    hidden2 = tf.nn.relu(conv2d(hidden1, w2) + b2)

    # Dense value head on the flattened features.
    flat = layers.flatten(hidden2)
    flat_units = flat.get_shape().as_list()[1]
    w3 = weight_variable([flat_units, 1], name='w3')
    b3 = bias_variable([1])
    return tf.matmul(flat, w3) + b3
def __init__(self, input_layer, input_layer_size, num_classes, scope=None,
             **kwargs):
    """Build the multiscale (tree-structured) output layer and its losses.

    Args:
        input_layer: Tensor feeding the dense layer of tree-node logits.
        input_layer_size: Width of ``input_layer``.
        num_classes: Bin count, or a sequence of per-dimension bin counts.
        scope: Variable scope name; defaults to the class name.
        **kwargs: Accepted and ignored here.
    """
    if not hasattr(num_classes, "__len__"):
        num_classes = (num_classes, )

    # All splits are done via half-spaces, so there are always 2^k-1 output
    # nodes.  Non-power-of-two sizes are handled by tracking the buffer
    # sizes separately from the actual multinomial dimensions.
    self._num_classes = num_classes
    self._dim_sizes = [2**(int(np.ceil(np.log2(c)))) for c in num_classes]
    # Flatten the density into a 1-d grid.
    self._num_nodes = np.prod(self._dim_sizes) - 1
    self._split_labels, self._split_masks = self.multinomial_split_masks()

    total_bins = np.prod(self._num_classes)
    with tf.variable_scope(scope or type(self).__name__):
        self._labels = tf.placeholder(tf.float32, shape=[None, total_bins])
        W = weight_variable([input_layer_size, self._num_nodes])
        b = bias_variable([self._num_nodes])

        # Look up the split labels and masks of each example's target bin.
        split_indices = tf.to_int32(tf.argmax(self._labels, 1))
        splits = tf.gather(self._split_labels, split_indices)
        z = tf.gather(self._split_masks, split_indices)

        # q holds the tree-node values (logistic of the dense output).
        node_logits = tf.matmul(input_layer, W) + b
        self._q = tf.reciprocal(1 + tf.exp(-node_logits))

        # Masked binary cross-entropy over the tree nodes.
        r = splits * tf.log(tf.clip_by_value(self._q, 1e-10, 1.0))
        s = (1 - splits) * tf.log(tf.clip_by_value(1 - self._q, 1e-10, 1.0))
        self._loss_function = tf.reduce_mean(
            -tf.reduce_sum(z * (r + s), axis=[1]))

        # Convert from multiscale output to multinomial output; m holds the
        # values of the multinomial bins.
        L, R = self.multiscale_splits_masks()
        q_tiles = tf.constant([1, total_bins])
        m = tf.map_fn(
            lambda q_i: self.multiscale_to_multinomial(q_i, L, R, q_tiles),
            self._q)

        # Reshape to the original dimensions of the density.
        density_shape = tf.stack(
            [tf.shape(self._q)[0]] + list(self._num_classes))
        self._density = tf.reshape(m, density_shape)

        log_m = tf.log(tf.clip_by_value(m, 1e-10, 1.0))
        log_not_m = tf.log(tf.clip_by_value(1 - m, 1e-10, 1.0))
        self._cross_entropy = tf.reduce_mean(-tf.reduce_sum(
            self._labels * log_m + (1 - self._labels) * log_not_m,
            axis=[1]))
def _pool_layer_1(self):
    """First conv + pooling layer (5x5 kernel, 1 -> 32 channels).

    ``self.image`` is reshaped to ``[-1, 28, 28, 1]`` before the convolution.

    Returns:
        The 2x2 max-pooled, ReLU-activated convolution output.
    """
    kernel = utils.weight_variable([5, 5, 1, 32])
    bias = utils.bias_variable([32])
    image = tf.reshape(self.image, [-1, 28, 28, 1])
    activated = tf.nn.relu(utils.conv2d(image, kernel) + bias)
    # Per the original author's note, the image is 14 x 14 after this pool.
    return utils.max_pool_2x2(activated)
def bottleneck(x, layer, is_train):
    """Apply a 1x1 -> 3x3 -> 1x1 convolution stack with 64 channels each.

    ReLU follows the first two convolutions; the third is returned without
    an activation.

    Args:
        x: Input tensor; its channel count is read from static dimension 3.
        layer: Identifier formatted into the variable names
            (``W<layer>_1`` .. ``W<layer>_3``).
        is_train: Accepted for interface compatibility; not used here.

    Returns:
        Output of the third convolution.
    """
    out = x
    # (kernel size, variable suffix, apply ReLU afterwards)
    stages = ((1, 1, True), (3, 2, True), (1, 3, False))
    for ksize, suffix, use_relu in stages:
        in_channels = out.get_shape()[3].value
        kernel = utils.weight_variable(
            [ksize, ksize, in_channels, 64],
            name='W%s_%d' % (layer, suffix))
        out = utils.resConv2d(out, kernel)
        if use_relu:
            out = tf.nn.relu(out)
    return out
def VAE(input_shape=[None, 784],
        n_components_encoder=200,
        n_components_decoder=200,
        n_hidden=20,
        continuous=False,
        denoising=False,
        debug=False):
    """Build a single-hidden-layer variational autoencoder graph.

    Args:
        input_shape: Shape of the input placeholder (replaced by [50, 784]
            when ``debug`` is set).
        n_components_encoder: Width of the encoder hidden layer.
        n_components_decoder: Width of the decoder hidden layer.
        n_hidden: Size of the latent code ``z``.
        continuous: If True use a softplus activation and a Gaussian
            log-likelihood; otherwise tanh and a Bernoulli log-likelihood.
        denoising: If True the encoder reads ``corrupt(x)``; the likelihood
            is still evaluated against the clean ``x``.
        debug: If True use a fixed-size zero ``tf.Variable`` as input instead
            of a placeholder.

    Returns:
        dict with keys ``'cost'`` (scalar loss summed over the batch),
        ``'x'`` (input), ``'z'`` (sampled code) and ``'y'`` (reconstruction).
    """
    # Guards the logs of the Bernoulli likelihood: a saturated sigmoid would
    # otherwise give 0 * log(0) = NaN.
    eps = 1e-10

    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape)
    print('* Input')
    print('X:', x.get_shape().as_list())

    # %%
    # Optionally apply noise
    if denoising:
        print('* Denoising')
        x_noise = corrupt(x)
    else:
        x_noise = x

    if continuous:
        def activation(t):
            # softplus
            return tf.log(1 + tf.exp(t))
    else:
        activation = tf.tanh

    dims = x_noise.get_shape().as_list()
    n_features = dims[1]

    print('* Encoder')
    W_enc = weight_variable([n_features, n_components_encoder])
    b_enc = bias_variable([n_components_encoder])
    h_enc = activation(tf.matmul(x_noise, W_enc) + b_enc)
    print('in:', x_noise.get_shape().as_list(),
          'W_enc:', W_enc.get_shape().as_list(),
          'b_enc:', b_enc.get_shape().as_list(),
          'h_enc:', h_enc.get_shape().as_list())

    print('* Variational Autoencoder')
    W_mu = weight_variable([n_components_encoder, n_hidden])
    b_mu = bias_variable([n_hidden])
    W_log_sigma = weight_variable([n_components_encoder, n_hidden])
    b_log_sigma = bias_variable([n_hidden])

    z_mu = tf.matmul(h_enc, W_mu) + b_mu
    # The layer output is treated as log(sigma^2); halving gives log(sigma).
    z_log_sigma = 0.5 * (tf.matmul(h_enc, W_log_sigma) + b_log_sigma)
    print('in:', h_enc.get_shape().as_list(),
          'W_mu:', W_mu.get_shape().as_list(),
          'b_mu:', b_mu.get_shape().as_list(),
          'z_mu:', z_mu.get_shape().as_list())
    print('in:', h_enc.get_shape().as_list(),
          'W_log_sigma:', W_log_sigma.get_shape().as_list(),
          'b_log_sigma:', b_log_sigma.get_shape().as_list(),
          'z_log_sigma:', z_log_sigma.get_shape().as_list())

    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_hidden])
    else:
        epsilon = tf.random_normal(tf.pack([tf.shape(x)[0], n_hidden]))
    print('epsilon:', epsilon.get_shape().as_list())

    # Sample from posterior
    z = z_mu + tf.exp(z_log_sigma) * epsilon
    print('z:', z.get_shape().as_list())

    print('* Decoder')
    W_dec = weight_variable([n_hidden, n_components_decoder])
    b_dec = bias_variable([n_components_decoder])
    h_dec = activation(tf.matmul(z, W_dec) + b_dec)
    print('in:', z.get_shape().as_list(),
          'W_dec:', W_dec.get_shape().as_list(),
          'b_dec:', b_dec.get_shape().as_list(),
          'h_dec:', h_dec.get_shape().as_list())

    W_mu_dec = weight_variable([n_components_decoder, n_features])
    b_mu_dec = bias_variable([n_features])
    y = tf.nn.sigmoid(tf.matmul(h_dec, W_mu_dec) + b_mu_dec)
    # The input of this layer is h_dec (the original printed z's shape here).
    print('in:', h_dec.get_shape().as_list(),
          'W_mu_dec:', W_mu_dec.get_shape().as_list(),
          'b_mu_dec:', b_mu_dec.get_shape().as_list(),
          'y:', y.get_shape().as_list())

    # Built in both modes, but only used by the continuous likelihood.
    W_log_sigma_dec = weight_variable([n_components_decoder, n_features])
    b_log_sigma_dec = bias_variable([n_features])
    y_log_sigma = 0.5 * (
        tf.matmul(h_dec, W_log_sigma_dec) + b_log_sigma_dec)
    print('in:', h_dec.get_shape().as_list(),
          'W_log_sigma_dec:', W_log_sigma_dec.get_shape().as_list(),
          'b_log_sigma_dec:', b_log_sigma_dec.get_shape().as_list(),
          'y_log_sigma:', y_log_sigma.get_shape().as_list())

    # p(x|z)
    if continuous:
        # Gaussian log-density with mean y and std exp(y_log_sigma).
        log_px_given_z = tf.reduce_sum(
            -(0.5 * tf.log(2.0 * np.pi) + y_log_sigma) -
            0.5 * tf.square((x - y) / tf.exp(y_log_sigma)))
    else:
        # Bernoulli log-likelihood, kept finite by eps.
        log_px_given_z = tf.reduce_sum(
            x * tf.log(y + eps) +
            (1 - x) * tf.log(1 - y + eps))

    # d_kl(q(z|x)||p(z))
    # Appendix B: 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    # (this is the *negative* KL term, hence the single negation below)
    kl_div = 0.5 * tf.reduce_sum(
        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
        tf.exp(2.0 * z_log_sigma))

    print('* Output')
    print('Y:', y.get_shape().as_list())

    loss = -(log_px_given_z + kl_div)

    return {'cost': loss, 'x': x, 'z': z, 'y': y}
def VAE(input_shape=[None, 784],
        n_filters=[],
        filter_sizes=[],
        n_hidden=512,
        n_code=64,
        activation=tf.nn.relu,
        denoising=False,
        convolutional=False,
        debug=False):
    """Build a (optionally convolutional, optionally denoising) VAE graph.

    Fixes relative to the previous version:
      * ``n_filters`` is no longer reversed in place, so the caller's list
        is left untouched.
      * With ``denoising=True`` the encoder now reads the corrupted input in
        both the dense and the convolutional path (the dense path used to
        ignore the corruption), and the reconstruction cost is measured
        against the clean input (the convolutional path used to compare
        against the corrupted one).
    Behavior with ``denoising=False`` is unchanged.

    NOTE(review): the pre-1.0 TensorFlow names used here (``tf.pack``,
    ``tf.mul``, ``tf.nn.deconv2d``) are left as they were; confirm the
    TensorFlow version this file targets before renaming them.

    Args:
        input_shape: Shape of the input placeholder (replaced by [50, 784]
            when ``debug`` is set).
        n_filters: Layer sizes of the encoder; the decoder mirrors them.
        filter_sizes: Square kernel size per layer (convolutional mode only).
        n_hidden: Width of the fully connected layers around the code.
        n_code: Size of the latent code ``z``.
        activation: Activation applied in the encoder/decoder layer stacks.
        denoising: If True the encoder input is ``corrupt(x)``.
        convolutional: If True use strided conv / deconv layers; a 2-d input
            is reshaped to square single-channel images.
        debug: If True use a fixed-size zero ``tf.Variable`` as input instead
            of a placeholder.

    Returns:
        dict with keys ``'cost'``, ``'x'``, ``'z'`` and ``'y'``.

    Raises:
        ValueError: In convolutional mode, if the input is neither 4-d nor a
            2-d tensor with a perfect-square feature count.
    """

    def _as_network_tensor(tensor):
        # Dense mode uses the tensor as is; convolutional mode needs 4-d.
        if not convolutional:
            return tensor
        rank = len(tensor.get_shape())
        if rank == 2:
            # ensure 2-d is converted to square tensor.
            x_dim = np.sqrt(tensor.get_shape().as_list()[1])
            if x_dim != int(x_dim):
                raise ValueError('Unsupported input dimensions')
            x_dim = int(x_dim)
            return tf.reshape(tensor, [-1, x_dim, x_dim, 1])
        if rank == 4:
            return tensor
        raise ValueError('Unsupported input dimensions')

    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape)

    # x_tensor is the clean reconstruction target; current_input is what the
    # encoder sees (the corrupted version only when denoising).
    x_tensor = _as_network_tensor(x)
    if denoising:
        # assumes corrupt() preserves the static shape of x - TODO confirm
        current_input = _as_network_tensor(corrupt(x))
    else:
        current_input = x_tensor
    print('* Input')
    print('X:', current_input.get_shape().as_list())

    # %%
    # Build the encoder
    shapes = []
    print('* Encoder')
    for layer_i, n_input in enumerate(n_filters[:-1]):
        n_output = n_filters[layer_i + 1]
        # Remember each layer's input shape so the decoder can mirror it.
        shapes.append(current_input.get_shape().as_list())
        if convolutional:
            n_input = shapes[-1][3]
            W = weight_variable([
                filter_sizes[layer_i],
                filter_sizes[layer_i],
                n_input, n_output])
            b = bias_variable([n_output])
            output = activation(
                tf.add(tf.nn.conv2d(
                    current_input, W,
                    strides=[1, 2, 2, 1], padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:', current_input.get_shape().as_list(),
              'W:', W.get_shape().as_list(),
              'b:', b.get_shape().as_list(),
              'out:', output.get_shape().as_list())
        current_input = output

    dims = current_input.get_shape().as_list()
    if convolutional:
        # %%
        # Flatten and build latent layer as means and standard deviations
        size = (dims[1] * dims[2] * dims[3])
        if debug:
            flattened = tf.reshape(current_input, [dims[0], size])
        else:
            flattened = tf.reshape(
                current_input, tf.pack([tf.shape(x)[0], size]))
    else:
        size = dims[1]
        flattened = current_input

    print('* Reshape')
    print(current_input.get_shape().as_list(),
          '->', flattened.get_shape().as_list())

    print('* FC Layer')
    W_fc = weight_variable([size, n_hidden])
    b_fc = bias_variable([n_hidden])
    h = tf.nn.tanh(tf.matmul(flattened, W_fc) + b_fc)
    print('in:', current_input.get_shape().as_list(),
          'W_fc:', W_fc.get_shape().as_list(),
          'b_fc:', b_fc.get_shape().as_list(),
          'h:', h.get_shape().as_list())

    print('* Variational Autoencoder')
    W_mu = weight_variable([n_hidden, n_code])
    b_mu = bias_variable([n_code])
    W_sigma = weight_variable([n_hidden, n_code])
    b_sigma = bias_variable([n_code])

    mu = tf.matmul(h, W_mu) + b_mu
    log_sigma = tf.mul(0.5, tf.matmul(h, W_sigma) + b_sigma)
    print('in:', h.get_shape().as_list(),
          'W_mu:', W_mu.get_shape().as_list(),
          'b_mu:', b_mu.get_shape().as_list(),
          'mu:', mu.get_shape().as_list())
    print('in:', h.get_shape().as_list(),
          'W_sigma:', W_sigma.get_shape().as_list(),
          'b_sigma:', b_sigma.get_shape().as_list(),
          'log_sigma:', log_sigma.get_shape().as_list())

    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_code])
    else:
        epsilon = tf.random_normal(tf.pack([tf.shape(x)[0], n_code]))
    print('epsilon:', epsilon.get_shape().as_list())

    # Sample from posterior
    z = mu + tf.mul(epsilon, tf.exp(log_sigma))
    print('z:', z.get_shape().as_list())

    print('* Decoder')
    W_dec = weight_variable([n_code, n_hidden])
    b_dec = bias_variable([n_hidden])
    h_dec = tf.nn.relu(tf.matmul(z, W_dec) + b_dec)
    print('in:', z.get_shape().as_list(),
          'W_dec:', W_dec.get_shape().as_list(),
          'b_dec:', b_dec.get_shape().as_list(),
          'h_dec:', h_dec.get_shape().as_list())

    W_fc_t = weight_variable([n_hidden, size])
    b_fc_t = bias_variable([size])
    h_fc_dec = tf.nn.relu(tf.matmul(h_dec, W_fc_t) + b_fc_t)
    print('in:', h_dec.get_shape().as_list(),
          'W_fc_t:', W_fc_t.get_shape().as_list(),
          'b_fc_t:', b_fc_t.get_shape().as_list(),
          'h_fc_dec:', h_fc_dec.get_shape().as_list())

    if convolutional:
        if debug:
            h_tensor = tf.reshape(
                h_fc_dec, [dims[0], dims[1], dims[2], dims[3]])
        else:
            h_tensor = tf.reshape(
                h_fc_dec,
                tf.pack([tf.shape(x)[0], dims[1], dims[2], dims[3]]))
    else:
        h_tensor = h_fc_dec

    # Mirror the encoder. `shapes` is local, so reversing it in place is
    # fine; the caller-owned `n_filters` is copied instead of mutated.
    shapes.reverse()
    decoder_filters = list(reversed(n_filters))

    print('* Reshape')
    print(h_fc_dec.get_shape().as_list(),
          '->', h_tensor.get_shape().as_list())

    # %%
    # Decoding layers
    current_input = h_tensor
    for layer_i in range(len(decoder_filters) - 1):
        n_input = decoder_filters[layer_i]
        n_output = decoder_filters[layer_i + 1]
        shape = shapes[layer_i]
        if convolutional:
            W = weight_variable([
                filter_sizes[layer_i],
                filter_sizes[layer_i],
                n_output, n_input])
            b = bias_variable([n_output])
            if debug:
                output = activation(tf.add(
                    tf.nn.deconv2d(
                        current_input, W,
                        shape,
                        strides=[1, 2, 2, 1], padding='SAME'), b))
            else:
                output = activation(tf.add(
                    tf.nn.deconv2d(
                        current_input, W,
                        tf.pack(
                            [tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                        strides=[1, 2, 2, 1], padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:', current_input.get_shape().as_list(),
              'W:', W.get_shape().as_list(),
              'b:', b.get_shape().as_list(),
              'out:', output.get_shape().as_list())
        current_input = output

    # %%
    # Now have the reconstruction through the network
    y_tensor = current_input
    y = tf.reshape(y_tensor, tf.pack([tf.shape(x)[0], input_shape[1]]))

    print('* Output')
    print('Y:', y_tensor.get_shape().as_list())

    # %%
    # Log Prior: D_KL(q(z|x)||p(z))
    # Equation 10
    prior_loss = 0.5 * tf.reduce_sum(
        1.0 + 2.0 * log_sigma - tf.pow(mu, 2.0) - tf.exp(2.0 * log_sigma))

    # Reconstruction cost: L1 distance to the clean input
    recon_loss = tf.reduce_sum(tf.abs(y_tensor - x_tensor))

    # Total cost
    loss = recon_loss - prior_loss

    return {'cost': loss, 'x': x, 'z': z, 'y': y}
def VAE(input_shape=[None, 784],
        n_components_encoder=2048,
        n_components_decoder=2048,
        n_hidden=2,
        debug=False):
    """Build a VAE graph with three dense softplus layers on each side.

    The reconstruction ``y`` is produced by a sigmoid so it lies in (0, 1),
    as the Bernoulli cross-entropy below requires. The previous tanh output
    could be negative, which made ``log(y + 1e-10)`` NaN.

    Args:
        input_shape: Shape of the input placeholder (replaced by [50, 784]
            when ``debug`` is set).
        n_components_encoder: Width of each encoder layer.
        n_components_decoder: Width of each decoder layer.
        n_hidden: Size of the latent code ``z``.
        debug: If True use a fixed-size zero ``tf.Variable`` as input instead
            of a placeholder.

    Returns:
        dict with keys ``'cost'`` (batch mean of reconstruction cost plus
        KL), ``'x'`` (input), ``'z'`` (sampled code), ``'y'``
        (reconstruction).
    """
    n_layers = 3

    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape)

    activation = tf.nn.softplus

    dims = x.get_shape().as_list()
    n_features = dims[1]

    # Encoder: n_features -> n_components_encoder (x3)
    h_enc = x
    n_in = n_features
    for _ in range(n_layers):
        W_enc = weight_variable([n_in, n_components_encoder])
        b_enc = bias_variable([n_components_encoder])
        h_enc = activation(tf.matmul(h_enc, W_enc) + b_enc)
        n_in = n_components_encoder

    W_mu = weight_variable([n_components_encoder, n_hidden])
    b_mu = bias_variable([n_hidden])
    W_log_sigma = weight_variable([n_components_encoder, n_hidden])
    b_log_sigma = bias_variable([n_hidden])

    z_mu = tf.matmul(h_enc, W_mu) + b_mu
    # The layer output is treated as log(sigma^2); halving gives log(sigma).
    z_log_sigma = 0.5 * (tf.matmul(h_enc, W_log_sigma) + b_log_sigma)

    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_hidden])
    else:
        epsilon = tf.random_normal(tf.pack([tf.shape(x)[0], n_hidden]))

    # Sample from posterior
    z = z_mu + tf.exp(z_log_sigma) * epsilon

    # Decoder: n_hidden -> n_components_decoder (x3)
    h_dec = z
    n_in = n_hidden
    for _ in range(n_layers):
        W_dec = weight_variable([n_in, n_components_decoder])
        b_dec = bias_variable([n_components_decoder])
        h_dec = activation(tf.matmul(h_dec, W_dec) + b_dec)
        n_in = n_components_decoder

    W_mu_dec = weight_variable([n_components_decoder, n_features])
    b_mu_dec = bias_variable([n_features])
    # Sigmoid (not tanh): y is used as a Bernoulli mean and must be in (0, 1).
    y = tf.nn.sigmoid(tf.matmul(h_dec, W_mu_dec) + b_mu_dec)

    # p(x|z): per-example *negative* Bernoulli log-likelihood
    log_px_given_z = -tf.reduce_sum(
        x * tf.log(y + 1e-10) +
        (1 - x) * tf.log(1 - y + 1e-10), 1)

    # d_kl(q(z|x)||p(z))
    # Appendix B: 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_div = -0.5 * tf.reduce_sum(
        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma),
        1)
    loss = tf.reduce_mean(log_px_given_z + kl_div)

    return {'cost': loss, 'x': x, 'z': z, 'y': y}
def VAE(input_shape=[None, 784],
        n_filters=[1, 64, 128, 128],
        filter_sizes=[3, 3, 3, 3],
        n_hidden=512,
        n_code=2,
        activation=tf.nn.relu,
        convolutional=True,
        debug=False):
    """Build a (by default convolutional) variational autoencoder graph.

    ``n_filters`` is no longer reversed in place. The previous version called
    ``n_filters.reverse()`` on the argument, which flipped the shared default
    list on every call (so every second default call built a different
    network) and also modified any list passed in by the caller.

    NOTE(review): the pre-1.0 TensorFlow names used here (``tf.pack``,
    ``tf.mul``) are left as they were; confirm the TensorFlow version this
    file targets before renaming them.

    Args:
        input_shape: Shape of the input placeholder (replaced by [50, 784]
            when ``debug`` is set).
        n_filters: Layer sizes of the encoder; the decoder mirrors them.
        filter_sizes: Square kernel size per layer (convolutional mode only).
        n_hidden: Width of the fully connected layers around the code.
        n_code: Size of the latent code ``z``.
        activation: Activation used for all hidden layers.
        convolutional: If True use strided conv / transposed-conv layers; a
            2-d input is reshaped to square single-channel images.
        debug: If True use a fixed-size zero ``tf.Variable`` as input instead
            of a placeholder.

    Returns:
        dict with keys ``'cost'`` (batch mean of reconstruction cost plus
        KL), ``'x'`` (input), ``'z'`` (sampled code), ``'y'``
        (reconstruction).

    Raises:
        ValueError: In convolutional mode, if the input is neither 4-d nor a
            2-d tensor with a perfect-square feature count.
    """
    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape, 'x')

    # %%
    # ensure 2-d is converted to square tensor.
    if convolutional:
        if len(x.get_shape()) == 2:
            x_dim = np.sqrt(x.get_shape().as_list()[1])
            if x_dim != int(x_dim):
                raise ValueError('Unsupported input dimensions')
            x_dim = int(x_dim)
            x_tensor = tf.reshape(x, [-1, x_dim, x_dim, 1])
        elif len(x.get_shape()) == 4:
            x_tensor = x
        else:
            raise ValueError('Unsupported input dimensions')
    else:
        x_tensor = x
    current_input = x_tensor

    print('* Input')
    print('X:', current_input.get_shape().as_list())

    # %%
    # Build the encoder
    shapes = []
    print('* Encoder')
    for layer_i, n_input in enumerate(n_filters[:-1]):
        n_output = n_filters[layer_i + 1]
        # Remember each layer's input shape so the decoder can mirror it.
        shapes.append(current_input.get_shape().as_list())
        if convolutional:
            n_input = shapes[-1][3]
            W = weight_variable([
                filter_sizes[layer_i],
                filter_sizes[layer_i],
                n_input, n_output])
            b = bias_variable([n_output])
            output = activation(
                tf.add(tf.nn.conv2d(
                    current_input, W,
                    strides=[1, 2, 2, 1], padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:', current_input.get_shape().as_list(),
              'W:', W.get_shape().as_list(),
              'b:', b.get_shape().as_list(),
              'out:', output.get_shape().as_list())
        current_input = output

    dims = current_input.get_shape().as_list()
    if convolutional:
        # %%
        # Flatten and build latent layer as means and standard deviations
        size = (dims[1] * dims[2] * dims[3])
        flattened = tf.reshape(
            current_input,
            [dims[0], size] if debug else
            tf.pack([tf.shape(current_input)[0], size]))
    else:
        size = dims[1]
        flattened = current_input

    print('* Reshape')
    print(current_input.get_shape().as_list(),
          '->', flattened.get_shape().as_list())

    print('* FC Layer')
    W_fc = weight_variable([size, n_hidden])
    b_fc = bias_variable([n_hidden])
    h = activation(tf.matmul(flattened, W_fc) + b_fc)
    print('in:', current_input.get_shape().as_list(),
          'W_fc:', W_fc.get_shape().as_list(),
          'b_fc:', b_fc.get_shape().as_list(),
          'h:', h.get_shape().as_list())

    print('* Variational Autoencoder')
    W_mu = weight_variable([n_hidden, n_code])
    b_mu = bias_variable([n_code])
    W_sigma = weight_variable([n_hidden, n_code])
    b_sigma = bias_variable([n_code])

    z_mu = tf.matmul(h, W_mu) + b_mu
    # Note: only the matmul is halved here; the bias is added unscaled.
    z_log_sigma = 0.5 * tf.matmul(h, W_sigma) + b_sigma
    print('in:', h.get_shape().as_list(),
          'W_mu:', W_mu.get_shape().as_list(),
          'b_mu:', b_mu.get_shape().as_list(),
          'mu:', z_mu.get_shape().as_list())
    print('in:', h.get_shape().as_list(),
          'W_sigma:', W_sigma.get_shape().as_list(),
          'b_sigma:', b_sigma.get_shape().as_list(),
          'log_sigma:', z_log_sigma.get_shape().as_list())

    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_code])
    else:
        epsilon = tf.random_normal(tf.pack([tf.shape(x)[0], n_code]))

    # Sample from posterior
    z = z_mu + tf.mul(epsilon, tf.exp(z_log_sigma))
    print('z:', z.get_shape().as_list())

    print('* Decoder')
    W_dec = weight_variable([n_code, n_hidden])
    b_dec = bias_variable([n_hidden])
    h_dec = activation(tf.matmul(z, W_dec) + b_dec)
    print('in:', z.get_shape().as_list(),
          'W_dec:', W_dec.get_shape().as_list(),
          'b_dec:', b_dec.get_shape().as_list(),
          'h_dec:', h_dec.get_shape().as_list())

    W_fc_t = weight_variable([n_hidden, size])
    b_fc_t = bias_variable([size])
    h_fc_dec = activation(tf.matmul(h_dec, W_fc_t) + b_fc_t)
    print('in:', h_dec.get_shape().as_list(),
          'W_fc_t:', W_fc_t.get_shape().as_list(),
          'b_fc_t:', b_fc_t.get_shape().as_list(),
          'h_fc_dec:', h_fc_dec.get_shape().as_list())

    if convolutional:
        h_tensor = tf.reshape(
            h_fc_dec,
            [dims[0], dims[1], dims[2], dims[3]] if debug else
            tf.pack([tf.shape(h_dec)[0], dims[1], dims[2], dims[3]]))
    else:
        h_tensor = h_fc_dec

    # Mirror the encoder. `shapes` is local, so reversing it in place is
    # fine; `n_filters` (possibly the shared default list) is copied instead
    # of mutated.
    shapes.reverse()
    decoder_filters = list(reversed(n_filters))

    print('* Reshape')
    print(h_fc_dec.get_shape().as_list(),
          '->', h_tensor.get_shape().as_list())

    # %%
    # Decoding layers
    current_input = h_tensor
    for layer_i in range(len(decoder_filters) - 1):
        n_input = decoder_filters[layer_i]
        n_output = decoder_filters[layer_i + 1]
        shape = shapes[layer_i]
        if convolutional:
            W = weight_variable([
                filter_sizes[layer_i],
                filter_sizes[layer_i],
                n_output, n_input])
            b = bias_variable([n_output])
            output = activation(tf.add(
                tf.nn.conv2d_transpose(
                    current_input, W,
                    shape if debug else tf.pack(
                        [tf.shape(current_input)[0],
                         shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1], padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:', current_input.get_shape().as_list(),
              'W:', W.get_shape().as_list(),
              'b:', b.get_shape().as_list(),
              'out:', output.get_shape().as_list())
        current_input = output

    dec_flat = tf.reshape(
        current_input,
        tf.pack([tf.shape(current_input)[0], input_shape[1]]))

    # %%
    # An extra fc layer and nonlinearity
    W_fc_final = weight_variable([input_shape[1], input_shape[1]])
    b_fc_final = bias_variable([input_shape[1]])
    y = tf.nn.sigmoid(tf.matmul(dec_flat, W_fc_final) + b_fc_final)

    # p(x|z): per-example *negative* Bernoulli log-likelihood
    log_px_given_z = -tf.reduce_sum(
        x * tf.log(y + 1e-10) +
        (1 - x) * tf.log(1 - y + 1e-10), 1)

    # d_kl(q(z|x)||p(z))
    # Appendix B: 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_div = -0.5 * tf.reduce_sum(
        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma),
        1)
    loss = tf.reduce_mean(log_px_given_z + kl_div)

    return {'cost': loss, 'x': x, 'z': z, 'y': y}