def decoder(input):
    """Build a decoder made of one fully connected layer and three deconvolutions.

    Args:
        input: Tensor fed to the fully connected layer. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The output tensor of the last deconvolution, which uses a sigmoid
        non-linearity.
    """
    # FC: 128 outputs, reshaped into a 4 x 4 map with 8 channels.
    # The activation is whatever `layers.fc` applies by default (the original
    # notes say relu -- not verifiable from this function).
    fc = layers.fc(input, name='dfc', out_dim=128)
    dfc = tf.reshape(fc, [-1, 4, 4, 8])

    # Deconv 1: filter [3, 3, 8], stride [2, 2], SAME padding.
    dconv1 = layers.deconv(dfc, name='deconv1', filter_dims=[3, 3, 8],
                           stride_dims=[2, 2], padding='SAME')
    print("dconv1 shape", dconv1.get_shape().as_list())

    # Deconv 2: filter [8, 8, 1], stride [2, 2], VALID padding.
    dconv2 = layers.deconv(dconv1, name='deconv2', filter_dims=[8, 8, 1],
                           stride_dims=[2, 2], padding='VALID')
    print("dconv2 shape", dconv2.get_shape().as_list())

    # Deconv 3: filter [7, 7, 1], stride [1, 1], VALID padding, sigmoid output.
    dconv3 = layers.deconv(dconv2, name='deconv3', filter_dims=[7, 7, 1],
                           stride_dims=[1, 1], padding='VALID',
                           non_linear_fn=tf.nn.sigmoid)
    print("dconv3 shape", dconv3.get_shape().as_list())

    # The original had an unreachable `raise NotImplementedError` after this
    # return (leftover template code); it has been removed.
    return dconv3
def decoder_network(latent, anchor_layer=None, activation='swish', scope='g_decoder_network', bn_phaze=False):
    """Decode a latent tensor into an image-like tensor squashed by tanh.

    The latent is projected by a fully connected layer to a 6 x 6 x 32 map and
    then passed through four stride-2 deconvolutions interleaved with residual
    blocks, a self-attention layer, a dense transition layer to 3 channels and
    a final residual block.

    Relies on the module-level names `batch_size` and `g_dense_block_depth`.

    Args:
        latent: Latent tensor fed to the fully connected layer.
        anchor_layer: Optional tensor concatenated on axis 3 right before the
            last deconvolution.
        activation: One of 'swish', 'relu', 'lrelu'; any other value selects
            sigmoid.
        scope: Variable scope name (opened with AUTO_REUSE).
        bn_phaze: Forwarded to the batch-norm / residual helpers as their
            training flag (presumably a boolean tensor -- confirm against the
            helpers).

    Returns:
        The decoded tensor after tanh.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'lrelu':
            act_func = tf.nn.leaky_relu
        else:
            act_func = tf.nn.sigmoid

        # The original routed `latent` through a tf.cond whose two branches
        # both returned `latent`; that no-op has been removed.
        l = layers.fc(latent, 6 * 6 * 32, non_linear_fn=act_func)

        print('decoder input:', str(latent.get_shape().as_list()))

        l = tf.reshape(l, shape=[-1, 6, 6, 32])

        l = add_residual_block(l, filter_dims=[3, 3, g_dense_block_depth * 4], num_layers=4,
                               act_func=act_func, bn_phaze=bn_phaze, use_residual=False,
                               scope='block_0')
        print('block 0:', str(l.get_shape().as_list()))

        # Three identical upsampling stages whose channel depth shrinks from
        # 3x to 1x g_dense_block_depth. Per the original notes the spatial
        # size goes 12 x 12, 24 x 24, 48 x 48.
        for stage, depth_mult in enumerate((3, 2, 1), start=1):
            depth = g_dense_block_depth * depth_mult

            l = layers.batch_norm_conv(l, b_train=bn_phaze, scope='bn' + str(stage))
            l = act_func(l)

            l = layers.deconv(l, b_size=batch_size, scope='g_dec_deconv' + str(stage),
                              filter_dims=[3, 3, depth], stride_dims=[2, 2],
                              padding='SAME', non_linear_fn=None)
            print('deconv' + str(stage) + ':', str(l.get_shape().as_list()))

            l = add_residual_block(l, filter_dims=[3, 3, depth], num_layers=4,
                                   act_func=act_func, bn_phaze=bn_phaze, use_residual=False,
                                   scope='block_' + str(stage), use_dilation=True)

        l = layers.batch_norm_conv(l, b_train=bn_phaze, scope='bn4')
        l = act_func(l)

        l = layers.self_attention(l, g_dense_block_depth, act_func=act_func)

        if anchor_layer is not None:
            l = tf.concat([l, anchor_layer], axis=3)

        # Last upsampling stage (96 x 96 per the original notes).
        l = layers.deconv(l, b_size=batch_size, scope='g_dec_deconv4',
                          filter_dims=[3, 3, g_dense_block_depth], stride_dims=[2, 2],
                          padding='SAME', non_linear_fn=None)

        l = add_residual_block(l, filter_dims=[3, 3, g_dense_block_depth], num_layers=2,
                               act_func=act_func, bn_phaze=bn_phaze, use_residual=False,
                               scope='block_4', use_dilation=True)

        # Reduce to 3 channels before the final residual block.
        l = layers.add_dense_transition_layer(l, filter_dims=[1, 1, 3], act_func=act_func,
                                              scope='dense_transition_1', bn_phaze=bn_phaze,
                                              use_pool=False)

        l = add_residual_block(l, filter_dims=[3, 3, 3], num_layers=2,
                               act_func=act_func, bn_phaze=bn_phaze, use_residual=False,
                               scope='block_5', use_dilation=True)

        l = tf.nn.tanh(l)

        print('final:', str(l.get_shape().as_list()))

        return l
def translator(x, activation='relu', scope='translator', norm='layer', use_upsample=False, b_train=False):
    """Build an encoder / bottleneck / decoder translation network.

    The input goes through a 7x7 stem convolution, a series of stride-2
    convolutions that double the channel depth, nine residual bottleneck
    blocks, and then as many upsampling steps (bicubic resize + conv, or
    stride-2 deconvolution) that halve the depth again. A final 7x7
    convolution with tanh produces `num_channel` channels.

    Relies on the module-level names `input_width`, `dense_block_depth` and
    `num_channel`.

    Args:
        x: Input tensor.
        activation: 'swish', 'relu' or 'lrelu'; anything else selects sigmoid.
        scope: Variable scope name (opened with AUTO_REUSE).
        norm: Normalisation kind forwarded to the `layers` helpers.
        use_upsample: Only the exact value True selects resize + conv
            upsampling; otherwise deconvolution is used.
        b_train: Training flag forwarded to the `layers` helpers.

    Returns:
        The translated tensor.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        act_func = (
            util.swish if activation == 'swish'
            else tf.nn.relu if activation == 'relu'
            else tf.nn.leaky_relu if activation == 'lrelu'
            else tf.nn.sigmoid
        )

        bottleneck_width = 64
        num_bottleneck = 9
        # Number of down/up-sampling steps.
        num_scales = int(np.sqrt(input_width // bottleneck_width))

        print('Translator Input: ' + str(x.get_shape().as_list()))

        depth = dense_block_depth

        # Stem: 7x7 convolution, normalisation, activation.
        net = layers.conv(x, scope='conv_init', filter_dims=[7, 7, depth],
                          stride_dims=[1, 1], non_linear_fn=None, bias=False)
        net = layers.conv_normalize(net, norm=norm, b_train=b_train, scope='norm_init')
        net = act_func(net)

        # Downsampling: each step doubles the depth with a stride-2 conv.
        for step in range(num_scales):
            print('Translator Block ' + str(step) + ': ' + str(net.get_shape().as_list()))
            depth = depth * 2
            net = layers.conv(net, scope='tr' + str(step), filter_dims=[3, 3, depth],
                              stride_dims=[2, 2], non_linear_fn=None)
            net = layers.conv_normalize(net, norm=norm, b_train=b_train,
                                        scope='norm_' + str(step))
            net = act_func(net)

        # Bottleneck residual blocks at constant depth.
        for step in range(num_bottleneck):
            print('Bottleneck Block : ' + str(net.get_shape().as_list()))
            net = layers.add_residual_block(net, filter_dims=[3, 3, depth], num_layers=2,
                                            act_func=act_func, norm=norm, b_train=b_train,
                                            scope='bt_block_' + str(step))

        # Upsampling: each step halves the depth.
        for step in range(num_scales):
            depth = depth // 2

            if use_upsample is True:
                width = net.get_shape().as_list()[2]
                height = net.get_shape().as_list()[1]
                net = tf.image.resize_bicubic(net, (2 * height, 2 * width))
                net = layers.conv(net, scope='up_' + str(step), filter_dims=[3, 3, depth],
                                  stride_dims=[1, 1], non_linear_fn=None)
                net = layers.conv_normalize(net, norm=norm, b_train=b_train,
                                            scope='up_norm_' + str(step))
                net = act_func(net)
                print('Upsampling ' + str(step) + ': ' + str(net.get_shape().as_list()))

                for sub in range(2):
                    net = layers.add_residual_block(
                        net, filter_dims=[3, 3, depth], num_layers=2,
                        act_func=act_func, norm=norm, b_train=b_train,
                        scope='block_' + str(step) + '_' + str(sub))
            else:
                net = layers.deconv(net, b_size=net.get_shape().as_list()[0],
                                    scope='deconv_' + str(step),
                                    filter_dims=[3, 3, depth], stride_dims=[2, 2],
                                    padding='SAME', non_linear_fn=None)
                print('Deconvolution ' + str(step) + ': ' + str(net.get_shape().as_list()))
                net = layers.conv_normalize(net, norm=norm, b_train=b_train,
                                            scope='deconv_norm_' + str(step))
                net = act_func(net)

        # Output head: 7x7 convolution with tanh.
        net = layers.conv(net, scope='last', filter_dims=[7, 7, num_channel],
                          stride_dims=[1, 1], non_linear_fn=tf.nn.tanh, bias=False)

        print('Translator Final: ' + str(net.get_shape().as_list()))

    return net
def __init__(self, image_size=64, z_dim=100, conv_dim=64):
    """Create the generator's layers.

    Four spectrally-normalised deconvolution stages (each followed by batch
    norm and leaky ReLU) halve the channel count step by step; an unnormalised
    deconvolution with tanh produces the 3-channel output. Two self-attention
    modules are created, sized for the outputs of stage 3 and stage 4.

    Args:
        image_size: Used only to derive the width of the first stage
            (conv_dim * 2 ** (log2(image_size) - 3)).
        z_dim: Number of input (latent) channels of the first deconvolution.
        conv_dim: Base channel count.
    """
    super(Generator, self).__init__()

    layer1 = []
    layer2 = []
    layer3 = []
    layer4 = []
    output = []

    # layer 1 (with the defaults: layer_num = 3, mult = 8, output_dim = 512)
    layer_num = int(np.log2(image_size)) - 3
    mult = 2 ** layer_num
    output_dim = conv_dim * mult
    layer1.append(spectral_norm(deconv(z_dim, output_dim, kernel_size=4)))
    layer1.append(batch_norm(output_dim))
    layer1.append(lrelu())

    # layer 2 (defaults: 512 -> 256)
    input_dim = output_dim
    output_dim = input_dim // 2
    layer2.append(
        spectral_norm(
            deconv(input_dim, output_dim, kernel_size=4, stride=2, padding=1)))
    layer2.append(batch_norm(output_dim))
    layer2.append(lrelu())

    # layer 3 (defaults: 256 -> 128)
    input_dim = output_dim
    output_dim = input_dim // 2
    layer3.append(
        spectral_norm(
            deconv(input_dim, output_dim, kernel_size=4, stride=2, padding=1)))
    layer3.append(batch_norm(output_dim))
    layer3.append(lrelu())
    # Width seen by the first attention module. The original hard-coded 128,
    # which only matches this value for the default arguments.
    # NOTE(review): assumes forward() applies attn1 to the output of l3 and
    # attn2 to the output of l4 -- confirm.
    attn1_dim = output_dim

    # layer 4 (defaults: 128 -> 64)
    input_dim = output_dim
    output_dim = input_dim // 2
    layer4.append(
        spectral_norm(
            deconv(input_dim, output_dim, kernel_size=4, stride=2, padding=1)))
    layer4.append(batch_norm(output_dim))
    layer4.append(lrelu())
    # Width seen by the second attention module (was hard-coded to 64).
    attn2_dim = output_dim

    # output layer (defaults: 64 -> 3)
    input_dim = output_dim
    output.append(
        deconv(input_dim, out_channels=3, kernel_size=4, stride=2, padding=1))
    output.append(tanh())

    # Registration order is kept as in the original so parameter ordering
    # (and therefore saved state) is unchanged.
    self.l1 = nn.Sequential(*layer1)
    self.l2 = nn.Sequential(*layer2)
    self.l3 = nn.Sequential(*layer3)
    self.attn1 = SelfAttn(attn1_dim)
    self.l4 = nn.Sequential(*layer4)
    self.attn2 = SelfAttn(attn2_dim)
    self.output = nn.Sequential(*output)
def generator(z, y, is_training=True, update_batch_stats=True, act_fn=L.lrelu, bn=FLAGS.gen_bn, reuse=True, dropout=FLAGS.gen_dropout):
    """Build the generator: FC layer, two deconv/conv upsampling stages, tanh output.

    Relies on the module-level names `X_dim`, `Z_dim`, `y_dim`, `num_channels`,
    `rng` and `FLAGS`.

    Args:
        z: Noise tensor.
        y: Conditioning tensor; concatenated to `z` on axis 1 only when
            FLAGS.method == "cgan".
        is_training: Forwarded to the batch-norm helper.
        update_batch_stats: Forwarded to the batch-norm helper.
        act_fn: Activation applied after each normalised layer.
        bn: Whether to apply batch norm (see the note on the last stage).
        reuse: `reuse` argument of the 'generator' variable scope.
        dropout: Whether to apply dropout with keep_prob 0.5 after the conv
            layers.

    Returns:
        The generated tensor after tanh, with `num_channels` channels.
    """
    def _bn(t, dim, name):
        # NOTE(review): the call target was missing in the source text
        # (`h =, <dim>, is_training=...`). It is restored as `L.bn(h, ...)`
        # based on the argument list -- confirm against the `L` module.
        return L.bn(t, dim, is_training=is_training,
                    update_batch_stats=update_batch_stats,
                    use_gamma=False, name=name)

    with tf.variable_scope('generator', reuse=reuse):
        # Floor division keeps these integers under Python 3 as well; true
        # division would hand float dimensions to fc / reshape.
        side = X_dim // 4
        fc_dim = (side ** 2) * 128

        if FLAGS.method == "cgan":
            inputs = tf.concat(axis=1, values=[z, y])
            h = L.fc(inputs, Z_dim + y_dim, fc_dim, seed=rng.randint(123456), name='fc1')
        else:
            h = L.fc(z, Z_dim, fc_dim, seed=rng.randint(123456), name='fc1')
        h = _bn(h, fc_dim, 'bn1') if bn else h
        h = act_fn(h)
        h = tf.reshape(h, [-1, side, side, 128])

        # First upsampling stage (16x16 -> 32x32 per the original notes).
        h = L.deconv(h, ksize=2, stride=2, f_in=128, f_out=64, name="deconv1")
        h = L.conv(h, 5, 1, 64, 64, name="conv1")
        h = _bn(h, 64, 'bn2') if bn else h
        h = tf.nn.dropout(h, keep_prob=0.5) if dropout else h
        h = act_fn(h)

        h = L.conv(h, 3, 1, 64, 64, name="conv2")
        h = _bn(h, 64, 'b3') if bn else h
        h = tf.nn.dropout(h, keep_prob=0.5) if dropout else h
        h = act_fn(h)

        # Second upsampling stage (32x32 -> 64x64 per the original notes).
        h = L.deconv(h, ksize=2, stride=2, f_in=64, f_out=32, name="deconv2")
        h = L.conv(h, 5, 1, 32, 32, name="conv3")
        # NOTE(review): unlike the other stages this batch norm is applied
        # even when `bn` is false. Kept as in the original so existing
        # checkpoints stay compatible -- confirm whether it is intended.
        h = _bn(h, 32, 'b4')
        h = tf.nn.dropout(h, keep_prob=0.5) if dropout else h
        h = act_fn(h)

        h = L.conv(h, 5, 1, 32, num_channels, name="conv4")
        h = tf.nn.tanh(h, name="output")

        return h
def create_generator(self, room):
    """Build a U-Net style generator with skip connections.

    Five encoder layers (2x2 convolutions with stride 2 and valid padding, so
    each halves height and width) are followed by five decoder layers that
    use the project's `deconv` helper. Every decoder except the first
    concatenates the matching encoder output on the channel axis.

    Reads `self.opt.ngf` (base filter count) and `self.depth` (number of
    output channels); the last `ROTATION_COUNT` output channels are treated
    as rotation logits and the rest as category logits.

    Args:
        room: Input tensor (the original notes give [batch, 32, 32,
            in_channels] -- not checked here).

    Returns:
        A tuple `(final_output, category, rotation)`: the concatenation of the
        two softmax outputs, followed by the raw category and rotation slices.
    """
    ngf = self.opt.ngf
    stack = []

    # encoder_1: plain convolution, no activation or normalisation.
    with tf.variable_scope("encoder_1"):
        first = tf.layers.conv2d(room, filters=ngf, kernel_size=2, strides=2, padding='valid')
        stack.append(first)

    # encoder_2 .. encoder_5: leaky ReLU -> conv -> batch norm.
    encoder_filters = [ngf * 2, ngf * 4, ngf * 8, ngf * 16]
    for n_filters in encoder_filters:
        with tf.variable_scope("encoder_%d" % (len(stack) + 1)):
            activated = lrelu(stack[-1], 0.2)
            conv_out = tf.layers.conv2d(activated, filters=n_filters, kernel_size=2,
                                        strides=2, padding='valid')
            # NOTE(review): no `training=` argument is passed, so the layer's
            # default mode is used -- confirm that is intended.
            stack.append(tf.layers.batch_normalization(conv_out))

    # decoder_5 .. decoder_2: (output filters, dropout rate).
    decoder_specs = [
        (ngf * 8, 0.1),
        (ngf * 4, 0.1),
        (ngf * 2, 0.1),
        (ngf * 1, 0.1),
    ]

    encoder_count = len(stack)
    for idx, (n_filters, drop_rate) in enumerate(decoder_specs):
        skip_idx = encoder_count - idx - 1
        with tf.variable_scope("decoder_%d" % (skip_idx + 1)):
            if idx == 0:
                # The first decoder sits directly on the innermost encoder
                # output, so there is nothing to concatenate.
                decoder_in = stack[-1]
            else:
                decoder_in = tf.concat([stack[-1], stack[skip_idx]], axis=3)

            decoded = deconv(tf.nn.relu(decoder_in), n_filters)
            decoded = tf.layers.batch_normalization(decoded)

            if drop_rate > 0.0:
                decoded = tf.nn.dropout(decoded, keep_prob=1 - drop_rate)

            stack.append(decoded)

    # decoder_1: produce `self.depth` channels and split them into the
    # category part and the trailing ROTATION_COUNT rotation channels.
    with tf.variable_scope("decoder_1"):
        decoder_in = tf.concat([stack[-1], stack[0]], axis=3)
        logits = deconv(tf.nn.relu(decoder_in), self.depth)

        category = logits[:, :, :, :self.depth - ROTATION_COUNT]
        category_output = tf.nn.softmax(category)

        rotation = logits[:, :, :, self.depth - ROTATION_COUNT:]
        rotation_output = tf.nn.softmax(rotation)

        final_output = tf.concat([category_output, rotation_output], axis=3)
        stack.append(final_output)

    return final_output, category, rotation
def autoencoder(x, zca, is_training=True, update_batch_stats=True, stochastic=True, seed=1234, use_zca=True):
    """Build a convolutional autoencoder and its reconstruction loss.

    Args:
        x: Colour image batch; 0.5 is added to it before use (the original
            note says this recovers the [0, 1] range). Sliced as
            [10, 32, 32, 3] for the preview, so at least 10 samples of
            32 x 32 x 3 are expected.
        zca: ZCA-whitened version of the batch, same layout as `x`.
        is_training: Selects the "Training" / "Testing" name scope and is
            forwarded to batch norm.
        update_batch_stats: Forwarded to batch norm.
        stochastic: Unused in this function.
        seed: Seed of the NumPy RandomState that supplies per-layer seeds.
        use_zca: If true the network reconstructs `zca` with an MSE loss;
            otherwise it reconstructs `x` through a sigmoid with a log loss.

    Returns:
        A tuple `(loss, encoded, sample)` where `encoded` is the output of the
        third pooling layer and `sample` is a uint8 image strip showing the
        colour input, the ZCA input and the reconstruction for 10 samples.
    """
    if is_training:
        scope = tf.name_scope("Training")
    else:
        scope = tf.name_scope("Testing")

    def _bn(t, dim, name):
        # NOTE(review): the call target was missing in the source text
        # (`L.lrelu(, <dim>, is_training=...)`). It is restored as
        # `L.bn(h, ...)` based on the argument list -- confirm against `L`.
        return L.bn(t, dim, is_training=is_training,
                    update_batch_stats=update_batch_stats, name=name)

    def _preview_strip(t, count):
        # Take the first `count` 32x32x3 images and stack them vertically.
        return tf.reshape(
            tf.slice(t, [0, 0, 0, 0], [count, 32, 32, 3]),
            (count * 32, 32, 3))

    with scope:
        x = x + 0.5
        if use_zca:
            h = zca
        else:
            h = x
        print(h.shape)

        # Seeds are drawn in layer order; keep this order to stay reproducible.
        rng = np.random.RandomState(seed)

        # (1) conv + lrelu + maxpool
        h = L.conv(h, ksize=3, stride=1, f_in=3, f_out=64,
                   seed=rng.randint(123456), padding="SAME", name='conv1')
        h = L.lrelu(_bn(h, 64, 'conv1_bn'), FLAGS.lrelu_a)
        h = L.max_pool(h, ksize=2, stride=2)

        # (2) conv + lrelu + maxpool
        h = L.conv(h, ksize=3, stride=1, f_in=64, f_out=32,
                   seed=rng.randint(123456), padding="SAME", name='conv2')
        h = L.lrelu(_bn(h, 32, 'conv2_bn'), FLAGS.lrelu_a)
        h = L.max_pool(h, ksize=2, stride=2)

        # (3) conv + lrelu + maxpool
        h = L.conv(h, ksize=3, stride=1, f_in=32, f_out=16,
                   seed=rng.randint(123456), padding="SAME", name='conv3')
        h = L.lrelu(_bn(h, 16, 'conv3_bn'), FLAGS.lrelu_a)
        h = L.max_pool(h, ksize=2, stride=2)

        encoded = h

        # NOTE(review): the original notes claim each deconv doubles the
        # spatial size (8, 16, 32), but all three are called with stride=1,
        # while the reconstruction is later sliced as 32 x 32. Confirm how
        # `L.deconv` upsamples.
        # (4) deconv + lrelu
        h = L.deconv(encoded, ksize=5, stride=1, f_in=16, f_out=16,
                     seed=rng.randint(123456), padding="SAME", name="deconv1")
        h = L.lrelu(_bn(h, 16, 'deconv1_bn'), FLAGS.lrelu_a)

        # (5) deconv + lrelu (no explicit seed in the original)
        h = L.deconv(h, ksize=5, stride=1, f_in=16, f_out=32,
                     padding="SAME", name="deconv2")
        h = L.lrelu(_bn(h, 32, 'deconv2_bn'), FLAGS.lrelu_a)

        # (6) deconv + lrelu (no explicit seed in the original)
        h = L.deconv(h, ksize=5, stride=1, f_in=32, f_out=64,
                     padding="SAME", name="deconv3")
        h = L.lrelu(_bn(h, 64, 'deconv3_bn'), FLAGS.lrelu_a)

        # (7) final conv, then batch norm (ZCA target) or sigmoid (colour target)
        h = L.conv(h, ksize=3, stride=1, f_in=64, f_out=3,
                   seed=rng.randint(123456), padding="SAME", name='convfinal')
        if use_zca:
            h = _bn(h, 3, 'deconv4_bn')
        else:
            h = tf.sigmoid(h)

        # Preview image: colour input | ZCA input | reconstruction.
        num_samples = 10
        sample_og_zca = _preview_strip(zca, num_samples)
        sample_og_color = _preview_strip(x, num_samples)
        sample_rec = _preview_strip(h, num_samples)

        if use_zca:
            # ZCA input and reconstruction share a range: min-max scale them together.
            sample = tf.concat([sample_og_zca, sample_rec], axis=1)
            m = tf.reduce_min(sample)
            sample = (sample - m) / (tf.reduce_max(sample) - m)
        else:
            # Only the ZCA input needs rescaling; the reconstruction is a sigmoid output.
            m = tf.reduce_min(sample_og_zca)
            sample_og_zca = (sample_og_zca - m) / (tf.reduce_max(sample_og_zca) - m)
            sample = tf.concat([sample_og_zca, sample_rec], axis=1)
        sample = tf.concat([sample_og_color, sample], axis=1)
        sample = tf.cast(255.0 * sample, tf.uint8)

        if use_zca:
            loss = tf.reduce_mean(tf.losses.mean_squared_error(zca, h))
        else:
            loss = tf.reduce_mean(tf.losses.log_loss(x, h))

        return loss, encoded, sample
def inference(image):
    """Build a U-Net style segmentation graph and return per-pixel class scores.

    The contracting path applies `layers.conv_layers` followed by 2x2 max
    pooling for levels 1 .. NUM_EXTRACTING_LAYER - 1, doubling the channel
    count each time, then one more conv block at the bottom. The expanding
    path mirrors it: `layers.deconv`, concatenation with the stored feature
    map of the same level, and another conv block. A 1x1 convolution maps the
    result to NUM_CLASSES channels.

    Relies on the module-level constants NUM_INPUT_CHANNEL,
    FIRST_OUTPUT_CHANNEL, NUM_EXTRACTING_LAYER and NUM_CLASSES.

    Args:
        image: Input image tensor.

    Returns:
        Tensor of unnormalised class scores with NUM_CLASSES channels.
    """
    net = image
    skips = []
    in_ch = NUM_INPUT_CHANNEL
    out_ch = FIRST_OUTPUT_CHANNEL

    # Contracting path.
    for level in range(1, NUM_EXTRACTING_LAYER):
        features = layers.conv_layers(net, 'conv' + str(level), in_ch, out_ch)
        skips.append(features)
        net = tf.nn.max_pool(features, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                             padding='SAME')
        in_ch = out_ch
        out_ch = out_ch * 2

    # Bottom of the U.
    net = layers.conv_layers(net, 'conv' + str(NUM_EXTRACTING_LAYER), in_ch, out_ch)
    in_ch = out_ch

    # Expanding path, deepest level first.
    for level in reversed(range(1, NUM_EXTRACTING_LAYER)):
        up_scope = 'deconv' + str(level)
        out_ch = int(in_ch / 2)

        upsampled = layers.deconv(net, up_scope, in_ch, out_ch)

        # Skip connection: the stored feature map goes first in the concat.
        merged = tf.concat([skips[level - 1], upsampled], axis=3)

        # The concat has in_ch channels again; the conv block reuses the
        # same scope name as the deconv, as in the original.
        net = layers.conv_layers(merged, up_scope, in_ch, out_ch)
        in_ch = out_ch

    # 1x1 convolution to class scores.
    # NOTE(review): the source text does not show where the device block
    # ends; here only the two variables are created under '/cpu:0' -- confirm.
    with tf.device('/cpu:0'):
        weights_1x1 = tf.get_variable(
            name='weight_1x1',
            shape=[1, 1, in_ch, NUM_CLASSES],
            initializer=tf.contrib.layers.xavier_initializer())
        biases_1x1 = tf.get_variable(
            name='biases_1x1',
            shape=[NUM_CLASSES],
            initializer=tf.constant_initializer(0.0))

    scores = tf.nn.conv2d(net, weights_1x1, [1, 1, 1, 1], padding='SAME')
    output_seg = tf.nn.bias_add(scores, biases_1x1)
    return output_seg