def build_net(self):
    """Assemble the ResNet graph and store the classifier output in ``self.fc``.

    Reads ``self.INPUTS``, ``self.DEPTH_OF_NET``, ``self.LAYER_INFO`` and
    ``self.NUM_OF_CLASS``. Everything is created under the variable scope
    ``'resnet<DEPTH_OF_NET>'``.
    """
    with tf.variable_scope('resnet' + str(self.DEPTH_OF_NET)):
        # Stem: 7x7 stride-2 convolution -> batch norm -> non-linearity.
        # (Original size note for this stage: 112*112.)
        stem = utils.convolution_layer(self.INPUTS, kernel_size=7, stride=2,
                                       kernel_nums=64, name='conv1')
        stem = utils.batch_normalization(stem)
        temp = utils.nonlinear_ops(stem)

        # Stages conv2..conv5. Original size notes: 2:56*56 3:28*28 4:14*14 5:7*7.
        for stage in range(2, 6):
            name = 'conv%d' % stage
            with tf.variable_scope(name):
                # Only the first stage uses a 3x3 SAME pool; later stages use a
                # 1x1 VALID pool. All of them are called with stride 2.
                if stage == 2:
                    kernel_size, padding = 3, 'SAME'
                else:
                    kernel_size, padding = 1, 'VALID'
                temp = utils.max_pool_layer(temp, kernel_size, 2,
                                            padding=padding, name='down_sample')
                stage_cfg = self.LAYER_INFO[name]
                temp = utils.res_block(temp, stage_cfg['nums'], stage_cfg['length'])

        # NOTE(review): a 7x7 average pool (utils.average_pool_layer(temp, 7, 1,
        # name='avg_pool')) used to sit here but is disabled. If the last stage's
        # feature map is larger than 1x1 spatially, the reshape below folds the
        # spatial positions into the batch axis -- confirm this is intended.
        fcinput = tf.reshape(temp, [-1, 2048])
        self.fc = utils.fully_connect_layer(fcinput, 2048, self.NUM_OF_CLASS,
                                            activation=None, name='fc')
def discriminator(inputs):
    '''
    Map an image tensor to a single unactivated score per example.

    inputs: tensor of the input high-resolution image; the shape notes below
        assume [-1, 128, 128, 3].
    Structured like `encoder`, except that the first activation is lrelu
    instead of relu and the final dense layer has one unit.
    '''
    filters = 128
    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.02)

    # Stem: 5x5 conv -> 2x average pool -> leaky ReLU.
    net = tf.layers.conv2d(
        inputs,
        filters,
        kernel_size=5,
        strides=(1, 1),
        padding='SAME',
        kernel_initializer=initializer)
    net = tf.layers.average_pooling2d(net, 2, 2, padding='SAME')
    net = lrelu(net)
    # -> [-1, 64, 64, 128]

    # Four identical stages, each a res_block followed by a 2x average pool:
    # 64 -> 32 -> 16 -> 8 -> 4 on each spatial side.
    for _ in range(4):
        net = res_block(net, filters, kernel_size=3, kernel_initializer=initializer)
        net = tf.layers.average_pooling2d(net, 2, 2, padding='SAME')
    # -> [-1, 4, 4, 128]

    # The flatten size is hard-coded for a 4x4 final feature map.
    flat = tf.reshape(net, shape=[-1, 4 * 4 * filters])
    return tf.layers.dense(flat, units=1, kernel_initializer=initializer)
def encoder(inputs, latern_dim=128):
    '''
    Encode an image tensor into a code vector of length `latern_dim`.

    inputs: tensor of the input high-resolution image.
        NOTE(review): the previous docstring gave the shape as
        [-1, 176, 216, 3], but the per-stage shape notes and the hard-coded
        4 * 4 * filters flatten below correspond to a 128x128 input --
        confirm which is current.
    latern_dim: dimensionality of the code (units of the final dense layer).
    '''
    filters = 128
    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.02)

    # Stem: 5x5 conv -> 2x average pool -> ReLU.
    net = tf.layers.conv2d(
        inputs,
        filters,
        kernel_size=5,
        strides=(1, 1),
        padding='SAME',
        kernel_initializer=initializer)
    net = tf.layers.average_pooling2d(net, 2, 2, padding='SAME')
    net = tf.nn.relu(net)
    # -> [-1, 64, 64, 128]

    # Four identical stages, each a res_block followed by a 2x average pool:
    # 64 -> 32 -> 16 -> 8 -> 4 on each spatial side.
    for _ in range(4):
        net = res_block(net, filters, kernel_size=3, kernel_initializer=initializer)
        net = tf.layers.average_pooling2d(net, 2, 2, padding='SAME')
    # -> [-1, 4, 4, 128]

    # The flatten size is hard-coded for a 4x4 final feature map.
    flat = tf.reshape(net, shape=[-1, 4 * 4 * filters])
    return tf.layers.dense(flat, units=latern_dim, kernel_initializer=initializer)
def mdsr(num_layers=80, feature_size=64):
    """Build the multi-output Keras model named "MDSR".

    Args:
        num_layers: number of ``utils.res_block`` calls in the shared trunk.
        feature_size: filter count used by every convolution and passed to
            every ``utils.res_block`` / ``utils.upsample`` call.

    Returns:
        A ``Model`` with one input of shape ``(img_size, img_size, channel)``
        (module-level names) and three outputs, built with ``utils.upsample``
        arguments 2, 3 and 4 respectively (presumably the upscaling factors --
        confirm against ``utils.upsample``).
    """
    input_tensor = Input(shape=(img_size, img_size, channel))

    # One convolution ahead of the res blocks to reach the working feature depth.
    head = Conv2D(feature_size, (3, 3), activation='relu', padding='same')(input_tensor)

    def _pre_branch():
        # Two stacked kernel=5 res blocks fed from the shared head.
        first = utils.res_block(head, feature_size, kernel=5)
        return utils.res_block(first, feature_size, kernel=5)

    # Three per-output branches, created in x2, x3, x4 order.
    branches = [_pre_branch() for _ in range(3)]

    # Shared trunk: sum of the branches -> num_layers res blocks -> 3x3 conv.
    trunk = add(branches)
    for _ in range(num_layers):
        trunk = utils.res_block(trunk, feature_size)
    trunk = Conv2D(feature_size, (3, 3), padding='same')(trunk)

    # Each head adds its own branch back onto the trunk before upsampling.
    outputs = [
        utils.upsample(add([trunk, branch]), factor, feature_size)
        for factor, branch in zip((2, 3, 4), branches)
    ]
    return Model(inputs=input_tensor, outputs=outputs, name="MDSR")
def generator(inputs):
    '''
    Upsample a low-resolution image tensor by 4x on each spatial side.

    inputs: tensor of the input low-resolution image; the shape notes below
        assume [-1, 32, 32, 3].
    Returns a 3-channel tensor passed through tanh.
    '''
    filters = 128
    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.02)

    # Stem convolution lifts the input to `filters` channels.
    net = tf.layers.conv2d(
        inputs,
        filters,
        kernel_size=3,
        strides=(1, 1),
        padding='SAME',
        kernel_initializer=initializer)
    # -> [-1, 32, 32, 128]

    # Two stages of res_block + nearest-neighbour resize to twice the current
    # static height/width: 32 -> 64 -> 128.
    for _ in range(2):
        net = res_block(net, filters, kernel_size=3, kernel_initializer=initializer)
        doubled = [net.shape[1] * 2, net.shape[2] * 2]
        net = tf.image.resize_nearest_neighbor(net, size=doubled)
    # -> [-1, 128, 128, 128]

    # Two more res blocks at full resolution.
    for _ in range(2):
        net = res_block(net, filters, kernel_size=3, kernel_initializer=initializer)

    # Project to 3 channels and apply tanh.
    rgb = tf.layers.conv2d(
        net,
        3,
        kernel_size=3,
        strides=(1, 1),
        padding='SAME',
        kernel_initializer=initializer)
    return tf.nn.tanh(rgb)
def build_model(scale, num_layers=32, feature_size=256, scaling_factor=0.1):
    """Build the single-output Keras model named "EDSR".

    Args:
        scale: forwarded as the second argument of ``utils.upsample``
            (presumably the upscaling factor -- confirm against the helper).
        num_layers: number of ``utils.res_block`` calls in the body.
        feature_size: filter count of every convolution and res block.
        scaling_factor: forwarded to ``utils.res_block`` as ``scale``; see the
            residual-scaling note in the body.

    Returns:
        A ``Model`` whose input has shape ``(img_size, img_size, channel)``;
        ``img_size``, ``channel`` and ``kernel`` are module-level names.
    """
    input_tensor = Input(shape=(img_size, img_size, channel))

    # One convolution before the res blocks to convert to the required feature depth.
    x = Conv2D(feature_size, (kernel, kernel), activation='relu',
               padding='same', name='conv1')(input_tensor)

    # Keep the output of the first convolution for the global skip connection.
    conv_1 = x

    # The loop below creates `num_layers` resBlocks. A resBlock is defined in
    # the paper as:
    #
    #     x
    #     |\
    #     | \
    #     |  conv2d
    #     |  relu
    #     |  conv2d
    #     | /
    #     |/
    #     + (addition here)
    #     |
    #     result
    #
    # Residual scaling, as mentioned in the paper:
    #   "we found that increasing the number of feature maps above a certain
    #   level would make the training procedure numerically unstable. A similar
    #   phenomenon was reported by Szegedy et al. We resolve this issue by
    #   adopting the residual scaling with factor 0.1. In each residual block,
    #   constant scaling layers are placed after the last convolution layers.
    #   These modules stabilize the training procedure greatly when using a
    #   large number of filters. In the test phase, this layer can be
    #   integrated into the previous convolution layer for the computational
    #   efficiency."
    for _ in range(num_layers):
        x = utils.res_block(x, feature_size, scale=scaling_factor)

    # Closing convolution of the body, then the global skip from the first conv.
    x = Conv2D(feature_size, (kernel, kernel), padding='same')(x)
    x = Add()([x, conv_1])

    # Upsample the output of the convolution.
    outputs = utils.upsample(x, scale, feature_size)

    return Model(inputs=input_tensor, outputs=outputs, name="EDSR")
def network_Gen(name, in_data, c, image_size, num_filters, image_c, c_dim, n_blocks=6, reuse=False):
    """Build the label-conditioned generator graph (see StarGAN/model.py).

    Args:
        name: variable scope under which all layers are created.
        in_data: image batch; per the original notes, [batch_size, H, W, C].
            Must not be None.
        c: attribute labels; per the original notes, [batch_size, c_dim].
        image_size: number of times the labels are tiled along both spatial
            axes, so it has to match H and W of ``in_data`` for the concat.
        num_filters: filter count of the first layer (original note: 64).
        image_c: channel count of the output image (original note: 3).
        c_dim: length of the attribute-label vector (original note: 8).
            It is not read inside this function.
        n_blocks: number of bottleneck ``utils.res_block`` calls.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The tanh of the final 7x7 convolution.
    """
    assert in_data is not None

    with tf.variable_scope(name, reuse=reuse):
        # --- Condition the image on the labels --------------------------------
        # [batch, c_dim] -> [batch, 1, c_dim] -> [batch, 1, 1, c_dim]
        # (tf.expand_dims inserts a dimension of size 1).
        labels = tf.expand_dims(c, axis=1)
        labels = tf.expand_dims(labels, axis=2)
        # PyTorch would use c.expand(c.size(0), c.size(1), 128, 128); here
        # tf.tile repeats each axis by the given multiple, e.g. an input of
        # shape (2, 1, 1, C) with multiples (1, 3, 3, 1) becomes (2, 3, 3, C).
        labels = tf.tile(labels, (1, image_size, image_size, 1))
        # Concatenate on the channel axis: [batch, H, W, C + c_dim].
        input_data = tf.concat([in_data, labels], axis=3)

        # --- Entry layer ------------------------------------------------------
        # PyTorch equivalent: kernel_size=7, stride=1, padding=3, i.e. 'SAME'.
        net = utils.res_mod_layers(in_data=input_data, num_filters=num_filters,
                                   kernel_size=7, strides=[1, 1], padding='SAME',
                                   use_bias=False, ReflectionPadding=False)

        # --- Down-sampling ----------------------------------------------------
        # tf conv2d only knows 'VALID' and 'SAME', not a numeric padding. To
        # mirror PyTorch's kernel_size=4, stride=2, padding=1 the tensor is
        # padded explicitly and the convolution is then run as 'VALID'.
        # For a 4-D tensor the padding applies to the H and W axes:
        #   padding=1 -> [[0, 0], [1, 1], [1, 1], [0, 0]]
        #   padding=3 -> [[0, 0], [3, 3], [3, 3], [0, 0]]
        # NOTE(review): the original note describes the PyTorch padding as
        # zero (CONSTANT) padding, yet ReflectionPadding=True is passed here;
        # utils.res_mod_layers is not visible -- confirm which mode is applied.
        width = num_filters
        for _ in range(2):
            width = width * 2
            net = utils.res_mod_layers(in_data=net, num_filters=width,
                                       kernel_size=4, strides=[2, 2],
                                       padding="VALID", use_bias=False,
                                       ReflectionPadding=True,
                                       padding_size=[[0, 0], [1, 1], [1, 1], [0, 0]])

        # --- Bottleneck -------------------------------------------------------
        for _ in range(n_blocks):
            net = utils.res_block(in_data=net, num_filters=width, kernel_size=3,
                                  strides=[1, 1], padding='SAME', use_bias=False,
                                  ReflectionPadding=False)

        # --- Up-sampling ------------------------------------------------------
        # Each step halves the filter count with a stride-2 transposed
        # convolution, followed by instance norm and ReLU.
        for _ in range(2):
            width = width // 2
            upsampled = tf.layers.conv2d_transpose(inputs=net, filters=int(width),
                                                   kernel_size=4, strides=[2, 2],
                                                   padding="SAME", use_bias=False)
            normed = tf.contrib.layers.instance_norm(inputs=upsampled, center=True,
                                                     scale=True, epsilon=1e-05)
            net = tf.nn.relu(normed)

        # --- Output layer -----------------------------------------------------
        # PyTorch equivalent: kernel_size=7, stride=1, padding=3, i.e. 'SAME'.
        image = tf.layers.conv2d(inputs=net, filters=image_c, kernel_size=7,
                                 strides=[1, 1], padding="SAME", use_bias=False)
        return tf.nn.tanh(image)
def generator_model(self):
    """Build the generator graph under the 'g_model' variable scope.

    Reads ``ngf``, ``n_downsampling``, ``output_nc`` and ``n_blocks_gen`` from
    ``self.param`` and ``self.training`` for batch norm / ``res_block``.

    Side effects:
        self.real_A: float32 placeholder of shape [None, None, None, 3].
        self.fake_B: generator output, ``(tanh(net) + real_A) / 2``.
    """
    ngf = self.param.ngf
    n_downsampling = self.param.n_downsampling
    output_nc = self.param.output_nc
    n_blocks_gen = self.param.n_blocks_gen

    with tf.variable_scope('g_model'):
        self.real_A = tf.placeholder(dtype=tf.float32,
                                     shape=[None, None, None, 3],
                                     name='real_A')
        g_input = self.real_A

        # Entry block: reflect-pad by 3 so the 7x7 VALID conv keeps H and W.
        _out = tf.pad(g_input, [[0, 0], [3, 3], [3, 3], [0, 0]], mode="REFLECT")
        _out = tf.layers.conv2d(_out, filters=ngf, kernel_size=(7, 7),
                                strides=(1, 1), padding='VALID')
        _out = tf.layers.batch_normalization(_out, training=self.training)
        _out = tf.nn.relu(features=_out)

        # Down-sampling: zero-pad by 1, then a 3x3 stride-2 VALID conv; the
        # filter count doubles at every step (ngf*2, ngf*4, ...).
        for i in range(n_downsampling):
            mult = 2**i
            _out = tf.pad(_out, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="CONSTANT")
            _out = tf.layers.conv2d(_out, filters=ngf * mult * 2,
                                    kernel_size=(3, 3), strides=(2, 2),
                                    padding='VALID')
            _out = tf.layers.batch_normalization(_out, training=self.training)
            _out = tf.nn.relu(features=_out)

        # Residual blocks at the widest feature depth.
        mult = 2**n_downsampling
        for _ in range(n_blocks_gen):
            _out = res_block(_out, ngf * mult, use_dropout=True,
                             training=self.training)

        # Up-sampling: stride-2 transposed convs, halving the filter count
        # at every step until it is back at ngf.
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            _out = tf.layers.conv2d_transpose(_out, filters=int(ngf * mult / 2),
                                              kernel_size=(3, 3), strides=(2, 2),
                                              padding='SAME')
            _out = tf.layers.batch_normalization(_out, training=self.training)
            _out = tf.nn.relu(features=_out)

        # Exit block: reflect-pad by 3, 7x7 VALID conv to output_nc channels, tanh.
        _out = tf.pad(_out, [[0, 0], [3, 3], [3, 3], [0, 0]], mode="REFLECT")
        _out = tf.layers.conv2d(_out, filters=output_nc, kernel_size=(7, 7),
                                strides=(1, 1), padding='VALID')
        _out = tf.tanh(x=_out)

        # Global residual: add the input image, then halve the sum.
        # NOTE(review): the add needs output_nc to be compatible with the
        # 3-channel placeholder; halving presumably keeps the result in
        # [-1, 1] when real_A is in that range -- confirm the input scaling.
        _out = tf.add(_out, g_input)
        _out = _out / 2

        self.fake_B = _out