def max_pool(x, name, kernel_size, strides, padding):
  """Max pooling layer on each GPU device.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this layer.
    kernel_size: A number indicating the size of pooling kernels.
    strides: A number indicating the stride of the sliding window for
      height and width.
    padding: 'VALID' or 'SAME'.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels].
  """
  return nn.max_pool(x, name, kernel_size, strides, padding)
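# The `nn.max_pool` wrapper called above is defined elsewhere in this repo.
# The helper below is only a sketch of what such a wrapper typically does in
# TF 1.x (expanding a square kernel and stride to NHWC lists), not the actual
# implementation; the name `_max_pool_sketch` is hypothetical. It assumes
# `import tensorflow as tf` at the top of this module, as the functions here
# already require.
def _max_pool_sketch(x, name, kernel_size, strides, padding):
  """Hypothetical stand-in for nn.max_pool, assuming NHWC layout."""
  return tf.nn.max_pool(x,
                        ksize=[1, kernel_size, kernel_size, 1],
                        strides=[1, strides, strides, 1],
                        padding=padding,
                        name=name)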
def bottleneck(x,
               name,
               filters,
               strides=None,
               dilation=None,
               is_training=True,
               use_global_status=True):
  """Builds the bottleneck module in ResNet.

  This function stacks 3 convolutional layers and fuses the output with
  the residual connection.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this layer.
    filters: A number indicating the number of output channels.
    strides: A number indicating the stride of the sliding window for
      height and width.
    dilation: A number indicating the dilation factor for height and width.
    is_training: Whether the tensorflow variables defined in this layer
      will be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out].
  """
  if strides is None and dilation is None:
    raise ValueError('Neither strides nor dilation is specified, '
                     + 'set one of them to 1 or a bigger number.')
  elif strides is not None and strides > 1 and dilation is not None and dilation > 1:
    raise ValueError('strides and dilation are both specified, '
                     + 'set one of them to 1 or None.')

  # Treat an unspecified stride as 1 so the comparisons below are
  # well-defined when only dilation is given.
  if strides is None:
    strides = 1

  with tf.variable_scope(name) as scope:
    c_i = x.get_shape().as_list()[-1]

    if c_i != filters * 4:
      # Use a convolutional layer as residual connection when the
      # number of input channels is different from output channels.
      shortcut = nn.conv(x,
                         name='shortcut',
                         filters=filters * 4,
                         kernel_size=1,
                         strides=strides,
                         padding='VALID',
                         biased=False,
                         bn=True,
                         relu=False,
                         is_training=is_training,
                         use_global_status=use_global_status)
    elif strides > 1:
      # Use max-pooling as residual connection when the number of
      # input channels is the same as output channels, but the stride
      # is larger than 1.
      shortcut = nn.max_pool(x,
                             name='shortcut',
                             kernel_size=1,
                             strides=strides,
                             padding='VALID')
    else:
      # Otherwise, keep the original input as residual connection.
      shortcut = x

    # Build the 1st convolutional layer.
    x = nn.conv(x,
                name='conv1',
                filters=filters,
                kernel_size=1,
                strides=1,
                padding='SAME',
                biased=False,
                bn=True,
                relu=True,
                is_training=is_training,
                use_global_status=use_global_status)

    if dilation is not None and dilation > 1:
      # If dilation > 1, apply atrous conv to the 2nd convolutional layer.
      x = nn.atrous_conv(x,
                         name='conv2',
                         filters=filters,
                         kernel_size=3,
                         dilation=dilation,
                         padding='SAME',
                         biased=False,
                         bn=True,
                         relu=True,
                         is_training=is_training,
                         use_global_status=use_global_status)
    else:
      padding = 'VALID' if strides > 1 else 'SAME'
      x = nn.conv(x,
                  name='conv2',
                  filters=filters,
                  kernel_size=3,
                  strides=strides,
                  padding=padding,
                  biased=False,
                  bn=True,
                  relu=True,
                  is_training=is_training,
                  use_global_status=use_global_status)

    # Build the 3rd convolutional layer (increase the channels).
    x = nn.conv(x,
                name='conv3',
                filters=filters * 4,
                kernel_size=1,
                strides=1,
                padding='SAME',
                biased=False,
                bn=True,
                relu=False,
                is_training=is_training,
                use_global_status=use_global_status)

    # Fuse the convolutional outputs with residual connection.
    x = tf.add_n([x, shortcut], name='add')
    x = tf.nn.relu(x, name='relu')

  return x
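# Illustrative usage of `bottleneck` (a sketch, not part of the original
# module): builds a single identity-shortcut unit on a dummy TF 1.x
# placeholder. Assumes `import tensorflow as tf` at the top of this file, as
# the functions above already require; the placeholder shape and scope name
# are arbitrary.
def _example_bottleneck_usage():
  # 256 input channels == filters * 4 and stride 1, so the residual branch
  # keeps the input unchanged (the `shortcut = x` case above).
  images = tf.placeholder(tf.float32, shape=[None, 64, 64, 256])
  out = bottleneck(images,
                   name='example/unit_1/bottleneck_v1',
                   filters=64,
                   strides=1,
                   dilation=None,
                   is_training=False,
                   use_global_status=True)
  return out  # shape: [None, 64, 64, 256]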
def resnet_v1(x,
              name,
              filters=[64, 128, 256, 512],
              num_blocks=[3, 4, 23, 3],
              strides=[2, 1, 1, 1],
              dilations=[None, None, 2, 2],
              is_training=True,
              use_global_status=True,
              reuse=False):
  """Helper function to build ResNet.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this network.
    filters: A list of numbers indicating the number of output channels
      (the output channels are 4 times these numbers).
    num_blocks: A list of numbers indicating how many bottleneck units
      are stacked in each block.
    strides: A list of numbers indicating the stride of the sliding window
      for height and width in each block.
    dilations: A list of numbers (or None) indicating the dilation factor
      for height and width in each block.
    is_training: Whether the tensorflow variables defined in this network
      will be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.
    reuse: enable/disable reuse for reusing tensorflow variables. It is
      useful for sharing weight parameters across two identical networks.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out],
    and a list of intermediate feature tensors collected after conv1 and
    before the last unit of each block.
  """
  if len(filters) != len(num_blocks) or len(filters) != len(strides):
    raise ValueError('lengths of the lists are not consistent')

  with tf.variable_scope(name, reuse=reuse) as scope:
    # Build conv1.
    x = nn.conv(x,
                name='conv1',
                filters=64,
                kernel_size=7,
                strides=2,
                padding='VALID',
                biased=False,
                bn=True,
                relu=True,
                is_training=is_training,
                use_global_status=use_global_status)
    bn = []
    bn.append(x)

    # Build pool1.
    x = nn.max_pool(x,
                    name='pool1',
                    kernel_size=3,
                    strides=2,
                    padding='VALID')

    # Build residual bottleneck blocks.
    for ib in range(len(filters)):
      for iu in range(num_blocks[ib]):
        name_format = 'block{:d}/unit_{:d}/bottleneck_v1'
        block_name = name_format.format(ib + 1, iu + 1)

        c_o = filters[ib]  # output channel
        # Apply strides only to the last unit of each block.
        s = strides[ib] if iu == num_blocks[ib] - 1 else 1
        d = dilations[ib]
        if iu == num_blocks[ib] - 1:
          bn.append(x)
        x = bottleneck(x,
                       name=block_name,
                       filters=c_o,
                       strides=s,
                       dilation=d,
                       is_training=is_training,
                       use_global_status=use_global_status)

  return x, bn
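# Illustrative usage of `resnet_v1` (a sketch, not part of the original
# module): with the default arguments the builder corresponds to a dilated
# ResNet-101 backbone (conv1 + pool1 + blocks of 3/4/23/3 bottleneck units,
# with dilation 2 in the last two blocks). The input size and scope name
# below are arbitrary assumptions for illustration.
def _example_resnet101_usage():
  images = tf.placeholder(tf.float32, shape=[None, 321, 321, 3])
  features, block_features = resnet_v1(images,
                                       name='resnet_v1_101',
                                       is_training=False,
                                       use_global_status=True,
                                       reuse=False)
  # `features` is the output of the last bottleneck unit; `block_features`
  # holds the intermediate tensors collected in the `bn` list.
  return features, block_features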
def _unet_builder(x,
                  mask,
                  name,
                  filters=[64, 128, 256, 512, 1024],
                  num_blocks=[2, 3, 3, 3, 3],
                  strides=[2, 2, 2, 2, 2],
                  is_training=True,
                  use_global_status=False,
                  reuse=False):
  """Helper function to construct UNet.
  """
  if len(filters) != len(num_blocks)\
      or len(filters) != len(strides):
    raise ValueError('lengths of the lists are not consistent')

  with tf.variable_scope('Analyzer', reuse=reuse) as scope:
    with tf.name_scope(name):
      input_x = x

      # Encoder.
      shortcuts = []
      not_ignore_masks = []
      for ib in range(len(filters)):
        for iu in range(num_blocks[ib]):
          name_format = 'layer{:d}/unit_{:d}/encoder/'
          block_name = name_format.format(ib + 1, iu + 1)

          c_o = filters[ib]  # output channel
          # Apply strides at the beginning of each block.
          s = strides[ib] if iu == 0 else 1
          padding = 'VALID' if s > 1 else 'SAME'

          if ib == 0 and iu == 0:
            # The first encoder layer convolves each input tensor
            # separately, then concatenates the results.
            x = []
            for ix, in_x in enumerate(input_x):
              x.append(
                  nn.conv(in_x,
                          name=block_name + 'conv{:d}'.format(ix),
                          filters=int(c_o / 2),
                          #filters=c_o,
                          kernel_size=3,
                          strides=s,
                          padding=padding,
                          #biased=False,
                          #bn=True,
                          biased=True,
                          bn=False,
                          relu=False,
                          decay=0.99,
                          is_training=is_training,
                          use_global_status=use_global_status))
            x = tf.concat(x, axis=-1, name=block_name + 'concat')
          else:
            x = nn.conv(x,
                        name=block_name + 'conv',
                        filters=c_o,
                        kernel_size=3,
                        strides=s,
                        padding=padding,
                        #biased=False,
                        #bn=True,
                        biased=True,
                        bn=False,
                        relu=False,
                        decay=0.99,
                        is_training=is_training,
                        use_global_status=use_global_status)

          if iu == 0:
            # Down-sample the ignore mask to the current resolution.
            mask = nn.max_pool(mask,
                               block_name + 'mask_pool',
                               3,
                               s,
                               padding=padding)
            not_ignore_masks.append(1 - mask)

          f = tf.multiply(x,
                          not_ignore_masks[-1],
                          name=block_name + 'masked_conv')
          tf.add_to_collection('Analyzer/features', f)
          x = tf.nn.relu(x)
          print(x)
        shortcuts.append(x)

      # Decoder.
      for ib in range(len(shortcuts) - 1, 0, -1):
        for iu in range(num_blocks[ib - 1]):
          n, h, w, c_o = shortcuts[ib - 1].get_shape().as_list()
          name_format = 'layer{:d}/unit_{:d}/decoder/'
          block_name = name_format.format(2 * len(filters) - ib, iu + 1)

          x = nn.conv(x,
                      name=block_name + 'conv',
                      filters=c_o,
                      kernel_size=3,
                      strides=1,
                      padding='SAME',
                      #biased=False,
                      #bn=True,
                      biased=True,
                      bn=False,
                      relu=False,
                      decay=0.99,
                      is_training=is_training,
                      use_global_status=use_global_status)
          f = tf.multiply(x,
                          not_ignore_masks[ib],
                          name=block_name + 'masked_conv')
          tf.add_to_collection('Analyzer/features', f)
          x = tf.nn.relu(x)

          if iu == 0:
            # Upsample and fuse with the encoder shortcut.
            x = tf.image.resize_bilinear(x, [h, w])
            x = tf.concat([x, shortcuts[ib - 1]], axis=-1)
          print(x)

      # Final 1x1 projection back to the total number of input channels.
      c_i = 0
      for in_x in input_x:
        c_i += in_x.get_shape().as_list()[-1]
      x = nn.conv(x,
                  name='block5/fc',
                  filters=c_i,
                  kernel_size=1,
                  strides=1,
                  padding='SAME',
                  biased=True,
                  bn=False,
                  relu=False,
                  is_training=is_training)
      x = tf.image.resize_bilinear(x, tf.shape(input_x[0])[1:3])
      tf.add_to_collection('Analyzer/outputs', x)

  return x
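# Illustrative usage of `_unet_builder` (a sketch, not part of the original
# module): the builder expects `x` to be a *list* of tensors that are
# concatenated at the first encoder layer, and `mask` to be a map whose
# non-zero entries mark ignored pixels (the encoder multiplies features by
# `1 - mask`). The two-tensor input, shapes, and scope name below are
# assumptions for illustration only.
def _example_unet_usage():
  image = tf.placeholder(tf.float32, shape=[4, 256, 256, 3])
  prediction = tf.placeholder(tf.float32, shape=[4, 256, 256, 21])
  ignore_mask = tf.placeholder(tf.float32, shape=[4, 256, 256, 1])
  out = _unet_builder([image, prediction],
                      ignore_mask,
                      name='unet',
                      is_training=False,
                      reuse=False)
  # `out` is resized back to the input resolution and has as many channels
  # as the concatenated inputs (3 + 21 = 24 in this example).
  return out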