def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
                         strides):
    """Build a single ResNet v1 residual block of the bottleneck kind.

    Every convolution is followed by batch normalization and then ReLU,
    as described in:
      Deep Residual Learning for Image Recognition
      https://arxiv.org/pdf/1512.03385.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

    The main path is a 1x1 convolution with `filters` outputs, a 3x3
    convolution with `filters` outputs that carries the block stride, and
    a final 1x1 convolution with `4 * filters` outputs. The shortcut is
    added after the last batch normalization and before the last ReLU.

    Args:
      inputs: The input tensor of the block.
      filters: The number of filters of the first two convolutions; the
        last convolution uses four times as many.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: Callable applied to `inputs` to produce the
        shortcut branch (its result is batch-normalized), or None to use
        `inputs` unchanged as the shortcut.
      strides: The stride of the 3x3 convolution.

    Returns:
      The output tensor of the block.
    """
    with tf.variable_scope('bottleneck_v1'):
        # Shortcut branch: identity, or projection followed by batch norm.
        if projection_shortcut is None:
            residual = inputs
        else:
            residual = projection_shortcut(inputs)
            residual = batch_norm(inputs=residual, training=training)

        # (output filters, kernel size, stride) of each conv on the main path.
        conv_specs = (
            (filters, 1, 1),
            (filters, 3, strides),
            (4 * filters, 1, 1),
        )
        final_idx = len(conv_specs) - 1

        net = inputs
        for idx, (out_filters, kernel, conv_stride) in enumerate(conv_specs):
            net = conv2d_fixed_padding(inputs=net, filters=out_filters,
                                       kernel_size=kernel,
                                       strides=conv_stride)
            net = batch_norm(net, training)
            if idx == final_idx:
                # The residual joins before the block's final activation.
                net += residual
            net = tf.nn.relu(net)

        return net
def res_block(inputs, expansion_ratio, output_dim, stride, is_train, name,
              bias=False, shortcut=True, is_pw=True):
    """Build a pointwise -> depthwise -> linear-pointwise residual block.

    Steps, all created under name/variable scope `name`:
      1. (only if `is_pw`) `conv_1x1` expanding the channel count to
         round(expansion_ratio * input channels), then batch norm and relu6.
      2. `dwise_conv` with strides [1, stride, stride, 1], then batch norm
         and relu6.
      3. `conv_1x1` to `output_dim` channels followed by batch norm only
         (no activation).
      4. (only if `shortcut` and `stride == 1`) element-wise addition of the
         block input; when the input channel count differs from
         `output_dim`, the input first goes through a `conv_1x1` named
         'ex_dim'.

    Args:
      inputs: Input tensor; its last dimension is read as the channel count.
      expansion_ratio: Multiplier applied to the input channel count to get
        the width of the first pointwise convolution.
      output_dim: Number of output channels of the block.
      stride: Spatial stride used by the depthwise convolution.
      is_train: Passed to batch_norm as `training`.
      name: Name used for both the name scope and the variable scope.
      bias: Forwarded to the 'pw', 'dw' and 'pw_linear' convolution helpers.
      shortcut: Whether to add the residual connection (stride 1 only).
      is_pw: Whether to apply the initial expanding pointwise convolution.

    Returns:
      The output tensor of the block.
    """
    with tf.name_scope(name), tf.variable_scope(name):
        net = inputs

        # Optional expanding pointwise stage.
        if is_pw:
            in_channels = inputs.get_shape().as_list()[-1]
            bottleneck_dim = round(expansion_ratio * in_channels)
            net = conv_1x1(net, bottleneck_dim, name='pw', bias=bias)
            net = batch_norm(net, training=is_train, scale=True,
                             name='batch_normalization_pw')
            net = relu6(net)

        # Depthwise stage.
        net = dwise_conv(net, strides=[1, stride, stride, 1], name='dw',
                         bias=bias)
        net = batch_norm(net, training=is_train, scale=True,
                         name='batch_normalization_dw')
        net = relu6(net)

        # Linear pointwise stage: no activation after the batch norm.
        net = conv_1x1(net, output_dim, name='pw_linear', bias=bias)
        net = batch_norm(net, training=is_train, scale=True,
                         name='batch_normalization_pw_linear')

        # The element-wise add is only applied when stride == 1.
        if not (shortcut and stride == 1):
            return net

        in_dim = int(inputs.get_shape().as_list()[-1])
        if in_dim == output_dim:
            return inputs + net

        # Channel counts differ: bring the input to output_dim channels first.
        expanded_inputs = conv_1x1(inputs, output_dim, name='ex_dim')
        return expanded_inputs + net
def _building_block_v2(inputs, filters, training, projection_shortcut,
                       strides):
    """Build a single ResNet v2 residual block without a bottleneck.

    Batch normalization, then ReLU, then convolution, as described in:
      Identity Mappings in Deep Residual Networks
      https://arxiv.org/pdf/1603.05027.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

    Args:
      inputs: The input tensor of the block.
      filters: The number of filters for both 3x3 convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: Callable applied to the pre-activated input to
        produce the shortcut branch, or None to use the raw `inputs` as the
        shortcut.
      strides: The stride of the first 3x3 convolution.

    Returns:
      The output tensor of the block (main path plus shortcut; no trailing
      activation).
    """
    with tf.variable_scope('building_block_v2'):
        preactivated = tf.nn.relu(batch_norm(inputs, training))

        # The projection shortcut consumes the pre-activated tensor (it comes
        # after the first batch norm and ReLU); the identity shortcut uses
        # the untouched block input.
        if projection_shortcut is None:
            residual = inputs
        else:
            residual = projection_shortcut(preactivated)

        net = conv2d_fixed_padding(inputs=preactivated, filters=filters,
                                   kernel_size=3, strides=strides)
        net = tf.nn.relu(batch_norm(net, training))
        net = conv2d_fixed_padding(inputs=net, filters=filters,
                                   kernel_size=3, strides=1)

        return net + residual
def build_model(self, inputs, is_training, requested_stages=None):
    """Add the ResNet operations, stopping early at a requested stage.

    The network is built stage by stage. After each stage
    `self.update_endpoint(...)` is called; whenever it returns a truthy
    value the method returns immediately with the tensor built so far.
    NOTE(review): `update_endpoint` is defined outside this block;
    presumably it records the tensor in `self.end_points` and reports
    whether the last requested stage has been reached -- confirm.

    Args:
      inputs: A Tensor representing a batch of input images.
      is_training: A boolean. Set to True to add operations required only
        when training the classifier.
      requested_stages: A list of requested stages; forwarded to the parent
        class's `build_model`.

    Returns:
      A tuple `(tensor, self.end_points, frontend_scope)`. `tensor` is the
      output of the last stage that was built (the result of the final
      dense layer with `self.num_classes` units when no earlier stage
      triggers a return), and `frontend_scope` is the name of the variable
      scope the model was built under.
    """
    super().build_model(inputs, is_training, requested_stages)

    frontend_scope = '_'.join(
        ('resnet_v' + str(self.resnet_version), str(self.resnet_size)))

    with tf.variable_scope(frontend_scope):
        # TODO: Consider converting the inputs from NHWC to NCHW to improve
        # GPU performance.
        # See https://www.tensorflow.org/performance/performance_guide
        net = conv2d_fixed_padding(inputs=inputs,
                                   filters=self.num_filters,
                                   kernel_size=self.kernel_size,
                                   strides=self.conv_stride,
                                   use_bias=True)
        net = tf.identity(net, 'initial_conv')

        # We do not include batch normalization or activation functions in
        # V2 for the initial conv1 because the first ResNet unit will perform
        # these for both the shortcut and non-shortcut paths as part of the
        # first block's projection. Cf. Appendix of [2].
        if self.resnet_version == 1:
            net = tf.nn.relu(batch_norm(net, is_training))

        # Stage 1
        if self.update_endpoint(net):
            return net, self.end_points, frontend_scope

        if self.first_pool_size:
            net = tf.layers.max_pooling2d(inputs=net,
                                          pool_size=self.first_pool_size,
                                          strides=self.first_pool_stride,
                                          padding='SAME',
                                          data_format='channels_last')
            net = tf.identity(net, 'initial_max_pool')

        # Stages 2 - 5: one block layer per entry of block_sizes, with the
        # filter count doubling from one layer to the next.
        for idx, num_blocks in enumerate(self.block_sizes):
            layer_no = idx + 1
            net = block_layer(inputs=net,
                              filters=self.num_filters * (2**idx),
                              bottleneck=self.bottleneck,
                              block_fn=self.block_fn,
                              blocks=num_blocks,
                              strides=self.block_strides[idx],
                              training=is_training,
                              name='block_layer{}'.format(layer_no),
                              namescope="block" + str(layer_no))
            if self.update_endpoint(net):
                return net, self.end_points, frontend_scope

        # Stage 6 (no tensor is passed for this stage)
        if self.update_endpoint(None):
            return net, self.end_points, frontend_scope

        # Stage 7: only apply the BN and ReLU for models that do
        # pre-activation in each building/bottleneck block, e.g. ResNet V2.
        # Without pre-activation the stage is reported with None.
        if self.pre_activation:
            net = tf.nn.relu(batch_norm(net, is_training))
            stage7_tensor = net
        else:
            stage7_tensor = None
        if self.update_endpoint(stage7_tensor):
            return net, self.end_points, frontend_scope

        # The current top layer has shape
        # `batch_size x pool_size x pool_size x final_size`.
        # ResNet does an Average Pooling layer over pool_size, but that is
        # the same as doing a reduce_mean. We do a reduce_mean here because
        # it performs better than AveragePooling2D.
        spatial_axes = [1, 2]
        net = tf.reduce_mean(input_tensor=net, axis=spatial_axes,
                             keepdims=True)
        net = tf.identity(net, 'final_reduce_mean')
        net = tf.squeeze(net, spatial_axes)

        net = tf.layers.dense(inputs=net, units=self.num_classes)
        net = tf.identity(net, 'final_dense')

        # Stage 8
        self.update_endpoint(net)
        return net, self.end_points, frontend_scope
def pwise_block(input, output_dim, is_train, name, bias=False, scale=False):
    """Apply `conv_1x1`, batch normalization and relu6 under scope `name`.

    Args:
      input: Input tensor.
      output_dim: Output size passed to `conv_1x1`.
      is_train: Passed to batch_norm as `training`.
      name: Name used for both the name scope and the variable scope.
      bias: Forwarded to `conv_1x1`.
      scale: Forwarded to batch_norm.

    Returns:
      The activated output tensor.
    """
    with tf.name_scope(name), tf.variable_scope(name):
        projected = conv_1x1(input, output_dim, bias=bias, name='pwb')
        normalized = batch_norm(projected, training=is_train, scale=scale)
        return relu6(normalized)
def conv2d_block(input, out_dim, k, s, is_train, name):
    """Apply `conv2d`, batch normalization and relu6 under scope `name`.

    Args:
      input: Input tensor.
      out_dim: Output size passed to `conv2d`.
      k: Passed twice to `conv2d` (presumably kernel height and width --
        confirm against the `conv2d` helper).
      s: Passed twice to `conv2d` (presumably vertical and horizontal
        stride -- confirm against the `conv2d` helper).
      is_train: Passed to batch_norm as `training`.
      name: Name used for both the name scope and the variable scope.

    Returns:
      The activated output tensor.
    """
    with tf.name_scope(name), tf.variable_scope(name):
        convolved = conv2d(input, out_dim, k, k, s, s, name='conv2d')
        normalized = batch_norm(convolved, training=is_train)
        return relu6(normalized)