Example #1
def res_fcn_32s(inputs, num_classes, is_training):
    with tf.variable_scope('res_fcn_32s'):
        # Use the structure of res_v1_50 classification network
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes, is_training=is_training,
                                                 global_pool=False, output_stride=32)
        # Deconvolutional layers to recover the size of input image
        # Padding is 'SAME' for the conv layers, so they do not change the spatial size
        # There are 5 max-pool layers, each halving the spatial size,
        # so in total the size is reduced by a factor of 2^5 = 32
        # That is also why this model is called fcn_32s
        # Use bilinear interpolation for upsampling
        upsample_filter = upsampling.bilinear_upsample_weights(32, num_classes)
        upsample_filter_tensor = tf.constant(upsample_filter)
        shape = tf.shape(net)
        output = tf.nn.conv2d_transpose(net, upsample_filter_tensor,
                                        output_shape = tf.stack([shape[0], shape[1] * 32,
                                                        shape[2] * 32, shape[3]]),
                                        strides=[1, 32, 32, 1])
        variables = slim.get_variables('res_fcn_32s')
        # Extract variables that match the original ResNet-50; they can be
        # initialized from a pre-trained ResNet-50 checkpoint
        res_variables = {}
        for variable in variables:
            res_variables[variable.name[12:-2]] = variable
        return output, res_variables
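
All of these examples rely on a bilinear_upsample_weights helper to build the fixed deconvolution filter, but none of them define it. A minimal sketch of the conventional FCN-style implementation (an assumption about the helper, following the usual reference code) looks like this:

import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    """Create a [filter_size, filter_size, num_classes, num_classes] weight
    array for tf.nn.conv2d_transpose that performs bilinear upsampling by
    `factor`, channel by channel."""
    filter_size = 2 * factor - factor % 2
    # Center of the bilinear kernel
    if filter_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    # Outer product of two 1-D triangular (bilinear) profiles
    upsample_kernel = ((1 - abs(og[0] - center) / factor) *
                       (1 - abs(og[1] - center) / factor))
    weights = np.zeros((filter_size, filter_size,
                        number_of_classes, number_of_classes),
                       dtype=np.float32)
    # Each class channel is upsampled independently (block-diagonal filter)
    for i in range(number_of_classes):
        weights[:, :, i, i] = upsample_kernel
    return weights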
Example #2
    def fcn16s(self, net, reuse=False):
        with tf.variable_scope('fcn16s') as scope:
            if reuse:
                tf.get_variable_scope().reuse_variables()

            upscore_weights = bilinear_upsample_weights(2, self.n_classes)
            output_weights = bilinear_upsample_weights(16, self.n_classes)
            upscore_weights_tensor = tf.constant(upscore_weights)
            output_weights_tensor = tf.constant(output_weights)

            ## Score from pool4:
            pool4_score = slim.convolution2d(self.pool4,
                                             self.n_classes,
                                             1,
                                             1,
                                             padding='SAME',
                                             scope='pool4_score',
                                             reuse=reuse)
            pool4_h, pool4_w = pool4_score.get_shape().as_list()[1:3]
            ## Upsample the stream
            batch_size, h, w, _ = net.get_shape().as_list()
            # (tf.nn.conv2d_transpose creates no variables, so it takes no reuse argument)
            upscore = tf.nn.conv2d_transpose(
                net,
                upscore_weights_tensor,
                [batch_size, h * 2, w * 2, self.n_classes], [1, 2, 2, 1])
            ## Crop pool4_score to be same shape as upscore
            upscore_pool4_crop = tf.image.resize_image_with_crop_or_pad(
                upscore, pool4_h, pool4_w)
            ## Order invariant combination
            upscore = pool4_score + upscore_pool4_crop
            ## Final upsample
            h, w = upscore.get_shape().as_list()[1:3]
            upscore = tf.nn.conv2d_transpose(
                upscore,
                output_weights_tensor,
                [batch_size, h * 16, w * 16, self.n_classes], [1, 16, 16, 1])
            ## Force to be the same size as input
            upscore = tf.image.resize_image_with_crop_or_pad(
                upscore, self.x_dim, self.y_dim)
            print('fcn16s upscore', upscore.get_shape())
        return upscore
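
The crop in fcn16s above is what aligns the two streams before fusion: tf.image.resize_image_with_crop_or_pad center-crops (or zero-pads) the spatial dimensions only. A minimal illustration with made-up shapes:

a = tf.zeros([1, 30, 30, 21])  # upsampled stream
b = tf.zeros([1, 28, 28, 21])  # pool4 scores
a_cropped = tf.image.resize_image_with_crop_or_pad(a, 28, 28)
fused = a_cropped + b          # both streams are now [1, 28, 28, 21]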
Example #3
    def fcn32s(self, net, reuse=False):
        with tf.variable_scope('fcn32s') as scope:
            if reuse:
                tf.get_variable_scope().reuse_variables()

            upsample_filter_np = bilinear_upsample_weights(32, self.n_classes)
            upsample_filter_tensor = tf.constant(upsample_filter_np)

            batch_size, h, w, channels = net.get_shape().as_list()
            output = tf.nn.conv2d_transpose(
                net,
                upsample_filter_tensor, [batch_size, h * 32, w * 32, channels],
                [1, 32, 32, 1])
            output = tf.image.resize_image_with_crop_or_pad(
                output, self.x_dim, self.y_dim)
            print('fcn32s output', output.get_shape())
        return output
Example #4
def upsample_tf(factor, input_img):
    number_of_classes = input_img.shape[2]
    new_height = input_img.shape[0] * factor
    new_width = input_img.shape[1] * factor

    expanded_img = np.expand_dims(input_img, axis=0).astype(np.float32)

    with tf.Graph().as_default():
        with tf.Session() as sess:
            upsample_filter_np = utile.bilinear_upsample_weights(
                factor, number_of_classes)
            res = tf.nn.conv2d_transpose(
                expanded_img,
                upsample_filter_np,
                output_shape=[1, new_height, new_width, number_of_classes],
                strides=[1, factor, factor, 1])
            res = sess.run(res)
    return np.squeeze(res)
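
A hypothetical call, just to show the expected shapes (the 2x2, 3-channel input is made up):

import numpy as np

dummy_scores = np.random.rand(2, 2, 3).astype(np.float32)
upsampled = upsample_tf(factor=4, input_img=dummy_scores)
print(upsampled.shape)  # (8, 8, 3): height and width grow by the factor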
Example #5
def upsample(net, nf=32, upsample_factor=2):
    upsample_filter_np = upsampling.bilinear_upsample_weights(
        upsample_factor, nf)
    upsample_filter_tensor = tf.constant(upsample_filter_np)
    downsampled_logits_shape = net.get_shape().as_list()

    # Calculate the output size of the upsampled tensor
    upsampled_logits_shape = [
        downsampled_logits_shape[0],
        downsampled_logits_shape[1] * upsample_factor,
        downsampled_logits_shape[2] * upsample_factor,
        downsampled_logits_shape[3]
    ]
    print(upsampled_logits_shape)
    # Perform the upsampling
    net = tf.nn.conv2d_transpose(
        net,
        upsample_filter_tensor,
        output_shape=upsampled_logits_shape,
        strides=[1, upsample_factor, upsample_factor, 1])

    return net
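
The function above uses static shapes (get_shape()), so it fails when the batch size or spatial dimensions are unknown at graph-construction time. A sketch of a dynamic-shape variant, mirroring the tf.shape/tf.stack pattern of Example #1:

def upsample_dynamic(net, nf=32, upsample_factor=2):
    upsample_filter_np = upsampling.bilinear_upsample_weights(
        upsample_factor, nf)
    upsample_filter_tensor = tf.constant(upsample_filter_np)
    # tf.shape is evaluated at run time, so None dimensions are fine
    shape = tf.shape(net)
    upsampled_logits_shape = tf.stack([
        shape[0],
        shape[1] * upsample_factor,
        shape[2] * upsample_factor,
        shape[3]
    ])
    return tf.nn.conv2d_transpose(
        net,
        upsample_filter_tensor,
        output_shape=upsampled_logits_shape,
        strides=[1, upsample_factor, upsample_factor, 1])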
Example #6
def FCN_8s(image_batch_tensor,
           number_of_classes,
           new_number_of_classes,
           is_training,
           is_reuse,
           is_fine_tune=False):
    """Returns the FCN-8s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses three bilinear
    upsampling layers to upsample the prediction back by a factor of 32. This
    means that if the image size is not a multiple of 32, a prediction of a
    different size will be delivered. To adapt the network for an input of any
    size, use adapt_network_for_any_size_input(FCN_8s, 32). Note: the upsampling
    kernel is fixed in this model definition, because learning it did not give
    significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    new_number_of_classes : int
        Number of classes produced by the extra scoring layer ('fc_9_1')
        appended on top of the upsampled FCN-8s logits.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the work of the underlying dropout layer of VGG-16.
    is_reuse : boolean
        Whether to reuse the variables in the 'FCN_slice' variable scope.
    is_fine_tune : boolean
        If True, the 'fc_9_1' variables are also included in the returned
        checkpoint mapping; otherwise they are left to fresh initialization.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to input,
        use adapt_network_for_any_size_input to adapt network for any input size.
        Otherwise, the input images sizes should be of multiple 32.
    fcn_8s_variables_mapping : dict {string: variable}
        Dict which maps the FCN-8s model's variables to FCN-16s checkpoint
        variable names. We need this to initialize the weights of the FCN-8s
        model from an FCN-16s checkpoint file. Look at the ipython notebook
        for examples.
    """

    # Convert image to float32 before subtracting the
    # mean pixel value
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    upsample_filter_factor_2_np = bilinear_upsample_weights(factor=2,
                                                            number_of_classes=number_of_classes)

    upsample_filter_factor_8_np = bilinear_upsample_weights(factor=8,
                                                             number_of_classes=number_of_classes)

    upsample_filter_factor_2_tensor = tf.constant(upsample_filter_factor_2_np)
    upsample_filter_factor_8_tensor = tf.constant(upsample_filter_factor_8_np)

    with tf.variable_scope('FCN_slice', reuse=is_reuse):

        with tf.variable_scope("fcn_8s") as fcn_8s_scope:
            # Define the model that we want to use -- specify to use only two classes at the last layer
            # TODO: make pull request to get this custom vgg feature accepted
            # to avoid using custom slim repo.
            with slim.arg_scope(vgg.vgg_arg_scope()):

                ## Original FCN-32s model definition

                last_layer_logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                                        num_classes=number_of_classes,
                                                        is_training=is_training,
                                                        dropout_keep_prob=0.5,
                                                        spatial_squeeze=False,
                                                        use_dilated=False,
                                                        scope='vgg_16',
                                                        fc_conv_padding='SAME',
                                                        global_pool=False)


                last_layer_logits_shape = tf.shape(last_layer_logits)


                # Calculate the output size of the upsampled tensor
                last_layer_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                    last_layer_logits_shape[0],
                                                                    last_layer_logits_shape[1] * 2,
                                                                    last_layer_logits_shape[2] * 2,
                                                                    last_layer_logits_shape[3]
                                                                    ])

                
                last_layer_logits = slim.batch_norm(last_layer_logits, activation_fn=tf.nn.relu)
                
                # Perform the upsampling
                last_layer_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(last_layer_logits,
                                                                                upsample_filter_factor_2_tensor,
                                                                                output_shape=last_layer_upsampled_by_factor_2_logits_shape,
                                                                                strides=[1, 2, 2, 1],
                                                                                name='upscore2')

                ## Adding the skip here for FCN-16s model
                
                # We created vgg in the fcn_8s name scope -- so
                # all the vgg endpoints now are prepended with fcn_8s name
            
                pool4_features = end_points['FCN_slice/fcn_8s/vgg_16/pool4']

                # We zero-initialize the weights so that training starts at the
                # same accuracy at which FCN-32s training ended

                pool4_features = slim.batch_norm(pool4_features, activation_fn=tf.nn.relu)
                
                pool4_logits = slim.conv2d(pool4_features,
                                        number_of_classes,
                                        [1, 1],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        weights_initializer=tf.zeros_initializer,
                                        scope='pool4_fc')


                
                fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits

                fused_last_layer_and_pool4_logits_shape = tf.shape(fused_last_layer_and_pool4_logits)
                
                
                

                # Calculate the output size of the upsampled tensor
                fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.stack([
                                                                            fused_last_layer_and_pool4_logits_shape[0],
                                                                            fused_last_layer_and_pool4_logits_shape[1] * 2,
                                                                            fused_last_layer_and_pool4_logits_shape[2] * 2,
                                                                            fused_last_layer_and_pool4_logits_shape[3]
                                                                            ])

                fused_last_layer_and_pool4_logits = slim.batch_norm(fused_last_layer_and_pool4_logits, activation_fn=tf.nn.relu)
                
                # Perform the upsampling
                fused_last_layer_and_pool4_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits,
                                                                            upsample_filter_factor_2_tensor,
                                                                            output_shape=fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape,
                                                                            strides=[1, 2, 2, 1],
                                                                            name='upscore4')
                
                
                ## Adding the skip here for FCN-8s model

                pool3_features = end_points['FCN_slice/fcn_8s/vgg_16/pool3']
                
                # We zero-initialize the weights so that training starts at the
                # same accuracy at which FCN-16s training ended

                pool3_features = slim.batch_norm(pool3_features, activation_fn=tf.nn.relu)
                
                pool3_logits = slim.conv2d(pool3_features,
                                        number_of_classes,
                                        [1, 1],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        weights_initializer=tf.zeros_initializer,
                                        scope='pool3_fc')

                
                fused_last_layer_and_pool4_logits_and_pool_3_logits = pool3_logits + \
                                                fused_last_layer_and_pool4_upsampled_by_factor_2_logits
                
                
                fused_last_layer_and_pool4_logits_and_pool_3_logits_shape = tf.shape(fused_last_layer_and_pool4_logits_and_pool_3_logits)
                
                
                # Calculate the output size of the upsampled tensor
                fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.stack([
                                                                            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[0],
                                                                            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[1] * 8,
                                                                            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[2] * 8,
                                                                            fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[3]
                                                                            ])

                fused_last_layer_and_pool4_logits_and_pool_3_logits = slim.batch_norm(fused_last_layer_and_pool4_logits_and_pool_3_logits, activation_fn=tf.nn.relu)
                
                # Perform the upsampling
                fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits_and_pool_3_logits,
                                                                            upsample_filter_factor_8_tensor,
                                                                            output_shape=fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape,
                                                                            strides=[1, 8, 8, 1],
                                                                            name='upscore32')
                
                
                fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits = slim.batch_norm(fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, activation_fn=tf.nn.relu)


                # `_score_layer` and the weight-decay value `wd` are assumed
                # to be defined elsewhere in this module
                new_logits = _score_layer(
                    wd,
                    fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits,
                    number_of_classes, 1, 'fc_9_1', new_number_of_classes)


                fcn_8s_variables_mapping = {}

                fcn_8s_variables = slim.get_variables(fcn_8s_scope)
                
                for variable in fcn_8s_variables:
                    
                    # We only need FCN-16s variables to restore from the checkpoint;
                    # variables new to FCN-8s should be freshly initialized
                    if not is_fine_tune:
                        if 'fc_9_1' in variable.name:
                            continue
                    
                    # Here we remove the part of a name of the variable
                    # that is responsible for the current variable scope
                    original_fcn_8s_checkpoint_string = 'FCN_slice/fcn_8s' +  variable.name[len(fcn_8s_scope.name):-2]
                    fcn_8s_variables_mapping[original_fcn_8s_checkpoint_string] = variable


    return new_logits, fcn_8s_variables_mapping
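
The returned mapping is meant to be fed to slim's checkpoint utilities. A sketch of the intended use (the checkpoint path and the `images` tensor are placeholders):

upsampled_logits, fcn_8s_variables_mapping = FCN_8s(
    image_batch_tensor=images,
    number_of_classes=21,
    new_number_of_classes=2,
    is_training=True,
    is_reuse=False)

# Build an init function that overwrites the mapped variables with the
# values stored in the FCN-16s checkpoint
init_fn = slim.assign_from_checkpoint_fn(
    'fcn_16s.ckpt', fcn_8s_variables_mapping)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)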
Example #7
def fcn_32s(inputs, num_classes, is_training, dropout_keep_prob=0.5):
    with tf.variable_scope('fcn_32s'):
        # Based on the structure of vgg-16 network
        with tf.variable_scope('vgg_16'):
            # Default settings for conv layers and fc layers
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.truncated_normal_initializer(
                        0, 0.01),
                    biases_initializer=tf.zeros_initializer,
                    weights_regularizer=slim.l2_regularizer(0.0005)):
                # Default settings for conv layer
                with slim.arg_scope([slim.conv2d], padding='SAME'):
                    net = slim.repeat(inputs,
                                      2,
                                      slim.conv2d,
                                      64, [3, 3],
                                      scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], scope='pool1')
                    net = slim.repeat(net,
                                      2,
                                      slim.conv2d,
                                      128, [3, 3],
                                      scope='conv2')
                    net = slim.max_pool2d(net, [2, 2], scope='pool2')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      256, [3, 3],
                                      scope='conv3')
                    net = slim.max_pool2d(net, [2, 2], scope='pool3')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope='conv4')
                    net = slim.max_pool2d(net, [2, 2], scope='pool4')
                    net = slim.repeat(net,
                                      3,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope='conv5')
                    net = slim.max_pool2d(net, [2, 2], scope='pool5')
                    # Change from fc layer to conv layer
                    net = slim.conv2d(net, 4096, [7, 7], scope='fc6')
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout6')
                    net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout7')
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='fc8')
                    # Upsampling layer to recover the size of input image
                    # Based on bilinear interpolation upsampling
                    upsample_filter = upsampling.bilinear_upsample_weights(
                        32, num_classes)
                    upsample_filter_tensor = tf.constant(upsample_filter)
                    shape = tf.shape(net)
                    output = tf.nn.conv2d_transpose(net,
                                                    upsample_filter_tensor,
                                                    output_shape=tf.stack([
                                                        shape[0],
                                                        shape[1] * 32,
                                                        shape[2] * 32, shape[3]
                                                    ]),
                                                    strides=[1, 32, 32, 1])
                    variables = slim.get_variables('fcn_32s')
                    # Extract variables that are the same as in the original vgg-16;
                    # they can be initialized with pre-trained vgg-16 parameters
                    vgg_variables = {}
                    for variable in variables:
                        vgg_variables[variable.name[8:-2]] = variable
                    return output, vgg_variables
Example #8
def fcn_16s(inputs, num_classes, is_training, dropout_keep_prob=0.5):
    with tf.variable_scope('fcn_16s'):
        # Based on the structure of vgg-16 network
        with tf.variable_scope('vgg_16'):
            end_points_collection = 'vgg_16' + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                # Default settings for conv layers and fc layers
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(
                            0, 0.01),
                        biases_initializer=tf.zeros_initializer,
                        weights_regularizer=slim.l2_regularizer(0.0005)):
                    # Default settings for conv layer
                    with slim.arg_scope([slim.conv2d], padding='SAME'):
                        net = slim.repeat(inputs,
                                          2,
                                          slim.conv2d,
                                          64, [3, 3],
                                          scope='conv1')
                        net = slim.max_pool2d(net, [2, 2], scope='pool1')
                        net = slim.repeat(net,
                                          2,
                                          slim.conv2d,
                                          128, [3, 3],
                                          scope='conv2')
                        net = slim.max_pool2d(net, [2, 2], scope='pool2')
                        net = slim.repeat(net,
                                          3,
                                          slim.conv2d,
                                          256, [3, 3],
                                          scope='conv3')
                        net = slim.max_pool2d(net, [2, 2], scope='pool3')
                        net = slim.repeat(net,
                                          3,
                                          slim.conv2d,
                                          512, [3, 3],
                                          scope='conv4')
                        net = slim.max_pool2d(net, [2, 2], scope='pool4')
                        net = slim.repeat(net,
                                          3,
                                          slim.conv2d,
                                          512, [3, 3],
                                          scope='conv5')
                        net = slim.max_pool2d(net, [2, 2], scope='pool5')
                        # Change from fc layer to conv layer
                        net = slim.conv2d(net, 4096, [7, 7], scope='fc6')
                        net = slim.dropout(net,
                                           dropout_keep_prob,
                                           is_training=is_training,
                                           scope='dropout6')
                        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                        net = slim.dropout(net,
                                           dropout_keep_prob,
                                           is_training=is_training,
                                           scope='dropout7')
                        output = slim.conv2d(net,
                                             num_classes, [1, 1],
                                             activation_fn=None,
                                             normalizer_fn=None,
                                             scope='fc8')
                        end_points = slim.utils.convert_collection_to_dict(
                            end_points_collection)
        # Upsampling layer to recover the size of input image
        # Based on bilinear interpolation upsampling
        output_pool4 = end_points['fcn_16s/vgg_16/pool4']
        output_pool4 = slim.conv2d(output_pool4,
                                   num_classes, [1, 1],
                                   activation_fn=None,
                                   normalizer_fn=None,
                                   scope='pool4_fc')
        upsample_filter_2 = upsampling.bilinear_upsample_weights(
            2, num_classes)
        upsample_filter_tensor_2 = tf.constant(upsample_filter_2)
        shape_output = tf.shape(output)
        output = tf.nn.conv2d_transpose(output,
                                        upsample_filter_tensor_2,
                                        output_shape=tf.stack([
                                            shape_output[0],
                                            shape_output[1] * 2,
                                            shape_output[2] * 2,
                                            shape_output[3]
                                        ]),
                                        strides=[1, 2, 2, 1])
        output_combined = output + output_pool4
        upsample_filter_16 = upsampling.bilinear_upsample_weights(
            16, num_classes)
        upsample_filter_tensor_16 = tf.constant(upsample_filter_16)
        shape_output_pool4 = tf.shape(output_pool4)
        output_combined = tf.nn.conv2d_transpose(
            output_combined,
            upsample_filter_tensor_16,
            output_shape=tf.stack([
                shape_output_pool4[0], shape_output_pool4[1] * 16,
                shape_output_pool4[2] * 16, shape_output_pool4[3]
            ]),
            strides=[1, 16, 16, 1])

        variables = slim.get_variables('fcn_16s')
        # Extract variables that are shared with the fcn_32s model;
        # they can be initialized with pre-trained fcn_32s parameters
        fcn_32s_variables = {}
        for variable in variables:
            if 'pool4_fc' not in variable.name:
                fcn_32s_variables[variable.name[0:4] + '32' +
                                  variable.name[6:-2]] = variable
        return output_combined, fcn_32s_variables
Example #9
def FCN_32s(image_batch_tensor, number_of_classes, is_training, reuse=None):
    """Returns the FCN-32s model definition.
    The function returns the model definition of a network that was described
    in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al.
    The network subsamples the input by a factor of 32 and uses a bilinear
    upsampling kernel to upsample the prediction back by a factor of 32. This
    means that if the image size is not a multiple of 32, a prediction of a
    different size will be delivered. To adapt the network for an input of any
    size, use adapt_network_for_any_size_input(FCN_32s, 32). Note: the upsampling
    kernel is fixed in this model definition, because learning it did not give
    significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
        It affects the work of underlying dropout layer of VGG-16.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to input,
        use adapt_network_for_any_size_input to adapt network for any input size.
        Otherwise, the input images sizes should be of multiple 32.
    vgg_16_variables_mapping : dict {string: variable}
        Dict which maps the FCN-32s model's variables to VGG-16 checkpoint
        variable names. We need this to initialize the weights of the FCN-32s
        model from a VGG-16 checkpoint file. Look at the ipython notebook
        for examples.
    """

    with tf.variable_scope("fcn_32s", reuse=reuse) as fcn_32s_scope:
        upsample_factor = 32

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [
            _R_MEAN, _G_MEAN, _B_MEAN
        ]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(vgg.vgg_arg_scope()):

            logits, end_points = vgg.vgg_16(mean_centered_image_batch,
                                            num_classes=number_of_classes,
                                            is_training=is_training,
                                            spatial_squeeze=False,
                                            fc_conv_padding='SAME')

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor

        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original vgg-16 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        vgg_16_variables_mapping = {}

        vgg_16_variables = slim.get_variables(fcn_32s_scope)

        for variable in vgg_16_variables:

            # Here we remove the part of a name of the variable
            # that is responsible for the current variable scope
            # original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.original_name_scope):-2]

            # Updated: changed .name_scope to .name because name_scope only affects operations
            # and variable scope is actually represented by .name
            original_vgg_16_checkpoint_string = variable.name[
                len(fcn_32s_scope.name) + 1:-2]
            vgg_16_variables_mapping[
                original_vgg_16_checkpoint_string] = variable

    return upsampled_logits, vgg_16_variables_mapping
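
Both docstrings above refer to adapt_network_for_any_size_input. A minimal sketch of one way such a wrapper can behave (zero-pad the input up to a multiple of the subsampling factor, then crop the logits back); this is an assumption about the helper, not its exact implementation:

def adapt_network_for_any_size_input(network_fn, size_multiple):
    # Assumption: a pad-then-crop wrapper; the real helper may differ.
    def wrapper(image_batch_tensor, *args, **kwargs):
        shape = tf.shape(image_batch_tensor)
        h, w = shape[1], shape[2]
        # Round height and width up to the nearest multiple of size_multiple
        new_h = ((h + size_multiple - 1) // size_multiple) * size_multiple
        new_w = ((w + size_multiple - 1) // size_multiple) * size_multiple
        padded = tf.pad(image_batch_tensor,
                        [[0, 0], [0, new_h - h], [0, new_w - w], [0, 0]])
        logits, variables_mapping = network_fn(padded, *args, **kwargs)
        # Crop the upsampled logits back to the original input size
        return logits[:, :h, :w, :], variables_mapping
    return wrapper

# Usage: fcn_32s_flexible = adapt_network_for_any_size_input(FCN_32s, 32)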
Example #10
    def fcn_net(self, x, end_points):
        def _shape(tensor):
            shape = tf.shape(tensor)
            return shape[0], shape[1], shape[2], shape[3]

        n_classes = self.n_classes
        upsample_filter_2 = tf.constant(
            bilinear_upsample_weights(factor=2, n_classes=n_classes))

        with slim.arg_scope([slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None,
                            weights_initializer=tf.zeros_initializer):
            # first upsampling x2
            n, h, w, c = _shape(x)
            output_shape = tf.stack([n, h * 2, w * 2, c])
            # 2x conv7
            net = tf.nn.conv2d_transpose(x,
                                         upsample_filter_2,
                                         output_shape=output_shape,
                                         strides=[1, 2, 2, 1])
            # pool4
            pool4_features = end_points['vgg_16/pool4']
            pool4_logits = slim.conv2d(pool4_features,
                                       n_classes, [1, 1],
                                       scope='pool4_fc')
            # pool4 + 2x conv7
            net = pool4_logits + net

            # second upsampling x2
            n, h, w, c = _shape(net)
            output_shape = tf.stack([n, h * 2, w * 2, c])
            # 2x (pool4 + 2x conv7)
            net = tf.nn.conv2d_transpose(net,
                                         upsample_filter_2,
                                         output_shape=output_shape,
                                         strides=[1, 2, 2, 1])
            # pool3
            pool3_features = end_points['vgg_16/pool3']
            pool3_logits = slim.conv2d(pool3_features,
                                       n_classes, [1, 1],
                                       scope='pool3_fc')
            # pool3 + 2x pool4 + 4x conv7
            net = pool3_logits + net

            # third upsampling x2
            n, h, w, c = _shape(net)
            output_shape = tf.stack([n, h * 2, w * 2, c])
            # 2x (pool3 + 2x pool4 + 4x conv7)
            net = tf.nn.conv2d_transpose(net,
                                         upsample_filter_2,
                                         output_shape=output_shape,
                                         strides=[1, 2, 2, 1])
            # pool2
            pool2_features = end_points['vgg_16/pool2']
            pool2_logits = slim.conv2d(pool2_features,
                                       n_classes, [1, 1],
                                       scope='pool2_fc')
            # pool2 + 2x pool3 + 4x pool4 + 8x conv7
            net = pool2_logits + net

            # fourth upsampling x2
            n, h, w, c = _shape(net)
            output_shape = tf.stack([n, h * 2, w * 2, c])
            # 2x (pool2 + 2x pool3 + 4x pool4 + 8x conv7)
            net = tf.nn.conv2d_transpose(net,
                                         upsample_filter_2,
                                         output_shape=output_shape,
                                         strides=[1, 2, 2, 1])
            # pool1
            pool1_features = end_points['vgg_16/pool1']
            pool1_logits = slim.conv2d(pool1_features,
                                       n_classes, [1, 1],
                                       scope='pool1_fc')
            # pool1 + 2x pool2 + 4x pool3 + 8x pool4 + 16x conv7
            net = pool1_logits + net

            # final upsampling x2
            n, h, w, c = _shape(net)
            output_shape = tf.stack([n, h * 2, w * 2, c])
            net = tf.nn.conv2d_transpose(net,
                                         upsample_filter_2,
                                         output_shape=output_shape,
                                         strides=[1, 2, 2, 1])
        return net
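
A sketch of how the inputs to fcn_net are typically produced (the `model` and `images` names are illustrative): x is the class-score map from VGG-16's converted fully connected layers, and end_points is the slim endpoint dict whose 'vgg_16/pool1' ... 'vgg_16/pool4' keys are read above:

with slim.arg_scope(vgg.vgg_arg_scope()):
    logits, end_points = vgg.vgg_16(images,
                                    num_classes=model.n_classes,
                                    is_training=True,
                                    spatial_squeeze=False,
                                    fc_conv_padding='SAME')
segmentation_logits = model.fcn_net(logits, end_points)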
Example #11
def fcn_16s(image_batch_tensor, num_classes, is_training):
    # Get the filters for upsampling by factor 2 and 16
    upsample_by_2_weights = bilinear_upsample_weights(
        factor=2, number_of_classes=num_classes)
    upsample_by_2_filter = tf.constant(upsample_by_2_weights)
    upsample_by_16_weights = bilinear_upsample_weights(
        factor=16, number_of_classes=num_classes)
    upsample_by_16_filter = tf.constant(upsample_by_16_weights)

    image_batch_float = tf.to_float(image_batch_tensor)

    # Create a variable scope for our model
    with tf.variable_scope('fcn_16s') as fcn_16s_scope:
        # arg_scope defines the default settings for layer variables such as initializers
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # tensorflow slim vgg_16 signature:
            # def vgg_16(inputs, num_classes=1000, is_training=True,
            #            dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_16'):
            # Need to use 'same' padding for convolutional layers in vgg
            vgg_logits, vgg_endpoints = vgg.vgg_16(image_batch_float,
                                                   num_classes=num_classes,
                                                   is_training=is_training,
                                                   spatial_squeeze=False,
                                                   fc_conv_padding='SAME')
            vgg_layer_shape = tf.shape(vgg_logits)

            # Calculate the size of the tensor upsampled by two times
            # vgg_layer_shape[0] is the batch size
            # vgg_layer_shape[1] is the height and vgg_layer_shape[2] is the width
            upsample_by_2_shape = tf.stack([
                vgg_layer_shape[0], vgg_layer_shape[1] * 2,
                vgg_layer_shape[2] * 2, vgg_layer_shape[3]
            ])
            # Perform upsampling using transpose convolution
            # conv2d_transpose input:
            # tf.nn.conv2d_transpose(value, filter, output_shape, strides,
            #                       padding='SAME', data_format='NHWC', name=None)
            upsample_by_2_logits = tf.nn.conv2d_transpose(
                vgg_logits, upsample_by_2_filter, upsample_by_2_shape,
                [1, 2, 2, 1])

            # Now we add the skip layer from pool4 layer of vgg
            pool4_features = vgg_endpoints['fcn_16s/vgg_16/pool4']

            # The pool4 output, according to paper, needs to go through a
            # convolutional layer before being combined with the FCN32 logits
            pool4_logits = slim.conv2d(
                pool4_features,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.zeros_initializer(),
                scope='pool4_fc')

            vgg_pool4_combined_logits = upsample_by_2_logits + pool4_logits

            vgg_pool4_combined_shape = tf.shape(vgg_pool4_combined_logits)
            # Now upsample the combined logits by a factor of 16
            upsample_by_16_shape = tf.stack([
                vgg_pool4_combined_shape[0], vgg_pool4_combined_shape[1] * 16,
                vgg_pool4_combined_shape[2] * 16, vgg_pool4_combined_shape[3]
            ])

            upsample_by_16_logits = tf.nn.conv2d_transpose(
                vgg_pool4_combined_logits, upsample_by_16_filter,
                upsample_by_16_shape, [1, 16, 16, 1])

            # We need this mapping to load pretrained vgg model
            vgg_16_variables_mapping = {}
            fcn_16s_variables = slim.get_variables(fcn_16s_scope)

            for variable in fcn_16s_variables:

                # We only need FCN-32s variables to restore from the checkpoint;
                # variables new to FCN-16s should be freshly initialized
                if 'pool4_fc' in variable.name:
                    continue

                # Here we remove the part of a name of the variable
                # that is responsible for the current variable scope
                original_vgg_16_checkpoint_string = variable.name[
                    len(fcn_16s_scope.original_name_scope):-2]
                vgg_16_variables_mapping[
                    original_vgg_16_checkpoint_string] = variable

    return upsample_by_16_logits, vgg_16_variables_mapping