# Shared imports for the examples below. `utils` is the project-local helper
# module (weight_variable, conv2d_basic, ...); FLAGS, MODEL_URL,
# NUM_OF_CLASSESS and VGG_MEAN are module-level settings defined elsewhere in
# the source projects.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm

import utils


def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            if name == 'conv1_1':
                # conv1_1 is re-initialized rather than loaded from VGG,
                # because the input here has 4 channels instead of VGG's 3
                kernels = utils.weight_variable([3, 3, 4, 64], name=name)
            else:
                # matconvnet: weights are [width, height, in_channels, out_channels]
                # tensorflow: weights are [height, width, in_channels, out_channels]
                kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            if name == 'conv5_1':
                # conv5_1 uses a dilated convolution to enlarge its receptive
                # field (pool4 above no longer reduces the resolution)
                current = utils.conv2d_atrous(current, kernels, bias)
            else:
                current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            if name == 'pool4' or name == 'pool5':
                # stride-1 pooling that keeps the spatial resolution
                # ('pool5' never occurs in the layer list above; the pool5 of
                # the original VGG is applied later, in inference())
                current = utils.max_pool_notchange(current)
            else:
                current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
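
# The utils.conv2d_* helpers used above are not included in this listing.
# A minimal sketch of what they plausibly look like, assuming standard
# SAME-padded TF1 ops (the project's real utils module may differ):
def conv2d_basic(x, W, bias):
    # stride-1 convolution followed by a bias add
    conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.bias_add(conv, bias)


def conv2d_atrous(x, W, bias, rate=2):
    # dilated (atrous) convolution: enlarges the receptive field while
    # keeping the feature map size; the default rate=2 is an assumption
    conv = tf.nn.atrous_conv2d(x, W, rate=rate, padding="SAME")
    return tf.nn.bias_add(conv, bias)
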
def inference(image, keep_prob):
    """
    Semantic segmentation network definition
    :param image: input image; channels 0-2 are RGB with values in the range
        0-255, channel 3 is an extra input that is normalized separately below
    :param keep_prob: dropout keep probability for layers 6 and 7
    :return: the predicted annotation map (argmax over classes, with a trailing
        channel dimension) and the raw per-class logits
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = np.squeeze(model_data['layers'])
    processed_image = utils.process_image(image[:, :, :, :3], mean_pixel)
    # normalize the extra 4th channel with what appear to be its precomputed
    # mean and standard deviation
    pic_last = (tf.expand_dims(image[:, :, :, 3], -1) - 65.114) / 62.652
    processed_image = tf.concat([processed_image, pic_last], axis=3)
    # processed_image = tf.concat(3, [processed_image, tf.expand_dims(image[:, :, :, 3], -1)])
    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_notchange(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 512], name="W6")
        b6 = utils.bias_variable([512], name="b6")
        conv6 = utils.conv2d_atrous(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 512, 512], name="W7")
        b7 = utils.bias_variable([512], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 512, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        # now to upscale to actual image size
        # deconv_shape1 = image_net["pool4"].get_shape()
        # W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        # b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        # conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        # fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")
        #
        # deconv_shape2 = image_net["pool3"].get_shape()
        # W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        # b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        # conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        # fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, NUM_OF_CLASSESS], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(conv8, W_t3, b_t3, output_shape=deconv_shape3, stride=8)
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    return tf.expand_dims(annotation_pred, axis=3), conv_t3
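
# utils.conv2d_transpose_strided is likewise external to this listing. Under
# the same assumptions, a plausible sketch using tf.nn.conv2d_transpose; note
# the filter layout is [height, width, out_channels, in_channels]:
def conv2d_transpose_strided(x, W, bias, output_shape, stride=2):
    # learned upsampling; the stride=8 call above brings the logits back to
    # the input resolution in a single step
    conv = tf.nn.conv2d_transpose(x, W, output_shape,
                                  strides=[1, stride, stride, 1],
                                  padding="SAME")
    return tf.nn.bias_add(conv, bias)
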
Example #3
    def build_model(self):
        """
        load variable from npy to build the VGG
        :param rgb: rgb image [batch, height, width, 3] values 0-255
        """
        #### Sum of weights of all filters for weight decay loss
        self.SumWeights = tf.constant(0.0, name="SumFiltersWeights")
        self.image = tf.placeholder(
            tf.float32,
            shape=[None, self.img_size, self.img_size, 3],
            name="input_image")
        self.label_true = tf.placeholder(
            tf.int32,
            shape=[None, self.img_size // 8, self.img_size // 8, 1],
            name="label_true")
        # self.keep_prob = tf.placeholder(tf.float32, name="keep_probabilty")
        self.bn_train = tf.placeholder('bool')
        self.learning_rate = tf.placeholder(
            tf.float32, shape=[])  ##### for adaptive learning rate

        print("RGB to BGR")
        # rgb_scaled = rgb * 255.0
        #### Input layer: convert RGB to BGR and subtract pixels mean
        red, green, blue = tf.split(axis=3,
                                    num_or_size_splits=3,
                                    value=self.image)
        self.bgr = tf.concat(axis=3,
                             values=[
                                 blue - VGG_MEAN[0],
                                 green - VGG_MEAN[1],
                                 red - VGG_MEAN[2],
                             ])
        print("build model started")
        #### ------------------------------------------------------------
        #### VGG conv+pooling part. Note that only max_pool(.) will halve
        #### the feature map size (both H and W) by a factor of 2, while
        #### all conv_layer(.) keep the same feature map size.
        #### ------------------------------------------------------------
        #### Layer 1
        self.conv1_1 = self.conv_layer(self.bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        #### Layer 2
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        #### Layer 3
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')
        #### Layer 4
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        #### Layer 5
        self.conv5_1 = self.dilated_conv_layer(self.conv4_3,
                                               rate=2,
                                               name="conv5_1")
        self.conv5_2 = self.dilated_conv_layer(self.conv5_1,
                                               rate=2,
                                               name="conv5_2")
        self.conv5_3 = self.dilated_conv_layer(self.conv5_2,
                                               rate=2,
                                               name="conv5_3")
        #### ------------------------------------------------------------
        #### Replace Dense layers of original VGG by convolutional layers.
        #### Note that all feature maps keep the same size (H and W), only
        #### depths are modified (512 --> 4096 --> 4096 --> self.n_class).
        #### ------------------------------------------------------------
        #### FCN 1
        W6 = utils.weight_variable([3, 3, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        self.conv6 = utils.conv2d_atrous(self.conv5_3, W6, b6, rate=4)
        ##### https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
        self.conv6_bn = batch_norm(self.conv6,
                                   decay=self.bnDecay,
                                   epsilon=self.epsilon,
                                   scale=True,
                                   is_training=self.bn_train,
                                   updates_collections=None)
        self.relu6 = tf.nn.relu(self.conv6_bn, name="relu6")
        # self.relu6 = utils.leaky_relu(self.conv6, alpha=0.2, name="relu6")
        # if FLAGS.debug: utils.add_activation_summary(relu6)
        # self.relu_dropout6 = tf.nn.dropout(self.relu6, keep_prob=self.keep_prob)
        #### FCN 2 (1x1 convolution)
        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        self.conv7 = utils.conv2d_basic(self.relu6, W7, b7)
        ##### https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
        self.conv7_bn = batch_norm(self.conv7,
                                   decay=self.bnDecay,
                                   epsilon=self.epsilon,
                                   scale=True,
                                   is_training=self.bn_train,
                                   updates_collections=None)
        self.relu7 = tf.nn.relu(self.conv7_bn, name="relu7")
        # self.relu7 = utils.leaky_relu(self.conv7, alpha=0.2, name="relu7")
        # if FLAGS.debug: utils.add_activation_summary(relu7)
        # self.relu_dropout7 = tf.nn.dropout(self.relu7, keep_prob=self.keep_prob)
        #### FCN 3 (1x1 convolution)
        W8 = utils.weight_variable([1, 1, 4096, self.n_class], name="W8")
        b8 = utils.bias_variable([self.n_class], name="b8")
        self.conv8 = utils.conv2d_basic(self.relu7, W8, b8)
        # self.relu8 = tf.nn.relu(self.conv8, name="relu8")
        # annotation_pred1 = tf.argmax(conv8, axis=3, name="prediction1")
        #### Transform probability vectors to label maps
        self.label_predict = tf.argmax(self.conv8,
                                       axis=3,
                                       name="label_predict")

        #### for interpolation
        self.label_prob = tf.nn.softmax(self.conv8, axis=3)

        print("DILATED model (frontend) built")

        #### Define trainable variables and loss function
        self.t_vars = tf.trainable_variables()
        #### WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency.
        ####          Do not call this op with the output of softmax, as it will produce incorrect results.
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.squeeze(self.label_true, axis=[3]),
                logits=self.conv8,
                name="loss"))
        #### Define training operations
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss,
                                                       var_list=self.t_vars)
        #### Create model saver
        self.saver = tf.train.Saver(max_to_keep=1)  ##### keep only the latest checkpoint
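
# A hypothetical training step for the model above; `model` (an instance of
# the class this method belongs to), `sess`, `batch_images`, and
# `batch_labels` are illustrative names, not taken from the source:
def train_step(sess, model, batch_images, batch_labels, lr=1e-4):
    # one optimization step; bn_train=True puts batch norm in training mode
    _, loss_val = sess.run(
        [model.train_op, model.loss],
        feed_dict={model.image: batch_images,       # [N, img_size, img_size, 3], RGB 0-255
                   model.label_true: batch_labels,  # [N, img_size // 8, img_size // 8, 1]
                   model.bn_train: True,
                   model.learning_rate: lr})
    return loss_val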