Пример #1
0
def resnet_tensorboard(log_dir='/Users/alexwang/data/resnet_summary'):
    """Build ResNet-50 and ResNet-101 graphs and write them out for TensorBoard.

    Both networks are attached to one shared float32 placeholder of shape
    ``(None, image_size, image_size, 3)``; ``image_size`` is read from module
    scope. No data is fed: the session only runs the variable initializers and
    then dumps the graph definition as an event file.

    Args:
      log_dir: Directory the event file is written to. Defaults to the path
        that used to be hard-coded here, so existing callers are unaffected.
    """
    x_input = tf.placeholder(dtype=tf.float32, shape=(None, image_size, image_size, 3))

    # Settings shared by both networks; only the builder and scope differ.
    shared_kwargs = dict(num_classes=1000,
                         is_training=False,
                         global_pool=True,
                         output_stride=None,
                         spatial_squeeze=True,
                         store_non_strided_activations=False,
                         reuse=False)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        # The returned logits/end_points are not needed; the calls are made
        # only for the ops they add to the default graph.
        resnet_v1.resnet_v1_50(x_input, scope='resnet_v1_50', **shared_kwargs)
        resnet_v1.resnet_v1_101(x_input, scope='resnet_v1_101', **shared_kwargs)

    config = tf.ConfigProto()
    # Allocate GPU memory on demand instead of reserving it all up front.
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)
        summary_writer.close()
def res_101_fcn_8s(inputs, num_classes, is_training):
    """Build an FCN-8s style segmentation head on top of ResNet-101 v1.

    The backbone is run with ``output_stride=8`` and ``global_pool=False``,
    and its output is upsampled by a factor of 8 along both spatial axes with
    a fixed (constant) transposed-convolution kernel.

    Args:
      inputs: Image batch passed straight to ``resnet_v1.resnet_v1_101``.
      num_classes: Number of output classes; also the channel count handed to
        ``upsampling.bilinear_upsample_weights``.
      is_training: Forwarded to the backbone.

    Returns:
      A tuple ``(output, res_variables)``. ``output`` is the upsampled
      prediction tensor. ``res_variables`` maps each variable created under
      the ``'res_101_fcn_8s'`` scope to itself, keyed by the variable name
      with the leading ``'res_101_fcn_8s/'`` prefix and the final two
      characters (presumably the ``':0'`` suffix) stripped, so the keys can
      be matched against a plain ResNet-101 checkpoint.
    """
    scope_name = 'res_101_fcn_8s'
    upsample_factor = 8
    # Number of leading characters to drop from variable names: scope + '/'.
    prefix_len = len(scope_name) + 1

    with tf.variable_scope(scope_name):
        # Backbone: the ResNet-101 v1 classification network, kept at stride 8.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, _ = resnet_v1.resnet_v1_101(inputs,
                                             num_classes,
                                             is_training=is_training,
                                             global_pool=False,
                                             output_stride=upsample_factor)

        # The backbone output is 8x smaller than the input along height and
        # width, so one transposed convolution with stride 8 restores the size.
        kernel = tf.constant(
            upsampling.bilinear_upsample_weights(upsample_factor, num_classes))
        net_shape = tf.shape(net)
        target_shape = tf.stack([
            net_shape[0], net_shape[1] * upsample_factor,
            net_shape[2] * upsample_factor, net_shape[3]
        ])
        output = tf.nn.conv2d_transpose(
            net,
            kernel,
            output_shape=target_shape,
            strides=[1, upsample_factor, upsample_factor, 1])

        # Index every variable under this scope by its un-prefixed name so it
        # can be initialised from a pre-trained ResNet-101 checkpoint.
        res_variables = {
            var.name[prefix_len:-2]: var
            for var in slim.get_variables(scope_name)
        }
        return output, res_variables
Пример #3
0
def build_FPN(images, config, is_training, backbone='resnet50'):
    """Build a Feature Pyramid Network on top of a ResNet v1 backbone.

    Args:
      images: Input batch, [batch, h, w, channels].
      config: Object providing ``TOP_DOWN_PYRAMID_SIZE``, the channel count of
        every pyramid level.
      is_training: Forwarded to the backbone network.
      backbone: ``'resnet50'`` or ``'resnet101'``.

    Returns:
      Dict with keys ``'P2'``, ``'P3'``, ``'P4'``, ``'P5'`` holding the feature
      maps of the different pyramid levels, each
      [batch, height, width, channels].

    Raises:
      ValueError: If ``backbone`` is not one of the supported names. This is
        checked before any graph op is created.
    """
    # backbone -> (resnet_v1 builder name, which doubles as its scope,
    #              unit index inside block1..block4 whose output is C2..C5)
    backbone_specs = {
        'resnet50': ('resnet_v1_50', (2, 3, 5, 3)),
        'resnet101': ('resnet_v1_101', (2, 3, 22, 3)),
    }
    if backbone not in backbone_specs:
        # Previously this only printed a message and then failed later with a
        # KeyError on pyramid['C5'].
        raise ValueError("Unknown backbone : %s" % (backbone,))
    scope, tap_units = backbone_specs[backbone]

    # build backbone network
    pyramid = {}
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        _, end_points = getattr(resnet_v1, scope)(
            images, is_training=is_training, scope=scope)
        for block, unit in enumerate(tap_units, start=1):
            # block1 feeds C2, block2 feeds C3, and so on.
            pyramid['C%d' % (block + 1)] = end_points[
                '%s/block%d/unit_%d/bottleneck_v1' % (scope, block, unit)]

    # build FPN
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            # Top of the pyramid: 1x1 conv on the coarsest backbone feature.
            pyramid_feature['P5'] = slim.conv2d(pyramid['C5'],
                                                config.TOP_DOWN_PYRAMID_SIZE,
                                                1)
            # Top-down pathway: P4, P3, P2.
            for i in range(4, 1, -1):
                # Resize the coarser level to this level's spatial size.
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i + 1)],
                                             size=(upshape[1], upshape[2]))
                # Lateral 1x1 conv, merge, then a 3x3 conv on the sum.
                c = slim.conv2d(pyramid['C%d' % i],
                                config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
Пример #4
0
    def __init__(self, images):
        """Run ResNet-101 v1 on ``images`` and expose its outputs by block name.

        The network is built in inference mode (``is_training=False``) with
        1000 classes, no global pooling, no spatial squeeze and an output
        stride of 16. The first four entries of the returned value are stored
        in ``self.layer`` under ``'block1'`` .. ``'block4'``.

        NOTE(review): ``self.nets`` is passed to ``len()`` and indexed, so the
        ``resnet_v1`` module in use presumably returns a sequence of per-block
        tensors as its first result -- confirm against that module.
        """
        self.layer = {}
        self.images = images

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            self.nets, _ = resnet_v1.resnet_v1_101(
                self.images,
                1000,
                is_training=False,
                spatial_squeeze=False,
                global_pool=False,
                output_stride=16)

            # Debug output: how many outputs there are and the shape of each.
            print(len(self.nets))
            for position in range(len(self.nets)):
                print("resnet_bolck_%d" % (position + 1))
                print(self.nets[position].get_shape())

        # Publish the first four outputs under 1-based block names.
        for block_number in range(1, 5):
            self.layer['block%d' % block_number] = self.nets[block_number - 1]
def model(x, H, reuse, is_training=True):
    """Build the slim feature extractor selected by ``H`` and return two taps.

    Args:
      x: Input batch passed to the slim network.
      H: Hyper-parameter dict. Keys read here:
        ``'slim_basename'`` -- ``'resnet_v1_101'`` or ``'InceptionV1'``;
        ``'slim_top_lname'`` -- end-point name used for the coarse features;
        ``'later_feat_channels'`` -- number of leading channels kept from it;
        ``'slim_attention_lname'`` (optional, default ``'Mixed_3b'``) --
        end-point name used for the early features.
      reuse: Forwarded to the slim network builder.
      is_training: Forwarded to the slim network builder.

    Returns:
      Tuple ``(coarse_feat, early_feat)``.

    Raises:
      ValueError: If ``H['slim_basename']`` is not a supported network. This
        used to surface later as an UnboundLocalError on the end-points dict.
    """
    basename = H['slim_basename']
    if basename not in ('resnet_v1_101', 'InceptionV1'):
        raise ValueError(
            "Unsupported slim_basename '%s'. Supported values are "
            "'resnet_v1_101' and 'InceptionV1'" % (basename,))

    if basename == 'resnet_v1_101':
        with slim.arg_scope(resnet.resnet_arg_scope()):
            _, end_points = resnet.resnet_v1_101(x,
                                                 is_training=is_training,
                                                 num_classes=1000,
                                                 reuse=reuse)
    else:
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            _, end_points = inception.inception_v1(x,
                                                   is_training=is_training,
                                                   num_classes=1001,
                                                   spatial_squeeze=False,
                                                   reuse=reuse)

    # Keep only the first `later_feat_channels` channels of the top layer.
    num_channels = H['later_feat_channels']
    coarse_feat = end_points[H['slim_top_lname']][:, :, :, :num_channels]
    # Slicing silently yields fewer channels if the layer is narrower than
    # requested; fail at graph-construction time instead.
    assert coarse_feat.op.outputs[0].get_shape()[3] == num_channels, \
        "layer '%s' has fewer than %s channels" % (H['slim_top_lname'],
                                                   num_channels)

    # fine feat can be used to reinspect input
    # NOTE(review): the default 'Mixed_3b' looks like an Inception end-point
    # name; with the resnet backbone 'slim_attention_lname' presumably has to
    # be set explicitly -- confirm.
    attention_lname = H.get('slim_attention_lname', 'Mixed_3b')
    early_feat = end_points[attention_lname]

    return coarse_feat, early_feat
Пример #6
0
def build_gcn(inputs, num_classes, preset_model='GCN-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"):
    """
    Builds the GCN model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      preset_model: Which model you want to use. Selects the ResNet backbone
        used for feature extraction: 'GCN-Res50', 'GCN-Res101' or 'GCN-Res152'
      weight_decay: Weight decay passed to the ResNet arg scope
      is_training: Forwarded to the ResNet backbone
      upscaling_method: Accepted for signature compatibility; not used here
      pretrained_dir: Directory containing the '<backbone>.ckpt' checkpoint

    Returns:
      A tuple (net, init_fn): the logits tensor and a function that restores
      the pre-trained ResNet weights from the checkpoint

    Raises:
      ValueError: If preset_model is not one of the supported presets. This is
        checked before any graph op is created.
    """
    # preset -> resnet_v1 builder name, which is also the variable scope and
    # the checkpoint file stem.
    backbones = {
        'GCN-Res50': 'resnet_v1_50',
        'GCN-Res101': 'resnet_v1_101',
        'GCN-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbones:
        raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model))
    backbone = backbones[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = getattr(resnet_v1, backbone)(inputs, is_training=is_training, scope=backbone)
        # GCN requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, backbone + '.ckpt'), slim.get_model_variables(backbone))

    # Backbone features, listed in the order the decoder consumes them.
    res = [end_points['pool5'], end_points['pool4'],
           end_points['pool3'], end_points['pool2']]

    # NOTE(review): the decoder width is fixed at 21 filters regardless of
    # num_classes; left unchanged so existing checkpoints keep their shapes.
    n_filters = 21

    # Decoder: each level is GlobalConv -> BoundaryRefinement, merged with the
    # upscaled result of the previous level (if any), then upscaled by 2.
    net = None
    for feature in res:
        branch = GlobalConvBlock(feature, n_filters=n_filters, size=3)
        branch = BoundaryRefinementBlock(branch, n_filters=n_filters, kernel_size=[3, 3])
        if net is not None:
            branch = tf.add(branch, net)
            branch = BoundaryRefinementBlock(branch, n_filters=n_filters, kernel_size=[3, 3])
        net = ConvUpscaleBlock(branch, n_filters=n_filters, kernel_size=[3, 3], scale=2)

    # Final refinement and one more 2x upscaling before the classifier.
    net = BoundaryRefinementBlock(net, n_filters=n_filters, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=n_filters, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=n_filters, kernel_size=[3, 3])

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
Пример #7
0
def build_refinenet(inputs,
                    num_classes,
                    preset_model='RefineNet-Res101',
                    weight_decay=1e-5,
                    is_training=True,
                    upscaling_method="bilinear",
                    pretrained_dir="models",
                    label_size=None):
    """
    Builds the RefineNet model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      preset_model: Which model you want to use. Selects the ResNet backbone
        used for feature extraction: 'RefineNet-Res50', 'RefineNet-Res101' or
        'RefineNet-Res152'
      weight_decay: Weight decay passed to the ResNet arg scope
      is_training: Forwarded to the ResNet backbone
      upscaling_method: "conv" for learned upscaling blocks, "bilinear" for a
        single Upsampling call; any other value skips upscaling
      pretrained_dir: Directory containing the '<backbone>.ckpt' checkpoint
      label_size: Target spatial size handed to Upsampling when
        upscaling_method is "bilinear". When None, the static height/width of
        inputs is used instead

    Returns:
      A tuple (net, init_fn): the logits tensor and a function that restores
      the pre-trained ResNet weights from the checkpoint

    Raises:
      ValueError: If preset_model is not supported, or if bilinear upscaling
        is requested without label_size and inputs has no static height/width
    """
    # preset -> resnet_v1 builder name, which is also the variable scope and
    # the checkpoint file stem.
    backbones = {
        'RefineNet-Res50': 'resnet_v1_50',
        'RefineNet-Res101': 'resnet_v1_101',
        'RefineNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbones:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))
    backbone = backbones[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = getattr(resnet_v1, backbone)(
            inputs, is_training=is_training, scope=backbone)
        # RefineNet requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(pretrained_dir, backbone + '.ckpt'),
            slim.get_model_variables(backbone))

    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    # Project every backbone feature to 256 channels with a 1x1 conv.
    h = [slim.conv2d(feature, 256, 1) for feature in f]

    # Cascade of refine blocks: each one fuses the previous block's output
    # with the next projected backbone feature.
    net = RefineBlock(high_inputs=None, low_inputs=h[0])
    for low_inputs in h[1:]:
        net = RefineBlock(net, low_inputs)

    # Upscale the output of the last refine block. Previously `net` was read
    # here before ever being assigned and the classifier below ignored the
    # upscaled result.
    method = upscaling_method.lower()
    if method == "conv":
        # NOTE(review): three 2x blocks give 8x upscaling in total; whether
        # that matches the stride of the 'pool2' feature is not visible here.
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        if label_size is None:
            # assumes inputs is a [batch, height, width, channels] tensor --
            # TODO confirm
            label_size = inputs.get_shape().as_list()[1:3]
            if None in label_size:
                raise ValueError(
                    "label_size must be given for bilinear upscaling when "
                    "the input height/width are not statically known")
        net = Upsampling(net, label_size)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
Пример #8
0
def res_v1_101_lstm(input_imgs, input_seqs, input_masks,
                    batch_size, embedding_size, vocab_size,
                    is_training, lstm_dropout_keep_prob):
    """Build a ResNet-101 + LSTM graph that turns images into word sequences.

    The image is encoded by ``resnet_v1.resnet_v1_101``, projected to
    ``embedding_size`` and fed once through the LSTM cell to produce the
    initial state. What happens next depends on ``is_training``.

    Args:
      input_imgs: Image batch passed to the ResNet-101 backbone.
      input_seqs: Integer word ids looked up in the embedding map. In the
        training branch they are the ``dynamic_rnn`` inputs; in the inference
        branch they are the first words fed to the cell (presumably one start
        id per batch element -- confirm against the caller).
      input_masks: Summed along axis 1 to obtain each sequence's length
        (training branch only).
      batch_size: Batch size used to build the LSTM zero state.
      embedding_size: Size of the word/image embeddings and of the LSTM.
      vocab_size: Number of rows of the embedding map and of output logits.
      is_training: Selects the branch. The check is ``is_training is True``,
        so any value other than the literal ``True`` takes the inference path.
        It is also passed as ``trainable`` to the ResNet arg scope.
      lstm_dropout_keep_prob: Keep probability for the LSTM input and output
        dropout (training branch only).

    Returns:
      Training branch: ``(output_logits, res_variables)`` where
      ``output_logits`` are the word logits for the flattened LSTM outputs and
      ``res_variables`` maps un-prefixed names to the ResNet-101 variables.
      Inference branch: ``output_words``, a stacked tensor holding
      ``input_seqs[0]`` followed by the 30 greedily decoded words of the first
      batch element.
    """
    with tf.variable_scope('res_v1_101_lstm'):
        # Sequence embedding layer
        with tf.variable_scope("seq_embedding"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08))

        # Image feature extraction layer
        with slim.arg_scope(resnet_v1.resnet_arg_scope(trainable=is_training)):
            # Set is_training = False to fix running mean/variance of batch normalization
            image_feature, _ = resnet_v1.resnet_v1_101(input_imgs, None, is_training=False, output_stride=32)

        # Image embedding layer
        # Drop axes 1 and 2 of the backbone output (they must have size 1 for
        # the squeeze to succeed), then project to the embedding size.
        image_feature = tf.squeeze(image_feature, axis=[1, 2])
        image_embedding = slim.fully_connected(image_feature, embedding_size, activation_fn=None,
                                               weights_initializer=tf.truncated_normal_initializer(0, 0.01),
                                               biases_initializer=tf.zeros_initializer,
                                               weights_regularizer=slim.l2_regularizer(0.0005),
                                               scope = 'image_embedding')

        # LSTM layer
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=embedding_size, state_is_tuple=True)
        # Training process
        # Identity check: only the literal True selects this branch.
        if is_training is True:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                                      input_keep_prob=lstm_dropout_keep_prob,
                                                      output_keep_prob=lstm_dropout_keep_prob)
            seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

            with tf.variable_scope("lstm", initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embeddings to set the initial LSTM state.
                zero_state = lstm_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
                _, initial_state = lstm_cell(image_embedding, zero_state)
                # The cell variables now exist; every later use must share them.
                lstm_scope.reuse_variables()
                # One length per sequence: the sum of its mask along axis 1.
                sequence_length = tf.reduce_sum(input_masks, 1)
                lstm_outputs, _ = tf.nn.dynamic_rnn(cell=lstm_cell,
                                                    inputs=seq_embeddings,
                                                    sequence_length=sequence_length,
                                                    initial_state=initial_state,
                                                    dtype=tf.float32,
                                                    scope=lstm_scope)
                # Flatten all outputs into rows of size lstm_cell.output_size.
                lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])

            # Word logits layer
            output_logits = slim.fully_connected(lstm_outputs, vocab_size, activation_fn=None,
                                                weights_initializer=tf.truncated_normal_initializer(0, 0.01),
                                                biases_initializer=tf.zeros_initializer,
                                                weights_regularizer=slim.l2_regularizer(0.0005),
                                                scope='logits'
                                                )

            # Collect the backbone variables keyed by their name without the
            # first 16 characters (the length of 'res_v1_101_lstm/') and
            # without the last two characters (presumably the ':0' suffix).
            variables = slim.get_variables('res_v1_101_lstm')
            res_variables = {}
            for variable in variables:
                if 'resnet_v1_101' in variable.name:
                    res_variables[variable.name[16:-2]] = variable

            return output_logits, res_variables
        # Inference process
        else:
            # Created directly under the names "logits/weights" and
            # "logits/biases" -- presumably so they match the variables the
            # training branch's 'logits' layer creates; verify when restoring.
            weights = tf.get_variable("logits/weights", [embedding_size, vocab_size])
            biases = tf.get_variable("logits/biases", [vocab_size])
            with tf.variable_scope("lstm", initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embeddings to set the initial LSTM state.
                zero_state = lstm_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
                _, initial_state = lstm_cell(image_embedding, zero_state)
                lstm_scope.reuse_variables()
                memory_state = initial_state
                # Only the first batch element's words are collected.
                output_words = [input_seqs[0]]
                # TODO: replace the end condition of the loop with meeting the end word
                # Greedy decoding for a fixed 30 steps: the argmax word of each
                # step becomes the input of the next one.
                for _ in range(30):
                    input_seqs = tf.nn.embedding_lookup(embedding_map, input_seqs)
                    output_seqs, memory_state = lstm_cell(input_seqs, memory_state)
                    output_logits = tf.matmul(output_seqs, weights) + biases
                    output_word = tf.argmax(output_logits, -1)
                    output_words.append(output_word[0])
                    input_seqs = output_word
                output_words = tf.stack(output_words)

            return output_words
Пример #9
0
def build_pspnet(inputs,
                 label_size,
                 num_classes,
                 preset_model='PSPNet-Res50',
                 pooling_type="MAX",
                 weight_decay=1e-5,
                 upscaling_method="bilinear",
                 is_training=True,
                 pretrained_dir="models"):
    """
    Builds the PSPNet model.

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for proper upscaling
      num_classes: Number of classes
      preset_model: Which model you want to use. Selects the ResNet backbone
        used for feature extraction: 'PSPNet-Res50', 'PSPNet-Res101' or
        'PSPNet-Res152'
      pooling_type: Max or Average pooling
      weight_decay: Weight decay passed to the ResNet arg scope
      upscaling_method: "conv" for learned upscaling blocks, "bilinear" for a
        single Upsampling call to label_size; any other value skips upscaling
      is_training: Forwarded to the ResNet backbone and to the dropout layer
      pretrained_dir: Directory containing the '<backbone>.ckpt' checkpoint

    Returns:
      A tuple (net, init_fn): the logits tensor and a function that restores
      the pre-trained ResNet weights from the checkpoint

    Raises:
      ValueError: If preset_model is not one of the supported presets. This is
        checked before any graph op is created.
    """
    # preset -> resnet_v1 builder name, which is also the variable scope and
    # the checkpoint file stem.
    backbones = {
        'PSPNet-Res50': 'resnet_v1_50',
        'PSPNet-Res101': 'resnet_v1_101',
        'PSPNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbones:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))
    backbone = backbones[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = getattr(resnet_v1, backbone)(
            inputs, is_training=is_training, scope=backbone)
        # PSPNet requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(pretrained_dir, backbone + '.ckpt'),
            slim.get_model_variables(backbone))

    # The pyramid pooling module works on the 'pool3' feature, whose spatial
    # size is taken to be 1/8 of the label size.
    feature_map_shape = [int(x / 8.0) for x in label_size]
    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    # NOTE(review): slim.batch_norm defaults to is_training=True, so this
    # layer uses batch statistics even at inference. Left unchanged because
    # switching it affects how existing checkpoints evaluate -- confirm.
    net = slim.batch_norm(net)
    net = tf.nn.relu(net)

    method = upscaling_method.lower()
    if method == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        net = Upsampling(net, label_size)

    # Dropout must be disabled at inference; slim.dropout defaults to
    # is_training=True, so the flag has to be passed explicitly.
    net = slim.dropout(net, keep_prob=0.9, is_training=is_training)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn