def denseASPP(inputs,
              is_training,
              output_stride,
              pre_trained_model,
              classes,
              keep_prob=1.0):

    with tf.contrib.slim.arg_scope(
            resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(
            inputs,
            num_classes=None,
            is_training=is_training,
            global_pool=False,
            output_stride=output_stride)

    if is_training:
        exclude = ['resnet_v2_101' + '/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(
            exclude=exclude)
        tf.train.init_from_checkpoint(
            pre_trained_model,
            {v.name.split(':')[0]: v
             for v in variables_to_restore})

    net = end_points['resnet_v2_101' + '/block4']

    with tf.name_scope("denseASPP"):

        input = denseASPP_block(net, is_training, keep_prob)

    with tf.contrib.slim.arg_scope(
            resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):

        with tf.name_scope("segmentation"):

            input_shape = input.get_shape().as_list()
            input = tf.nn.dropout(input, keep_prob=keep_prob)
            weight_1 = weight_variable([1, 1, input_shape[-1], classes])
            bias = bias_variable([classes])
            input = tf.nn.conv2d(input, weight_1, [1, 1, 1, 1],
                                 padding='SAME') + bias

        with tf.name_scope("upsamling"):
            input_shape = input.get_shape().as_list()
            input = tf.image.resize_bilinear(input, tf.shape(inputs)[1:3])

    output = input
    return output
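A minimal usage sketch, assuming the helpers referenced above (denseASPP_block, weight_variable, bias_variable, _BATCH_NORM_DECAY) are defined, and with a placeholder checkpoint path:

images = tf.placeholder(tf.float32, [None, 513, 513, 3])
logits = denseASPP(images,
                   is_training=True,
                   output_stride=16,
                   pre_trained_model='/path/to/resnet_v2_101.ckpt',  # hypothetical path
                   classes=21,
                   keep_prob=0.9)
# logits: [batch, 513, 513, 21], already bilinearly resized to the input size.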
Example #2
    def model(inputs, is_training):
        """Constructs the ResNet model given the inputs."""
        if data_format == 'channels_first':
            # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
            # This provides a large performance boost on GPU. See
            # https://www.tensorflow.org/performance/performance_guide#data_formats
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        # tf.logging.info('net shape: {}'.format(inputs.shape))
        # encoder
        with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
            logits, end_points = base_model(inputs,
                                            num_classes=None,
                                            is_training=is_training,
                                            global_pool=False,
                                            output_stride=output_stride)

        if is_training:
            exclude = [base_architecture + '/logits', 'global_step']
            variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
            tf.train.init_from_checkpoint(pre_trained_model,
                                          {v.name.split(':')[0]: v for v in variables_to_restore})

        inputs_size = tf.shape(inputs)[1:3]

        net = end_points[base_architecture + '/block4']
        encoder_output = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training)

        # encoder_output = lstm(encoder_output)

        with tf.variable_scope("decoder"):
            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
                with arg_scope([layers.batch_norm], is_training=is_training):
                    with tf.variable_scope("low_level_features"):
                        low_level_features = end_points[base_architecture + '/block1/unit_3/bottleneck_v2/conv1']
                        low_level_features = layers_lib.conv2d(low_level_features, 48,
                                                               [1, 1], stride=1, scope='conv_1x1')
                        low_level_features_size = tf.shape(low_level_features)[1:3]

                    with tf.variable_scope("upsampling_logits"):
                        net = tf.image.resize_bilinear(encoder_output, low_level_features_size, name='upsample_1')
                        net = tf.concat([net, low_level_features], axis=3, name='concat')
                        net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_1')
                        net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_2')
                        net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None,
                                                scope='conv_1x1')
                        logits = tf.image.resize_bilinear(net, inputs_size, name='upsample_2')

        return logits
Example #3
def main(_):
    with tf.Graph().as_default():
        url = 'https://upload.wikimedia.org/wikipedia/commons/5/5c/Tigershark3.jpg'
        image_string = urllib.request.urlopen(url).read()  # urllib.urlopen is Python 2 only
        image = tf.image.decode_jpeg(image_string, channels=3)
        processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_50(processed_images, num_classes=1001, is_training=False)
        probabilities = tf.nn.softmax(logits)

        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v2_50.ckpt'),
            slim.get_model_variables('resnet_v2_50'))

        with tf.Session() as sess:
            init_fn(sess)
            np_image, probabilities = sess.run([image, probabilities])
            probabilities = np.reshape(probabilities, [1001])
            print(probabilities.shape)
            sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
            print(sorted_inds)
        plt.figure()
        plt.imshow(np_image.astype(np.uint8))
        plt.axis('off')
        plt.show()

        names = imagenet.create_readable_names_for_imagenet_labels()

        for i in range(5):
            index = sorted_inds[i]
            # Shift the index of a class name by one.
            print('Probability %0.6f%% => [%s]' % (probabilities[index] * 100, names[index + 1]))
Example #4
def ASPP(inputs, output_stride, batch_norm_decay, is_training, depth=256):
    if output_stride not in [8, 16]:
        raise ValueError('output_stride must be either 8 or 16.')

    atrous_rates = [6, 12, 18]
    if output_stride == 8:
        atrous_rates = [2 * rate for rate in atrous_rates]

    # resnet_arg_scope supplies the conv2d regularizer/initializer and the
    # batch-norm defaults used by the layers below.
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        with arg_scope([layers.batch_norm], is_training=is_training):
            inputs_size = tf.shape(inputs)[1:3]
            conv_11 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1, scope='conv_1x1')
            conv_33_1 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[0], scope='conv_3x3_1')
            conv_33_2 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[1], scope='conv_3x3_2')
            conv_33_3 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[2], scope='conv_3x3_3')

            # Image-level features: global average pool, 1x1 conv, bilinear upsample.
            with tf.variable_scope("image_level_features"):
                image_level_features = tf.reduce_mean(inputs, [1, 2], name='global_average_pooling', keepdims=True)
                image_level_features = layers_lib.conv2d(image_level_features, depth, [1, 1], stride=1, scope='conv_1x1')
                image_level_features = tf.image.resize_bilinear(image_level_features, inputs_size, name='upsample')

            net = tf.concat([conv_11, conv_33_1, conv_33_2, conv_33_3, image_level_features], axis=3, name='concat')
            net = layers_lib.conv2d(net, depth, [1, 1], stride=1, scope='conv_1x1_concat')

            return net
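For reference, a short sketch of driving this ASPP head from a backbone feature map; the end_points key and decay value are illustrative assumptions:

features = end_points['resnet_v2_101/block4']  # e.g. [batch, h/16, w/16, 2048]
aspp_out = ASPP(features, output_stride=16, batch_norm_decay=0.9997, is_training=True)
# With output_stride=8 the same call doubles the atrous rates to (12, 24, 36).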
Example #5
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = RESNET_MODELS[model_name]
        logits, _ = resnet_fn(images, num_classes, is_training=is_training,
                              reuse=reuse)
        logits = tf.reshape(logits, [-1, num_classes])
    return logits
def model(images,
          filter_type,
          filter_trainable,
          weight_decay,
          batch_size,
          is_training,
          num_classes=2):
    with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
        inputs = get_residuals(images, filter_type, filter_trainable)
        _, end_points = resnet_small(inputs,
                                     num_classes=None,
                                     is_training=is_training,
                                     global_pool=False,
                                     output_stride=None,
                                     include_root_block=False)
        net = end_points['resnet_small/block4']
        block2_size = tf.shape(end_points['resnet_small/block2'])[1:3]
        kernel0 = tf.Variable(bilinear_upsample_weights(4, 64, 1024),
                              dtype=tf.float32, name='bilinear_kernel0')
        net = tf.nn.conv2d_transpose(net, kernel0,
                                     [batch_size, block2_size[0], block2_size[1], 64],
                                     strides=[1, 4, 4, 1], padding="SAME")
        end_points['upsample1'] = net
        kernel1 = tf.Variable(bilinear_upsample_weights(4, 4, 64),
                              dtype=tf.float32, name='bilinear_kernel1')
        net = tf.nn.conv2d_transpose(net, kernel1,
                                     [batch_size, tf.shape(inputs)[1], tf.shape(inputs)[2], 4],
                                     strides=[1, 4, 4, 1], padding="SAME")
        end_points['upsample2'] = net
        net = layers.batch_norm(net,
                                activation_fn=tf.nn.relu,
                                is_training=is_training,
                                scope='post_norm')
        logits = slim.conv2d(net,
                             num_classes, [5, 5],
                             activation_fn=None,
                             normalizer_fn=None,
                             scope='logits')
        preds = tf.cast(tf.argmax(logits, 3), tf.int32)
        preds_map = tf.nn.softmax(logits)[:, :, :, 1]

        return logits, preds, preds_map, net, end_points, inputs
    def load(self, **kwargs):
        session = kwargs["session"]
        assert isinstance(session, tf.Session)

        x_input = tf.placeholder(self.x_dtype, shape=(None, ) + self.x_shape)
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            resnet_v2.resnet_v2_152(x_input,
                                    num_classes=self.n_class,
                                    is_training=False,
                                    reuse=tf.AUTO_REUSE)

        model_path = get_model_path('resnet_v2_152')
        if not os.path.exists(model_path):
            os.makedirs(model_path)
            urllib.request.urlretrieve(
                'http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz',
                os.path.join(model_path, 'resnet_v2_152_2017_04_14.tar.gz'),
                show_progress)

            tar = tarfile.open(
                os.path.join(model_path, 'resnet_v2_152_2017_04_14.tar.gz'))
            file_names = tar.getnames()
            for file_name in file_names:
                tar.extract(file_name, model_path)

        saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))
        saver.restore(session, os.path.join(model_path, 'resnet_v2_152.ckpt'))
Example #8
File: yolo_net.py  Project: nowgood/yolo
def yolonet(images, is_training=True):
    tf.logging.set_verbosity(tf.logging.INFO)
    output_depth = NUM_CLASS + 5 * BOX_PER_CELL
    # Note: building inside a fresh tf.Graph() would orphan `images`, which
    # belongs to the caller's graph, so the ops are created in that graph.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        bottleneck, _ = resnet_v2.resnet_v2_50(images,
                                               global_pool=False,
                                               is_training=is_training)
        with arg_scope([layers.batch_norm], is_training=is_training):
            net = bottleneck
            net = layers_lib.conv2d(net,
                                    512, [1, 1],
                                    normalizer_fn=layers.batch_norm,
                                    scope='yolo_layer1')
            net = layers_lib.conv2d(net,
                                    512, [3, 3],
                                    normalizer_fn=layers.batch_norm,
                                    scope='yolo_layer2')
            net = layers_lib.conv2d(net,
                                    512, [3, 3],
                                    normalizer_fn=layers.batch_norm,
                                    scope='yolo_layer3')
            net = layers_lib.conv2d(net,
                                    output_depth, [1, 1],
                                    activation_fn=None,
                                    normalizer_fn=None,
                                    scope='yolo_output')
    return net
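Each cell of the output volume stacks NUM_CLASS class scores plus five values (x, y, w, h, confidence) for each of BOX_PER_CELL boxes. A sanity check with PASCAL-style constants, assumed here for illustration:

NUM_CLASS = 20
BOX_PER_CELL = 2
output_depth = NUM_CLASS + 5 * BOX_PER_CELL  # 20 + 5*2 = 30 channels per grid cell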
Example #9
    def after_create_session(self, session, coord=None):
        if session.run(tf.train.get_or_create_global_step()) > 0:
            self.init_fn(session)
            arg_scope = resnet_v2.resnet_arg_scope()
            with slim.arg_scope(arg_scope):
                gs_init = global_step.assign(0)
                session.run(gs_init)
Example #10
def featureExtractor(input,
                     feature_norm,
                     model='101',
                     reuse=False,
                     scope='resnet_v2_101'):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            input = scale_RGB(input)
            if '50' in model:
                net, end_points = resnet_v2.resnet_v2_50(input,
                                                         global_pool=True,
                                                         is_training=False,
                                                         reuse=reuse)
            elif '101' in model:
                _, end_points = resnet_v2.resnet_v2_101(input,
                                                        global_pool=True,
                                                        is_training=False,
                                                        reuse=reuse)
                #f = end_points['stabNet/pathFinder/featureExtractor/resnet_v2_101/block3/unit_23/bottleneck_v2/conv1'] #(18 X 32) X (18 X 32)
                f = end_points[
                    '{}/resnet_v2_101/block4/unit_2/bottleneck_v2/conv1'.
                    format(scope.name)]  #(9 X 16) X (9 X 16)
        if feature_norm:
            f = featureL2Norm(f)

    return f
Example #11
def CNN(inputs):
    with tf.variable_scope("CNN"):
        # layer = slim.conv2d(inputs, 64, [8,8], [2,4], normalizer_fn=slim.batch_norm, activation_fn=None)
        # layer [B H//2 W//4 64]
        # tf.summary.image('zoom', tf.transpose (layer, [3, 1, 2, 0]), max_outputs=6)
        # layer = utils_nn.resNet50(layer, True, [2,1])
        # [N H//32 W 2048]
        # tf.summary.image('2_res50', tf.transpose (layer, [3, 1, 2, 0]), max_outputs=6)

        # with slim.arg_scope(inception.inception_v3_arg_scope()):
        #     with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
        #         layer, _ = inception.inception_v3_base(inputs, final_endpoint="Mixed_5d")

        # layer = utils_nn.resNet101(inputs, True)
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            layer, _ = resnet_v2.resnet_v2_152(inputs,
                                               None,
                                               is_training=True,
                                               global_pool=False,
                                               output_stride=16)
        # Collapse the features straight down to 256 channels: [N 1 256 256]
        with tf.variable_scope("Normalize"):
            layer = slim.conv2d(layer,
                                1024, [2, 2], [2, 1],
                                normalizer_fn=slim.batch_norm,
                                activation_fn=None)
            layer = slim.conv2d(layer,
                                512, [1, 1],
                                normalizer_fn=slim.batch_norm,
                                activation_fn=None)
            layer = slim.conv2d(layer,
                                256, [1, 1],
                                normalizer_fn=slim.batch_norm,
                                activation_fn=None)
            return layer
Example #12
    def load(self, **kwargs):
        session = kwargs["session"]
        assert isinstance(session, tf.Session)

        x_input = tf.placeholder(tf.float32, shape=(None, ) + self.x_shape)
        with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_50(x_input,
                                               self.n_class,
                                               is_training=False,
                                               reuse=tf.AUTO_REUSE)

        model_path = get_model_path('alp')
        url = 'http://download.tensorflow.org/models/adversarial_logit_pairing/imagenet64_alp025_2018_06_26.ckpt.tar.gz'
        fname = os.path.join(model_path, url.split('/')[-1])
        if not os.path.exists(fname):
            if not os.path.exists(model_path):
                os.makedirs(model_path)

            from six.moves import urllib
            urllib.request.urlretrieve(url, fname, show_progress)
            import tarfile
            t = tarfile.open(fname)
            t.extractall(model_path)
            print('Extracted model')

        saver = tf.train.Saver()
        saver.restore(session, fname.split('.tar.gz')[0])
def get_resnet(x_tensor, reuse, is_training, x_batch_size):
    with tf.variable_scope('resnet', reuse=reuse):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            resnet, end_points = resnet_v2.resnet_v2_50(
                x_tensor,
                global_pool=False,
                is_training=is_training,
                reuse=reuse,
                output_stride=32)
        global_pool = tf.reduce_mean(resnet, [1, 2])
        with tf.variable_scope('fc'):
            global_pool = slim.fully_connected(global_pool,
                                               2048,
                                               scope='fc/fc_1')
            global_pool = slim.fully_connected(global_pool,
                                               1024,
                                               scope='fc/fc_2')
            global_pool = slim.fully_connected(global_pool,
                                               512,
                                               scope='fc/fc_3')
            theta = output_layer(global_pool, (grid_h + 1) * (grid_w + 1) * 2)

        with tf.name_scope('gen_theta'):
            id2_loss = tf.reduce_mean(tf.abs(theta)) * id_mul
    return theta, id2_loss, id2_loss
Example #14
    def model(inputs, is_training):
        """Constructs the ResNet model given the inputs."""
        if data_format == 'channels_first':
            # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
            # This provides a large performance boost on GPU. See
            # https://www.tensorflow.org/performance/performance_guide#data_formats
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
            logits, end_points = base_model(inputs,
                                            num_classes=None,
                                            is_training=is_training,
                                            global_pool=False,
                                            output_stride=output_stride)
                                        
        variables_to_restore = None
        if is_training:
            exclude = [base_architecture + '/logits', 'global_step']
            variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)

        inputs_size = tf.shape(inputs)[1:3]
        net = end_points[base_architecture + '/block4']
        net = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training)
        with tf.variable_scope("upsampling_logits"):
            net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv_1x1')
            logits = tf.image.resize_bilinear(net, inputs_size, name='upsample')

        return logits, variables_to_restore
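Unlike Example #2, this variant returns variables_to_restore and leaves checkpoint restoration to the caller. A minimal sketch following the pattern of the other examples, with a placeholder checkpoint path:

logits, variables_to_restore = model(images, is_training=True)
if variables_to_restore:
    tf.train.init_from_checkpoint(
        '/path/to/resnet_v2_101.ckpt',  # hypothetical path
        {v.name.split(':')[0]: v for v in variables_to_restore})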
Example #15
    def _ConvNet2D(self, x, is_training, reuse=False):

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            f, _ = resnet_v2.resnet_v2_50(x,
                                          num_classes=None,
                                          is_training=is_training,
                                          global_pool=False,
                                          reuse=reuse)
            print("resnet.out.shape: %s" % f.get_shape())
            with tf.variable_scope("ConvNet2D", reuse=reuse):
                f = tf.reduce_mean(f, [1, 2],
                                   name='global_avg_pooling',
                                   keep_dims=True)
                z = slim.conv2d(f,
                                4096, [1, 1],
                                padding='VALID',
                                normalizer_fn=None,
                                scope='f2zfeture')
                z = slim.conv2d(z,
                                self.z_dim, [1, 1],
                                padding='VALID',
                                normalizer_fn=None,
                                scope='z_2d')

                #g_feature = tf.squeeze(g_feature, [1, 2], name='global_spatial_squeeze')

                return tf.expand_dims(z, 1)
Example #16
def resnet_v2_50(init_weights):
    tf_compat.reset_default_graph()
    image_size = 224
    inputs = tf_compat.random_normal([1, image_size, image_size, 3])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(inputs, 1000, is_training=False)
        return tf_compat.get_default_graph()
Example #17
def pyramid_pooling(input, is_training):
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope("pyramid_pooling"):
            input_shape = input.get_shape().as_list()
            num_output_features = input_shape[-1] // LEVEL_SIZE
            with tf.name_scope("pool_1"):
                pool_bin_1 = tf.nn.avg_pool(input, [1, input_shape[1], input_shape[2], 1],
                                            [1, input_shape[1], input_shape[2], 1], padding='VALID')
                weight_1 = weight_variable([1, 1, input_shape[-1], num_output_features])

                output_1 = tf.nn.conv2d(pool_bin_1, weight_1, [1, 1, 1, 1], padding='SAME')
                output_1 = batch_norm(output_1, is_training)
                output_1 = tf.nn.relu(output_1)

                output_1 = tf.image.resize_bilinear(output_1, [input_shape[1], input_shape[2]])

            with tf.name_scope("pool_2"):
                pool_bin_2 = tf.nn.avg_pool(input, [1, input_shape[1] // 2, input_shape[2] // 2, 1],
                                            [1, input_shape[1] // 2, input_shape[2] // 2, 1], padding='VALID')

                weight_2 = weight_variable([1, 1, input_shape[-1], num_output_features])

                output_2 = tf.nn.conv2d(pool_bin_2, weight_2, [1, 1, 1, 1], padding='SAME')
                output_2 = batch_norm(output_2, is_training)
                output_2 = tf.nn.relu(output_2)

                output_2 = tf.image.resize_bilinear(output_2, [input_shape[1], input_shape[2]])

            with tf.name_scope("pool_3"):
                pool_bin_3 = tf.nn.avg_pool(input, [1, input_shape[1] // 3, input_shape[2] // 3, 1],
                                            [1, input_shape[1] // 3, input_shape[2] // 3, 1], padding='VALID')

                weight_3 = weight_variable([1, 1, input_shape[-1], num_output_features])

                output_3 = tf.nn.conv2d(pool_bin_3, weight_3, [1, 1, 1, 1], padding='SAME')
                output_3 = batch_norm(output_3, is_training)
                output_3 = tf.nn.relu(output_3)

                output_3 = tf.image.resize_bilinear(output_3, [input_shape[1], input_shape[2]])


            with tf.name_scope("pool_6"):
                pool_bin_6 = tf.nn.avg_pool(input, [1, input_shape[1] // 6, input_shape[2] // 6, 1],
                                            [1, input_shape[1] // 6, input_shape[2] // 6, 1], padding='VALID')

                weight_6 = weight_variable([1, 1, input_shape[-1], num_output_features])

                output_6 = tf.nn.conv2d(pool_bin_6, weight_6, [1, 1, 1, 1], padding='SAME')
                output_6 = batch_norm(output_6, is_training)
                output_6 = tf.nn.relu(output_6)

                output_6 = tf.image.resize_bilinear(output_6, [input_shape[1], input_shape[2]])


            input = tf.concat([input, output_1], axis=-1)
            input = tf.concat([input, output_2], axis=-1)
            input = tf.concat([input, output_3], axis=-1)
            input = tf.concat([input, output_6], axis=-1)

    return input
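With the usual PSPNet setup (LEVEL_SIZE = 4 and a 2048-channel block4 input), each pooling branch contributes input_shape[-1] // LEVEL_SIZE features, so the concatenation roughly doubles the channel count. The concrete numbers below are illustrative:

input_channels = 2048
num_output_features = input_channels // 4                    # 512 per pooling level
output_channels = input_channels + 4 * num_output_features   # 4096 after concat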
Example #18
def create_context_path(input_im):

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        last_layer, end_points = resnet_v2.resnet_v2_101(input_im,
                                                         is_training=True,
                                                         scope='resnet_v2_101',
                                                         global_pool=False)
        frontend_scope = 'resnet_v2_101'
        init_fn = slim.assign_from_checkpoint_fn(
            model_path=os.path.join('models', 'resnet_v2_101.ckpt'),
            var_list=slim.get_model_variables('resnet_v2_101'),
            ignore_missing_vars=True)

        layer_reduced16 = end_points[frontend_scope + '/block2']
        layer_reduced32 = last_layer
        layer_arm16 = arm_module(layer_reduced16, n_filter_maps=512)
        layer_arm32 = arm_module(layer_reduced32, n_filter_maps=2048)
        layer_global_context = tf.reduce_mean(last_layer,
                                              axis=[1, 2],
                                              keepdims=True,
                                              name='global_context')

        ## Combining Context Features
        layer_context1 = tf.math.multiply(layer_arm32, layer_global_context)
        layer_context1 = layers.UpSampling2D(
            size=4, interpolation='bilinear')(layer_context1)
        layer_context2 = layers.UpSampling2D(
            size=2, interpolation='bilinear')(layer_arm16)
        context_output = tf.concat([layer_context1, layer_context2], axis=-1)

        return context_output, init_fn
Example #19
def PSPNet(inputs, is_training, output_stride, pre_trained_model, classes):
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(inputs, num_classes=None, is_training=is_training,
                                                     global_pool=False, output_stride=output_stride)

    if is_training:
        exclude = ['resnet_v2_101' + '/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(pre_trained_model, {v.name.split(':')[0]: v for v in variables_to_restore})

    net = end_points['resnet_v2_101' + '/block4']
    encoder_output = pyramid_pooling(net, is_training)

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope("auli_logits"):

            key = 'resnet_v2_101/block3'
            auxi_logits = end_points[key]

            auli_shape = auxi_logits.get_shape().as_list()
            weight_3 = weight_variable([3, 3, auli_shape[-1], auli_shape[-1] // 4])
            auxi_logits = tf.nn.conv2d(auxi_logits, weight_3, [1, 1, 1, 1], padding='SAME')
            auxi_logits = batch_norm(auxi_logits, is_training)
            auxi_logits = tf.nn.relu(auxi_logits)

            weight_1 = weight_variable([1, 1, auli_shape[-1] // 4, classes])
            bias = bias_variable([classes])
            auxi_logits = tf.nn.conv2d(auxi_logits, weight_1, [1, 1, 1, 1], padding='SAME') + bias

            auxi_logits = tf.image.resize_bilinear(auxi_logits, tf.shape(inputs)[1:3])

        with tf.name_scope("segmentation"):
            encoder_output_shape = encoder_output.get_shape().as_list()
            weight_3 = weight_variable([3, 3, encoder_output_shape[-1], encoder_output_shape[-1] // 4])
            net = tf.nn.conv2d(encoder_output, weight_3, [1, 1, 1, 1], padding='SAME')
            net = batch_norm(net, is_training)
            net = tf.nn.relu(net)

            weight_1 = weight_variable([1, 1, encoder_output_shape[-1] // 4, classes])
            bias = bias_variable([classes])
            net = tf.nn.conv2d(net, weight_1, [1, 1, 1, 1], padding='SAME') + bias

            logits = tf.image.resize_bilinear(net, tf.shape(inputs)[1:3])

    return auxi_logits, logits
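The auxiliary head is used only to stabilize training; the PSPNet paper weights its loss by 0.4. A minimal sketch, assuming integer labels of shape [batch, h, w]:

auxi_logits, logits = PSPNet(images, is_training=True, output_stride=16,
                             pre_trained_model='/path/to/resnet_v2_101.ckpt',  # hypothetical
                             classes=21)
main_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
auxi_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=auxi_logits))
total_loss = main_loss + 0.4 * auxi_loss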
Example #20
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        img_resize_tensor = tf.placeholder(tf.int32, [2])
        x_input_resize = tf.image.resize_images(
            x_input,
            img_resize_tensor,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        shape_tensor = tf.placeholder(tf.int32, [3])
        padded_input = padding_layer_iyswim(x_input_resize, shape_tensor)
        # 330 is the last value to keep 8*8 output, 362 is the last value to keep 9*9 output, stride = 32
        padded_input.set_shape(
            (FLAGS.batch_size, FLAGS.image_resize, FLAGS.image_resize, 3))
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            # Classify the randomly resized and padded input (feeding x_input
            # directly would bypass the padding defense built above).
            net, end_points = resnet_v2.resnet_v2_50(padded_input,
                                                     num_classes=num_classes,
                                                     is_training=False)
        predicted_labels = tf.argmax(end_points['predictions'], 3)
        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    if np.random.randint(0, 2, size=1) == 1:
                        images = images[:, :, ::-1, :]
                    resize_shape_ = np.random.randint(310, 331)
                    labels = sess.run(
                        predicted_labels,
                        feed_dict={
                            x_input:
                            images,
                            img_resize_tensor: [resize_shape_] * 2,
                            shape_tensor:
                            np.array([
                                random.randint(
                                    0, FLAGS.image_resize - resize_shape_),
                                random.randint(
                                    0, FLAGS.image_resize - resize_shape_),
                                FLAGS.image_resize
                            ])
                        })
                    labels = labels.flatten()
                    for filename, label in zip(filenames, labels):
                        out_file.write('{0},{1}\n'.format(filename, label))
Example #21
def RES(inputs, seq_len, reuse = False):
    with tf.variable_scope("OCR", reuse=reuse):
        print("inputs shape:",inputs.shape)
        # layer = utils_nn.resNet101V2(inputs, True)    # N H W/16 2048
        # layer = utils_nn.resNet50(inputs, True, [2,1]) # (N H/16 W 2048)

        # with slim.arg_scope(inception.inception_v3_arg_scope()):
        #     with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
        #         layer, _ = inception.inception_v3_base(inputs, final_endpoint="Mixed_5d")

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            layer, _ = resnet_v2.resnet_v2_152(inputs,
                                                None,
                                                is_training=True,
                                                global_pool=False,
                                                output_stride=16) 
        print("ResNet shape:",layer.shape)

        # Collapse the features straight down to 256 channels [N 1 512 256]
        with tf.variable_scope("Normalize"):
            layer = slim.conv2d(layer, 1024, [2,2], [2,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
            layer = slim.conv2d(layer, 512, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
            layer = slim.conv2d(layer, 256, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 

        # layer = utils_nn.resNet101(inputs, True)
                    
        # with tf.variable_scope("ResNext"):
        #     layer = slim.conv2d(inputs, 64, [2,4], [2,4], normalizer_fn=slim.batch_norm, activation_fn=None) 
        #     tf.summary.image('1_2_4_zoom', tf.transpose (layer, [3, 1, 2, 0]), max_outputs=6)
        #     layer = utils_nn.resNext50(layer, True, [2,1]) # (N H/16 W 2048)
        #     tf.summary.image('2_res50', tf.transpose (layer, [3, 1, 2, 0]), max_outputs=6)

        temp_layer = layer
        # with tf.variable_scope("Normalize"):
        #     layer = slim.conv2d(layer, 1024, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
        #     layer = slim.conv2d(layer, 512, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
        #     layer = slim.conv2d(layer, 256, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
            # layer = slim.conv2d(layer, 128, [1,1], normalizer_fn=slim.batch_norm, activation_fn=None) 
       
        # Divide the image height and width by [2, 4]
        # layer = slim.avg_pool2d(layer, [2, 4], [2, 4]) 
        print("ResNet shape:",layer.shape)

        # Append coordinate information; embedd_size extra channels are added.
        # max_width_height, embedd_size
        # max_width_height is the maximum scaled width w; the actual maximum
        # image width is max_width_height * 4
        with tf.variable_scope("Coordinates"):
            max_width_height = MAX_IMAGE_WIDTH//8
            embedd_size = 64
            layer = Coordinates(layer, max_width_height, embedd_size)
            print("Coordinates shape:",layer.shape)

        with tf.variable_scope("LSTM"):
            layer = tf.squeeze(layer, axis=[1])  # squeeze_dims is a deprecated alias for axis
            print("SEQ shape:",layer.shape)
            layer = LSTM(layer, 256+embedd_size, seq_len)    # N, W*H, 256
            print("lstm shape:",layer.shape)

        return layer, temp_layer
Example #22
    def built_network(self, inputs, is_training, dropout_rate):

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(inputs,
                                                     self.opt.num_classes,
                                                     is_training=is_training)
        net = tf.squeeze(net, axis=[1, 2])
        return net
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = resnet_v2.resnet_v2_50
        logits, _ = resnet_fn(images,
                              num_classes,
                              is_training=is_training,
                              reuse=reuse)
        logits = tf.reshape(logits, [-1, num_classes])
    return logits
Example #24
def deeplab_v3_plus(inputs, is_training, output_stride, pre_trained_model):

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(inputs, num_classes=None, is_training=is_training, global_pool=False, output_stride=output_stride)

    if is_training:
        exclude = ['resnet_v2_101' + '/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(pre_trained_model, {v.name.split(':')[0]: v for v in variables_to_restore})

    net = end_points['resnet_v2_101' + '/block4']
    encoder_output = aspp(net, output_stride, is_training)

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope('low_level_features'):
            low_level_features = end_points['resnet_v2_101' + '/block1/unit_3/bottleneck_v2/conv1']
            in_channels = low_level_features.get_shape().as_list()[-1]
            low_level_shape = tf.shape(low_level_features)
            weight_1x1_low_level = weight_variable([1, 1, in_channels, 48], name='weight_1x1_low_level')
            conv_1x1_low_level = tf.nn.conv2d(low_level_features, weight_1x1_low_level, [1, 1, 1, 1], padding='SAME', name='conv_1x1_low_level')
            conv_1x1_low_level = tf.nn.relu(batch_norm(conv_1x1_low_level, is_training), name='relu_1x1_low_level')

            low_level_features_size = low_level_shape[1:3]

        with tf.name_scope("upsamling_logits"):
            net = tf.image.resize_bilinear(encoder_output, low_level_features_size, name='upsample_1')

            net = tf.concat([net, low_level_features], axis=-1, name='concat')
            weight_3x3_upsample_1 = weight_variable([3, 3, net.get_shape().as_list()[-1], 256], name='weight_3x3_upsample_1')
            weight_3x3_upsample_2 = weight_variable([3, 3, 256, 256], name='weight_3x3_upsample_2')
            weight_1x1_upsample_3 = weight_variable([1, 1, 256, CLASSES], name='weight_1x1_upsample_3')
            bias = bias_variable([CLASSES], name='softmax_bias')

            net = tf.nn.conv2d(net, weight_3x3_upsample_1, [1, 1, 1, 1], padding='SAME', name='conv_3x3_upsample_1')
            net = tf.nn.relu(batch_norm(net, is_training), name='conv_3x3_relu_1')
            net = tf.nn.conv2d(net, weight_3x3_upsample_2, [1, 1, 1, 1], padding='SAME', name='conv_3x3_upsample_2')
            net = tf.nn.relu(batch_norm(net, is_training), name='conv_3x3_relu_2')
            net = tf.nn.conv2d(net, weight_1x1_upsample_3, [1, 1, 1, 1], padding='SAME', name='conv_1x1_upsample_3') + bias

            logits = tf.image.resize_bilinear(net, tf.shape(inputs)[1:3], name='upsample_2')

    return logits
Example #25
    def build(self):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, self.endpoints = resnet_v2.resnet_v2_50(
                self.inputs['images'],
                num_classes=self.num_classes,
                is_training=self.is_training)
        self.outputs['logits'] = tf.reshape(logits, [-1, self.num_classes])
        self.outputs['argmax'] = tf.argmax(self.outputs['logits'],
                                           axis=1,
                                           name='output/predict')
Example #26
    def built_network(self, inputs1, inputs2, is_training=False):
        inputs = tf.concat([inputs1, inputs2], axis=0)
        with tf.variable_scope("Seg"):
            with tf.contrib.slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=_BATCH_NORM_DECAY)):
                logits, end_points = resnet_v2.resnet_v2_50(
                    inputs, is_training=is_training)

        return end_points
    def atrous_spatial_pyramid_pooling(self, inputs, output_stride, batch_norm_decay, is_training, depth=256):
        """Atrous Spatial Pyramid Pooling.

        Args:
          inputs: A tensor of size [batch, height, width, channels].
          output_stride: The ResNet unit's stride. Determines the rates for
            atrous convolution: (6, 12, 18) when the stride is 16, doubled
            when it is 8.
          batch_norm_decay: The moving average decay when estimating layer
            activation statistics in batch normalization.
          is_training: A boolean denoting whether the input is for training.
          depth: The depth of the ResNet unit output.

        Returns:
          The atrous spatial pyramid pooling output.
        """
        with tf.variable_scope("aspp"):
            if output_stride not in [8, 16]:
                raise ValueError('output_stride must be either 8 or 16.')

            atrous_rates = [6, 12, 18]
            if output_stride == 8:
                atrous_rates = [2*rate for rate in atrous_rates]

            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
                with arg_scope([layers.batch_norm], is_training=is_training):
                    inputs_size = tf.shape(inputs)[1:3]
                    # (a) one 1x1 convolution and three 3x3 convolutions with rates = (6, 12, 18) when output stride = 16.
                    # the rates are doubled when output stride = 8.
                    conv_1x1 = layers_lib.conv2d(
                        inputs, depth, [1, 1], stride=1, scope="conv_1x1")
                    conv_3x3_1 = layers_lib.conv2d(
                        inputs, depth, [3, 3], stride=1, rate=atrous_rates[0], scope='conv_3x3_1')
                    conv_3x3_2 = layers_lib.conv2d(
                        inputs, depth, [3, 3], stride=1, rate=atrous_rates[1], scope='conv_3x3_2')
                    conv_3x3_3 = layers_lib.conv2d(
                        inputs, depth, [3, 3], stride=1, rate=atrous_rates[2], scope='conv_3x3_3')

                    # (b) the image-level features
                    with tf.variable_scope("image_level_features"):
                        # global average pooling
                        image_level_features = tf.reduce_mean(
                            inputs, [1, 2], name='global_average_pooling', keepdims=True)
                        # 1x1 convolution with 256 filters( and batch normalization)
                        image_level_features = layers_lib.conv2d(image_level_features, depth, [
                                                                1, 1], stride=1, scope='conv_1x1')
                        # bilinearly upsample features
                        image_level_features = tf.image.resize_bilinear(
                            image_level_features, inputs_size, name='upsample')

                    net = tf.concat([conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
                                    image_level_features], axis=3, name='concat')
                    net = layers_lib.conv2d(
                        net, depth, [1, 1], stride=1, scope='conv_1x1_concat')

                    return net
Example #28
    def get_backbone(self, features):
        if self.flags.model_variant.startswith('xception'):
            assert False, 'not implemented'
        elif self.flags.model_variant == 'resnet_v2_50':
            # inputs has shape [batch, 513, 513, 3]
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                net, end_points = resnet_v2.resnet_v2_50(features,
                                                         self.num_classes,
                                                         is_training=False,
                                                         global_pool=False,
                                                         output_stride=self.output_stride)
        elif self.flags.model_variant == 'resnet_v1_50':
            # The key difference of the full preactivation 'v2' variant compared to the
            # 'v1' variant in [1] is the use of batch normalization before every weight layer.
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_50(features,
                                                         self.num_classes,
                                                         is_training=False,
                                                         global_pool=False,
                                                         output_stride=self.output_stride)
        elif self.flags.model_variant == 'resnet_v2_101':
            # inputs has shape [batch, 513, 513, 3]
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                net, end_points = resnet_v2.resnet_v2_101(features,
                                                          self.num_classes,
                                                          is_training=False,
                                                          global_pool=False,
                                                          output_stride=self.output_stride)
        elif self.flags.model_variant == 'resnet_v1_101':
            # The key difference of the full preactivation 'v2' variant compared to the
            # 'v1' variant in [1] is the use of batch normalization before every weight layer.
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_101(features,
                                                          self.num_classes,
                                                          is_training=False,
                                                          global_pool=False,
                                                          output_stride=self.output_stride)
        else:
            assert False, 'not implemented'

        print(end_points.keys())
        print(net)
Example #29
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = RESNET_MODELS[model_name]
        logits, _ = resnet_fn(images,
                              num_classes,
                              is_training=is_training,
                              reuse=reuse)
        # from [bs x 1 x 1 x num_classes] to [bs x num_classes]
        logits = tf.reshape(logits, [-1, num_classes])
    return logits
    def __init__(self, inputs, true_labels, is_train=False, num_classes=None):

        self.true_labels = true_labels
        self.NUM_CLASSES = num_classes
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            self.output, self.features = resnet_v2.resnet_v2_50(
                inputs=inputs, num_classes=None, is_training=False)
            self.classifier, _ = cnn.fc(input=self.forward_pass(),
                                        num_outputs=self.NUM_CLASSES,
                                        use_relu=False,
                                        name='classifier')
Example #31
def DeepLabNet(input_batch, is_training, num_classes, output_stride = 16, batch_norm_decay = 0.9997, backbone = 'resnet_v2_101'):
    #Use channels_first to boost on GPU
    #Deeplab V3+ with resnet as backbone
    inputs_size = tf.shape(input_batch)[1:3]
    with tf.variable_scope('deeplab'):
        #ResNet as the encoder
        with tf.variable_scope('encoder'):
            if backbone == 'resnet_v2_50':
                base_model = resnet_v2.resnet_v2_50
            else:
                base_model = resnet_v2.resnet_v2_101
            #
            #Implement tensorflow resnetV2
            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
                logits, end_points = base_model(input_batch,
                                                num_classes=None,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=output_stride)
        #ASPP in the middle layers
        with tf.variable_scope('aspp'):
            net = end_points['deeplab/encoder/' + backbone + '/block4']
            encoder_output = ASPP(net, output_stride, batch_norm_decay, is_training)
        #
        #Decoder
        with tf.variable_scope('decoder'):
            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay = batch_norm_decay)):
                with arg_scope([layers.batch_norm], is_training=is_training):
                    with tf.variable_scope("low_level_features"):
                        low_level_features = end_points['deeplab/encoder/' + backbone + '/block1/unit_3/bottleneck_v2/conv1']
                        low_level_features = layers_lib.conv2d(low_level_features, 48, [1,1], stride = 1, scope = 'conv_1x1')
                        low_level_features_size = tf.shape(low_level_features)[1:3]
                    with tf.variable_scope("upsampling_logits"):
                        net = tf.image.resize_bilinear(encoder_output, low_level_features_size, name = 'upsample_1')
                        net = tf.concat([net, low_level_features], axis = 3, name = 'concat')
                        net = layers_lib.conv2d(net, 256, [3,3], stride = 1, scope = 'conv_3x3_1')
                        net = layers_lib.conv2d(net, 256, [3,3], stride = 1, scope = 'conv_3x3_2')
                        net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn = None, normalizer_fn = None, scope = 'conv_1x1')
                        logits = tf.image.resize_bilinear(net, inputs_size, name = 'upsample_2')
    #
    return logits
Example #32
  def build(self, images):
    """Builds a ResNet50 embedder for the input images.

    It assumes that the range of the pixel values in the images tensor is
      [0,255] and should be castable to tf.uint8.

    Args:
      images: a tensor that contains the input images and has the shape of
          NxTxHxWx3, where N is the batch size, T is the maximum length of the
          sequence, H and W are the height and width of the images, and 3 is
          the number of channels.
    Returns:
      The embedding of the input image with the shape of NxTxL where L is the
        embedding size of the output.

    Raises:
      ValueError: if the shape of the input does not agree with the expected
      shape explained in the Args section.
    """
    shape = images.get_shape().as_list()
    if len(shape) != 5:
      raise ValueError(
          'The tensor shape should have 5 elements, {} is provided'.format(
              len(shape)))
    if shape[4] != 3:
      raise ValueError('Three channels are expected for the input image')

    images = tf.cast(images, tf.uint8)
    images = tf.reshape(images,
                        [shape[0] * shape[1], shape[2], shape[3], shape[4]])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):

      def preprocess_fn(x):
        x = tf.expand_dims(x, 0)
        x = tf.image.resize_bilinear(x, [299, 299],
                                       align_corners=False)
        return(tf.squeeze(x, [0]))

      images = tf.map_fn(preprocess_fn, images, dtype=tf.float32)

      net, _ = resnet_v2.resnet_v2_50(
          images, is_training=False, global_pool=True)
      output = tf.reshape(net, [shape[0], shape[1], -1])
      return output
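A minimal sketch of driving this embedder, with the instance name assumed for illustration:

frames = tf.placeholder(tf.float32, [2, 8, 64, 64, 3])  # N=2 clips of T=8 frames
embeddings = embedder.build(frames)  # -> [2, 8, L] as described in the docstring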