Example #1
File: network.py Project: xiaofengShi/CV
def net_fatory(net_name, inputs, train_model, FC=False):
    if net_name == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_16(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_19(inputs,
                                         num_classes=None,
                                         is_training=train_model,
                                         fc_flage=FC)
    elif net_name == 'resnet_v2_50':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(inputs=inputs,
                                                     num_classes=None,
                                                     is_training=train_model,
                                                     global_pool=False)
    elif net_name == 'resnet_v2_152':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_152(inputs=inputs,
                                                      num_classes=None,
                                                      is_training=train_model,
                                                      global_pool=False)
    else:
        raise ValueError('unsupported net_name: %s' % net_name)

    return net, end_points
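A minimal usage sketch for the factory above (hedged: the input shape and the surrounding imports are assumptions; slim, vgg, and resnet_v2 must already be importable as in the snippet):

import tensorflow as tf

# Hypothetical 224x224 RGB batch; train_model toggles training-mode behavior.
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
net, end_points = net_fatory('resnet_v2_50', images, train_model=False)
# With global_pool=False and num_classes=None, net is a spatial feature map.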
Example #2
def perceptual_loss(real, fake, network="vgg_16"):
    if params.loss.vgg_w <= 0.0:
        return 0.0

    real = real * params.learning.image_std + params.learning.image_mean
    fake = fake * params.learning.image_std + params.learning.image_mean
    real = utils.perceptual_loss_image_preprocess(real)
    fake = utils.perceptual_loss_image_preprocess(fake)
    image = tf.concat([real, fake], axis=0)

    with tf.variable_scope("perceptual_loss"):
        if network == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_16(image)
        elif network == "vgg_19":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_19(image)
        else:
            raise NotImplementedError("unsupported network: %s" % network)

        losses = []
        for i, features in enumerate([conv1, conv2, conv3]):
            real, fake = tf.split(features, 2, 0)
            losses.append(params.loss.perceptual_loss.weights[i] *
                          tf.reduce_mean(tf.square(real - fake)))

        return losses[0] + losses[1] + losses[2]
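The concat-then-split bookkeeping above runs VGG once over real and fake images together; a tiny self-contained illustration of the pattern (stand-in arithmetic instead of a real network):

import tensorflow as tf

real = tf.ones([4, 8])                     # pretend features for 4 real images
fake = tf.zeros([4, 8])                    # ... and 4 generated ones
batch = tf.concat([real, fake], axis=0)    # one forward pass over all 8
features = batch * 2.0                     # stand-in for a VGG activation
real_f, fake_f = tf.split(features, 2, 0)  # recover the two halves, in order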
Example #3
 def arch_vgg16(self,
                X,
                num_classes,
                dropout_keep_prob=0.8,
                is_train=False,
                embedding_size=128):
     arg_scope = vgg_arg_scope()
     with slim.arg_scope(arg_scope):
         net_vis, end_points, _ = vgg_16_conv(X, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                         stride=1,
                         padding='SAME'):
         with tf.variable_scope('Logits_out'):
             net_vis = slim.avg_pool2d(net_vis,
                                       net_vis.get_shape()[1:3],
                                       padding='VALID',
                                       scope='AvgPool_1a_out')
             # 1 x 1 x 512
             net_vis = slim.dropout(net_vis,
                                    dropout_keep_prob,
                                    scope='Dropout_1b_out')
             net_vis = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
             net_vis = slim.fully_connected(net_vis,
                                            embedding_size,
                                            activation_fn=tf.nn.relu,
                                            scope='Logits_out0')
             net = slim.fully_connected(net_vis,
                                        num_classes,
                                        activation_fn=None,
                                        scope='Logits_out1')
     return net, net_vis
Example #4
def Eval(x_img_224, x_img_299, y):

    input_image = x_img_224 - tf.reshape(tf.constant([123.68, 116.78, 103.94]),
                                         [1, 1, 1, 3])

    with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
        logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
            input_image,
            num_classes=110,
            is_training=False,
            scope='resnet_v1_50',
            reuse=tf.AUTO_REUSE)
        end_points_res_v1_50['logits'] = tf.squeeze(
            end_points_res_v1_50['resnet_v1_50/logits'], [1, 2])
        end_points_res_v1_50['probs'] = tf.nn.softmax(
            end_points_res_v1_50['logits'])
        res_label = tf.argmax(end_points_res_v1_50['probs'][0], -1)
        y_r = end_points_res_v1_50['probs'][0][y[0]]

    with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(input_image,
                                                      num_classes=110,
                                                      is_training=False,
                                                      scope='vgg_16',
                                                      reuse=tf.AUTO_REUSE)
        end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']
        end_points_vgg_16['probs'] = tf.nn.softmax(end_points_vgg_16['logits'])
        vgg_label = tf.argmax(end_points_vgg_16['probs'][0], -1)
        y_v = end_points_vgg_16['probs'][0][y[0]]

    return res_label, vgg_label, y_r, y_v
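A hedged sketch of driving Eval (the placeholder shapes are assumptions, and the snippet only builds the graph; the resnet_v1_50 and vgg_16 variables still need to be restored from their checkpoints before running):

import tensorflow as tf

x_224 = tf.placeholder(tf.float32, [1, 224, 224, 3])
x_299 = tf.placeholder(tf.float32, [1, 299, 299, 3])  # unused inside Eval
y = tf.placeholder(tf.int64, [1])
res_label, vgg_label, y_r, y_v = Eval(x_224, x_299, y)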
Example #5
    def getCNNFeatures(self,
                       input_tensor,
                       fc_dim,
                       out_dim,
                       fc_initializer,
                       use_full=False):
        # Build in the caller's default graph: input_tensor and the ops created
        # below must live in the same graph, so a fresh tf.Graph() would break.
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(input_tensor, is_training=False)
        model_path = os.path.join(self.checkpoints_dir, self.ckpt_name)
        variables_to_restore = tf.contrib.framework.get_variables_to_restore()
        variables_to_restore = [
            var for var in variables_to_restore if 'vgg_16' in var.name
        ]  # only use vgg things!
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            model_path, variables_to_restore)
        pool_result = end_points['vgg_16/pool5']
        flattened = tf.reshape(pool_result, [-1, fc_dim])
        with vs.variable_scope('fc_vgg'):
            W = vs.get_variable("W", [fc_dim, out_dim],
                                initializer=fc_initializer)
            b = vs.get_variable("b", [out_dim], initializer=fc_initializer)
            output = tf.nn.relu(tf.matmul(flattened, W) + b)
        return init_fn, output
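Sketch of consuming the returned pair (hedged: `model` is a hypothetical instance, and fc_dim must match the flattened pool5 size for the input resolution, 7*7*512 for 224x224 inputs):

import tensorflow as tf

input_images = tf.placeholder(tf.float32, [None, 224, 224, 3])
init_fn, features = model.getCNNFeatures(
    input_images, fc_dim=7 * 7 * 512, out_dim=256,
    fc_initializer=tf.contrib.layers.xavier_initializer())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # load the pretrained vgg_16 weights into this session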
Example #6
    def style_loss(self, styled_vgg, style_image, layer_names, style_weight,
                   sess):
        style_image_placeholder = tf.placeholder('float',
                                                 shape=style_image.shape)

        with slim.arg_scope(vgg.vgg_arg_scope(reuse=True)):
            _, style_image_vgg = vgg.vgg_19(style_image_placeholder,
                                            num_classes=0,
                                            is_training=False)

        style_loss = 0
        preprocessed_style_image = style_image - np.array([
            ctx.params.R_MEAN, ctx.params.G_MEAN, ctx.params.B_MEAN
        ]).reshape([1, 1, 1, 3])

        for layer_name in layer_names:
            style_image_gram = self.gram_matrix_for_style_image(
                style_image_vgg[layer_name], style_image_placeholder,
                preprocessed_style_image, sess)

            input_image_gram = self.gram_matrix_for_input_image(
                styled_vgg[layer_name])

            style_loss += (2 *
                           tf.nn.l2_loss(input_image_gram -
                                         np.expand_dims(style_image_gram, 0)) /
                           style_image_gram.size)
        return style_weight * style_loss
Example #7
File: utils.py Project: zxhuang97/AAMS
def extract_image_features(inputs, reuse=True):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_19(inputs,
                                   spatial_squeeze=False,
                                   is_training=False,
                                   reuse=reuse)
    return end_points
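Minimal usage sketch (hedged: the shape is arbitrary, and since reuse defaults to True, the first call in a fresh graph should pass reuse=False so the vgg_19 variables actually get created):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 256, 256, 3])
feats = extract_image_features(images, reuse=False)  # first call creates vars
relu4_1 = feats['vgg_19/conv4/conv4_1']              # pick any endpoint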
Example #8
 def arch_multi_vgg16(self,
                      X1,
                      X2,
                      X3,
                      num_classes,
                      dropout_keep_prob=0.8,
                      is_train=False):
     arg_scope = vgg_arg_scope()
     with slim.arg_scope(arg_scope):
         with tf.variable_scope('arch_multi_vgg16_1'):
             net_vis1, end_points1 = vgg_16(X1, is_training=is_train)
         with tf.variable_scope('arch_multi_vgg16_2'):
             net_vis2, end_points2 = vgg_16(X2, is_training=is_train)
         with tf.variable_scope('arch_multi_vgg16_3'):
             net_vis3, end_points3 = vgg_16(X3, is_training=is_train)
         # net_vis3, end_points3 = alexnet_v2(X3, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                         stride=1,
                         padding='SAME'):
         with tf.variable_scope('Logits_out'):
             net_vis = tf.concat([net_vis1, net_vis2, net_vis3], 3)
             net = slim.conv2d(net_vis,
                               num_classes, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='fc8')
             net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
     return net, net_vis
Example #9
def inference(X_tensor, number_of_classes, is_training_placeholder):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(inputs=X_tensor,
                                        num_classes=number_of_classes,
                                        is_training=is_training_placeholder,
                                        fc_conv_padding='VALID')
    return logits, end_points
Example #10
def build_graph():
    image_placeholder = tf.placeholder(tf.float32, shape=[None, None, 3])
    processed_image = vgg_preprocessing.preprocess_image(
        image_placeholder, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(processed_images, num_classes=1000, is_training=False)
    probabilities = tf.nn.softmax(logits)
    return probabilities, image_placeholder, end_points
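A sketch of evaluating the graph above (hedged: the checkpoint path is hypothetical, image_size must exist at module level, and slim is assumed imported as elsewhere on this page):

import numpy as np
import tensorflow as tf

probabilities, image_placeholder, end_points = build_graph()
saver = tf.train.Saver(slim.get_model_variables('vgg_16'))
with tf.Session() as sess:
    saver.restore(sess, '/path/to/vgg_16.ckpt')           # hypothetical path
    img = np.random.rand(300, 400, 3).astype(np.float32)  # stand-in image
    probs = sess.run(probabilities, feed_dict={image_placeholder: img})
    print(probs[0].argmax())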
Example #11
 def arch_vgg16(self, X, num_classes, dropout_keep_prob=0.8, is_train=False):
     arg_scope = vgg_arg_scope()
     with slim.arg_scope(arg_scope):
         net_vis, end_points = vgg_16(X, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'):
         with tf.variable_scope('Logits_out'):
             net = slim.conv2d(net_vis, num_classes, [1, 1],
                               activation_fn=None, normalizer_fn=None,
                               scope='fc8')
             net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
     return net, net_vis
Example #12
def tower_loss(data_tensor, label_tensor, num_classes, train_mode):

    # vgg = tf.contrib.slim.nets.vgg
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=args.weight_decay)):
        logits, endpoints_dict = vgg.vgg_16(
            data_tensor,
            num_classes=num_classes,
            is_training=train_mode,
            dropout_keep_prob=args.dropout_keep_prob)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=label_tensor,
                                                  logits=logits)

    return loss, logits
Example #13
 def arch_vgg16_multi_conv(self,
                           X,
                           num_classes,
                           dropout_keep_prob=0.8,
                           is_train=False,
                           embedding_size=64):
     arg_scope = vgg_arg_scope()
     with slim.arg_scope(arg_scope):
         _, end_points, net_c = vgg_16_conv(X, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                         stride=1,
                         padding='SAME'):
         with tf.variable_scope('Logits_out'):
             #net_1 = slim.max_pool2d(net_c[-5], [32,32], stride=32, padding='VALID', scope='net_c_1')
             #net_1 = slim.conv2d(net_1, net_1.get_shape()[3], [1, 1], scope='net_1')
             #net_2 = slim.max_pool2d(net_c[-4], [16,16], stride=16, padding='VALID', scope='net_c_1')
             #net_2 = slim.conv2d(net_2, net_2.get_shape()[3], [1, 1], scope='net_2')
             #net_3 = slim.max_pool2d(net_c[-3], [8,8], stride=8, padding='VALID', scope='net_c_1')
             #net_3 = slim.conv2d(net_3, net_3.get_shape()[3], [1, 1], scope='net_3')
             net_4 = slim.max_pool2d(net_c[-2], [4, 4],
                                     stride=4,
                                     padding='VALID',
                                     scope='net_c_1')
             net_4 = slim.conv2d(net_4,
                                 net_4.get_shape()[3], [1, 1],
                                 scope='net_4')
             net_5 = slim.max_pool2d(net_c[-1], [2, 2],
                                     stride=2,
                                     padding='VALID',
                                     scope='net_c_1')
             net_5 = slim.conv2d(net_5,
                                 net_5.get_shape()[3], [1, 1],
                                 scope='net_5')
             # net_vis = tf.concat([net_1, net_2, net_3, net_4, net_5],3)
             net_vis = tf.concat([net_4, net_5], 3)
             net_vis = slim.avg_pool2d(net_vis,
                                       net_vis.get_shape()[1:3],
                                       padding='VALID',
                                       scope='AvgPool_1a_out')
             # 1 x 1 x 512
             net_vis = slim.dropout(net_vis,
                                    dropout_keep_prob,
                                    scope='Dropout_1b_out')
             net_vis = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
             net_vis = slim.fully_connected(net_vis,
                                            embedding_size,
                                            activation_fn=tf.nn.relu,
                                            scope='Logits_out0')
             net = slim.fully_connected(net_vis,
                                        num_classes,
                                        activation_fn=None,
                                        scope='Logits_out1')
     return net, net_vis
Example #14
def CRAFT_net(inputs, is_training=True, reuse=None, weight_decay=0.9):
    with slim.arg_scope(vgg_arg_scope()):
        vgg_res, end_points = vgg_16(inputs)
    with tf.variable_scope('vgg_16', [end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['vgg_16/conv2/conv2_2'],
                end_points['vgg_16/conv3/conv3_3'],
                end_points['vgg_16/conv4/conv4_3'],
                end_points['vgg_16/conv5/conv5_3']
            ]
            net = f[3]
            # VGG end
            net = slim.max_pool2d(net, [3, 3],
                                  stride=1,
                                  padding='SAME',
                                  scope='pool5')  # w/16 512
            net = arous_conv(net, 3, 3, 1024, 6,
                             name='arous_conv')  # w/16 1024
            net = slim.conv2d(net, 1024, [1, 1], padding='SAME',
                              scope='conv6')  # w/16 1024

            # U-net start
            net = tf.concat([net, f[3]], axis=3)  # w/16 1024 + 512
            net = upconvBlock(net, 512, 256)  # w/16 256
            net = upsample(net, (64, 64))
            net = tf.concat([net, f[2]], axis=3)  # w/8 256 + 512
            net = upconvBlock(net, 256, 128)  # w/8 128
            net = upsample(net, (128, 128))
            net = tf.concat([net, f[1]], axis=3)  # w/4 128 + 256
            net = upconvBlock(net, 128, 64)  # w/4 64
            net = upsample(net, (256, 256))
            net = tf.concat([net, f[0]], axis=3)  # w/2 64 + 128
            net = upconvBlock(net, 64, 32)  # w/2 32
            # U-net end

            net = slim.repeat(net, 2, slim.conv2d, 32, [3, 3])  # w/2 32
            net = slim.conv2d(net, 16, [3, 3], padding='SAME')  # w/2 16
            net = slim.conv2d(net, 16, [1, 1], padding='SAME')  # w/2 16
            net = slim.conv2d(net, 2, [1, 1], padding='SAME')  # w/2 2
            return net, end_points
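upconvBlock, upsample, and arous_conv are project helpers that the listing does not include; a hedged reconstruction, following the CRAFT paper's UpConv design (1x1 then 3x3 convolution) and plain bilinear resizing:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def upconvBlock(inputs, mid_ch, out_ch):
    # CRAFT-style UpConv: 1x1 conv to mid_ch, then 3x3 conv to out_ch.
    net = slim.conv2d(inputs, mid_ch, [1, 1])
    return slim.conv2d(net, out_ch, [3, 3])

def upsample(inputs, size):
    # Plain bilinear upsampling to a fixed spatial size.
    return tf.image.resize_bilinear(inputs, size)

def arous_conv(inputs, k_h, k_w, out_ch, rate, name=None):
    # Atrous (dilated) convolution; 'arous' is presumably a typo for 'atrous'.
    return slim.conv2d(inputs, out_ch, [k_h, k_w], rate=rate, scope=name)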
Example #15
    def build_model(self):
        # get content_img, style_img and define gen_img
        if content_input is not None:
            self.content_path = content_input
        if style_input is not None:
            self.style_path = style_input

        content_img, content_shape = utils.load_content_img(self.content_path)
        style_img = utils.load_style_img(self.style_path, content_shape)
        content_img_shape = content_img.shape
        gen_img = tf.Variable(tf.random_normal(content_img_shape) * 0.256)

        with slim.arg_scope(vgg.vgg_arg_scope()):
            f1, f2, f3, f4, exclude = vgg.vgg_16(
                tf.concat([gen_img, content_img, style_img], axis=0))

            # calculate content_loss and style_loss
            content_loss = utils.cal_content_loss(f3)
            style_loss = utils.cal_style_loss(f1, f2, f3, f4)

            # load vgg model
            vgg_model_path = VGG_MODEL_PATH
            vgg_vars = slim.get_variables_to_restore(include=['vgg_16'],
                                                     exclude=exclude)
            init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars)
            init_fn(self.sess)
            print('vgg_16 weights load done')

            self.gen_img = gen_img
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.content_loss = content_loss
            self.style_loss = style_loss * W_STYLE
            # the total loss
            self.loss = self.content_loss + self.style_loss

            # starter_learning_rate = 1e1
            # global_step = tf.train.get_global_step()
            # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_steps=500,
            #                                            decay_rate=0.98,
            #                                            staircase=True)
            self.opt = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
                self.loss, global_step=self.global_step, var_list=gen_img)

        all_var = tf.global_variables()
        init_var = [v for v in all_var if 'vgg_16' not in v.name]
        init = tf.variables_initializer(var_list=init_var)
        self.sess.run(init)

        self.save = tf.train.Saver()
Example #16
    def vgg_encoding(self, processed_images, is_training, reuse=False):

        with slim.arg_scope(vgg.vgg_arg_scope()):

            fc7 = vgg.vgg_16(processed_images,
                             num_classes=self.no_classes,
                             is_training=is_training,
                             spatial_squeeze=False,
                             fc_conv_padding='VALID',
                             reuse=reuse,
                             return_fc7=True,
                             fc7_size=self.fc7_size)

            return fc7
Example #17
def vgg_16(inputs, no_fc=False):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        net, end_points = vgg.vgg_16(inputs,
                                     None,
                                     is_training=False,
                                     spatial_squeeze=False,
                                     fc_conv_padding='SAME',
                                     no_fc=no_fc)

    if no_fc:
        return end_points['vgg_16/pool5'], end_points[
            'vgg_16/pool4'], end_points['vgg_16/pool3']
    else:
        return net, end_points['vgg_16/pool4'], end_points['vgg_16/pool3']
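Sketch of using the wrapper to grab multi-scale features, e.g. for an FCN-style decoder (hedged: the shape is arbitrary, and no_fc is a flag specific to this project's vgg fork):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 512, 512, 3])
pool5, pool4, pool3 = vgg_16(images, no_fc=True)
# Strides 32/16/8 w.r.t. the input -- the usual FCN skip sources.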
Example #19
    def arch_multi_vgg16_conv(self,
                              X1,
                              X2,
                              X3,
                              num_classes,
                              dropout_keep_prob=0.8,
                              is_train=False):
        arg_scope = vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            with tf.variable_scope('arch_multi_vgg16_conv_1'):
                net_vis1, end_points1 = vgg_16_conv(X1, is_training=is_train)
            with tf.variable_scope('arch_multi_vgg16_conv_2'):
                net_vis2, end_points2 = vgg_16_conv(X2, is_training=is_train)
            with tf.variable_scope('arch_multi_vgg16_conv_3'):
                net_vis3, end_points3 = vgg_16_conv(X3, is_training=is_train)
            # net_vis3, end_points3 = alexnet_v2(X3, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            with tf.variable_scope('Logits_out'):
                net_vis1 = slim.avg_pool2d(net_vis1,
                                           net_vis1.get_shape()[1:3],
                                           padding='VALID',
                                           scope='AvgPool_1a_out')
                net_vis2 = slim.avg_pool2d(net_vis2,
                                           net_vis2.get_shape()[1:3],
                                           padding='VALID',
                                           scope='AvgPool_2a_out')
                net_vis3 = slim.avg_pool2d(net_vis3,
                                           net_vis3.get_shape()[1:3],
                                           padding='VALID',
                                           scope='AvgPool_3a_out')
                net_vis = tf.concat([net_vis1, net_vis2, net_vis3], 3)
                # add a fully-connected layer
                # net = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
                # net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu, scope='Logits_out0')
                # net = slim.fully_connected(net, num_classes, activation_fn=None,scope='Logits_out1')

                net = slim.conv2d(net_vis,
                                  num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        return net, net_vis
Example #20
def vgg16_fcn_net(image_tensor,
                  number_of_classes,
                  is_training=True,
                  upsample_factor=8):
    # tf.reset_default_graph()

    # Define the model that we want to use -- specify to use only two classes at the last layer
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=number_of_classes,
                                        is_training=is_training,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')

    downsampled_logits_shape = tf.shape(logits)

    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor
    # The shape should be batch_size X width X height X num_classes
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0], img_shape[1], img_shape[2],
        downsampled_logits_shape[3]
    ])

    # Perform the upsampling x2
    upsampled_logits = upsample(logits, 'vgg_16/fc8/t_conv_x2', 2,
                                end_points['vgg_16/pool4'], 'conv_pool4',
                                number_of_classes)
    # Perform the upsampling x2
    upsampled_logits = upsample(upsampled_logits, 'vgg_16/fc8/t_conv_x2_x2', 2,
                                end_points['vgg_16/pool3'], 'conv_pool3',
                                number_of_classes)
    # Perform the upsampling x8
    upsample_filter_tensor_x8 = bilinear_upsample_weights(
        upsample_factor, number_of_classes, 'vgg_16/fc8/t_conv_x8')
    upsampled_logits = tf.nn.conv2d_transpose(
        upsampled_logits,
        upsample_filter_tensor_x8,
        output_shape=upsampled_logits_shape,
        strides=[1, upsample_factor, upsample_factor, 1],
        padding='SAME')

    return upsampled_logits
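The upsample helper above is not shown; a hedged reconstruction based on the explicit FCN-16s/8s wiring in Examples #25 and #27 (the exact signature and fusion order are assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def upsample(logits, kernel_name, factor, skip_features, skip_scope,
             number_of_classes):
    # Project the skip layer to class scores with a zero-initialized 1x1 conv.
    with tf.variable_scope('vgg_16/fc8'):
        aux_logits = slim.conv2d(skip_features, number_of_classes, [1, 1],
                                 activation_fn=None,
                                 weights_initializer=tf.zeros_initializer,
                                 scope=skip_scope)
    # Learnable transposed conv initialized to bilinear interpolation.
    kernel = tf.Variable(bilinear_upsample_weights(factor, number_of_classes),
                         name=kernel_name)
    up = tf.nn.conv2d_transpose(logits, kernel,
                                output_shape=tf.shape(aux_logits),
                                strides=[1, factor, factor, 1],
                                padding='SAME')
    return up + aux_logits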
Example #21
def vgg16_fcn8_model(images,
                     num_classes,
                     is_training=False,
                     raw_image_shape=(520 - 170, 800),
                     decoder='fcn8'):
    train_image_shape = (224 * 2, 224 * 3)

    if decoder == 'fcn8':
        decoder_fn = mobilenet_v1_fcn_decoder
    elif decoder == 'fcn8_upsample':
        decoder_fn = mobilenet_v1_fcn8_upsample_decoder
    else:
        raise ValueError("the decoder should be either fcn8 or fcn8_upsample")

    if images.dtype != tf.uint8:
        raise ValueError("the image should be uint8")

    images = tf.image.resize_images(images, size=train_image_shape)
    tf.summary.image('input_image_after_rescale_and_resize',
                     tf.expand_dims(images[0], 0))

    processed_images = tf.map_fn(vgg_preprocessing.vgg_image_rescale,
                                 images,
                                 dtype=tf.float32)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # 1000 classes instead of 1001.
        logits, end_points = vgg.vgg_16(processed_images,
                                        num_classes=1000,
                                        is_training=is_training,
                                        spatial_squeeze=False)

    layer4 = end_points['vgg_16/pool3']
    layer6 = end_points['vgg_16/pool4']
    layer13 = end_points['vgg_16/pool5']

    last_layer = decoder_fn(layer13, layer4, layer6, num_classes)

    last_layer = post_process_logits(end_points, last_layer, raw_image_shape,
                                     train_image_shape)

    return last_layer, end_points
Example #22
def forward_tran_advers(x_img=None, label_index=None, number_of_classes=5,
                        layer_name=None, Training=True):
    # Given logits and label_index, compute the gradients of the objective
    # with respect to the conv-layer feature maps.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16_adversarial(inputs=x_img,
                                                    num_classes=number_of_classes,
                                                    is_training=Training,
                                                    fc_conv_padding='VALID')

    prob = tf.nn.softmax(logits)
    if Training:
        prob_max_label = label_index
    else:
        prob_max_label = tf.argmax(prob, 1)
    label_hot = tf.one_hot(prob_max_label, number_of_classes)
    cost = (-1) * tf.reduce_sum(tf.multiply(tf.log(prob), label_hot), axis=1)
    y_c = tf.reduce_sum(tf.multiply(logits, label_hot), axis=1)  # Grad-CAM
    # target_conv_layer = end_points[layer_name]
    target_conv_layer = x_img
    # gb_grad = tf.gradients(cost, x_img)[0]  # guided Grad-CAM
    target_grad_ac = tf.gradients(y_c, target_conv_layer)[0]  # Grad-CAM
    target_grad_yc = tf.gradients(tf.exp(y_c), target_conv_layer)[0]  # Grad-CAM++
    return target_conv_layer, target_grad_ac, target_grad_yc, logits, end_points
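Downstream, these gradients are usually reduced to a heatmap; a minimal NumPy sketch of the standard Grad-CAM weighting (this reduction step is not part of the snippet above):

import numpy as np

def grad_cam(conv_maps, grads):
    # conv_maps, grads: [H, W, C] activations and d(y_c)/d(activations).
    weights = grads.mean(axis=(0, 1))                        # GAP the gradients
    cam = np.maximum((conv_maps * weights).sum(axis=2), 0)   # ReLU(weighted sum)
    return cam / (cam.max() + 1e-8)                          # normalize to [0, 1]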
Example #23
    def build_model(self):
        train_imgs = tools.load_train_img(TRAIN_DATA_DIR, self.batch_size, self.img_size)
        style_imgs = tools.load_style_img(STYLE_IMAGE_PATH)

        with slim.arg_scope(model.arg_scope()):
            gen_img, variables = model.inference(train_imgs, reuse=False, name='transform')

            with slim.arg_scope(vgg.vgg_arg_scope()):
                gen_img_processed = [tf.image.per_image_standardization(image) for image in
                                     tf.unstack(gen_img, axis=0, num=self.batch_size)]

                f1, f2, f3, f4, exclude = vgg.vgg_16(tf.concat([gen_img_processed, train_imgs, style_imgs], axis=0))

                gen_f, img_f, _ = tf.split(f4, 3, 0)
                content_loss = tf.nn.l2_loss(gen_f - img_f) / tf.to_float(tf.size(gen_f))

                style_loss = model.styleloss(f1, f2, f3, f4)

                vgg_model_path = VGG_MODEL_PATH
                vgg_vars = slim.get_variables_to_restore(include=['vgg_16'], exclude=exclude)
                init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars)
                init_fn(self.sess)
                print("vgg's weights load done")

            self.gen_img = gen_img
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            self.content_loss = content_loss
            self.style_loss = style_loss * self.style_w
            self.loss = self.content_loss + self.style_loss
            self.learn_rate = tf.train.exponential_decay(self.learn_rate_base, self.global_step, 1,
                                                         self.learn_rate_decay, staircase=True)
            self.opt = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss, global_step=self.global_step,
                                                                        var_list=variables)

        all_var = tf.global_variables()
        init_var = [v for v in all_var if 'vgg_16' not in v.name]
        init = tf.variables_initializer(var_list=init_var)
        self.sess.run(init)
        self.save = tf.train.Saver(var_list=variables)
Example #25
    def init_model(self):
        slim = tf.contrib.slim

        # Initialization
        tf.reset_default_graph()

        # Input image placeholders
        image_tensor = tf.placeholder(tf.float32,
                                      shape=(1, None, None, 3),
                                      name='image_tensor')
        orig_img_tensor = tf.placeholder(tf.uint8,
                                         shape=(1, None, None, 3),
                                         name='orig_img_tensor')

        # Build the model
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(image_tensor,
                                            num_classes=self.number_of_classes,
                                            is_training=False,
                                            spatial_squeeze=False,
                                            fc_conv_padding='SAME')

        downsampled_logits_shape = tf.shape(logits)

        img_shape = tf.shape(image_tensor)

        # Calculate the output size of the upsampled tensor
        # The shape should be batch_size X width X height X num_classes
        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0], img_shape[1], img_shape[2],
            downsampled_logits_shape[3]
        ])

        pool4_feature = end_points['vgg_16/pool4']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_16s = slim.conv2d(
                pool4_feature,
                self.number_of_classes, [1, 1],
                activation_fn=None,
                weights_initializer=tf.zeros_initializer,
                scope='conv_pool4')

        # Perform the upsampling
        upsample_filter_np_x2 = bilinear_upsample_weights(
            2,  # upsample_factor,
            self.number_of_classes)

        upsample_filter_tensor_x2 = tf.Variable(upsample_filter_np_x2,
                                                name='vgg_16/fc8/t_conv_x2')

        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor_x2,
            output_shape=tf.shape(aux_logits_16s),
            strides=[1, 2, 2, 1],
            padding='SAME')

        upsampled_logits = upsampled_logits + aux_logits_16s

        upsample_filter_np_x16 = bilinear_upsample_weights(
            self.upsample_factor, self.number_of_classes)

        upsample_filter_tensor_x16 = tf.Variable(upsample_filter_np_x16,
                                                 name='vgg_16/fc8/t_conv_x16')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits,
            upsample_filter_tensor_x16,
            output_shape=upsampled_logits_shape,
            strides=[1, self.upsample_factor, self.upsample_factor, 1],
            padding='SAME')

        # Tensor to get the final prediction for each pixel -- pay
        # attention that we don't need softmax in this case because
        # we only need the final decision. If we also need the respective
        # probabilities we will have to apply softmax.
        pred = tf.argmax(upsampled_logits, axis=3, name='predictions')

        probabilities = tf.nn.softmax(upsampled_logits, name='probabilities')

        # Restore the model: load parameters from the trained checkpoint
        checkpoint_path = tf.train.latest_checkpoint(self.ckpt)
        assert checkpoint_path, "no checkpoint exists, cannot run prediction."
        variables_to_restore = slim.get_model_variables()
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess = tf.Session(config=sess_config)
        init_op = tf.global_variables_initializer()
        init_local_op = tf.local_variables_initializer()

        saver = tf.train.Saver(max_to_keep=1)
        # Run the initializers.
        sess.run(init_op)
        sess.run(init_local_op)
        saver.restore(sess, checkpoint_path)
        logging.debug('checkpoint restored from [{0}]'.format(checkpoint_path))

        return sess, pred, orig_img_tensor, probabilities
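Usage sketch (hedged: `model` is a hypothetical instance; pred and probabilities are fed through the image_tensor placeholder, which is not returned and so has to be fetched from the graph by name):

import numpy as np

sess, pred, orig_img_tensor, probabilities = model.init_model()
image_tensor = sess.graph.get_tensor_by_name('image_tensor:0')
img = np.random.rand(1, 384, 512, 3).astype(np.float32)  # stand-in input
mask = sess.run(pred, feed_dict={image_tensor: img})      # per-pixel classes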
Example #26
#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import vgg

inputs = tf.placeholder(tf.float32, (None, 224, 224, 3), name='inputs')
r, g, b = tf.split(axis=3, num_or_size_splits=3, value=inputs * 255.0)
VGG_MEAN = [103.939, 116.779, 123.68]

bgr = tf.concat(values=[b - VGG_MEAN[0], g - VGG_MEAN[1], r - VGG_MEAN[2]],
                axis=3)
with tf.contrib.slim.arg_scope(vgg.vgg_arg_scope()):
    fc8, endpoints = vgg.vgg_16(bgr, is_training=False)

for k, v in endpoints.items():
    print(k, v)
Example #27
def fcn(image_tensor, upsample_factor, number_of_classes, annotation_tensor):
    # Define the model that we want to use -- specify to use only two classes at the last layer
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=number_of_classes,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')
    downsampled_logits_shape = tf.shape(logits)
    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor
    # The shape should be batch_size X width X height X num_classes
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0], img_shape[1], img_shape[2],
        downsampled_logits_shape[3]
    ])
    if upsample_factor == 32:
        upsample_filter_np_x32 = bilinear_upsample_weights(
            upsample_factor, number_of_classes)
        upsample_filter_tensor_x32 = tf.Variable(upsample_filter_np_x32,
                                                 name='vgg_16/fc8/t_conv_x32')
        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor_x32,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')
    elif upsample_factor == 16:
        pool4_feature = end_points['vgg_16/pool4']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_16s = slim.conv2d(
                pool4_feature,
                number_of_classes, [1, 1],
                activation_fn=None,
                weights_initializer=tf.zeros_initializer,
                scope='conv_pool4')

        # Perform the upsampling
        upsample_filter_np_x2 = bilinear_upsample_weights(
            2,  # upsample_factor,
            number_of_classes)

        upsample_filter_tensor_x2 = tf.Variable(upsample_filter_np_x2,
                                                name='vgg_16/fc8/t_conv_x2')

        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor_x2,
            output_shape=tf.shape(aux_logits_16s),
            strides=[1, 2, 2, 1],
            padding='SAME')

        upsampled_logits = upsampled_logits + aux_logits_16s

        upsample_filter_np_x16 = bilinear_upsample_weights(
            upsample_factor, number_of_classes)

        upsample_filter_tensor_x16 = tf.Variable(upsample_filter_np_x16,
                                                 name='vgg_16/fc8/t_conv_x16')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits,
            upsample_filter_tensor_x16,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')
    elif upsample_factor == 8:
        pool3_feature = end_points['vgg_16/pool3']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_8s = slim.conv2d(
                pool3_feature,
                number_of_classes, [1, 1],
                activation_fn=None,
                weights_initializer=tf.zeros_initializer,
                scope='conv_pool3')

        pool4_feature = end_points['vgg_16/pool4']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_16s = slim.conv2d(
                pool4_feature,
                number_of_classes, [1, 1],
                activation_fn=None,
                weights_initializer=tf.zeros_initializer,
                scope='conv_pool4')

        # Upsample the fc8 logits x2 to reach the 16s resolution
        upsample_filter_np_x2 = bilinear_upsample_weights(
            2,  # upsample_factor,
            number_of_classes)
        upsample_filter_tensor_x2_1 = tf.Variable(
            upsample_filter_np_x2, name='vgg_16/fc8/t_conv_x2_1')
        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor_x2_1,
            output_shape=tf.shape(aux_logits_16s),
            strides=[1, 2, 2, 1],
            padding='SAME')
        # After summing, upsample x2 once more to reach the 8s resolution
        upsampled_logits = upsampled_logits + aux_logits_16s
        upsample_filter_tensor_x2_2 = tf.Variable(
            upsample_filter_np_x2, name='vgg_16/fc8/t_conv_x2_2')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits,
            upsample_filter_tensor_x2_2,
            output_shape=tf.shape(aux_logits_8s),
            strides=[1, 2, 2, 1],
            padding='SAME')

        upsampled_logits = upsampled_logits + aux_logits_8s

        upsample_filter_np_x8 = bilinear_upsample_weights(
            upsample_factor, number_of_classes)
        upsample_filter_tensor_x8 = tf.Variable(upsample_filter_np_x8,
                                                name='vgg_16/fc8/t_conv_x8')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits,
            upsample_filter_tensor_x8,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')

    lbl_onehot = tf.one_hot(annotation_tensor, number_of_classes)
    cross_entropies = tf.nn.softmax_cross_entropy_with_logits(
        logits=upsampled_logits, labels=lbl_onehot)

    cross_entropy_loss = tf.reduce_mean(tf.reduce_sum(cross_entropies,
                                                      axis=-1))

    return upsampled_logits, cross_entropy_loss
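bilinear_upsample_weights appears throughout these FCN examples but is never shown; a minimal sketch of the conventional two-argument variant (Example #20 passes an extra name argument, presumably a project-specific version), assuming it returns a [size, size, C, C] float32 kernel that makes conv2d_transpose start out as bilinear interpolation:

import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    # Kernel size that realizes x`factor` bilinear upsampling.
    filter_size = 2 * factor - factor % 2
    center = factor - 1 if filter_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    kernel = ((1 - abs(og[0] - center) / factor) *
              (1 - abs(og[1] - center) / factor))
    weights = np.zeros((filter_size, filter_size,
                        number_of_classes, number_of_classes),
                       dtype=np.float32)
    for i in range(number_of_classes):
        weights[:, :, i, i] = kernel  # per-class identity: no cross-class mixing
    return weights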
Example #28
def Graph(x, y, res_weight, vgg_weight, inc_weight, y_pred_res, y_pred_vgg,
          y_pred_inc, raw_image):
    num_classes = 110
    batch_size = 1
    weight = [args.resnet_weight, args.vgg_weight]
    bias = tf.reshape(tf.constant([123.68, 116.78, 103.94]), [1, 1, 1, 3])
    x_int = tf.image.resize_bilinear(x, [224, 224], align_corners=True)

    x_int = x_int - bias

    with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
        logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
            x_int,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50',
            reuse=tf.AUTO_REUSE)
        end_points_res_v1_50['logits'] = tf.squeeze(
            end_points_res_v1_50['resnet_v1_50/logits'], [1, 2])

    with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(x_int,
                                                      num_classes=num_classes,
                                                      is_training=False,
                                                      scope='vgg_16',
                                                      reuse=tf.AUTO_REUSE)
        end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']

    one_hot = tf.one_hot(y, num_classes)

    sum_prob = tf.clip_by_value(
        (res_weight * y_pred_res + vgg_weight * y_pred_vgg), 0,
        args.confidence)
    logits_resnet = end_points_res_v1_50['logits']
    logits_vgg = end_points_vgg_16['logits']

    logits = res_weight * logits_resnet + vgg_weight * logits_vgg
    cross_entropy = tf.losses.softmax_cross_entropy(one_hot,
                                                    logits,
                                                    label_smoothing=0.0,
                                                    weights=1.0)
    grad = tf.gradients([cross_entropy], [x])[0]
    #grad = tf.layers.dropout(grad,noise_shape=[1,299,299,3])

    if args.norm:
        grad = grad / tf.norm(grad)
    else:
        #grad = grad/tf.reshape(tf.norm(grad,axis=-1),[1,299,299,1])
        grad = tf.transpose(grad, [0, 3, 1, 2])
        grad = grad / tf.reshape(
            tf.norm(tf.reshape(grad, [batch_size, 3, -1]), axis=2),
            [batch_size, 3, 1, 1])
        grad = tf.transpose(grad, [0, 2, 3, 1])

    if args.is_mask:
        mask = tf.ones(shape=[
            int(299 - 2 * args.mask_size),
            int(299 - 2 * args.mask_size), 3
        ])
        mask = tf.pad(
            mask,
            tf.constant([[args.mask_size, args.mask_size],
                         [args.mask_size, args.mask_size], [0, 0]]))
        grad = grad * mask

    alpha = args.maxa - (args.maxa - args.mina) / (args.confidence) * sum_prob
    x = x - alpha * grad  #*tf.concat([tf.ones([299,299,1]),tf.ones([299,299,1]),tf.zeros([299,299,1])],-1)
    x = tf.clip_by_value(x, 0, 255)
    out_x = x - raw_image
    out_x = tf.floor(tf.abs(out_x)) * tf.sign(out_x) + raw_image
    out_x = tf.round(tf.clip_by_value(out_x, 0, 255))

    return x, out_x
Example #29
def net_arg_scope():
    if net_type == 'resnet':
        return resnet_v1.resnet_arg_scope()
    elif net_type == 'vgg':
        return vgg.vgg_arg_scope(False)
    else:
        raise ValueError('unknown net_type: %s' % net_type)
Example #30
File: train.py Project: zsmj610/fcn-vgg
feed_dict_to_use = {is_training_placeholder: True}

upsample_factor = 16
number_of_classes = 21

log_folder = os.path.join(FLAGS.output_dir, 'train')

vgg_checkpoint_path = FLAGS.checkpoint_path

# Creates a variable to hold the global_step.
global_step = tf.Variable(0, trainable=False, name='global_step', dtype=tf.int64)


# Define the model that we want to use -- specify to use only two classes at the last layer
with slim.arg_scope(vgg.vgg_arg_scope()):
    logits, end_points = vgg.vgg_16(image_tensor,
                                    num_classes=number_of_classes,
                                    is_training=is_training_placeholder,
                                    spatial_squeeze=False,
                                    fc_conv_padding='SAME')

downsampled_logits_shape = tf.shape(logits)

img_shape = tf.shape(image_tensor)

# Calculate the output size of the upsampled tensor
# The shape should be batch_size X width X height X num_classes
upsampled_logits_shape = tf.stack([
    downsampled_logits_shape[0], img_shape[1], img_shape[2],
    downsampled_logits_shape[3]
])
Example #31
def run_training():

    #1.create log and model saved dir according to the datetime
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    models_dir = os.path.join("saved_models", subdir, "models")
    if not os.path.isdir(models_dir):  # Create the model directory if it doesn't exist
        os.makedirs(models_dir)
    logs_dir = os.path.join("saved_models", subdir, "logs")
    if not os.path.isdir(logs_dir):  # Create the log directory if it doesn't exist
        os.makedirs(logs_dir)
    topn_models_dir = os.path.join("saved_models", subdir, "topn")  # topn dir, used to save the top-accuracy models
    if not os.path.isdir(topn_models_dir):  # Create the topn model directory if it doesn't exist
        os.makedirs(topn_models_dir)
    topn_file=open(os.path.join(topn_models_dir,"topn_acc.txt"),"a+")
    topn_file.close()


    #2.load dataset and define placeholder
    conf = config.get_config()
    train_dataset = input_dataset.TFRecordDataset(conf)
    train_iterator, train_next_element = train_dataset.generateDataset(
        dataset_path=conf.train_dataset_path, batch_size=conf.batch_size)
    test_dataset = input_dataset.TFRecordDataset(conf)
    test_iterator, test_next_element = test_dataset.generateDataset(
        dataset_path=conf.test_dataset_path, batch_size=conf.batch_size,
        test_mode=1)

    phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
    images_placeholder = tf.placeholder(name='input', shape=[None, conf.input_img_height,conf.input_img_width, 3], dtype=tf.float32)
    labels_placeholder = tf.placeholder(name='labels', shape=[None, ], dtype=tf.int64)

    # Create the model.
    #with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(batch_norm_updates_collections=None)):
        #predictions, end_points = mobilenet_v1.mobilenet_v1(images_placeholder,is_training=phase_train_placeholder,num_classes=3,prediction_fn=False)

    with slim.arg_scope(vgg.vgg_arg_scope()):
        predictions, end_points = vgg.vgg_a(images_placeholder,num_classes=3,is_training=phase_train_placeholder)

    output=tf.argmax(predictions,1,name="output")
    
    softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predictions, labels=labels_placeholder),name="loss")
    tf.add_to_collection('losses', softmax_loss)


    correct_prediction = tf.equal(tf.argmax(predictions,1),labels_placeholder )
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    #adjust learning rate
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(conf.learning_rate,global_step,conf.learning_rate_decay_step,conf.learning_rate_decay_rate,staircase=True)


    custom_loss=tf.get_collection("losses")
    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss=tf.add_n(custom_loss+regularization_losses,name='total_loss')


    #optimize loss and update
    train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1).minimize(total_loss,global_step=global_step)
    #train_op=tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True).minimize(total_loss,global_step=global_step)

    saver=tf.train.Saver(tf.trainable_variables(),max_to_keep=5)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        for epoch in range(conf.max_nrof_epochs):
            sess.run(train_iterator.initializer)
            while True:
                use_time=0
                try:
                    images_train, labels_train = sess.run(train_next_element)

                    start_time=time.time()
                    input_dict={phase_train_placeholder:True,images_placeholder:images_train,labels_placeholder:labels_train}
                    step,lr,train_loss,_,train_accuracy = sess.run([global_step,
                                                                            learning_rate,
                                                                            total_loss,
                                                                            train_op,
                                                                            accuracy],
                                                                          feed_dict=input_dict)

                    end_time=time.time()
                    use_time+=(end_time-start_time)

                    #display train result
                    if(step%conf.display_iter==0):
                        print ("step:%d lr:%f time:%.3f total_loss:%.3f acc:%.3f epoch:%d"%(step,lr,use_time,train_loss,train_accuracy,epoch) )
                        use_time=0
                    if (step%conf.test_save_iter==0):
                        filename_cpkt = os.path.join(models_dir, "%d.ckpt"%step)
                        saver.save(sess, filename_cpkt)
                        #evaluate(models_dir)
                        sess.run(test_iterator.initializer)
                        total_acc=0
                        test_cnt=0
                        
                        while True:
                            try:
                                test_cnt+=1
                                test_img,test_label=(sess.run(test_next_element) )
                                fd={images_placeholder:test_img,labels_placeholder:test_label,phase_train_placeholder: False}
                                acc=sess.run(accuracy,feed_dict=fd)
                                total_acc+=acc
                            except tf.errors.OutOfRangeError:
                                valid_acc=(total_acc*1.0/test_cnt)*100
                                print ("test accuracy %.2f"%valid_acc )
                                with open(os.path.join(topn_models_dir,"topn_acc.txt"),"a+")as tmp_f:
                                        tmp_f.write("step : %d  accuracy : %f\n"%(step,valid_acc) )
                                if valid_acc>conf.topn_threshold:
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.meta"%step),os.path.join(topn_models_dir, "%d.ckpt.meta"%step))
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.index"%step),os.path.join(topn_models_dir, "%d.ckpt.index"%step))
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.data-00000-of-00001"%step),os.path.join(topn_models_dir, "%d.ckpt.data-00000-of-00001"%step))
                                break
                        
                except tf.errors.OutOfRangeError:
                    print("End of epoch ")
                    break
Example #32
def run_hand(all_video_list, video_output_parent_path, use_bn, train_vgg,
             checkpoint_path, backbone_net_ckpt_path):
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        img_size = tf.placeholder(dtype=tf.int32,
                                  shape=(2, ),
                                  name='original_image_size')

    img_normalized = raw_img / 255 - 0.5

    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    logger.info('initializing model...')
    net = PafNet(inputs_x=vgg_outputs, hm_channel_num=2, use_bn=use_bn)
    hm_pre, added_layers_out = net.gen_hand_net()

    hm_up = tf.image.resize_area(hm_pre[5], img_size)
    # cpm_up = tf.image.resize_area(cpm_pre[5], img_size)
    # hm_up = hm_pre[5]
    # cpm_up = cpm_pre[5]
    smoother = Smoother({'data': hm_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                      window_shape=(3, 3),
                                      pooling_type='MAX',
                                      padding='SAME')
    tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                            gaussian_heatMat, tf.zeros_like(gaussian_heatMat))

    logger.info('initialize saver...')
    # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers')
    # trainable_var_list = []
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')

    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)
    logger.info('initialize session...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        logger.info('restoring vgg weights...')
        restorer.restore(sess, backbone_net_ckpt_path)
        logger.info('restoring from checkpoint...')
        saver.restore(
            sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
        logger.info('initialization done')
        action_list = all_video_list.keys()
        for action in action_list:
            video_list = all_video_list[action]
            for video in video_list:
                dir_name = video.split('/')
                name = dir_name[-2]
                save_path = video_output_parent_path + '/' + name

                anno_loader = cut_body_part(anno_file=save_path + '/' +
                                            action + '_lstm.json',
                                            coco_images=save_path + '/pics/')
                img_info = []
                anno_info = []
                for img, hand_list, img_meta, anno in tqdm(
                        anno_loader.crop_part()):
                    for hand in hand_list:
                        position = hand['position']
                        ori_h = position[3] - position[1] + 1
                        ori_w = position[2] - position[0] + 1
                        peaks_origin, heatmap_origin = sess.run(
                            [
                                tensor_peaks,
                                hm_up,
                            ],
                            feed_dict={
                                raw_img: hand['hand'][np.newaxis, :, :, :],
                                img_size: [ori_h, ori_w]
                            })
                        re_origin = np.where(
                            peaks_origin[0, :, :,
                                         0] == np.max(peaks_origin[0, :, :,
                                                                   0]))
                        peaks_flip, heatmap_flip = sess.run(
                            [
                                tensor_peaks,
                                hm_up,
                            ],
                            feed_dict={
                                raw_img:
                                np.fliplr(hand['hand'][np.newaxis, :, :, :]),
                                img_size: [ori_h, ori_w]
                            })
                        peaks_flip = np.fliplr(peaks_flip)
                        re_flip = np.where(
                            peaks_flip[0, :, :,
                                       0] == np.max(peaks_flip[0, :, :, 0]))
                        anno['keypoints'][hand['idx'] * 3] = int(
                            position[0] +
                            (re_origin[1][0] + re_flip[1][0]) / 2)
                        anno['keypoints'][hand['idx'] * 3 + 1] = int(
                            position[1] +
                            (re_origin[0][0] + re_flip[0][0]) / 2)
                    anno_info.append(anno)
                    img_info.append(img_meta)
                ref = {"images": img_info, "annotations": anno_info}
                out_path = save_path + '/' + action + '_hand_coco.json'
                with open(out_path, "w") as f:
                    json.dump(ref, f)
                    print('wrote to ' + out_path)