Example #1
    def __mobilenetV2feature_sequence_extraction(self, inputdata, reuseflag):
        """
        use inception V3 model
        :param inputdata: eg. batch*128*128*1
        :return:
        """
        # Alternative backbones kept for reference:
        #   Inception V3: end_points['PreLogits']      -> batch*1*1*2048
        #   MobileNet V2: endpoints['global_pool']     -> batch*1*1*1280

        # __use_bn toggles whether batch norm runs in training mode.
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=self.__use_bn)):
            logits, endpoints = mobilenet_v2.mobilenet(
                inputdata, depth_multiplier=1.0, reuse=reuseflag)
            return endpoints['global_pool']  # batch*1*1*1280
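
A hedged usage sketch (the `extractor` handle and `images` tensor are hypothetical): the returned endpoint has shape batch*1*1*1280, so callers usually squeeze it before the next stage.

features = extractor(images, reuseflag=False)  # batch, 1, 1, 1280
features = tf.squeeze(features, axis=[1, 2])   # -> batch, 1280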
Example #2
    def testDivisibleBy(self):
        tf.reset_default_graph()
        mobilenet_v2.mobilenet(tf.placeholder(tf.float32, (10, 224, 224, 16)),
                               conv_defs=mobilenet_v2.V2_DEF,
                               divisible_by=16,
                               min_depth=32)
        s = [op.outputs[0].get_shape().as_list()[-1]
             for op in find_ops('Conv2D')]
        s = set(s)
        self.assertSameElements(
            [32, 64, 96, 160, 192, 320, 384, 576, 960, 1280, 1001], s)
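The channel counts asserted above come from MobileNet's standard divisibility rounding; a sketch of that rule (modeled on the `_make_divisible` helper in the slim MobileNet code) is:

def make_divisible(v, divisor, min_value=None):
    # Round v to the nearest multiple of divisor, never below min_value,
    # and never more than 10% below the original value.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2.0) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

# e.g. a 0.1 depth multiplier on a 320-channel layer with min_depth=32:
# make_divisible(320 * 0.1, 8, 32) == 32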
Example #3
    def model(self, features):
        input_layer = features

        # Replace missing values with 0
        hidden_layer = tf.where(tf.is_nan(input_layer),
                                tf.zeros_like(input_layer), input_layer)

        if self.metadata.get_tensor_shape()[0] != -1:
            #hidden_layer = tf.squeeze(hidden_layer, axis=[1])
            with tf.contrib.slim.arg_scope(
                    mobilenet_v2.training_scope(is_training=True)):
                logits, endpoints = mobilenet_v2.mobilenet(
                    hidden_layer, self.input_size)
            hidden_layer = tf.contrib.layers.conv2d(
                inputs=endpoints['feature_maps'],
                num_outputs=1280,
                kernel_size=1,
                stride=1,
                activation_fn=None)
            hidden_layer = tf.reduce_mean(input_tensor=hidden_layer,
                                          axis=[1, 2])
        else:
            tensor_shape = hidden_layer.get_shape().as_list()
            tensor_reshape = tf.shape(hidden_layer)
            hidden_layer = tf.reshape(hidden_layer, [-1] + tensor_shape[-3:])
            if tensor_shape[-1] == 1:
                hidden_layer = tf.tile(hidden_layer, [1, 1, 1, 3])
            with tf.contrib.slim.arg_scope(
                    mobilenet_v2.training_scope(is_training=True)):
                logits, endpoints = mobilenet_v2.mobilenet(
                    hidden_layer, self.input_size)

            hidden_layer = endpoints['feature_maps']
            feature_shape = hidden_layer.get_shape().as_list()
            hidden_layer = tf.reshape(
                hidden_layer, [-1, self.number_of_frames] + feature_shape[-3:])
            hidden_layer = tf.reduce_mean(hidden_layer, axis=1)
            hidden_layer = tf.layers.flatten(hidden_layer)

        hidden_layer = tf.layers.dense(inputs=hidden_layer,
                                       units=256,
                                       activation=tf.nn.relu)
        hidden_layer = tf.layers.dropout(inputs=hidden_layer,
                                         rate=0.5,
                                         training=self.is_training)

        logits = tf.layers.dense(inputs=hidden_layer, units=self.output_dim)
        sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")
        return logits, sigmoid_tensor
Example #4
    def testDivisibleByWithArgScope(self):
        tf.reset_default_graph()
        # Verifies that the depth_multiplier arg scope actually works
        # if no default min_depth is provided.
        with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
            mobilenet_v2.mobilenet(tf.placeholder(tf.float32,
                                                  (10, 224, 224, 2)),
                                   conv_defs=mobilenet_v2.V2_DEF,
                                   depth_multiplier=0.1)
            s = [op.outputs[0].get_shape().as_list()[-1]
                 for op in find_ops('Conv2D')]
            s = set(s)
            self.assertSameElements(s, [32, 192, 128, 1001])
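Example #5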
    def _select_model(self, model, class_num, root_pretrain):
        self.input = tf.placeholder(tf.float32, [None, self.image_size[1], self.image_size[0], 3], name='image_input')
        #self.labels = tf.placeholder(tf.int64, [None]) # hcw cutmix
        self.labels = tf.placeholder(tf.float32, [None, None])  # hcw cutmix
        self.is_training = tf.placeholder(tf.bool, name='is_training')  #hcw
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')   #hcw

        if model == 'inception_resnet_v2':
            with slim.arg_scope(inception_resnet_v2_arg_scope()):
                logits, end_points = inception_resnet_v2(self.input, class_num, is_training = self.is_training, dropout_keep_prob = self.keep_prob) #hcw
                self.exclude = ['InceptionResnetV2/AuxLogits', 'InceptionResnetV2/Logits'] 
                self.last_layer_name = 'Predictions'
                self.path_pretrain = root_pretrain + 'inception_resnet_v2.ckpt'
        elif model == 'resnet_v2_50':
            with slim.arg_scope(resnet_arg_scope()):
                logits, end_points = resnet_v2_50(self.input, class_num, is_training = self.is_training)
                self.exclude = ['resnet_v2_50/logits']
                self.last_layer_name = 'predictions'
                self.path_pretrain = root_pretrain + 'resnet_v2_50.ckpt'
        elif model == 'mobilenet_v2':
            logits, end_points = mobilenet(self.input, class_num, is_training = self.is_training, depth_multiplier=0.5, finegrain_classification_mode=True)
            self.exclude = ['MobilenetV2/Logits']
            self.last_layer_name = 'Predictions'
            self.path_pretrain = root_pretrain + 'mobilenet_v2_0.5_128.ckpt'
            # Wrapper for MobileNet V2 with a depth multiplier. Note that
            # 'finegrain_classification_mode' is set to True, which means the
            # embedding layer will not be shrunk for a depth multiplier < 1.0.
        else:
            raise ValueError('Error: the model is not available.')

        return logits, end_points
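
Note that `self.exclude` and `self.path_pretrain` are recorded but not consumed here; a hedged sketch of the intended restore step (assuming slim's standard checkpoint helpers) is:

# Hypothetical continuation of _select_model: restore pretrained weights,
# skipping the class-count-dependent scopes collected in self.exclude.
variables_to_restore = slim.get_variables_to_restore(exclude=self.exclude)
init_fn = slim.assign_from_checkpoint_fn(self.path_pretrain,
                                         variables_to_restore,
                                         ignore_missing_vars=True)
# later, once a tf.Session exists: init_fn(sess)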
Example #6
    def __init__(self):
        self.X = tf.placeholder(tf.float32, [None, None, 3])
        images = tf.expand_dims(self.X, axis=0)
        images = tf.image.resize_images(images, [224, 224])
        images = tf.map_fn(
            lambda image: tf.image.per_image_standardization(image), images)
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True)):
            self.logits, endpoints = mobilenet_v2.mobilenet(images,
                                                            num_classes=20)
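
A usage sketch (hypothetical `Model` class wrapping the `__init__` above, and a hypothetical `image` array): the placeholder takes a single H*W*3 image, and the graph batches, resizes, and standardizes it internally.

model = Model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(tf.nn.softmax(model.logits),
                     feed_dict={model.X: image})  # image: H*W*3 float array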
Example #7
    def testFineGrained(self):
        tf.reset_default_graph()
        # Verifies that finegrain_classification_mode keeps the final
        # embedding layer at full width even with a tiny depth multiplier.

        mobilenet_v2.mobilenet(tf.placeholder(tf.float32, (10, 224, 224, 2)),
                               conv_defs=mobilenet_v2.V2_DEF,
                               depth_multiplier=0.01,
                               finegrain_classification_mode=True)
        s = [
            op.outputs[0].get_shape().as_list()[-1]
            for op in find_ops('Conv2D')
        ]
        s = set(s)
        # All convolutions shrink to 8 or 48, except the final
        # embedding (1280) and the logits (1001).
        self.assertSameElements(s, [8, 48, 1001, 1280])
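Example #8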
def build_frontend(inputs,
                   frontend,
                   is_training=True,
                   pretrained_dir="models"):
    if frontend == 'ResNet50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                inputs, is_training=is_training, scope='resnet_v2_50')
            frontend_scope = 'resnet_v2_50'
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'),
                var_list=slim.get_model_variables('resnet_v2_50'),
                ignore_missing_vars=True)
    elif frontend == 'ResNet101':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_101(
                inputs, is_training=is_training, scope='resnet_v2_101')
            frontend_scope = 'resnet_v2_101'
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'),
                var_list=slim.get_model_variables('resnet_v2_101'),
                ignore_missing_vars=True)
    elif frontend == 'ResNet152':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_152(
                inputs, is_training=is_training, scope='resnet_v2_152')
            frontend_scope = 'resnet_v2_152'
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'),
                var_list=slim.get_model_variables('resnet_v2_152'),
                ignore_missing_vars=True)
    elif frontend == 'MobileNetV2':
        with slim.arg_scope(mobilenet_v2.training_scope()):
            logits, end_points = mobilenet_v2.mobilenet(
                inputs,
                is_training=is_training,
                scope='mobilenet_v2',
                base_only=True)
            frontend_scope = 'mobilenet_v2'
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=os.path.join(pretrained_dir,
                                        'mobilenet_v2_1.4_224.ckpt'),
                var_list=slim.get_model_variables('mobilenet_v2'),
                ignore_missing_vars=True)
    elif frontend == 'InceptionV4':
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, end_points = inception_v4.inception_v4(
                inputs, is_training=is_training, scope='inception_v4')
            frontend_scope = 'inception_v4'
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=os.path.join(pretrained_dir, 'inception_v4.ckpt'),
                var_list=slim.get_model_variables('inception_v4'),
                ignore_missing_vars=True)
    else:
        raise ValueError(
            "Unsupported frontend model '%s'. This function only supports "
            "ResNet50, ResNet101, ResNet152, MobileNetV2, and InceptionV4." %
            frontend)

    return logits, end_points, frontend_scope, init_fn
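
`slim.assign_from_checkpoint_fn` returns a callable that takes a session, so a typical (hypothetical) caller looks like:

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points, frontend_scope, init_fn = build_frontend(
    inputs, 'ResNet101', is_training=True, pretrained_dir='models')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # load the pretrained frontend weights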
Example #9
def main():
    with tf.Graph().as_default():
        args = parser.parse_args()  
        batch_size = args.batch_size 
        content_image_filenames = list(absoluteFilePaths(args.content_image_dir))
        style_image_filenames = list(absoluteFilePaths(args.style_image_dir))
        
        content_dataset = tf.data.Dataset.from_tensor_slices(tf.constant(content_image_filenames))
        content_dataset = content_dataset.map(read_image, num_parallel_calls=4)
        content_dataset = content_dataset.map(resize_content_image, num_parallel_calls=4)
        content_dataset = content_dataset.shuffle(1000) 
        content_dataset = content_dataset.batch(batch_size)
        content_dataset = content_dataset.prefetch(1)
        content_iterator = content_dataset.make_one_shot_iterator()
        content_batch = content_iterator.get_next()
        
        style_dataset = tf.data.Dataset.from_tensor_slices(style_image_filenames)
        style_dataset = style_dataset.map(read_image, num_parallel_calls=4)
        style_dataset = style_dataset.map(augment_image, num_parallel_calls=4)
        style_dataset = style_dataset.shuffle(1000) 
        style_dataset = style_dataset.batch(batch_size)
        style_dataset = style_dataset.prefetch(1)
        style_iterator = style_dataset.make_one_shot_iterator()
        style_batch = style_iterator.get_next()

        with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
            with tf.name_scope("content_endpoints"):
                _, content_endpoints = mobilenet_v2.mobilenet(tf.image.resize_images(content_batch, [224, 224]))
            with tf.name_scope("style_input_endpoints"):
                _, style_input_endpoints = mobilenet_v2.mobilenet(tf.image.resize_images(style_batch, [224, 224]))
            
            style_params = model.style_prediction_network(
                style_batch, style_input_endpoints["layer_18/output"])
            stylized_image = model.style_transformer_network(content_batch, style_params)

            with tf.name_scope("stylized_image_endpoints"):
                _, stylized_image_endpoints = mobilenet_v2.mobilenet(tf.image.resize_images(stylized_image, [224, 224]))
        loss = losses.total_loss(CONTENT_WEIGHT, content_batch, STYLE_WEIGHT, style_batch, stylized_image, TV_WEIGHT) 
        
        ema = tf.train.ExponentialMovingAverage(0.999)
        vars_to_restore = ema.variables_to_restore()
        saver = tf.train.Saver(vars_to_restore)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer()) 
            saver.restore(sess, args.mobile_net)
            loss = sess.run(loss)
Example #10
    def testImageSizes(self):
        for input_size, output_size in [(224, 7), (192, 6), (160, 5), (128, 4),
                                        (96, 3)]:
            tf.reset_default_graph()
            _, ep = mobilenet_v2.mobilenet(
                tf.placeholder(tf.float32, (10, input_size, input_size, 3)))

            self.assertEqual(ep['layer_18/output'].get_shape().as_list()[1:3],
                             [output_size] * 2)
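
The expected sizes follow from MobileNet V2's overall output stride of 32; a quick standalone check of that rule:

import math

for input_size, expected in [(224, 7), (192, 6), (160, 5), (128, 4), (96, 3)]:
    assert math.ceil(input_size / 32.0) == expected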
Example #11
def _tower_fn(is_training, dp_keep_prob, weight_decay, feature, label,
              data_format, num_layers, batch_norm_decay, batch_norm_epsilon,
              params):

    if params.model_name == 'mobilenet_v2':
        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True,
                                            dropout_keep_prob=dp_keep_prob)):
            logits, end_points = mobilenet_v2.mobilenet(
                feature, num_classes=num_classes, prediction_fn=None)
    elif params.model_name == 'mobilenet_v1':
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
            logits, end_points = mobilenet_v1.mobilenet_v1(
                feature, num_classes=num_classes)

    tower_pred = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits)
    }
    #tower_loss = tf.losses.softmax_cross_entropy(
    #              logits=logits, onehot_labels=tf.one_hot(label, depth=num_classes))
    tower_loss = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                        labels=label)
    tower_loss = tf.reduce_mean(tower_loss)
    model_params = tf.trainable_variables()
    depthwise_params = [v for v in model_params if 'depthwise' in v.op.name]

    if params.weight_decay_not_used_on_depthwise:
        model_params_for_weight_decay = [
            v for v in model_params if v not in depthwise_params
        ]
    else:
        model_params_for_weight_decay = model_params

    tower_loss += weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in model_params_for_weight_decay])
    tower_grad = tf.gradients(tower_loss, model_params)
    return tower_loss, zip(tower_grad, model_params), tower_pred
Example #12
    next_element = iterator.get_next()

    feature = {
        'train/image': tf.FixedLenFeature([batch_size, 224, 224, 3],
                                          tf.float32),
        'train/label': tf.FixedLenFeature([batch_size, 1], tf.int64)
    }

    sess.run(iterator.initializer)
    raw = sess.run(next_element)
    data = tf.parse_example(raw, feature)

    # Build the network in training mode so batch norm and dropout are active.
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=True)):
        logits, endpoints = mobilenet_v2.mobilenet(data['train/image'])
    # Restore using exponential moving average since it produces (1.5-2%) higher
    # accuracy
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars_to_restore = ema.variables_to_restore()

    saver = tf.train.Saver(vars_to_restore)
    checkpoint = 'd:/models/mobilenet_v2/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
    saver.restore(sess, checkpoint)

    # Loss and optimizer. minimize() returns the training op to run.
    loss = tf.losses.sparse_softmax_cross_entropy(data['train/label'], logits)
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

    sess.run(train_op)
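Example #13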
    def build(self, features):
        n_heatmaps = 17
        paf_nfields = 18
        paf_nvectors = 2
        paf_nscales = 0
        if self.backbone == 'mobilenet_v1':
            logits, end_points = mobilenet_v1(
                features,
                num_classes=False,
                is_training=self.is_training,
                depth_multiplier=self.depth_multiplier)
            backbone_end = end_points['Conv2d_13_pointwise']  #1, 36, 46, 54
            nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                                paf_nvectors, paf_nscales)

            end_points['PAF'] = nets
            end_points['outputs'] = [nets]
        elif self.backbone == 'mobilenet_v2':
            with tf.contrib.slim.arg_scope(
                    training_scope(is_training=self.is_training)):
                logits, end_points = mobilenet(
                    features,
                    num_classes=False,
                    depth_multiplier=self.depth_multiplier)
            backbone_end = end_points['layer_19']
            nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                                paf_nvectors, paf_nscales)

            end_points['PAF'] = nets
            end_points['outputs'] = [nets]
        elif self.backbone == 'shufflenet_v2':
            basenet = ShuffleNetV2(depth_multiplier=self.depth_multiplier,
                                   is_training=self.is_training)
            end_points = basenet.build(features)
            backbone_end = end_points['base_net/out']
            nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                                paf_nvectors, paf_nscales)

            end_points['PAF'] = nets
            end_points['outputs'] = [nets]

        elif self.backbone == 'mobilenet_thin':
            out = MobilenetNetworkThin({'image': features},
                                       conv_width=0.75,
                                       conv_width2=0.50,
                                       trainable=self.is_training)
            end_points = out.get_layer()

            thin_hm = end_points['MConv_Stage6_L2_5']
            hm_ch1 = tf.layers.conv2d(thin_hm,
                                      128,
                                      kernel_size=[1, 1],
                                      name='hm_channel1')
            ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
            hm_out = tf.layers.conv2d(ps1,
                                      17,
                                      kernel_size=[1, 1],
                                      name='hm_channel2')
            hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')

            thin_paf = end_points['MConv_Stage6_L1_5']
            paf_ch1 = tf.layers.conv2d(thin_paf,
                                       256,
                                       kernel_size=[1, 1],
                                       name='paf_channel1')
            ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
            paf_out = tf.layers.conv2d(ps2,
                                       36,
                                       kernel_size=[1, 1],
                                       name='paf_channel2')
            paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')

            end_points['heat_map'] = hm
            end_points['PAF'] = paf

        elif self.backbone == 'mobilenet_thin_s2d1':
            out = MobilenetNetworkThin({'image': features},
                                       conv_width=0.75,
                                       conv_width2=0.50,
                                       trainable=self.is_training)
            end_points = out.get_layer()
            ###HEATMAP
            thin_hm = end_points['MConv_Stage6_L2_5']
            s2d_hm = tf.space_to_depth(thin_hm,
                                       block_size=int(2),
                                       data_format='NHWC',
                                       name='space_to_depth_hm')
            hm_duc = self.DUC(s2d_hm,
                              filters=512,
                              upscale_factor=2,
                              is_training=self.is_training,
                              scope='DUC_hm')
            hm_ch1 = tf.layers.conv2d(hm_duc,
                                      128,
                                      kernel_size=[1, 1],
                                      name='hm_channel1')
            ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
            hm_out = tf.layers.conv2d(ps1,
                                      17,
                                      kernel_size=[1, 1],
                                      name='hm_conv')
            hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
            ###PAF
            thin_paf = end_points['MConv_Stage6_L1_5']
            s2d_paf = tf.space_to_depth(thin_paf,
                                        block_size=int(2),
                                        data_format='NHWC',
                                        name='space_to_depth_paf')
            paf_duc = self.DUC(s2d_paf,
                               filters=512,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC_paf')
            paf_ch1 = tf.layers.conv2d(paf_duc,
                                       256,
                                       kernel_size=[1, 1],
                                       name='paf_channel1')
            ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
            paf_out = tf.layers.conv2d(ps2,
                                       36,
                                       kernel_size=[1, 1],
                                       name='paf_conv')
            paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm
            end_points['PAF'] = paf

        elif self.backbone == 'mobilenet_thin_FPN':
            out = MobilenetNetworkThin({'image': features},
                                       conv_width=0.75,
                                       conv_width2=0.50,
                                       trainable=self.is_training)
            end_points = out.get_layer()

            ###HEATMAP
            thin_hm = end_points['MConv_Stage6_L2_5']
            classes_hm1 = tf.layers.conv2d(thin_hm,
                                           128,
                                           3,
                                           strides=2,
                                           name='cls1')
            classes_hm2 = tf.layers.conv2d(classes_hm1,
                                           256,
                                           3,
                                           strides=2,
                                           name='cls2')
            con1_hm2 = tf.layers.conv2d(classes_hm2, 256, 1, name='1con2')
            duc_hm2 = self.DUC(con1_hm2,
                               filters=512,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC_hm2')
            pad_hm2 = tf.pad(duc_hm2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                             name='pad_hm2')
            con1_hm1 = tf.layers.conv2d(classes_hm1, 512, 1, name='1con1')
            concat_feat = tf.concat(values=[con1_hm1, pad_hm2],
                                    axis=3,
                                    name='concat_feat_p1')
            duc_hm1 = self.DUC(concat_feat,
                               filters=256,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC_hm1')
            pad_hm1 = tf.pad(duc_hm1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                             name='pad_hm1')
            hm_duc = tf.concat(values=[pad_hm1, thin_hm],
                               axis=3,
                               name='concat_feat_p2')
            hm_ch1 = tf.layers.conv2d(hm_duc,
                                      128,
                                      kernel_size=[1, 1],
                                      name='hm_channel1')
            ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
            hm_out = tf.layers.conv2d(ps1,
                                      17,
                                      kernel_size=[1, 1],
                                      name='hm_conv')
            hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')

            ###PAF
            thin_paf = end_points['MConv_Stage6_L1_5']
            classes_paf1 = tf.layers.conv2d(thin_paf,
                                            128,
                                            3,
                                            strides=2,
                                            name='cls1_paf')
            classes_paf2 = tf.layers.conv2d(classes_paf1,
                                            256,
                                            3,
                                            strides=2,
                                            name='cls2_paf')
            con1_paf2 = tf.layers.conv2d(classes_paf2,
                                         256,
                                         1,
                                         name='1con2_paf')
            duc_paf2 = self.DUC(con1_paf2,
                                filters=512,
                                upscale_factor=2,
                                is_training=self.is_training,
                                scope='DUC_paf2')
            pad_paf2 = tf.pad(duc_paf2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                              name='pad_paf2')
            con1_paf1 = tf.layers.conv2d(classes_paf1,
                                         512,
                                         1,
                                         name='1con1_paf')
            concat_feat_paf = tf.concat(values=[con1_paf1, pad_paf2],
                                        axis=3,
                                        name='concat_feat_p1_paf')
            duc_paf1 = self.DUC(concat_feat_paf,
                                filters=256,
                                upscale_factor=2,
                                is_training=self.is_training,
                                scope='DUC_paf1')
            pad_paf1 = tf.pad(duc_paf1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                              name='pad_paf1')
            paf_duc = tf.concat(values=[pad_paf1, thin_paf],
                                axis=3,
                                name='concat_feat_p2_paf')
            paf_ch1 = tf.layers.conv2d(paf_duc,
                                       256,
                                       kernel_size=[1, 1],
                                       name='paf_channel1')
            ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
            paf_out = tf.layers.conv2d(ps2,
                                       36,
                                       kernel_size=[1, 1],
                                       name='paf_conv')
            paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')

            end_points['heat_map'] = hm
            end_points['PAF'] = paf

        elif self.backbone == 'hrnet':
            end_points = dict()
            out = HRNet(features)
            backbone_end = out
            s2d_1 = tf.space_to_depth(backbone_end,
                                      block_size=int(4),
                                      data_format='NHWC',
                                      name='space_to_depth_1')
            paf_cov1 = tf.layers.conv2d(
                s2d_1,
                64,  #38
                kernel_size=[1, 1],
                name='paf_cov1')
            s2d_2 = tf.space_to_depth(paf_cov1,
                                      block_size=int(2),
                                      data_format='NHWC',
                                      name='space_to_depth_2')
            paf = tf.layers.conv2d(
                s2d_2,
                36,  #38
                kernel_size=[1, 1],
                name='paf_conv')
            concat_feat = tf.concat(values=[s2d_1, paf_cov1],
                                    axis=3,
                                    name='concat_feat')

            ps1 = self.PixelShuffle(concat_feat, 2, scope='PixelShuffle1')
            hm_duc1 = self.DUC(ps1,
                               filters=512,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC1')
            hm_duc2 = self.DUC(hm_duc1,
                               filters=256,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC2')
            s2d_3 = tf.space_to_depth(paf_cov1,
                                      block_size=int(2),
                                      data_format='NHWC',
                                      name='space_to_depth_3')
            hm = tf.layers.conv2d(
                s2d_2,
                17,  #38
                kernel_size=[1, 1],
                name='hm_conv')
            hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
            paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm_out
            end_points['PAF'] = paf_out

        elif self.backbone == 'hrnet_tiny':
            end_points = dict()
            out = HRNet(features)
            backbone_end = out
            conv_paf1 = tf.layers.conv2d(backbone_end,
                                         128,
                                         3,
                                         strides=2,
                                         name='paf_conv1')
            conv_paf2 = tf.layers.conv2d(conv_paf1,
                                         128,
                                         3,
                                         strides=2,
                                         name='paf_conv2')
            conv_paf3 = tf.layers.conv2d(conv_paf2,
                                         128,
                                         3,
                                         strides=1,
                                         name='paf_conv3')
            conv_paf4 = tf.layers.conv2d(conv_paf3,
                                         128,
                                         3,
                                         strides=2,
                                         name='paf_conv4')
            pad_paf = tf.pad(conv_paf4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                             name='paf_pad')
            paf_ch1 = tf.layers.conv2d(pad_paf,
                                       256,
                                       kernel_size=[1, 1],
                                       name='paf_channel1')
            ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
            paf = tf.layers.conv2d(ps2,
                                   36,
                                   kernel_size=[1, 1],
                                   name='paf_conv')

            conv_hm1 = tf.layers.conv2d(backbone_end,
                                        128,
                                        3,
                                        strides=2,
                                        name='hm_conv1')
            conv_hm2 = tf.layers.conv2d(conv_hm1,
                                        128,
                                        3,
                                        strides=2,
                                        name='hm_conv2')
            conv_hm3 = tf.layers.conv2d(conv_hm2,
                                        128,
                                        3,
                                        strides=1,
                                        name='hm_conv3')
            conv_hm4 = tf.layers.conv2d(conv_hm3,
                                        128,
                                        3,
                                        strides=2,
                                        name='hm_conv4')
            pad_hm = tf.pad(conv_hm4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                            name='hm_pad')
            hm_ch1 = tf.layers.conv2d(pad_hm,
                                      128,
                                      kernel_size=[1, 1],
                                      name='hm_channel1')
            ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
            hm = tf.layers.conv2d(ps1, 17, kernel_size=[1, 1], name='hm_conv')

            hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
            paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm_out
            end_points['PAF'] = paf_out

        elif self.backbone == 'higher_hrnet':
            is_training = True
            end_points = dict()
            backbone_end = HRNet(features)
            #Downsampling
            downsample1 = tf.layers.conv2d(backbone_end,
                                           64,
                                           1,
                                           strides=2,
                                           name='downsample_1')
            bn_downsample1 = tf.layers.batch_normalization(
                downsample1, name='downsample_1_bn', training=is_training)
            downsample1 = tf.nn.relu(bn_downsample1)
            downsample2 = tf.layers.conv2d(downsample1,
                                           64,
                                           1,
                                           strides=2,
                                           name='downsample_2')
            bn_downsample2 = tf.layers.batch_normalization(
                downsample2, name='downsample2_bn', training=is_training)
            downsample2 = tf.keras.activations.relu(
                bn_downsample2)  #1/4 input size (1, 92, 108, 128)
            conv_paf3 = tf.layers.conv2d(downsample2,
                                         64,
                                         1,
                                         strides=2,
                                         name='paf_conv3')
            bn_downsample3 = tf.layers.batch_normalization(
                conv_paf3, name='downsample3_bn', training=is_training)
            downsample3 = tf.keras.activations.relu(
                bn_downsample3)  #(1, 46, 54, 128)

            #paf layer
            paf_final_conv1 = tf.layers.conv2d(downsample3,
                                               192,
                                               1,
                                               strides=1,
                                               name='final_conv1_paf')
            paf_final_conv2 = tf.layers.conv2d(paf_final_conv1,
                                               192,
                                               1,
                                               strides=1,
                                               name='final_conv2_paf')
            paf_output = tf.concat(values=[paf_final_conv2, downsample3],
                                   axis=3,
                                   name='output_paf')
            paf_adjust = tf.layers.conv2d(paf_output,
                                          36,
                                          1,
                                          strides=1,
                                          name='adjust_paf')

            #FinalLayer
            final_conv1 = tf.layers.conv2d(downsample2,
                                           192,
                                           1,
                                           strides=1,
                                           name='final_conv1')
            final_conv2 = tf.layers.conv2d(final_conv1,
                                           192,
                                           1,
                                           strides=1,
                                           name='final_conv2')
            conc_final_conv2 = tf.concat(values=[final_conv2, downsample2],
                                         axis=3,
                                         name='concat_finalconv2_downsam2')
            #Deconv block
            ps1 = self.DUC(conc_final_conv2,
                           filters=32,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC1')
            ps2 = self.DUC(ps1,
                           filters=32,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC2')
            s2d_1 = tf.space_to_depth(ps2,
                                      block_size=int(4),
                                      data_format='NHWC',
                                      name='space_to_depth_1')
            s2d_2 = tf.space_to_depth(s2d_1,
                                      block_size=int(2),
                                      data_format='NHWC',
                                      name='space_to_depth_2')
            #BasicLayer
            basic1 = self.HR_BasicBlock(s2d_2,
                                        filters=32,
                                        is_training=self.is_training,
                                        scope='basic_block1')
            basic2 = self.HR_BasicBlock(basic1,
                                        filters=32,
                                        is_training=self.is_training,
                                        scope='basic_block2')
            basic3 = self.HR_BasicBlock(basic2,
                                        filters=32,
                                        is_training=self.is_training,
                                        scope='basic_block3')
            basic4 = self.HR_BasicBlock(basic3,
                                        filters=32,
                                        is_training=self.is_training,
                                        scope='basic_block4')
            basic4 = tf.nn.relu(basic4)
            pad_basic4 = tf.pad(basic4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                                name='basic4_padding')
            adjust = tf.layers.conv2d(pad_basic4,
                                      17,
                                      3,
                                      strides=1,
                                      name='adjust')

            hm_out = tf.transpose(adjust, [0, 3, 1, 2], name='hm_out')
            paf_out = tf.transpose(paf_adjust, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm_out
            end_points['PAF'] = paf_out

        elif self.backbone == 'pre_hrnet':
            ######
            end_points = dict()
            hrnet = preHRnet(cfgfile='/cfgs/w30_s4.cfg')
            backbone_end = hrnet.forward_train(features)
            print(backbone_end)

        return end_points
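Example #14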
    def build(self, features):
        n_heatmaps = 17
        paf_nfields = 18
        paf_nvectors = 2
        paf_nscales = 0
        if self.backbone == 'mobilenet_v1':
            logits, end_points = mobilenet_v1(
                features,
                num_classes=False,
                is_training=self.is_training,
                depth_multiplier=self.depth_multiplier)
            backbone_end = end_points['Conv2d_13_pointwise']  #1, 36, 46, 54
            print(backbone_end)

        elif self.backbone == 'mobilenet_v2':
            with tf.contrib.slim.arg_scope(
                    training_scope(is_training=self.is_training)):
                logits, end_points = mobilenet(
                    features,
                    num_classes=False,
                    depth_multiplier=self.depth_multiplier)
            backbone_end = end_points['layer_19']

        elif self.backbone == 'shufflenet_v2':
            basenet = ShuffleNetV2(depth_multiplier=self.depth_multiplier,
                                   is_training=self.is_training)
            end_points = basenet.build(features)
            backbone_end = end_points['base_net/out']

        elif self.backbone == 'mobilenet_thin':
            out = MobilenetNetworkThin({'image': features},
                                       conv_width=0.75,
                                       conv_width2=0.50,
                                       trainable=self.is_training)
            end_points = out.get_layer()
            thin_hm = end_points['MConv_Stage6_L2_5']
            hm_out = tf.layers.conv2d(thin_hm,
                                      17,
                                      kernel_size=[1, 1],
                                      name='hm_conv')
            hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
            thin_paf = end_points['MConv_Stage6_L1_5']
            paf_out = tf.layers.conv2d(thin_paf,
                                       36,
                                       kernel_size=[1, 1],
                                       name='paf_conv')
            paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm
            end_points['PAF'] = paf

        elif self.backbone == 'hrnet':
            end_points = dict()
            out = HRNet(features)
            backbone_end = out
            s2d_1 = tf.space_to_depth(backbone_end,
                                      block_size=int(4),
                                      data_format='NHWC',
                                      name='space_to_depth_1')
            paf_cov1 = tf.layers.conv2d(
                s2d_1,
                64,  #38
                kernel_size=[1, 1],
                name='paf_cov1')
            s2d_2 = tf.space_to_depth(paf_cov1,
                                      block_size=int(2),
                                      data_format='NHWC',
                                      name='space_to_depth_2')
            paf = tf.layers.conv2d(
                s2d_2,
                36,  #38
                kernel_size=[1, 1],
                name='paf_conv')
            concat_feat = tf.concat(values=[s2d_1, paf_cov1],
                                    axis=3,
                                    name='concat_feat')

            ps1 = self.PixelShuffle(concat_feat, 2, scope='PixelShuffle1')
            hm_duc1 = self.DUC(ps1,
                               filters=512,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC1')
            hm_duc2 = self.DUC(hm_duc1,
                               filters=256,
                               upscale_factor=2,
                               is_training=self.is_training,
                               scope='DUC2')
            s2d_3 = tf.space_to_depth(paf_cov1,
                                      block_size=int(2),
                                      data_format='NHWC',
                                      name='space_to_depth_3')
            hm = tf.layers.conv2d(
                s2d_2,
                17,  #38
                kernel_size=[1, 1],
                name='hm_conv')
            hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
            paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
            end_points['heat_map'] = hm_out
            end_points['PAF'] = paf_out

        elif self.backbone == 'pafmodel':
            with tf.contrib.slim.arg_scope(
                    training_scope(is_training=self.is_training)):
                logits, end_points = mobilenet(
                    features,
                    num_classes=False,
                    depth_multiplier=self.depth_multiplier)
                backbone_end = end_points['layer_19']
            ps1 = self.PixelShuffle(backbone_end, 2, scope='PixelShuffle1')
            paf_duc1 = self.DUC(ps1,
                                filters=512,
                                upscale_factor=2,
                                is_training=self.is_training,
                                scope='PAF_DUC1')
            paf_duc2 = self.DUC(paf_duc1,
                                filters=256,
                                upscale_factor=2,
                                is_training=self.is_training,
                                scope='PAF_DUC2')
            paf_conv_feature1 = tf.space_to_depth(paf_duc2,
                                                  block_size=int(2),
                                                  data_format='NHWC',
                                                  name='space_to_depth_1')
            paf_conv_out1 = tf.layers.conv2d(
                paf_conv_feature1,
                20,  #38
                kernel_size=[3, 3],
                name='PAF_output')
            paf_duc2_pad = tf.pad(paf_duc2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                                  name='duc2_padding')
            paf_conv_out = tf.layers.conv2d(
                paf_duc2_pad,
                20,  #38
                kernel_size=[3, 3],
                name='PAF_conv')
            paf_conv_feature = tf.space_to_depth(paf_conv_out,
                                                 block_size=int(4),
                                                 data_format='NHWC',
                                                 name='space_to_depth_2')
            concat_feat = tf.concat(values=[ps1, paf_conv_feature],
                                    axis=3,
                                    name='concat_feat')

            duc1 = self.DUC(concat_feat,
                            filters=512,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='DUC1')
            duc2 = self.DUC(duc1,
                            filters=256,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='DUC2')
            hm_feature = tf.space_to_depth(duc2,
                                           block_size=int(2),
                                           data_format='NHWC',
                                           name='space_to_depth_3')
            hm_out = tf.layers.conv2d(
                hm_feature,
                self.number_keypoints,  #38
                kernel_size=[3, 3],
                name='output')

            conv_out = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
            paf_conv_out = tf.transpose(paf_conv_out1, [0, 3, 1, 2],
                                        name='paf_out')
            end_points['heat_map'] = conv_out
            end_points['PAF'] = paf_conv_out

        return end_points
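Example #15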
    def model_fn(self, features, labels, mode):
        """Auto-Scaling 3D CNN model.

        For more information on how to write a model function, see:
        https://www.tensorflow.org/guide/custom_estimators#write_a_model_function
        """
        input_layer = features

        # Replace missing values with 0
        hidden_layer = tf.where(tf.is_nan(input_layer),
                                tf.zeros_like(input_layer), input_layer)

        if self.input_size > 28:
            hidden_layer = tf.squeeze(hidden_layer, axis=[1])
            with tf.contrib.slim.arg_scope(
                    mobilenet_v2.training_scope(is_training=True)):
                logits, endpoints = mobilenet_v2.mobilenet(
                    hidden_layer, self.input_size)
            hidden_layer = tf.contrib.layers.conv2d(
                inputs=endpoints['feature_maps'],
                num_outputs=1280,
                kernel_size=1,
                stride=1,
                activation_fn=None)
            hidden_layer = tf.reduce_mean(input_tensor=hidden_layer,
                                          axis=[1, 2])
        else:
            REASONABLE_NUM_ENTRIES = 1000
            num_filters = 32  # The number of filters is fixed
            while True:
                shape = hidden_layer.shape
                kernel_size = [
                    min(3, shape[1]),
                    min(3, shape[2]),
                    min(3, shape[3])
                ]
                hidden_layer = tf.layers.conv3d(inputs=hidden_layer,
                                                filters=num_filters,
                                                kernel_size=kernel_size)
                kernel_size = [
                    min(1, shape[1]),
                    min(1, shape[2]),
                    min(1, shape[3])
                ]
                hidden_layer = tf.layers.conv3d(inputs=hidden_layer,
                                                filters=num_filters,
                                                kernel_size=kernel_size)
                pool_size = [
                    min(2, shape[1]),
                    min(2, shape[2]),
                    min(2, shape[3])
                ]
                hidden_layer = tf.layers.max_pooling3d(
                    inputs=hidden_layer,
                    pool_size=pool_size,
                    strides=pool_size,
                    padding='valid',
                    data_format='channels_last')
                if get_num_entries(hidden_layer) < REASONABLE_NUM_ENTRIES:
                    break
            hidden_layer = tf.layers.flatten(hidden_layer)

        hidden_layer = tf.layers.dense(inputs=hidden_layer,
                                       units=256,
                                       activation=tf.nn.relu)
        hidden_layer = tf.layers.dropout(
            inputs=hidden_layer,
            rate=0.5,
            training=mode == tf.estimator.ModeKeys.TRAIN)

        logits = tf.layers.dense(inputs=hidden_layer, units=self.output_dim)
        sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # "classes": binary_predictions,
            # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": sigmoid_tensor
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        # For multi-label classification, a correct loss is sigmoid cross entropy
        loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
        #loss = focal_loss(prediction_tensor=logits, target_tensor=labels)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer(0.001)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        assert mode == tf.estimator.ModeKeys.EVAL
        eval_metric_ops = {
            "accuracy":
            tf.metrics.accuracy(labels=labels,
                                predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
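
The loop above calls a `get_num_entries` helper that is not shown; a plausible definition, inferred from how it is used, is:

import numpy as np

def get_num_entries(tensor):
    # Number of entries per example: product of all non-batch dimensions.
    return np.prod(tensor.shape.as_list()[1:])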
Example #16
    args = parser.parse_args()

    checkpoint_path = args.checkpoint_path
    logger.info('checkpoint_path: ' + checkpoint_path)

    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, 619, 654, 3])
        img_size = tf.placeholder(dtype=tf.int32, shape=(2,), name='original_image_size')

    img_normalized = raw_img / 255 - 0.5

    layers = {}
    name = ""
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
        for k, tensor in sorted(list(endpoints.items()), key=lambda x: x[0]):
            layers['%s%s' % (name, k)] = tensor
            print(k, tensor.shape)
    def upsample(input, target):
        return tf.image.resize_bilinear(input, tf.constant([target.shape[1].value, target.shape[2].value]), align_corners=False)
    
    mobilenet_feature = tf.concat([layers['layer_7/output'], upsample(layers['layer_14/output'], layers['layer_7/output'])], 3)
    
    # get net graph
    logger.info('initializing model...')
    # net = PafNet(inputs_x=vgg_outputs, use_bn=args.use_bn)
    # hm_pre, cpm_pre, added_layers_out = net.gen_net()
    net = PafNet(inputs_x=mobilenet_feature, stage_num=6, hm_channel_num=19, use_bn=args.use_bn)
    hm_pre, paf_pre, added_layers_out = net.gen_net()
Example #17
import os
import time

import tensorflow as tf
import tensorflow.contrib.slim as slim

# Assumed import paths (tensorflow/models, research/slim) for the
# mobilenet helpers used below:
from nets.mobilenet import mobilenet, mobilenet_v2
from nets.mobilenet import conv_blocks as op

os.environ['CUDA_VISIBLE_DEVICES'] = ''

tf.reset_default_graph()
sess = tf.InteractiveSession()

X = tf.placeholder(tf.float32, [None, None, None])
images = tf.expand_dims(X, axis=0)
images = tf.image.resize_images(images, [224, 224])
images = tf.image.grayscale_to_rgb(images)
images = images / 128. - 1
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
    logits, endpoints = mobilenet_v2.mobilenet(images)
logits = tf.nn.relu6(logits)
emotion_logits = slim.fully_connected(
    logits,
    7,
    activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
    weights_regularizer=slim.l2_regularizer(1e-5),
    scope='emo/emotion_1',
    reuse=False)

with tf.variable_scope("age"):
    age1 = op.expanded_conv(endpoints['layer_16'], 160, stride=1)
    age2 = op.expanded_conv(age1, 320, stride=1)
    age3 = mobilenet.global_pool(
        op.expanded_conv(age2, 1280, stride=1, kernel_size=[1, 1]))
Example #18
def main():
    tf.reset_default_graph()

    global_step = tf.Variable(0, name='global_step', trainable=False)
    # label without one-hot
    batch_train, batch_labels = get_batch(train, train_label, IMG_W, IMG_H,
                                          BATCH_SIZE, CAPACITY)

    # Network. Set is_training=False when predicting on images.
    # with slim.arg_scope([slim.conv2d, slim.fully_connected], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):
    #     # logits, _ = inception_v3.inception_v3(batch_train, num_classes=N_CLASSES, is_training=True)
    #     logits, _ = resnet_v2.resnet_v2_152(batch_train, num_classes=N_CLASSES, is_training=True)
    #     logits = tf.reshape(logits, [-1, N_CLASSES])

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(weight_decay)):

        # with slim.arg_scope(mobilenet_v2.training_scope()):
        logits, _ = mobilenet_v2.mobilenet(batch_train,
                                           num_classes=N_CLASSES,
                                           is_training=True)
    print(logits.get_shape())

    # loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    regularization_losses_n = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([loss] + regularization_losses_n, name='total_loss')
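    # slim.l2_regularizer registered one penalty per weight tensor in
    # tf.GraphKeys.REGULARIZATION_LOSSES; tf.add_n folds them into the
    # cross-entropy term to form the total training loss.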

    tf.summary.scalar('train_loss', loss)

    # optimizer
    lr = tf.train.exponential_decay(learning_rate=init_lr,
                                    global_step=global_step,
                                    decay_steps=decay_steps,
                                    decay_rate=0.1)
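    # with staircase=False (the default) this is a smooth decay:
    # lr = init_lr * 0.1 ** (global_step / decay_steps)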
    tf.summary.scalar('learning_rate', lr)

    # set optimizer, trainable variable

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
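    # batch-norm moving-average updates live in UPDATE_OPS; running them as a
    # control dependency of the train op keeps inference statistics current.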
    with tf.control_dependencies(update_ops):
        if freeze_basemodel:
            trainable_variable = get_trainable_variables()
            for var in trainable_variable:
                print("only train variable:", var)

            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step, var_list=trainable_variable)
        else:
            print("train all variable")
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step)  #train all var

    # accuracy
    correct = tf.nn.in_top_k(logits, batch_labels, 1)
    correct = tf.cast(correct, tf.float16)
    accuracy = tf.reduce_mean(correct)
    tf.summary.scalar('train_acc', accuracy)

    summary_op = tf.summary.merge_all()
    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)

    # load model

    load_finetune_model = slim.assign_from_checkpoint_fn(
        finetune_model, get_finetuned_variables(), ignore_missing_vars=True)
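    # assign_from_checkpoint_fn returns a callable that restores only the
    # listed backbone variables; ignore_missing_vars=True tolerates the new
    # classifier head that is absent from the finetune checkpoint.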

    saver = tf.train.Saver(max_to_keep=100)
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #saver.restore(sess, logs_train_dir+'/model.ckpt-174000')

    print('Loading finetune model from %s' % finetune_model)
    load_finetune_model(sess)

    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break

            _, learning_rate, tra_loss, tra_acc = sess.run(
                [optimizer, lr, loss, accuracy])
            if step % display_step == 0:
                print(
                    'Epoch:%3d/%d, Step:%6d/%d, lr:%f, train loss:%.4f, train acc:%.2f%%'
                    % (step // one_epoch_step + 1, MAX_STEP // one_epoch_step,
                       step + display_step, MAX_STEP, learning_rate, tra_loss,
                       tra_acc * 100.0))

                # note: this separate sess.run pulls a fresh batch from the
                # input queue just to write the summary
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 500 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("save model", checkpoint_path)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
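Example 18 feeds the model through TF1 queue runners (get_batch plus tf.train.start_queue_runners), an API later superseded by tf.data. A rough tf.data sketch of the same pipeline, assuming train holds image paths and train_label integer labels (decode_and_resize is a hypothetical helper):

import tensorflow as tf

def make_dataset(train, train_label, img_w, img_h, batch_size, capacity):
    # hypothetical tf.data replacement for get_batch()
    def decode_and_resize(path, label):
        img = tf.image.decode_jpeg(tf.read_file(path), channels=3)
        img = tf.image.resize_images(img, [img_h, img_w])
        return img, label
    dataset = (tf.data.Dataset.from_tensor_slices((train, train_label))
               .shuffle(capacity)
               .map(decode_and_resize)
               .batch(batch_size)
               .repeat())
    return dataset.make_one_shot_iterator().get_next()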
Example No. 19
def train():
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    parser.add_argument('--batch_size', type=int, default=10)
    # argparse's type=bool treats any non-empty string as True; the bool
    # flags in this parser are kept as in the source but are fragile.
    parser.add_argument('--continue_training', type=bool, default=False)
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/mn_sepconv_33')
    # parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt')
    parser.add_argument(
        '--backbone_net_ckpt_path',
        type=str,
        default='checkpoints/mobilenet/mobilenet_v2_1.0_96.ckpt')
    # name kept from the VGG version; this flag now gates backbone training
    parser.add_argument('--train_vgg', type=bool, default=True)
    parser.add_argument('--annot_path',
                        type=str,
                        default='./COCO/annotations/')
    parser.add_argument('--img_path', type=str, default='./COCO/images/')
    # parser.add_argument('--annot_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/'
    #                             'person_keypoints_val2017.json')
    # parser.add_argument('--img_path_val', type=str,
    #                     default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/'
    #                             'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/val2017/')
    parser.add_argument('--save_checkpoint_frequency', type=int, default=1000)
    parser.add_argument('--save_summary_frequency', type=int, default=100)
    parser.add_argument('--stage_num', type=int, default=6)
    parser.add_argument('--hm_channels', type=int, default=19)
    parser.add_argument('--paf_channels', type=int, default=38)
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--max_echos', type=int, default=5)
    parser.add_argument('--use_bn', type=bool, default=False)
    parser.add_argument('--loss_func', type=str, default='l2')
    args = parser.parse_args()

    if not args.continue_training:
        start_time = time.localtime(time.time())
        checkpoint_path = args.checkpoint_path + ('%d-%d-%d-%d-%d-%d' %
                                                  start_time[0:6])
        os.mkdir(checkpoint_path)
    else:
        checkpoint_path = args.checkpoint_path

    logger = logging.getLogger('train')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(checkpoint_path + '/train_log.log')
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    logger.info(args)
    logger.info('checkpoint_path: ' + checkpoint_path)

    # define input placeholder
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32,
                                 shape=[args.batch_size, 368, 368, 3])
        # mask_hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels])
        # mask_paf = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels])
        hm = tf.placeholder(dtype=tf.float32,
                            shape=[args.batch_size, 46, 46, args.hm_channels])
        paf = tf.placeholder(
            dtype=tf.float32,
            shape=[args.batch_size, 46, 46, args.paf_channels])

    # define data loader
    logger.info('initializing data loader...')
    set_network_input_wh(args.input_width, args.input_height)
    scale = 8
    set_network_scale(scale)
    df = get_dataflow_batch(args.annot_path,
                            True,
                            args.batch_size,
                            img_path=args.img_path)
    steps_per_echo = df.size()
    enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100)
    q_inp, q_heat, q_vect = enqueuer.dequeue()
    q_inp_split, q_heat_split, q_vect_split = tf.split(q_inp, 1), tf.split(
        q_heat, 1), tf.split(q_vect, 1)
    img_normalized = q_inp_split[0] / 255 - 0.5  # [-0.5, 0.5]
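    # tf.split(..., 1) yields single-element lists, presumably left over
    # from a multi-tower (multi-GPU) variant of this script.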

    df_valid = get_dataflow_batch(args.annot_path,
                                  False,
                                  args.batch_size,
                                  img_path=args.img_path)
    df_valid.reset_state()
    validation_cache = []

    logger.info('initializing model...')
    # define vgg19
    # with slim.arg_scope(vgg.vgg_arg_scope()):
    #     vgg_outputs, end_points = vgg.vgg_19(img_normalized)
    #     with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    #         logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
    layers = {}
    name = ""
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
        for k, tensor in sorted(list(endpoints.items()), key=lambda x: x[0]):
            layers['%s%s' % (name, k)] = tensor
            # print(k, tensor.shape)
    def upsample(input, target):
        return tf.image.resize_bilinear(
            input,
            tf.constant([target.shape[1].value, target.shape[2].value]),
            align_corners=False)

    mobilenet_feature = tf.concat([
        layers['layer_7/output'],
        upsample(layers['layer_14/output'], layers['layer_7/output'])
    ], 3)

    # pdb.set_trace()
    # get net graph
    net = PafNet(inputs_x=mobilenet_feature,
                 stage_num=args.stage_num,
                 hm_channel_num=args.hm_channels,
                 use_bn=args.use_bn)
    hm_pre, paf_pre, added_layers_out = net.gen_net()

    # two kinds of loss
    losses = []
    with tf.name_scope('loss'):
        for idx, (l1, l2) in enumerate(zip(hm_pre, paf_pre)):
            if args.loss_func == 'square':
                hm_loss = tf.reduce_sum(
                    tf.square(tf.concat(l1, axis=0) - q_heat_split[0]))
                paf_loss = tf.reduce_sum(
                    tf.square(tf.concat(l2, axis=0) - q_vect_split[0]))
                losses.append(tf.reduce_sum([hm_loss, paf_loss]))
                logger.info('use square loss')
            else:
                hm_loss = tf.nn.l2_loss(
                    tf.concat(l1, axis=0) - q_heat_split[0])
                paf_loss = tf.nn.l2_loss(
                    tf.concat(l2, axis=0) - q_vect_split[0])
                losses.append(tf.reduce_mean([hm_loss, paf_loss]))
                logger.info('use l2 loss')
        loss = tf.reduce_sum(losses) / args.batch_size
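        # summing the per-stage losses implements OpenPose-style intermediate
        # supervision: every refinement stage is penalized against the same
        # ground-truth heatmaps and PAFs.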

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(1e-4,
                                               global_step,
                                               steps_per_echo,
                                               0.5,
                                               staircase=True)
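    # staircase=True halves the learning rate (decay_rate=0.5) once per
    # steps_per_echo global steps, i.e. roughly once per epoch.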
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2')
    with tf.name_scope('train'):
        train = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-8).minimize(
                                           loss=loss,
                                           global_step=global_step,
                                           var_list=trainable_var_list)
    logger.info('initialize saver...')
    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2'),
                              name='mobilenet_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize tensorboard')
    tf.summary.scalar("lr", learning_rate)
    tf.summary.scalar("loss2", loss)
    tf.summary.histogram('img_normalized', img_normalized)
    tf.summary.histogram('mobilenet_outputs', logits)
    tf.summary.histogram('added_layers_out', added_layers_out)
    tf.summary.image('mobilenet_out',
                     tf.transpose(logits[0:1, :, :, :], perm=[3, 1, 2, 0]),
                     max_outputs=512)
    tf.summary.image('added_layers_out',
                     tf.transpose(added_layers_out[0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=128)
    tf.summary.image('paf_gt',
                     tf.transpose(q_vect_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=38)
    tf.summary.image('hm_gt',
                     tf.transpose(q_heat_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=19)
    for i in range(args.stage_num):
        tf.summary.image('hm_pre_stage_%d' % i,
                         tf.transpose(hm_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=19)
        tf.summary.image('paf_pre_stage_%d' % i,
                         tf.transpose(paf_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=38)
    tf.summary.image('input', img_normalized, max_outputs=4)

    logger.info('initialize session...')
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(checkpoint_path, sess.graph)
        sess.run(tf.group(tf.global_variables_initializer()))
        if args.backbone_net_ckpt_path is not None:
            logger.info('restoring mobilenet weights from %s' %
                        args.backbone_net_ckpt_path)
            restorer.restore(sess, args.backbone_net_ckpt_path)
        if args.continue_training:
            saver.restore(
                sess,
                tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
            logger.info('restoring from checkpoint...')
        logger.info('start training...')
        coord = tf.train.Coordinator()
        enqueuer.set_coordinator(coord)
        enqueuer.start()
        best_checkpoint = float('inf')
        while True:
            for _ in tqdm(range(steps_per_echo)):
                total_loss, _, gs_num = sess.run([loss, train, global_step])
                echo = gs_num / steps_per_echo

                if gs_num % args.save_summary_frequency == 0:
                    total_loss, gs_num, summary, lr = sess.run(
                        [loss, global_step, merged, learning_rate])
                    writer.add_summary(summary, gs_num)
                    logger.info('epochs=%f, step=%d, total_loss=%f, lr=%f' %
                                (echo, gs_num, total_loss, lr))

                if gs_num % args.save_checkpoint_frequency == 0:
                    valid_loss = 0
                    if len(validation_cache) == 0:
                        for images_test, heatmaps, vectmaps in tqdm(
                                df_valid.get_data()):
                            validation_cache.append(
                                (images_test, heatmaps, vectmaps))
                        df_valid.reset_state()
                        del df_valid
                        df_valid = None

                    for images_test, heatmaps, vectmaps in validation_cache:
                        valid_loss += sess.run(loss,
                                               feed_dict={
                                                   q_inp: images_test,
                                                   q_vect: vectmaps,
                                                   q_heat: heatmaps
                                               })

                    if valid_loss / len(validation_cache) <= best_checkpoint:
                        best_checkpoint = valid_loss / len(validation_cache)
                        saver.save(sess,
                                   save_path=checkpoint_path + '/' + 'model',
                                   global_step=gs_num)
                        logger.info(
                            'best_checkpoint = %f, saving checkpoint to ' %
                            best_checkpoint + checkpoint_path + '/' +
                            'model-%d' % gs_num)

                    else:
                        logger.info('loss = %f, no improvement' %
                                    (valid_loss / len(validation_cache)))

                if echo >= args.max_echos:
                    sess.close()
                    return 0
Example No. 20
# For simplicity we just decode jpeg inside tensorflow.
# But one can provide any input obviously.
file_input = tf.placeholder(tf.string, ())

image = tf.image.decode_jpeg(tf.read_file(file_input))

images = tf.expand_dims(image, 0)
images = tf.cast(images, tf.float32) / 128. - 1
images.set_shape((None, None, None, 3))
images = tf.image.resize_images(images, (224, 224))

# Note: arg_scope is optional for inference.
with tf.contrib.slim.arg_scope(
        mobilenet_v2.training_scope(is_training=False)):
    logits, endpoints = mobilenet_v2.mobilenet(images,
                                               num_classes=3,
                                               is_training=False)
#logits,end_points = mobilenet_v2.mobilenet(images, num_classes=5, is_training=False)

# Restore using exponential moving average since it produces (1.5-2%) higher
# accuracy
ema = tf.train.ExponentialMovingAverage(0.999)
vars = ema.variables_to_restore()

saver = tf.train.Saver(vars)

from IPython import display
import pylab
#from datasets import imagenet
import PIL
display.display(display.Image('./test_images/laugh1.jpg'))
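To actually classify the displayed image one would continue along these lines (the checkpoint path is a placeholder for a fine-tuned 3-class checkpoint; 'Predictions' is the softmax endpoint exposed by the slim implementation):

checkpoint = './my_finetuned_mobilenet_v2.ckpt'  # hypothetical path
with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    probs = sess.run(endpoints['Predictions'],
                     feed_dict={file_input: './test_images/laugh1.jpg'})
    print('predicted class:', probs[0].argmax())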