def __mobilenetV2feature_sequence_extraction(self, inputdata, reuseflag):
    """Extract a pooled feature tensor with a MobileNetV2 backbone.

    The network is built in training mode when ``self.__use_bn`` is truthy
    and in inference mode otherwise; nothing else differs between the two
    configurations.

    :param inputdata: input image batch, eg. batch*128*128*1
    :param reuseflag: forwarded as ``reuse`` to ``mobilenet_v2.mobilenet``
    :return: the ``'global_pool'`` endpoint (batch, 1, 1, 1280 per the
        original author's note)
    """
    is_training = bool(self.__use_bn)
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        # The classification logits are not needed, only the endpoints.
        _, endpoints = mobilenet_v2.mobilenet(
            inputdata, depth_multiplier=1.0, reuse=reuseflag)
    return endpoints['global_pool']  # batch ,1,1,1280
def testDivisibleBy(self):
    """Check the Conv2D output depths built with divisible_by=16, min_depth=32."""
    tf.reset_default_graph()
    inputs = tf.placeholder(tf.float32, (10, 224, 224, 16))
    mobilenet_v2.mobilenet(
        inputs,
        conv_defs=mobilenet_v2.V2_DEF,
        divisible_by=16,
        min_depth=32)
    # Distinct channel counts (last dimension) produced by every Conv2D op.
    depths = set()
    for conv_op in find_ops('Conv2D'):
        depths.add(conv_op.outputs[0].get_shape().as_list()[-1])
    self.assertSameElements(
        [32, 64, 96, 160, 192, 320, 384, 576, 960, 1280, 1001], depths)
def model(self, features):
    """Build the classifier graph on top of MobileNetV2 feature maps.

    NaN entries of ``features`` are replaced by zeros. Depending on
    ``self.metadata.get_tensor_shape()[0]`` the input is either fed to the
    backbone directly, or first reshaped to a batch of frames whose feature
    maps are averaged over ``self.number_of_frames``.

    :param features: input tensor
    :return: ``(logits, sigmoid_tensor)``
    """
    # Replace missing values by 0
    net = tf.where(tf.is_nan(features), tf.zeros_like(features), features)

    # NOTE(review): the original indentation was lost; this assumes only the
    # mobilenet call sits inside the arg_scope - confirm.
    if self.metadata.get_tensor_shape()[0] == -1:
        static_shape = net.get_shape().as_list()
        # Unused, but kept so the constructed graph is unchanged.
        tensor_reshape = tf.shape(net)
        net = tf.reshape(net, [-1] + static_shape[-3:])
        if static_shape[-1] == 1:
            # Single-channel input: repeat the channel three times.
            net = tf.tile(net, [1, 1, 1, 3])
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True)):
            logits, endpoints = mobilenet_v2.mobilenet(net, self.input_size)
        net = endpoints['feature_maps']
        map_shape = net.get_shape().as_list()
        net = tf.reshape(
            net, [-1, self.number_of_frames] + map_shape[-3:])
        # Average over the frame axis.
        net = tf.reduce_mean(net, axis=1)
    else:
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True)):
            logits, endpoints = mobilenet_v2.mobilenet(net, self.input_size)
        net = tf.contrib.layers.conv2d(
            inputs=endpoints['feature_maps'],
            num_outputs=1280,
            kernel_size=1,
            stride=1,
            activation_fn=None)
        net = tf.reduce_mean(input_tensor=net, axis=[1, 2])

    # Shared classification head.
    net = tf.layers.flatten(net)
    net = tf.layers.dense(inputs=net, units=256, activation=tf.nn.relu)
    net = tf.layers.dropout(inputs=net, rate=0.5, training=self.is_training)
    logits = tf.layers.dense(inputs=net, units=self.output_dim)
    sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")
    return logits, sigmoid_tensor
def testDivisibleByWithArgScope(self):
    """Check Conv2D depths when min_depth comes from an arg_scope."""
    tf.reset_default_graph()
    # Verifies that depth_multiplier arg scope actually works
    # if no default min_depth is provided.
    inputs = tf.placeholder(tf.float32, (10, 224, 224, 2))
    with slim.arg_scope((mobilenet.depth_multiplier, ), min_depth=32):
        mobilenet_v2.mobilenet(
            inputs,
            conv_defs=mobilenet_v2.V2_DEF,
            depth_multiplier=0.1)
        depths = set()
        for conv_op in find_ops('Conv2D'):
            depths.add(conv_op.outputs[0].get_shape().as_list()[-1])
        self.assertSameElements(depths, [32, 192, 128, 1001])
def _select_model(self, model, class_num, root_pretrain):
    """Create the input placeholders and build the requested network.

    Sets ``self.input``, ``self.labels``, ``self.is_training`` and
    ``self.keep_prob``; for a known ``model`` also sets ``self.exclude``,
    ``self.last_layer_name`` and ``self.path_pretrain``.

    :param model: 'inception_resnet_v2', 'resnet_v2_50' or 'mobilenet_v2'
    :param class_num: number of output classes passed to the network
    :param root_pretrain: prefix prepended to the checkpoint file name
    :return: ``(logits, end_points)`` of the selected network
    :raises ValueError: if ``model`` is not one of the supported names
    """
    width, height = self.image_size[0], self.image_size[1]
    self.input = tf.placeholder(
        tf.float32, [None, height, width, 3], name='image_input')
    # Labels are float32 with two free dimensions (the original author's
    # note ties this to cutmix), not a vector of int64 class ids.
    self.labels = tf.placeholder(tf.float32, [None, None])
    self.is_training = tf.placeholder(tf.bool, name='is_training')
    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    if model == 'inception_resnet_v2':
        with slim.arg_scope(inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2(
                self.input,
                class_num,
                is_training=self.is_training,
                dropout_keep_prob=self.keep_prob)
        excluded = ['InceptionResnetV2/AuxLogits', 'InceptionResnetV2/Logits']
        last_layer = 'Predictions'
        checkpoint_file = 'inception_resnet_v2.ckpt'
    elif model == 'resnet_v2_50':
        with slim.arg_scope(resnet_arg_scope()):
            logits, end_points = resnet_v2_50(
                self.input, class_num, is_training=self.is_training)
        excluded = ['resnet_v2_50/logits']
        last_layer = 'predictions'
        checkpoint_file = 'resnet_v2_50.ckpt'
    elif model == 'mobilenet_v2':
        # Mobilenet v2 with a depth multiplier. Note that
        # 'finegrain_classification_mode' is set to True, which means the
        # embedding layer is not shrunk when the depth multiplier is < 1.0.
        logits, end_points = mobilenet(
            self.input,
            class_num,
            is_training=self.is_training,
            depth_multiplier=0.5,
            finegrain_classification_mode=True)
        excluded = ['MobilenetV2/Logits']
        last_layer = 'Predictions'
        checkpoint_file = 'mobilenet_v2_0.5_128.ckpt'
    else:
        raise ValueError('Error: the model is not available.')

    self.exclude = excluded
    self.last_layer_name = last_layer
    self.path_pretrain = root_pretrain + checkpoint_file
    return logits, end_points
def __init__(self):
    """Build a 20-class MobileNetV2 graph fed by one image placeholder.

    ``self.X`` is a rank-3 float placeholder with 3 channels; it is given a
    leading axis, resized to 224x224 and standardized per image before
    being passed to the network. The logits are stored in ``self.logits``.
    """
    self.X = tf.placeholder(tf.float32, [None, None, 3])
    batch = tf.image.resize_images(
        tf.expand_dims(self.X, axis=0), [224, 224])
    batch = tf.map_fn(tf.image.per_image_standardization, batch)
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=True)):
        self.logits, _ = mobilenet_v2.mobilenet(batch, num_classes=20)
def testFineGrained(self):
    """Check Conv2D depths with a tiny multiplier in finegrain mode."""
    tf.reset_default_graph()
    inputs = tf.placeholder(tf.float32, (10, 224, 224, 2))
    mobilenet_v2.mobilenet(
        inputs,
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=0.01,
        finegrain_classification_mode=True)
    depths = set()
    for conv_op in find_ops('Conv2D'):
        depths.add(conv_op.outputs[0].get_shape().as_list()[-1])
    # All convolutions will be 8->48, except for the last one.
    self.assertSameElements(depths, [8, 48, 1001, 1280])
def build_frontend(inputs, frontend, is_training=True, pretrained_dir="models"):
    """Build a pretrained frontend network and its checkpoint-restore function.

    :param inputs: input tensor fed to the frontend network
    :param frontend: one of 'ResNet50', 'ResNet101', 'ResNet152',
        'MobileNetV2' or 'InceptionV4'
    :param is_training: forwarded to the network constructor
    :param pretrained_dir: directory that contains the ``*.ckpt`` files
    :return: ``(logits, end_points, frontend_scope, init_fn)`` where
        ``init_fn`` comes from ``slim.assign_from_checkpoint_fn`` over the
        model variables under ``frontend_scope``
    :raises ValueError: if ``frontend`` is not a supported name
    """
    if frontend == 'ResNet50':
        frontend_scope = 'resnet_v2_50'
        checkpoint_name = 'resnet_v2_50.ckpt'
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                inputs, is_training=is_training, scope=frontend_scope)
    elif frontend == 'ResNet101':
        frontend_scope = 'resnet_v2_101'
        checkpoint_name = 'resnet_v2_101.ckpt'
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_101(
                inputs, is_training=is_training, scope=frontend_scope)
    elif frontend == 'ResNet152':
        frontend_scope = 'resnet_v2_152'
        checkpoint_name = 'resnet_v2_152.ckpt'
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_152(
                inputs, is_training=is_training, scope=frontend_scope)
    elif frontend == 'MobileNetV2':
        frontend_scope = 'mobilenet_v2'
        checkpoint_name = 'mobilenet_v2_1.4_224.ckpt'
        with slim.arg_scope(mobilenet_v2.training_scope()):
            logits, end_points = mobilenet_v2.mobilenet(
                inputs,
                is_training=is_training,
                scope=frontend_scope,
                base_only=True)
    elif frontend == 'InceptionV4':
        frontend_scope = 'inception_v4'
        checkpoint_name = 'inception_v4.ckpt'
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, end_points = inception_v4.inception_v4(
                inputs, is_training=is_training, scope=frontend_scope)
    else:
        raise ValueError(
            "Unsupported frontend model '%s'. This function only supports "
            "ResNet50, ResNet101, ResNet152, MobileNetV2, and InceptionV4"
            % (frontend))

    # The variables only exist once the network above has been built, so the
    # restore function has to be created after the branch.
    init_fn = slim.assign_from_checkpoint_fn(
        model_path=os.path.join(pretrained_dir, checkpoint_name),
        var_list=slim.get_model_variables(frontend_scope),
        ignore_missing_vars=True)
    return logits, end_points, frontend_scope, init_fn
def main():
    """Build the style-transfer graph, restore MobileNetV2 and evaluate the loss once.

    Content and style image paths are read from the directories given on the
    command line, turned into shuffled, batched ``tf.data`` pipelines and fed
    through MobileNetV2 plus the style prediction / transformer networks.
    """
    with tf.Graph().as_default():
        args = parser.parse_args()
        batch_size = args.batch_size
        content_image_filenames = list(absoluteFilePaths(args.content_image_dir))
        style_image_filenames = list(absoluteFilePaths(args.style_image_dir))

        content_dataset = tf.data.Dataset.from_tensor_slices(
            tf.constant(content_image_filenames))
        content_dataset = content_dataset.map(read_image, num_parallel_calls=4)
        content_dataset = content_dataset.map(resize_content_image,
                                              num_parallel_calls=4)
        content_dataset = content_dataset.shuffle(1000)
        content_dataset = content_dataset.batch(batch_size)
        # prefetch() returns a new dataset; the result must be kept or the
        # call has no effect.
        content_dataset = content_dataset.prefetch(1)
        content_iterator = content_dataset.make_one_shot_iterator()
        content_batch = content_iterator.get_next()

        style_dataset = tf.data.Dataset.from_tensor_slices(style_image_filenames)
        style_dataset = style_dataset.map(read_image, num_parallel_calls=4)
        style_dataset = style_dataset.map(augment_image, num_parallel_calls=4)
        style_dataset = style_dataset.shuffle(1000)
        style_dataset = style_dataset.batch(batch_size)
        style_dataset = style_dataset.prefetch(1)
        style_iterator = style_dataset.make_one_shot_iterator()
        style_batch = style_iterator.get_next()

        # NOTE(review): the original indentation was lost; this assumes every
        # network below is built inside the inference arg_scope - confirm.
        with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
            with tf.name_scope("content_endpoints"):
                _, content_endpoints = mobilenet_v2.mobilenet(
                    tf.image.resize_images(content_batch, [224, 224]))
            with tf.name_scope("style_input_endpoints"):
                _, style_input_endpoints = mobilenet_v2.mobilenet(
                    tf.image.resize_images(style_batch, [224, 224]))
            style_params = model.style_prediction_network(
                style_batch, style_input_endpoints["layer_18/output"])
            stylized_image = model.style_transformer_network(
                content_batch, style_params)
            with tf.name_scope("stylized_image_endpoints"):
                _, stylized_image_endpoints = mobilenet_v2.mobilenet(
                    tf.image.resize_images(stylized_image, [224, 224]))

        loss = losses.total_loss(CONTENT_WEIGHT, content_batch, STYLE_WEIGHT,
                                 style_batch, stylized_image, TV_WEIGHT)

        # Restore the exponential-moving-average weights from the checkpoint.
        ema = tf.train.ExponentialMovingAverage(0.999)
        variables_to_restore = ema.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, args.mobile_net)
            # Keep the tensor and its evaluated value under separate names.
            loss_value = sess.run(loss)
def testImageSizes(self):
    """Check the spatial size of 'layer_18/output' for several input sizes."""
    expected_sizes = [(224, 7), (192, 6), (160, 5), (128, 4), (96, 3)]
    for input_size, output_size in expected_sizes:
        tf.reset_default_graph()
        images = tf.placeholder(tf.float32, (10, input_size, input_size, 3))
        _, end_points = mobilenet_v2.mobilenet(images)
        spatial_dims = end_points['layer_18/output'].get_shape().as_list()[1:3]
        self.assertEqual(spatial_dims, [output_size] * 2)
def _tower_fn(is_training, dp_keep_prob, weight_decay, feature, label,
              data_format, num_layers, batch_norm_decay, batch_norm_epsilon,
              params):
    """Build one tower: network, loss with L2 weight decay, and gradients.

    :param is_training: not used by this function (see the review note below)
    :param dp_keep_prob: dropout keep probability for MobileNetV2
    :param weight_decay: multiplier of the summed L2 loss
    :param feature: input batch
    :param label: sparse class labels
    :param data_format: unused here
    :param num_layers: unused here
    :param batch_norm_decay: unused here
    :param batch_norm_epsilon: unused here
    :param params: object providing ``model_name`` and
        ``weight_decay_not_used_on_depthwise``
    :return: ``(tower_loss, zip(gradients, variables), tower_pred)``
    :raises ValueError: if ``params.model_name`` is not a supported model
    """
    # NOTE(review): `is_training` is ignored; the scopes below hard-code
    # True for v2 and False for v1. Left unchanged - confirm it is intended.
    # `num_classes` is not a parameter; it is resolved from the enclosing
    # module scope.
    model_name = params.model_name
    if model_name == 'mobilenet_v2':
        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True,
                                            dropout_keep_prob=dp_keep_prob)):
            logits, end_points = mobilenet_v2.mobilenet(
                feature, num_classes=num_classes, prediction_fn=None)
    elif model_name == 'mobilenet_v1':
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
            logits, end_points = mobilenet_v1.mobilenet_v1(
                feature, num_classes=num_classes)
    else:
        # Previously this fell through and failed later on an unbound `logits`.
        raise ValueError(
            "Unsupported model_name '%s'; expected 'mobilenet_v2' or "
            "'mobilenet_v1'" % (model_name,))

    tower_pred = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits)
    }

    tower_loss = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                        labels=label)
    tower_loss = tf.reduce_mean(tower_loss)

    model_params = tf.trainable_variables()
    if params.weight_decay_not_used_on_depthwise:
        # Filter by name in a single pass, with no membership tests on
        # variable objects.
        model_params_for_weight_decay = [
            v for v in model_params if 'depthwise' not in v.op.name
        ]
    else:
        model_params_for_weight_decay = model_params
    tower_loss += weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in model_params_for_weight_decay])

    tower_grad = tf.gradients(tower_loss, model_params)
    return tower_loss, zip(tower_grad, model_params), tower_pred
# Pull one serialized batch from the iterator and parse it into tensors.
next_element = iterator.get_next()
feature = {
    'train/image': tf.FixedLenFeature([batch_size, 224, 224, 3], tf.float32),
    'train/label': tf.FixedLenFeature([batch_size, 1], tf.int64)
}
sess.run(iterator.initializer)
raw = sess.run(next_element)
data = tf.parse_example(raw, feature)

# Note: arg_scope is optional for inference.
with tf.contrib.slim.arg_scope(
        mobilenet_v2.training_scope(is_training=True)):
    logits, endpoints = mobilenet_v2.mobilenet(data['train/image'])

# Restore using exponential moving average since it produces (1.5-2%) higher
# accuracy
ema = tf.train.ExponentialMovingAverage(0.999)
vars = ema.variables_to_restore()
saver = tf.train.Saver(vars)
checkpoint = 'd:/models/mobilenet_v2/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
saver.restore(sess, checkpoint)

# Loss and optimizer.
loss = tf.losses.sparse_softmax_cross_entropy(data['train/label'], logits)
op = tf.train.AdamOptimizer()
# minimize() returns the training op; the optimizer object itself cannot be
# passed to sess.run().
train_op = op.minimize(loss)
# The optimizer's own variables are created by minimize(), after the saver
# was built, so they are not restored and must be initialized here.
sess.run(tf.variables_initializer(op.variables()))
sess.run(train_op)
def build(self, features):
    """Build the backbone selected by ``self.backbone`` plus its output heads.

    Every recognised backbone produces an ``end_points`` dict. The
    'mobilenet_v1', 'mobilenet_v2' and 'shufflenet_v2' branches add 'PAF'
    and 'outputs' entries from ``self.headnet``; the other branches (except
    'pre_hrnet') add 'heat_map' and 'PAF' entries, each a tensor transposed
    with the permutation [0, 3, 1, 2].

    :param features: input tensor fed to the backbone
    :return: the ``end_points`` dict of the selected branch

    NOTE(review): if ``self.backbone`` matches none of the branches,
    ``end_points`` is never assigned and the final ``return`` fails.
    NOTE(review): the original indentation was lost; in the 'mobilenet_v2'
    branch this assumes only the ``mobilenet`` call is inside the arg_scope
    - confirm.
    """
    # Arguments forwarded to self.headnet for the 'paf' head.
    n_heatmaps = 17
    paf_nfields = 18
    paf_nvectors = 2
    paf_nscales = 0
    if self.backbone == 'mobilenet_v1':
        logits, end_points = mobilenet_v1(
            features,
            num_classes=False,
            is_training=self.is_training,
            depth_multiplier=self.depth_multiplier)
        backbone_end = end_points['Conv2d_13_pointwise']  #1, 36, 46, 54
        nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                            paf_nvectors, paf_nscales)
        end_points['PAF'] = nets
        end_points['outputs'] = [nets]
    elif self.backbone == 'mobilenet_v2':
        with tf.contrib.slim.arg_scope(
                training_scope(is_training=self.is_training)):
            logits, end_points = mobilenet(
                features,
                num_classes=False,
                depth_multiplier=self.depth_multiplier)
        backbone_end = end_points['layer_19']
        nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                            paf_nvectors, paf_nscales)
        end_points['PAF'] = nets
        end_points['outputs'] = [nets]
    elif self.backbone == 'shufflenet_v2':
        basenet = ShuffleNetV2(depth_multiplier=self.depth_multiplier,
                               is_training=self.is_training)
        end_points = basenet.build(features)
        backbone_end = end_points['base_net/out']
        nets = self.headnet('paf', backbone_end, n_heatmaps, paf_nfields,
                            paf_nvectors, paf_nscales)
        end_points['PAF'] = nets
        end_points['outputs'] = [nets]
    elif self.backbone == 'mobilenet_thin':
        out = MobilenetNetworkThin({'image': features},
                                   conv_width=0.75,
                                   conv_width2=0.50,
                                   trainable=self.is_training)
        end_points = out.get_layer()
        # Heatmap head: 1x1 conv -> PixelShuffle -> 1x1 conv to 17 channels.
        thin_hm = end_points['MConv_Stage6_L2_5']
        hm_ch1 = tf.layers.conv2d(thin_hm,
                                  128,
                                  kernel_size=[1, 1],
                                  name='hm_channel1')
        ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
        hm_out = tf.layers.conv2d(ps1,
                                  17,
                                  kernel_size=[1, 1],
                                  name='hm_channel2')
        hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
        # PAF head: 1x1 conv -> PixelShuffle -> 1x1 conv to 36 channels.
        thin_paf = end_points['MConv_Stage6_L1_5']
        paf_ch1 = tf.layers.conv2d(thin_paf,
                                   256,
                                   kernel_size=[1, 1],
                                   name='paf_channel1')
        ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
        paf_out = tf.layers.conv2d(ps2,
                                   36,
                                   kernel_size=[1, 1],
                                   name='paf_channel2')
        paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm
        end_points['PAF'] = paf
    elif self.backbone == 'mobilenet_thin_s2d1':
        out = MobilenetNetworkThin({'image': features},
                                   conv_width=0.75,
                                   conv_width2=0.50,
                                   trainable=self.is_training)
        end_points = out.get_layer()
        ###HEATMAP
        # space_to_depth -> DUC -> 1x1 conv -> PixelShuffle -> 1x1 conv.
        thin_hm = end_points['MConv_Stage6_L2_5']
        s2d_hm = tf.space_to_depth(thin_hm,
                                   block_size=int(2),
                                   data_format='NHWC',
                                   name='space_to_depth_hm')
        hm_duc = self.DUC(s2d_hm,
                          filters=512,
                          upscale_factor=2,
                          is_training=self.is_training,
                          scope='DUC_hm')
        hm_ch1 = tf.layers.conv2d(hm_duc,
                                  128,
                                  kernel_size=[1, 1],
                                  name='hm_channel1')
        ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
        hm_out = tf.layers.conv2d(ps1,
                                  17,
                                  kernel_size=[1, 1],
                                  name='hm_conv')
        hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
        ###PAF
        thin_paf = end_points['MConv_Stage6_L1_5']
        s2d_paf = tf.space_to_depth(thin_paf,
                                    block_size=int(2),
                                    data_format='NHWC',
                                    name='space_to_depth_paf')
        paf_duc = self.DUC(s2d_paf,
                           filters=512,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC_paf')
        paf_ch1 = tf.layers.conv2d(paf_duc,
                                   256,
                                   kernel_size=[1, 1],
                                   name='paf_channel1')
        ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
        paf_out = tf.layers.conv2d(ps2,
                                   36,
                                   kernel_size=[1, 1],
                                   name='paf_conv')
        paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm
        end_points['PAF'] = paf
    elif self.backbone == 'mobilenet_thin_FPN':
        out = MobilenetNetworkThin({'image': features},
                                   conv_width=0.75,
                                   conv_width2=0.50,
                                   trainable=self.is_training)
        end_points = out.get_layer()
        ###HEATMAP
        # Two stride-2 convs go down; DUC + pad + concat come back up in two
        # steps, the second concatenating with the original thin_hm feature.
        thin_hm = end_points['MConv_Stage6_L2_5']
        classes_hm1 = tf.layers.conv2d(thin_hm,
                                       128,
                                       3,
                                       strides=2,
                                       name='cls1')
        classes_hm2 = tf.layers.conv2d(classes_hm1,
                                       256,
                                       3,
                                       strides=2,
                                       name='cls2')
        con1_hm2 = tf.layers.conv2d(classes_hm2, 256, 1, name='1con2')
        duc_hm2 = self.DUC(con1_hm2,
                           filters=512,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC_hm2')
        pad_hm2 = tf.pad(duc_hm2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                         name='pad_hm2')
        con1_hm1 = tf.layers.conv2d(classes_hm1, 512, 1, name='1con1')
        concat_feat = tf.concat(values=[con1_hm1, pad_hm2],
                                axis=3,
                                name='concat_feat_p1')
        duc_hm1 = self.DUC(concat_feat,
                           filters=256,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC_hm1')
        pad_hm1 = tf.pad(duc_hm1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                         name='pad_hm1')
        hm_duc = tf.concat(values=[pad_hm1, thin_hm],
                           axis=3,
                           name='concat_feat_p2')
        hm_ch1 = tf.layers.conv2d(hm_duc,
                                  128,
                                  kernel_size=[1, 1],
                                  name='hm_channel1')
        ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
        hm_out = tf.layers.conv2d(ps1,
                                  17,
                                  kernel_size=[1, 1],
                                  name='hm_conv')
        hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
        ###PAF
        # Same structure as the heatmap path, with '_paf' layer names.
        thin_paf = end_points['MConv_Stage6_L1_5']
        classes_paf1 = tf.layers.conv2d(thin_paf,
                                        128,
                                        3,
                                        strides=2,
                                        name='cls1_paf')
        classes_paf2 = tf.layers.conv2d(classes_paf1,
                                        256,
                                        3,
                                        strides=2,
                                        name='cls2_paf')
        con1_paf2 = tf.layers.conv2d(classes_paf2, 256, 1, name='1con2_paf')
        duc_paf2 = self.DUC(con1_paf2,
                            filters=512,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='DUC_paf2')
        pad_paf2 = tf.pad(duc_paf2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                          name='pad_paf2')
        con1_paf1 = tf.layers.conv2d(classes_paf1, 512, 1, name='1con1_paf')
        concat_feat_paf = tf.concat(values=[con1_paf1, pad_paf2],
                                    axis=3,
                                    name='concat_feat_p1_paf')
        duc_paf1 = self.DUC(concat_feat_paf,
                            filters=256,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='DUC_paf1')
        pad_paf1 = tf.pad(duc_paf1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                          name='pad_paf1')
        paf_duc = tf.concat(values=[pad_paf1, thin_paf],
                            axis=3,
                            name='concat_feat_p2_paf')
        paf_ch1 = tf.layers.conv2d(paf_duc,
                                   256,
                                   kernel_size=[1, 1],
                                   name='paf_channel1')
        ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
        paf_out = tf.layers.conv2d(ps2,
                                   36,
                                   kernel_size=[1, 1],
                                   name='paf_conv')
        paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm
        end_points['PAF'] = paf
    elif self.backbone == 'hrnet':
        end_points = dict()
        out = HRNet(features)
        backbone_end = out
        s2d_1 = tf.space_to_depth(backbone_end,
                                  block_size=int(4),
                                  data_format='NHWC',
                                  name='space_to_depth_1')
        paf_cov1 = tf.layers.conv2d(
            s2d_1,
            64,  #38
            kernel_size=[1, 1],
            name='paf_cov1')
        s2d_2 = tf.space_to_depth(paf_cov1,
                                  block_size=int(2),
                                  data_format='NHWC',
                                  name='space_to_depth_2')
        paf = tf.layers.conv2d(
            s2d_2,
            36,  #38
            kernel_size=[1, 1],
            name='paf_conv')
        # NOTE(review): concat_feat / ps1 / hm_duc1 / hm_duc2 and s2d_3 are
        # added to the graph but never feed an output; `hm` below is
        # computed from s2d_2.
        concat_feat = tf.concat(values=[s2d_1, paf_cov1],
                                axis=3,
                                name='concat_feat')
        ps1 = self.PixelShuffle(concat_feat, 2, scope='PixelShuffle1')
        hm_duc1 = self.DUC(ps1,
                           filters=512,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC1')
        hm_duc2 = self.DUC(hm_duc1,
                           filters=256,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC2')
        s2d_3 = tf.space_to_depth(paf_cov1,
                                  block_size=int(2),
                                  data_format='NHWC',
                                  name='space_to_depth_3')
        hm = tf.layers.conv2d(
            s2d_2,
            17,  #38
            kernel_size=[1, 1],
            name='hm_conv')
        hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
        paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm_out
        end_points['PAF'] = paf_out
    elif self.backbone == 'hrnet_tiny':
        end_points = dict()
        out = HRNet(features)
        backbone_end = out
        # PAF path: four 3x3 convs (strides 2, 2, 1, 2), pad, 1x1 conv,
        # PixelShuffle, 1x1 conv to 36 channels.
        conv_paf1 = tf.layers.conv2d(backbone_end,
                                     128,
                                     3,
                                     strides=2,
                                     name='paf_conv1')
        conv_paf2 = tf.layers.conv2d(conv_paf1,
                                     128,
                                     3,
                                     strides=2,
                                     name='paf_conv2')
        conv_paf3 = tf.layers.conv2d(conv_paf2,
                                     128,
                                     3,
                                     strides=1,
                                     name='paf_conv3')
        conv_paf4 = tf.layers.conv2d(conv_paf3,
                                     128,
                                     3,
                                     strides=2,
                                     name='paf_conv4')
        pad_paf = tf.pad(conv_paf4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                         name='paf_pad')
        paf_ch1 = tf.layers.conv2d(pad_paf,
                                   256,
                                   kernel_size=[1, 1],
                                   name='paf_channel1')
        ps2 = self.PixelShuffle(paf_ch1, 2, scope='PixelShuffle2')
        paf = tf.layers.conv2d(ps2, 36, kernel_size=[1, 1], name='paf_conv')
        # Heatmap path: same layout, ending in 17 channels.
        conv_hm1 = tf.layers.conv2d(backbone_end,
                                    128,
                                    3,
                                    strides=2,
                                    name='hm_conv1')
        conv_hm2 = tf.layers.conv2d(conv_hm1,
                                    128,
                                    3,
                                    strides=2,
                                    name='hm_conv2')
        conv_hm3 = tf.layers.conv2d(conv_hm2,
                                    128,
                                    3,
                                    strides=1,
                                    name='hm_conv3')
        conv_hm4 = tf.layers.conv2d(conv_hm3,
                                    128,
                                    3,
                                    strides=2,
                                    name='hm_conv4')
        pad_hm = tf.pad(conv_hm4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                        name='hm_pad')
        hm_ch1 = tf.layers.conv2d(pad_hm,
                                  128,
                                  kernel_size=[1, 1],
                                  name='hm_channel1')
        ps1 = self.PixelShuffle(hm_ch1, 2, scope='PixelShuffle1')
        hm = tf.layers.conv2d(ps1, 17, kernel_size=[1, 1], name='hm_conv')
        hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
        paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm_out
        end_points['PAF'] = paf_out
    elif self.backbone == 'higher_hrnet':
        # NOTE(review): the batch-norm layers below use this local constant,
        # while DUC / HR_BasicBlock use self.is_training - confirm intended.
        is_training = True
        end_points = dict()
        backbone_end = HRNet(features)
        #Downsampling
        downsample1 = tf.layers.conv2d(backbone_end,
                                       64,
                                       1,
                                       strides=2,
                                       name='downsample_1')
        bn_downsample1 = tf.layers.batch_normalization(
            downsample1, name='downsample_1_bn', training=is_training)
        downsample1 = tf.nn.relu(bn_downsample1)
        downsample2 = tf.layers.conv2d(downsample1,
                                       64,
                                       1,
                                       strides=2,
                                       name='downsample_2')
        bn_downsample2 = tf.layers.batch_normalization(
            downsample2, name='downsample2_bn', training=is_training)
        downsample2 = tf.keras.activations.relu(
            bn_downsample2)  #1/4 input size (1, 92, 108, 128)
        conv_paf3 = tf.layers.conv2d(downsample2,
                                     64,
                                     1,
                                     strides=2,
                                     name='paf_conv3')
        bn_downsample3 = tf.layers.batch_normalization(
            conv_paf3, name='downsample3_bn', training=is_training)
        downsample3 = tf.keras.activations.relu(
            bn_downsample3)  #(1, 46, 54, 128)
        #paf layer
        paf_final_conv1 = tf.layers.conv2d(downsample3,
                                           192,
                                           1,
                                           strides=1,
                                           name='final_conv1_paf')
        paf_final_conv2 = tf.layers.conv2d(paf_final_conv1,
                                           192,
                                           1,
                                           strides=1,
                                           name='final_conv2_paf')
        paf_output = tf.concat(values=[paf_final_conv2, downsample3],
                               axis=3,
                               name='ouput_paf')
        paf_adjust = tf.layers.conv2d(paf_output,
                                      36,
                                      1,
                                      strides=1,
                                      name='adjust_paf')
        #FinalLayer
        final_conv1 = tf.layers.conv2d(downsample2,
                                       192,
                                       1,
                                       strides=1,
                                       name='final_conv1')
        final_conv2 = tf.layers.conv2d(final_conv1,
                                       192,
                                       1,
                                       strides=1,
                                       name='final_conv2')
        conc_final_conv2 = tf.concat(values=[final_conv2, downsample2],
                                     axis=3,
                                     name='concat_finalconv2_downsam2')
        #Deconv block
        ps1 = self.DUC(conc_final_conv2,
                       filters=32,
                       upscale_factor=2,
                       is_training=self.is_training,
                       scope='DUC1')
        ps2 = self.DUC(ps1,
                       filters=32,
                       upscale_factor=2,
                       is_training=self.is_training,
                       scope='DUC2')
        s2d_1 = tf.space_to_depth(ps2,
                                  block_size=int(4),
                                  data_format='NHWC',
                                  name='space_to_depth_1')
        s2d_2 = tf.space_to_depth(s2d_1,
                                  block_size=int(2),
                                  data_format='NHWC',
                                  name='space_to_depth_2')
        #BasicLayer
        basic1 = self.HR_BasicBlock(s2d_2,
                                    filters=32,
                                    is_training=self.is_training,
                                    scope='basic_block1')
        basic2 = self.HR_BasicBlock(basic1,
                                    filters=32,
                                    is_training=self.is_training,
                                    scope='basic_block2')
        basic3 = self.HR_BasicBlock(basic2,
                                    filters=32,
                                    is_training=self.is_training,
                                    scope='basic_block3')
        basic4 = self.HR_BasicBlock(basic3,
                                    filters=32,
                                    is_training=self.is_training,
                                    scope='basic_block4')
        basic4 = tf.nn.relu(basic4)
        pad_basic4 = tf.pad(basic4, [[0, 0], [1, 1], [1, 1], [0, 0]],
                            name='basic4_padding')
        adjust = tf.layers.conv2d(pad_basic4, 17, 3, strides=1, name='adjust')
        hm_out = tf.transpose(adjust, [0, 3, 1, 2], name='hm_out')
        paf_out = tf.transpose(paf_adjust, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm_out
        end_points['PAF'] = paf_out
    elif self.backbone == 'pre_hrnet':  ######
        # This branch only builds and prints the backbone output; the
        # returned end_points dict stays empty.
        end_points = dict()
        hrnet = preHRnet(cfgfile='/cfgs/w30_s4.cfg')
        backbone_end = hrnet.forward_train(features)
        print(backbone_end)
    return end_points
def build(self, features):
    """Build the backbone selected by ``self.backbone`` and return its end points.

    For 'mobilenet_v1', 'mobilenet_v2' and 'shufflenet_v2' only the backbone
    is built; no head is attached and ``end_points`` is returned as the
    backbone produced it. 'mobilenet_thin', 'hrnet' and 'pafmodel' add
    'heat_map' and 'PAF' entries, each transposed with the permutation
    [0, 3, 1, 2].

    :param features: input tensor fed to the backbone
    :return: the ``end_points`` dict of the selected branch

    NOTE(review): if ``self.backbone`` matches none of the branches,
    ``end_points`` is never assigned and the final ``return`` fails.
    NOTE(review): the original indentation was lost; this assumes only the
    ``mobilenet`` call is inside each arg_scope - confirm.
    """
    # NOTE(review): these four constants are not referenced again in this
    # function.
    n_heatmaps = 17
    paf_nfields = 18
    paf_nvectors = 2
    paf_nscales = 0
    if self.backbone == 'mobilenet_v1':
        logits, end_points = mobilenet_v1(
            features,
            num_classes=False,
            is_training=self.is_training,
            depth_multiplier=self.depth_multiplier)
        backbone_end = end_points['Conv2d_13_pointwise']  #1, 36, 46, 54
        print(backbone_end)
    elif self.backbone == 'mobilenet_v2':
        with tf.contrib.slim.arg_scope(
                training_scope(is_training=self.is_training)):
            logits, end_points = mobilenet(
                features,
                num_classes=False,
                depth_multiplier=self.depth_multiplier)
        backbone_end = end_points['layer_19']
    elif self.backbone == 'shufflenet_v2':
        basenet = ShuffleNetV2(depth_multiplier=self.depth_multiplier,
                               is_training=self.is_training)
        end_points = basenet.build(features)
        backbone_end = end_points['base_net/out']
    elif self.backbone == 'mobilenet_thin':
        out = MobilenetNetworkThin({'image': features},
                                   conv_width=0.75,
                                   conv_width2=0.50,
                                   trainable=self.is_training)
        end_points = out.get_layer()
        # Single 1x1 conv per head; the results are stored in end_points by
        # the separate `if` after this chain.
        thin_hm = end_points['MConv_Stage6_L2_5']
        hm_out = tf.layers.conv2d(thin_hm,
                                  17,
                                  kernel_size=[1, 1],
                                  name='hm_conv')
        hm = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
        thin_paf = end_points['MConv_Stage6_L1_5']
        paf_out = tf.layers.conv2d(thin_paf,
                                   36,
                                   kernel_size=[1, 1],
                                   name='paf_conv')
        paf = tf.transpose(paf_out, [0, 3, 1, 2], name='paf_out')
    elif self.backbone == 'hrnet':
        end_points = dict()
        out = HRNet(features)
        backbone_end = out
        s2d_1 = tf.space_to_depth(backbone_end,
                                  block_size=int(4),
                                  data_format='NHWC',
                                  name='space_to_depth_1')
        paf_cov1 = tf.layers.conv2d(
            s2d_1,
            64,  #38
            kernel_size=[1, 1],
            name='paf_cov1')
        s2d_2 = tf.space_to_depth(paf_cov1,
                                  block_size=int(2),
                                  data_format='NHWC',
                                  name='space_to_depth_2')
        paf = tf.layers.conv2d(
            s2d_2,
            36,  #38
            kernel_size=[1, 1],
            name='paf_conv')
        # NOTE(review): concat_feat / ps1 / hm_duc1 / hm_duc2 and s2d_3 are
        # added to the graph but never feed an output; `hm` below is
        # computed from s2d_2.
        concat_feat = tf.concat(values=[s2d_1, paf_cov1],
                                axis=3,
                                name='concat_feat')
        ps1 = self.PixelShuffle(concat_feat, 2, scope='PixelShuffle1')
        hm_duc1 = self.DUC(ps1,
                           filters=512,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC1')
        hm_duc2 = self.DUC(hm_duc1,
                           filters=256,
                           upscale_factor=2,
                           is_training=self.is_training,
                           scope='DUC2')
        s2d_3 = tf.space_to_depth(paf_cov1,
                                  block_size=int(2),
                                  data_format='NHWC',
                                  name='space_to_depth_3')
        hm = tf.layers.conv2d(
            s2d_2,
            17,  #38
            kernel_size=[1, 1],
            name='hm_conv')
        hm_out = tf.transpose(hm, [0, 3, 1, 2], name='hm_out')
        paf_out = tf.transpose(paf, [0, 3, 1, 2], name='paf_out')
        end_points['heat_map'] = hm_out
        end_points['PAF'] = paf_out

    # Store the heads built in the 'mobilenet_thin' branch above.
    if self.backbone == 'mobilenet_thin':
        end_points['heat_map'] = hm
        end_points['PAF'] = paf

    # 'pafmodel' is handled by its own `if`, outside the chain above.
    if self.backbone == 'pafmodel':
        with tf.contrib.slim.arg_scope(
                training_scope(is_training=self.is_training)):
            logits, end_points = mobilenet(
                features,
                num_classes=False,
                depth_multiplier=self.depth_multiplier)
        backbone_end = end_points['layer_19']
        ps1 = self.PixelShuffle(backbone_end, 2, scope='PixelShuffle1')
        paf_duc1 = self.DUC(ps1,
                            filters=512,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='PAF_DUC1')
        paf_duc2 = self.DUC(paf_duc1,
                            filters=256,
                            upscale_factor=2,
                            is_training=self.is_training,
                            scope='PAF_DUC2')
        # PAF output: space_to_depth of paf_duc2 followed by a 3x3 conv.
        paf_conv_feature1 = tf.space_to_depth(paf_duc2,
                                              block_size=int(2),
                                              data_format='NHWC',
                                              name='space_to_depth_1')
        paf_conv_out1 = tf.layers.conv2d(
            paf_conv_feature1,
            20,  #38
            kernel_size=[3, 3],
            name='PAF_output')
        # A second conv on the padded paf_duc2 is fed back into the heatmap
        # path through concat_feat below.
        paf_duc2_pad = tf.pad(paf_duc2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                              name='duc2_padding')
        paf_conv_out = tf.layers.conv2d(
            paf_duc2_pad,
            20,  #38
            kernel_size=[3, 3],
            name='PAF_conv')
        paf_conv_feature = tf.space_to_depth(paf_conv_out,
                                             block_size=int(4),
                                             data_format='NHWC',
                                             name='space_to_depth_2')
        concat_feat = tf.concat(values=[ps1, paf_conv_feature],
                                axis=3,
                                name='concat_feat')
        duc1 = self.DUC(concat_feat,
                        filters=512,
                        upscale_factor=2,
                        is_training=self.is_training,
                        scope='DUC1')
        duc2 = self.DUC(duc1,
                        filters=256,
                        upscale_factor=2,
                        is_training=self.is_training,
                        scope='DUC2')
        hm_feature = tf.space_to_depth(duc2,
                                       block_size=int(2),
                                       data_format='NHWC',
                                       name='space_to_depth_3')
        hm_out = tf.layers.conv2d(
            hm_feature,
            self.number_keypoints,  #38
            kernel_size=[3, 3],
            name='output')
        conv_out = tf.transpose(hm_out, [0, 3, 1, 2], name='hm_out')
        # The returned 'PAF' entry comes from paf_conv_out1 ('PAF_output');
        # the name paf_conv_out is rebound here.
        paf_conv_out = tf.transpose(paf_conv_out1, [0, 3, 1, 2],
                                    name='paf_out')
        end_points['heat_map'] = conv_out
        end_points['PAF'] = paf_conv_out
    return end_points
def model_fn(self, features, labels, mode):
    """Estimator model function: MobileNetV2 or an auto-scaling 3D CNN.

    Inputs with ``self.input_size > 28`` go through MobileNetV2; smaller
    inputs go through repeated conv3d/conv3d/max-pool stages until the
    tensor has fewer than ``REASONABLE_NUM_ENTRIES`` entries.

    For more information on how to write a model function, see:
    https://www.tensorflow.org/guide/custom_estimators#write_a_model_function

    :param features: input tensor
    :param labels: target tensor (unused in PREDICT mode)
    :param mode: a ``tf.estimator.ModeKeys`` value
    :return: the ``tf.estimator.EstimatorSpec`` for ``mode``
    """
    # Replace missing values by 0
    net = tf.where(tf.is_nan(features), tf.zeros_like(features), features)

    if self.input_size > 28:
        net = tf.squeeze(net, axis=[1])
        # NOTE(review): is_training=True is hard-coded regardless of `mode`;
        # the original indentation was lost, so this also assumes only the
        # mobilenet call sits inside the arg_scope - confirm both.
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True)):
            logits, endpoints = mobilenet_v2.mobilenet(net, self.input_size)
        net = tf.contrib.layers.conv2d(
            inputs=endpoints['feature_maps'],
            num_outputs=1280,
            kernel_size=1,
            stride=1,
            activation_fn=None)
        net = tf.reduce_mean(input_tensor=net, axis=[1, 2])
    else:
        REASONABLE_NUM_ENTRIES = 1000
        num_filters = 32  # The number of filters is fixed
        while True:
            # All three sizes are clamped against the shape at stage entry.
            shape = net.shape
            kernel_size = [min(3, shape[axis]) for axis in (1, 2, 3)]
            net = tf.layers.conv3d(inputs=net,
                                   filters=num_filters,
                                   kernel_size=kernel_size)
            kernel_size = [min(1, shape[axis]) for axis in (1, 2, 3)]
            net = tf.layers.conv3d(inputs=net,
                                   filters=num_filters,
                                   kernel_size=kernel_size)
            pool_size = [min(2, shape[axis]) for axis in (1, 2, 3)]
            net = tf.layers.max_pooling3d(inputs=net,
                                          pool_size=pool_size,
                                          strides=pool_size,
                                          padding='valid',
                                          data_format='channels_last')
            if get_num_entries(net) < REASONABLE_NUM_ENTRIES:
                break

    # Shared classification head.
    net = tf.layers.flatten(net)
    net = tf.layers.dense(inputs=net, units=256, activation=tf.nn.relu)
    in_train_mode = mode == tf.estimator.ModeKeys.TRAIN
    net = tf.layers.dropout(inputs=net, rate=0.5, training=in_train_mode)
    logits = tf.layers.dense(inputs=net, units=self.output_dim)
    sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor")

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by
        # the `logging_hook`.
        "probabilities": sigmoid_tensor
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    # For multi-label classification, a correct loss is sigmoid cross entropy
    loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(0.001).minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    assert mode == tf.estimator.ModeKeys.EVAL
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predictions["classes"])
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops={"accuracy": accuracy})
# Parse the command line and report which checkpoint will be used.
args = parser.parse_args()
checkpoint_path = args.checkpoint_path
logger.info('checkpoint_path: ' + checkpoint_path)

# NOTE(review): the original indentation was lost; this assumes both
# placeholders are created inside the 'inputs' name scope - confirm.
with tf.name_scope('inputs'):
    raw_img = tf.placeholder(tf.float32, shape=[None, 619, 654, 3])
    img_size = tf.placeholder(dtype=tf.int32,
                              shape=(2,),
                              name='original_image_size')
img_normalized = raw_img / 255 - 0.5

# Build MobileNetV2 and index its endpoints by name (in sorted key order).
layers = {}
name = ""
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
    logits, endpoints = mobilenet_v2.mobilenet(img_normalized)
for k in sorted(endpoints):
    tensor = endpoints[k]
    layers['%s%s' % (name, k)] = tensor
    print(k, tensor.shape)


def upsample(input, target):
    """Bilinearly resize ``input`` to the static height/width of ``target``."""
    target_hw = [target.shape[1].value, target.shape[2].value]
    return tf.image.resize_bilinear(input,
                                    tf.constant(target_hw),
                                    align_corners=False)


# Concatenate layer_7 with layer_14 upsampled to layer_7's size, along axis 3.
mobilenet_feature = tf.concat([
    layers['layer_7/output'],
    upsample(layers['layer_14/output'], layers['layer_7/output'])
], 3)

# get net graph
logger.info('initializing model...')
net = PafNet(inputs_x=mobilenet_feature,
             stage_num=6,
             hm_channel_num=19,
             use_bn=args.use_bn)
hm_pre, paf_pre, added_layers_out = net.gen_net()
import tensorflow.contrib.slim as slim
import time
import os

# Hide all GPUs from TensorFlow so the graph runs on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

tf.reset_default_graph()
sess = tf.InteractiveSession()

# Input: a single 3-D image tensor; a batch axis is prepended before resizing
# to 224x224, converting grayscale to RGB and scaling by 1/128 minus 1.
X = tf.placeholder(tf.float32, [None, None, None])
images = tf.expand_dims(X, axis=0)
images = tf.image.resize_images(images, [224, 224])
images = tf.image.grayscale_to_rgb(images)
images = images / 128. - 1

with tf.contrib.slim.arg_scope(
        mobilenet_v2.training_scope(is_training=True)):
    logits, endpoints = mobilenet_v2.mobilenet(images)

# NOTE(review): indentation was lost in SOURCE. The heads below are assumed to
# be built OUTSIDE the MobileNetV2 arg_scope (the explicit initializer and
# regularizer arguments suggest so) -- confirm against the original file.
logits = tf.nn.relu6(logits)

# Emotion head: 7-way linear classifier on top of the backbone logits.
emotion_logits = slim.fully_connected(
    logits,
    7,
    activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
    weights_regularizer=slim.l2_regularizer(1e-5),
    scope='emo/emotion_1',
    reuse=False)

# Age head: extra expanded-conv layers on the `layer_16` endpoint, followed by
# global pooling.
with tf.variable_scope("age"):
    age1 = op.expanded_conv(endpoints['layer_16'], 160, stride=1)
    age2 = op.expanded_conv(age1, 320, stride=1)
    age3 = mobilenet.global_pool(
        op.expanded_conv(age2, 1280, stride=1, kernel_size=[1, 1]))
def main():
    """Fine-tune a MobileNetV2 classifier fed by a TF1 input queue.

    Builds the graph (input batch, MobileNetV2 logits, sparse softmax loss
    plus regularization losses, exponentially decayed Adam optimizer, top-1
    accuracy), loads the fine-tune checkpoint and runs ``MAX_STEP`` training
    steps, printing metrics and writing summaries every ``display_step``
    steps and saving a checkpoint every 500 steps and at the last step.

    All configuration (``train``, ``train_label``, ``IMG_W``, ``IMG_H``,
    ``BATCH_SIZE``, ``CAPACITY``, ``N_CLASSES``, ``batch_norm_params``,
    ``weight_decay``, ``init_lr``, ``decay_steps``, ``freeze_basemodel``,
    ``config``, ``logs_train_dir``, ``finetune_model``, ``MAX_STEP``,
    ``display_step``, ``one_epoch_step``) is read from module-level names.

    :return: None
    """
    tf.reset_default_graph()
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Labels are class indices, not one-hot vectors.
    batch_train, batch_labels = get_batch(train, train_label, IMG_W, IMG_H,
                                          BATCH_SIZE, CAPACITY)

    # Network; set is_training=False when predicting images.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        logits, _ = mobilenet_v2.mobilenet(batch_train,
                                           num_classes=N_CLASSES,
                                           is_training=True)
    print(logits.get_shape())

    # Loss: mean cross entropy plus every registered regularization loss.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    regularization_losses_n = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([loss] + regularization_losses_n, name='total_loss')
    tf.summary.scalar('train_loss', loss)

    # Learning-rate schedule.
    lr = tf.train.exponential_decay(learning_rate=init_lr,
                                    global_step=global_step,
                                    decay_steps=decay_steps,
                                    decay_rate=0.1)
    tf.summary.scalar('learning_rate', lr)

    # Optimizer. The UPDATE_OPS collection is made a dependency of the train
    # op so those ops run on every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if freeze_basemodel:
            trainable_variable = get_trainable_variables()
            for var in trainable_variable:
                print("only train variable:", var)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step, var_list=trainable_variable)
        else:
            print("train all variable")
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step)

    # Top-1 accuracy. Averaged in float32: float16 has too little precision
    # for a percentage that is printed with two decimals.
    correct = tf.nn.in_top_k(logits, batch_labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.summary.scalar('train_acc', accuracy)

    summary_op = tf.summary.merge_all()
    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)

    # Initialize everything first, then overwrite with the fine-tune weights.
    load_finetune_model = slim.assign_from_checkpoint_fn(
        finetune_model, get_finetuned_variables(), ignore_missing_vars=True)
    saver = tf.train.Saver(max_to_keep=100)
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    print('Loading finetune model from %s' % finetune_model)
    load_finetune_model(sess)

    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break

            # Fetch the summaries in the same run as the training step so no
            # extra batch is dequeued (and discarded) just for logging, and
            # the summaries describe the batch whose metrics are printed.
            log_this_step = step % display_step == 0
            fetches = [optimizer, lr, loss, accuracy]
            if log_this_step:
                fetches.append(summary_op)
            results = sess.run(fetches)
            learning_rate, tra_loss, tra_acc = results[1:4]

            if log_this_step:
                print(
                    'Epoch:%3d/%d, Step:%6d/%d, lr:%f, train loss:%.4f, train acc:%.2f%%'
                    % (step / one_epoch_step + 1, MAX_STEP / one_epoch_step,
                       step + display_step, MAX_STEP, learning_rate, tra_loss,
                       tra_acc * 100.0))
                train_writer.add_summary(results[4], step)

            if step % 500 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("save model", checkpoint_path)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Always stop the queue threads and release the writer and session,
        # even when training aborts with an unexpected exception.
        coord.request_stop()
        try:
            coord.join(threads)
        finally:
            train_writer.close()
            sess.close()
def _str2bool(value):
    """Convert a command-line token to a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value (including ``"False"``) parses as ``True``. This converter
    recognizes the usual spellings instead.

    :param value: raw string from the command line (or an actual bool)
    :return: the parsed boolean
    :raises argparse.ArgumentTypeError: if the token is not a known spelling
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % value)


def train():
    """Train the PafNet pose model on top of a MobileNetV2 backbone.

    Parses the command line, prepares the checkpoint directory and logger,
    builds the input queue, the MobileNetV2 + PafNet graph, the per-stage
    losses, the optimizer and the TensorBoard summaries, then runs the
    training loop. Validation runs every ``--save_checkpoint_frequency``
    global steps over a cached copy of the validation dataflow, and a
    checkpoint is saved when the mean validation loss does not exceed the
    best value seen so far in the current epoch.

    :return: 0 once ``--max_echos`` epochs have been completed
    """
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    # Numeric options are parsed as int: they feed placeholder shapes,
    # range() and modulo arithmetic, all of which fail on str values.
    parser.add_argument('--batch_size', type=int, default=10)
    parser.add_argument('--continue_training', type=_str2bool, default=False)
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/mn_sepconv_33')
    parser.add_argument(
        '--backbone_net_ckpt_path',
        type=str,
        default='checkpoints/mobilenet/mobilenet_v2_1.0_96.ckpt')
    parser.add_argument('--train_vgg', type=_str2bool, default=True)
    parser.add_argument('--annot_path',
                        type=str,
                        default='./COCO/annotations/')
    parser.add_argument('--img_path', type=str, default='./COCO/images/')
    parser.add_argument('--save_checkpoint_frequency', type=int, default=1000)
    parser.add_argument('--save_summary_frequency', type=int, default=100)
    parser.add_argument('--stage_num', type=int, default=6)
    parser.add_argument('--hm_channels', type=int, default=19)
    parser.add_argument('--paf_channels', type=int, default=38)
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--max_echos', type=int, default=5)
    parser.add_argument('--use_bn', type=_str2bool, default=False)
    parser.add_argument('--loss_func', type=str, default='l2')
    args = parser.parse_args()

    if not args.continue_training:
        # A fresh run gets its own timestamp-suffixed directory. makedirs
        # also creates missing parents such as 'checkpoints/train'.
        start_time = time.localtime(time.time())
        checkpoint_path = args.checkpoint_path + (
            '%d-%d-%d-%d-%d-%d' % start_time[0:6])
        os.makedirs(checkpoint_path)
    else:
        checkpoint_path = args.checkpoint_path

    # Log to both the console and <checkpoint_path>/train_log.log.
    logger = logging.getLogger('train')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(checkpoint_path + '/train_log.log')
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)
    logger.info(args)
    logger.info('checkpoint_path: ' + checkpoint_path)

    # Input placeholders: image, heatmap ground truth, PAF ground truth.
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32,
                                 shape=[args.batch_size, 368, 368, 3])
        hm = tf.placeholder(
            dtype=tf.float32,
            shape=[args.batch_size, 46, 46, args.hm_channels])
        paf = tf.placeholder(
            dtype=tf.float32,
            shape=[args.batch_size, 46, 46, args.paf_channels])

    # Data loader: the training dataflow is pushed through a queue.
    logger.info('initializing data loader...')
    set_network_input_wh(args.input_width, args.input_height)
    scale = 8
    set_network_scale(scale)
    df = get_dataflow_batch(args.annot_path,
                            True,
                            args.batch_size,
                            img_path=args.img_path)
    steps_per_echo = df.size()
    enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100)
    q_inp, q_heat, q_vect = enqueuer.dequeue()
    q_inp_split = tf.split(q_inp, 1)
    q_heat_split = tf.split(q_heat, 1)
    q_vect_split = tf.split(q_vect, 1)
    img_normalized = q_inp_split[0] / 255 - 0.5  # [-0.5, 0.5]

    df_valid = get_dataflow_batch(args.annot_path,
                                  False,
                                  args.batch_size,
                                  img_path=args.img_path)
    df_valid.reset_state()
    validation_cache = []

    logger.info('initializing model...')
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        logits, endpoints = mobilenet_v2.mobilenet(img_normalized)

    def upsample(features, target):
        """Bilinearly resize `features` to the static H/W of `target`."""
        target_hw = [target.shape[1].value, target.shape[2].value]
        return tf.image.resize_bilinear(features,
                                        tf.constant(target_hw),
                                        align_corners=False)

    # Backbone feature: layer_7 concatenated with layer_14 upsampled to
    # layer_7's spatial size, along the channel axis.
    layer_7_out = endpoints['layer_7/output']
    mobilenet_feature = tf.concat(
        [layer_7_out,
         upsample(endpoints['layer_14/output'], layer_7_out)], 3)

    # get net graph
    net = PafNet(inputs_x=mobilenet_feature,
                 stage_num=args.stage_num,
                 hm_channel_num=args.hm_channels,
                 use_bn=args.use_bn)
    hm_pre, paf_pre, added_layers_out = net.gen_net()

    # Two kinds of loss, accumulated over every stage's (heatmap, PAF) output.
    losses = []
    with tf.name_scope('loss'):
        for l1, l2 in zip(hm_pre, paf_pre):
            hm_diff = tf.concat(l1, axis=0) - q_heat_split[0]
            paf_diff = tf.concat(l2, axis=0) - q_vect_split[0]
            if args.loss_func == 'square':
                hm_loss = tf.reduce_sum(tf.square(hm_diff))
                paf_loss = tf.reduce_sum(tf.square(paf_diff))
                losses.append(tf.reduce_sum([hm_loss, paf_loss]))
                logger.info('use square loss')
            else:
                hm_loss = tf.nn.l2_loss(hm_diff)
                paf_loss = tf.nn.l2_loss(paf_diff)
                losses.append(tf.reduce_mean([hm_loss, paf_loss]))
                logger.info('use l2 loss')
        loss = tf.reduce_sum(losses) / args.batch_size

    global_step = tf.Variable(0, name='global_step', trainable=False)
    # Halve the learning rate once per epoch.
    learning_rate = tf.train.exponential_decay(1e-4,
                                               global_step,
                                               steps_per_echo,
                                               0.5,
                                               staircase=True)

    backbone_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='MobilenetV2')
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        # Despite the option name, this adds the MobilenetV2 variables.
        trainable_var_list = trainable_var_list + backbone_vars

    with tf.name_scope('train'):
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                          epsilon=1e-8).minimize(
                                              loss=loss,
                                              global_step=global_step,
                                              var_list=trainable_var_list)

    logger.info('initialize saver...')
    restorer = tf.train.Saver(backbone_vars, name='mobilenet_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize tensorboard')
    tf.summary.scalar("lr", learning_rate)
    tf.summary.scalar("loss2", loss)
    tf.summary.histogram('img_normalized', img_normalized)
    tf.summary.histogram('mobilenet_outputs', logits)
    tf.summary.histogram('added_layers_out', added_layers_out)
    # Each image summary takes the first batch element and moves its channels
    # to the batch axis, so every channel is rendered as its own image.
    tf.summary.image('mobilenet_out',
                     tf.transpose(logits[0:1, :, :, :], perm=[3, 1, 2, 0]),
                     max_outputs=512)
    tf.summary.image('added_layers_out',
                     tf.transpose(added_layers_out[0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=128)
    tf.summary.image('paf_gt',
                     tf.transpose(q_vect_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=38)
    tf.summary.image('hm_gt',
                     tf.transpose(q_heat_split[0][0:1, :, :, :],
                                  perm=[3, 1, 2, 0]),
                     max_outputs=19)
    for i in range(args.stage_num):
        tf.summary.image('hm_pre_stage_%d' % i,
                         tf.transpose(hm_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=19)
        tf.summary.image('paf_pre_stage_%d' % i,
                         tf.transpose(paf_pre[i][0:1, :, :, :],
                                      perm=[3, 1, 2, 0]),
                         max_outputs=38)
    tf.summary.image('input', img_normalized, max_outputs=4)

    logger.info('initialize session...')
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter(checkpoint_path, sess.graph)
        sess.run(tf.group(tf.global_variables_initializer()))
        if args.backbone_net_ckpt_path is not None:
            logger.info('restoring mobilenet weights from %s' %
                        args.backbone_net_ckpt_path)
            restorer.restore(sess, args.backbone_net_ckpt_path)
        if args.continue_training:
            saver.restore(
                sess,
                tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
            logger.info('restoring from checkpoint...')

        logger.info('start training...')
        coord = tf.train.Coordinator()
        enqueuer.set_coordinator(coord)
        enqueuer.start()

        while True:
            # The best validation loss is reset at the start of every epoch,
            # so the first validation pass of each epoch always saves.
            best_checkpoint = float('inf')
            for _ in tqdm(range(steps_per_echo)):
                total_loss, _, gs_num = sess.run(
                    [loss, train_op, global_step])
                echo = gs_num / steps_per_echo

                if gs_num % args.save_summary_frequency == 0:
                    total_loss, gs_num, summary, lr = sess.run(
                        [loss, global_step, merged, learning_rate])
                    writer.add_summary(summary, gs_num)
                    logger.info('echos=%f, setp=%d, total_loss=%f, lr=%f' %
                                (echo, gs_num, total_loss, lr))

                if gs_num % args.save_checkpoint_frequency == 0:
                    if not validation_cache:
                        # Materialize the validation set once, then drop the
                        # dataflow; later passes reuse the cached batches.
                        for images_test, heatmaps, vectmaps in tqdm(
                                df_valid.get_data()):
                            validation_cache.append(
                                (images_test, heatmaps, vectmaps))
                        df_valid.reset_state()
                        df_valid = None

                    valid_loss = 0
                    for images_test, heatmaps, vectmaps in validation_cache:
                        valid_loss += sess.run(loss,
                                               feed_dict={
                                                   q_inp: images_test,
                                                   q_vect: vectmaps,
                                                   q_heat: heatmaps
                                               })
                    mean_valid_loss = valid_loss / len(validation_cache)

                    if mean_valid_loss <= best_checkpoint:
                        best_checkpoint = mean_valid_loss
                        saver.save(sess,
                                   save_path=checkpoint_path + '/' + 'model',
                                   global_step=gs_num)
                        logger.info(
                            'best_checkpoint = %f, saving checkpoint to ' %
                            best_checkpoint + checkpoint_path + '/' +
                            'model-%d' % gs_num)
                    else:
                        # The mean must be computed before formatting:
                        # `'...' % a / b` parses as `('...' % a) / b`.
                        logger.info('loss = %f drop' % mean_valid_loss)

            if echo >= args.max_echos:
                # Flush pending summaries; the `with` block closes the session.
                writer.close()
                return 0
# For simplicity the JPEG is decoded inside TensorFlow, but any other input
# pipeline could be used instead.
file_input = tf.placeholder(tf.string, ())
jpeg_bytes = tf.read_file(file_input)
image = tf.image.decode_jpeg(jpeg_bytes)

# Add a batch axis, scale by 1/128 minus 1, pin the channel count to 3 and
# resize to 224x224.
images = tf.expand_dims(image, 0)
images = tf.cast(images, tf.float32) / 128. - 1
images.set_shape((None, None, None, 3))
images = tf.image.resize_images(images, (224, 224))

# Note: arg_scope is optional for inference.
with tf.contrib.slim.arg_scope(
        mobilenet_v2.training_scope(is_training=False)):
    # Classifier head with 3 output classes.
    logits, endpoints = mobilenet_v2.mobilenet(images,
                                               num_classes=3,
                                               is_training=False)

# Restore using the exponential moving average of the weights since it
# produces (1.5-2%) higher accuracy.
ema = tf.train.ExponentialMovingAverage(0.999)
vars = ema.variables_to_restore()
saver = tf.train.Saver(vars)

from IPython import display
import pylab
import PIL

# Show the test image inline.
display.display(display.Image('./test_images/laugh1.jpg'))