def net_factory(net_name, inputs, train_model, FC=False):
    # 'fc_flage' is an argument of this repo's modified slim vgg implementation.
    if net_name == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_16(inputs, num_classes=None,
                                         is_training=train_model, fc_flage=FC)
    elif net_name == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            net, end_points = vgg.vgg_19(inputs, num_classes=None,
                                         is_training=train_model, fc_flage=FC)
    elif net_name == 'resnet_v2_50':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(inputs=inputs, num_classes=None,
                                                     is_training=train_model,
                                                     global_pool=False)
    elif net_name == 'resnet_v2_152':
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_152(inputs=inputs, num_classes=None,
                                                      is_training=train_model,
                                                      global_pool=False)
    else:
        # avoid returning undefined net/end_points for an unknown name
        raise ValueError('unknown net_name: %s' % net_name)
    return net, end_points
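# A minimal usage sketch for net_factory (the placeholder shape is an assumption;
# requires the slim vgg/resnet_v2 modules imported by this file):
def _net_factory_demo():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='demo_images')
    net, end_points = net_factory('resnet_v2_50', images, train_model=False)
    return net, end_points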
def perceptual_loss(real, fake, network="vgg_16"):
    if params.loss.vgg_w <= 0.0:
        return 0.0
    # Undo the input normalization before applying the VGG preprocessing.
    real = real * params.learning.image_std + params.learning.image_mean
    fake = fake * params.learning.image_std + params.learning.image_mean
    real = utils.perceptual_loss_image_preprocess(real)
    fake = utils.perceptual_loss_image_preprocess(fake)
    image = tf.concat([real, fake], axis=0)
    with tf.variable_scope("perceptual_loss"):
        if network == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_16(image)
        elif network == "vgg_19":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                conv1, conv2, conv3 = vgg.vgg_19(image)
        else:
            raise NotImplementedError("unsupported perceptual-loss network: %s" % network)
        losses = []
        for i, features in enumerate([conv1, conv2, conv3]):
            real, fake = tf.split(features, 2, 0)
            losses.append(params.loss.perceptual_loss.weights[i] *
                          tf.reduce_mean(tf.square(real - fake)))
        return losses[0] + losses[1] + losses[2]
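# utils.perceptual_loss_image_preprocess is not shown in this file; a plausible
# sketch, assuming it maps [0, 255] RGB images to the VGG mean-subtracted input
# convention (the helper name and the means are assumptions):
def perceptual_loss_image_preprocess_sketch(images):
    vgg_means = tf.constant([123.68, 116.78, 103.94], shape=[1, 1, 1, 3])  # RGB means
    return images - vgg_means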
def arch_vgg16(self, X, num_classes, dropout_keep_prob=0.8, is_train=False, embedding_size=128):
    arg_scope = vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        net_vis, end_points, _ = vgg_16_conv(X, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            with tf.variable_scope('Logits_out'):
                net_vis = slim.avg_pool2d(net_vis, net_vis.get_shape()[1:3],
                                          padding='VALID', scope='AvgPool_1a_out')  # 1 x 1 x 512
                net_vis = slim.dropout(net_vis, dropout_keep_prob, scope='Dropout_1b_out')
                net_vis = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
                net_vis = slim.fully_connected(net_vis, embedding_size,
                                               activation_fn=tf.nn.relu, scope='Logits_out0')
                net = slim.fully_connected(net_vis, num_classes,
                                           activation_fn=None, scope='Logits_out1')
    return net, net_vis
def Eval(x_img_224, x_img_299, y):
    input_image = x_img_224 - tf.reshape(tf.constant([123.68, 116.78, 103.94]),
                                         [1, 1, 1, 3])
    with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
        logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
            input_image, num_classes=110, is_training=False,
            scope='resnet_v1_50', reuse=tf.AUTO_REUSE)
        end_points_res_v1_50['logits'] = tf.squeeze(
            end_points_res_v1_50['resnet_v1_50/logits'], [1, 2])
        end_points_res_v1_50['probs'] = tf.nn.softmax(end_points_res_v1_50['logits'])
        res_label = tf.argmax(end_points_res_v1_50['probs'][0], -1)
        y_r = end_points_res_v1_50['probs'][0][y[0]]
    with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(
            input_image, num_classes=110, is_training=False,
            scope='vgg_16', reuse=tf.AUTO_REUSE)
        end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']
        end_points_vgg_16['probs'] = tf.nn.softmax(end_points_vgg_16['logits'])
        vgg_label = tf.argmax(end_points_vgg_16['probs'][0], -1)
        y_v = end_points_vgg_16['probs'][0][y[0]]
    return res_label, vgg_label, y_r, y_v
def getCNNFeatures(self, input_tensor, fc_dim, out_dim, fc_initializer, use_full=False):
    graph = tf.Graph()
    with graph.as_default():
        # note: input_tensor must have been created in this graph for the ops
        # below to be valid; ops cannot span two tf.Graph instances
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(input_tensor, is_training=False)
        model_path = os.path.join(self.checkpoints_dir, self.ckpt_name)
        variables_to_restore = tf.contrib.framework.get_variables_to_restore()
        variables_to_restore = [var for var in variables_to_restore
                                if 'vgg_16' in var.name]  # only use vgg things!
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            model_path, variables_to_restore)
        pool_result = end_points['vgg_16/pool5']
        flattened = tf.reshape(pool_result, [-1, fc_dim])
        with vs.variable_scope('fc_vgg'):
            W = vs.get_variable("W", [fc_dim, out_dim], initializer=fc_initializer)
            b = vs.get_variable("b", [out_dim], initializer=fc_initializer)
            output = tf.nn.relu(tf.matmul(flattened, W) + b)
    return init_fn, output
def style_loss(self, styled_vgg, style_image, layer_names, style_weight, sess):
    style_image_placeholder = tf.placeholder('float', shape=style_image.shape)
    with slim.arg_scope(vgg.vgg_arg_scope(reuse=True)):
        _, style_image_vgg = vgg.vgg_19(style_image_placeholder, num_classes=0,
                                        is_training=False)
    style_loss = 0
    preprocessed_style_image = style_image - np.array(
        [ctx.params.R_MEAN, ctx.params.G_MEAN, ctx.params.B_MEAN]).reshape([1, 1, 1, 3])
    for layer_name in layer_names:
        style_image_gram = self.gram_matrix_for_style_image(
            style_image_vgg[layer_name], style_image_placeholder,
            preprocessed_style_image, sess)
        input_image_gram = self.gram_matrix_for_input_image(styled_vgg[layer_name])
        style_loss += (2 * tf.nn.l2_loss(input_image_gram -
                                         np.expand_dims(style_image_gram, 0)) /
                       style_image_gram.size)
    return style_weight * style_loss
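# The gram_matrix_for_* helpers are not shown; a standard Gram-matrix sketch for
# a [batch, height, width, channels] feature map, normalized by the map size:
def gram_matrix_sketch(features):
    shape = tf.shape(features)
    b, h, w, c = shape[0], shape[1], shape[2], shape[3]
    flat = tf.reshape(features, [b, h * w, c])
    # [b, c, c] matrix of channel co-activations
    return tf.matmul(flat, flat, transpose_a=True) / tf.to_float(h * w * c)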
def extract_image_features(inputs, reuse=True):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_19(inputs, spatial_squeeze=False,
                                   is_training=False, reuse=reuse)
    return end_points
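# Usage sketch: read a single named activation from the shared VGG-19 tower
# (end-point keys follow the slim naming scheme; tf.AUTO_REUSE avoids failing
# on the very first call):
def _extract_demo(images):
    end_points = extract_image_features(images, reuse=tf.AUTO_REUSE)
    return end_points['vgg_19/conv4/conv4_2']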
def arch_multi_vgg16(self, X1, X2, X3, num_classes, dropout_keep_prob=0.8, is_train=False):
    arg_scope = vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('arch_multi_vgg16_1'):
            net_vis1, end_points1 = vgg_16(X1, is_training=is_train)
        with tf.variable_scope('arch_multi_vgg16_2'):
            net_vis2, end_points2 = vgg_16(X2, is_training=is_train)
        with tf.variable_scope('arch_multi_vgg16_3'):
            net_vis3, end_points3 = vgg_16(X3, is_training=is_train)
            # net_vis3, end_points3 = alexnet_v2(X3, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            with tf.variable_scope('Logits_out'):
                net_vis = tf.concat([net_vis1, net_vis2, net_vis3], 3)
                net = slim.conv2d(net_vis, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='fc8')
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
    return net, net_vis
def inference(X_tensor, number_of_classes, is_training_placeholder):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(inputs=X_tensor,
                                        num_classes=number_of_classes,
                                        is_training=is_training_placeholder,
                                        fc_conv_padding='VALID')
    return logits, end_points
def build_graph():
    image_placeholder = tf.placeholder(tf.float32, shape=[None, None, 3])
    processed_image = vgg_preprocessing.preprocess_image(
        image_placeholder, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(processed_images, num_classes=1000,
                                        is_training=False)
    probabilities = tf.nn.softmax(logits)
    return probabilities, image_placeholder, end_points
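# Running the inference graph (sketch; the checkpoint-restore step is omitted
# and image_np is assumed to be an HxWx3 float array):
def _classify_demo(image_np):
    probabilities, image_placeholder, _ = build_graph()
    with tf.Session() as sess:
        # ... restore vgg_16 weights here before running ...
        probs = sess.run(probabilities, feed_dict={image_placeholder: image_np})
    return probs[0].argsort()[-5:][::-1]  # indices of the top-5 classes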
def arch_vgg16(self, X, num_classes, dropout_keep_prob=0.8, is_train=False):
    arg_scope = vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        net_vis, end_points = vgg_16(X, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            with tf.variable_scope('Logits_out'):
                net = slim.conv2d(net_vis, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='fc8')
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
    return net, net_vis
def tower_loss(data_tensor, label_tensor, num_classes, train_mode):
    # vgg = tf.contrib.slim.nets.vgg
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=args.weight_decay)):
        logits, endpoints_dict = vgg.vgg_16(data_tensor, num_classes=num_classes,
                                            is_training=train_mode,
                                            dropout_keep_prob=args.dropout_keep_prob)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=label_tensor, logits=logits)
    return loss, logits
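# A minimal sketch of driving tower_loss in a single-GPU training step (the
# optimizer and its hyperparameters are assumptions, not part of the original):
def _train_op_demo(data_tensor, label_tensor, num_classes):
    loss, logits = tower_loss(data_tensor, label_tensor, num_classes, train_mode=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    return optimizer.minimize(loss)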
def arch_vgg16_multi_conv(self, X, num_classes, dropout_keep_prob=0.8,
                          is_train=False, embedding_size=64):
    arg_scope = vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        _, end_points, net_c = vgg_16_conv(X, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            with tf.variable_scope('Logits_out'):
                # net_1 = slim.max_pool2d(net_c[-5], [32, 32], stride=32, padding='VALID', scope='net_c_1')
                # net_1 = slim.conv2d(net_1, net_1.get_shape()[3], [1, 1], scope='net_1')
                # net_2 = slim.max_pool2d(net_c[-4], [16, 16], stride=16, padding='VALID', scope='net_c_1')
                # net_2 = slim.conv2d(net_2, net_2.get_shape()[3], [1, 1], scope='net_2')
                # net_3 = slim.max_pool2d(net_c[-3], [8, 8], stride=8, padding='VALID', scope='net_c_1')
                # net_3 = slim.conv2d(net_3, net_3.get_shape()[3], [1, 1], scope='net_3')
                net_4 = slim.max_pool2d(net_c[-2], [4, 4], stride=4, padding='VALID', scope='net_c_1')
                net_4 = slim.conv2d(net_4, net_4.get_shape()[3], [1, 1], scope='net_4')
                net_5 = slim.max_pool2d(net_c[-1], [2, 2], stride=2, padding='VALID', scope='net_c_1')
                net_5 = slim.conv2d(net_5, net_5.get_shape()[3], [1, 1], scope='net_5')
                # net_vis = tf.concat([net_1, net_2, net_3, net_4, net_5], 3)
                net_vis = tf.concat([net_4, net_5], 3)
                net_vis = slim.avg_pool2d(net_vis, net_vis.get_shape()[1:3],
                                          padding='VALID', scope='AvgPool_1a_out')  # 1 x 1 x 512
                net_vis = slim.dropout(net_vis, dropout_keep_prob, scope='Dropout_1b_out')
                net_vis = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
                net_vis = slim.fully_connected(net_vis, embedding_size,
                                               activation_fn=tf.nn.relu, scope='Logits_out0')
                net = slim.fully_connected(net_vis, num_classes,
                                           activation_fn=None, scope='Logits_out1')
    return net, net_vis
def CRAFT_net(inputs, is_training=True, reuse=None, weight_decay=0.9):
    with slim.arg_scope(vgg_arg_scope()):
        vgg_res, end_points = vgg_16(inputs)
    with tf.variable_scope('vgg_16', [end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['vgg_16/conv2/conv2_2'],
                 end_points['vgg_16/conv3/conv3_3'],
                 end_points['vgg_16/conv4/conv4_3'],
                 end_points['vgg_16/conv5/conv5_3']]
            net = f[3]

            # VGG end
            net = slim.max_pool2d(net, [3, 3], stride=1, padding='SAME', scope='pool5')  # w/16 512
            net = arous_conv(net, 3, 3, 1024, 6, name='arous_conv')                      # w/16 1024
            net = slim.conv2d(net, 1024, [1, 1], padding='SAME', scope='conv6')          # w/16 1024

            # U-net start
            net = tf.concat([net, f[3]], axis=3)  # w/16 1024 + 512
            net = upconvBlock(net, 512, 256)      # w/16 256
            net = upsample(net, (64, 64))
            net = tf.concat([net, f[2]], axis=3)  # w/8 256 + 512
            net = upconvBlock(net, 256, 128)      # w/8 128
            net = upsample(net, (128, 128))
            net = tf.concat([net, f[1]], axis=3)  # w/4 128 + 256
            net = upconvBlock(net, 128, 64)       # w/4 64
            net = upsample(net, (256, 256))
            net = tf.concat([net, f[0]], axis=3)  # w/2 64 + 128
            net = upconvBlock(net, 64, 32)        # w/2 32
            # U-net end

            net = slim.repeat(net, 2, slim.conv2d, 32, [3, 3])  # w/2 32
            net = slim.conv2d(net, 16, [3, 3], padding='SAME')  # w/2 16
            net = slim.conv2d(net, 16, [1, 1], padding='SAME')  # w/2 16
            net = slim.conv2d(net, 2, [1, 1], padding='SAME')   # w/2 2
    return net, end_points
def build_model(self):
    # get content_img, style_img and define gen_img
    if content_input is not None:
        self.content_path = content_input
    if style_input is not None:
        self.style_path = style_input

    content_img, content_shape = utils.load_content_img(self.content_path)
    style_img = utils.load_style_img(self.style_path, content_shape)
    content_img_shape = content_img.shape
    gen_img = tf.Variable(tf.random_normal(content_img_shape) * 0.256)

    with slim.arg_scope(vgg.vgg_arg_scope()):
        f1, f2, f3, f4, exclude = vgg.vgg_16(
            tf.concat([gen_img, content_img, style_img], axis=0))

        # calculate content_loss and style_loss
        content_loss = utils.cal_content_loss(f3)
        style_loss = utils.cal_style_loss(f1, f2, f3, f4)

        # load vgg model
        vgg_model_path = VGG_MODEL_PATH
        vgg_vars = slim.get_variables_to_restore(include=['vgg_16'], exclude=exclude)
        init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars)
        init_fn(self.sess)
        print('vgg_16 weights load done')

    self.gen_img = gen_img
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.content_loss = content_loss
    self.style_loss = style_loss * W_STYLE
    # the total loss
    self.loss = self.content_loss + self.style_loss
    # starter_learning_rate = 1e1
    # global_step = tf.train.get_global_step()
    # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
    #                                            decay_steps=500, decay_rate=0.98,
    #                                            staircase=True)
    self.opt = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
        self.loss, global_step=self.global_step, var_list=gen_img)

    all_var = tf.global_variables()
    init_var = [v for v in all_var if 'vgg_16' not in v.name]
    init = tf.variables_initializer(var_list=init_var)
    self.sess.run(init)
    self.save = tf.train.Saver()
def vgg_encoding(self, processed_images, is_training, reuse=False):
    # return_fc7/fc7_size are arguments of this repo's modified vgg_16.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        fc7 = vgg.vgg_16(processed_images,
                         num_classes=self.no_classes,
                         is_training=is_training,
                         spatial_squeeze=False,
                         fc_conv_padding='VALID',
                         reuse=reuse,
                         return_fc7=True,
                         fc7_size=self.fc7_size)
    return fc7
def vgg_16(inputs, no_fc=False):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        net, end_points = vgg.vgg_16(inputs, None, is_training=False,
                                     spatial_squeeze=False,
                                     fc_conv_padding='SAME', no_fc=no_fc)
    if no_fc:
        return (end_points['vgg_16/pool5'],
                end_points['vgg_16/pool4'],
                end_points['vgg_16/pool3'])
    else:
        return net, end_points['vgg_16/pool4'], end_points['vgg_16/pool3']
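# Usage sketch for the wrapper above: with no_fc=True it returns a small feature
# pyramid (pool5/pool4/pool3 at strides 32/16/8 relative to the input; the
# placeholder shape is an assumption):
def _vgg_pyramid_demo():
    images = tf.placeholder(tf.float32, [None, 512, 512, 3])
    c5, c4, c3 = vgg_16(images, no_fc=True)
    return c5, c4, c3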
def arch_multi_vgg16_conv(self, X1, X2, X3, num_classes, dropout_keep_prob=0.8, is_train=False):
    arg_scope = vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('arch_multi_vgg16_conv_1'):
            net_vis1, end_points1 = vgg_16_conv(X1, is_training=is_train)
        with tf.variable_scope('arch_multi_vgg16_conv_2'):
            net_vis2, end_points2 = vgg_16_conv(X2, is_training=is_train)
        with tf.variable_scope('arch_multi_vgg16_conv_3'):
            net_vis3, end_points3 = vgg_16_conv(X3, is_training=is_train)
            # net_vis3, end_points3 = alexnet_v2(X3, is_training=is_train)
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            with tf.variable_scope('Logits_out'):
                net_vis1 = slim.avg_pool2d(net_vis1, net_vis1.get_shape()[1:3],
                                           padding='VALID', scope='AvgPool_1a_out')
                net_vis2 = slim.avg_pool2d(net_vis2, net_vis2.get_shape()[1:3],
                                           padding='VALID', scope='AvgPool_2a_out')
                net_vis3 = slim.avg_pool2d(net_vis3, net_vis3.get_shape()[1:3],
                                           padding='VALID', scope='AvgPool_3a_out')
                net_vis = tf.concat([net_vis1, net_vis2, net_vis3], 3)
                # add a fully connected head
                # net = slim.flatten(net_vis, scope='PreLogitsFlatten_out')
                # net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu, scope='Logits_out0')
                # net = slim.fully_connected(net, num_classes, activation_fn=None, scope='Logits_out1')
                net = slim.conv2d(net_vis, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='fc8')
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
    return net, net_vis
def vgg16_fcn_net(image_tensor, number_of_classes, is_training=True, upsample_factor=8):
    # tf.reset_default_graph()
    # Define the model that we want to use -- specify to use only two classes at the last layer
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=number_of_classes,
                                        is_training=is_training,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')
    downsampled_logits_shape = tf.shape(logits)
    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor
    # The shape should be batch_size X width X height X num_classes
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0],
        img_shape[1],
        img_shape[2],
        downsampled_logits_shape[3]
    ])

    # Perform the upsampling x2, fusing the pool4 skip connection
    upsampled_logits = upsample(logits, 'vgg_16/fc8/t_conv_x2', 2,
                                end_points['vgg_16/pool4'], 'conv_pool4',
                                number_of_classes)
    # Perform the upsampling x2 again, fusing the pool3 skip connection
    upsampled_logits = upsample(upsampled_logits, 'vgg_16/fc8/t_conv_x2_x2', 2,
                                end_points['vgg_16/pool3'], 'conv_pool3',
                                number_of_classes)
    # Perform the final upsampling x8
    upsample_filter_tensor_x8 = bilinear_upsample_weights(upsample_factor,
                                                          number_of_classes,
                                                          'vgg_16/fc8/t_conv_x8')
    upsampled_logits = tf.nn.conv2d_transpose(
        upsampled_logits,
        upsample_filter_tensor_x8,
        output_shape=upsampled_logits_shape,
        strides=[1, upsample_factor, upsample_factor, 1],
        padding='SAME')
    return upsampled_logits
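# The upsample helper used above is not defined in this file; a plausible
# FCN-style sketch (the signature and scope names are assumptions, mirroring the
# x16/x8 branches elsewhere in this file): 1x1-project the skip features to
# class logits, double the resolution of `logits` with a bilinear kernel, and
# fuse the two by addition.
def upsample_sketch(logits, kernel_name, factor, skip_features, skip_scope, num_classes):
    with tf.variable_scope(skip_scope):
        skip_logits = slim.conv2d(skip_features, num_classes, [1, 1],
                                  activation_fn=None,
                                  weights_initializer=tf.zeros_initializer)
    kernel = tf.Variable(bilinear_upsample_weights(factor, num_classes),
                         name=kernel_name)
    upsampled = tf.nn.conv2d_transpose(logits, kernel,
                                       output_shape=tf.shape(skip_logits),
                                       strides=[1, factor, factor, 1],
                                       padding='SAME')
    return upsampled + skip_logits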
def vgg16_fcn8_model(images, num_classes, is_training=False,
                     raw_image_shape=(520 - 170, 800), decoder='fcn8'):
    train_image_shape = (224 * 2, 224 * 3)

    if decoder == 'fcn8':
        decoder_fn = mobilenet_v1_fcn_decoder
    elif decoder == 'fcn8_upsample':
        decoder_fn = mobilenet_v1_fcn8_upsample_decoder
    else:
        raise ValueError("the decoder should be either fcn8 or fcn8_upsample")
    if images.dtype != tf.uint8:
        raise ValueError("the image should be uint8")

    images = tf.image.resize_images(images, size=train_image_shape)
    tf.summary.image('input_image_after_rescale_and_resize',
                     tf.expand_dims(images[0], 0))
    processed_images = tf.map_fn(vgg_preprocessing.vgg_image_rescale,
                                 images, dtype=tf.float32)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # 1000 classes instead of 1001.
        logits, end_points = vgg.vgg_16(processed_images, num_classes=1000,
                                        is_training=is_training,
                                        spatial_squeeze=False)
    layer4 = end_points['vgg_16/pool3']
    layer6 = end_points['vgg_16/pool4']
    layer13 = end_points['vgg_16/pool5']

    last_layer = decoder_fn(layer13, layer4, layer6, num_classes)
    last_layer = post_process_logits(end_points, last_layer,
                                     raw_image_shape, train_image_shape)
    return last_layer, end_points
def forward_tran_advers(x_img=None, label_index=None, number_of_classes=5,
                        layer_name=None, Training=True):
    # Compute the gradients of the objective w.r.t. the conv-layer maps,
    # given the logits and label_index.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16_adversarial(inputs=x_img,
                                                    num_classes=number_of_classes,
                                                    is_training=Training,
                                                    fc_conv_padding='VALID')
    prob = tf.nn.softmax(logits)
    if Training:
        prob_max_label = label_index
    else:
        prob_max_label = tf.argmax(prob, 1)
    label_hot = tf.one_hot(prob_max_label, number_of_classes)
    cost = (-1) * tf.reduce_sum(tf.multiply(tf.log(prob), label_hot), axis=1)
    y_c = tf.reduce_sum(tf.multiply(logits, label_hot), axis=1)
    # Grad-CAM
    # target_conv_layer = end_points[layer_name]
    target_conv_layer = x_img
    # gb_grad = tf.gradients(cost, x_img)[0]  # guided Grad-CAM
    target_grad_ac = tf.gradients(y_c, target_conv_layer)[0]           # Grad-CAM
    target_grad_yc = tf.gradients(tf.exp(y_c), target_conv_layer)[0]   # Grad-CAM++
    return target_conv_layer, target_grad_ac, target_grad_yc, logits, end_points
def build_model(self):
    train_imgs = tools.load_train_img(TRAIN_DATA_DIR, self.batch_size, self.img_size)
    style_imgs = tools.load_style_img(STYLE_IMAGE_PATH)

    with slim.arg_scope(model.arg_scope()):
        gen_img, variables = model.inference(train_imgs, reuse=False, name='transform')

        with slim.arg_scope(vgg.vgg_arg_scope()):
            gen_img_processed = [tf.image.per_image_standardization(image)
                                 for image in tf.unstack(gen_img, axis=0,
                                                         num=self.batch_size)]
            f1, f2, f3, f4, exclude = vgg.vgg_16(
                tf.concat([gen_img_processed, train_imgs, style_imgs], axis=0))

            gen_f, img_f, _ = tf.split(f4, 3, 0)
            content_loss = tf.nn.l2_loss(gen_f - img_f) / tf.to_float(tf.size(gen_f))
            style_loss = model.styleloss(f1, f2, f3, f4)

            vgg_model_path = VGG_MODEL_PATH
            vgg_vars = slim.get_variables_to_restore(include=['vgg_16'], exclude=exclude)
            init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars)
            init_fn(self.sess)
            print("vgg's weights load done")

    self.gen_img = gen_img
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.content_loss = content_loss
    self.style_loss = style_loss * self.style_w
    self.loss = self.content_loss + self.style_loss
    self.learn_rate = tf.train.exponential_decay(self.learn_rate_base,
                                                 self.global_step, 1,
                                                 self.learn_rate_decay,
                                                 staircase=True)
    self.opt = tf.train.AdamOptimizer(self.learn_rate).minimize(
        self.loss, global_step=self.global_step, var_list=variables)

    all_var = tf.global_variables()
    init_var = [v for v in all_var if 'vgg_16' not in v.name]
    init = tf.variables_initializer(var_list=init_var)
    self.sess.run(init)
    self.save = tf.train.Saver(var_list=variables)
def init_model(self):
    slim = tf.contrib.slim
    # reset the default graph
    tf.reset_default_graph()
    # input images
    image_tensor = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='image_tensor')
    orig_img_tensor = tf.placeholder(tf.uint8, shape=(1, None, None, 3), name='orig_img_tensor')
    # build the model
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=self.number_of_classes,
                                        is_training=False,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')
    downsampled_logits_shape = tf.shape(logits)
    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor
    # The shape should be batch_size X width X height X num_classes
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0],
        img_shape[1],
        img_shape[2],
        downsampled_logits_shape[3]
    ])

    pool4_feature = end_points['vgg_16/pool4']
    with tf.variable_scope('vgg_16/fc8'):
        aux_logits_16s = slim.conv2d(pool4_feature, self.number_of_classes, [1, 1],
                                     activation_fn=None,
                                     weights_initializer=tf.zeros_initializer,
                                     scope='conv_pool4')

    # Perform the upsampling
    upsample_filter_np_x2 = bilinear_upsample_weights(2,  # upsample_factor,
                                                      self.number_of_classes)
    upsample_filter_tensor_x2 = tf.Variable(upsample_filter_np_x2,
                                            name='vgg_16/fc8/t_conv_x2')
    upsampled_logits = tf.nn.conv2d_transpose(logits, upsample_filter_tensor_x2,
                                              output_shape=tf.shape(aux_logits_16s),
                                              strides=[1, 2, 2, 1],
                                              padding='SAME')
    upsampled_logits = upsampled_logits + aux_logits_16s

    upsample_filter_np_x16 = bilinear_upsample_weights(self.upsample_factor,
                                                       self.number_of_classes)
    upsample_filter_tensor_x16 = tf.Variable(upsample_filter_np_x16,
                                             name='vgg_16/fc8/t_conv_x16')
    upsampled_logits = tf.nn.conv2d_transpose(
        upsampled_logits, upsample_filter_tensor_x16,
        output_shape=upsampled_logits_shape,
        strides=[1, self.upsample_factor, self.upsample_factor, 1],
        padding='SAME')

    # Tensor to get the final prediction for each pixel -- pay
    # attention that we don't need softmax in this case because
    # we only need the final decision. If we also need the respective
    # probabilities we will have to apply softmax.
    pred = tf.argmax(upsampled_logits, axis=3, name='predictions')
    probabilities = tf.nn.softmax(upsampled_logits, name='probabilities')

    # restore parameters from the trained checkpoint
    checkpoint_path = tf.train.latest_checkpoint(self.ckpt)
    assert checkpoint_path, "no checkpoint exists, can't perform prediction."
    variables_to_restore = slim.get_model_variables()

    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    init_op = tf.global_variables_initializer()
    init_local_op = tf.local_variables_initializer()

    saver = tf.train.Saver(max_to_keep=1)
    # Run the initializers.
    sess.run(init_op)
    sess.run(init_local_op)
    saver.restore(sess, checkpoint_path)
    logging.debug('checkpoint restored from [{0}]'.format(checkpoint_path))
    return sess, pred, orig_img_tensor, probabilities
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import vgg

inputs = tf.placeholder(tf.float32, (None, 224, 224, 3), name='inputs')
# Convert RGB in [0, 1] to mean-subtracted BGR, the convention of Caffe-style VGG weights.
r, g, b = tf.split(axis=3, num_or_size_splits=3, value=inputs * 255.0)
VGG_MEAN = [103.939, 116.779, 123.68]
bgr = tf.concat(values=[b - VGG_MEAN[0], g - VGG_MEAN[1], r - VGG_MEAN[2]], axis=3)

with tf.contrib.slim.arg_scope(vgg.vgg_arg_scope()):
    fc8, endpoints = vgg.vgg_16(bgr, is_training=False)

for k, v in endpoints.items():  # .iteritems() is Python 2 only
    print(k, v)
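# To also run the tower, the standard slim restore pattern can follow (the
# checkpoint path below is a placeholder, not from the original script):
import numpy as np

saver = tf.train.Saver(tf.contrib.slim.get_model_variables('vgg_16'))
with tf.Session() as sess:
    saver.restore(sess, '/path/to/vgg_16.ckpt')  # hypothetical checkpoint path
    logits = sess.run(fc8, feed_dict={inputs: np.zeros((1, 224, 224, 3), np.float32)})
    print(logits.shape)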
def fcn(image_tensor, upsample_factor, number_of_classes, annotation_tensor):
    # Define the model that we want to use -- specify to use only two classes at the last layer
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(image_tensor,
                                        num_classes=number_of_classes,
                                        spatial_squeeze=False,
                                        fc_conv_padding='SAME')

    downsampled_logits_shape = tf.shape(logits)
    img_shape = tf.shape(image_tensor)

    # Calculate the output size of the upsampled tensor
    # The shape should be batch_size X width X height X num_classes
    upsampled_logits_shape = tf.stack([
        downsampled_logits_shape[0],
        img_shape[1],
        img_shape[2],
        downsampled_logits_shape[3]
    ])

    if upsample_factor == 32:
        upsample_filter_np_x32 = bilinear_upsample_weights(upsample_factor,
                                                           number_of_classes)
        upsample_filter_tensor_x32 = tf.Variable(upsample_filter_np_x32,
                                                 name='vgg_16/fc8/t_conv_x32')
        # FCN-32s: upsample the fc8 logits directly
        upsampled_logits = tf.nn.conv2d_transpose(
            logits, upsample_filter_tensor_x32,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')
    elif upsample_factor == 16:
        pool4_feature = end_points['vgg_16/pool4']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_16s = slim.conv2d(pool4_feature, number_of_classes, [1, 1],
                                         activation_fn=None,
                                         weights_initializer=tf.zeros_initializer,
                                         scope='conv_pool4')

        # Perform the upsampling
        upsample_filter_np_x2 = bilinear_upsample_weights(2,  # upsample_factor,
                                                          number_of_classes)
        upsample_filter_tensor_x2 = tf.Variable(upsample_filter_np_x2,
                                                name='vgg_16/fc8/t_conv_x2')
        upsampled_logits = tf.nn.conv2d_transpose(
            logits, upsample_filter_tensor_x2,
            output_shape=tf.shape(aux_logits_16s),
            strides=[1, 2, 2, 1],
            padding='SAME')
        upsampled_logits = upsampled_logits + aux_logits_16s

        upsample_filter_np_x16 = bilinear_upsample_weights(upsample_factor,
                                                           number_of_classes)
        upsample_filter_tensor_x16 = tf.Variable(upsample_filter_np_x16,
                                                 name='vgg_16/fc8/t_conv_x16')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits, upsample_filter_tensor_x16,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')
    elif upsample_factor == 8:
        pool3_feature = end_points['vgg_16/pool3']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_8s = slim.conv2d(pool3_feature, number_of_classes, [1, 1],
                                        activation_fn=None,
                                        weights_initializer=tf.zeros_initializer,
                                        scope='conv_pool3')

        pool4_feature = end_points['vgg_16/pool4']
        with tf.variable_scope('vgg_16/fc8'):
            aux_logits_16s = slim.conv2d(pool4_feature, number_of_classes, [1, 1],
                                         activation_fn=None,
                                         weights_initializer=tf.zeros_initializer,
                                         scope='conv_pool4')

        # Upsample the fc8 logits x2 to get the 16s prediction
        upsample_filter_np_x2 = bilinear_upsample_weights(2,  # upsample_factor,
                                                          number_of_classes)
        upsample_filter_tensor_x2_1 = tf.Variable(upsample_filter_np_x2,
                                                  name='vgg_16/fc8/t_conv_x2_1')
        upsampled_logits = tf.nn.conv2d_transpose(
            logits, upsample_filter_tensor_x2_1,
            output_shape=tf.shape(aux_logits_16s),
            strides=[1, 2, 2, 1],
            padding='SAME')

        # After fusing with pool4, upsample x2 once more to get the 8s prediction
        upsampled_logits = upsampled_logits + aux_logits_16s
        upsample_filter_tensor_x2_2 = tf.Variable(upsample_filter_np_x2,
                                                  name='vgg_16/fc8/t_conv_x2_2')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits, upsample_filter_tensor_x2_2,
            output_shape=tf.shape(aux_logits_8s),
            strides=[1, 2, 2, 1],
            padding='SAME')
        upsampled_logits = upsampled_logits + aux_logits_8s

        upsample_filter_np_x8 = bilinear_upsample_weights(upsample_factor,
                                                          number_of_classes)
        upsample_filter_tensor_x8 = tf.Variable(upsample_filter_np_x8,
                                                name='vgg_16/fc8/t_conv_x8')
        upsampled_logits = tf.nn.conv2d_transpose(
            upsampled_logits, upsample_filter_tensor_x8,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1],
            padding='SAME')

    lbl_onehot = tf.one_hot(annotation_tensor, number_of_classes)
    cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=upsampled_logits,
                                                              labels=lbl_onehot)
    cross_entropy_loss = tf.reduce_mean(tf.reduce_sum(cross_entropies, axis=-1))

    return upsampled_logits, cross_entropy_loss
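# Several snippets above call bilinear_upsample_weights without defining it; a
# common implementation (two-argument form; vgg16_fcn_net's three-argument call
# presumably also wraps the result in a named variable) initializes a transposed
# convolution kernel to bilinear interpolation:
import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    filter_size = 2 * factor - factor % 2  # kernel size for the given factor
    center = factor - 1 if filter_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    kernel = ((1 - abs(og[0] - center) / factor) *
              (1 - abs(og[1] - center) / factor))
    weights = np.zeros((filter_size, filter_size,
                        number_of_classes, number_of_classes), dtype=np.float32)
    for i in range(number_of_classes):
        weights[:, :, i, i] = kernel  # upsample each class channel independently
    return weights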
def Graph(x, y, res_weight, vgg_weight, inc_weight, y_pred_res, y_pred_vgg, y_pred_inc, raw_image):
    num_classes = 110
    batch_size = 1
    weight = [args.resnet_weight, args.vgg_weight]
    bias = tf.reshape(tf.constant([123.68, 116.78, 103.94]), [1, 1, 1, 3])
    x_int = tf.image.resize_bilinear(x, [224, 224], align_corners=True)
    x_int = x_int - bias

    with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
        logits_res_v1_50, end_points_res_v1_50 = resnet_v1.resnet_v1_50(
            x_int, num_classes=num_classes, is_training=False,
            scope='resnet_v1_50', reuse=tf.AUTO_REUSE)
        end_points_res_v1_50['logits'] = tf.squeeze(
            end_points_res_v1_50['resnet_v1_50/logits'], [1, 2])

    with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
        logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(
            x_int, num_classes=num_classes, is_training=False,
            scope='vgg_16', reuse=tf.AUTO_REUSE)
        end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8']

    one_hot = tf.one_hot(y, num_classes)
    sum_prob = tf.clip_by_value(
        (res_weight * y_pred_res + vgg_weight * y_pred_vgg), 0, args.confidence)
    logits_resnet = end_points_res_v1_50['logits']
    logits_vgg = end_points_vgg_16['logits']
    logits = res_weight * logits_resnet + vgg_weight * logits_vgg
    cross_entropy = tf.losses.softmax_cross_entropy(one_hot, logits,
                                                    label_smoothing=0.0, weights=1.0)
    grad = tf.gradients([cross_entropy], [x])[0]
    # grad = tf.layers.dropout(grad, noise_shape=[1, 299, 299, 3])
    if args.norm:
        grad = grad / tf.norm(grad)
    else:
        # grad = grad / tf.reshape(tf.norm(grad, axis=-1), [1, 299, 299, 1])
        grad = tf.transpose(grad, [0, 3, 1, 2])
        grad = grad / tf.reshape(
            tf.norm(tf.reshape(grad, [batch_size, 3, -1]), axis=2),
            [batch_size, 3, 1, 1])
        grad = tf.transpose(grad, [0, 2, 3, 1])
    if args.is_mask:
        mask = tf.ones(shape=[int(299 - 2 * args.mask_size),
                              int(299 - 2 * args.mask_size), 3])
        mask = tf.pad(mask, tf.constant([[args.mask_size, args.mask_size],
                                         [args.mask_size, args.mask_size],
                                         [0, 0]]))
        grad = grad * mask
    alpha = args.maxa - (args.maxa - args.mina) / (args.confidence) * sum_prob
    x = x - alpha * grad  # * tf.concat([tf.ones([299, 299, 1]), tf.ones([299, 299, 1]), tf.zeros([299, 299, 1])], -1)
    x = tf.clip_by_value(x, 0, 255)
    out_x = x - raw_image
    out_x = tf.floor(tf.abs(out_x)) * tf.sign(out_x) + raw_image
    out_x = tf.round(tf.clip_by_value(out_x, 0, 255))
    return x, out_x
def net_arg_scope():
    if net_type == 'resnet':
        return resnet_v1.resnet_arg_scope()
    elif net_type == 'vgg':
        return vgg.vgg_arg_scope(False)
    else:
        # avoid silently returning None for an unknown net_type
        raise ValueError('unknown net_type: %s' % net_type)
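# Usage sketch (net_type is a module-level flag in the original; the placeholder
# shape below is an assumption):
def _arg_scope_demo():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with slim.arg_scope(net_arg_scope()):
        return vgg.vgg_16(images, num_classes=1000, is_training=False)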
feed_dict_to_use = {is_training_placeholder: True}

upsample_factor = 16
number_of_classes = 21
log_folder = os.path.join(FLAGS.output_dir, 'train')
vgg_checkpoint_path = FLAGS.checkpoint_path

# Creates a variable to hold the global_step.
global_step = tf.Variable(0, trainable=False, name='global_step', dtype=tf.int64)

# Define the model that we want to use -- specify to use only two classes at the last layer
with slim.arg_scope(vgg.vgg_arg_scope()):
    logits, end_points = vgg.vgg_16(image_tensor,
                                    num_classes=number_of_classes,
                                    is_training=is_training_placeholder,
                                    spatial_squeeze=False,
                                    fc_conv_padding='SAME')

downsampled_logits_shape = tf.shape(logits)
img_shape = tf.shape(image_tensor)

# Calculate the output size of the upsampled tensor
# The shape should be batch_size X width X height X num_classes
upsampled_logits_shape = tf.stack([
    downsampled_logits_shape[0],
    img_shape[1],
    img_shape[2],
    downsampled_logits_shape[3]
])
def run_training():
    # 1. create log and model save dirs named after the current datetime
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    models_dir = os.path.join("saved_models", subdir, "models")
    if not os.path.isdir(models_dir):  # Create the model directory if it doesn't exist
        os.makedirs(models_dir)
    logs_dir = os.path.join("saved_models", subdir, "logs")
    if not os.path.isdir(logs_dir):  # Create the log directory if it doesn't exist
        os.makedirs(logs_dir)
    topn_models_dir = os.path.join("saved_models", subdir, "topn")  # dir used to save top-accuracy models
    if not os.path.isdir(topn_models_dir):  # Create the topn model directory if it doesn't exist
        os.makedirs(topn_models_dir)
    topn_file = open(os.path.join(topn_models_dir, "topn_acc.txt"), "a+")
    topn_file.close()

    # 2. load datasets and define placeholders
    conf = config.get_config()
    train_dataset = input_dataset.TFRecordDataset(conf)
    train_iterator, train_next_element = train_dataset.generateDataset(
        dataset_path=conf.train_dataset_path, batch_size=conf.batch_size)
    test_dataset = input_dataset.TFRecordDataset(conf)
    test_iterator, test_next_element = test_dataset.generateDataset(
        dataset_path=conf.test_dataset_path, batch_size=conf.batch_size, test_mode=1)

    phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
    images_placeholder = tf.placeholder(name='input',
                                        shape=[None, conf.input_img_height,
                                               conf.input_img_width, 3],
                                        dtype=tf.float32)
    labels_placeholder = tf.placeholder(name='labels', shape=[None, ], dtype=tf.int64)

    # Create the model.
    # with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(batch_norm_updates_collections=None)):
    #     predictions, end_points = mobilenet_v1.mobilenet_v1(images_placeholder, is_training=phase_train_placeholder, num_classes=3, prediction_fn=False)
    with slim.arg_scope(vgg.vgg_arg_scope()):
        predictions, end_points = vgg.vgg_a(images_placeholder, num_classes=3,
                                            is_training=phase_train_placeholder)
    output = tf.argmax(predictions, 1, name="output")
    softmax_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predictions,
                                                       labels=labels_placeholder),
        name="loss")
    tf.add_to_collection('losses', softmax_loss)

    correct_prediction = tf.equal(tf.argmax(predictions, 1), labels_placeholder)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # adjust learning rate
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(conf.learning_rate, global_step,
                                               conf.learning_rate_decay_step,
                                               conf.learning_rate_decay_rate,
                                               staircase=True)
    custom_loss = tf.get_collection("losses")
    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(custom_loss + regularization_losses, name='total_loss')

    # optimize loss and update
    train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
                                      epsilon=0.1).minimize(total_loss,
                                                            global_step=global_step)
    # train_op = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True).minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=5)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for epoch in range(conf.max_nrof_epochs):
            sess.run(train_iterator.initializer)
            while True:
                use_time = 0
                try:
                    images_train, labels_train = sess.run(train_next_element)
                    start_time = time.time()
                    input_dict = {phase_train_placeholder: True,
                                  images_placeholder: images_train,
                                  labels_placeholder: labels_train}
                    step, lr, train_loss, _, train_accuracy = sess.run(
                        [global_step, learning_rate, total_loss, train_op, accuracy],
                        feed_dict=input_dict)
                    end_time = time.time()
                    use_time += (end_time - start_time)

                    # display training progress
                    if step % conf.display_iter == 0:
                        print("step:%d lr:%f time:%.3f total_loss:%.3f acc:%.3f epoch:%d"
                              % (step, lr, use_time, train_loss, train_accuracy, epoch))
                        use_time = 0
                    if step % conf.test_save_iter == 0:
                        filename_cpkt = os.path.join(models_dir, "%d.ckpt" % step)
                        saver.save(sess, filename_cpkt)
                        # evaluate(models_dir)
                        sess.run(test_iterator.initializer)
                        total_acc = 0
                        test_cnt = 0
                        while True:
                            try:
                                test_cnt += 1
                                test_img, test_label = sess.run(test_next_element)
                                fd = {images_placeholder: test_img,
                                      labels_placeholder: test_label,
                                      phase_train_placeholder: False}
                                acc = sess.run(accuracy, feed_dict=fd)
                                total_acc += acc
                            except tf.errors.OutOfRangeError:
                                valid_acc = (total_acc * 1.0 / test_cnt) * 100
                                print("test accuracy %.2f" % valid_acc)
                                with open(os.path.join(topn_models_dir, "topn_acc.txt"), "a+") as tmp_f:
                                    tmp_f.write("step : %d accuracy : %f\n" % (step, valid_acc))
                                if valid_acc > conf.topn_threshold:
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.meta" % step),
                                                    os.path.join(topn_models_dir, "%d.ckpt.meta" % step))
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.index" % step),
                                                    os.path.join(topn_models_dir, "%d.ckpt.index" % step))
                                    shutil.copyfile(os.path.join(models_dir, "%d.ckpt.data-00000-of-00001" % step),
                                                    os.path.join(topn_models_dir, "%d.ckpt.data-00000-of-00001" % step))
                                break
                except tf.errors.OutOfRangeError:
                    print("End of epoch")
                    break
def run_hand(all_video_list, video_output_parent_path, use_bn, train_vgg,
             checkpoint_path, backbone_net_ckpt_path):
    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        img_size = tf.placeholder(dtype=tf.int32, shape=(2,), name='original_image_size')
        img_normalized = raw_img / 255 - 0.5

    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    logger.info('initializing model...')
    net = PafNet(inputs_x=vgg_outputs, hm_channel_num=2, use_bn=use_bn)
    hm_pre, added_layers_out = net.gen_hand_net()

    hm_up = tf.image.resize_area(hm_pre[5], img_size)
    # cpm_up = tf.image.resize_area(cpm_pre[5], img_size)
    # hm_up = hm_pre[5]
    # cpm_up = cpm_pre[5]
    smoother = Smoother({'data': hm_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()
    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3),
                                      pooling_type='MAX', padding='SAME')
    tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                            gaussian_heatMat, tf.zeros_like(gaussian_heatMat))

    logger.info('initialize saver...')
    # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers')
    # trainable_var_list = []
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')

    restorer = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize session...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        logger.info('restoring vgg weights...')
        restorer.restore(sess, backbone_net_ckpt_path)
        logger.info('restoring from checkpoint...')
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
        logger.info('initialization done')
        action_list = all_video_list.keys()
        for action in action_list:
            video_list = all_video_list[action]
            for video in video_list:
                dir_name = video.split('/')
                name = dir_name[-2]
                save_path = video_output_parent_path + '/' + name
                anno_loader = cut_body_part(anno_file=save_path + '/' + action + '_lstm.json',
                                            coco_images=save_path + '/pics/')
                img_info = []
                anno_info = []
                for img, hand_list, img_meta, anno in tqdm(anno_loader.crop_part()):
                    for hand in hand_list:
                        position = hand['position']
                        ori_h = position[3] - position[1] + 1
                        ori_w = position[2] - position[0] + 1
                        # run the heatmap on the original and the flipped crop,
                        # then average the two peak locations
                        peaks_origin, heatmap_origin = sess.run(
                            [tensor_peaks, hm_up],
                            feed_dict={raw_img: hand['hand'][np.newaxis, :, :, :],
                                       img_size: [ori_h, ori_w]})
                        re_origin = np.where(peaks_origin[0, :, :, 0] ==
                                             np.max(peaks_origin[0, :, :, 0]))
                        peaks_flip, heatmap_flip = sess.run(
                            [tensor_peaks, hm_up],
                            feed_dict={raw_img: np.fliplr(hand['hand'][np.newaxis, :, :, :]),
                                       img_size: [ori_h, ori_w]})
                        peaks_flip = np.fliplr(peaks_flip)
                        re_flip = np.where(peaks_flip[0, :, :, 0] ==
                                           np.max(peaks_flip[0, :, :, 0]))
                        anno['keypoints'][hand['idx'] * 3] = int(
                            position[0] + (re_origin[1][0] + re_flip[1][0]) / 2)
                        anno['keypoints'][hand['idx'] * 3 + 1] = int(
                            position[1] + (re_origin[0][0] + re_flip[0][0]) / 2)
                    anno_info.append(anno)
                    img_info.append(img_meta)
                ref = {"images": img_info, "annotations": anno_info}
                out_json = save_path + '/' + action + '_hand_coco.json'
                with open(out_json, "w") as f:
                    json.dump(ref, f)
                print('wrote to ' + out_json)