def flatten_fully_connected(inputs,
                            num_outputs,
                            activation_fn=tf.nn.relu,
                            normalizer_fn=None,
                            normalizer_params=None,
                            weights_initializer=slim.xavier_initializer(),
                            weights_regularizer=None,
                            biases_initializer=tf.zeros_initializer(),
                            biases_regularizer=None,
                            reuse=None,
                            variables_collections=None,
                            outputs_collections=None,
                            trainable=True,
                            scope=None):
    """Flatten `inputs` to rank 2 if needed, then apply a fully connected layer.

    Thin convenience wrapper around `slim.fully_connected`: a conv feature
    map (rank > 2) is first collapsed with `slim.flatten`; a rank-2 tensor
    passes through untouched.  Every other argument is forwarded unchanged
    to `slim.fully_connected`.
    """
    with tf.variable_scope(scope, 'flatten_fully_connected', [inputs]):
        flat = inputs
        # Only rank-3+ inputs need flattening.
        if flat.shape.ndims > 2:
            flat = slim.flatten(flat)
        return slim.fully_connected(
            flat,
            num_outputs,
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
            normalizer_params=normalizer_params,
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=biases_initializer,
            biases_regularizer=biases_regularizer,
            reuse=reuse,
            variables_collections=variables_collections,
            outputs_collections=outputs_collections,
            trainable=trainable,
            scope=scope)
def _region_proposal(self, net_conv, is_training):
    """Build the RPN head: 3x3 conv -> bidirectional LSTM -> 1x1 convs that
    emit per-anchor classification scores and bbox regression deltas.

    Results are stored into ``self._predictions`` (scores, probabilities,
    bbox deltas, and — at test time — the proposed rois).
    NOTE(review): assumes `net_conv` is an NHWC feature map — confirm with caller.
    """
    initializer = slim.xavier_initializer(uniform=True)
    rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training,
                      weights_initializer=initializer, scope="rpn_conv/3x3")
    self._act_summaries.append(rpn)
    hidden_num = 128
    # bi_lstm shape: [-1, hidden_num * 2]
    bi_lstm = self._BiLstm(rpn, cfg.RPN_CHANNELS, hidden_num, name="bi_lstm")
    # Dynamic shape so the head works with variable-size inputs.
    shape = tf.shape(rpn)
    N, H, W, _ = shape[0], shape[1], shape[2], shape[3]
    bi_lstm_reshape = tf.reshape(bi_lstm, [N, H, W, hidden_num * 2])
    # 1x1 conv acting as a per-position fully connected layer.
    fc = slim.conv2d(bi_lstm_reshape, 512, [1, 1], weights_initializer=initializer,
                     padding='VALID', scope='conv_fc')
    # use 1x1 conv as FC (N, H, W, num_anchors * 2)
    rpn_cls_score = slim.conv2d(fc, self._num_anchors * 2, [1, 1],
                                weights_initializer=initializer, padding='VALID',
                                activation_fn=None, scope='rpn_cls_score')
    # use 1x1 conv as FC (N, H, W, num_anchors * 4)
    rpn_bbox_pred = slim.conv2d(fc, self._num_anchors * 4, [1, 1],
                                weights_initializer=initializer, padding='VALID',
                                activation_fn=None, scope='rpn_bbox_pred')
    # (N, H, W, num_anchors * 2) -> (N, H, W * num_anchors, 2)
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
    rpn_cls_prob = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob")
    # (N, H, W*num_anchors, 2) -> (N, H, W, num_anchors*2)
    rpn_cls_prob_reshape = self._reshape_layer(rpn_cls_prob, self._num_anchors * 2, "rpn_cls_prob_reshape")
    if is_training:
        # Training: only anchor targets are needed; rois come from elsewhere.
        self._anchor_target_layer(rpn_cls_score, "anchor")
    else:
        if cfg.TEST.MODE == 'nms':
            rois, _ = self._proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
            rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
            raise NotImplementedError
        self._predictions["rois"] = rois
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    # NOTE(review): the "rpn_cls_prob" key stores the *reshaped* probability
    # tensor, not `rpn_cls_prob` itself — looks intentional but confirm.
    self._predictions["rpn_cls_prob"] = rpn_cls_prob_reshape
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
def build_bisenet(self, reuse=False):
    """
    Builds the BiSeNet model.

    Arguments:
      reuse: Reuse variable or not

    Returns:
      BiSeNet model (stored on the instance: self.net, and when the mode
      requires supervision heads, self.sup1 / self.sup2; self.init_fn holds
      the frontend checkpoint-restore function).

    FIX: removed leftover debug statements — a `print('111111111111111', ...)`
    followed by `exit()` — which terminated the whole process before the
    fusion/output layers were ever built.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    batch_norm_params = self.model_config['batch_norm_params']
    init_method = self.model_config['conv_config']['init_method']
    if init_method == 'kaiming_normal':
        initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)
    else:
        initializer = slim.xavier_initializer()
    with tf.variable_scope('spatial_net', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                # Three stride-2 stages: spatial path output is 1/8 resolution.
                spatial_net = ConvBlock(self.images, n_filters=64, kernel_size=[7, 7], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[1, 1])

    frontend_config = self.model_config['frontend_config']
    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        self.images, frontend_config, self.is_training(), reuse)

    ### Combining the paths
    with tf.variable_scope('combine_path', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                # tail part: global average context from the deepest stage.
                size = tf.shape(end_points['pool5'])[1:3]
                global_context = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
                global_context = slim.conv2d(global_context, 128, 1, [1, 1], activation_fn=None)
                global_context = tf.nn.relu(slim.batch_norm(global_context, fused=True))
                global_context = tf.image.resize_bilinear(global_context, size=size)

                net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)
                net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)

                net_5 = tf.add(net_5, global_context)
                net_5 = Upsampling(net_5, scale=2)
                net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                net_4 = tf.add(net_4, net_5)
                net_4 = Upsampling(net_4, scale=2)
                net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                context_net = net_4

                net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
                # Extra refinement on the auxiliary branches before the
                # supervision heads below.
                net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

                # Upsampling + dilation or only Upsampling
                net = Upsampling(net, scale=2)
                net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                                  biases_initializer=None, normalizer_fn=slim.batch_norm)
                net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                self.net = Upsampling(net, 4)
                # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                # self.net = Upsampling(net, scale=8)

                if self.mode in ['train', 'validation', 'test']:
                    # Auxiliary supervision heads on the two context branches.
                    sup1 = slim.conv2d(net_5, self.num_classes, [1, 1], activation_fn=None, scope='supl1')
                    sup2 = slim.conv2d(net_4, self.num_classes, [1, 1], activation_fn=None, scope='supl2')
                    self.sup1 = Upsampling(sup1, scale=16)
                    self.sup2 = Upsampling(sup2, scale=8)
    self.init_fn = init_fn
def build(self):
    """Build the multi-GPU training graph for the CenterNet-style detector.

    For each GPU tower this creates input placeholders (image, heatmap,
    width/height, offset regression, index and regression-mask targets),
    computes the tower losses, and collects per-tower gradients.  After the
    tower loop the gradients are averaged, summaries are attached, the
    train op is assembled (optionally with EMA weight averaging), and a
    session is created and all variables are initialized.

    Side effects: sets self.inputs, self.outputs, self.val_outputs,
    self.summaries and self.sess.
    """
    with self._graph.as_default(), tf.device('/cpu:0'):
        # Create an optimizer that performs gradient descent.
        opt, lr, global_step = self.get_opt()

        ##some global placeholder
        L2_reg = tf.placeholder(tf.float32, name="L2_reg")
        training = tf.placeholder(tf.bool, name="training_flag")

        total_loss_to_show = 0.
        images_place_holder_list = []
        hm_gt_place_holder_list = []
        wh_gt_place_holder_list = []
        reg_place_holder_list = []
        ind_place_holder_list = []
        regmask_place_holder_list = []

        weights_initializer = slim.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.)
        biases_regularizer = tf.no_regularizer
        # L2 strength is a placeholder so it can be scheduled per step.
        weights_regularizer = tf.contrib.layers.l2_regularizer(L2_reg)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfg.TRAIN.num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % (i)) as scope:
                        with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
                            if cfg.MODEL.deployee:
                                # Deployment/export mode: fixed input size, batch 1.
                                images_ = tf.placeholder(tf.float32, [1, cfg.DATA.hin, cfg.DATA.win, cfg.DATA.channel], name="images")
                            else:
                                ###fix size
                                images_ = tf.placeholder(tf.float32, [None,None,None, cfg.DATA.channel], name="images")
                            hm_ = tf.placeholder(tf.float32, [cfg.TRAIN.batch_size, None, None, cfg.DATA.num_class], name="heatmap_target")
                            wh_ = tf.placeholder(tf.float32, [cfg.TRAIN.batch_size, None, 2], name="wh_target")
                            reg_ = tf.placeholder(tf.float32, [cfg.TRAIN.batch_size, None, 2], name="reg_target")
                            ind_ = tf.placeholder(tf.float32, [cfg.TRAIN.batch_size, None], name="ind_target")
                            regmask_ = tf.placeholder(tf.float32, [cfg.TRAIN.batch_size, None], name="regmask_target")

                            ###total anchor
                            images_place_holder_list.append(images_)
                            hm_gt_place_holder_list.append(hm_)
                            wh_gt_place_holder_list.append(wh_)
                            reg_place_holder_list.append(reg_)
                            ind_place_holder_list.append(ind_)
                            regmask_place_holder_list.append(regmask_)

                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                 slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                weights_initializer=weights_initializer,
                                                biases_initializer=biases_initializer):
                                hm_loss, wh_loss, reg_loss, l2_loss = self.tower_loss(
                                    scope, images_, hm_, wh_, reg_, ind_, regmask_, L2_reg, training)

                                ##use muti gpu ,large batch
                                # l2 loss is only added on the final tower, so it is
                                # counted once across the averaged gradients.
                                if i == cfg.TRAIN.num_gpu - 1:
                                    total_loss = tf.add_n([hm_loss, wh_loss, reg_loss, l2_loss])
                                else:
                                    total_loss = tf.add_n([hm_loss, wh_loss, reg_loss])
                                total_loss_to_show += total_loss
                                # Reuse variables for the next tower.
                                tf.get_variable_scope().reuse_variables()

                                ##when use batchnorm, updates operations only from the
                                ## final tower. Ideally, we should grab the updates from all towers
                                # but these stats accumulate extremely fast so we can ignore the
                                # other stats from the other towers without significant detriment.
                                bn_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope)

                                # Retain the summaries from the final tower.
                                self.summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                                ###freeze some params
                                train_var_list = self.frozen()
                                # Calculate the gradients for the batch of data on this CIFAR tower.
                                grads = opt.compute_gradients(total_loss, train_var_list)
                                # Keep track of the gradients across all towers.
                                tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = self.average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        # NOTE(review): the per-loss scalars below use the *last* tower's
        # values (loop variables) — presumably intentional; confirm.
        self.add_summary(tf.summary.scalar('learning_rate', lr))
        self.add_summary(tf.summary.scalar('total_loss', total_loss_to_show))
        self.add_summary(tf.summary.scalar('hm_loss', hm_loss))
        self.add_summary(tf.summary.scalar('wh_loss', wh_loss))
        self.add_summary(tf.summary.scalar('reg_loss', reg_loss))
        self.add_summary(tf.summary.scalar('l2_loss', l2_loss))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                self.add_summary(tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            self.add_summary(tf.summary.histogram(var.op.name, var))

        if self.ema_weights:
            # Track the moving averages of all trainable variables.
            variable_averages = tf.train.ExponentialMovingAverage(
                0.9, global_step)
            variables_averages_op = variable_averages.apply(tf.trainable_variables())
            # Group all updates to into a single train op.
            train_op = tf.group(apply_gradient_op, variables_averages_op, *bn_update_ops)
        else:
            train_op = tf.group(apply_gradient_op, *bn_update_ops)

        ###set inputs and ouputs
        self.inputs = [images_place_holder_list,
                       hm_gt_place_holder_list,
                       wh_gt_place_holder_list,
                       reg_place_holder_list,
                       ind_place_holder_list,
                       regmask_place_holder_list,
                       L2_reg,
                       training]
        self.outputs = [train_op,
                        total_loss_to_show,
                        hm_loss,
                        wh_loss,
                        reg_loss,
                        l2_loss,
                        lr]
        self.val_outputs = [total_loss_to_show,
                            hm_loss,
                            wh_loss,
                            reg_loss,
                            l2_loss,
                            lr]

        tf_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False)
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_config)
        ##init all variables
        init = tf.global_variables_initializer()
        self.sess.run(init)
def build(self):
    """Build the multi-GPU training graph for the box/label detector.

    Creates per-tower placeholders (images, boxes, labels), computes tower
    losses, averages gradients across towers, attaches summaries, and
    assembles the train op (optionally with EMA weight averaging).

    Side effects: sets self.inputs, self.outputs, self.val_outputs and
    self.summaries, then runs the variable initializer.
    NOTE(review): self.sess is used at the end but never created in this
    method — it is assumed to exist on the instance already; confirm.
    """
    with self._graph.as_default(), tf.device('/cpu:0'):
        # Create an optimizer that performs gradient descent.
        opt, lr, global_step = self.get_opt()

        ##some global placeholder
        # NOTE(review): keep_prob is exposed in self.inputs but not used in
        # this graph-building code — presumably consumed by tower_loss; confirm.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        L2_reg = tf.placeholder(tf.float32, name="L2_reg")
        training = tf.placeholder(tf.bool, name="training_flag")

        total_loss_to_show = 0.
        images_place_holder_list = []
        labels_place_holder_list = []
        boxes_place_holder_list = []

        weights_initializer = slim.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.)
        biases_regularizer = tf.no_regularizer
        # L2 strength is a placeholder so it can be scheduled per step.
        weights_regularizer = tf.contrib.layers.l2_regularizer(L2_reg)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfg.TRAIN.num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % (i)) as scope:
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable], device='/cpu:0'):
                            images_ = tf.placeholder(tf.float32, [None, None, None, 3], name="images")
                            boxes_ = tf.placeholder(
                                tf.float32, [cfg.TRAIN.batch_size, None, 4], name="input_boxes")
                            labels_ = tf.placeholder(
                                tf.int64, [cfg.TRAIN.batch_size, None], name="input_labels")
                            ###total anchor
                            images_place_holder_list.append(images_)
                            labels_place_holder_list.append(labels_)
                            boxes_place_holder_list.append(boxes_)

                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                 slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                weights_initializer=weights_initializer,
                                                biases_initializer=biases_initializer):
                                reg_loss, cla_loss, l2_loss = self.tower_loss(
                                    scope, images_, labels_, boxes_, L2_reg, training)

                                ##use muti gpu ,large batch
                                # l2 loss is only added on the final tower, so it is
                                # counted once across the averaged gradients.
                                if i == cfg.TRAIN.num_gpu - 1:
                                    total_loss = tf.add_n(
                                        [reg_loss, cla_loss, l2_loss])
                                else:
                                    total_loss = tf.add_n(
                                        [reg_loss, cla_loss])
                                total_loss_to_show += total_loss
                                # Reuse variables for the next tower.
                                tf.get_variable_scope().reuse_variables()

                                ##when use batchnorm, updates operations only from the
                                ## final tower. Ideally, we should grab the updates from all towers
                                # but these stats accumulate extremely fast so we can ignore the
                                # other stats from the other towers without significant detriment.
                                bn_update_ops = tf.get_collection(
                                    tf.GraphKeys.UPDATE_OPS, scope=scope)

                                # Retain the summaries from the final tower.
                                self.summaries = tf.get_collection(
                                    tf.GraphKeys.SUMMARIES, scope)

                                # Calculate the gradients for the batch of data on this CIFAR tower.
                                grads = opt.compute_gradients(total_loss)

                                # Keep track of the gradients across all towers.
                                tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = self.average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        # NOTE(review): the per-loss scalars use the *last* tower's loop
        # variables — presumably intentional; confirm.
        self.add_summary(tf.summary.scalar('learning_rate', lr))
        self.add_summary(
            tf.summary.scalar('total_loss', total_loss_to_show))
        self.add_summary(tf.summary.scalar('loc_loss', reg_loss))
        self.add_summary(tf.summary.scalar('cla_loss', cla_loss))
        self.add_summary(tf.summary.scalar('l2_loss', l2_loss))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                self.add_summary(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            self.add_summary(tf.summary.histogram(var.op.name, var))

        if self.ema_weights:
            # Track the moving averages of all trainable variables.
            variable_averages = tf.train.ExponentialMovingAverage(
                0.9, global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())
            # Group all updates to into a single train op.
            train_op = tf.group(apply_gradient_op, variables_averages_op, *bn_update_ops)
        else:
            train_op = tf.group(apply_gradient_op, *bn_update_ops)

        ###set inputs and ouputs
        self.inputs = [
            images_place_holder_list,
            boxes_place_holder_list,
            labels_place_holder_list,
            keep_prob,
            L2_reg,
            training
        ]
        self.outputs = [
            train_op,
            total_loss_to_show,
            reg_loss,
            cla_loss,
            l2_loss,
            lr
        ]
        self.val_outputs = [
            total_loss_to_show,
            reg_loss,
            cla_loss,
            l2_loss,
            lr
        ]

        ##init all variables
        init = tf.global_variables_initializer()
        self.sess.run(init)
def build_bisenet_custom(self, reuse=False):
    """
    Builds the BiSeNet model (custom lightweight variant).

    The spatial path is a hand-rolled MobileNetV3-style stack
    (1x1 expansion conv + depthwise-separable conv "bneck" blocks)
    instead of the plain ConvBlocks used by build_bisenet.

    Arguments:
      reuse: Reuse variable or not

    Returns:
      BiSeNet model (stored on the instance: self.net, plus self.sup1 /
      self.sup2 supervision heads when the mode requires them; self.init_fn
      holds the frontend checkpoint-restore function).
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    batch_norm_params = self.model_config['batch_norm_params']
    init_method = self.model_config['conv_config']['init_method']
    # Names of the frontend end points at 1/16 and 1/32 resolution.
    down_16x_end_points = self.model_config['net_node']['16xdown:50']
    down_32x_end_points = self.model_config['net_node']['32xdown:25']
    if init_method == 'kaiming_normal':
        initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)
    else:
        initializer = slim.xavier_initializer()
    with tf.variable_scope('spatial_net', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                # inference/spatial_net/Conv/Conv2D run 1 average cost 250.552994 ms, 25.405 %, FlopsRate: 9.064 %
                # conv2d
                spatial_net = slim.conv2d(self.images, 16, [3, 3], stride=[2, 2], activation_fn=None)
                spatial_net = hard_swish(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck1
                exp_size = _make_divisible(16)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 16, kernel=[3, 3], stride=2)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck2
                exp_size = _make_divisible(72)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 24, kernel=[3, 3], stride=2)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck3
                exp_size = _make_divisible(88)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 24, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck4
                exp_size = _make_divisible(96)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 40, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck5
                spatial_net = DepthSepConv(spatial_net, 80, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck6
                spatial_net = DepthSepConv(spatial_net, 128, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))

    frontend_config = self.model_config['frontend_config']
    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        self.images, frontend_config, self.is_training(), reuse)

    ### Combining the paths
    with tf.variable_scope('combine_path', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                # tail part
                # NOTE(review): unlike build_bisenet there is no bilinear resize
                # here — the 1x1 global context is combined with ARM_out1 via
                # tf.add broadcasting; confirm this is intended.
                global_context = tf.reduce_mean(
                    end_points[down_32x_end_points], [1, 2], keep_dims=True)
                global_context = slim.conv2d(global_context, 128, 1, [1, 1], activation_fn=None)
                global_context = tf.nn.relu(
                    slim.batch_norm(global_context, fused=True))

                ARM_out1 = AttentionRefinementModule_Custom(
                    end_points[down_32x_end_points], n_filters=128)
                ARM_out2 = AttentionRefinementModule_Custom(
                    end_points[down_16x_end_points], n_filters=128)
                ARM_out1 = tf.add(ARM_out1, global_context)
                ARM_out1 = Upsampling(ARM_out1, scale=2)
                # inference/combine_path/Conv_6/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                exp_size = _make_divisible(256)
                ARM_out1 = slim.conv2d(ARM_out1, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                ARM_out1 = slim.batch_norm(ARM_out1, fused=True)
                ARM_out1 = DepthSepConv(ARM_out1, 128, kernel=[3, 3], stride=1)
                ARM_out1 = tf.nn.relu(slim.batch_norm(ARM_out1, fused=True))

                ARM_out2 = tf.add(ARM_out2, ARM_out1)
                ARM_out2 = Upsampling(ARM_out2, scale=2)
                # inference/combine_path/Conv_13/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                exp_size = _make_divisible(256)
                ARM_out2 = slim.conv2d(ARM_out2, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                ARM_out2 = slim.batch_norm(ARM_out2, fused=True)
                ARM_out2 = DepthSepConv(ARM_out2, 128, kernel=[3, 3], stride=1)
                ARM_out2 = tf.nn.relu(slim.batch_norm(ARM_out2, fused=True))
                context_net = ARM_out2

                FFM_out = FeatureFusionModule_Custom(input_1=spatial_net, input_2=context_net, n_filters=256)
                # Extra refinement on the auxiliary branches before the
                # supervision heads below.
                ARM_out1 = ConvBlock(ARM_out1, n_filters=128, kernel_size=[3, 3])
                ARM_out2 = ConvBlock(ARM_out2, n_filters=128, kernel_size=[3, 3])

                exp_size = _make_divisible(128)
                FFM_out = slim.conv2d(FFM_out, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                FFM_out = slim.batch_norm(FFM_out, fused=True)
                FFM_out = DepthSepConv(FFM_out, 64, kernel=[3, 3], stride=1)
                FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))

                # Upsampling + dilation or only Upsampling
                FFM_out = Upsampling(FFM_out, scale=2)
                # inference/combine_path/Conv_12/Conv2D run 1 average cost 32.151001 ms, 3.260 %, FlopsRate: 8.879 %
                exp_size = _make_divisible(128)
                FFM_out = slim.conv2d(FFM_out, exp_size, [1, 1], stride=[1, 1], activation_fn=None)
                FFM_out = DepthSepConv(FFM_out, 64, kernel=[3, 3], stride=1, rate=2)
                FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                FFM_out = slim.conv2d(FFM_out, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                self.net = Upsampling(FFM_out, 4)

                if self.mode in ['train', 'validation', 'test']:
                    # Auxiliary supervision heads on the two ARM branches.
                    sup1 = slim.conv2d(ARM_out1, self.num_classes, [1, 1], activation_fn=None, scope='supl1')
                    sup2 = slim.conv2d(ARM_out2, self.num_classes, [1, 1], activation_fn=None, scope='supl2')
                    self.sup1 = Upsampling(sup1, scale=16)
                    self.sup2 = Upsampling(sup2, scale=8)
    self.init_fn = init_fn
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """ONet of the MTCNN cascade: conv stack -> FC -> cls/bbox/landmark heads.

    Args:
        inputs: input image tensor batch.
        label: classification targets (training only).
        bbox_target: bbox regression targets (training only).
        landmark_target: landmark regression targets (training only).
        training: when True, returns the losses; otherwise the predictions.

    Returns:
        Training: (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy).
        Inference: (cls_prob, bbox_pred, landmark_pred).

    FIX: the original used Python 2 `print expr` statements, which are a
    SyntaxError under Python 3 (the rest of this file uses the function
    form).  All prints here are single-argument, so `print(expr)` produces
    identical output on both interpreters.
    """
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope="conv1")
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv2")
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2")
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv3")
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=128, kernel_size=[2, 2], stride=1, scope="conv4")
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope="fc1", activation_fn=tf.nn.relu)
        print(fc1.get_shape())
        #batch*2
        cls_prob = slim.fully_connected(fc1, num_outputs=2, scope="cls_fc", activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())
        #batch*4
        bbox_pred = slim.fully_connected(fc1, num_outputs=4, scope="bbox_fc", activation_fn=None)
        print(bbox_pred.get_shape())
        #batch*10
        landmark_pred = slim.fully_connected(fc1, num_outputs=10, scope="landmark_fc", activation_fn=None)
        print(landmark_pred.get_shape())
        #train
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            return cls_prob, bbox_pred, landmark_pred
def onet_cnn6(inputs,label=None,bbox_target=None,landmark_target=None,training=True):
    """Deeper ONet variant (6 conv layers) with configurable loss functions.

    Training returns (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy,
    cls_prob); inference returns (cls_prob, bbox_pred, landmark_pred).
    bbox/landmark losses switch between MSE-style and OHEM variants based on
    the singleton config.
    """
    with slim.arg_scope([slim.conv2d],
                        activation_fn = prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        #model structure
        #
        #
        #print (inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3,3], stride=1, scope="conv1")
        print("Conv 1: ", net.get_shape())
        #net = slim.conv2d(inputs, num_outputs=64, kernel_size=[3,3], stride=2, scope="conv2")
        #print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME')
        print("Pool 1: ", net.get_shape())
        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope="conv2")
        print("Conv 2: ", net.get_shape())
        #net = slim.conv2d(net,num_outputs=128,kernel_size=[3,3],stride=2,scope="conv4")
        #print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2")
        print("Pool 2: ", net.get_shape())
        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope="conv3")
        print("Conv 3: ", net.get_shape())
        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],padding="SAME",stride=1,scope="conv4")
        print("Conv 4: ", net.get_shape())
        #net = slim.conv2d(net,num_outputs=128,kernel_size=[3,3],stride=2,scope="conv6")
        #print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME')
        print("Pool 3: ", net.get_shape())
        net = slim.conv2d(net,num_outputs=128,kernel_size=[2,2],stride=1,scope="conv5")
        print("Conv 5: ", net.get_shape())
        net = slim.conv2d(net,num_outputs=128,kernel_size=[2,2],padding="SAME",stride=1,scope="conv6")
        print("Conv 6: ", net.get_shape())
        fc_flatten = slim.flatten(net)
        #print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256,scope="fc1")
        #print(fc1.get_shape())
        #batch*2
        cls_prob = slim.fully_connected(fc1,num_outputs=2,scope="cls_fc",activation_fn=tf.nn.softmax)
        #print(cls_prob.get_shape())
        #batch*4
        bbox_pred = slim.fully_connected(fc1,num_outputs=4,scope="bbox_fc",activation_fn=None)
        #print(bbox_pred.get_shape())
        #batch*10
        # NOTE(review): the comment above says batch*10 but num_outputs is 4 —
        # either the head predicts 2 landmarks or this is a bug; confirm
        # against landmark_target's shape before changing.
        landmark_pred = slim.fully_connected(fc1,num_outputs=4,scope="landmark_fc",activation_fn=None)
        #print(landmark_pred.get_shape())
        #train
        if training:
            config = singleton.configuration._instance.config
            #cls_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(label,cls_prob))
            #cls_loss = tf.reduce_mean(tf.losses.sigmoid_cross_entropy(label,cls_prob))
            #cls_loss = cls_ohem(cls_prob,label)
            cls_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(label,cls_prob,from_logits=False))
            #bbox_loss = bbox_ohem(bbox_pred,bbox_target,label)
            # Loss variants are selected from the global singleton config.
            if config.bbox_loss == "mse":
                bbox_loss = get_bb_loss(bbox_pred,bbox_target,label)
            else:
                bbox_loss = bbox_ohem(bbox_pred,bbox_target,label)
            if config.landmark_loss == "mse":
                landmark_loss = get_landmark_loss(landmark_pred, landmark_target,label)
            else:
                landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            #bbox_loss = tf.reduce_mean(tf.losses.mean_squared_error(bbox_target,bbox_pred))
            accuracy = cal_accuracy(cls_prob,label)
            #landmark_loss = get_landmark_loss(landmark_pred, landmark_target,label)
            #landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            #landmark_loss = tf.reduce_mean(tf.losses.mean_squared_error(landmark_target,landmark_pred))
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss,bbox_loss,landmark_loss,L2_loss,accuracy,cls_prob
        else:
            return cls_prob,bbox_pred,landmark_pred
def _make_graph(self):
    """Construct the multi-GPU training graph and return the grouped train op.

    Each GPU tower builds the network (occluded-detection variant when
    configured), collects its loss and gradients; tower gradients are then
    averaged and applied.  BN update ops are taken from tower 0 only.
    """
    self.logger.info("Generating training graph on {} GPUs ...".format(
        self.cfg.num_gpus))

    weights_initializer = slim.xavier_initializer()
    biases_initializer = tf.constant_initializer(0.)
    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(
        self.cfg.TRAIN.weight_decay)

    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(self.cfg.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i) as name_scope:
                    # Force all Variables to reside on the CPU.
                    with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                            device='/device:CPU:0'):
                        with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                             slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                            weights_regularizer=weights_regularizer,
                                            biases_regularizer=biases_regularizer,
                                            weights_initializer=weights_initializer,
                                            biases_initializer=biases_initializer):
                            # loss over single GPU
                            if (self.cfg.MODEL.occluded_detection):
                                self.net.make_occ_network(is_train=True)
                            else:
                                self.net.make_network(is_train=True)
                            # Weight decay is only included on the last tower so
                            # it is counted once across the averaged gradients.
                            if i == self.cfg.num_gpus - 1:
                                loss = self.net.get_loss(include_wd=True)
                            else:
                                loss = self.net.get_loss()
                            self._input_list.append(self.net.get_inputs())

                    tf.get_variable_scope().reuse_variables()

                    if i == 0:
                        if self.cfg.num_gpus > 1 and self.cfg.TRAIN.batch_norm is True:
                            self.logger.warning(
                                "BN is calculated only on single GPU.")
                        # BN moving-average updates are taken from tower 0 only
                        # and sequenced before that tower's gradient computation.
                        extra_update_ops = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, name_scope)
                        with tf.control_dependencies(extra_update_ops):
                            grads = self._optimizer.compute_gradients(loss)
                    else:
                        grads = self._optimizer.compute_gradients(loss)

                    final_grads = []
                    # NOTE(review): this scope is currently a pass-through —
                    # gradients are copied unmodified; presumably a hook for
                    # per-variable learning-rate multipliers. Confirm.
                    with tf.variable_scope('Gradient_Mult') as scope:
                        for grad, var in grads:
                            final_grads.append((grad, var))
                    tower_grads.append(final_grads)

    if len(tower_grads) > 1:
        grads = average_gradients(tower_grads)
    else:
        grads = tower_grads[0]

    apply_gradient_op = self._optimizer.apply_gradients(grads)
    # extra_update_ops was captured from tower 0 above.
    train_op = tf.group(apply_gradient_op, *extra_update_ops)

    return train_op
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    #why activation is prelu, why?
    '''
    PNet: fully convolutional proposal network of the MTCNN cascade.

    Training returns (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy);
    inference returns (cls_prob_test, bbox_pred_test, landmark_pred_test)
    with the batch dimension squeezed out (batch_size=1 assumed at test time).

    leaky relu vs prelu:
    https://datascience.stackexchange.com/questions/18583/what-is-the-difference-between-leakyrelu-and-prelu
    Leaky ReLUs: allow a small, non-zero gradient when the unit is not active.
    Parametric ReLUs: take this idea further by making the coefficient of
    leakage into a parameter that is learned along with the other neural
    network parameters.
    '''
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(
            ),  # slim does not have zeros initilizer
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print("PNet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=10, kernel_size=[3, 3], stride=1, scope='conv1')
        print("PNet conv1 shape: ", net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, padding='SAME', scope='pool1')
        print("PNet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2')
        print("PNet conv2 shape: ", net.get_shape())
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3')
        print("PNet conv3 shape: ", net.get_shape())
        # final 3 conv to get H*W*2 classifier, H*W*4 bbox, H*W*10 landmar_pred
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
                              scope='conv4_1', activation_fn=tf.nn.softmax)
        # NOTE(review): this debug print shows `net`, not `conv4_1` — likely
        # meant to print conv4_1's shape; harmless, debug output only.
        print('P_Net conv4_1 shape ', net.get_shape())
        bbox_pred = slim.conv2d(
            net, num_outputs=4, kernel_size=[1, 1], stride=1, scope='conv4_2',
            activation_fn=None
        )  # important scope name should not be the same as veriable name
        print('P_Net bbox_pred conv layer shape ', bbox_pred.get_shape())
        landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1], stride=1,
                                    scope='conv4_3', activation_fn=None)
        print('P_Net ladmark conv layer shape', landmark_pred.get_shape())
        if training:
            #batch*2 to determin if it is a face
            #why squeezing? what will happe
            # Squeeze the 1x1 spatial dims left by the fully-convolutional head.
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #check bbox_loss
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #landmark loss
            landmark_pred = tf.squeeze(landmark_pred, [1, 2], name='landmark_pred')
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            #tf.add_n: Adds all input tensors element-wise.
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            #test, batch_size=1
            cls_prob_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_prob_test, bbox_pred_test, landmark_pred_test
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """MTCNN output network (O-Net).

    Conv/pool trunk followed by a 256-unit FC layer feeding three heads:
    face classification (2), bbox regression (4), landmark regression (10).

    Returns (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy) when
    training, otherwise (cls_prob, bbox_pred, landmark_pred).
    """
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print("ONet input shape: ", inputs.get_shape())
        feat = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv1')
        print("ONet conv1 shape: ", feat.get_shape())
        # The original O-Net model uses stride-2 pooling throughout.
        feat = slim.max_pool2d(feat, kernel_size=[3, 3], stride=2, scope='pool1', padding='SAME')
        print("ONet pool1 shape: ", feat.get_shape())
        feat = slim.conv2d(feat, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv2')
        print("ONet conv2 shape: ", feat.get_shape())
        feat = slim.max_pool2d(feat, kernel_size=[3, 3], stride=2, scope='pool2')
        print("ONet pool2 shape: ", feat.get_shape())
        feat = slim.conv2d(feat, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv3')
        print("ONet conv3 shape: ", feat.get_shape())
        feat = slim.max_pool2d(feat, kernel_size=[2, 2], stride=2, scope='pool3', padding='SAME')
        print("ONet pool3 shape: ", feat.get_shape())
        feat = slim.conv2d(feat, num_outputs=128, kernel_size=[2, 2], stride=1, scope='conv4')
        print("ONet conv4 shape: ", feat.get_shape())

        flat = slim.flatten(feat)
        print("ONet fc input shape: ", flat.get_shape())
        hidden = slim.fully_connected(flat, num_outputs=256, scope='fc1', activation_fn=tf.nn.relu)
        print('ONet fc shape after flattening: ', hidden.get_shape())

        # Three heads off the shared FC representation.
        cls_prob = slim.fully_connected(hidden, num_outputs=2, scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print('ONet cls_prob fc shape ', cls_prob.get_shape())
        bbox_pred = slim.fully_connected(hidden, num_outputs=4, scope='bbox_fc',
                                         activation_fn=None)
        print('ONet bbox_pred fc shape ', bbox_pred.get_shape())
        landmark_pred = slim.fully_connected(hidden, num_outputs=10, scope='landmark_fc',
                                             activation_fn=None)
        print('ONet landmark fc shape ', landmark_pred.get_shape())

        if not training:
            return cls_prob, bbox_pred, landmark_pred

        cls_loss = cls_ohem(cls_prob, label)
        bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
        accuracy = cal_accuracy(cls_prob, label)
        landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
        L2_loss = tf.add_n(slim.losses.get_regularization_losses())
        return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
def initialize(self, config, num_classes):
    """Build the full training graph from scratch according to `config`.

    Creates placeholders, a (possibly multi-GPU) tower loop that builds the
    embedding network plus the configured loss heads, gradient averaging,
    the train op, summaries, and finally initializes variables and a Saver.
    All ops are added to self.graph / self.sess.

    Args:
        config: run configuration object (image_size, channels, num_gpus,
            network/localization_net module paths, losses dict, optimizer
            settings, ...). Exact schema defined by the caller — not
            visible here.
        num_classes: number of identity classes for the softmax-style heads.
    """
    # Register a default graph; all ops below are added to it.
    with self.graph.as_default():
        # Register a default session for variable initialization at the end.
        with self.sess.as_default():
            # ---- Placeholders -------------------------------------------
            # Placeholders let the graph be built without data; values are
            # fed at run time.
            w, h = config.image_size    # e.g. [112, 112]
            channels = config.channels  # e.g. 3 (RGB)
            image_batch_placeholder = tf.placeholder(tf.float32, shape=[None, h, w, channels], name='image_batch')
            label_batch_placeholder = tf.placeholder(tf.int32, shape=[None], name='label_batch')
            learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
            keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob')
            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
            global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')

            # Split the batch evenly across GPUs (requires batch size to be
            # divisible by num_gpus — TODO confirm the feeder guarantees this).
            image_splits = tf.split(image_batch_placeholder, config.num_gpus)
            label_splits = tf.split(label_batch_placeholder, config.num_gpus)
            grads_splits = []
            split_dict = {}

            # Accumulate per-tower tensors under a key for later averaging.
            def insert_dict(k,v):
                if k in split_dict:
                    split_dict[k].append(v)
                else:
                    split_dict[k] = [v]

            # ---- Per-GPU tower loop -------------------------------------
            for i in range(config.num_gpus):
                # First tower gets the root name scope; later towers 'gpu_i'.
                scope_name = '' if i==0 else 'gpu_%d' % i
                with tf.name_scope(scope_name):
                    # reuse=i>0: tower 0 creates the variables, later towers
                    # share them.
                    with tf.variable_scope('', reuse=i>0):
                        with tf.device('/gpu:%d' % i):
                            images = tf.identity(image_splits[i], name='inputs')
                            labels = tf.identity(label_splits[i], name='labels')
                            # Save the first tower's input tensor for testing.
                            if i == 0:
                                self.inputs = images

                            # Optional localization (spatial-transformer-style)
                            # front end applied before the embedding network.
                            if config.localization_net is not None:
                                localization_net = utils.import_file(config.localization_net, 'network')
                                imsize = (112, 112)
                                images, theta = localization_net.inference(images, imsize,
                                                                           phase_train_placeholder,
                                                                           weight_decay=0.0)
                                images = tf.identity(images, name='transformed_image')
                                if i == 0:
                                    tf.summary.image('transformed_image', images)
                            else:
                                images = images

                            # Embedding network is loaded dynamically from the
                            # module path in config.network.
                            network = utils.import_file(config.network, 'network')
                            prelogits = network.inference(images, keep_prob_placeholder,
                                                          phase_train_placeholder,
                                                          bottleneck_layer_size=config.embedding_size,
                                                          weight_decay=config.weight_decay,
                                                          model_version=config.model_version)
                            prelogits = tf.identity(prelogits, name='prelogits')
                            # L2-normalize embeddings along the feature axis.
                            embeddings = tf.nn.l2_normalize(prelogits, dim=1, name='embeddings')
                            if i == 0:
                                self.outputs = tf.identity(embeddings, name='outputs')

                            # ---- Loss heads (any subset may be enabled) ----
                            losses = []

                            # Original softmax cross-entropy.
                            if 'softmax' in config.losses.keys():
                                logits = slim.fully_connected(prelogits, num_classes,
                                        weights_regularizer=slim.l2_regularizer(config.weight_decay),
                                        # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                        weights_initializer=slim.xavier_initializer(),
                                        biases_initializer=tf.constant_initializer(0.0),
                                        activation_fn=None,
                                        scope='Logits')
                                cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                                        labels=labels, logits=logits), name='cross_entropy')
                                losses.append(cross_entropy)
                                insert_dict('sloss', cross_entropy)

                            # L2-Softmax (cosine).
                            if 'cosine' in config.losses.keys():
                                logits, cosine_loss = tflib.cosine_softmax(prelogits, labels, num_classes,
                                        gamma=config.losses['cosine']['gamma'],
                                        weight_decay=config.weight_decay)
                                losses.append(cosine_loss)
                                insert_dict('closs', cosine_loss)

                            # A-Softmax (angular margin).
                            if 'angular' in config.losses.keys():
                                a_cfg = config.losses['angular']
                                angular_loss = tflib.angular_softmax(prelogits, labels, num_classes,
                                        global_step, a_cfg['m'], a_cfg['lamb_min'], a_cfg['lamb_max'],
                                        weight_decay=config.weight_decay)
                                losses.append(angular_loss)
                                insert_dict('aloss', angular_loss)

                            # Split loss (see tflib.split_softmax).
                            if 'split' in config.losses.keys():
                                split_losses = tflib.split_softmax(prelogits, labels, num_classes,
                                        global_step, gamma=config.losses['split']['gamma'],
                                        weight_decay=config.weight_decay)
                                losses.extend(split_losses)
                                insert_dict('loss', split_losses[0])

                            # Collect regularization and total loss, then the
                            # per-tower gradients.
                            reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES),
                                                     name='reg_loss')
                            losses.append(reg_loss)
                            insert_dict('reg_loss', reg_loss)
                            total_loss = tf.add_n(losses, name='total_loss')
                            grads_split = tf.gradients(total_loss, tf.trainable_variables())
                            grads_splits.append(grads_split)

            # ---- Merge towers -------------------------------------------
            grads = tflib.average_grads(grads_splits)
            for k,v in split_dict.items():
                v = tflib.average_tensors(v)
                split_dict[k] = v
                if 'loss' in k:
                    tf.summary.scalar('losses/' + k, v)
                else:
                    tf.summary.scalar(k, v)

            # ---- Train ops ----------------------------------------------
            # Gradient application + global-step bump + any UPDATE_OPS
            # (e.g. batch-norm moving averages) grouped into one train op.
            apply_gradient_op = tflib.apply_gradient(tf.trainable_variables(), grads,
                                                     config.optimizer,
                                                     learning_rate_placeholder,
                                                     config.learning_rate_multipliers)
            update_global_step_op = tf.assign_add(global_step, 1)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            train_ops = [apply_gradient_op, update_global_step_op] + update_ops
            train_op = tf.group(*train_ops)

            tf.summary.scalar('learning_rate', learning_rate_placeholder)
            summary_op = tf.summary.merge_all()

            # ---- Initialize variables and saver -------------------------
            self.sess.run(tf.local_variables_initializer())
            self.sess.run(tf.global_variables_initializer())
            self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=None)

            # Keep useful tensors as attributes for the train/test loops.
            self.image_batch_placeholder = image_batch_placeholder
            self.label_batch_placeholder = label_batch_placeholder
            self.learning_rate_placeholder = learning_rate_placeholder
            self.keep_prob_placeholder = keep_prob_placeholder
            self.phase_train_placeholder = phase_train_placeholder
            self.global_step = global_step
            self.watch_list = split_dict
            self.train_op = train_op
            self.summary_op = summary_op
def split_softmax(prelogits, label, num_classes, global_step, weight_decay, gamma=16.0, reuse=None):
    """Experimental 'split softmax' metric-learning loss.

    Builds class-weight vectors, compares L2-normalized embeddings against
    them with cosine similarity, and applies a margin-based hinge loss over
    log-sum-exp-pooled negative logits. Also registers (conditional)
    non-gradient update ops for the thresholds and centers.

    Args:
        prelogits: embedding batch, shape (batch, nrof_features); static
            feature dim required (`shape[1].value`).
        label: int class labels, shape (batch,).
        num_classes: total number of classes.
        global_step: global step variable (used by the tuple-gamma schedule).
        weight_decay: L2 regularization strength for the class weights.
        gamma: scaling — float, the string 'auto' (learned via softplus of
            alpha), or a (t_min, decay) tuple for a decaying temperature.
        reuse: variable_scope reuse flag.

    Returns:
        A list of loss tensors (currently just the split hinge loss).

    NOTE(review): this function retains several dead assignments
    (hinge_loss/margin_func, norm, norm_weights, loss_glob, loss_weight,
    loss_center, the *_batch logits/dists, centers_*) from experiments —
    kept to preserve graph structure, but they do not feed the returned loss.
    """
    nrof_features = prelogits.shape[1].value
    batch_size = tf.shape(prelogits)[0]
    with tf.variable_scope('SplitSoftmax', reuse=reuse):
        # Per-class weight vectors, one row per class.
        weights = tf.get_variable('weights', shape=(num_classes, nrof_features),
                regularizer=slim.l2_regularizer(weight_decay),
                initializer=slim.xavier_initializer(),
                trainable=True,
                dtype=tf.float32)
        # alpha: learned parameter for gamma='auto'; beta/sigma: experimental
        # scalars (beta feeds only the unused t_pos/t_neg reassignment below).
        alpha = tf.get_variable('alpha', shape=(),
                regularizer=slim.l2_regularizer(1e-2),
                initializer=tf.constant_initializer(1.00),
                trainable=True,
                dtype=tf.float32)
        beta = tf.get_variable('beta', shape=(),
                initializer=tf.constant_initializer(0.0),
                trainable=True,
                dtype=tf.float32)
        sigma = tf.get_variable('sigma', shape=(),
                regularizer=slim.l2_regularizer(1e-1),
                initializer=tf.constant_initializer(1.0),
                trainable=True,
                dtype=tf.float32)
        # Non-trainable decision thresholds, moved by the update op below.
        threshold_pos = tf.get_variable('threshold_pos', shape=(),
                initializer=tf.constant_initializer(16.0),
                trainable=False,
                dtype=tf.float32)
        threshold_neg = tf.get_variable('threshold_neg', shape=(),
                initializer=tf.constant_initializer(0.0),
                trainable=False,
                dtype=tf.float32)

        # L2-normalize both embeddings and class weights -> cosine logits.
        weights_normed = tf.nn.l2_normalize(weights, dim=1)
        prelogits_normed = tf.nn.l2_normalize(prelogits, dim=1)

        # Per-label centers of the batch (centers_normed is unused below).
        centers, label_center, center_idx, center_weight = centers_by_label(prelogits_normed, label)
        centers = tf.gather(centers, center_idx)
        centers_normed = tf.nn.l2_normalize(centers, dim=1)

        coef = 1.0

        # ---- Global (sample vs class-weight) logits ---------------------
        label_mat_glob = tf.one_hot(label, num_classes, dtype=tf.float32)
        label_mask_pos_glob = tf.cast(label_mat_glob, tf.bool)
        label_mask_neg_glob = tf.logical_not(label_mask_pos_glob)
        # Cosine similarity matrix: (batch, num_classes).
        dist_mat_glob = tf.matmul(prelogits_normed, tf.transpose(weights_normed))
        dist_pos_glob = tf.boolean_mask(dist_mat_glob, label_mask_pos_glob)
        dist_neg_glob = tf.boolean_mask(dist_mat_glob, label_mask_neg_glob)
        logits_glob = coef * dist_mat_glob
        logits_pos_glob = tf.boolean_mask(logits_glob, label_mask_pos_glob)
        logits_neg_glob = tf.boolean_mask(logits_glob, label_mask_neg_glob)

        # ---- Within-batch (sample vs sample) logits ---------------------
        # Computed but not used by the final loss (experimental leftover).
        label_exp_batch = tf.expand_dims(label, 1)
        label_mat_batch = tf.equal(label_exp_batch, tf.transpose(label_exp_batch))
        label_mask_pos_batch = tf.cast(label_mat_batch, tf.bool)
        label_mask_neg_batch = tf.logical_not(label_mask_pos_batch)
        # Exclude self-pairs from the positive mask.
        mask_non_diag = tf.logical_not(tf.cast(tf.eye(batch_size), tf.bool))
        label_mask_pos_batch = tf.logical_and(label_mask_pos_batch, mask_non_diag)
        dist_mat_batch = tf.matmul(prelogits_normed, tf.transpose(prelogits_normed))
        dist_pos_batch = tf.boolean_mask(dist_mat_batch, label_mask_pos_batch)
        dist_neg_batch = tf.boolean_mask(dist_mat_batch, label_mask_neg_batch)
        logits_batch = coef * dist_mat_batch
        logits_pos_batch = tf.boolean_mask(logits_batch, label_mask_pos_batch)
        logits_neg_batch = tf.boolean_mask(logits_batch, label_mask_neg_batch)

        # The loss below uses the global variant.
        logits_pos = logits_pos_glob
        logits_neg = logits_neg_glob
        dist_pos = dist_pos_glob
        dist_neg = dist_neg_glob

        t_pos = coef * (threshold_pos)
        t_neg = coef * (threshold_neg)

        # ---- Gamma (logit scale) schedule --------------------------------
        if gamma == 'auto':
            # softplus(alpha), written out explicitly.
            gamma = tf.log(tf.exp(1.0) + tf.exp(alpha))
        elif type(gamma) == tuple:
            # Decaying temperature: gamma = 1 / (t_min + 1/(eps + decay*step)).
            t_min, decay = gamma
            epsilon = 1e-5
            t = t_min + 1.0/(epsilon + decay*tf.cast(global_step, tf.float32))
            gamma = 1.0 / t
        else:
            assert type(gamma) == float
            gamma = tf.constant(gamma)

        hinge_loss = lambda x: tf.nn.relu(1.0 + x)
        margin_func = hinge_loss  # unused below

        # Losses
        losses = []
        # (removed: commented-out hard-negative sampling experiments)

        _logits_pos = tf.reshape(logits_pos, [batch_size, -1])
        _logits_neg = tf.reshape(logits_neg, [batch_size, -1])

        # norm/norm_weights: computed but unused by the returned loss.
        norm = tf.square(tf.reduce_sum(tf.square(prelogits), axis=1, keep_dims=True))
        norm_weights = tf.norm(tf.gather(weights, label), axis=1, keep_dims=True)

        # NOTE(review): t_pos/t_neg are overwritten here with beta, making
        # the threshold-scaled values above dead for the loss (the acc_*
        # metrics below therefore use beta, not the thresholds).
        t_pos = (beta)
        t_neg = (beta)

        _logits_pos = _logits_pos * gamma
        _logits_neg = _logits_neg * gamma
        # Soft-max pooling of negatives per sample: (batch, 1).
        _logits_neg = tf.reduce_logsumexp(_logits_neg, axis=1)[:,None]
        # (removed: commented-out simulated-ranking / per-sample-norm experiments)

        m = 5.0  # margin
        factor = 1 / tf.cast(batch_size, tf.float32)       # unused
        bias = tf.log(tf.cast(num_classes, tf.float32))    # unused
        # loss_pos and loss_neg are identical halves of the same hinge term.
        loss_pos = tf.nn.relu(m + _logits_neg - _logits_pos) * 0.5
        loss_neg = tf.nn.relu(m + _logits_neg - _logits_pos) * 0.5
        loss = tf.reduce_mean((loss_pos + loss_neg), name='split_loss')
        losses.extend([loss])
        tf.add_to_collection('watch_list', ('split_loss', loss))

        # Global loss — built but NOT appended to losses (disabled).
        _logits_pos_glob = tf.reshape(logits_pos_glob, [batch_size, -1]) * gamma
        _logits_neg_glob = tf.reshape(logits_neg_glob, [batch_size, -1]) * gamma
        _logits_neg_glob = tf.reduce_logsumexp(_logits_neg_glob)
        loss_glob = tf.reduce_mean(tf.nn.relu(1 + _logits_neg_glob - _logits_pos_glob), name='loss_glob')

        # Weight decay term — built but NOT appended (disabled).
        loss_weight = tf.reduce_sum(1e-7 * tf.square(weights_normed), name='loss_weight')

        # Batch center loss — built but NOT appended (disabled).
        centers_batch = tf.gather(weights_normed, label)
        dist_center = tf.reduce_sum(tf.square(prelogits_normed - centers_batch), axis=1)
        loss_center = tf.reduce_mean(1.0*dist_center, name='loss_center')

        # ---- Threshold EMA update (runs via UPDATE_OPS) ------------------
        # threshold_pos/neg are non-trainable, so this branch is taken.
        if not threshold_pos in tf.trainable_variables():
            # Mean/variance-based threshold (overwritten just below —
            # kept from an earlier variant).
            mean_pos, var_pos = tf.nn.moments(dist_pos, axes=[0])
            mean_neg, var_neg = tf.nn.moments(dist_neg, axes=[0])
            std_pos = tf.sqrt(var_pos)
            std_neg = tf.sqrt(var_neg)
            threshold_batch = std_neg*mean_pos / (std_pos+std_neg) + std_pos*mean_neg / (std_pos+std_neg)
            threshold_pos_batch = threshold_neg_batch = threshold_batch
            # Effective batch estimates: extremes of the scaled logits.
            threshold_neg_batch = tf.reduce_min(_logits_pos)
            threshold_pos_batch = tf.reduce_max(_logits_neg)
            # Exponential moving average with rate 0.1.
            diff_threshold_pos = threshold_pos - threshold_pos_batch
            diff_threshold_neg = threshold_neg - threshold_neg_batch
            diff_threshold_pos = 0.1 * diff_threshold_pos
            diff_threshold_neg = 0.1 * diff_threshold_neg
            threshold_pos_update_op = tf.assign_sub(threshold_pos, diff_threshold_pos)
            threshold_neg_update_op = tf.assign_sub(threshold_neg, diff_threshold_neg)
            threshold_update_op = tf.group(threshold_pos_update_op, threshold_neg_update_op)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, threshold_update_op)

        # ---- Center-style update of class weights ------------------------
        # Only active when `weights` is NOT trainable (it is trainable above,
        # so this is normally skipped).
        if not weights in tf.trainable_variables():
            weights_batch = tf.gather(weights, label)
            diff_centers = weights_batch - prelogits
            unique_label, unique_idx, unique_count = tf.unique_with_counts(label)
            appear_times = tf.gather(unique_count, unique_idx)
            appear_times = tf.reshape(appear_times, [-1, 1])
            # Average the update per class by its appearance count.
            diff_centers = diff_centers / tf.cast((1 + appear_times), tf.float32)
            diff_centers = 0.5 * diff_centers
            centers_update_op = tf.scatter_sub(weights, label, diff_centers)
            centers_update_op = tf.group(centers_update_op)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, centers_update_op)
        # (removed: commented-out sigma update experiment)

        # ---- Monitoring --------------------------------------------------
        mean_dist_pos = tf.reduce_mean(dist_pos, name='mean_dist_pos')
        mean_dist_neg = tf.reduce_mean(dist_neg, name='mean_dist_neg')
        acc_pos = tf.reduce_mean(tf.cast(tf.greater_equal(logits_pos, t_pos), tf.float32), name='acc_pos')
        acc_neg = tf.reduce_mean(tf.cast(tf.less(logits_neg, t_neg), tf.float32), name='acc_neg')
        tf.summary.scalar('threshold_pos', threshold_pos)
        tf.summary.scalar('mean_dist_pos', mean_dist_pos)
        tf.summary.scalar('mean_dist_neg', mean_dist_neg)
        tf.summary.scalar('acc_pos', acc_pos)
        tf.summary.scalar('acc_neg', acc_neg)
        tf.summary.scalar('gamma', gamma)
        tf.summary.scalar('alpha', alpha)
        tf.summary.scalar('beta', beta)
        tf.summary.histogram('dist_pos', dist_pos)
        tf.summary.histogram('dist_neg', dist_neg)
        tf.add_to_collection('watch_list', ('gamma', gamma))
        tf.add_to_collection('watch_list', ('alpha', alpha))
        tf.add_to_collection('watch_list', ('beta', beta))

    return losses
def _setup_basic_network(self, inputs, is_training=True):
    """Build the P-Net-style trunk and heads, recording every layer.

    Each layer's output is stored in self._end_points under its scope name.
    Returns the tuple (conv4_1, bounding_box_predictions, landmark_predictions).
    """
    points = {}
    self._end_points = points
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(0.00005),
            padding='valid'):
        # Trunk: conv1 -> pool1 -> conv2 -> conv3.
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        points['conv1'] = net
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME')
        points['pool1'] = net
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2')
        points['conv2'] = net
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3')
        points['conv3'] = net
        # Heads: classification (softmax over 2), bbox (4), landmarks (10).
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
                              scope='conv4_1', activation_fn=tf.nn.softmax)
        points['conv4_1'] = conv4_1
        bounding_box_predictions = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1,
                                               scope='conv4_2', activation_fn=None)
        points['conv4_2'] = bounding_box_predictions
        landmark_predictions = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1], stride=1,
                                           scope='conv4_3', activation_fn=None)
        points['conv4_3'] = landmark_predictions
        return (conv4_1, bounding_box_predictions, landmark_predictions)
def L_O_Net(inputs, label=None, bbox_target=None, animoji_target=None, training=True):
    """O-Net variant with a 140-dim 'animoji' regression head.

    Args:
        inputs: NHWC image batch (presumably a larger crop than the plain
            O-Net's 48x48 given the extra avg-pool — TODO confirm).
        label, bbox_target, animoji_target: training targets; ignored when
            training=False.
        training: selects the returned tuple (see returns).

    Returns:
        training=True:
            (cls_loss, bbox_loss, animoji_loss, L2_loss, accuracy, recall)
        training=False:
            (cls_prob, bbox_pred, animoji_pred)
    """
    # (removed: commented-out batch_norm arg_scope variant kept in the
    # original source — re-enable by adding normalizer_fn/normalizer_params)
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print('L_O_Net network shape')
        print(inputs.get_shape())
        # NOTE: per-layer padding= args repeat the arg_scope default 'valid'
        # except the 'SAME' pools.
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv1', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv2', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool2', padding='valid')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv3', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool3', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=128, kernel_size=[3, 3], stride=1, scope='conv4', padding='valid')
        print(net.get_shape())
        net = slim.avg_pool2d(net, kernel_size=[2, 2], stride=1, scope='pool4', padding='valid')
        print(net.get_shape())
        # (removed: large commented-out mobilenet_v2 backbone experiment —
        # conv1_1 + res blocks + pwise/global_avg; see version history)

        # NHWC -> NCHW before flattening so the flattened feature order is
        # channel-major (matches the original deployment — TODO confirm why).
        net = tf.transpose(net, perm=[0, 3, 1, 2])
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope='fc1', activation_fn=prelu)
        print(fc1.get_shape())
        # Heads: face cls (2), bbox (4), animoji (140 = 70 points x 2? — TODO confirm).
        cls_prob = slim.fully_connected(fc1, num_outputs=2, scope='cls_fc', activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())
        bbox_pred = slim.fully_connected(fc1, num_outputs=4, scope='bbox_fc', activation_fn=None)
        print(bbox_pred.get_shape())
        # (removed: commented-out 10-dim landmark head)
        animoji_pred = slim.fully_connected(fc1, num_outputs=140, scope='animoji_fc', activation_fn=None)
        print(animoji_pred.get_shape())
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy, recall = cal_accuracy(cls_prob, label)
            animoji_loss = animoji_ohem(animoji_pred, animoji_target, label)
            # Debug print of the regularization loss collection.
            print(tf.losses.get_regularization_losses())
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, animoji_loss, L2_loss, accuracy, recall
        else:
            return cls_prob, bbox_pred, animoji_pred
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """MTCNN output network (O-Net), NCHW-flattened variant.

    Returns (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, recall)
    when training, otherwise (cls_prob, bbox_pred, landmark_pred, None).
    """
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print('O_Net network shape')
        print(inputs.get_shape())
        x = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv1')
        print(x.get_shape())
        x = slim.max_pool2d(x, kernel_size=[3, 3], stride=2, scope='pool1', padding='SAME')
        print(x.get_shape())
        x = slim.conv2d(x, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv2')
        print(x.get_shape())
        x = slim.max_pool2d(x, kernel_size=[3, 3], stride=2, scope='pool2')
        print(x.get_shape())
        x = slim.conv2d(x, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv3')
        print(x.get_shape())
        x = slim.max_pool2d(x, kernel_size=[2, 2], stride=2, scope='pool3', padding='SAME')
        print(x.get_shape())
        x = slim.conv2d(x, num_outputs=128, kernel_size=[2, 2], stride=1, scope='conv4')
        print(x.get_shape())
        # NHWC -> NCHW so the flattened features are channel-major.
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        print(x.get_shape())
        flat = slim.flatten(x)
        print(flat.get_shape())
        hidden = slim.fully_connected(flat, num_outputs=256, scope='fc1', activation_fn=prelu)
        print(hidden.get_shape())
        # Three heads off the shared 256-d representation.
        cls_prob = slim.fully_connected(hidden, num_outputs=2, scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())
        bbox_pred = slim.fully_connected(hidden, num_outputs=4, scope='bbox_fc',
                                         activation_fn=None)
        print(bbox_pred.get_shape())
        landmark_pred = slim.fully_connected(hidden, num_outputs=10, scope='landmark_fc',
                                             activation_fn=None)
        print(landmark_pred.get_shape())

        if not training:
            # Fourth slot kept as None so callers can unpack a 4-tuple.
            return cls_prob, bbox_pred, landmark_pred, None

        cls_loss = cls_ohem(cls_prob, label)
        bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
        accuracy, recall = cal_accuracy(cls_prob, label)
        landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
        L2_loss = tf.add_n(tf.losses.get_regularization_losses())
        return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, recall
def O_Net(self, inputs):
    """Inference-only O-Net under the 'ONet' variable scope.

    Returns (cls_prob, bbox_pred, landmark_pred) with shapes
    batch*2, batch*4, batch*10 respectively.
    """
    with tf.variable_scope('ONet', reuse=None):
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=self.prelu,
                weights_initializer=slim.xavier_initializer(),
                biases_initializer=tf.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(0.0005),
                padding='valid'):
            print(inputs.get_shape())
            x = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope="conv1")
            print(x.get_shape())
            x = slim.max_pool2d(x, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME')
            print(x.get_shape())
            x = slim.conv2d(x, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv2")
            print(x.get_shape())
            x = slim.max_pool2d(x, kernel_size=[3, 3], stride=2, scope="pool2")
            print(x.get_shape())
            x = slim.conv2d(x, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv3")
            print(x.get_shape())
            x = slim.max_pool2d(x, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME')
            print(x.get_shape())
            x = slim.conv2d(x, num_outputs=128, kernel_size=[2, 2], stride=1, scope="conv4")
            # print(x.get_shape())
            flat = slim.flatten(x)
            print(flat.get_shape())
            dense = slim.fully_connected(flat, num_outputs=256, scope="conv5",
                                         activation_fn=self.prelu)
            print(dense.get_shape())
            # batch*2 face classification.
            cls_prob = slim.fully_connected(dense, num_outputs=2, scope="conv6-1",
                                            activation_fn=tf.nn.softmax)
            print(cls_prob.get_shape())
            # batch*4 bounding-box regression.
            bbox_pred = slim.fully_connected(dense, num_outputs=4, scope="conv6-2",
                                             activation_fn=None)
            print(bbox_pred.get_shape())
            # batch*10 landmark regression.
            landmark_pred = slim.fully_connected(dense, num_outputs=10, scope="conv6-3",
                                                 activation_fn=None)
            print(landmark_pred.get_shape())
            return cls_prob, bbox_pred, landmark_pred
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """MTCNN proposal network (P-Net), fully convolutional.

    Args:
        inputs: image batch, NHWC (presumably 12x12 crops at train time and
            full images at test time -- TODO confirm).
        label, bbox_target, landmark_target: training targets; ignored when
            training is False.
        training: build-mode switch.

    Returns:
        training=True:  (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy)
        training=False: (cls_prob, bbox_pred, landmark_pred) with the batch
            dimension squeezed out (test batch size is 1).
    """
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        _activation_summary(net)
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME')
        _activation_summary(net)
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2')
        _activation_summary(net)
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3')
        _activation_summary(net)
        # Face/non-face probabilities: batch*H*W*2.
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
                              scope='conv4_1', activation_fn=tf.nn.softmax)
        _activation_summary(conv4_1)
        # Bounding-box regression: batch*H*W*4.
        bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1,
                                scope='conv4_2', activation_fn=None)
        _activation_summary(bbox_pred)
        # Landmark regression: batch*H*W*10.
        landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1], stride=1,
                                    scope='conv4_3', activation_fn=None)
        _activation_summary(landmark_pred)
        if training:
            # For training crops H and W are 1 (tf.squeeze([1, 2]) requires
            # it), so the spatial dims can be dropped before the OHEM losses.
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            landmark_pred = tf.squeeze(landmark_pred, [1, 2], name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            # Fix: slim.losses.get_regularization_losses() is a deprecated
            # alias; use tf.losses like the sibling networks in this file.
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            # When testing, batch_size = 1: drop the batch dimension.
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    """MTCNN proposal network (P-Net) with layer-shape tracing.

    Fix: the original body used Python-2 ``print x`` statements while the
    rest of the file uses ``print(...)`` calls; converted for Python 3.
    Also replaced the deprecated ``slim.losses`` alias with ``tf.losses``.

    Returns:
        training=True:  (cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy)
        training=False: (cls_prob, bbox_pred, landmark_pred), batch dim squeezed.
    """
    # define common conv params once for the whole network
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3')
        print(net.get_shape())
        # batch*H*W*2
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
                              scope='conv4_1', activation_fn=tf.nn.softmax)
        print(conv4_1.get_shape())
        # batch*H*W*4
        bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1,
                                scope='conv4_2', activation_fn=None)
        print(bbox_pred.get_shape())
        # batch*H*W*10
        landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1], stride=1,
                                    scope='conv4_3', activation_fn=None)
        print(landmark_pred.get_shape())
        if training:
            # batch*2
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            # batch*4
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            # batch*10
            landmark_pred = tf.squeeze(landmark_pred, [1, 2], name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            # Fix: use tf.losses instead of the deprecated slim.losses alias.
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            # when test, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test
def network(in_image, if_is_training):
    """Conv net that regresses four 2-D corner points from an image.

    Args:
        in_image: input image batch (NHWC).
        if_is_training: bool (or bool tensor) driving batch-norm mode.

    Returns:
        pre_loca: tensor of shape [-1, 4, 2]; sigmoid keeps the
        coordinates in [0, 1].
    """
    batch_norm_params = {
        'is_training': if_is_training,
        'zero_debias_moving_mean': True,
        'decay': 0.99,
        'epsilon': 0.001,
        'scale': True,
        'updates_collections': None
    }
    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        padding='SAME',
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # Fix: removed the unused local `out_1 = 32` (dead code).
        out_2 = 64
        out_3 = 128
        net = slim.conv2d(in_image, num_outputs=out_2, kernel_size=[5, 5], stride=1, scope='conv1')
        print('1_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1')
        print('1_pool:\t', net.get_shape())
        net = slim.conv2d(net, num_outputs=out_2, kernel_size=[5, 5], stride=1, scope='conv2')
        print('2_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool2')
        print('2_pool:\t', net.get_shape())
        net = slim.conv2d(net, num_outputs=out_3, kernel_size=[3, 3], stride=1, scope='conv3_1')
        net = slim.conv2d(net, num_outputs=out_3, kernel_size=[3, 3], stride=1, scope='conv3_2')
        print('3_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool3')
        print('3_pool:\t', net.get_shape())
        net = slim.flatten(net, scope='flatten')
    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        net = slim.fully_connected(
            net, 1000,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc_total')
        print('fc:\t', net.get_shape())
        pre_loca = slim.fully_connected(
            net, 2000,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc2_1')
        # Final head: 8 = 4 points x 2 coordinates, squashed with sigmoid.
        pre_loca = slim.fully_connected(
            pre_loca, 8,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc2_2')
    pre_loca = tf.reshape(pre_loca, shape=[-1, 4, 2])
    return pre_loca
def build_network(self, images, is_training=True, scope='yolov1'):
    # Build the YOLOv1 graph. When self.pre_training is True the backbone
    # is capped with an average-pool classifier (pre-training head) and
    # those logits are returned early; otherwise the full detection head
    # is built and the flat fc3 output (self.output_size units) is
    # returned -- the caller reshapes it to [N, S, S, B*5+C].
    net = images
    with tf.variable_scope(scope):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(0.00004)):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=slim.xavier_initializer(),
                    normalizer_fn=slim.batch_norm,
                    activation_fn=slim.nn.leaky_relu,
                    normalizer_params=self.bn_params):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=is_training):
                    net = slim.conv2d(net, 64, [7, 7], stride=2, padding='SAME', scope='layer1')
                    net = slim.max_pool2d(net, [2, 2], stride=2, padding='SAME', scope='pool1')
                    net = slim.conv2d(net, 192, [3, 3], stride=1, padding='SAME', scope='layer2')
                    net = slim.max_pool2d(net, [2, 2], stride=2, padding='SAME', scope='pool2')
                    # Alternating 1x1 bottleneck / 3x3 expand pairs.
                    net = slim.conv2d(net, 128, [1, 1], stride=1, padding='SAME', scope='layer3_1')
                    net = slim.conv2d(net, 256, [3, 3], stride=1, padding='SAME', scope='layer3_2')
                    net = slim.conv2d(net, 256, [1, 1], stride=1, padding='SAME', scope='layer3_3')
                    net = slim.conv2d(net, 512, [3, 3], stride=1, padding='SAME', scope='layer3_4')
                    net = slim.max_pool2d(net, [2, 2], stride=2, padding='SAME', scope='pool3')
                    net = slim.conv2d(net, 256, [1, 1], stride=1, padding='SAME', scope='layer4_1')
                    net = slim.conv2d(net, 512, [3, 3], stride=1, padding='SAME', scope='layer4_2')
                    net = slim.conv2d(net, 256, [1, 1], stride=1, padding='SAME', scope='layer4_3')
                    net = slim.conv2d(net, 512, [3, 3], stride=1, padding='SAME', scope='layer4_4')
                    net = slim.conv2d(net, 256, [1, 1], stride=1, padding='SAME', scope='layer4_5')
                    net = slim.conv2d(net, 512, [3, 3], stride=1, padding='SAME', scope='layer4_6')
                    net = slim.conv2d(net, 256, [1, 1], stride=1, padding='SAME', scope='layer4_7')
                    net = slim.conv2d(net, 512, [3, 3], stride=1, padding='SAME', scope='layer4_8')
                    net = slim.conv2d(net, 512, [1, 1], stride=1, padding='SAME', scope='layer4_9')
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer4_10')
                    net = slim.max_pool2d(net, [2, 2], stride=2, padding='SAME', scope='pool4')
                    net = slim.conv2d(net, 512, [1, 1], stride=1, padding='SAME', scope='layer5_1')
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer5_2')
                    net = slim.conv2d(net, 512, [1, 1], stride=1, padding='SAME', scope='layer5_3')
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer5_4')
                    if self.pre_training:
                        # Classification head used for backbone pre-training.
                        net = slim.avg_pool2d(net, [7, 7], stride=1, padding='VALID', scope='clssify_avg5')
                        net = slim.flatten(net)
                        net = slim.fully_connected(
                            net, self.pre_train_num, activation_fn=slim.nn.leaky_relu, scope='classify_fc1')
                        return net
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer5_5')
                    net = slim.conv2d(net, 1024, [3, 3], stride=2, padding='SAME', scope='layer5_6')
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer6_1')
                    net = slim.conv2d(net, 1024, [3, 3], stride=1, padding='SAME', scope='layer6_2')
                    net = slim.flatten(net)
                    net = slim.fully_connected(
                        net, 1024, activation_fn=slim.nn.leaky_relu, scope='fc1')
                    net = slim.dropout(net, 0.5)
                    net = slim.fully_connected(
                        net, 4096, activation_fn=slim.nn.leaky_relu, scope='fc2')
                    net = slim.dropout(net, 0.5)
                    # Raw detection output, e.g. 7*7*30 values per image.
                    # net = tf.reshape(net,[-1,S,S,B*5+C])
                    net = slim.fully_connected(net, self.output_size, activation_fn=None, scope='fc3')
                    return net
def _setup_basic_network(self, inputs, is_training=True): self._end_points = {} with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.00005), padding='valid'): end_point = 'conv1' net = slim.conv2d( inputs, num_outputs=28, kernel_size=[3, 3], stride=1, scope=end_point) self._end_points[end_point] = net end_point = 'pool1' net = slim.max_pool2d( net, kernel_size=[3, 3], stride=2, scope=end_point, padding='SAME') self._end_points[end_point] = net end_point = 'conv2' net = slim.conv2d( net, num_outputs=48, kernel_size=[3, 3], stride=1, scope=end_point) self._end_points[end_point] = net end_point = 'pool2' net = slim.max_pool2d( net, kernel_size=[3, 3], stride=2, scope=end_point) self._end_points[end_point] = net end_point = 'conv3' net = slim.conv2d( net, num_outputs=64, kernel_size=[2, 2], stride=1, scope=end_point) self._end_points[end_point] = net fc_flatten = slim.flatten(net) end_point = 'fc1' fc1 = slim.fully_connected( fc_flatten, num_outputs=128, scope=end_point, activation_fn=prelu) self._end_points[end_point] = fc1 end_point = 'cls_fc' class_probability = slim.fully_connected( fc1, num_outputs=2, scope=end_point, activation_fn=tf.nn.softmax) self._end_points[end_point] = class_probability end_point = 'bbox_fc' bounding_box_predictions = slim.fully_connected( fc1, num_outputs=4, scope=end_point, activation_fn=None) self._end_points[end_point] = bounding_box_predictions end_point = 'landmark_fc' landmark_predictions = slim.fully_connected( fc1, num_outputs=10, scope=end_point, activation_fn=None) self._end_points[end_point] = landmark_predictions return (class_probability, bounding_box_predictions, landmark_predictions)
def initialize(self, config, num_classes):
    ''' Initialize the graph from scratch according to config.

    Builds placeholders, one network tower per GPU (variables shared via
    scope reuse), the configured losses, averaged gradients, the training
    and summary ops, then initializes all variables in self.sess.
    '''
    with self.graph.as_default():
        with self.sess.as_default():
            # Set up placeholders
            w, h = config.image_size
            channels = config.channels
            image_batch_placeholder = tf.placeholder(tf.float32, shape=[None, h, w, channels], name='image_batch')
            label_batch_placeholder = tf.placeholder(tf.int32, shape=[None], name='label_batch')
            learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
            keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob')
            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
            global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')

            # Split the batch evenly across GPUs.
            image_splits = tf.split(image_batch_placeholder, config.num_gpus)
            label_splits = tf.split(label_batch_placeholder, config.num_gpus)
            grads_splits = []
            split_dict = {}
            def insert_dict(k,v):
                # Accumulate per-tower tensors under a shared key so they
                # can be averaged after the loop.
                if k in split_dict:
                    split_dict[k].append(v)
                else:
                    split_dict[k] = [v]

            for i in range(config.num_gpus):
                # Tower 0 builds in the root scope; later towers reuse variables.
                scope_name = '' if i==0 else 'gpu_%d' % i
                with tf.name_scope(scope_name):
                    with tf.variable_scope('', reuse=i>0):
                        with tf.device('/gpu:%d' % i):
                            images = tf.identity(image_splits[i], name='inputs')
                            labels = tf.identity(label_splits[i], name='labels')
                            # Save the first split's input tensor for testing.
                            if i == 0:
                                self.inputs = images

                            # Build the network module from the configured path.
                            # NOTE(review): imp is deprecated on Python 3;
                            # consider importlib if this code is modernized.
                            network = imp.load_source('network', config.network)
                            prelogits = network.inference(images, keep_prob_placeholder, phase_train_placeholder,
                                bottleneck_layer_size = config.embedding_size,
                                weight_decay = config.weight_decay,
                                model_version = config.model_version)
                            prelogits = tf.identity(prelogits, name='prelogits')
                            embeddings = tf.nn.l2_normalize(prelogits, dim=1, name='embeddings')
                            if i == 0:
                                self.outputs = tf.identity(embeddings, name='outputs')

                            # Build all configured losses.
                            losses = []
                            # Original softmax
                            if 'softmax' in config.losses.keys():
                                logits = slim.fully_connected(prelogits, num_classes,
                                    weights_regularizer=slim.l2_regularizer(config.weight_decay),
                                    weights_initializer=slim.xavier_initializer(),
                                    biases_initializer=tf.constant_initializer(0.0),
                                    activation_fn=None, scope='Logits')
                                cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                                    labels=labels, logits=logits), name='cross_entropy')
                                losses.append(cross_entropy)
                                insert_dict('sloss', cross_entropy)
                            # L2-Softmax
                            if 'cosine' in config.losses.keys():
                                logits, cosine_loss = tflib.cosine_softmax(prelogits, labels, num_classes,
                                    weight_decay=config.weight_decay, **config.losses['cosine'])
                                losses.append(cosine_loss)
                                insert_dict('closs', cosine_loss)
                            # A-Softmax
                            if 'angular' in config.losses.keys():
                                angular_loss = tflib.angular_softmax(prelogits, labels, num_classes,
                                    global_step, weight_decay=config.weight_decay, **config.losses['angular'])
                                losses.append(angular_loss)
                                insert_dict('aloss', angular_loss)
                            # AM-Softmax
                            if 'am' in config.losses.keys():
                                am_loss = tflib.am_softmax(prelogits, labels, num_classes,
                                    global_step, weight_decay=config.weight_decay, **config.losses['am'])
                                losses.append(am_loss)
                                insert_dict('loss', am_loss)
                            # Max-margin Pairwise Score (MPS)
                            if 'pair' in config.losses.keys():
                                pair_loss = tflib.pair_loss(prelogits, labels, num_classes,
                                    global_step, weight_decay=config.weight_decay, **config.losses['pair'])
                                losses.append(pair_loss)
                                insert_dict('loss', pair_loss)

                            # Collect regularization and total losses, then
                            # this tower's gradients.
                            reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), name='reg_loss')
                            losses.append(reg_loss)
                            insert_dict('reg_loss', reg_loss)
                            total_loss = tf.add_n(losses, name='total_loss')
                            grads_split = tf.gradients(total_loss, tf.trainable_variables())
                            grads_splits.append(grads_split)

            # Merge the per-tower splits: average gradients and watched tensors.
            self.watchlist = {}
            grads = tflib.average_grads(grads_splits)
            for k,v in split_dict.items():
                v = tflib.average_tensors(v)
                self.watchlist[k] = v
                if 'loss' in k:
                    tf.summary.scalar('losses/' + k, v)
                else:
                    tf.summary.scalar(k, v)

            # Training operators
            apply_gradient_op = tflib.apply_gradient(tf.trainable_variables(), grads, config.optimizer,
                learning_rate_placeholder, config.learning_rate_multipliers)
            update_global_step_op = tf.assign_add(global_step, 1)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            train_ops = [apply_gradient_op, update_global_step_op] + update_ops
            train_op = tf.group(*train_ops)

            tf.summary.scalar('learning_rate', learning_rate_placeholder)
            summary_op = tf.summary.merge_all()

            # Initialize variables
            self.sess.run(tf.local_variables_initializer())
            self.sess.run(tf.global_variables_initializer())
            self.saver = tf.train.Saver(tf.trainable_variables())

            # Keep useful tensors on self for later feed/fetch.
            self.image_batch_placeholder = image_batch_placeholder
            self.label_batch_placeholder = label_batch_placeholder
            self.learning_rate_placeholder = learning_rate_placeholder
            self.keep_prob_placeholder = keep_prob_placeholder
            self.phase_train_placeholder = phase_train_placeholder
            self.global_step = global_step
            self.train_op = train_op
            self.summary_op = summary_op
def discriminator(images, num_classes, bottleneck_size=512, keep_prob=1.0, phase_train=True, weight_decay=0.0, reuse=None, scope='Discriminator'): with slim.arg_scope([slim.conv2d, slim.fully_connected], weights_regularizer=slim.l2_regularizer(weight_decay), activation_fn=leaky_relu, normalizer_fn=None, normalizer_params=batch_norm_params): with tf.variable_scope(scope, [images], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=phase_train): print('{} input shape:'.format(scope), [dim.value for dim in images.shape]) net = conv(images, 32, kernel_size=4, stride=2, scope='conv1') print('module_1 shape:', [dim.value for dim in net.shape]) net = conv(net, 64, kernel_size=4, stride=2, scope='conv2') print('module_2 shape:', [dim.value for dim in net.shape]) net = conv(net, 128, kernel_size=4, stride=2, scope='conv3') print('module_3 shape:', [dim.value for dim in net.shape]) net = conv(net, 256, kernel_size=4, stride=2, scope='conv4') print('module_4 shape:', [dim.value for dim in net.shape]) net = conv(net, 512, kernel_size=4, stride=2, scope='conv5') print('module_5 shape:', [dim.value for dim in net.shape]) # Patch Discrminator patch5_logits = slim.conv2d(net, 3, 1, activation_fn=None, normalizer_fn=None, scope='patch5_logits') patch_logits = tf.reshape(patch5_logits, [-1, 3]) # Global Discriminator net = slim.flatten(net) prelogits = slim.fully_connected( net, bottleneck_size, scope='Bottleneck', weights_initializer=slim.xavier_initializer(), activation_fn=None, normalizer_fn=None) prelogits = tf.nn.l2_normalize(prelogits, dim=1) print('latent shape:', [dim.value for dim in prelogits.shape]) logits = slim.fully_connected(prelogits, num_classes, scope='Logits', activation_fn=None, normalizer_fn=None) return patch_logits, logits
def O_Net(inputs): with tf.variable_scope('O_Net'): with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), padding='valid'): # pdb.set_trace() net = slim.conv2d(inputs, num_outputs=32, kernel_size=[5, 3], stride=1, scope="oconv1") # 140, 46, 32 net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="opool1", padding='SAME') # 70, 23, 32 22 net = slim.conv2d(net, num_outputs=64, kernel_size=[5, 3], stride=1, scope="oconv2") # 66, 21, 64 net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="opool2") # 32, 10, 64 333 net = slim.conv2d(net, num_outputs=64, kernel_size=[5, 3], stride=1, scope="oconv3") # 28, 8, 64 net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="opool3", padding='SAME') # 14, 4, 64 4444 net = slim.conv2d(net, num_outputs=64, kernel_size=[5, 3], stride=1, scope="oconv4") # 10, 2, 64 net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="opool4", padding='SAME') # 5, 1, 64 555 net = slim.conv2d(net, num_outputs=128, kernel_size=[3, 1], stride=1, scope="oconv5") # 3, 1, 128 6666 fc_flatten = slim.flatten(net) fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope="ofc1", activation_fn=prelu) ### 777 fc2_1 = slim.fully_connected(fc1, num_outputs=2, scope="ofc2_1", activation_fn=tf.nn.softmax) fc2_2 = slim.fully_connected(fc1, num_outputs=4, scope="ofc2_2", activation_fn=None) return (fc2_1, fc2_2)
def aspp(inputs, output_stride, batch_norm_decay, is_training, depth=256): '''实现ASPP 参数: inputs:输入四维向量 output_stride:决定空洞卷积膨胀率 batch_norm_decay:同上函数 is_training:是否训练 depth:输出通道数 返回值: ASPP后的输出 ''' with tf.variable_scope('aspp'): if output_stride not in [8, 16]: raise ValueError('out_stride整错了') # 膨胀率 # atrous_rates = [6, 12, 18] with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0005)): with slim.arg_scope([slim.conv2d], weights_initializer=slim.xavier_initializer(), normalizer_fn=slim.batch_norm, normalizer_params={ 'is_training': is_training, 'decay': batch_norm_decay }): inputs_size = tf.shape(inputs)[1:3] # slim.conv2d默认激活函数为relu,padding=SAME conv_1x1 = slim.conv2d(inputs, depth, [1, 1], stride=1, scope='conv_1x1') # 空洞卷积rate不为1 conv_3x3_1 = slim.conv2d(inputs, depth, [3, 3], stride=1, rate=1, scope='conv_3x3_1') conv_3x3_2 = slim.conv2d(inputs, depth, [3, 3], stride=1, rate=2, scope='conv_3x3_2') conv_3x3_3 = slim.conv2d(inputs, depth, [3, 3], stride=1, rate=4, scope='conv_3x3_3') # pcam = PAM_Module(inputs) with tf.variable_scope('image_level_features'): # 池化 image_level_features = tf.reduce_mean( inputs, axis=[1, 2], keep_dims=True, name='global_average_pooling') image_level_features = slim.conv2d(image_level_features, depth, [1, 1], stride=1, scope='conv_1x1') # # 双线性插值 image_level_features = tf.image.resize_bilinear( image_level_features, inputs_size, name='upsample') net = tf.concat([ conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3, image_level_features ], axis=3, name='concat') net = slim.conv2d(net, 512, [1, 1], trainable=is_training, scope='convq') return net
def _make_graph(self):
    # Build the multi-GPU training graph: one model tower per GPU with
    # shared variables pinned to the CPU, per-tower gradients, optional
    # bias-gradient doubling, gradient summation across towers, and a
    # grouped train op that also runs tower-0 batch-norm updates.
    self.logger.info("Generating training graph on {} GPUs ...".format(
        self.cfg.nr_gpus))

    weights_initializer = slim.xavier_initializer()
    biases_initializer = tf.constant_initializer(0.)
    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(
        self.cfg.weight_decay)

    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(self.cfg.nr_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i) as name_scope:
                    # Force all Variables to reside on the CPU.
                    with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                            device='/device:CPU:0'):
                        with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                            slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                            weights_regularizer=weights_regularizer,
                                            biases_regularizer=biases_regularizer,
                                            weights_initializer=weights_initializer,
                                            biases_initializer=biases_initializer):
                            # Loss over a single GPU; weight decay is added
                            # only on the last tower so it is counted once.
                            self.net.make_network(is_train=True)
                            if i == self.cfg.nr_gpus - 1:
                                loss = self.net.get_loss(include_wd=True)
                            else:
                                loss = self.net.get_loss()
                            self._input_list.append(self.net.get_inputs())
                    # Share variables with subsequent towers.
                    tf.get_variable_scope().reuse_variables()
                    if i == 0:
                        if self.cfg.nr_gpus > 1 and self.cfg.bn_train is True:
                            self.logger.warning(
                                "BN is calculated only on single GPU.")
                        # Batch-norm moving-average updates from tower 0 only.
                        extra_update_ops = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, name_scope)
                        with tf.control_dependencies(extra_update_ops):
                            grads = self._optimizer.compute_gradients(loss)
                    else:
                        grads = self._optimizer.compute_gradients(loss)
                    final_grads = []
                    with tf.variable_scope('Gradient_Mult') as scope:
                        for grad, var in grads:
                            scale = 1.
                            # Optionally apply 2x learning rate to biases.
                            if self.cfg.double_bias and '/biases:' in var.name:
                                scale *= 2.
                            if not np.allclose(scale, 1.):
                                grad = tf.multiply(grad, scale)
                            final_grads.append((grad, var))
                    tower_grads.append(final_grads)
    if len(tower_grads) > 1:
        grads = sum_gradients(tower_grads)
    else:
        grads = tower_grads[0]
    if False:
        # Dead branch kept from upstream: exponential moving average of
        # variables (disabled).
        variable_averages = tf.train.ExponentialMovingAverage(0.9999)
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(
            variables_to_average)
        apply_gradient_op = self._optimizer.apply_gradients(grads)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            *extra_update_ops)
    else:
        apply_gradient_op = self._optimizer.apply_gradients(grads)
        train_op = tf.group(apply_gradient_op, *extra_update_ops)
    return train_op
def L_O_Net(inputs, label=None, bbox_target=None, landmark_target=None, animoji_target=None, training=True):
    # O-Net variant with an extra 140-dim "animoji" regression head
    # (presumably 70 (x, y) points -- TODO confirm).
    #
    # Returns:
    #   training=True:  (cls_loss, bbox_loss, landmark_loss, animoji_loss,
    #                    L2_loss, accuracy, recall)
    #   training=False: (cls_prob, bbox_pred, landmark_pred, animoji_pred)
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print('L_O_Net network shape')
        print(inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv1', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool1', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv2', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool2', padding='valid')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope='conv3', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool3', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=128, kernel_size=[2, 2], stride=1, scope='conv4', padding='valid')
        print(net.get_shape())
        # NHWC -> NCHW before flattening; presumably to match the original
        # weight layout -- TODO confirm.
        net = tf.transpose(net, perm=[0, 3, 1, 2])
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope='fc1', activation_fn=prelu)
        print(fc1.get_shape())
        # Four heads off the shared fc1 feature.
        cls_prob = slim.fully_connected(fc1, num_outputs=2, scope='cls_fc', activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())
        bbox_pred = slim.fully_connected(fc1, num_outputs=4, scope='bbox_fc', activation_fn=None)
        print(bbox_pred.get_shape())
        landmark_pred = slim.fully_connected(fc1, num_outputs=10, scope='landmark_fc', activation_fn=None)
        print(landmark_pred.get_shape())
        animoji_pred = slim.fully_connected(fc1, num_outputs=140, scope='animoji_fc', activation_fn=None)
        print(animoji_pred.get_shape())
        if training:
            # OHEM losses defined elsewhere in this file.
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy, recall = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)
            animoji_loss = animoji_ohem(animoji_pred, animoji_target, label)
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, animoji_loss, L2_loss, accuracy, recall
        else:
            return cls_prob, bbox_pred, landmark_pred, animoji_pred
def angular_softmax(prelogits, label, num_classes, global_step, weight_decay,
                    m, lamb_min, lamb_max, reuse=None):
    '''Tensorflow implementation of Angular-Softmax (A-Softmax), proposed in:
        W. Liu, Y. Wen, Z. Yu, M. Li, B. Raj, and L. Song.
        SphereFace: Deep hypersphere embedding for face recognition. CVPR 2017.

    Args:
        prelogits: [batch, num_features] un-normalized embeddings.
        label: [batch] integer class labels.
        num_classes: number of classes/identities.
        global_step: step counter used to anneal lambda.
        weight_decay: kept for interface compatibility; the weight variable
            carries its own fixed 1e-4 regularizer.
        m: angular margin multiplier, 0 <= m <= 5.
        lamb_min, lamb_max: annealing bounds for the lambda trade-off.
        reuse: variable-scope reuse flag.

    Returns:
        Scalar cross-entropy loss tensor.

    Fix: removed the no-op self-assignments ``lamb_min = lamb_min`` /
    ``lamb_max = lamb_max`` and the unused ``batch_size`` local (dead code).
    '''
    num_features = prelogits.shape[1].value
    # cos(m*theta) expressed as a polynomial in cos(theta)
    # (Chebyshev polynomials of the first kind), indexed by m.
    lambda_m_theta = [
        lambda x: x**0,
        lambda x: x**1,
        lambda x: 2.0 * (x**2) - 1.0,
        lambda x: 4.0 * (x**3) - 3.0 * x,
        lambda x: 8.0 * (x**4) - 8.0 * (x**2) + 1.0,
        lambda x: 16.0 * (x**5) - 20.0 * (x**3) + 5.0 * x
    ]
    with tf.variable_scope('AngularSoftmax', reuse=reuse):
        weights = tf.get_variable('weights',
                                  shape=(num_features, num_classes),
                                  regularizer=slim.l2_regularizer(1e-4),
                                  initializer=slim.xavier_initializer(),
                                  trainable=True,
                                  dtype=tf.float32)
        # Annealed trade-off between plain and margin logits; stored in a
        # non-trainable variable so its current value is visible in the graph.
        lamb = tf.get_variable('lambda',
                               shape=(),
                               initializer=tf.constant_initializer(lamb_max),
                               trainable=False,
                               dtype=tf.float32)
        prelogits_norm = tf.sqrt(
            tf.reduce_sum(tf.square(prelogits), axis=1, keep_dims=True))
        weights_normed = tf.nn.l2_normalize(weights, dim=0)
        prelogits_normed = tf.nn.l2_normalize(prelogits, dim=1)

        # Compute cosine and the margin-adjusted phi(theta).
        cos_theta = tf.matmul(prelogits_normed, weights_normed)
        # Clamp into [-1, 1] so acos is well-defined.
        cos_theta = tf.minimum(1.0, tf.maximum(-1.0, cos_theta))
        theta = tf.acos(cos_theta)
        cos_m_theta = lambda_m_theta[m](cos_theta)
        # k selects the monotonic branch: phi = (-1)^k * cos(m*theta) - 2k.
        k = tf.floor(m * theta / 3.14159265)
        phi_theta = tf.pow(-1.0, k) * cos_m_theta - 2.0 * k
        # Re-scale by the embedding norm (A-Softmax keeps |x|).
        cos_theta = cos_theta * prelogits_norm
        phi_theta = phi_theta * prelogits_norm

        # Anneal lambda from lamb_max toward lamb_min over training.
        lamb_new = tf.maximum(
            lamb_min, lamb_max / (1.0 + 0.1 * tf.cast(global_step, tf.float32)))
        update_lamb = tf.assign(lamb, lamb_new)

        # Blend the plain and margin logits for the target classes only.
        with tf.control_dependencies([update_lamb]):
            label_dense = tf.one_hot(label, num_classes, dtype=tf.float32)
            logits = cos_theta
            logits -= label_dense * cos_theta * 1.0 / (1.0 + lamb)
            logits += label_dense * phi_theta * 1.0 / (1.0 + lamb)
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
                labels=label, logits=logits), name='cross_entropy')
        return cross_entropy
# yapf: disable fc = ts.add_arg_scope(tf.layers.dense) conv1d = ts.add_arg_scope(tf.layers.conv1d) conv2d = ts.add_arg_scope(tf.layers.conv2d) sep_conv2d = ts.add_arg_scope(tf.layers.separable_conv2d) max_pooling2d = ts.add_arg_scope(tf.layers.max_pooling2d) batch_norm = ts.add_arg_scope(tf.layers.batch_normalization) # yapf: enable conv2d_activation = tf.nn.elu conv2d_params = { 'kernel_size': 3, 'strides': (1, 1), 'padding': 'SAME', 'kernel_initializer': ts.xavier_initializer(), 'use_bias': True, 'bias_initializer': tf.zeros_initializer(), } sep_conv2d_params = { 'kernel_size': 3, 'strides': (1, 1), 'dilation_rate': (1, 1), 'depth_multiplier': 1, 'padding': 'SAME', 'depthwise_initializer': ts.xavier_initializer(), 'pointwise_initializer': ts.xavier_initializer(), 'use_bias': True, 'bias_initializer': tf.zeros_initializer(), }
def mobilenet(dict_data, params):
    """MobileNet v1 feature extractor followed by three dense layers.

    Args:
        dict_data: dict with key 'images' holding the input batch (NHWC).
        params: dict with required keys 'n_labels' and 'is_training', and
            optional keys 'width_multiplier' (default 1.0), 'scope'
            (default 'MobileNet'; currently unused, see note below) and
            'freeze_convs' (default False -- freezes all conv/batch-norm
            variables when True).

    Returns:
        [batch, n_labels] output tensor.

    Fix: replaced the four verbose ``if key not in params.keys()`` blocks
    with idiomatic ``dict.get()`` defaults.
    """
    n_labels = params['n_labels']
    is_training = params['is_training']
    inputs = dict_data['images']
    width_multiplier = params.get('width_multiplier', 1.0)
    # NOTE(review): unused while the variable_scope below stays commented out.
    scope = params.get('scope', 'MobileNet')
    freeze_convs = params.get('freeze_convs', False)

    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc,
                                  downsample=False, freeze_convs=False):
        # One depthwise 3x3 + pointwise 1x1 block, each followed by batch norm.
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=_stride,
                                                      depth_multiplier=1,
                                                      kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv',
                                                      trainable=not freeze_convs)
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm', trainable=not freeze_convs)
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv',
                                            trainable=not freeze_convs)
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm', trainable=not freeze_convs)
        return bn

    # with tf.variable_scope(scope) as sc:
    end_points_collection = '_end_points'
    with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                        activation_fn=None,
                        outputs_collections=[end_points_collection]):
        with slim.arg_scope([slim.batch_norm],
                            is_training=is_training,
                            activation_fn=tf.nn.relu,
                            fused=True):
            net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3],
                                     stride=2, padding='SAME', scope='conv_1',
                                     trainable=not freeze_convs)
            net = slim.batch_norm(net, scope='conv_1/batch_norm', trainable=not freeze_convs)
            net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13', freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14', freeze_convs=freeze_convs)
            net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
    net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    with tf.variable_scope('block_fc1'):
        net = tf.layers.dense(inputs=net,
                              units=1024,
                              activation=tf.nn.relu,
                              kernel_initializer=slim.xavier_initializer(),
                              kernel_regularizer=slim.l2_regularizer(0.0001))
    with tf.variable_scope('block_fc2'):
        net = tf.layers.dense(inputs=net,
                              units=512,
                              activation=tf.nn.relu,
                              kernel_initializer=slim.xavier_initializer(),
                              kernel_regularizer=slim.l2_regularizer(0.0001))
    with tf.variable_scope('block_fc3'):
        # NOTE(review): ReLU on the final output layer is unusual for
        # logits -- confirm this is intended.
        net = tf.layers.dense(inputs=net,
                              units=n_labels,
                              activation=tf.nn.relu,
                              kernel_initializer=slim.xavier_initializer(),
                              kernel_regularizer=slim.l2_regularizer(0.0001))
    return net
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True): with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), padding='valid'): print('P_Net network shape') print(inputs.get_shape()) net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1') print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME') print(net.get_shape()) net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2') print(net.get_shape()) net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3') print(net.get_shape()) conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1, scope='conv4_1', activation_fn=tf.nn.softmax) print(conv4_1.get_shape()) bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1, scope='conv4_2', activation_fn=None) print(bbox_pred.get_shape()) landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1], stride=1, scope='conv4_3', activation_fn=None) print(landmark_pred.get_shape()) if training: cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob') cls_loss = cls_ohem(cls_prob, label) bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred') bbox_loss = bbox_ohem(bbox_pred, bbox_target, label) landmark_pred = tf.squeeze(landmark_pred, [1, 2], name='landmark_pred') landmark_loss = landmark_ohem(landmark_pred, landmark_target, label) accuracy, recall = cal_accuracy(cls_prob, label) L2_loss = tf.add_n(tf.losses.get_regularization_losses()) return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, recall else: #when test,batch_size = 1 cls_pro_test = tf.squeeze(conv4_1, axis=0) bbox_pred_test = tf.squeeze(bbox_pred, axis=0) landmark_pred_test = tf.squeeze(landmark_pred, axis=0) return cls_pro_test, bbox_pred_test, landmark_pred_test