def inference(self, mode, inputs):
    """Assemble the ResNet-101 R-FCN detection graph.

    Builds the backbone, the RPN head, the proposal step and the
    position-sensitive RoI head. In ``'TRAIN'`` mode it additionally builds
    the anchor/proposal targets and the four losses.

    Args:
        mode: Mode string. ``'TRAIN'`` selects the training graph; any other
            value selects the prediction graph. It is also forwarded
            unchanged to ``proposal_opr``.
        inputs: Three-element sequence ``[image, im_info, gt_boxes]``.
            ``gt_boxes`` only gets a static shape in ``'TRAIN'`` mode.

    Returns:
        In ``'TRAIN'`` mode, the tensor ``cross_entropy + loss_box +
        rpn_cross_entropy + rpn_loss_box``. Otherwise ``None``; the
        predictions are published through graph collections instead.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation had been lost -- confirm the scope boundaries (and
    that ``return loss`` belongs to the training path) against upstream.
    """
    is_training = mode == 'TRAIN'

    # ---- unpack the inputs and pin their static shapes ----
    image, im_info, gt_boxes = inputs
    image.set_shape([None, None, None, 3])
    im_info.set_shape([None, cfg.nr_info_dim])
    if is_training:
        gt_boxes.set_shape([None, None, 5])

    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)

    # ---- backbone description ----
    # presumably each tuple is (depth, bottleneck depth, stride, rate);
    # confirm against resnet_v1.bottleneck.
    unit = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', unit, [(256, 64, 1, 1)] * 3),
        resnet_utils.Block(
            'block2', unit, [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
        resnet_utils.Block(
            'block3', unit, [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
        resnet_utils.Block('block4', unit, [(2048, 512, 1, 2)] * 3),
    ]
    resnet_kwargs = dict(
        global_pool=False, include_root_block=False, scope='resnet_v1_101')

    # The stem and block1 are always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'):
            stem = resnet_utils.conv2d_same(
                image, 64, 7, stride=2, scope='conv1')
            stem = slim.max_pool2d(
                stem, [3, 3], stride=2, padding='SAME', scope='pool1')
        net, _ = resnet_v1.resnet_v1(stem, blocks[0:1], **resnet_kwargs)

    # block2..block4 follow the requested mode; each stage feeds the next.
    stage_features = []
    for stage_blocks in (blocks[1:2], blocks[2:3], blocks[-1:]):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net, _ = resnet_v1.resnet_v1(net, stage_blocks, **resnet_kwargs)
        stage_features.append(net)
    _, net_conv4, net_conv5 = stage_features

    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    conv_kwargs = dict(trainable=is_training, weights_initializer=initializer)

    def head_scope():
        # A fresh scope (and regularizer) per head, one call per `with`.
        return tf.variable_scope(
            'resnet_v1_101', 'resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay))

    # ---- RPN head, anchors, proposals (and training targets) ----
    with head_scope():
        rpn = slim.conv2d(
            net_conv4, 512, [3, 3], activation_fn=nn_ops.relu,
            scope="rpn_conv/3x3", **conv_kwargs)
        rpn_cls_score = slim.conv2d(
            rpn, num_anchors * 2, [1, 1], padding='VALID',
            activation_fn=None, scope='rpn_cls_score', **conv_kwargs)
        rpn_bbox_pred = slim.conv2d(
            rpn, num_anchors * 4, [1, 1], padding='VALID',
            activation_fn=None, scope='rpn_bbox_pred', **conv_kwargs)

        # Anchors are generated from the RPN feature map's dynamic size.
        feat_h = tf.cast(tf.shape(rpn)[1], tf.float32)
        feat_w = tf.cast(tf.shape(rpn)[2], tf.float32)
        anchors = generate_anchors_opr(
            feat_h, feat_w, cfg.stride[0], cfg.anchor_scales,
            cfg.anchor_ratios)

        # Softmax over pairs of scores, then restore the conv output shape.
        paired_scores = tf.reshape(rpn_cls_score, [-1, 2])
        paired_probs = tf.nn.softmax(paired_scores, name='rpn_cls_prob')
        rpn_cls_prob = tf.reshape(paired_probs, tf.shape(rpn_cls_score))

        rois, _ = proposal_opr(
            rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride, anchors,
            num_anchors, is_tfchannel=True, is_tfnms=False)

        if is_training:
            with tf.variable_scope('anchor'):
                rpn_labels, rpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gt_boxes, im_info, cfg.stride, anchors, num_anchors],
                    [tf.float32, tf.float32])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

            # Proposal targets are only built after the RPN labels exist.
            with tf.control_dependencies([rpn_labels]), \
                    tf.variable_scope('rpn_rois'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gt_boxes, im_info],
                    [tf.float32, tf.float32, tf.float32])
                labels = tf.to_int32(labels, name="to_int32")

    # ---- R-FCN head: 7x7 position-sensitive maps + PS-RoI pooling ----
    with head_scope():
        conv_new_1 = slim.conv2d(
            net_conv5, 1024, [1, 1], activation_fn=nn_ops.relu,
            scope="conv_new_1", **conv_kwargs)
        rfcn_cls = slim.conv2d(
            conv_new_1, 7 * 7 * cfg.num_classes, [1, 1], activation_fn=None,
            scope="rfcn_cls", **conv_kwargs)
        rfcn_bbox = slim.conv2d(
            conv_new_1, 7 * 7 * 4, [1, 1], activation_fn=None,
            scope="rfcn_bbox", **conv_kwargs)

        pool_kwargs = dict(group_size=7, spatial_scale=1.0 / 16.0)
        pooled_cls, _ = psroi_pooling_op.psroi_pool(
            rfcn_cls, rois, **pool_kwargs)
        pooled_loc, _ = psroi_pooling_op.psroi_pool(
            rfcn_bbox, rois, **pool_kwargs)

        cls_score = tf.reduce_mean(pooled_cls, axis=[1, 2])
        bbox_pred = tf.reduce_mean(pooled_loc, axis=[1, 2])
        cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
        # The 4 box outputs are repeated once per class.
        bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

    # ---- prediction path: rescale the box output and publish it ----
    if not is_training:
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), cfg.num_classes)
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), cfg.num_classes)
        bbox_pred *= stds
        bbox_pred += means

        for key, tensor in (
                ("rpn_cls_score", rpn_cls_score),
                ("rpn_cls_prob", rpn_cls_prob),
                ("rpn_bbox_pred", rpn_bbox_pred),
                ("cls_score", cls_score),
                ("cls_prob", cls_prob),
                ("bbox_pred", bbox_pred),
                ("rois", rois)):
            tf.add_to_collection(key, tensor)
        return None

    # ---- training path: RPN losses ----
    from detection_opr.utils import loss_opr_without_box_weight

    # `simga_rpn` is the attribute name as spelled on cfg.
    rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
        tf.reshape(rpn_bbox_pred, [-1, 4]),
        tf.reshape(rpn_bbox_targets, [-1, 4]),
        tf.reshape(rpn_labels, [-1]),
        sigma=cfg.simga_rpn)

    # Entries labelled -1 are left out of the RPN classification loss.
    flat_rpn_scores = tf.reshape(rpn_cls_score, [-1, 2])
    flat_rpn_labels = tf.reshape(rpn_labels, [-1])
    keep = tf.where(tf.not_equal(flat_rpn_labels, -1))
    kept_scores = tf.reshape(tf.gather(flat_rpn_scores, keep), [-1, 2])
    kept_labels = tf.reshape(tf.gather(flat_rpn_labels, keep), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=kept_scores, labels=kept_labels))

    # ---- training path: RCNN losses ----
    flat_labels = tf.reshape(labels, [-1])
    cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
        tf.reshape(cls_score, [-1, cfg.num_classes]), flat_labels,
        bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling,
        cfg.num_classes)
    loss_box *= 2

    # ---- publish the individual losses, then the total ----
    for key, tensor in (
            ('loss_cls', cross_entropy),
            ('loss_box', loss_box),
            ('rpn_loss_cls', rpn_cross_entropy),
            ('rpn_loss_box', rpn_loss_box)):
        tf.add_to_collection(key, tensor)

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
    tf.add_to_collection('losses', loss)
    return loss
def inference(self, mode, inputs):
    """Assemble the ResNet-101 R-FCN detection graph.

    Builds the backbone, the RPN head, the proposal step and the
    position-sensitive RoI head. In ``'TRAIN'`` mode it additionally builds
    the anchor/proposal targets and the four losses.

    Args:
        mode: Mode string. ``'TRAIN'`` selects the training graph; any other
            value selects the prediction graph. It is also forwarded
            unchanged to ``proposal_opr``.
        inputs: Three-element sequence ``[image, im_info, gt_boxes]``.
            ``gt_boxes`` only gets a static shape in ``'TRAIN'`` mode.

    Returns:
        In ``'TRAIN'`` mode, the tensor ``cross_entropy + loss_box +
        rpn_cross_entropy + rpn_loss_box``. Otherwise ``None``; the
        predictions are published through graph collections instead.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation had been lost -- confirm the scope boundaries (and
    that ``return loss`` belongs to the training path) against upstream.
    """
    is_training = mode == 'TRAIN'

    # ---- unpack the inputs and pin their static shapes ----
    image, im_info, gt_boxes = inputs
    image.set_shape([None, None, None, 3])
    im_info.set_shape([None, cfg.nr_info_dim])
    if is_training:
        gt_boxes.set_shape([None, None, 5])

    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)

    # ---- backbone description ----
    # presumably each tuple is (depth, bottleneck depth, stride, rate);
    # confirm against resnet_v1.bottleneck.
    unit = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', unit, [(256, 64, 1, 1)] * 3),
        resnet_utils.Block(
            'block2', unit, [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
        resnet_utils.Block(
            'block3', unit, [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
        resnet_utils.Block('block4', unit, [(2048, 512, 1, 2)] * 3),
    ]
    resnet_kwargs = dict(
        global_pool=False, include_root_block=False, scope='resnet_v1_101')

    # The stem and block1 are always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'):
            stem = resnet_utils.conv2d_same(
                image, 64, 7, stride=2, scope='conv1')
            stem = slim.max_pool2d(
                stem, [3, 3], stride=2, padding='SAME', scope='pool1')
        net, _ = resnet_v1.resnet_v1(stem, blocks[0:1], **resnet_kwargs)

    # block2..block4 follow the requested mode; each stage feeds the next.
    stage_features = []
    for stage_blocks in (blocks[1:2], blocks[2:3], blocks[-1:]):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net, _ = resnet_v1.resnet_v1(net, stage_blocks, **resnet_kwargs)
        stage_features.append(net)
    _, net_conv4, net_conv5 = stage_features

    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    conv_kwargs = dict(trainable=is_training, weights_initializer=initializer)

    def head_scope():
        # A fresh scope (and regularizer) per head, one call per `with`.
        return tf.variable_scope(
            'resnet_v1_101', 'resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay))

    # ---- RPN head, anchors, proposals (and training targets) ----
    with head_scope():
        rpn = slim.conv2d(
            net_conv4, 512, [3, 3], activation_fn=nn_ops.relu,
            scope="rpn_conv/3x3", **conv_kwargs)
        rpn_cls_score = slim.conv2d(
            rpn, num_anchors * 2, [1, 1], padding='VALID',
            activation_fn=None, scope='rpn_cls_score', **conv_kwargs)
        rpn_bbox_pred = slim.conv2d(
            rpn, num_anchors * 4, [1, 1], padding='VALID',
            activation_fn=None, scope='rpn_bbox_pred', **conv_kwargs)

        # Anchors are generated from the RPN feature map's dynamic size.
        feat_h = tf.cast(tf.shape(rpn)[1], tf.float32)
        feat_w = tf.cast(tf.shape(rpn)[2], tf.float32)
        anchors = generate_anchors_opr(
            feat_h, feat_w, cfg.stride[0], cfg.anchor_scales,
            cfg.anchor_ratios)

        # Softmax over pairs of scores, then restore the conv output shape.
        paired_scores = tf.reshape(rpn_cls_score, [-1, 2])
        paired_probs = tf.nn.softmax(paired_scores, name='rpn_cls_prob')
        rpn_cls_prob = tf.reshape(paired_probs, tf.shape(rpn_cls_score))

        rois, _ = proposal_opr(
            rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride, anchors,
            num_anchors, is_tfchannel=True, is_tfnms=False)

        if is_training:
            with tf.variable_scope('anchor'):
                rpn_labels, rpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gt_boxes, im_info, cfg.stride, anchors, num_anchors],
                    [tf.float32, tf.float32])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

            # Proposal targets are only built after the RPN labels exist.
            with tf.control_dependencies([rpn_labels]), \
                    tf.variable_scope('rpn_rois'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gt_boxes, im_info],
                    [tf.float32, tf.float32, tf.float32])
                labels = tf.to_int32(labels, name="to_int32")

    # ---- R-FCN head: 7x7 position-sensitive maps + PS-RoI pooling ----
    with head_scope():
        conv_new_1 = slim.conv2d(
            net_conv5, 1024, [1, 1], activation_fn=nn_ops.relu,
            scope="conv_new_1", **conv_kwargs)
        rfcn_cls = slim.conv2d(
            conv_new_1, 7 * 7 * cfg.num_classes, [1, 1], activation_fn=None,
            scope="rfcn_cls", **conv_kwargs)
        rfcn_bbox = slim.conv2d(
            conv_new_1, 7 * 7 * 4, [1, 1], activation_fn=None,
            scope="rfcn_bbox", **conv_kwargs)

        pool_kwargs = dict(group_size=7, spatial_scale=1.0 / 16.0)
        pooled_cls, _ = psroi_pooling_op.psroi_pool(
            rfcn_cls, rois, **pool_kwargs)
        pooled_loc, _ = psroi_pooling_op.psroi_pool(
            rfcn_bbox, rois, **pool_kwargs)

        cls_score = tf.reduce_mean(pooled_cls, axis=[1, 2])
        bbox_pred = tf.reduce_mean(pooled_loc, axis=[1, 2])
        cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
        # The 4 box outputs are repeated once per class.
        bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

    # ---- prediction path: rescale the box output and publish it ----
    if not is_training:
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), cfg.num_classes)
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), cfg.num_classes)
        bbox_pred *= stds
        bbox_pred += means

        for key, tensor in (
                ("rpn_cls_score", rpn_cls_score),
                ("rpn_cls_prob", rpn_cls_prob),
                ("rpn_bbox_pred", rpn_bbox_pred),
                ("cls_score", cls_score),
                ("cls_prob", cls_prob),
                ("bbox_pred", bbox_pred),
                ("rois", rois)):
            tf.add_to_collection(key, tensor)
        return None

    # ---- training path: RPN losses ----
    from detection_opr.utils import loss_opr_without_box_weight

    # `simga_rpn` is the attribute name as spelled on cfg.
    rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
        tf.reshape(rpn_bbox_pred, [-1, 4]),
        tf.reshape(rpn_bbox_targets, [-1, 4]),
        tf.reshape(rpn_labels, [-1]),
        sigma=cfg.simga_rpn)

    # Entries labelled -1 are left out of the RPN classification loss.
    flat_rpn_scores = tf.reshape(rpn_cls_score, [-1, 2])
    flat_rpn_labels = tf.reshape(rpn_labels, [-1])
    keep = tf.where(tf.not_equal(flat_rpn_labels, -1))
    kept_scores = tf.reshape(tf.gather(flat_rpn_scores, keep), [-1, 2])
    kept_labels = tf.reshape(tf.gather(flat_rpn_labels, keep), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=kept_scores, labels=kept_labels))

    # ---- training path: RCNN losses ----
    flat_labels = tf.reshape(labels, [-1])
    cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
        tf.reshape(cls_score, [-1, cfg.num_classes]), flat_labels,
        bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling,
        cfg.num_classes)
    loss_box *= 2

    # ---- publish the individual losses, then the total ----
    for key, tensor in (
            ('loss_cls', cross_entropy),
            ('loss_box', loss_box),
            ('rpn_loss_cls', rpn_cross_entropy),
            ('rpn_loss_box', rpn_loss_box)):
        tf.add_to_collection(key, tensor)

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
    tf.add_to_collection('losses', loss)
    return loss