def test_use_depthwise_convolution(self):
    """Checks shapes and variable names when use_depthwise=True.

    Builds a ConvolutionalBoxPredictor with num_classes=0 and depthwise
    prediction layers, runs it on a single 32x32 feature map with 5
    predictions per location, and verifies both the output shapes and the
    exact set of trainable variables that were created.
    """
    resolution = 32
    predictions_per_location = 5
    expected_num_anchors = resolution * resolution * predictions_per_location

    image_features = tf.placeholder(
        dtype=tf.float32, shape=[4, None, None, 64])
    predictor = box_predictor.ConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        min_depth=0,
        max_depth=32,
        num_layers_before_predictor=1,
        use_dropout=True,
        dropout_keep_prob=0.8,
        kernel_size=1,
        box_code_size=4,
        use_depthwise=True)
    predictions = predictor.predict(
        [image_features],
        num_predictions_per_location=[predictions_per_location],
        scope='BoxPredictor')
    box_encodings = tf.concat(
        predictions[box_predictor.BOX_ENCODINGS], axis=1)
    objectness_predictions = tf.concat(
        predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
    init_op = tf.global_variables_initializer()

    with self.test_session() as sess:
        sess.run(init_op)
        fetches = [tf.shape(box_encodings), tf.shape(objectness_predictions)]
        feed = {
            image_features: np.random.rand(4, resolution, resolution, 64),
        }
        box_encodings_shape, objectness_predictions_shape = sess.run(
            fetches, feed_dict=feed)
        actual_variables = {
            var.op.name for var in tf.trainable_variables()
        }

    self.assertAllEqual(box_encodings_shape,
                        [4, expected_num_anchors, 1, 4])
    self.assertAllEqual(objectness_predictions_shape,
                        [4, expected_num_anchors, 1])

    # One shared 1x1 conv layer, then a depthwise + pointwise pair for each
    # of the box-encoding and class predictors.
    expected_variables = {
        'BoxPredictor/Conv2d_0_1x1_32/biases',
        'BoxPredictor/Conv2d_0_1x1_32/weights',
        'BoxPredictor/BoxEncodingPredictor_depthwise/biases',
        'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights',
        'BoxPredictor/BoxEncodingPredictor/biases',
        'BoxPredictor/BoxEncodingPredictor/weights',
        'BoxPredictor/ClassPredictor_depthwise/biases',
        'BoxPredictor/ClassPredictor_depthwise/depthwise_weights',
        'BoxPredictor/ClassPredictor/biases',
        'BoxPredictor/ClassPredictor/weights',
    }
    self.assertEqual(expected_variables, actual_variables)
def build_convolutional_box_predictor(is_training,
                                      num_classes,
                                      conv_hyperparams_fn,
                                      min_depth,
                                      max_depth,
                                      num_layers_before_predictor,
                                      use_dropout,
                                      dropout_keep_prob,
                                      kernel_size,
                                      box_code_size,
                                      apply_sigmoid_to_scores=False,
                                      add_background_class=True,
                                      class_prediction_bias_init=0.0,
                                      use_depthwise=False,
                                      box_encodings_clip_range=None):
    """Assembles a ConvolutionalBoxPredictor from a box head and a class head.

    Args:
      is_training: Forwarded to both heads and to the predictor.
      num_classes: Number of classes, forwarded to the predictor. The class
        head receives num_classes + 1 slots when add_background_class is
        True, otherwise num_classes.
      conv_hyperparams_fn: Forwarded to the predictor.
      min_depth: Forwarded to the predictor.
      max_depth: Forwarded to the predictor.
      num_layers_before_predictor: Forwarded to the predictor.
      use_dropout: Forwarded to the class head.
      dropout_keep_prob: Forwarded to the class head.
      kernel_size: Forwarded to both heads.
      box_code_size: Forwarded to the box head.
      apply_sigmoid_to_scores: Forwarded to the class head.
      add_background_class: Whether the class head gets one extra slot.
      class_prediction_bias_init: Forwarded to the class head.
      use_depthwise: Forwarded to both heads.
      box_encodings_clip_range: Forwarded to the box head.

    Returns:
      The constructed convolutional_box_predictor.ConvolutionalBoxPredictor.
    """
    box_prediction_head = box_head.ConvolutionalBoxHead(
        is_training=is_training,
        box_code_size=box_code_size,
        kernel_size=kernel_size,
        use_depthwise=use_depthwise,
        box_encodings_clip_range=box_encodings_clip_range)

    # Reserve an extra slot for the background class only when requested.
    if add_background_class:
        num_class_slots = num_classes + 1
    else:
        num_class_slots = num_classes

    class_prediction_head = class_head.ConvolutionalClassHead(
        is_training=is_training,
        num_class_slots=num_class_slots,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        kernel_size=kernel_size,
        apply_sigmoid_to_scores=apply_sigmoid_to_scores,
        class_prediction_bias_init=class_prediction_bias_init,
        use_depthwise=use_depthwise)

    return convolutional_box_predictor.ConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_prediction_head,
        class_prediction_head=class_prediction_head,
        other_heads={},
        conv_hyperparams_fn=conv_hyperparams_fn,
        num_layers_before_predictor=num_layers_before_predictor,
        min_depth=min_depth,
        max_depth=max_depth)
def graph_fn(image_features):
    """Runs a num_classes=0 ConvolutionalBoxPredictor on one feature map.

    Args:
      image_features: The single feature map handed to predict().

    Returns:
      A (box_encodings, objectness_predictions) tuple, each obtained by
      concatenating the corresponding predictor outputs along axis 1.
    """
    # `self` comes from the enclosing test method's scope.
    predictor = box_predictor.ConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        min_depth=0,
        max_depth=32,
        num_layers_before_predictor=1,
        use_dropout=True,
        dropout_keep_prob=0.8,
        kernel_size=1,
        box_code_size=4)
    predictions = predictor.predict(
        [image_features],
        num_predictions_per_location=[1],
        scope='BoxPredictor')

    box_encodings = tf.concat(
        predictions[box_predictor.BOX_ENCODINGS], axis=1)
    objectness_predictions = tf.concat(
        predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
    return (box_encodings, objectness_predictions)
def build_convolutional_box_predictor(
    is_training,
    num_classes,
    conv_hyperparams_fn,
    min_depth,
    max_depth,
    num_layers_before_predictor,
    use_dropout,
    dropout_keep_prob,
    kernel_size,
    box_code_size,
    apply_sigmoid_to_scores=False,
    add_background_class=True,
    class_prediction_bias_init=0.0,
    use_depthwise=False,
):
    """Creates a ConvolutionalBoxPredictor wired with box and class heads.

    Args:
      is_training: Whether the BoxPredictor is in training mode.
      num_classes: Number of classes, *excluding* the background category.
        If groundtruth labels take values in {0, 1, .., K-1} then
        num_classes=K (not K+1), even though assigned classification
        targets can range over {0,... K}.
      conv_hyperparams_fn: Function that generates a tf-slim arg_scope with
        the hyperparameters for convolution ops.
      min_depth: Minimum feature depth prior to predicting box encodings and
        class predictions.
      max_depth: Maximum feature depth prior to predicting box encodings and
        class predictions. When set to 0, no additional feature map is
        inserted before location and class predictions.
      num_layers_before_predictor: Number of additional conv layers before
        the predictor.
      use_dropout: Whether to use dropout. A single dropout op is applied
        here prior to both box and class predictions, which stands in
        contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout; only used when
        use_dropout is True.
      kernel_size: Size of the final convolution kernel. If the spatial
        resolution of the feature map is smaller than the kernel size, the
        kernel size is automatically set to
        min(feature_width, feature_height).
      box_code_size: Size of the encoding for each box.
      apply_sigmoid_to_scores: If True, apply the sigmoid on the output
        class_predictions.
      add_background_class: Whether to add an implicit background class.
      class_prediction_bias_init: Constant value used to initialize the bias
        of the last conv2d layer before class prediction.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.

    Returns:
      A ConvolutionalBoxPredictor object.
    """
    box_prediction_head = box_head.ConvolutionalBoxHead(
        is_training=is_training,
        box_code_size=box_code_size,
        kernel_size=kernel_size,
        use_depthwise=use_depthwise)

    # The implicit background class, when enabled, takes one extra slot.
    if add_background_class:
        num_class_slots = num_classes + 1
    else:
        num_class_slots = num_classes

    class_prediction_head = class_head.ConvolutionalClassHead(
        is_training=is_training,
        num_class_slots=num_class_slots,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        kernel_size=kernel_size,
        apply_sigmoid_to_scores=apply_sigmoid_to_scores,
        class_prediction_bias_init=class_prediction_bias_init,
        use_depthwise=use_depthwise)

    return convolutional_box_predictor.ConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_prediction_head,
        class_prediction_head=class_prediction_head,
        other_heads={},
        conv_hyperparams_fn=conv_hyperparams_fn,
        num_layers_before_predictor=num_layers_before_predictor,
        min_depth=min_depth,
        max_depth=max_depth)
def build(argscope_fn, box_predictor_config, is_training, num_classes):
    """Constructs the box predictor selected by a BoxPredictor proto.

    Dispatches on the `box_predictor_oneof` field of the config. See
    box_predictor.proto for the configurable options and box_predictor.py
    for more details.

    Args:
      argscope_fn: A function that takes the following inputs:
          * hyperparams_pb2.Hyperparams proto
          * a boolean indicating if the model is in training mode.
        and returns a tf slim argscope for Conv and FC hyperparameters.
      box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
        the configuration.
      is_training: Whether the model is in training mode.
      num_classes: Number of classes to predict.

    Returns:
      box_predictor: box_predictor.BoxPredictor object.

    Raises:
      ValueError: If box_predictor_config is not a
        box_predictor_pb2.BoxPredictor, or on an unknown box predictor.
    """
    if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
        raise ValueError('box_predictor_config not of type '
                         'box_predictor_pb2.BoxPredictor.')

    predictor_type = box_predictor_config.WhichOneof('box_predictor_oneof')

    if predictor_type == 'convolutional_box_predictor':
        cfg = box_predictor_config.convolutional_box_predictor
        conv_hyperparams_fn = argscope_fn(cfg.conv_hyperparams, is_training)
        return convolutional_box_predictor.ConvolutionalBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            min_depth=cfg.min_depth,
            max_depth=cfg.max_depth,
            num_layers_before_predictor=cfg.num_layers_before_predictor,
            use_dropout=cfg.use_dropout,
            dropout_keep_prob=cfg.dropout_keep_probability,
            kernel_size=cfg.kernel_size,
            box_code_size=cfg.box_code_size,
            apply_sigmoid_to_scores=cfg.apply_sigmoid_to_scores,
            class_prediction_bias_init=cfg.class_prediction_bias_init,
            use_depthwise=cfg.use_depthwise)

    if predictor_type == 'weight_shared_convolutional_box_predictor':
        cfg = box_predictor_config.weight_shared_convolutional_box_predictor
        conv_hyperparams_fn = argscope_fn(cfg.conv_hyperparams, is_training)
        # Batch norm is applied iff the conv hyperparams set `batch_norm`.
        apply_batch_norm = cfg.conv_hyperparams.HasField('batch_norm')
        return convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            depth=cfg.depth,
            num_layers_before_predictor=cfg.num_layers_before_predictor,
            kernel_size=cfg.kernel_size,
            box_code_size=cfg.box_code_size,
            class_prediction_bias_init=cfg.class_prediction_bias_init,
            use_dropout=cfg.use_dropout,
            dropout_keep_prob=cfg.dropout_keep_probability,
            share_prediction_tower=cfg.share_prediction_tower,
            apply_batch_norm=apply_batch_norm)

    if predictor_type == 'mask_rcnn_box_predictor':
        cfg = box_predictor_config.mask_rcnn_box_predictor
        fc_hyperparams_fn = argscope_fn(cfg.fc_hyperparams, is_training)
        # Conv hyperparams are optional here; build the arg scope only when
        # the field is set, otherwise the mask head receives None.
        conv_hyperparams_fn = (
            argscope_fn(cfg.conv_hyperparams, is_training)
            if cfg.HasField('conv_hyperparams') else None)

        box_prediction_head = box_head.BoxHead(
            is_training=is_training,
            num_classes=num_classes,
            fc_hyperparams_fn=fc_hyperparams_fn,
            use_dropout=cfg.use_dropout,
            dropout_keep_prob=cfg.dropout_keep_probability,
            box_code_size=cfg.box_code_size,
            share_box_across_classes=cfg.share_box_across_classes)
        class_prediction_head = class_head.ClassHead(
            is_training=is_training,
            num_classes=num_classes,
            fc_hyperparams_fn=fc_hyperparams_fn,
            use_dropout=cfg.use_dropout,
            dropout_keep_prob=cfg.dropout_keep_probability)

        third_stage_heads = {}
        if cfg.predict_instance_masks:
            third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS] = (
                mask_head.MaskHead(
                    num_classes=num_classes,
                    conv_hyperparams_fn=conv_hyperparams_fn,
                    mask_height=cfg.mask_height,
                    mask_width=cfg.mask_width,
                    mask_prediction_num_conv_layers=(
                        cfg.mask_prediction_num_conv_layers),
                    mask_prediction_conv_depth=(
                        cfg.mask_prediction_conv_depth),
                    masks_are_class_agnostic=cfg.masks_are_class_agnostic))

        return mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            box_prediction_head=box_prediction_head,
            class_prediction_head=class_prediction_head,
            third_stage_heads=third_stage_heads)

    if predictor_type == 'rfcn_box_predictor':
        cfg = box_predictor_config.rfcn_box_predictor
        conv_hyperparams_fn = argscope_fn(cfg.conv_hyperparams, is_training)
        return rfcn_box_predictor.RfcnBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            crop_size=[cfg.crop_height, cfg.crop_width],
            num_spatial_bins=[
                cfg.num_spatial_bins_height,
                cfg.num_spatial_bins_width,
            ],
            depth=cfg.depth,
            box_code_size=cfg.box_code_size)

    raise ValueError('Unknown box predictor: {}'.format(predictor_type))
def build_convolutional_box_predictor(is_training,
                                      num_classes,
                                      conv_hyperparams_fn,
                                      min_depth,
                                      max_depth,
                                      num_layers_before_predictor,
                                      use_dropout,
                                      dropout_keep_prob,
                                      kernel_size,
                                      box_code_size,
                                      apply_sigmoid_to_scores=False,
                                      class_prediction_bias_init=0.0,
                                      use_depthwise=False,
                                      predict_instance_masks=False,
                                      mask_height=7,
                                      mask_width=7,
                                      masks_are_class_agnostic=False):
    """Creates a ConvolutionalBoxPredictor, optionally with a mask head.

    Args:
      is_training: Whether the BoxPredictor is in training mode.
      num_classes: Number of classes.
      conv_hyperparams_fn: Function that generates a tf-slim arg_scope with
        the hyperparameters for convolution ops.
      min_depth: Minimum feature depth prior to predicting box encodings and
        class predictions.
      max_depth: Maximum feature depth prior to predicting box encodings and
        class predictions. When set to 0, no additional feature map is
        inserted before location and class predictions.
      num_layers_before_predictor: Number of additional conv layers before
        the predictor.
      use_dropout: Whether to use dropout. A single dropout op is applied
        here prior to both box and class predictions, which stands in
        contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout; only used when
        use_dropout is True.
      kernel_size: Size of the final convolution kernel. If the spatial
        resolution of the feature map is smaller than the kernel size, the
        kernel size is automatically set to
        min(feature_width, feature_height).
      box_code_size: Size of the encoding for each box.
      apply_sigmoid_to_scores: If True, apply the sigmoid on the output
        class_predictions.
      class_prediction_bias_init: Constant value used to initialize the bias
        of the last conv2d layer before class prediction.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
      predict_instance_masks: If True, adds a third stage mask prediction
        to the returned class.
      mask_height: Desired output mask height. The default value is 7.
      mask_width: Desired output mask width. The default value is 7.
      masks_are_class_agnostic: Boolean determining if the mask-head is
        class-agnostic or not.

    Returns:
      A ConvolutionalBoxPredictor object.
    """
    box_prediction_head = box_head.ConvolutionalBoxHead(
        is_training=is_training,
        box_code_size=box_code_size,
        kernel_size=kernel_size,
        use_depthwise=use_depthwise)
    class_prediction_head = class_head.ConvolutionalClassHead(
        is_training=is_training,
        num_classes=num_classes,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        kernel_size=kernel_size,
        apply_sigmoid_to_scores=apply_sigmoid_to_scores,
        class_prediction_bias_init=class_prediction_bias_init,
        use_depthwise=use_depthwise)

    # The mask head is the only optional extra head; it is registered under
    # the predictor module's MASK_PREDICTIONS key.
    extra_heads = {}
    if predict_instance_masks:
        instance_mask_head = mask_head.ConvolutionalMaskHead(
            is_training=is_training,
            num_classes=num_classes,
            use_dropout=use_dropout,
            dropout_keep_prob=dropout_keep_prob,
            kernel_size=kernel_size,
            use_depthwise=use_depthwise,
            mask_height=mask_height,
            mask_width=mask_width,
            masks_are_class_agnostic=masks_are_class_agnostic)
        extra_heads[convolutional_box_predictor.MASK_PREDICTIONS] = (
            instance_mask_head)

    return convolutional_box_predictor.ConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_prediction_head,
        class_prediction_head=class_prediction_head,
        other_heads=extra_heads,
        conv_hyperparams_fn=conv_hyperparams_fn,
        num_layers_before_predictor=num_layers_before_predictor,
        min_depth=min_depth,
        max_depth=max_depth)