def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
    batch_norm {
      decay: 0.7
      center: false
      scale: true
      epsilon: 0.03
      train: false
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
  batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
  self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
  self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
  self.assertFalse(batch_norm_params['center'])
  self.assertTrue(batch_norm_params['scale'])
  self.assertFalse(batch_norm_params['is_training'])
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise)
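
# Illustrative usage sketch (not part of the source): building a feature
# extractor from a text-format config. The message name ssd_pb2.SsdFeatureExtractor
# and the specific field values below are assumptions made for demonstration only.
from google.protobuf import text_format
from object_detection.protos import ssd_pb2

feature_extractor_text_proto = """
  type: 'ssd_mobilenet_v1'
  depth_multiplier: 1.0
  min_depth: 16
  conv_hyperparams {
    regularizer { l2_regularizer { } }
    initializer { truncated_normal_initializer { } }
  }
"""
feature_extractor_config = ssd_pb2.SsdFeatureExtractor()
text_format.Merge(feature_extractor_text_proto, feature_extractor_config)
feature_extractor = _build_ssd_feature_extractor(
    feature_extractor_config, is_training=True)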
def test_return_non_default_batch_norm_params_with_train_during_train(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
    batch_norm {
      decay: 0.7
      center: false
      scale: true
      epsilon: 0.03
      train: true
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
  batch_norm_params = conv_scope_arguments['normalizer_params']
  self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
  self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
  self.assertFalse(batch_norm_params['center'])
  self.assertTrue(batch_norm_params['scale'])
  self.assertTrue(batch_norm_params['is_training'])
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training': is_training,
      'depth_multiplier': depth_multiplier,
      'min_depth': min_depth,
      'pad_to_multiple': pad_to_multiple,
      'conv_hyperparams_fn': conv_hyperparams,
      'reuse_weights': reuse_weights,
      'use_explicit_padding': use_explicit_padding,
      'use_depthwise': use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level': feature_extractor_config.fpn.min_level,
        'fpn_max_level': feature_extractor_config.fpn.max_level,
    })

  return feature_extractor_class(**kwargs)
def _build_arg_scope_with_conv_hyperparams(self):
  conv_hyperparams = hyperparams_pb2.Hyperparams()
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
  return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_default_arg_scope_has_conv2d_transpose_op(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l1_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
  conv_hyperparams_text_proto = """
    op: FC
    regularizer {
      l1_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
def _build_arg_scope_with_hyperparams(
    self, op_type=hyperparams_pb2.Hyperparams.CONV):
  hyperparams = hyperparams_pb2.Hyperparams()
  hyperparams_text_proto = """
    activation: NONE
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  text_format.Merge(hyperparams_text_proto, hyperparams)
  hyperparams.op = op_type
  return hyperparams_builder.build(hyperparams, is_training=True)
def test_use_relu_6_activation(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
    activation: RELU_6
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def test_do_not_use_batch_norm_if_default(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
  self.assertEqual(conv_scope_arguments['normalizer_params'], None)
def _build_conv_arg_scope_no_batch_norm(self):
  conv_hyperparams = hyperparams_pb2.Hyperparams()
  conv_hyperparams_text_proto = """
    activation: RELU_6
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      random_normal_initializer {
        stddev: 0.01
        mean: 0.0
      }
    }
  """
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
  return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l1_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  kwargs_1, kwargs_2, kwargs_3 = scope.values()
  self.assertDictEqual(kwargs_1, kwargs_2)
  self.assertDictEqual(kwargs_1, kwargs_3)
def test_use_relu_6_activation(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
    activation: RELU_6
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l1_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  kwargs_1, kwargs_2, kwargs_3 = scope.values()
  self.assertDictEqual(kwargs_1, kwargs_2)
  self.assertDictEqual(kwargs_1, kwargs_3)
def test_use_relu_activation(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
    activation: RELU
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
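
# Usage sketch (assumption, not part of the tests above): in the newer API,
# hyperparams_builder.build returns a callable that produces a slim arg_scope;
# the scope is then applied when constructing layers. `images` is a
# hypothetical input tensor used only for illustration.
images = tf.placeholder(tf.float32, shape=[1, 64, 64, 3])
scope_fn = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
with slim.arg_scope(scope_fn()):
  net = slim.conv2d(images, num_outputs=32, kernel_size=[3, 3])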
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None,
                                 inplace_batchnorm_update=False):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: if the feature extractor should reuse weights.
    inplace_batchnorm_update: Whether to update batch_norm inplace during
      training. This is required for batch norm to work correctly on TPUs.
      When this is false, user must add a control dependency on
      tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
      norm moving average parameters.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError(
        'Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise,
                                 inplace_batchnorm_update)
def test_variance_in_range_with_truncated_normal_initializer(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      truncated_normal_initializer {
        mean: 0.0
        stddev: 0.8
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  initializer = conv_scope_arguments['weights_initializer']
  self._assert_variance_in_range(initializer, shape=[100, 40],
                                 variance=0.49, tol=1e-1)
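
# Note (added for clarity): the expected variance of 0.49 rather than
# 0.8 ** 2 = 0.64 reflects the truncation. tf.truncated_normal_initializer
# discards and re-draws samples beyond two standard deviations, which shrinks
# the variance to roughly 0.77 of the untruncated value: 0.64 * 0.77 ~= 0.49.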
def _build_arg_scope_with_conv_hyperparams(self):
  conv_hyperparams = hyperparams_pb2.Hyperparams()
  conv_hyperparams_text_proto = """
    activation: RELU_6
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      random_normal_initializer {
        stddev: 0.01
        mean: 0.0
      }
    }
    batch_norm {
      train: true,
    }
  """
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
  return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      variance_scaling_initializer {
        factor: 2.0
        mode: FAN_IN
        uniform: true
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  initializer = conv_scope_arguments['weights_initializer']
  self._assert_variance_in_range(initializer, shape=[100, 40],
                                 variance=2. / 100.)
def test_return_l1_regularized_weights(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l1_regularizer {
        weight: 0.5
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
  conv_scope_arguments = scope.values()[0]
  regularizer = conv_scope_arguments['weights_regularizer']
  weights = np.array([1., -1, 4., 2.])
  with self.test_session() as sess:
    result = sess.run(regularizer(tf.constant(weights)))
  self.assertAllClose(np.abs(weights).sum() * 0.5, result)
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None,
                                 inplace_batchnorm_update=False):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: if the feature extractor should reuse weights.
    inplace_batchnorm_update: Whether to update batch_norm inplace during
      training. This is required for batch norm to work correctly on TPUs.
      When this is false, user must add a control dependency on
      tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
      norm moving average parameters.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise,
                                 inplace_batchnorm_update)
def test_variance_in_range_with_random_normal_initializer(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      random_normal_initializer {
        mean: 0.0
        stddev: 0.8
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  initializer = conv_scope_arguments['weights_initializer']
  self._assert_variance_in_range(initializer, shape=[100, 40],
                                 variance=0.64, tol=1e-1)
def _build_lstm_feature_extractor(feature_extractor_config, is_training,
                                  lstm_state_depth, reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    lstm_state_depth: An integer of the depth of the lstm state.
    reuse_weights: If the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError(
        'Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(
      is_training, depth_multiplier, min_depth, pad_to_multiple,
      conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
      override_base_feature_extractor_hyperparams, lstm_state_depth)
def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
      }
    }
    initializer {
      variance_scaling_initializer {
        factor: 2.0
        mode: FAN_AVG
        uniform: false
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  initializer = conv_scope_arguments['weights_initializer']
  self._assert_variance_in_range(initializer, shape=[100, 40],
                                 variance=4. / (100. + 40.))
def test_return_l2_regularizer_weights(self):
  conv_hyperparams_text_proto = """
    regularizer {
      l2_regularizer {
        weight: 0.42
      }
    }
    initializer {
      truncated_normal_initializer {
      }
    }
  """
  conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
  text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
  scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                       is_training=True)
  scope = scope_fn()
  conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
  regularizer = conv_scope_arguments['weights_regularizer']
  weights = np.array([1., -1, 4., 2.])
  with self.test_session() as sess:
    result = sess.run(regularizer(tf.constant(weights)))
  self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
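
# Note (added for clarity): the factor of 1/2 in the expected value comes from
# the slim/contrib l2 regularizer, which computes
# weight * tf.nn.l2_loss(w) = weight * sum(w ** 2) / 2, so the expected
# regularization loss here is 0.42 * (1 + 1 + 16 + 4) / 2.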
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of
  type `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
          first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss': second_stage_classification_loss,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries
  }

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of
  type `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  use_static_shapes = frcnn_config.use_static_shapes
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and frcnn_config.second_stage_batch_size >
      first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = (
      frcnn_config.clip_anchors_to_image)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
          first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss': second_stage_classification_loss,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks
  }

  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
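
# Hypothetical usage sketch (not from the source): this builder is normally
# reached through model_builder.build with a DetectionModel proto whose
# `faster_rcnn` field is populated. The message and field names below are
# assumptions based on the object_detection protos, and the config is
# abbreviated; a real config must also populate feature_extractor, the anchor
# generator, box predictors, and post-processing before it will build.
from google.protobuf import text_format
from object_detection.protos import model_pb2

model_text_proto = """
  faster_rcnn {
    num_classes: 3
    image_resizer {
      keep_aspect_ratio_resizer { min_dimension: 600 max_dimension: 1024 }
    }
  }
"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
detection_model = _build_faster_rcnn_model(
    model_proto.faster_rcnn, is_training=True, add_summaries=True)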
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries,
                             **kwargs):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of
  type `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    kwargs: Additional key-value arguments.
      'rpn_type': The type of RPN to use; one of 'cascade_rpn', 'orign_rpn'
        or 'without_rpn'. The latter requires externally supplied boxes that
        replace the proposals generated by the RPN.
      'filter_fn_arg': Arguments for the filter function, which uses boxes to
        filter the proposals.
      'replace_rpn_arg': A dictionary, used only when rpn_type is
        'without_rpn' and it is not None, to replace the proposals generated
        by the RPN with (possibly adjusted) ground-truth boxes.
        'type': a string, either 'gt' or 'others'.
        'scale': a float used to scale the boxes (e.g. the ground truth).

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  use_static_shapes = frcnn_config.use_static_shapes and (
      frcnn_config.use_static_shapes_for_eval or is_training)
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and frcnn_config.second_stage_batch_size >
      first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = (
      frcnn_config.clip_anchors_to_image)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
          first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss': second_stage_classification_loss,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks
  }

  filter_fn_arg = kwargs.get('filter_fn_arg')
  if filter_fn_arg:
    filter_fn = functools.partial(filter_bbox, **filter_fn_arg)
    common_kwargs['filter_fn'] = filter_fn
  rpn_type = kwargs.get('rpn_type')
  if rpn_type:
    common_kwargs['rpn_type'] = rpn_type
  replace_rpn_arg = kwargs.get('replace_rpn_arg')
  if replace_rpn_arg:
    common_kwargs['replace_rpn_arg'] = replace_rpn_arg

  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
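
# Hypothetical call sketch (values are placeholders, not from the source)
# showing how the **kwargs documented above are threaded into common_kwargs;
# only the keys named in the docstring are used:
detection_model = _build_faster_rcnn_model(
    frcnn_config, is_training=True, add_summaries=True,
    rpn_type='without_rpn',
    replace_rpn_arg={'type': 'gt', 'scale': 1.0})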
def _build_rcnn_attention_model(rcnna_config, is_training, is_calibration):
  """Builds a R-CNN attention model based on the model config.

  Args:
    rcnna_config: A rcnn_attention.proto object containing the config for the
      desired RCNNAttention model.
    is_training: True if this model is being built for training purposes.

  Returns:
    RCNNAttentionMetaArch based on the config.

  Raises:
    ValueError: If rcnna_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = rcnna_config.num_classes
  k_shot = rcnna_config.k_shot
  image_resizer_fn = image_resizer_builder.build(rcnna_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      rcnna_config.feature_extractor, is_training)

  first_stage_only = rcnna_config.first_stage_only
  first_stage_anchor_generator = anchor_generator_builder.build(
      rcnna_config.first_stage_anchor_generator)

  first_stage_atrous_rate = rcnna_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      rcnna_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      rcnna_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = rcnna_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = rcnna_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      rcnna_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = rcnna_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = rcnna_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = rcnna_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      rcnna_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = rcnna_config.first_stage_objectness_loss_weight

  initial_crop_size = rcnna_config.initial_crop_size
  maxpool_kernel_size = rcnna_config.maxpool_kernel_size
  maxpool_stride = rcnna_config.maxpool_stride

  second_stage_box_predictor = build_box_predictor(
      hyperparams_builder.build,
      rcnna_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = rcnna_config.second_stage_batch_size
  second_stage_balance_fraction = rcnna_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       rcnna_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      rcnna_config.second_stage_localization_loss_weight)
  second_stage_classification_loss_weight = (
      rcnna_config.second_stage_classification_loss_weight)

  hard_example_miner = None
  if rcnna_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        rcnna_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  attention_tree = None
  if rcnna_config.HasField('attention_tree'):
    attention_tree = attention_tree_builder.build(
        hyperparams_builder.build,
        rcnna_config.attention_tree,
        rcnna_config.k_shot,
        num_classes,
        rcnna_config.num_negative_bags,
        is_training,
        is_calibration)

  second_stage_convline = None
  if rcnna_config.HasField('second_stage_convline'):
    second_stage_convline = convline_builder.build(
        hyperparams_builder.build, None,
        rcnna_config.second_stage_convline, is_training)

  common_kwargs = {
      'is_training': is_training,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope':
          first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'initial_crop_size': initial_crop_size,
      'maxpool_kernel_size': maxpool_kernel_size,
      'maxpool_stride': maxpool_stride,
      'second_stage_mask_rcnn_box_predictor': second_stage_box_predictor,
      'num_classes': num_classes
  }

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    raise ValueError('RFCNBoxPredictor is not supported.')
  elif rcnna_config.build_faster_rcnn_arch:
    model = faster_rcnn_meta_arch.FasterRCNNMetaArch(**common_kwargs)
    model._k_shot = k_shot
    model._tree_debug_tensors = lambda: {}
    return model
  else:
    return rcnn_attention_meta_arch.RCNNAttentionMetaArch(
        k_shot=k_shot,
        attention_tree=attention_tree,
        second_stage_convline=second_stage_convline,
        attention_tree_only=rcnna_config.attention_tree_only,
        add_gt_boxes_to_rpn=rcnna_config.add_gt_boxes_to_rpn,
        **common_kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, mtl=None):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of
  type `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor_kwargs = {}
  feature_extractor_kwargs[
      'freeze_layer'] = frcnn_config.feature_extractor.freeze_layer
  feature_extractor_kwargs[
      'batch_norm_trainable'] = frcnn_config.feature_extractor.batch_norm_trainable
  if frcnn_config.feature_extractor.HasField('weight_decay'):
    feature_extractor_kwargs['weight_decay'] = \
        frcnn_config.feature_extractor.weight_decay
  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor,
      is_training and frcnn_config.feature_extractor.trainable,
      reuse_weights=tf.AUTO_REUSE,
      **feature_extractor_kwargs)

  first_stage_only = frcnn_config.first_stage_only
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  first_stage_clip_window = frcnn_config.first_stage_clip_window

  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_trainable = \
      frcnn_config.first_stage_box_predictor_trainable
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training and
      frcnn_config.second_stage_box_predictor.trainable,
      num_classes=num_classes,
      reuse_weights=tf.AUTO_REUSE)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn,
   second_stage_score_conversion_fn) = post_processing_builder.build(
       frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)

  if mtl.window:
    window_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        mtl.window_box_predictor,
        is_training=is_training and mtl.window_box_predictor.trainable,
        num_classes=num_classes + 1,
        reuse_weights=tf.AUTO_REUSE)
  else:
    window_box_predictor = second_stage_box_predictor

  if mtl.closeness:
    closeness_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        mtl.closeness_box_predictor,
        is_training=is_training and mtl.closeness_box_predictor.trainable,
        num_classes=num_classes + 1,
        reuse_weights=tf.AUTO_REUSE)
  else:
    closeness_box_predictor = second_stage_box_predictor

  if mtl.edgemask:
    edgemask_predictor = mask_predictor_builder.build(
        hyperparams_builder.build,
        mtl.edgemask_predictor,
        is_training=is_training and mtl.edgemask_predictor.trainable,
        num_classes=2,
        reuse_weights=tf.AUTO_REUSE,
        channels=1)
  else:
    edgemask_predictor = None

  mtl_refiner_arg_scope = None
  if mtl.refine:
    mtl_refiner_arg_scope = hyperparams_builder.build(
        mtl.refiner_fc_hyperparams, is_training)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_clip_window': first_stage_clip_window,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_trainable':
          first_stage_box_predictor_trainable,
      'first_stage_box_predictor_arg_scope':
          first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
          first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
          first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
          second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
          second_stage_localization_loss_weight,
      'second_stage_classification_loss_weight':
          second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'mtl': mtl,
      'mtl_refiner_arg_scope': mtl_refiner_arg_scope,
      'window_box_predictor': window_box_predictor,
      'closeness_box_predictor': closeness_box_predictor,
      'edgemask_predictor': edgemask_predictor
  }

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
def _build_arg_scope_with_hyperparams(self, hyperparams_text_proto,
                                      is_training):
  hyperparams = hyperparams_pb2.Hyperparams()
  text_format.Merge(hyperparams_text_proto, hyperparams)
  return hyperparams_builder.build(hyperparams, is_training=is_training)
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 freeze_batchnorm, reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from
      ssd.proto.
    is_training: True if this feature extractor is being built for training.
    freeze_batchnorm: Whether to freeze batch norm parameters during training
      or not. When training with a small batch size (e.g. 1), it is desirable
      to freeze batch norm update and use pretrained batch norm params.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise

  if is_keras_extractor:
    conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
        feature_extractor_config.conv_hyperparams)
  else:
    conv_hyperparams = hyperparams_builder.build(
        feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
      not is_keras_extractor):
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  if is_keras_extractor:
    feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
        feature_type]
  else:
    feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training': is_training,
      'depth_multiplier': depth_multiplier,
      'min_depth': min_depth,
      'pad_to_multiple': pad_to_multiple,
      'use_explicit_padding': use_explicit_padding,
      'use_depthwise': use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if is_keras_extractor:
    kwargs.update({
        'conv_hyperparams': conv_hyperparams,
        'inplace_batchnorm_update': False,
        'freeze_batchnorm': freeze_batchnorm
    })
  else:
    kwargs.update({
        'conv_hyperparams_fn': conv_hyperparams,
        'reuse_weights': reuse_weights,
    })

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level': feature_extractor_config.fpn.min_level,
        'fpn_max_level': feature_extractor_config.fpn.max_level,
        'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
    })

  return feature_extractor_class(**kwargs)
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 freeze_batchnorm, reuse_weights=None):
  feature_type = feature_extractor_config.type
  is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise

  if is_keras_extractor:
    conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
        feature_extractor_config.conv_hyperparams)
  else:
    conv_hyperparams = hyperparams_builder.build(
        feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
      not is_keras_extractor):
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  if is_keras_extractor:
    feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
        feature_type]
  else:
    feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training': is_training,
      'depth_multiplier': depth_multiplier,
      'min_depth': min_depth,
      'pad_to_multiple': pad_to_multiple,
      'use_explicit_padding': use_explicit_padding,
      'use_depthwise': use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if feature_extractor_config.HasField('replace_preprocessor_with_placeholder'):
    kwargs.update({
        'replace_preprocessor_with_placeholder':
            feature_extractor_config.replace_preprocessor_with_placeholder
    })

  if feature_extractor_config.HasField('num_layers'):
    kwargs.update({'num_layers': feature_extractor_config.num_layers})

  if is_keras_extractor:
    kwargs.update({
        'conv_hyperparams': conv_hyperparams,
        'inplace_batchnorm_update': False,
        'freeze_batchnorm': freeze_batchnorm
    })
  else:
    kwargs.update({
        'conv_hyperparams_fn': conv_hyperparams,
        'reuse_weights': reuse_weights,
    })

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level': feature_extractor_config.fpn.min_level,
        'fpn_max_level': feature_extractor_config.fpn.max_level,
        'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
    })

  return feature_extractor_class(**kwargs)
def _build_ssd_feature_extractor(feature_extractor_config, is_training, freeze_batchnorm, reuse_weights=None): """Builds a ssd_meta_arch.SSDFeatureExtractor based on config. Args: feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto. is_training: True if this feature extractor is being built for training. freeze_batchnorm: Whether to freeze batch norm parameters during training or not. When training with a small batch size (e.g. 1), it is desirable to freeze batch norm update and use pretrained batch norm params. reuse_weights: if the feature extractor should reuse weights. Returns: ssd_meta_arch.SSDFeatureExtractor based on config. Raises: ValueError: On invalid feature extractor type. """ feature_type = feature_extractor_config.type is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP depth_multiplier = feature_extractor_config.depth_multiplier min_depth = feature_extractor_config.min_depth pad_to_multiple = feature_extractor_config.pad_to_multiple use_explicit_padding = feature_extractor_config.use_explicit_padding use_depthwise = feature_extractor_config.use_depthwise if is_keras_extractor: conv_hyperparams = hyperparams_builder.KerasLayerHyperparams( feature_extractor_config.conv_hyperparams) else: conv_hyperparams = hyperparams_builder.build( feature_extractor_config.conv_hyperparams, is_training) override_base_feature_extractor_hyperparams = ( feature_extractor_config.override_base_feature_extractor_hyperparams) if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and ( not is_keras_extractor): raise ValueError( 'Unknown ssd feature_extractor: {}'.format(feature_type)) if is_keras_extractor: feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[ feature_type] else: feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type] kwargs = { 'is_training': is_training, 'depth_multiplier': depth_multiplier, 'min_depth': min_depth, 'pad_to_multiple': pad_to_multiple, 'use_explicit_padding': use_explicit_padding, 'use_depthwise': use_depthwise, 'override_base_feature_extractor_hyperparams': override_base_feature_extractor_hyperparams } if feature_extractor_config.HasField( 'replace_preprocessor_with_placeholder'): kwargs.update({ 'replace_preprocessor_with_placeholder': feature_extractor_config.replace_preprocessor_with_placeholder }) if feature_extractor_config.HasField('num_layers'): kwargs.update({'num_layers': feature_extractor_config.num_layers}) if is_keras_extractor: kwargs.update({ 'conv_hyperparams': conv_hyperparams, 'inplace_batchnorm_update': False, 'freeze_batchnorm': freeze_batchnorm }) else: kwargs.update({ 'conv_hyperparams_fn': conv_hyperparams, 'reuse_weights': reuse_weights, }) if feature_extractor_config.HasField('fpn'): kwargs.update({ 'fpn_min_level': feature_extractor_config.fpn.min_level, 'fpn_max_level': feature_extractor_config.fpn.max_level, 'additional_layer_depth': feature_extractor_config.fpn.additional_layer_depth, }) return feature_extractor_class(**kwargs)
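The variants above share one structural idea: look the type up in the Keras registry first, fall back to the slim registry, and put only the keyword arguments the chosen class accepts into the kwargs dict. A self-contained sketch of that pattern with stand-in classes; nothing below is the real object_detection registry.

# Stand-in registries and extractor classes; only the dispatch shape mirrors
# the builder above.
class _SlimStubExtractor(object):
  def __init__(self, is_training, conv_hyperparams_fn, reuse_weights=None):
    self.is_training = is_training
    self.conv_hyperparams_fn = conv_hyperparams_fn

class _KerasStubExtractor(object):
  def __init__(self, is_training, conv_hyperparams, freeze_batchnorm,
               inplace_batchnorm_update):
    self.is_training = is_training
    self.freeze_batchnorm = freeze_batchnorm

_STUB_SLIM_MAP = {'stub_slim': _SlimStubExtractor}
_STUB_KERAS_MAP = {'stub_keras': _KerasStubExtractor}

def _build_stub_extractor(feature_type, is_training, freeze_batchnorm):
  is_keras = feature_type in _STUB_KERAS_MAP
  if feature_type not in _STUB_SLIM_MAP and not is_keras:
    raise ValueError('Unknown feature_extractor: {}'.format(feature_type))
  kwargs = {'is_training': is_training}
  if is_keras:
    kwargs.update({'conv_hyperparams': object(),
                   'freeze_batchnorm': freeze_batchnorm,
                   'inplace_batchnorm_update': False})
    return _STUB_KERAS_MAP[feature_type](**kwargs)
  kwargs.update({'conv_hyperparams_fn': lambda: {}})
  return _STUB_SLIM_MAP[feature_type](**kwargs)

print(type(_build_stub_extractor('stub_keras', True, True)).__name__)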
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): """Builds a Faster R-CNN or R-FCN detection model based on the model config. Builds R-FCN model if the second_stage_box_predictor in the config is of type `rfcn_box_predictor` else builds a Faster R-CNN model. Args: frcnn_config: A faster_rcnn.proto object containing the config for the desired FasterRCNNMetaArch or RFCNMetaArch. is_training: True if this model is being built for training purposes. add_summaries: Whether to add tf summaries in the model. Returns: FasterRCNNMetaArch based on the config. Raises: ValueError: If frcnn_config.type is not recognized (i.e. not registered in model_class_map). """ num_classes = frcnn_config.num_classes image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) feature_extractor = _build_faster_rcnn_feature_extractor( frcnn_config.feature_extractor, is_training) number_of_stages = frcnn_config.number_of_stages first_stage_anchor_generator = anchor_generator_builder.build( frcnn_config.first_stage_anchor_generator) first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate first_stage_box_predictor_arg_scope = hyperparams_builder.build( frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) first_stage_box_predictor_kernel_size = ( frcnn_config.first_stage_box_predictor_kernel_size) first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size first_stage_positive_balance_fraction = ( frcnn_config.first_stage_positive_balance_fraction) first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold first_stage_max_proposals = frcnn_config.first_stage_max_proposals first_stage_loc_loss_weight = ( frcnn_config.first_stage_localization_loss_weight) first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight initial_crop_size = frcnn_config.initial_crop_size maxpool_kernel_size = frcnn_config.maxpool_kernel_size maxpool_stride = frcnn_config.maxpool_stride second_stage_box_predictor = box_predictor_builder.build( hyperparams_builder.build, frcnn_config.second_stage_box_predictor, is_training=is_training, num_classes=num_classes) second_stage_batch_size = frcnn_config.second_stage_batch_size second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn ) = post_processing_builder.build(frcnn_config.second_stage_post_processing) second_stage_localization_loss_weight = ( frcnn_config.second_stage_localization_loss_weight) second_stage_classification_loss = ( losses_builder.build_faster_rcnn_classification_loss( frcnn_config.second_stage_classification_loss)) second_stage_classification_loss_weight = ( frcnn_config.second_stage_classification_loss_weight) second_stage_mask_prediction_loss_weight = ( frcnn_config.second_stage_mask_prediction_loss_weight) hard_example_miner = None if frcnn_config.HasField('hard_example_miner'): hard_example_miner = losses_builder.build_hard_example_miner( frcnn_config.hard_example_miner, second_stage_classification_loss_weight, second_stage_localization_loss_weight) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': feature_extractor, 'number_of_stages': number_of_stages, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_atrous_rate': 
first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope': first_stage_box_predictor_arg_scope, 'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_positive_balance_fraction': first_stage_positive_balance_fraction, 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_balance_fraction': second_stage_balance_fraction, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss': second_stage_classification_loss, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'hard_example_miner': hard_example_miner, 'add_summaries': add_summaries} if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor): return rfcn_meta_arch.RFCNMetaArch( second_stage_rfcn_box_predictor=second_stage_box_predictor, **common_kwargs) else: return faster_rcnn_meta_arch.FasterRCNNMetaArch( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs)
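As the docstring above notes, the choice between RFCNMetaArch and FasterRCNNMetaArch is driven entirely by which box predictor the config selects. A hedged config sketch: the faster_rcnn_pb2 module, the FasterRcnn message and the mask_rcnn_box_predictor field name are assumptions about the protos, while second_stage_box_predictor and rfcn_box_predictor come from the code itself.

from google.protobuf import text_format
from object_detection.protos import faster_rcnn_pb2

# Config that ends up in the R-FCN branch of the builder above.
rfcn_config = faster_rcnn_pb2.FasterRcnn()
text_format.Merge(
    'second_stage_box_predictor { rfcn_box_predictor { } }', rfcn_config)

# Config that ends up in the Faster R-CNN branch instead.
frcnn_variant = faster_rcnn_pb2.FasterRcnn()
text_format.Merge(
    'second_stage_box_predictor { mask_rcnn_box_predictor { } }', frcnn_variant)

# box_predictor_builder.build turns the first config into an RfcnBoxPredictor,
# which is exactly what the isinstance check at the end of the builder keys on.
assert rfcn_config.second_stage_box_predictor.HasField('rfcn_box_predictor')
assert frcnn_variant.second_stage_box_predictor.HasField('mask_rcnn_box_predictor')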
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries, meta_architecture='faster_rcnn'): """Builds a Faster R-CNN or R-FCN detection model based on the model config. Builds R-FCN model if the second_stage_box_predictor in the config is of type `rfcn_box_predictor` else builds a Faster R-CNN model. Args: frcnn_config: A faster_rcnn.proto object containing the config for the desired FasterRCNNMetaArch or RFCNMetaArch. is_training: True if this model is being built for training purposes. add_summaries: Whether to add tf summaries in the model. meta_architecture: Which meta-architecture variant to construct; one of 'faster_rcnn', 'faster_rcnn_override_RPN' or 'faster_rcnn_rpn_blend'. Returns: FasterRCNNMetaArch based on the config. Raises: ValueError: If frcnn_config.type is not recognized (i.e. not registered in model_class_map). """ num_classes = frcnn_config.num_classes image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) feature_extractor = _build_faster_rcnn_feature_extractor( frcnn_config.feature_extractor, is_training, frcnn_config.inplace_batchnorm_update) number_of_stages = frcnn_config.number_of_stages first_stage_anchor_generator = anchor_generator_builder.build( frcnn_config.first_stage_anchor_generator) first_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'proposal', use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher) first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build( frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) first_stage_box_predictor_kernel_size = ( frcnn_config.first_stage_box_predictor_kernel_size) first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size # TODO(bhattad): When eval is supported using static shapes, add separate # use_static_shapes_for_training and use_static_shapes_for_evaluation. 
use_static_shapes = frcnn_config.use_static_shapes and is_training first_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=frcnn_config.first_stage_positive_balance_fraction, is_static=frcnn_config.use_static_balanced_label_sampler and is_training) first_stage_max_proposals = frcnn_config.first_stage_max_proposals first_stage_proposals_path = frcnn_config.first_stage_proposals_path if (frcnn_config.first_stage_nms_iou_threshold < 0 or frcnn_config.first_stage_nms_iou_threshold > 1.0): raise ValueError('iou_threshold not in [0, 1.0].') if (is_training and frcnn_config.second_stage_batch_size > first_stage_max_proposals): raise ValueError('second_stage_batch_size should be no greater than ' 'first_stage_max_proposals.') first_stage_non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=frcnn_config.first_stage_nms_score_threshold, iou_thresh=frcnn_config.first_stage_nms_iou_threshold, max_size_per_class=frcnn_config.first_stage_max_proposals, max_total_size=frcnn_config.first_stage_max_proposals, use_static_shapes=use_static_shapes and is_training) first_stage_loc_loss_weight = ( frcnn_config.first_stage_localization_loss_weight) first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight initial_crop_size = frcnn_config.initial_crop_size maxpool_kernel_size = frcnn_config.maxpool_kernel_size maxpool_stride = frcnn_config.maxpool_stride second_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'detection', use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher, iou_threshold=frcnn_config.second_stage_target_iou_threshold) second_stage_box_predictor = box_predictor_builder.build( hyperparams_builder.build, frcnn_config.second_stage_box_predictor, is_training=is_training, num_classes=num_classes) second_stage_batch_size = frcnn_config.second_stage_batch_size second_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=frcnn_config.second_stage_balance_fraction, is_static=frcnn_config.use_static_balanced_label_sampler and is_training) (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn) = post_processing_builder.build( frcnn_config.second_stage_post_processing) second_stage_localization_loss_weight = ( frcnn_config.second_stage_localization_loss_weight) second_stage_classification_loss = ( losses_builder.build_faster_rcnn_classification_loss( frcnn_config.second_stage_classification_loss)) second_stage_classification_loss_weight = ( frcnn_config.second_stage_classification_loss_weight) second_stage_mask_prediction_loss_weight = ( frcnn_config.second_stage_mask_prediction_loss_weight) hard_example_miner = None if frcnn_config.HasField('hard_example_miner'): hard_example_miner = losses_builder.build_hard_example_miner( frcnn_config.hard_example_miner, second_stage_classification_loss_weight, second_stage_localization_loss_weight) crop_and_resize_fn = (ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize else ops.native_crop_and_resize) clip_anchors_to_image = (frcnn_config.clip_anchors_to_image) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': feature_extractor, 'number_of_stages': number_of_stages, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_target_assigner': first_stage_target_assigner, 'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope_fn': 
first_stage_box_predictor_arg_scope_fn, 'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_sampler': first_stage_sampler, 'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, 'second_stage_target_assigner': second_stage_target_assigner, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_sampler': second_stage_sampler, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss': second_stage_classification_loss, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'hard_example_miner': hard_example_miner, 'add_summaries': add_summaries, 'crop_and_resize_fn': crop_and_resize_fn, 'clip_anchors_to_image': clip_anchors_to_image, 'use_static_shapes': use_static_shapes, 'resize_masks': frcnn_config.resize_masks } if isinstance(second_stage_box_predictor, rfcn_box_predictor.RfcnBoxPredictor): return rfcn_meta_arch.RFCNMetaArch( second_stage_rfcn_box_predictor=second_stage_box_predictor, **common_kwargs) elif meta_architecture == 'faster_rcnn': return faster_rcnn_meta_arch.FasterRCNNMetaArch( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs) elif meta_architecture == 'faster_rcnn_override_RPN': return faster_rcnn_meta_arch_override_RPN.FasterRCNNMetaArchOverrideRPN( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, first_stage_proposals_path=first_stage_proposals_path, second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs) elif meta_architecture == 'faster_rcnn_rpn_blend': common_kwargs['use_matmul_crop_and_resize'] = False common_kwargs[ 'first_stage_nms_iou_threshold'] = frcnn_config.first_stage_nms_iou_threshold common_kwargs[ 'first_stage_nms_score_threshold'] = frcnn_config.first_stage_nms_score_threshold common_kwargs.pop('crop_and_resize_fn') common_kwargs.pop('first_stage_non_max_suppression_fn') common_kwargs.pop('resize_masks') common_kwargs.pop('use_static_shapes') return faster_rcnn_meta_arch_rpn_blend.FasterRCNNMetaArchRPNBlend( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, first_stage_proposals_path=first_stage_proposals_path, second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs)
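The 'faster_rcnn_rpn_blend' branch above reuses the shared common_kwargs dict but has to pop the entries that FasterRCNNMetaArchRPNBlend does not accept before the call. A small stand-alone sketch of that pattern; the classes below are stubs, not the real meta-architectures.

class _DefaultArchStub(object):
  def __init__(self, crop_and_resize_fn, use_static_shapes, **kwargs):
    self.crop_and_resize_fn = crop_and_resize_fn

class _BlendArchStub(object):
  # Narrower signature: no crop_and_resize_fn / use_static_shapes.
  def __init__(self, first_stage_nms_iou_threshold, **kwargs):
    self.first_stage_nms_iou_threshold = first_stage_nms_iou_threshold

def _build_stub_variant(use_blend):
  common_kwargs = {
      'crop_and_resize_fn': lambda *args: None,
      'use_static_shapes': False,
  }
  if not use_blend:
    return _DefaultArchStub(**common_kwargs)
  # Mirror the builder: add what the blend variant needs, drop what it rejects.
  common_kwargs['first_stage_nms_iou_threshold'] = 0.7
  common_kwargs.pop('crop_and_resize_fn')
  common_kwargs.pop('use_static_shapes')
  return _BlendArchStub(**common_kwargs)

print(type(_build_stub_variant(True)).__name__)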
def _build_ssd_feature_extractor(feature_extractor_config, is_training, reuse_weights=None): """Builds a ssd_meta_arch.SSDFeatureExtractor based on config. Args: feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto. is_training: True if this feature extractor is being built for training. reuse_weights: if the feature extractor should reuse weights. Returns: ssd_meta_arch.SSDFeatureExtractor based on config. Raises: ValueError: On invalid feature extractor type. """ feature_type = feature_extractor_config.type depth_multiplier = feature_extractor_config.depth_multiplier min_depth = feature_extractor_config.min_depth pad_to_multiple = feature_extractor_config.pad_to_multiple use_explicit_padding = feature_extractor_config.use_explicit_padding use_depthwise = feature_extractor_config.use_depthwise conv_hyperparams = hyperparams_builder.build( feature_extractor_config.conv_hyperparams, is_training) override_base_feature_extractor_hyperparams = ( feature_extractor_config.override_base_feature_extractor_hyperparams) if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP: raise ValueError( 'Unknown ssd feature_extractor: {}'.format(feature_type)) feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type] kwargs = { 'is_training': is_training, 'depth_multiplier': depth_multiplier, 'min_depth': min_depth, 'pad_to_multiple': pad_to_multiple, 'conv_hyperparams_fn': conv_hyperparams, 'reuse_weights': reuse_weights, 'use_explicit_padding': use_explicit_padding, 'use_depthwise': use_depthwise, 'override_base_feature_extractor_hyperparams': override_base_feature_extractor_hyperparams } if feature_extractor_config.HasField('fpn'): kwargs.update({ 'fpn_min_level': feature_extractor_config.fpn.min_level, 'fpn_max_level': feature_extractor_config.fpn.max_level, 'additional_layer_depth': feature_extractor_config.fpn.additional_layer_depth, }) return feature_extractor_class(**kwargs)
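The HasField('fpn') branch above only forwards fpn_min_level, fpn_max_level and additional_layer_depth when the fpn message is actually set. A hedged sketch of a config that exercises it; ssd_pb2, SsdFeatureExtractor and the 'ssd_resnet50_v1_fpn' type key are assumptions, the fpn field names come from the builder.

from google.protobuf import text_format
from object_detection.protos import ssd_pb2

fpn_extractor_proto = ssd_pb2.SsdFeatureExtractor()
text_format.Merge("""
  type: 'ssd_resnet50_v1_fpn'
  fpn {
    min_level: 3
    max_level: 7
    additional_layer_depth: 256
  }
  conv_hyperparams {
    regularizer { l2_regularizer { } }
    initializer { truncated_normal_initializer { } }
  }
""", fpn_extractor_proto)

# Only a present fpn message triggers the extra kwargs in the builder above.
assert fpn_extractor_proto.HasField('fpn')
assert fpn_extractor_proto.fpn.additional_layer_depth == 256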
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): """Builds a Faster R-CNN or R-FCN detection model based on the model config. Builds R-FCN model if the second_stage_box_predictor in the config is of type `rfcn_box_predictor` else builds a Faster R-CNN model. Args: frcnn_config: A faster_rcnn.proto object containing the config for the desired FasterRCNNMetaArch or RFCNMetaArch. is_training: True if this model is being built for training purposes. add_summaries: Whether to add tf summaries in the model. Returns: FasterRCNNMetaArch based on the config. Raises: ValueError: If frcnn_config.type is not recognized (i.e. not registered in model_class_map). """ num_classes = frcnn_config.num_classes image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer) is_keras = (frcnn_config.feature_extractor.type in FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP) if is_keras: feature_extractor = _build_faster_rcnn_keras_feature_extractor( frcnn_config.feature_extractor, is_training, inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update) else: feature_extractor = _build_faster_rcnn_feature_extractor( frcnn_config.feature_extractor, is_training, inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update) number_of_stages = frcnn_config.number_of_stages first_stage_anchor_generator = anchor_generator_builder.build( frcnn_config.first_stage_anchor_generator) first_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'proposal', use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher) first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate if is_keras: first_stage_box_predictor_arg_scope_fn = ( hyperparams_builder.KerasLayerHyperparams( frcnn_config.first_stage_box_predictor_conv_hyperparams)) else: first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build( frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) first_stage_box_predictor_kernel_size = ( frcnn_config.first_stage_box_predictor_kernel_size) first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size use_static_shapes = frcnn_config.use_static_shapes and ( frcnn_config.use_static_shapes_for_eval or is_training) first_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=frcnn_config.first_stage_positive_balance_fraction, is_static=(frcnn_config.use_static_balanced_label_sampler and use_static_shapes)) first_stage_max_proposals = frcnn_config.first_stage_max_proposals if (frcnn_config.first_stage_nms_iou_threshold < 0 or frcnn_config.first_stage_nms_iou_threshold > 1.0): raise ValueError('iou_threshold not in [0, 1.0].') if (is_training and frcnn_config.second_stage_batch_size > first_stage_max_proposals): raise ValueError('second_stage_batch_size should be no greater than ' 'first_stage_max_proposals.') first_stage_non_max_suppression_fn = functools.partial( post_processing.batch_multiclass_non_max_suppression, score_thresh=frcnn_config.first_stage_nms_score_threshold, iou_thresh=frcnn_config.first_stage_nms_iou_threshold, max_size_per_class=frcnn_config.first_stage_max_proposals, max_total_size=frcnn_config.first_stage_max_proposals, use_static_shapes=use_static_shapes, use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage, use_combined_nms=frcnn_config.use_combined_nms_in_first_stage) first_stage_loc_loss_weight = ( frcnn_config.first_stage_localization_loss_weight) first_stage_obj_loss_weight = 
frcnn_config.first_stage_objectness_loss_weight initial_crop_size = frcnn_config.initial_crop_size maxpool_kernel_size = frcnn_config.maxpool_kernel_size maxpool_stride = frcnn_config.maxpool_stride second_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'detection', use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher) if is_keras: second_stage_box_predictor = box_predictor_builder.build_keras( hyperparams_builder.KerasLayerHyperparams, freeze_batchnorm=False, inplace_batchnorm_update=False, num_predictions_per_location_list=[1], box_predictor_config=frcnn_config.second_stage_box_predictor, is_training=is_training, num_classes=num_classes) else: second_stage_box_predictor = box_predictor_builder.build( hyperparams_builder.build, frcnn_config.second_stage_box_predictor, is_training=is_training, num_classes=num_classes) second_stage_batch_size = frcnn_config.second_stage_batch_size second_stage_sampler = sampler.BalancedPositiveNegativeSampler( positive_fraction=frcnn_config.second_stage_balance_fraction, is_static=(frcnn_config.use_static_balanced_label_sampler and use_static_shapes)) (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn) = post_processing_builder.build( frcnn_config.second_stage_post_processing) second_stage_localization_loss_weight = ( frcnn_config.second_stage_localization_loss_weight) second_stage_classification_loss = ( losses_builder.build_faster_rcnn_classification_loss( frcnn_config.second_stage_classification_loss)) second_stage_classification_loss_weight = ( frcnn_config.second_stage_classification_loss_weight) second_stage_mask_prediction_loss_weight = ( frcnn_config.second_stage_mask_prediction_loss_weight) hard_example_miner = None if frcnn_config.HasField('hard_example_miner'): hard_example_miner = losses_builder.build_hard_example_miner( frcnn_config.hard_example_miner, second_stage_classification_loss_weight, second_stage_localization_loss_weight) crop_and_resize_fn = (ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize else ops.native_crop_and_resize) clip_anchors_to_image = (frcnn_config.clip_anchors_to_image) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': feature_extractor, 'number_of_stages': number_of_stages, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_target_assigner': first_stage_target_assigner, 'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope_fn': first_stage_box_predictor_arg_scope_fn, 'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_sampler': first_stage_sampler, 'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, 'second_stage_target_assigner': second_stage_target_assigner, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_sampler': second_stage_sampler, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss': 
second_stage_classification_loss, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'hard_example_miner': hard_example_miner, 'add_summaries': add_summaries, 'crop_and_resize_fn': crop_and_resize_fn, 'clip_anchors_to_image': clip_anchors_to_image, 'use_static_shapes': use_static_shapes, 'resize_masks': frcnn_config.resize_masks, 'return_raw_detections_during_predict': (frcnn_config.return_raw_detections_during_predict) } if (isinstance(second_stage_box_predictor, rfcn_box_predictor.RfcnBoxPredictor) or isinstance(second_stage_box_predictor, rfcn_keras_box_predictor.RfcnKerasBoxPredictor)): return rfcn_meta_arch.RFCNMetaArch( second_stage_rfcn_box_predictor=second_stage_box_predictor, **common_kwargs) else: return faster_rcnn_meta_arch.FasterRCNNMetaArch( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, second_stage_mask_rcnn_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs)
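The builder above rejects two easy-to-miss configuration mistakes before it constructs anything. A stand-alone restatement of those checks with illustrative values; this is not a helper the library provides.

def _validate_frcnn_scalars(first_stage_nms_iou_threshold,
                            first_stage_max_proposals,
                            second_stage_batch_size,
                            is_training):
  # Mirrors the two ValueError checks in the builder above.
  if not 0.0 <= first_stage_nms_iou_threshold <= 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if is_training and second_stage_batch_size > first_stage_max_proposals:
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')

_validate_frcnn_scalars(0.7, 300, 64, is_training=True)  # passes silently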
def _build_sin_model(sin_config, is_training): """Builds a SIN detection model based on the model config. Args: sin_config: A faster_rcnn.proto object containing the config for the desired SINMetaArch or RFCNMetaArch. is_training: True if this model is being built for training purposes. Returns: SINMetaArch based on the config. Raises: ValueError: If sin_config.type is not recognized (i.e. not registered in model_class_map). """ num_classes = sin_config.num_classes image_resizer_fn = image_resizer_builder.build(sin_config.image_resizer) feature_extractor = _build_faster_rcnn_feature_extractor( sin_config.feature_extractor, is_training, reuse_weights=tf.AUTO_REUSE) fc_hyperparams = hyperparams_builder.build( sin_config.second_stage_box_predictor.sin_box_predictor.fc_hyperparams, is_training) first_stage_only = sin_config.first_stage_only first_stage_anchor_generator = anchor_generator_builder.build( sin_config.first_stage_anchor_generator) first_stage_atrous_rate = sin_config.first_stage_atrous_rate first_stage_box_predictor_arg_scope = hyperparams_builder.build( sin_config.first_stage_box_predictor_conv_hyperparams, is_training) first_stage_box_predictor_kernel_size = ( sin_config.first_stage_box_predictor_kernel_size) first_stage_box_predictor_depth = sin_config.first_stage_box_predictor_depth first_stage_minibatch_size = sin_config.first_stage_minibatch_size first_stage_positive_balance_fraction = ( sin_config.first_stage_positive_balance_fraction) first_stage_nms_score_threshold = sin_config.first_stage_nms_score_threshold first_stage_nms_iou_threshold = sin_config.first_stage_nms_iou_threshold first_stage_max_proposals = sin_config.first_stage_max_proposals first_stage_loc_loss_weight = ( sin_config.first_stage_localization_loss_weight) first_stage_obj_loss_weight = sin_config.first_stage_objectness_loss_weight initial_crop_size = sin_config.initial_crop_size maxpool_kernel_size = sin_config.maxpool_kernel_size maxpool_stride = sin_config.maxpool_stride second_stage_box_predictor = box_predictor_builder.build( hyperparams_builder.build, sin_config.second_stage_box_predictor, is_training=is_training, num_classes=num_classes) second_stage_batch_size = sin_config.second_stage_batch_size second_stage_balance_fraction = sin_config.second_stage_balance_fraction (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn ) = post_processing_builder.build(sin_config.second_stage_post_processing) second_stage_localization_loss_weight = ( sin_config.second_stage_localization_loss_weight) second_stage_classification_loss = ( losses_builder.build_faster_rcnn_classification_loss( sin_config.second_stage_classification_loss)) second_stage_classification_loss_weight = ( sin_config.second_stage_classification_loss_weight) second_stage_mask_prediction_loss_weight = ( sin_config.second_stage_mask_prediction_loss_weight) hard_example_miner = None if sin_config.HasField('hard_example_miner'): hard_example_miner = losses_builder.build_hard_example_miner( sin_config.hard_example_miner, second_stage_classification_loss_weight, second_stage_localization_loss_weight) common_kwargs = { 'is_training': is_training, 'num_classes': num_classes, 'image_resizer_fn': image_resizer_fn, 'feature_extractor': feature_extractor, 'fc_hyperparams': fc_hyperparams, 'first_stage_only': first_stage_only, 'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_box_predictor_arg_scope': first_stage_box_predictor_arg_scope, 
'first_stage_box_predictor_kernel_size': first_stage_box_predictor_kernel_size, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_positive_balance_fraction': first_stage_positive_balance_fraction, 'first_stage_nms_score_threshold': first_stage_nms_score_threshold, 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, 'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_localization_loss_weight': first_stage_loc_loss_weight, 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, 'second_stage_batch_size': second_stage_batch_size, 'second_stage_balance_fraction': second_stage_balance_fraction, 'second_stage_non_max_suppression_fn': second_stage_non_max_suppression_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_localization_loss_weight': second_stage_localization_loss_weight, 'second_stage_classification_loss': second_stage_classification_loss, 'second_stage_classification_loss_weight': second_stage_classification_loss_weight, 'hard_example_miner': hard_example_miner} if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor): return rfcn_meta_arch.RFCNMetaArch( second_stage_rfcn_box_predictor=second_stage_box_predictor, **common_kwargs) else: return sin_meta_arch.SINMetaArch( initial_crop_size=initial_crop_size, maxpool_kernel_size=maxpool_kernel_size, maxpool_stride=maxpool_stride, second_stage_box_predictor=second_stage_box_predictor, second_stage_mask_prediction_loss_weight=( second_stage_mask_prediction_loss_weight), **common_kwargs)
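Unlike the other builders in this file, _build_sin_model passes reuse_weights=tf.AUTO_REUSE through to the feature extractor. A minimal sketch of what AUTO_REUSE means in the TF1-style variable scoping this codebase uses; the scope and variable names below are illustrative.

import tensorflow as tf  # assumes the TF1.x runtime this codebase targets

def _shared_weight():
  # With reuse=tf.AUTO_REUSE the second call returns the variable the first
  # call created instead of raising a "variable already exists" error.
  with tf.variable_scope('sin_feature_extractor', reuse=tf.AUTO_REUSE):
    return tf.get_variable('w', shape=[3, 3, 3, 8])

w_first = _shared_weight()   # creates sin_feature_extractor/w
w_second = _shared_weight()  # reuses it
assert w_first.name == w_second.name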