def testBatchNormScopeDoesHasIsTrainingWhenItsNotNone(self): sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=True) self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)]) sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False) self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)]) sc = mobilenet_v1.mobilenet_v1_arg_scope() self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
def build_model(): """Build the mobilenet_v1 model for evaluation. Returns: g: graph with rewrites after insertion of quantization ops and batch norm folding. eval_ops: eval ops for inference. variables_to_restore: List of variables to restore from checkpoint. """ g = tf.Graph() with g.as_default(): inputs, labels = imagenet_input(is_training=False) scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False, weight_decay=0.0) with slim.arg_scope(scope): logits, _ = mobilenet_v1.mobilenet_v1( inputs, is_training=False, depth_multiplier=FLAGS.depth_multiplier, num_classes=FLAGS.num_classes) if FLAGS.quantize: tf.contrib.quantize.create_eval_graph() eval_ops = metrics(logits, labels) return g, eval_ops
def build_model(): """Builds graph for model to train with rewrites for quantization. Returns: g: Graph with fake quantization ops and batch norm folding suitable for training quantized weights. train_tensor: Train op for execution during training. """ g = tf.Graph() with g.as_default(), tf.device( tf.train.replica_device_setter(FLAGS.ps_tasks)): inputs, labels = imagenet_input(is_training=True) with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)): logits, _ = mobilenet_v1.mobilenet_v1( inputs, is_training=True, depth_multiplier=FLAGS.depth_multiplier, num_classes=FLAGS.num_classes) tf.losses.softmax_cross_entropy(labels, logits) # Call rewriter to produce graph with fake quant ops and folded batch norms # quant_delay delays start of quantization till quant_delay steps, allowing # for better model accuracy. if FLAGS.quantize: tf.contrib.quantize.create_training_graph( quant_delay=get_quant_delay()) total_loss = tf.losses.get_total_loss(name='total_loss') # Configure the learning rate using an exponential decay. num_epochs_per_decay = 2.5 imagenet_size = 1271167 decay_steps = int(imagenet_size / FLAGS.batch_size * num_epochs_per_decay) learning_rate = tf.train.exponential_decay( get_learning_rate(), tf.train.get_or_create_global_step(), decay_steps, _LEARNING_RATE_DECAY_FACTOR, staircase=True) opt = tf.train.GradientDescentOptimizer(learning_rate) train_tensor = slim.learning.create_train_op(total_loss, optimizer=opt) slim.summaries.add_scalar_summary(total_loss, 'total_loss', 'losses') slim.summaries.add_scalar_summary(learning_rate, 'learning_rate', 'training') return g, train_tensor
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def _extract_box_classifier_features(self, proposal_feature_maps, scope): """Extracts second stage box classifier features. Args: proposal_feature_maps: A 4-D float tensor with shape [batch_size * self.max_num_proposals, crop_height, crop_width, depth] representing the feature map cropped to each proposal. scope: A scope name (unused). Returns: proposal_classifier_features: A 4-D float tensor with shape [batch_size * self.max_num_proposals, height, width, depth] representing box classifier features for each proposal. """ net = proposal_feature_maps conv_depth = 1024 if self._skip_last_stride: conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0 conv_depth = int(float(conv_depth) * conv_depth_ratio) def depth(d): return max(int(d * 1.0), 16) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights): with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=self._train_batch_norm, weight_decay=self._weight_decay)): with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], padding='SAME'): net = slim.separable_conv2d( net, depth(conv_depth), [3, 3], depth_multiplier=1, stride=2, scope='Conv2d_12_pointwise') return slim.separable_conv2d( net, depth(conv_depth), [3, 3], depth_multiplier=1, stride=1, scope='Conv2d_13_pointwise')
def _extract_proposal_features(self, preprocessed_inputs, scope): """Extracts first stage RPN features. Args: preprocessed_inputs: A [batch, height, width, channels] float32 tensor representing a batch of images. scope: A scope name. Returns: rpn_feature_map: A tensor with shape [batch, height, width, depth] activations: A dictionary mapping feature extractor tensor names to tensors Raises: InvalidArgumentError: If the spatial size of `preprocessed_inputs` (height or width) is less than 33. ValueError: If the created network is missing the required activation. """ preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim( min_dim=33, image_tensor=preprocessed_inputs) with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=self._train_batch_norm, weight_decay=self._weight_decay)): with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: params = {} if self._skip_last_stride: params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs( conv_depth_ratio_in_percentage=self._conv_depth_ratio_in_percentage) _, activations = mobilenet_v1.mobilenet_v1_base( preprocessed_inputs, final_endpoint='Conv2d_11_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope, **params) return activations['Conv2d_11_pointwise'], activations
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.pooling_pyramid_feature_maps( base_feature_map_depth=0, num_layers=6, image_features={ 'image_features': image_features['Conv2d_11_pointwise'] }) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] Raises: ValueError: if image height or width are not 256 pixels. """ image_shape = preprocessed_inputs.get_shape() image_shape.assert_has_rank(4) image_height = image_shape[1].value image_width = image_shape[2].value if image_height is None or image_width is None: shape_assert = tf.Assert( tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256), tf.equal(tf.shape(preprocessed_inputs)[2], 256)), ['image size must be 256 in both height and width.']) with tf.control_dependencies([shape_assert]): preprocessed_inputs = tf.identity(preprocessed_inputs) elif image_height != 256 or image_width != 256: raise ValueError( 'image size must be = 256 in both height and width;' ' image dim = %d,%d' % (image_height, image_width)) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256], 'conv_kernel_size': [-1, -1, 3, 3, 2], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, conv_defs=_CONV_DEFS if self._use_depthwise else None, use_explicit_padding=self._use_explicit_padding, scope=scope) depth_fn = lambda d: max(int(d * self._depth_multiplier), self. _min_depth) with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('fpn', reuse=self._reuse_weights): feature_blocks = [ 'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise' ] base_fpn_max_level = min(self._fpn_max_level, 5) feature_block_list = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_block_list.append(feature_blocks[level - 2]) fpn_features = feature_map_generators.fpn_top_down_feature_maps( [(key, image_features[key]) for key in feature_block_list], depth=depth_fn(self._additional_layer_depth), use_depthwise=self._use_depthwise) feature_maps = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_maps.append(fpn_features['top_down_{}'.format( feature_blocks[level - 2])]) last_feature_map = fpn_features['top_down_{}'.format( feature_blocks[base_fpn_max_level - 2])] # Construct coarse features for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1): if self._use_depthwise: conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d last_feature_map = conv_op( last_feature_map, num_outputs=depth_fn(self._additional_layer_depth), kernel_size=[3, 3], stride=2, padding='SAME', scope='bottom_up_Conv2d_{}'.format( i - base_fpn_max_level + 13)) feature_maps.append(last_feature_map) return feature_maps
def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self): sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=None) self.assertNotIn('is_training', sc[slim.arg_scope_func_key( slim.batch_norm)])