def testBuildAndCheckAllEndPointsApproximateFaceNet(self): batch_size = 5 height, width = 128, 128 inputs = tf.random_uniform((batch_size, height, width, 3)) with slim.arg_scope([slim.conv2d, slim.separable_conv2d], normalizer_fn=slim.batch_norm): _, end_points = mobilenet_v1.mobilenet_v1_base( inputs, final_endpoint='Conv2d_13_pointwise', depth_multiplier=0.75) _, explicit_padding_end_points = mobilenet_v1.mobilenet_v1_base( inputs, final_endpoint='Conv2d_13_pointwise', depth_multiplier=0.75, use_explicit_padding=True) # For the Conv2d_0 layer FaceNet has depth=16 endpoints_shapes = { 'Conv2d_0': [batch_size, 64, 64, 24], 'Conv2d_1_depthwise': [batch_size, 64, 64, 24], 'Conv2d_1_pointwise': [batch_size, 64, 64, 48], 'Conv2d_2_depthwise': [batch_size, 32, 32, 48], 'Conv2d_2_pointwise': [batch_size, 32, 32, 96], 'Conv2d_3_depthwise': [batch_size, 32, 32, 96], 'Conv2d_3_pointwise': [batch_size, 32, 32, 96], 'Conv2d_4_depthwise': [batch_size, 16, 16, 96], 'Conv2d_4_pointwise': [batch_size, 16, 16, 192], 'Conv2d_5_depthwise': [batch_size, 16, 16, 192], 'Conv2d_5_pointwise': [batch_size, 16, 16, 192], 'Conv2d_6_depthwise': [batch_size, 8, 8, 192], 'Conv2d_6_pointwise': [batch_size, 8, 8, 384], 'Conv2d_7_depthwise': [batch_size, 8, 8, 384], 'Conv2d_7_pointwise': [batch_size, 8, 8, 384], 'Conv2d_8_depthwise': [batch_size, 8, 8, 384], 'Conv2d_8_pointwise': [batch_size, 8, 8, 384], 'Conv2d_9_depthwise': [batch_size, 8, 8, 384], 'Conv2d_9_pointwise': [batch_size, 8, 8, 384], 'Conv2d_10_depthwise': [batch_size, 8, 8, 384], 'Conv2d_10_pointwise': [batch_size, 8, 8, 384], 'Conv2d_11_depthwise': [batch_size, 8, 8, 384], 'Conv2d_11_pointwise': [batch_size, 8, 8, 384], 'Conv2d_12_depthwise': [batch_size, 4, 4, 384], 'Conv2d_12_pointwise': [batch_size, 4, 4, 768], 'Conv2d_13_depthwise': [batch_size, 4, 4, 768], 'Conv2d_13_pointwise': [batch_size, 4, 4, 768] } self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) for endpoint_name, expected_shape in endpoints_shapes.items(): self.assertTrue(endpoint_name in end_points) self.assertListEqual( end_points[endpoint_name].get_shape().as_list(), expected_shape) self.assertItemsEqual(endpoints_shapes.keys(), explicit_padding_end_points.keys()) for endpoint_name, expected_shape in endpoints_shapes.items(): self.assertTrue(endpoint_name in explicit_padding_end_points) self.assertListEqual( explicit_padding_end_points[endpoint_name].get_shape().as_list( ), expected_shape)
def testOutputStride8BuildAndCheckAllEndPointsUptoConv2d_13(self): batch_size = 5 height, width = 224, 224 output_stride = 8 inputs = tf.random_uniform((batch_size, height, width, 3)) with slim.arg_scope([slim.conv2d, slim.separable_conv2d], normalizer_fn=slim.batch_norm): _, end_points = mobilenet_v1.mobilenet_v1_base( inputs, output_stride=output_stride, final_endpoint='Conv2d_13_pointwise') _, explicit_padding_end_points = mobilenet_v1.mobilenet_v1_base( inputs, output_stride=output_stride, final_endpoint='Conv2d_13_pointwise', use_explicit_padding=True) endpoints_shapes = { 'Conv2d_0': [batch_size, 112, 112, 32], 'Conv2d_1_depthwise': [batch_size, 112, 112, 32], 'Conv2d_1_pointwise': [batch_size, 112, 112, 64], 'Conv2d_2_depthwise': [batch_size, 56, 56, 64], 'Conv2d_2_pointwise': [batch_size, 56, 56, 128], 'Conv2d_3_depthwise': [batch_size, 56, 56, 128], 'Conv2d_3_pointwise': [batch_size, 56, 56, 128], 'Conv2d_4_depthwise': [batch_size, 28, 28, 128], 'Conv2d_4_pointwise': [batch_size, 28, 28, 256], 'Conv2d_5_depthwise': [batch_size, 28, 28, 256], 'Conv2d_5_pointwise': [batch_size, 28, 28, 256], 'Conv2d_6_depthwise': [batch_size, 28, 28, 256], 'Conv2d_6_pointwise': [batch_size, 28, 28, 512], 'Conv2d_7_depthwise': [batch_size, 28, 28, 512], 'Conv2d_7_pointwise': [batch_size, 28, 28, 512], 'Conv2d_8_depthwise': [batch_size, 28, 28, 512], 'Conv2d_8_pointwise': [batch_size, 28, 28, 512], 'Conv2d_9_depthwise': [batch_size, 28, 28, 512], 'Conv2d_9_pointwise': [batch_size, 28, 28, 512], 'Conv2d_10_depthwise': [batch_size, 28, 28, 512], 'Conv2d_10_pointwise': [batch_size, 28, 28, 512], 'Conv2d_11_depthwise': [batch_size, 28, 28, 512], 'Conv2d_11_pointwise': [batch_size, 28, 28, 512], 'Conv2d_12_depthwise': [batch_size, 28, 28, 512], 'Conv2d_12_pointwise': [batch_size, 28, 28, 1024], 'Conv2d_13_depthwise': [batch_size, 28, 28, 1024], 'Conv2d_13_pointwise': [batch_size, 28, 28, 1024] } self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) for endpoint_name, expected_shape in endpoints_shapes.items(): self.assertTrue(endpoint_name in end_points) self.assertListEqual( end_points[endpoint_name].get_shape().as_list(), expected_shape) self.assertItemsEqual(endpoints_shapes.keys(), explicit_padding_end_points.keys()) for endpoint_name, expected_shape in endpoints_shapes.items(): self.assertTrue(endpoint_name in explicit_padding_end_points) self.assertListEqual( explicit_padding_end_points[endpoint_name].get_shape().as_list( ), expected_shape)
def testModelHasExpectedNumberOfParameters(self): batch_size = 5 height, width = 224, 224 inputs = tf.random_uniform((batch_size, height, width, 3)) with slim.arg_scope([slim.conv2d, slim.separable_conv2d], normalizer_fn=slim.batch_norm): mobilenet_v1.mobilenet_v1_base(inputs) total_params, _ = slim.model_analyzer.analyze_vars( slim.get_model_variables()) self.assertAlmostEqual(3217920, total_params)
def testBuildOnlyUptoFinalEndpoint(self): print('\n=======================================================') print('testBuildOnlyUptoFinalEndpoint') batch_size = 5 height, width = 224, 224 endpoints = [ 'Conv2d_0', 'Conv2d_1_depthwise', 'Conv2d_1_pointwise', 'Conv2d_2_depthwise', 'Conv2d_2_pointwise', 'Conv2d_3_depthwise', 'Conv2d_3_pointwise', 'Conv2d_4_depthwise', 'Conv2d_4_pointwise', 'Conv2d_5_depthwise', 'Conv2d_5_pointwise', 'Conv2d_6_depthwise', 'Conv2d_6_pointwise', 'Conv2d_7_depthwise', 'Conv2d_7_pointwise', 'Conv2d_8_depthwise', 'Conv2d_8_pointwise', 'Conv2d_9_depthwise', 'Conv2d_9_pointwise', 'Conv2d_10_depthwise', 'Conv2d_10_pointwise', 'Conv2d_11_depthwise', 'Conv2d_11_pointwise', 'Conv2d_12_depthwise', 'Conv2d_12_pointwise', 'Conv2d_13_depthwise', 'Conv2d_13_pointwise' ] for index, endpoint in enumerate(endpoints): with tf.Graph().as_default(): inputs = tf.random_uniform((batch_size, height, width, 3)) out_tensor, end_points = mobilenet_v1.mobilenet_v1_base( inputs, final_endpoint=endpoint, scope='MobilenetV1_testBuildOnlyUptoFinalEndpoint') self.assertTrue( out_tensor.op.name.startswith( 'MobilenetV1_testBuildOnlyUptoFinalEndpoint/' + endpoint)) self.assertItemsEqual(endpoints[:index + 1], end_points.keys())
def testBuildBaseNetwork(self): print('\n====================================================') print('testBuildBaseNetwork') batch_size = 5 height, width = 224, 224 inputs = tf.random_uniform((batch_size, height, width, 3)) net, end_points = mobilenet_v1.mobilenet_v1_base( inputs, scope='MobilenetV1_testBuildBaseNetwork') self.assertTrue( net.op.name.startswith( 'MobilenetV1_testBuildBaseNetwork/Conv2d_13')) self.assertListEqual(net.get_shape().as_list(), [batch_size, 7, 7, 1024]) expected_endpoints = [ 'Conv2d_0', 'Conv2d_1_depthwise', 'Conv2d_1_pointwise', 'Conv2d_2_depthwise', 'Conv2d_2_pointwise', 'Conv2d_3_depthwise', 'Conv2d_3_pointwise', 'Conv2d_4_depthwise', 'Conv2d_4_pointwise', 'Conv2d_5_depthwise', 'Conv2d_5_pointwise', 'Conv2d_6_depthwise', 'Conv2d_6_pointwise', 'Conv2d_7_depthwise', 'Conv2d_7_pointwise', 'Conv2d_8_depthwise', 'Conv2d_8_pointwise', 'Conv2d_9_depthwise', 'Conv2d_9_pointwise', 'Conv2d_10_depthwise', 'Conv2d_10_pointwise', 'Conv2d_11_depthwise', 'Conv2d_11_pointwise', 'Conv2d_12_depthwise', 'Conv2d_12_pointwise', 'Conv2d_13_depthwise', 'Conv2d_13_pointwise' ] for key in expected_endpoints: print('{}: {}'.format(key, end_points[key].shape)) self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildCustomNetworkUsingConvDefs(self): batch_size = 5 height, width = 224, 224 conv_defs = [ mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32), mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64), mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128), mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512) ] inputs = tf.random_uniform((batch_size, height, width, 3)) net, end_points = mobilenet_v1.mobilenet_v1_base( inputs, final_endpoint='Conv2d_3_pointwise', conv_defs=conv_defs, scope='MobilenetV1_') self.assertTrue(net.op.name.startswith('MobilenetV1/Conv2d_3')) self.assertListEqual(net.get_shape().as_list(), [batch_size, 56, 56, 512]) expected_endpoints = [ 'Conv2d_0', 'Conv2d_1_depthwise', 'Conv2d_1_pointwise', 'Conv2d_2_depthwise', 'Conv2d_2_pointwise', 'Conv2d_3_depthwise', 'Conv2d_3_pointwise' ] self.assertItemsEqual(end_points.keys(), expected_endpoints)
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( min_dim=33, image_tensor=preprocessed_inputs) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def _extract_proposal_features(self, preprocessed_inputs, scope): """Extracts first stage RPN features. Args: preprocessed_inputs: A [batch, height, width, channels] float32 tensor representing a batch of images. scope: A scope name. Returns: rpn_feature_map: A tensor with shape [batch, height, width, depth] activations: A dictionary mapping feature extractor tensor names to tensors Raises: InvalidArgumentError: If the spatial size of `preprocessed_inputs` (height or width) is less than 33. ValueError: If the created network is missing the required activation. """ preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim( min_dim=33, image_tensor=preprocessed_inputs) with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=self._train_batch_norm, weight_decay=self._weight_decay)): with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: params = {} if self._skip_last_stride: params[ 'conv_defs'] = _get_mobilenet_conv_no_last_stride_defs( conv_depth_ratio_in_percentage=self. _conv_depth_ratio_in_percentage) _, activations = mobilenet_v1.mobilenet_v1_base( preprocessed_inputs, final_endpoint='Conv2d_11_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope, **params) return activations['Conv2d_11_pointwise'], activations
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.pooling_pyramid_feature_maps( base_feature_map_depth=0, num_layers=6, image_features={ 'image_features': image_features['Conv2d_11_pointwise'] }) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] Raises: ValueError: if image height or width are not 256 pixels. """ image_shape = preprocessed_inputs.get_shape() image_shape.assert_has_rank(4) image_height = image_shape[1].value image_width = image_shape[2].value if image_height is None or image_width is None: shape_assert = tf.Assert( tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256), tf.equal(tf.shape(preprocessed_inputs)[2], 256)), ['image size must be 256 in both height and width.']) with tf.control_dependencies([shape_assert]): preprocessed_inputs = tf.identity(preprocessed_inputs) elif image_height != 256 or image_width != 256: raise ValueError( 'image size must be = 256 in both height and width;' ' image dim = %d,%d' % (image_height, image_width)) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256], 'conv_kernel_size': [-1, -1, 3, 3, 2], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) #return feature_maps.values() return list(feature_maps.values())
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, conv_defs=self._conv_defs, use_explicit_padding=self._use_explicit_padding, scope=scope) depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth) with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('fpn', reuse=self._reuse_weights): feature_blocks = [ 'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise' ] base_fpn_max_level = min(self._fpn_max_level, 5) feature_block_list = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_block_list.append(feature_blocks[level - 2]) fpn_features = feature_map_generators.fpn_top_down_feature_maps( [(key, image_features[key]) for key in feature_block_list], depth=depth_fn(self._additional_layer_depth), use_depthwise=self._use_depthwise, use_explicit_padding=self._use_explicit_padding) feature_maps = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_maps.append(fpn_features['top_down_{}'.format( feature_blocks[level - 2])]) last_feature_map = fpn_features['top_down_{}'.format( feature_blocks[base_fpn_max_level - 2])] # Construct coarse features padding = 'VALID' if self._use_explicit_padding else 'SAME' kernel_size = 3 for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1): if self._use_depthwise: conv_op = functools.partial( slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d if self._use_explicit_padding: last_feature_map = ops.fixed_padding( last_feature_map, kernel_size) last_feature_map = conv_op( last_feature_map, num_outputs=depth_fn(self._additional_layer_depth), kernel_size=[kernel_size, kernel_size], stride=2, padding=padding, scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13)) feature_maps.append(last_feature_map) return feature_maps