def testModelHasExpectedNumberOfParameters(self):
    """Checks the parameter count of the Inception V3 base network.

    Builds `inception_v3_base` under the default Inception V3 arg scope on a
    random 5x299x299x3 batch and asserts that the total reported by
    `model_analyzer.analyze_vars` over the model variables is 21802784.
    """
    image_shape = (5, 299, 299, 3)
    images = random_ops.random_uniform(image_shape)

    with arg_scope(inception_v3.inception_v3_arg_scope()):
        inception_v3.inception_v3_base(images)

    model_variables = variables_lib.get_model_variables()
    param_count, _ = model_analyzer.analyze_vars(model_variables)
    self.assertAlmostEqual(21802784, param_count)
def testBuildAndCheckAllEndPointsUptoMixed7c(self):
    """Checks the name and static shape of every endpoint up to Mixed_7c.

    Builds the base network with `final_endpoint='Mixed_7c'` on a random
    5x299x299x3 batch, then verifies that the returned endpoint names match
    the expected set and that each endpoint has the expected shape.
    """
    num_images = 5
    side = 299
    images = random_ops.random_uniform((num_images, side, side, 3))
    _, end_points = inception_v3.inception_v3_base(
        images, final_endpoint='Mixed_7c')

    # (endpoint name, spatial size, depth); every feature map is square.
    layer_specs = [
        ('Conv2d_1a_3x3', 149, 32),
        ('Conv2d_2a_3x3', 147, 32),
        ('Conv2d_2b_3x3', 147, 64),
        ('MaxPool_3a_3x3', 73, 64),
        ('Conv2d_3b_1x1', 73, 80),
        ('Conv2d_4a_3x3', 71, 192),
        ('MaxPool_5a_3x3', 35, 192),
        ('Mixed_5b', 35, 256),
        ('Mixed_5c', 35, 288),
        ('Mixed_5d', 35, 288),
        ('Mixed_6a', 17, 768),
        ('Mixed_6b', 17, 768),
        ('Mixed_6c', 17, 768),
        ('Mixed_6d', 17, 768),
        ('Mixed_6e', 17, 768),
        ('Mixed_7a', 8, 1280),
        ('Mixed_7b', 8, 2048),
        ('Mixed_7c', 8, 2048),
    ]
    endpoints_shapes = {
        name: [num_images, size, size, depth]
        for name, size, depth in layer_specs
    }

    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
        self.assertTrue(endpoint_name in end_points)
        actual_shape = end_points[endpoint_name].get_shape().as_list()
        self.assertListEqual(actual_shape, expected_shape)
def encode(self, inputs):
    """Encodes a batch of images with the Inception V3 base network.

    The images are resized (bilinear) to
    `params["resize_height"]` x `params["resize_width"]`, cast to float and
    passed through `inception_v3_base`.

    Args:
      inputs: Image tensor accepted by `tf.image.resize_images`
        (presumably [B, H, W, C] -- confirm against the caller).

    Returns:
      An `EncoderOutput` with:
        outputs: the feature map reshaped to [B, H*W, C].
        final_state: the feature map average-pooled over its spatial
          dimensions and then flattened.
        attention_values: the same tensor as `outputs`.
        attention_values_length: `tf.shape(outputs)[1]`.
    """
    inputs = tf.image.resize_images(
        images=inputs,
        size=[self.params["resize_height"], self.params["resize_width"]],
        method=tf.image.ResizeMethod.BILINEAR)

    outputs, _ = inception_v3_base(tf.to_float(inputs))
    output_shape = outputs.get_shape()  #pylint: disable=E1101
    shape_list = output_shape.as_list()

    # Take attention over output elements in the width and height dimensions.
    # Shape: [B, W*H, ...]
    # NOTE(review): the reshape uses the static batch dimension; confirm that
    # callers always provide a statically known batch size.
    outputs_flat = tf.reshape(outputs, [shape_list[0], -1, shape_list[-1]])

    # Final state is the pooled output: pool over the full spatial extent,
    # then flatten the *pooled* tensor. The previous code flattened `outputs`
    # here, which silently discarded the pooling result.
    final_state = tf.contrib.slim.avg_pool2d(
        outputs, output_shape[1:3], padding="VALID", scope="pool")
    final_state = tf.contrib.slim.flatten(final_state, scope="flatten")

    return EncoderOutput(
        outputs=outputs_flat,
        final_state=final_state,
        attention_values=outputs_flat,
        attention_values_length=tf.shape(outputs_flat)[1])
def inception(inputs, is_training=False):
    """Runs `inputs` through the Inception V3 base network.

    The network is built under the file's `_inception_v3_arg_scope`, with
    conv/fully-connected/batch-norm layers marked trainable and with
    batch-norm/dropout layers switched by `is_training`.

    Args:
      inputs: Input tensor forwarded to `inception_v3.inception_v3_base`.
      is_training: bool. Forwarded to the arg scope and to batch norm/dropout.

    Returns:
      The first value returned by `inception_v3_base` (the final endpoint
      tensor); the endpoints dictionary is discarded.
    """
    trainable_layers = [slim.conv2d, slim.fully_connected, slim.batch_norm]
    mode_dependent_layers = [slim.batch_norm, slim.dropout]

    with slim.arg_scope(_inception_v3_arg_scope(is_training=is_training)), \
            slim.arg_scope(trainable_layers, trainable=True), \
            slim.arg_scope(mode_dependent_layers, is_training=is_training):
        features, _ = inception_v3.inception_v3_base(
            inputs, scope='InceptionV3')
    return features
def testBuildBaseNetwork(self):
    """Checks the default base network output and its endpoint names.

    With default arguments the returned tensor must live under
    'InceptionV3/Mixed_7c', have shape [5, 8, 8, 2048] for a 5x299x299x3
    input, and the endpoints dictionary must contain exactly the expected
    layer names.
    """
    num_images = 5
    side = 299
    images = random_ops.random_uniform((num_images, side, side, 3))

    final_endpoint, end_points = inception_v3.inception_v3_base(images)

    op_name = final_endpoint.op.name
    self.assertTrue(op_name.startswith('InceptionV3/Mixed_7c'))

    output_shape = final_endpoint.get_shape().as_list()
    self.assertListEqual(output_shape, [num_images, 8, 8, 2048])

    expected_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
        'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
        'MaxPool_5a_3x3',
        'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
        'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
    ]
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
    """Checks that construction stops at the requested `final_endpoint`.

    For each endpoint name, the network is built in a fresh graph; the
    returned tensor must be named under 'InceptionV3/<endpoint>' and the
    endpoints dictionary must hold exactly the layers up to and including it.
    """
    image_shape = (5, 299, 299, 3)
    endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
        'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
        'MaxPool_5a_3x3',
        'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
        'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
    ]
    # `built_count` is the number of layers expected to exist once the
    # network has been built up to `endpoint`.
    for built_count, endpoint in enumerate(endpoints, 1):
        with ops.Graph().as_default():
            images = random_ops.random_uniform(image_shape)
            out_tensor, end_points = inception_v3.inception_v3_base(
                images, final_endpoint=endpoint)
            expected_prefix = 'InceptionV3/' + endpoint
            self.assertTrue(out_tensor.op.name.startswith(expected_prefix))
            self.assertItemsEqual(endpoints[:built_count], end_points)
def style_prediction(style_input_,
                     activation_names,
                     activation_depths,
                     is_training=True,
                     trainable=True,
                     inception_end_point='Mixed_6e',
                     style_prediction_bottleneck=100,
                     reuse=None):
    """Maps style images to the style embeddings (beta and gamma parameters).

    Args:
      style_input_: Tensor. Batch of style input images.
      activation_names: string. Scope names of the activations of the
          transformer network which are used to apply style normalization.
      activation_depths: Shapes of the activations of the transformer network
          which are used to apply style normalization. Must not be longer
          than `activation_names`.
      is_training: bool. Is it training phase or not?
      trainable: bool. Should the parameters be marked as trainable?
      inception_end_point: string. Specifies the endpoint to construct the
          inception_v3 network up to. This network is part of the style
          prediction network.
      style_prediction_bottleneck: int. Specifies the bottleneck size in the
          number of parameters of the style embedding. If not positive, no
          bottleneck convolution is applied.
      reuse: bool or None. Passed to `tf.variable_scope` to control reuse of
          model parameters. Defaults to None.

    Returns:
      A tuple `(style_params, bottleneck_feat)`:
        style_params: dict mapping '<activation_name>/beta' and
          '<activation_name>/gamma' to the predicted parameter tensors.
        bottleneck_feat: Tensor for the bottleneck of style parameters of the
          style prediction network.
    """
    # Enter both context managers. The previous form,
    # `with tf.name_scope(...) and tf.variable_scope(...)`, evaluated the
    # `and` expression first and therefore only ever entered the variable
    # scope; the name scope object was created but never used.
    with tf.name_scope('style_prediction'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(_inception_v3_arg_scope(is_training=is_training)):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.batch_norm],
                    trainable=trainable):
                with slim.arg_scope(
                        [slim.batch_norm, slim.dropout],
                        is_training=is_training):
                    _, end_points = inception_v3.inception_v3_base(
                        style_input_,
                        scope='InceptionV3',
                        final_endpoint=inception_end_point)

        # Shape of feat_convlayer is (batch_size, ?, ?, depth).
        # For Mixed_6e end point, depth is 768; for an input image size of
        # 256x256 the width and height are 14x14.
        feat_convlayer = end_points[inception_end_point]
        with tf.name_scope('bottleneck'):
            # Spatial average: (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(
                feat_convlayer, axis=[1, 2], keep_dims=True)

        if style_prediction_bottleneck > 0:
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=None,
                    normalizer_fn=None,
                    trainable=trainable):
                # (batch_size, 1, 1, style_prediction_bottleneck).
                bottleneck_feat = slim.conv2d(
                    bottleneck_feat, style_prediction_bottleneck, [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i, depth in enumerate(activation_depths):
                name = activation_names[i]
                with tf.variable_scope(name, reuse=reuse):
                    with slim.arg_scope(
                            [slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None,
                            trainable=trainable):
                        # Beta parameter of the style normalization for the
                        # `name` layer of the style transformer network.
                        # (batch_size, 1, 1, depth)
                        beta = slim.conv2d(bottleneck_feat, depth, [1, 1])
                        # (batch_size, depth)
                        beta = tf.squeeze(
                            beta, [1, 2], name='SpatialSqueeze')
                        style_params['{}/beta'.format(name)] = beta

                        # Gamma parameter of the style normalization for the
                        # `name` layer of the style transformer network.
                        # (batch_size, 1, 1, depth)
                        gamma = slim.conv2d(bottleneck_feat, depth, [1, 1])
                        # (batch_size, depth)
                        gamma = tf.squeeze(
                            gamma, [1, 2], name='SpatialSqueeze')
                        style_params['{}/gamma'.format(name)] = gamma

    return style_params, bottleneck_feat
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight
        initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization. When False, the
        convolutions are built without a normalizer and `batch_norm_params`
        is ignored.
      batch_norm_params: Parameters for batch normalization. See
        tf.contrib.layers.batch_norm for details. If empty or None, defaults
        are used.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      The flattened output of the network after average pooling over the
      spatial dimensions and dropout.
    """
    # Only consider the inception model to be in training mode if it's
    # trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        normalizer_fn = slim.batch_norm
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        # Previously only the params were cleared while slim.batch_norm was
        # still installed as the normalizer, so batch norm was applied (with
        # library defaults) even when it had been disabled.
        normalizer_fn = None
        batch_norm_params = None

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.variable_scope(scope, "InceptionV3", [images]) as scope:
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_regularizer=weights_regularizer,
                trainable=trainable):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=normalizer_fn,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(images, scope=scope)
                with tf.variable_scope("logits"):
                    # Pool over the full spatial extent of the feature map.
                    shape = net.get_shape()
                    net = slim.avg_pool2d(
                        net, shape[1:3], padding="VALID", scope="pool")
                    net = slim.dropout(
                        net,
                        keep_prob=dropout_keep_prob,
                        is_training=is_inception_model_training,
                        scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(v)

    return net
def build_image_embeddings(self):
    """Builds the frozen Inception V3 subgraph and the image embeddings.

    Reads `self.images`, `self.embedding_size` and `self.initializer`.
    The Inception layers are built with `trainable=False` and
    `is_training=False`.

    Side effects:
      self.inception_variables: global variables collected under the
        "InceptionV3" scope.
      self.image_embeddings: output of the fully connected layer that maps
        the Inception output into the embedding space.
      A constant named "embedding_size" is added to the graph.
    """
    # Parameter initialization.
    batch_norm_params = {
        "is_training": False,
        "trainable": False,
        # Decay for the moving averages.
        "decay": 0.9997,
        # Epsilon to prevent 0s in variance.
        "epsilon": 0.001,
        # Collection containing the moving mean and moving variance.
        "variables_collections": {
            "beta": None,
            "gamma": None,
            "moving_mean": ["moving_vars"],
            "moving_variance": ["moving_vars"],
        }
    }

    # A stray trailing comma used to turn this into the tuple `(0.1,)`.
    stddev = 0.1
    dropout_keep_prob = 0.8

    with tf.variable_scope(
            "InceptionV3", "InceptionV3", [self.images]) as scope:
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_regularizer=None,
                trainable=False):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(self.images, scope=scope)
                with tf.variable_scope("logits"):
                    # Pool over the full spatial extent of the feature map.
                    shape = net.get_shape()
                    net = slim.avg_pool2d(
                        net, shape[1:3], padding="VALID", scope="pool")
                    net = slim.dropout(
                        net,
                        keep_prob=dropout_keep_prob,
                        is_training=False,
                        scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    for v in end_points.values():
        tf.contrib.layers.summaries.summarize_activation(v)

    self.inception_variables = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

    # Map the inception output (net) into the embedding space.
    with tf.variable_scope("image_embedding") as scope:
        image_embeddings = tf.contrib.layers.fully_connected(
            inputs=net,
            num_outputs=self.embedding_size,
            activation_fn=None,
            weights_initializer=self.initializer,
            biases_initializer=None,
            scope=scope)

    # Save the embedding size in the graph.
    tf.constant(self.embedding_size, name="embedding_size")

    self.image_embeddings = image_embeddings