def test_aliases(self):
    """Checks that each collected tensor exposes the alias it was given."""
    first = constant_op.constant(1.0, name='t1')
    second = constant_op.constant(2.0, name='t2')
    for alias, tensor in (('a1', first), ('a2', second)):
        utils.collect_named_outputs('end_points', alias, tensor)
    self.assertEqual(first.aliases, ['a1'])
    self.assertEqual(second.aliases, ['a2'])
def test_gather_aliases(self):
    """Checks `gather_tensors_aliases` on aliased and un-aliased tensors."""
    first = constant_op.constant(1.0, name='t1')
    second = constant_op.constant(2.0, name='t2')
    third = constant_op.constant(2.0, name='t3')
    utils.collect_named_outputs('end_points', 'a1', first)
    utils.collect_named_outputs('end_points', 'a2', second)
    # The third tensor is added to the collection directly, with no alias;
    # the expected list below carries 't3' for it.
    ops.add_to_collection('end_points', third)
    collected = ops.get_collection('end_points')
    gathered = utils.gather_tensors_aliases(collected)
    self.assertEqual(gathered, ['a1', 'a2', 't3'])
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN after convolutions.

    This is the original residual unit proposed in [1]; see Fig. 1(a) of [2]
    for its definition. The bottleneck variant, which has an extra bottleneck
    layer, is used here.

    When putting together two consecutive ResNet blocks that use this unit,
    one should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling
        of the unit's output compared to its input.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with variable_scope.variable_scope(
            scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Shortcut branch: plain subsampling when the depth already matches,
        # otherwise a 1x1 projection without activation.
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = layers.conv2d(
                inputs,
                depth, [1, 1],
                stride=stride,
                activation_fn=None,
                scope='shortcut')

        # Residual branch: 1x1 reduce -> 3x3 (strided / atrous) -> 1x1 expand.
        reduced = layers.conv2d(
            inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        spatial = resnet_utils.conv2d_same(
            reduced, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
        residual = layers.conv2d(
            spatial,
            depth, [1, 1],
            stride=1,
            activation_fn=None,
            scope='conv3')

        unit_output = nn_ops.relu(shortcut + residual)
        return utils.collect_named_outputs(
            outputs_collections, sc.name, unit_output)
def bottleneck(
        inputs, depth, depth_bottleneck, stride, rate=1, centered_stride=False,
        outputs_collections=None, scope=None):
    """Bottleneck residual unit variant with BN before convolutions.

    This is the full preactivation residual unit variant proposed in [2]; see
    Fig. 1(b) of [2] for its definition. The bottleneck variant, which has an
    extra bottleneck layer, is used here.

    When putting together two consecutive ResNet blocks that use this unit,
    one should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels] for NHWC or
        permuted for NCHW.
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling
        of the unit's output compared to its input.
      rate: An integer, rate for atrous convolution.
      centered_stride: When truthy, `stride` must be 1 or 2 (checked with an
        assert). With stride 2 the shortcut's input is first passed through
        `spatial_slice(x, slice(1, None))`. The flag is also forwarded to
        `resnet_utils.conv2d_same`.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    if centered_stride:
        assert stride in (1, 2)

    slice_shortcut_input = centered_stride and stride == 2

    def _shift(tensor):
        # Identity unless a centered stride of 2 was requested.
        if slice_shortcut_input:
            return spatial_slice(tensor, slice(1, None))
        return tensor

    with variable_scope.variable_scope(
            scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = tfu.static_n_channels(inputs)
        preact = layers.batch_norm(
            inputs, activation_fn=nn_ops.relu, scope='preact')

        # Shortcut branch: subsample the raw inputs when the depth matches,
        # otherwise project the pre-activated inputs with a bare 1x1 conv.
        if depth == depth_in:
            shortcut = resnet_utils.subsample(
                _shift(inputs), stride, 'shortcut')
        else:
            shortcut = layers_lib.conv2d(
                _shift(preact),
                depth, [1, 1],
                stride=stride,
                normalizer_fn=None,
                activation_fn=None,
                scope='shortcut')

        # Residual branch: 1x1 reduce -> 3x3 (strided / atrous) -> 1x1 expand.
        reduced = layers_lib.conv2d(
            preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        spatial = resnet_utils.conv2d_same(
            reduced, depth_bottleneck, 3, stride, rate,
            centered_stride=centered_stride, scope='conv2')
        residual = layers_lib.conv2d(
            spatial,
            depth, [1, 1],
            stride=1,
            normalizer_fn=None,
            activation_fn=None,
            scope='conv3')

        unit_output = shortcut + residual
        return utils.collect_named_outputs(
            outputs_collections, sc.name, unit_output)
def test_convert_collection_to_dict_clear_collection(self):
    """Checks that `clear_collection=True` leaves the collection empty."""
    first = constant_op.constant(1.0, name='t1')
    second = constant_op.constant(2.0, name='t2')
    # The second tensor is collected twice, under two different aliases.
    for alias, tensor in (('a1', first), ('a21', second), ('a22', second)):
        utils.collect_named_outputs('end_points', alias, tensor)
    utils.convert_collection_to_dict('end_points', clear_collection=True)
    remaining = ops.get_collection('end_points')
    self.assertEqual(remaining, [])
def test_convert_collection_to_dict(self):
    """Checks that every alias maps to its tensor in the resulting dict."""
    first = constant_op.constant(1.0, name='t1')
    second = constant_op.constant(2.0, name='t2')
    # The second tensor is collected twice, under two different aliases.
    for alias, tensor in (('a1', first), ('a21', second), ('a22', second)):
        utils.collect_named_outputs('end_points', alias, tensor)
    end_points = utils.convert_collection_to_dict('end_points')
    self.assertEqual(end_points['a1'], first)
    self.assertEqual(end_points['a21'], second)
    self.assertEqual(end_points['a22'], second)
def resnet_block_fn(block, current_stride, net, output_stride,
                    outputs_collections, rate,
                    store_non_strided_activations, scope_suffix):
    """Applies every unit of one ResNet block while tracking stride and rate.

    Args:
      block: Object providing `scope`, `args` (one dict of keyword arguments
        per unit) and `unit_fn` (called as `unit_fn(net, rate=..., **unit)`).
      current_stride: Effective stride of `net` on entry.
      net: Input tensor of the block.
      output_stride: Target stride, or `None` for no limit. Once
        `current_stride` equals it, units run with stride 1 and their stride
        is multiplied into `rate` instead.
      outputs_collections: Collection to add the block output to.
      rate: Atrous rate on entry.
      store_non_strided_activations: If true, the last unit runs with stride 1
        and its stride is applied after the block output has been collected.
      scope_suffix: Appended to 'block' to form the default scope name.

    Returns:
      A `(current_stride, net, rate)` tuple with the updated values.

    Raises:
      ValueError: If the effective stride grows past `output_stride`.
    """

    def target_reached(stride_so_far):
        return output_stride is not None and stride_so_far == output_stride

    def ensure_not_overshot(stride_so_far):
        if output_stride is not None and stride_so_far > output_stride:
            raise ValueError('The target output_stride cannot be reached.')

    with variable_scope.variable_scope(
            block.scope, f'block{scope_suffix}', [net]) as sc:
        deferred_stride = 1
        for position, unit in enumerate(block.args, start=1):
            if store_non_strided_activations and position == len(block.args):
                # Move stride from the block's last unit to the block's end.
                deferred_stride = unit.get('stride', 1)
                unit = dict(unit, stride=1)

            with variable_scope.variable_scope(
                    'unit_%d' % position, values=[net]):
                unit_stride = unit.get('stride', 1)
                if target_reached(current_stride):
                    # Target density reached: run the unit with stride 1 and
                    # fold its stride into the atrous rate of later layers.
                    net = block.unit_fn(
                        net, rate=rate, **dict(unit, stride=1))
                    rate *= unit_stride
                else:
                    net = block.unit_fn(net, rate=1, **unit)
                    current_stride *= unit_stride
                    ensure_not_overshot(current_stride)

        # Collect activations at the block's end before any subsampling.
        net = utils.collect_named_outputs(outputs_collections, sc.name, net)

        # Apply the stride that was deferred from the last unit, if any.
        if target_reached(current_stride):
            rate *= deferred_stride
        else:
            net = subsample(net, deferred_stride)
            current_stride *= deferred_stride
            ensure_not_overshot(current_stride)

    return current_stride, net, rate
def mpusim_separable_convolution2d(
        inputs,
        num_outputs,
        kernel_size,
        depth_multiplier=1,
        stride=1,
        padding='SAME',
        data_format='NHWC',
        rate=1,
        activation_fn=nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=initializers.xavier_initializer(),
        pointwise_initializer=None,
        weights_regularizer=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None,
        activations_datatype_size_byte=1,
        weights_datatype_size_byte=1,
        results_datatype_size_byte=4,
        systolic_array_height=256,
        systolic_array_width=256,
        activation_fifo_depth=8,
        accumulator_array_height=4096,
        log_file_output_dir='.',
        model_name='unnamed'):
    """Adds a separable (or depthwise-only) 2D convolution built on mpusim ops.

    When `num_outputs` is not None, the `mpusim_separable_conv2d` layer is
    applied. When it is None, only `mpusim_depthwise_conv2d` is applied and
    the output has `depth_multiplier * input_channels` channels.

    Args:
      inputs: Input, converted with `ops.convert_to_tensor`. The
        depthwise-only path reads its channel dimension with `min_rank=4`.
      num_outputs: Number of filters for the separable layer, or None to
        apply only the depthwise convolution.
      kernel_size: Kernel size of the convolution.
      depth_multiplier: Depth multiplier of the depthwise convolution.
      stride: Stride of the convolution.
      padding: Padding argument forwarded to the convolution.
      data_format: Must be 'NHWC'.
      rate: Dilation rate; expanded with `utils.two_element_tuple`.
      activation_fn: Applied to the result last; skipped when None.
      normalizer_fn: If given, applied to the convolution output as
        `normalizer_fn(outputs, **normalizer_params)`; no biases are then
        created.
      normalizer_params: Keyword arguments for `normalizer_fn`.
      weights_initializer: Initializer of the depthwise weights.
      pointwise_initializer: Initializer of the pointwise weights; defaults
        to `weights_initializer` when None.
      weights_regularizer: Regularizer for the depthwise and pointwise
        weights.
      biases_initializer: Initializer of the biases; None disables them.
      biases_regularizer: Regularizer for the biases.
      reuse: Forwarded to the variable scope (and the separable layer).
      variables_collections: Collections the created variables are added to.
      outputs_collections: Collection to add the outputs to.
      trainable: Whether the created variables are trainable.
      scope: Optional variable scope.
      activations_datatype_size_byte: Forwarded unchanged to the mpusim op,
        as are all the remaining arguments below.
      weights_datatype_size_byte: See above.
      results_datatype_size_byte: See above.
      systolic_array_height: See above.
      systolic_array_width: See above.
      activation_fifo_depth: See above.
      accumulator_array_height: See above.
      log_file_output_dir: See above.
      model_name: See above.

    Returns:
      The output tensor, after passing it to `utils.collect_named_outputs`.

    Raises:
      ValueError: If `data_format` is not 'NHWC'.
    """
    # Compare by value: `is not` against a string literal tests object
    # identity, which can reject an equal but non-interned 'NHWC' string.
    if data_format != 'NHWC':
        raise ValueError('data_format has to be NHWC.')

    layer_variable_getter = _build_variable_getter({
        'bias': 'biases',
        'depthwise_kernel': 'depthwise_weights',
        'pointwise_kernel': 'pointwise_weights'
    })

    with variable_scope.variable_scope(
            scope,
            'mpusim_separable_convolution2d', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        if pointwise_initializer is None:
            pointwise_initializer = weights_initializer

        df = 'channels_last'

        if num_outputs is not None:
            # Apply separable conv using the mpusim_separable_conv2d layer.
            layer = mpusim_separable_conv2d.mpusim_separable_conv2d(
                filters=num_outputs,
                kernel_size=kernel_size,
                strides=stride,
                padding=padding,
                data_format=df,
                dilation_rate=utils.two_element_tuple(rate),
                activation=None,
                depth_multiplier=depth_multiplier,
                # Biases are only used when there is no normalizer and a
                # bias initializer was supplied.
                use_bias=not normalizer_fn and biases_initializer,
                depthwise_initializer=weights_initializer,
                pointwise_initializer=pointwise_initializer,
                bias_initializer=biases_initializer,
                depthwise_regularizer=weights_regularizer,
                pointwise_regularizer=weights_regularizer,
                bias_regularizer=biases_regularizer,
                activity_regularizer=None,
                trainable=trainable,
                name=sc.name,
                dtype=inputs.dtype.base_dtype,
                _scope=sc,
                _reuse=reuse,
                activations_datatype_size_byte=activations_datatype_size_byte,
                weights_datatype_size_byte=weights_datatype_size_byte,
                results_datatype_size_byte=results_datatype_size_byte,
                systolic_array_height=systolic_array_height,
                systolic_array_width=systolic_array_width,
                activation_fifo_depth=activation_fifo_depth,
                accumulator_array_height=accumulator_array_height,
                log_file_output_dir=log_file_output_dir,
                model_name=model_name)
            outputs = layer.apply(inputs)

            # Add variables to collections.
            _add_variable_to_collections(layer.depthwise_kernel,
                                         variables_collections, 'weights')
            _add_variable_to_collections(layer.pointwise_kernel,
                                         variables_collections, 'weights')
            if layer.bias is not None:
                _add_variable_to_collections(layer.bias,
                                             variables_collections, 'biases')

            if normalizer_fn is not None:
                normalizer_params = normalizer_params or {}
                outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            # Actually apply depthwise conv instead of separable conv.
            dtype = inputs.dtype.base_dtype
            kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
            stride_h, stride_w = utils.two_element_tuple(stride)
            num_filters_in = utils.channel_dimension(
                inputs.get_shape(), df, min_rank=4)
            weights_collections = utils.get_variable_collections(
                variables_collections, 'weights')

            depthwise_shape = [
                kernel_h, kernel_w, num_filters_in, depth_multiplier
            ]
            depthwise_weights = variables.model_variable(
                'depthwise_weights',
                shape=depthwise_shape,
                dtype=dtype,
                initializer=weights_initializer,
                regularizer=weights_regularizer,
                trainable=trainable,
                collections=weights_collections)

            # NOTE: data_format is always 'NHWC' here (validated above), so
            # only the channels-last stride layout is ever selected.
            strides = [1, 1, stride_h, stride_w] \
                if data_format.startswith('NC') \
                else [1, stride_h, stride_w, 1]

            outputs = mpusim_depthwise_conv2d.mpusim_depthwise_conv2d(
                inputs,
                depthwise_weights,
                strides,
                padding,
                rate=utils.two_element_tuple(rate),
                activations_datatype_size_byte=activations_datatype_size_byte,
                weights_datatype_size_byte=weights_datatype_size_byte,
                results_datatype_size_byte=results_datatype_size_byte,
                systolic_array_height=systolic_array_height,
                systolic_array_width=systolic_array_width,
                activation_fifo_depth=activation_fifo_depth,
                accumulator_array_height=accumulator_array_height,
                log_file_output_dir=log_file_output_dir,
                model_name=model_name)

            num_outputs = depth_multiplier*num_filters_in

            if normalizer_fn is not None:
                normalizer_params = normalizer_params or {}
                outputs = normalizer_fn(outputs, **normalizer_params)
            else:
                if biases_initializer is not None:
                    biases_collections = utils.get_variable_collections(
                        variables_collections, 'biases')
                    biases = variables.model_variable(
                        'biases',
                        shape=[num_outputs,],
                        dtype=dtype,
                        initializer=biases_initializer,
                        regularizer=biases_regularizer,
                        trainable=trainable,
                        collections=biases_collections)
                    outputs = nn.bias_add(outputs, biases,
                                          data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
def stack_blocks_dense(net,
                       blocks,
                       output_stride=None,
                       store_non_strided_activations=False,
                       outputs_collections=None):
    """Stacks ResNet `Blocks` and controls output feature density.

    This function does two things. It creates the scopes of the ResNet in the
    form 'block_name/unit_1', 'block_name/unit_2', etc. It also lets the
    caller explicitly control the ResNet output_stride, i.e. the ratio of the
    input to output spatial resolution, which is useful for dense prediction
    tasks such as semantic segmentation or object detection.

    Most ResNets consist of 4 ResNet blocks and subsample the activations by
    a factor of 2 when transitioning between consecutive blocks, giving a
    nominal ResNet output_stride equal to 8. Setting the output_stride to
    half the nominal network stride (e.g., output_stride=4) computes
    responses twice.

    Control of the output feature density is implemented by atrous
    convolution.

    Args:
      net: A `Tensor` of size [batch, height, width, channels].
      blocks: A list of length equal to the number of ResNet `Blocks`. Each
        element is a ResNet `Block` object describing the units in the
        `Block`.
      output_stride: If `None`, the output is computed at the nominal network
        stride. Otherwise it specifies the requested ratio of input to output
        spatial resolution, which needs to be equal to the product of unit
        strides from the start up to some level of the ResNet. For example,
        if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, then valid
        values for the output_stride are 1, 2, 6, 24 or None (which is
        equivalent to output_stride=24).
      store_non_strided_activations: If True, non-strided (undecimated)
        activations are computed at the last unit of each block and stored in
        `outputs_collections` before they are subsampled. This gives access
        to higher resolution intermediate activations, which are useful in
        some dense prediction problems, but increases 4x the computation and
        memory cost at the last unit of each block.
      outputs_collections: Collection to add the ResNet block outputs.

    Returns:
      net: Output tensor with stride equal to the specified output_stride.

    Raises:
      ValueError: If the target output_stride is not valid.
    """

    def target_reached(stride_so_far):
        return output_stride is not None and stride_so_far == output_stride

    def ensure_not_overshot(stride_so_far):
        if output_stride is not None and stride_so_far > output_stride:
            raise ValueError('The target output_stride cannot be reached.')

    # Effective stride of the activations so far. Atrous convolution takes
    # over as soon as another strided unit would exceed the target stride.
    current_stride = 1

    # The atrous convolution rate parameter.
    rate = 1

    for block in blocks:
        with variable_scope.variable_scope(
                block.scope, 'block', [net]) as sc:
            deferred_stride = 1
            for position, unit in enumerate(block.args, start=1):
                if (store_non_strided_activations
                        and position == len(block.args)):
                    # Move stride from the block's last unit to the end of
                    # the block.
                    deferred_stride = unit.get('stride', 1)
                    unit = dict(unit, stride=1)

                with variable_scope.variable_scope(
                        'unit_%d' % position, values=[net]):
                    unit_stride = unit.get('stride', 1)
                    if target_reached(current_stride):
                        # Target density reached: run the unit with stride 1
                        # and fold its stride into the atrous rate used by
                        # subsequent layers.
                        net = block.unit_fn(
                            net, rate=rate, **dict(unit, stride=1))
                        rate *= unit_stride
                    else:
                        net = block.unit_fn(net, rate=1, **unit)
                        current_stride *= unit_stride
                        ensure_not_overshot(current_stride)

            # Collect activations at the block's end before subsampling.
            net = utils.collect_named_outputs(
                outputs_collections, sc.name, net)

            # Subsampling of the block's output activations.
            if target_reached(current_stride):
                rate *= deferred_stride
            else:
                net = subsample(net, deferred_stride)
                current_stride *= deferred_stride
                ensure_not_overshot(current_stride)

    if output_stride is not None and current_stride != output_stride:
        raise ValueError('The target output_stride cannot be reached.')

    return net
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
    """Functional interface for the instance normalization layer.

    Reference: https://arxiv.org/abs/1607.08022.

      "Instance Normalization: The Missing Ingredient for Fast Stylization"
      Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

    Args:
      inputs: A tensor with 2 or more dimensions, where the first dimension
        has `batch_size`. Moments are taken over every axis except the batch
        axis and the channels axis, which is the last axis for `NHWC` and
        axis 1 for `NCHW`.
      center: If True, add offset of `beta` to normalized tensor. If False,
        `beta` is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is not used.
        When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      epsilon: Small float added to variance to avoid dividing by zero.
      activation_fn: Activation function, default set to None to skip it and
        maintain a linear activation.
      param_initializers: Optional dict of initializers; the keys 'beta' and
        'gamma' are looked up.
      reuse: Whether or not the layer and its variables should be reused. To
        be able to reuse the layer scope must be given.
      variables_collections: Optional collections for the variables.
      outputs_collections: Collections to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      data_format: A string. `NHWC` (default) and `NCHW` are supported.
      scope: Optional scope for `variable_scope`.

    Returns:
      A `Tensor` representing the output of the operation.

    Raises:
      ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
      ValueError: If the rank of `inputs` is undefined.
      ValueError: If the channels dimension of `inputs` is undefined.
    """
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.shape
    inputs_rank = inputs.shape.ndims

    if inputs_rank is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    with variable_scope.variable_scope(
            scope, 'InstanceNorm', [inputs], reuse=reuse) as sc:
        if data_format == DATA_FORMAT_NCHW:
            reduction_axis = 1
            # For NCHW, the params are explicitly reshaped to this broadcast
            # shape instead of relying on implicit broadcasting.
            channels = tf.dimension_value(inputs_shape[1])
            params_shape_broadcast = [1, channels] + [1] * (inputs_rank - 2)
        else:
            reduction_axis = inputs_rank - 1
            params_shape_broadcast = None

        # Every axis except the channels axis and the batch axis.
        moments_axes = list(range(inputs_rank))
        moments_axes.pop(reduction_axis)
        moments_axes.pop(0)

        params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
        if not params_shape.is_fully_defined():
            raise ValueError(
                'Inputs %s has undefined channels dimension %s.' %
                (inputs.name, params_shape))

        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}

        def _make_param(name, default_initializer_factory):
            # Creates one normalization parameter, reshaped for broadcasting
            # when a broadcast shape is in use.
            collections = utils.get_variable_collections(
                variables_collections, name)
            initializer = param_initializers.get(
                name, default_initializer_factory())
            param = variables.model_variable(
                name,
                shape=params_shape,
                dtype=dtype,
                initializer=initializer,
                collections=collections,
                trainable=trainable)
            if params_shape_broadcast:
                param = array_ops.reshape(param, params_shape_broadcast)
            return param

        # Allocate parameters for the beta and gamma of the normalization.
        beta = _make_param('beta', init_ops.zeros_initializer) \
            if center else None
        gamma = _make_param('gamma', init_ops.ones_initializer) \
            if scale else None

        # Calculate the moments (instance activations).
        mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute instance normalization.
        outputs = nn.batch_normalization(
            inputs, mean, variance, beta, gamma, epsilon,
            name='instancenorm')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(
            outputs_collections, sc.name, outputs)
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
    """Functional interface for the group normalization layer.

    Reference: https://arxiv.org/abs/1803.08494.

      "Group Normalization", Yuxin Wu, Kaiming He

    Args:
      inputs: A Tensor with at least 2 dimensions, one of which is channels.
        The channels dimension and every dimension in `reduction_axes` must
        be statically defined; other dimensions fall back to the dynamic
        shape.
      groups: Integer. Divide the channels into this number of groups over
        which normalization statistics are computed. This number must be
        commensurate with the number of channels in `inputs`.
      channels_axis: An integer. Specifies index of channels axis which will
        be broken into `groups`, each of which whose statistics will be
        computed across. Must be mutually exclusive with `reduction_axes`.
        Preferred usage is to specify negative integers to be agnostic as to
        whether a batch dimension is included.
      reduction_axes: Tuple of integers. Specifies dimensions over which
        statistics will be accumulated. Must be mutually exclusive with
        `channels_axis`. Statistics will not be accumulated across axes not
        specified in `reduction_axes` nor `channels_axis`. Preferred usage is
        to specify negative integers to be agnostic to whether a batch
        dimension is included.

        Some sample usage cases:
          NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
          NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

      center: If True, add offset of `beta` to normalized tensor. If False,
        `beta` is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is not used.
        When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      epsilon: Small float added to variance to avoid dividing by zero.
      activation_fn: Activation function, default set to None to skip it and
        maintain a linear activation.
      param_initializers: Optional dict of initializers; the keys 'beta' and
        'gamma' are looked up.
      reuse: Whether or not the layer and its variables should be reused. To
        be able to reuse the layer scope must be given.
      variables_collections: Optional collections for the variables.
      outputs_collections: Collections to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.
      mean_close_to_zero: The mean of `input` before ReLU will be close to
        zero when batch size >= 4k for Resnet-50 on TPU. If `True`, use
        `nn.sufficient_statistics` and `nn.normalize_moments` to calculate
        the variance. This is the same behavior as `fused` equals `True` in
        batch normalization. If `False`, use `nn.moments` to calculate the
        variance. When `mean` is close to zero, like 1e-4, using `mean` to
        calculate the variance may give a poor result due to repeated
        roundoff error and denormalization in `mean`. When `mean` is large,
        like 1e2, sum(`input`^2) is so large that only the high-order digits
        of the elements are being accumulated. Thus, using
        sum(`input` - `mean`)^2/n to calculate the variance has better
        accuracy compared to (sum(`input`^2)/n - `mean`^2) when `mean` is
        large.

    Returns:
      A `Tensor` representing the output of the operation.

    Raises:
      ValueError: If the rank of `inputs` is undefined.
      ValueError: If the channels dimension or a reduction dimension of
        `inputs` is undefined.
      ValueError: If number of groups is not commensurate with number of
        channels.
      ValueError: If reduction_axes or channels_axis are out of bounds.
      ValueError: If reduction_axes are not mutually exclusive with
        channels_axis.
    """
    inputs = ops.convert_to_tensor(inputs)

    ndims = inputs.shape.ndims
    if ndims is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    # Reject both ends of the range: an axis below -ndims would otherwise
    # stay negative after standardization and silently select a wrong axis.
    if not -ndims <= channels_axis < ndims:
        raise ValueError('Axis is out of bounds.')

    # Use dynamic shape for not fully defined dimensions in the inputs.
    dynamic_shape = array_ops.shape(inputs)
    input_shape_list = []
    for i, dim in enumerate(inputs.shape):
        if tf.dimension_value(dim) is None:
            input_shape_list.append(dynamic_shape[i])
        else:
            input_shape_list.append(dim)

    # Standardize the channels_axis to be positive and identify # of
    # channels.
    if channels_axis < 0:
        channels_axis = ndims + channels_axis
    channels = tf.dimension_value(inputs.shape[channels_axis])

    if channels is None:
        raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                         (inputs.name, channels_axis))

    # Standardize the reduction_axes to be positive.
    reduction_axes = [a + ndims if a < 0 else a for a in reduction_axes]
    for a in reduction_axes:
        # Valid standardized axes are 0 .. ndims - 1. `a == ndims` is already
        # out of range, and a still-negative `a` was below -ndims.
        if not 0 <= a < ndims:
            raise ValueError('Axis is out of bounds.')
        if tf.dimension_value(inputs.shape[a]) is None:
            raise ValueError('Inputs %s has undefined dimensions %d.' %
                             (inputs.name, a))
        if channels_axis == a:
            raise ValueError('reduction_axis must be mutually exclusive '
                             'with channels_axis')
    if groups > channels:
        raise ValueError('Invalid groups %d for %d channels.' %
                         (groups, channels))
    if channels % groups != 0:
        raise ValueError('%d channels is not commensurate with %d groups.' %
                         (channels, groups))

    # Determine axes before channels. Some examples of common image formats:
    #  'NCHW': before = [N], after = [HW]
    #  'NHWC': before = [NHW], after = []
    axes_before_channels = input_shape_list[:channels_axis]
    axes_after_channels = input_shape_list[channels_axis + 1:]

    # Manually broadcast the parameters to conform to the number of groups.
    params_shape_broadcast = ([1] * len(axes_before_channels) +
                              [groups, channels // groups] +
                              [1] * len(axes_after_channels))

    # Reshape the input by the group within the channel dimension.
    inputs_shape = (axes_before_channels + [groups, channels // groups] +
                    axes_after_channels)
    inputs = array_ops.reshape(inputs, inputs_shape)

    # Determine the dimensions across which moments are calculated. The
    # channels axis was split in two, so every axis after it moves up by one.
    moments_axes = [channels_axis + 1]
    for a in reduction_axes:
        if a > channels_axis:
            moments_axes.append(a + 1)
        else:
            moments_axes.append(a)

    with variable_scope.variable_scope(
            scope, 'GroupNorm', [inputs], reuse=reuse) as sc:
        # Note that the params_shape is the number of channels always.
        params_shape = [channels]

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable(
                'beta',
                shape=params_shape,
                dtype=dtype,
                initializer=beta_initializer,
                collections=beta_collections,
                trainable=trainable)
            beta = array_ops.reshape(beta, params_shape_broadcast)

        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable(
                'gamma',
                shape=params_shape,
                dtype=dtype,
                initializer=gamma_initializer,
                collections=gamma_collections,
                trainable=trainable)
            gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments.
        if mean_close_to_zero:
            # One pass algorithm returns better result when mean is close to
            # zero.
            counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
                inputs, moments_axes, keep_dims=True)
            mean, variance = nn.normalize_moments(
                counts, means_ss, variance_ss, shift=None)
        else:
            mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute normalization as `inputs * gain + offset`, with gamma and
        # beta folded into the two factors.
        gain = math_ops.rsqrt(variance + epsilon)
        offset = -mean * gain
        if gamma is not None:
            gain *= gamma
            offset *= gamma
        if beta is not None:
            offset += beta
        outputs = inputs * gain + offset

        # Collapse the groups into the channel dimension.
        outputs = array_ops.reshape(outputs, input_shape_list)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
def test_collect(self):
    """Checks that collected tensors land in the collection in call order."""
    first = constant_op.constant(1.0, name='t1')
    second = constant_op.constant(2.0, name='t2')
    for alias, tensor in (('a1', first), ('a2', second)):
        utils.collect_named_outputs('end_points', alias, tensor)
    collected = ops.get_collection('end_points')
    self.assertEqual(collected, [first, second])
def _xfcn_shortcut(net, depth, module_scope):
    """Builds the strided 1x1 shortcut branch of one Xception unit.

    Args:
        net: Tensor entering the unit.
        depth: Number of output channels of the 1x1 convolution.
        module_scope: Scope prefix of the unit, including the trailing '/'.

    Returns:
        Output of the stride-2 1x1 convolution followed by batch norm.
    """
    shortcut = slim.conv2d(net, depth, [1, 1], stride=2,
                           scope=module_scope + 'shortcut')
    return slim.batch_norm(shortcut,
                           scope=module_scope + 'shortcut/BatchNorm')


def _xfcn_separable_bn(net, depth, module_scope, index, **conv_kwargs):
    """Applies one 3x3 separable convolution followed by batch norm.

    The convolution lives under '<module_scope>separable_conv<index>_depthwise'
    and the batch norm under
    '<module_scope>separable_conv<index>_pointwise/BatchNorm'.

    Args:
        net: Input tensor.
        depth: Number of output channels of the separable convolution.
        module_scope: Scope prefix of the unit, including the trailing '/'.
        index: Position of this convolution inside the unit (1, 2 or 3).
        **conv_kwargs: Extra keyword arguments forwarded verbatim to
            `slim.separable_conv2d` (e.g. `activation_fn=None`).

    Returns:
        The batch-normalised convolution output.
    """
    net = slim.separable_conv2d(
        net, depth, [3, 3],
        scope='%sseparable_conv%d_depthwise' % (module_scope, index),
        **conv_kwargs)
    return slim.batch_norm(
        net,
        scope='%sseparable_conv%d_pointwise/BatchNorm' % (module_scope, index))


def _xfcn_unit_branch(net, depths, module_scope, **first_conv_kwargs):
    """Builds the main (non-shortcut) branch of one Xception unit.

    The branch is: separable conv + BN, then (ReLU, separable conv + BN) for
    every remaining depth, then a 3x3 stride-2 'SAME' max pool.

    Args:
        net: Input tensor of the branch. Any activation that must precede the
            first convolution is applied by the caller.
        depths: Sequence with the output depth of each separable convolution.
        module_scope: Scope prefix of the unit, including the trailing '/'.
        **first_conv_kwargs: Extra keyword arguments for the first separable
            convolution only.

    Returns:
        The max-pooled output of the branch.
    """
    net = _xfcn_separable_bn(net, depths[0], module_scope, 1,
                             **first_conv_kwargs)
    for index, depth in enumerate(depths[1:], start=2):
        net = tf.nn.relu(net)
        net = _xfcn_separable_bn(net, depth, module_scope, index)
    return slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')


def _xfcn_upsample_and_crop(net, num_outputs, kernel_size, stride, im_size,
                            end_points_collection, scope, alias):
    """Upsamples a side map, crops it and records it as an end point.

    Args:
        net: Tensor to upsample.
        num_outputs: Number of output channels of the transposed convolution.
        kernel_size: Kernel size of the transposed convolution.
        stride: Stride of the transposed convolution.
        im_size: Value passed to `crop_features` as the crop reference
            (the dynamic shape of the network input).
        end_points_collection: Collection the cropped tensor is added to.
        scope: Scope of the transposed convolution.
        alias: Alias under which the cropped tensor is collected.

    Returns:
        The cropped tensor.
    """
    net = slim.convolution2d_transpose(net, num_outputs, kernel_size, stride,
                                       scope=scope)
    # crop_features is defined elsewhere in the project; presumably it crops
    # the upsampled map back to the input's spatial size -- TODO confirm.
    net = crop_features(net, im_size)
    utils.collect_named_outputs(end_points_collection, alias, net)
    return net


def xfcn(inputs, dropout_rate, scope='xfcn'):
    """Defines the xfcn network.

    The graph is an Xception-style backbone (entry flow, two
    `middle_flow_block` calls, one exit-flow unit) with five side outputs.
    Each side output is upsampled, cropped and collected; the five 16-channel
    upsampled maps are concatenated and fused by a 1x1 convolution.

    Args:
        inputs: Tensor that contains the input image. Presumably laid out as
            [batch, height, width, channels], since the side maps are
            concatenated along axis 3 -- TODO confirm against callers.
        dropout_rate: Value forwarded unchanged as `keep_prob` to every
            `slim.dropout` call. NOTE: despite the parameter name, it is
            therefore used as a keep probability, not as a drop rate.
        scope: Scope name for the network.

    Returns:
        net: Output tensor of the 'upscore-fuse' 1x1 convolution.
        end_points: Dictionary built from the network's end-points collection.
    """
    im_size = tf.shape(inputs)

    with tf.variable_scope(scope, 'xfcn', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            outputs_collections=end_points_collection):
            # Entry flow stem: two plain convolutions, each with BN + ReLU.
            net = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID',
                              scope='xception_65/entry_flow/conv1_1')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_1/BatchNorm')
            net = tf.nn.relu(net)
            net = slim.conv2d(net, 64, [3, 3],
                              scope='xception_65/entry_flow/conv1_2')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_2/BatchNorm')
            net = tf.nn.relu(net)

            # Entry flow, block1. Unlike the later units, the input is not
            # passed through an extra ReLU and the first separable
            # convolution is created with activation_fn=None.
            module = 'xception_65/entry_flow/block1/unit_1/xception_module/'
            residual_1 = _xfcn_shortcut(net, 128, module)
            net = _xfcn_unit_branch(net, (128, 128, 128), module,
                                    activation_fn=None)
            net_2 = tf.math.add(residual_1, net)
            net_2_drop = slim.dropout(net_2, keep_prob=dropout_rate)

            # Entry flow, block2.
            module = 'xception_65/entry_flow/block2/unit_1/xception_module/'
            residual_2 = _xfcn_shortcut(net_2, 256, module)
            net = _xfcn_unit_branch(tf.nn.relu(net_2), (256, 256, 256), module)
            net_3 = tf.math.add(net, residual_2)
            net_3_drop = slim.dropout(net_3, keep_prob=dropout_rate)

            # Entry flow, block3.
            module = 'xception_65/entry_flow/block3/unit_1/xception_module/'
            residual_3 = _xfcn_shortcut(net_3, 728, module)
            net = _xfcn_unit_branch(tf.nn.relu(net_3), (728, 728, 728), module)
            net_4 = tf.math.add(net, residual_3)
            net_4_drop = slim.dropout(net_4, keep_prob=dropout_rate)

            # Middle flow. middle_flow_block is defined elsewhere; only
            # unit_num=1 and unit_num=2 are built here.
            net = middle_flow_block(net_4, unit_num=1)
            net = middle_flow_block(net, unit_num=2)
            net_5_drop = slim.dropout(net, keep_prob=dropout_rate)

            # Exit flow, block1.
            module = 'xception_65/exit_flow/block1/unit_1/xception_module/'
            residual_20 = _xfcn_shortcut(net, 1024, module)
            net = _xfcn_unit_branch(tf.nn.relu(net), (728, 1024, 1024), module)
            net_6 = tf.math.add(net, residual_20)
            net_6_drop = slim.dropout(net_6, keep_prob=dropout_rate)

            # One entry per side output:
            # (level, features, side-conv scope, atrous rate,
            #  transposed-conv kernel size, transposed-conv stride).
            # Scope names are spelled out because they are not regular.
            side_specs = (
                (2, net_2_drop, 'conv2_2_16', 1, 8, 4),
                (3, net_3_drop, 'conv3_3_16', 2, 16, 8),
                (4, net_4_drop, 'conv4_3_16', 4, 32, 16),
                (5, net_5_drop, 'conv5_3_16', 4, 32, 16),
                (6, net_6_drop, 'conv6_3_16', 8, 64, 32),
            )

            # Get side outputs of the network
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=tf.zeros_initializer()):
                sides = []
                for spec in side_specs:
                    sides.append(slim.conv2d(spec[1], 16, [3, 3],
                                             rate=spec[3], scope=spec[2]))

                # Supervise side outputs
                scores = []
                for spec, side in zip(side_specs, sides):
                    scores.append(slim.conv2d(
                        side, 1, [1, 1], scope='score-dsn_%d' % spec[0]))

                with slim.arg_scope([slim.convolution2d_transpose],
                                    outputs_collections=end_points_collection):
                    # Side outputs: single-channel score maps, collected as
                    # end points only.
                    for spec, score in zip(side_specs, scores):
                        _xfcn_upsample_and_crop(
                            score, 1, spec[4], spec[5], im_size,
                            end_points_collection,
                            scope='score-dsn_%d-up' % spec[0],
                            alias='xfcn/score-dsn_%d-cr' % spec[0])

                    # Main output: 16-channel maps that feed the fusion.
                    fused_inputs = []
                    for spec, side in zip(side_specs, sides):
                        fused_inputs.append(_xfcn_upsample_and_crop(
                            side, 16, spec[4], spec[5], im_size,
                            end_points_collection,
                            scope='score-multi%d-up' % spec[0],
                            alias='xfcn/side-multi%d-cr' % spec[0]))

                concat_side = tf.concat(fused_inputs, axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1],
                                  scope='upscore-fuse')

        end_points = utils.convert_collection_to_dict(end_points_collection)
        return net, end_points