def test_valid_input(self):
    """Valid scalars, sequences and shapes are returned as the expected tuple."""
    # (n, value, expected) triples for plain Python inputs.
    plain_cases = (
        (1, 2, (2, )),
        (2, 2, (2, 2)),
        (2, (2, 3), (2, 3)),
        (3, (2, 3, 1), (2, 3, 1)),
    )
    for n, value, expected in plain_cases:
        self.assertEqual(utils.n_positive_integers(n, value), expected)

    # A TensorShape with all dimensions given is accepted too.
    shape = tensor_shape.TensorShape([2, 3, 1])
    self.assertEqual(utils.n_positive_integers(3, shape), (2, 3, 1))
def test_valid_input(self):
    """Checks `n_positive_integers` on inputs that must be accepted."""
    # Each entry is (n, value, expected result).
    accepted = [
        (1, 2, (2,)),
        (2, 2, (2, 2)),
        (2, (2, 3), (2, 3)),
        (3, (2, 3, 1), (2, 3, 1)),
    ]
    for count, value, want in accepted:
        got = utils.n_positive_integers(count, value)
        self.assertEqual(got, want)

    # The same holds when the value is given as a TensorShape.
    got = utils.n_positive_integers(3, tensor_shape.TensorShape([2, 3, 1]))
    self.assertEqual(got, (2, 3, 1))
def slim_conv2d(inputs, num_outputs, kernel_size, stride=1, padding='SAME',
                data_format=None, rate=1, **kwargs):
    '''Applies `slim.conv2d` to a wrapped tensor and updates its receptive fields.

    Args:
        inputs: Converted with `as_tensor`; the result must provide `.value`
            (passed to `slim.conv2d`) and `.fields` (a dict of receptive fields).
        num_outputs: Passed to `slim.conv2d`.
        kernel_size: Integer or pair of integers.
        stride: Passed to `slim.conv2d` and to `receptive_field.conv2d`.
        padding: Passed to `slim.conv2d` and to `receptive_field.conv2d`.
        data_format: Accepted but not used by this function.
        rate: Must be 1.
        **kwargs: Forwarded to `slim.conv2d`.

    Returns:
        A new `Tensor` whose `value` is the convolution output and whose
        `fields` are the input fields composed with this layer's field.

    Raises:
        ValueError: If `rate` is not 1.
    '''
    kernel_size = layer_utils.n_positive_integers(2, kernel_size)
    # NOTE: an earlier check that required VALID padding for kernels other
    # than 1x1 is disabled; any padding is forwarded to slim.conv2d.
    if rate != 1:
        raise ValueError('dilation rate not supported yet: {}'.format(rate))

    inputs = as_tensor(inputs)
    outputs = Tensor()
    outputs.value = slim.conv2d(
        inputs.value, num_outputs, kernel_size,
        stride=stride, padding=padding, **kwargs)

    # Update receptive fields: compose every input field with this layer's.
    layer_field = receptive_field.conv2d(kernel_size, stride, padding)
    composed = {}
    for key, input_field in inputs.fields.items():
        composed[key] = receptive_field.compose(input_field, layer_field)
    outputs.fields = composed
    return outputs
def crop_pyr(im, rect, im_size, scales, pad_value=0, feather=False,
             feather_margin=0.05, name='crop_pyr'):
    '''Crops one rectangle per scale from every image and resizes the crops.

    Args:
        im: [b, h, w, 3]
        rect: [b, 4]
        im_size: (height, width)
        scales: [s]
        pad_value: Either scalar constant or tf.Tensor that is
            broadcast-compatible with image.
        feather: If true, `feather_image` is applied before cropping.
        feather_margin: Passed to `feather_image` as `margin`.
        name: Name of the enclosing name scope.

    Returns:
        Tuple `(crop_ims, rects)`.
        crop_ims: [b, s, h, w, 3]
        rects: The grown rectangles as passed to the crop op, i.e. with the
            batch and scale dims merged (they are not restored to [b, s, 4]).
    '''
    with tf.name_scope(name):
        if tf.contrib.framework.is_tensor(pad_value):
            # TODO: This operation seems slow!
            # Shift the image so that padding with zero is equivalent, crop
            # with a constant pad value, then shift the crops back.
            im -= pad_value
            crop_ims, rects = crop_pyr(
                im, rect, im_size, scales,
                pad_value=0,
                feather=feather,
                feather_margin=feather_margin,
                name=name)
            crop_ims += tf.expand_dims(pad_value, 1)
            return crop_ims, rects

        if feather:
            im = feather_image(im, margin=feather_margin,
                               background_value=pad_value)

        # [b, s, 4]
        scales_column = tf.expand_dims(scales, -1)
        rect_row = tf.expand_dims(rect, -2)
        rects = geom.grow_rect(scales_column, rect_row)

        # Extract multiple rectangles from each image: box_ind[i, j] = i.
        batch_len = tf.shape(im)[0]
        num_scales, = tf.unstack(tf.shape(scales))
        image_index = tf.expand_dims(tf.range(batch_len), 1)
        box_ind = tf.tile(image_index, [1, num_scales])

        # [b, s, ...] -> [b*s, ...]
        rects, restore = merge_dims(rects, 0, 2)
        box_ind, _ = merge_dims(box_ind, 0, 2)

        boxes = geom.rect_to_tf_box(rects)
        crop_size = n_positive_integers(2, im_size)
        crop_ims = tf.image.crop_and_resize(
            im, boxes,
            box_ind=box_ind,
            crop_size=crop_size,
            extrapolation_value=pad_value)

        # [b*s, ...] -> [b, s, ...]
        crop_ims = restore(crop_ims, 0)
        return crop_ims, rects
def max_pool2d(x, window, stride=1, padding='SAME', name='MaxPool'):
    """Max-pooling wrapper around `tf.nn.pool`.

    Args:
        x: Tensor of known rank >= 3. A rank-3 input gets a trailing
            dimension appended before pooling.
        window: Integer or pair of integers giving the pooling window.
        stride: Integer or pair of integers giving the pooling stride.
        padding: Padding mode passed to `tf.nn.pool`.
        name: Name of the pooling op and of the collected output.

    Returns:
        The value returned by `utils.collect_named_outputs` for the pooled
        tensor, registered under `tf.GraphKeys.ACTIVATIONS`.

    Raises:
        ValueError: If the rank of `x` is unknown or smaller than 3.
    """
    input_rank = x.get_shape().ndims
    if input_rank is None:
        raise ValueError('Rank of inputs must be known')
    if input_rank < 3:
        raise ValueError('Rank of inputs is %d, which is < 3' % input_rank)
    if input_rank == 3:
        x = tf.expand_dims(x, 3)

    def _left_pad_ones(values):
        # Prepend 1s until there are input_rank - 2 entries.
        missing = input_rank - len(values) - 2
        if missing > 0:
            return (1, ) * missing + values
        return values

    window = _left_pad_ones(utils.n_positive_integers(2, window))
    stride = _left_pad_ones(utils.n_positive_integers(2, stride))

    pooled = tf.nn.pool(x, window, 'MAX', padding, strides=stride, name=name)
    return utils.collect_named_outputs(tf.GraphKeys.ACTIVATIONS, name, pooled)
def multiscale_error(response_size, num_scales, translation_stride,
                     log_scale_step, base_target_size, gt_translation,
                     gt_size):
    '''Computes the error of every element of a multi-scale response.

    Ground-truth is relative to center of response.

    Args:
        response_size: Integer or 2-tuple of integers
        num_scales: Integer
        translation_stride: Float
        log_scale_step: Float
        base_target_size: Float or tensor with shape [b]
        gt_translation: [b, 2]
        gt_size: [b]

    Returns:
        err_translation: [b, s, h, w, 2]
        err_log_scale: [b, s]
    '''
    response_size = n_positive_integers(2, response_size)
    translation_stride = float(translation_stride)
    log_scale_step = float(log_scale_step)

    # TODO: Check if ground-truth is within range!
    # Displacement of each response element from the center, in stride units,
    # converted to the translation it represents at the base scale.
    displacement = util.displacement_from_center(response_size)
    base_translations = translation_stride * tf.to_float(displacement)

    scales = util.scale_range(tf.constant(num_scales),
                              tf.to_float(log_scale_step))
    gt_scale = gt_size / base_target_size

    # err_log_scale: [b, s]
    log_scales = tf.log(scales)  # [s]
    log_gt_scale = tf.log(tf.expand_dims(gt_scale, -1))  # [b] -> [b, 1]
    err_log_scale = log_scales - log_gt_scale

    # translations: [b, s, h, w, 2]
    # [..., h, w, 2] -> [..., 1, h, w, 2]
    translations_per_scale = tf.expand_dims(base_translations, -4)
    # [s] -> [s, 1, 1, 1]
    scale_factors = helpers.expand_dims_n(scales, -1, 3)
    translations = translations_per_scale * scale_factors

    # err_translation: [b, s, h, w, 2]
    # [b, 2] -> [b, 1, 1, 1, 2]
    gt_translation_bcast = helpers.expand_dims_n(gt_translation, -2, 3)
    err_translation = translations - gt_translation_bcast

    return err_translation, err_log_scale
def make_grid_centers(im_size, name='make_grid_centers'):
    '''Make grid of center positions of each pixel.

    Positions are normalized by the image size: pixel `i` along an axis of
    length `n` has center `(i + 0.5) / n`.

    Args:
        im_size: (height, width)
        name: Name of the enclosing name scope (also used for the result).

    Returns:
        Tensor grid of size [height, width, 2] as (x, y).
    '''
    def _axis_centers(length):
        # Centers of `length` equally sized cells covering the unit interval.
        return (tf.to_float(tf.range(length)) + 0.5) / float(length)

    with tf.name_scope(name) as scope:
        size_y, size_x = n_positive_integers(2, im_size)
        centers_y = _axis_centers(size_y)
        centers_x = _axis_centers(size_x)
        # 'ij' indexing: first output varies along rows (y), second along columns (x).
        grid_y, grid_x = tf.meshgrid(centers_y, centers_x, indexing='ij')
        return tf.stack((grid_x, grid_y), axis=-1, name=scope)
def test_label_fns(self):
    """Every label function must produce positive and negative weighted labels.

    For each entry of `regress.LABEL_FNS` under test, the labels must have
    rank 5, the weights must be non-negative, and both the weighted positive
    mass and the weighted negative mass must be strictly positive.
    """
    response_size = 7
    num_scales = 3
    translation_stride = 10
    log_scale_step = np.log(2)
    base_target_size = 30
    gt_translation = [-20, -40]
    gt_size = 60

    label_fns = {
        'hard': dict(
            translation_radius_pos=0.2,
            translation_radius_neg=0.5,
            scale_radius_pos=1.1,
            scale_radius_neg=1.3,
        ),
        'hard_binary': dict(
            translation_radius=0.2,
            scale_radius=1.2,
        ),
    }

    for name, kwargs in label_fns.items():
        with trySubTest(self, label_fn=name):
            with self.test_session():
                label_fn = regress.LABEL_FNS[name]
                _, labels, weights = label_fn(
                    response_size, num_scales, translation_stride,
                    log_scale_step, base_target_size,
                    _make_constant_batch(gt_translation),
                    _make_constant_batch(gt_size),
                    **kwargs)
                # labels: [b, s, h, w, c]
                # Use the TestCase assertion (not a bare `assert`) so the
                # check survives `python -O` and reports the actual rank.
                self.assertEqual(len(labels.shape), 5)
                self.assertAllGreaterEqual(weights, 0)
                # Reduce over everything except the batch dimension.
                sum_positive = tf.reduce_sum(
                    weights * labels, axis=(-4, -3, -2, -1))
                sum_negative = tf.reduce_sum(
                    weights * (1 - labels), axis=(-4, -3, -2, -1))
                self.assertAllGreater(sum_positive, 0)
                self.assertAllGreater(sum_negative, 0)
def crop(im, rect, im_size, pad_value=0, feather=False, feather_margin=0.05,
         name='crop'):
    '''Crops one rectangle from every image and resizes it to `im_size`.

    Args:
        im: [b, h, w, c]
        rect: [b, 4]
        im_size: (height, width)
        pad_value: Either scalar constant or tf.Tensor that is
            broadcast-compatible with image.
        feather: If true, `feather_image` is applied before cropping.
        feather_margin: Passed to `feather_image` as `margin`.
        name: Name of the enclosing name scope.

    Returns:
        The result of `tf.image.crop_and_resize` with one box per image.
    '''
    with tf.name_scope(name):
        if tf.contrib.framework.is_tensor(pad_value):
            # TODO: This operation seems slow!
            # Shift the image so that padding with zero is equivalent, crop
            # with a constant pad value, then shift the result back.
            im -= pad_value
            im = crop(im, rect, im_size,
                      pad_value=0,
                      feather=feather,
                      feather_margin=feather_margin,
                      name=name)
            im += pad_value
            return im

        if feather:
            im = feather_image(im, margin=feather_margin,
                               background_value=pad_value)

        # Use static shape if possible.
        static_batch_len = im.shape[0].value
        batch_len = static_batch_len if static_batch_len else tf.shape(im)[0]

        boxes = geom.rect_to_tf_box(rect)
        # Box i is taken from image i.
        box_ind = tf.range(batch_len)
        crop_size = n_positive_integers(2, im_size)
        return tf.image.crop_and_resize(
            im, boxes,
            box_ind=box_ind,
            crop_size=crop_size,
            extrapolation_value=pad_value)
def test_compute_loss_map(self):
    """Each discrete loss configuration must give a finite rank-1 loss."""
    response_size = 7
    num_scales = 3
    translation_stride = 10
    log_scale_step = np.log(2.0)
    base_target_size = 30
    # Scores have shape [1, num_scales, height, width, 1].
    spatial_shape = n_positive_integers(2, response_size)
    scores_shape = (1, num_scales) + spatial_shape + (1,)
    gt_translation = [-20, -40]
    gt_size = 60

    scores = tf.random.normal(scores_shape, dtype=tf.float32)

    losses = {
        'sigmoid_hard': {
            'method': 'sigmoid',
            'params': {
                'balanced': True,
                'label_method': 'hard',
                'label_params': {
                    'translation_radius_pos': 0.2,
                    'translation_radius_neg': 0.5,
                    'scale_radius_pos': 1.1,
                    'scale_radius_neg': 1.3,
                },
            },
        },
        'sigmoid_hard_binary': {
            'method': 'sigmoid',
            'params': {
                'balanced': True,
                'label_method': 'hard_binary',
                'label_params': {
                    'translation_radius': 0.2,
                    'scale_radius': 1.2,
                },
            },
        },
    }

    for loss_name, loss_kwargs in losses.items():
        with trySubTest(self, loss=loss_name):
            _, loss = regress.compute_loss_discrete(
                scores, num_scales, translation_stride, log_scale_step,
                base_target_size,
                _make_constant_batch(gt_translation),
                _make_constant_batch(gt_size),
                **loss_kwargs)
            # One loss value per batch element.
            self.assertEqual(len(loss.shape), 1)
            with self.test_session():
                loss_value = loss.eval()
                self.assertTrue(np.all(np.isfinite(loss_value)))
def diag_xcorr(x, f, stride=1, padding='VALID', name='diag_xcorr', **kwargs):
    '''Cross-correlates every channel of every batch element with its own filter.

    The batch and channel dims are merged so that a single depthwise
    convolution applies filter `f[i, :, :, k]` to `x[i, ..., :, :, k]`.

    Args:
        x: [b, ..., hx, wx, c]
        f: [b, hf, wf, c]
        stride: Integer or pair of integers.
        padding: Padding mode passed to `tf.nn.depthwise_conv2d`.
        name: Name of the enclosing name scope.
        **kwargs: Forwarded to `tf.nn.depthwise_conv2d`.

    Returns:
        [b, ..., ho, wo, c]
    '''
    with tf.name_scope(name):
        assert len(f.shape) == 4

        if len(x.shape) == 4:
            # No extra dims: add a singleton one, recurse, and remove it.
            x = tf.expand_dims(x, 1)
            x = diag_xcorr(x, f, stride, padding, name=name, **kwargs)
            x = tf.squeeze(x, 1)
            return x

        if len(x.shape) > 5:
            # Merge the extra dims (1, ..., n-4) into one; dims 0, n-3, n-2
            # and n-1 are kept. The merge must start at dim 1, not 0:
            # the batch dim has to stay separate to line up with `f`, and
            # the merged dim is restored at axis 1 below.
            x, restore = merge_dims(x, 1, len(x.shape) - 3)
            x = diag_xcorr(x, f, stride, padding, name=name, **kwargs)
            x = restore(x, 1)
            return x

        assert len(x.shape) == 5
        stride = layer_utils.n_positive_integers(2, stride)

        # x.shape is [b, n, hx, wx, c]
        # f.shape is [b, hf, wf, c]
        # [b, n, hx, wx, c] -> [n, hx, wx, b, c] -> [n, hx, wx, b*c]
        x, restore = merge_dims(tf.transpose(x, [1, 2, 3, 0, 4]), 3, 5)
        # [b, hf, wf, c] -> [hf, wf, b, c] -> [hf, wf, b*c]
        f, _ = merge_dims(tf.transpose(f, [1, 2, 0, 3]), 2, 4)
        f = tf.expand_dims(f, axis=3)  # [hf, wf, b*c, 1]

        strides = [1, stride[0], stride[1], 1]
        x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding=padding,
                                   **kwargs)
        # [n, ho, wo, b*c] -> [n, ho, wo, b, c] -> [b, n, ho, wo, c]
        x = tf.transpose(restore(x, axis=3), [3, 0, 1, 2, 4])
        return x
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_normalizer_fn=None,
                weights_normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    # Copied and modified from tensorflow-0.12.0.contrib.layer.convolution;
    # adds the weights_normalizer_* options.
    """Adds an N-D convolution followed by an optional normalization layer.

    It is required that 1 <= N <= 3.

    `convolution` creates a variable called `weights`, representing the
    convolutional kernel, that is convolved (actually cross-correlated) with
    the `inputs` to produce a `Tensor` of activations. If a
    `weights_normalizer_fn` is provided, it is applied to `weights` before the
    convolution. If a `normalizer_fn` is provided (such as `batch_norm`), it
    is then applied to the activations. Otherwise, if `normalizer_fn` is None
    and a `biases_initializer` is provided, a `biases` variable is created and
    added to the activations. Finally, if `activation_fn` is not `None`, it is
    applied to the activations as well.

    Performs atrous convolution with input stride/dilation rate equal to
    `rate` if a value > 1 for any dimension of `rate` is specified. In this
    case `stride` values != 1 are not supported.

    Args:
      inputs: a Tensor of rank N+2 of shape
        `[batch_size] + input_spatial_shape + [in_channels]` if data_format
        does not start with "NC" (default), or
        `[batch_size, in_channels] + input_spatial_shape` if data_format
        starts with "NC".
      num_outputs: integer, the number of output filters.
      kernel_size: a sequence of N positive integers specifying the spatial
        dimensions of the filters. Can be a single integer to specify the
        same value for all spatial dimensions.
      stride: a sequence of N positive integers specifying the stride at
        which to compute output. Can be a single integer to specify the same
        value for all spatial dimensions. Specifying any `stride` value != 1
        is incompatible with specifying any `rate` value != 1.
      padding: one of `"VALID"` or `"SAME"`.
      data_format: A string or None. Specifies whether the channel dimension
        of the `input` and output is the last dimension (default, or if
        `data_format` does not start with "NC"), or the second dimension (if
        `data_format` starts with "NC"). For N=1, the valid values are "NWC"
        (default) and "NCW". For N=2, the valid values are "NHWC" (default)
        and "NCHW". For N=3, currently the only valid value is "NDHWC".
      rate: a sequence of N positive integers specifying the dilation rate to
        use for atrous convolution. Can be a single integer to specify the
        same value for all spatial dimensions. Specifying any `rate` value
        != 1 is incompatible with specifying any `stride` value != 1.
      activation_fn: activation function, set to None to skip it and maintain
        a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are neither created nor
        added. Defaults to None for no normalizer function.
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused. To
        be able to reuse the layer, scope must be given.
      variables_collections: optional list of collections for all the
        variables or a dictionary containing a different list of collections
        per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      a tensor representing the output of the operation.

    Raises:
      ValueError: if `data_format` is invalid.
      ValueError: if the rank of `inputs` is unknown or not in [3, 5].
      ValueError: if the number of input channels is unknown.
      ValueError: both 'rate' and `stride` are not uniformly 1.
    """
    supported_formats = (None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC')
    if data_format not in supported_formats:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    with variable_scope.variable_scope(
            scope, 'Conv', [inputs], reuse=reuse) as conv_scope:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype

        input_rank = inputs.get_shape().ndims
        if input_rank is None:
            raise ValueError('Rank of inputs must be known')
        if not 3 <= input_rank <= 5:
            raise ValueError(
                'Rank of inputs is %d, which is not >= 3 and <= 5' %
                input_rank)

        # Every dim except batch and channels is a spatial (convolved) dim.
        conv_dims = input_rank - 2
        kernel_size = utils.n_positive_integers(conv_dims, kernel_size)
        stride = utils.n_positive_integers(conv_dims, stride)
        rate = utils.n_positive_integers(conv_dims, rate)

        # Locate the channel axis: last for channels-last formats, second
        # for "NC..." formats.
        if data_format is None or data_format.endswith('C'):
            channel_axis = input_rank - 1
        elif data_format.startswith('NC'):
            channel_axis = 1
        else:
            raise ValueError('Invalid data_format')
        num_input_channels = inputs.get_shape()[channel_axis].value
        if num_input_channels is None:
            raise ValueError('Number of in_channels must be known.')

        # Kernel layout: spatial dims, then input channels, then output channels.
        weights_shape = list(kernel_size)
        weights_shape += [num_input_channels, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable(
            'weights',
            shape=weights_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            collections=weights_collections,
            trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(weights,
                                            **weights_normalizer_params)

        outputs = nn.convolution(
            input=inputs,
            filter=weights,
            dilation_rate=rate,
            strides=stride,
            padding=padding,
            data_format=data_format)

        # A normalizer replaces the bias; the bias is only added without one.
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        elif biases_initializer is not None:
            biases_collections = utils.get_variable_collections(
                variables_collections, 'biases')
            biases = variables.model_variable(
                'biases',
                shape=[num_outputs],
                dtype=dtype,
                initializer=biases_initializer,
                regularizer=biases_regularizer,
                collections=biases_collections,
                trainable=trainable)
            outputs = nn.bias_add(outputs, biases, data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return utils.collect_named_outputs(
            outputs_collections, conv_scope.original_name_scope, outputs)
def test_invalid_input(self):
    """Invalid counts, values and shapes must all raise ValueError."""
    # (n, value) pairs built from plain Python objects.
    rejected = (
        ('3', [1]),
        (3.3, [1]),
        (-1, [1]),
        (0, [1]),
        (1, [1, 2]),
        (1, [-1]),
        (1, [0]),
        (1, [0]),  # repeated check, kept as-is
        (2, [1]),
        (2, [1, 2, 3]),
        (2, ['hello', 2]),
    )
    for n, value in rejected:
        with self.assertRaises(ValueError):
            utils.n_positive_integers(n, value)

    # (n, dims) pairs whose value is a TensorShape built from `dims`:
    # wrong length, an unknown dimension, and an unknown rank.
    rejected_shapes = (
        (2, [2, 3, 1]),
        (3, [2, None, 1]),
        (3, None),
    )
    for n, dims in rejected_shapes:
        with self.assertRaises(ValueError):
            utils.n_positive_integers(n, tf.TensorShape(dims))
def test_invalid_input(self):
    """Checks that `n_positive_integers` rejects malformed arguments."""
    # Each entry is (n, value); every call is expected to raise ValueError.
    bad_arguments = [
        ('3', [1]),
        (3.3, [1]),
        (-1, [1]),
        (0, [1]),
        (1, [1, 2]),
        (1, [-1]),
        (1, [0]),
        (1, [0]),  # repeated check, kept as-is
        (2, [1]),
        (2, [1, 2, 3]),
        (2, ['hello', 2]),
    ]
    for count, value in bad_arguments:
        with self.assertRaises(ValueError):
            utils.n_positive_integers(count, value)

    # TensorShape values: wrong length, an unknown dimension, unknown rank.
    bad_shape_arguments = [
        (2, [2, 3, 1]),
        (3, [2, None, 1]),
        (3, None),
    ]
    for count, dims in bad_shape_arguments:
        with self.assertRaises(ValueError):
            utils.n_positive_integers(count, tensor_shape.TensorShape(dims))
def conv_2d(x,
            n_units,
            kernel_size,
            stride=1,
            dilation=None,
            padding="SAME",
            use_bias=True,
            use_batch_norm=False,
            activation_fn=tf.nn.relu,
            weight_decay=0.0005,
            trainable=True,
            reuse=None,
            is_training=None,
            weights_initializer=None,
            biases_initializer=None,
            bn_initializer=None,
            name='conv2d'):
    """Convolution wrapper.

    Creates a `weights` variable, convolves `x` with it, then applies either
    batch normalization or a bias, followed by an optional activation.

    Args:
        x: Tensor of known rank >= 3. A rank-3 input gets a trailing
            dimension appended.
        n_units: Number of output channels (last dim of the kernel).
        kernel_size: Integer or pair of integers.
        stride: Integer or pair of integers.
        dilation: None, or an integer or pair of integers.
        padding: Padding mode passed to `tf.nn.convolution`.
        use_bias: Add a bias. Ignored when `use_batch_norm` is true.
        use_batch_norm: Apply `_bn` instead of a bias.
        activation_fn: Activation to apply, or None to skip it.
        weight_decay: L2 regularization strength for the weights; only used
            when positive and `trainable` is true.
        trainable: Whether the created variables are trainable.
        reuse: Passed to the variable scope (and to `_bn`).
        is_training: Passed to `_bn`.
        weights_initializer: Passed through `var_initializer`.
        biases_initializer: Passed to `add_bias`.
        bn_initializer: Passed to `_bn` as `param_initializers`.
        name: Name of the variable scope.

    Returns:
        The value returned by `utils.collect_named_outputs` for the output
        tensor, registered under `tf.GraphKeys.ACTIVATIONS`.

    Raises:
        ValueError: If the rank of `x` is unknown or smaller than 3.
    """
    with tf.variable_scope(name, 'Conv2D', [x], reuse=reuse) as sc:
        dtype = x.dtype.base_dtype
        input_rank = x.get_shape().ndims
        if input_rank is None:
            raise ValueError('Rank of inputs must be known')
        if input_rank < 3:
            raise ValueError('Rank of inputs is %d, which is < 3' % input_rank)
        if input_rank == 3:
            x = tf.expand_dims(x, 3)

        def _left_pad_ones(values):
            # Prepend 1s until there are input_rank - 2 entries.
            missing = input_rank - len(values) - 2
            if missing > 0:
                return (1, ) * missing + values
            return values

        # Kernel dimensions: spatial dims, input channels, output channels,
        # left-padded with 1s up to the input rank.
        kernel_size = utils.n_positive_integers(2, kernel_size)
        in_channels = x.get_shape().as_list()[-1]
        w_shape = list(kernel_size) + [in_channels, n_units]
        missing_dims = input_rank - len(w_shape)
        if missing_dims > 0:
            w_shape = [1] * missing_dims + w_shape

        # Create variable for kernel
        w_shape, weights_initializer = var_initializer(w_shape,
                                                       weights_initializer)
        weights_regularizer = None
        if weight_decay > 0 and trainable:
            weights_regularizer = l2_regularizer(weight_decay)
        weights = variables.model_variable(
            'weights',
            shape=w_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            trainable=trainable)

        # Convolution
        stride = _left_pad_ones(utils.n_positive_integers(2, stride))
        if dilation is not None:
            dilation = _left_pad_ones(utils.n_positive_integers(2, dilation))
        x = tf.nn.convolution(
            input=x,
            filter=weights,
            strides=stride,
            dilation_rate=dilation,
            padding=padding)

        if use_batch_norm:
            # Batch normalization (takes the place of the bias)
            x = _bn(x,
                    decay=0.99,
                    scale=True,
                    param_initializers=bn_initializer,
                    is_training=is_training,
                    trainable=trainable,
                    reuse=reuse,
                    scope='bn')
        elif use_bias:
            # Bias
            x = add_bias(x, n_units, biases_initializer, dtype, trainable)

        # Activation
        if activation_fn is not None:
            x = activation_fn(x)

        return utils.collect_named_outputs(tf.GraphKeys.ACTIVATIONS,
                                           sc.original_name_scope, x)
def deconv_2d(x,
              n_units,
              kernel_size,
              stride=1,
              use_bias=True,
              padding="SAME",
              activation_fn=tf.nn.relu,
              weight_decay=0.0005,
              trainable=True,
              reuse=None,
              weights_initializer=None,
              biases_initializer=None,
              name='deconv2d'):
    """Deconvolution wrapper.

    Creates a `weights` variable, applies `tf.nn.conv2d_transpose`, then an
    optional bias and an optional activation.

    Args:
        x: Tensor of known rank >= 3. A rank-3 input gets a trailing
            dimension appended.
        n_units: Number of output channels.
        kernel_size: Integer or pair of integers.
        stride: Integer or pair of integers.
        use_bias: Whether to add a bias.
        padding: Padding mode passed to `tf.nn.conv2d_transpose`.
        activation_fn: Activation to apply, or None to skip it.
        weight_decay: L2 regularization strength for the weights; only used
            when positive and `trainable` is true.
        trainable: Whether the created variables are trainable.
        reuse: Passed to the variable scope.
        weights_initializer: Passed through `var_initializer`.
        biases_initializer: Passed to `add_bias`.
        name: Name of the variable scope.

    Returns:
        The value returned by `utils.collect_named_outputs` for the output
        tensor, registered under `tf.GraphKeys.ACTIVATIONS`.

    Raises:
        ValueError: If the rank of `x` is unknown or smaller than 3.
    """
    with tf.variable_scope(name, 'Deconv2D', [x], reuse=reuse) as sc:
        dtype = x.dtype.base_dtype
        input_rank = x.get_shape().ndims
        if input_rank is None:
            raise ValueError('Rank of inputs must be known')
        if input_rank < 3:
            raise ValueError('Rank of inputs is %d, which is < 3' % input_rank)
        if input_rank == 3:
            x = tf.expand_dims(x, 3)

        # Kernel layout for the transposed convolution:
        # spatial dims, output channels, input channels.
        kernel_size = utils.n_positive_integers(2, kernel_size)
        w_shape = list(kernel_size) + [n_units, x.get_shape().as_list()[-1]]
        if len(w_shape) < input_rank:
            w_shape = [1] * (input_rank - len(w_shape)) + w_shape

        # Create variable for kernel
        w_shape, weights_initializer = var_initializer(w_shape,
                                                       weights_initializer)
        weights_regularizer = l2_regularizer(
            weight_decay) if weight_decay > 0 and trainable else None
        weights = variables.model_variable(
            'weights',
            shape=w_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            trainable=trainable)

        # Deconvolution
        sz = x.get_shape().as_list()
        # Normalize `stride` to a pair before indexing it. The original
        # assigned the result to a misspelled name, so an integer stride
        # (including the default) failed on `stride[0]`.
        stride = utils.n_positive_integers(2, stride)
        # NOTE(review): the output size is computed the same way for every
        # `padding` value, and sz[0] is None when the batch size is not
        # statically known -- confirm both are acceptable for the callers.
        output_shape = (sz[0],
                        sz[1] * stride[0] + kernel_size[0] - stride[0],
                        sz[2] * stride[1] + kernel_size[1] - stride[1],
                        n_units)
        x = tf.nn.conv2d_transpose(
            x,
            weights,
            output_shape,
            strides=[1, stride[0], stride[1], 1],
            padding=padding)

        # Bias
        if use_bias:
            x = add_bias(x, n_units, biases_initializer, dtype, trainable)

        # Activation
        if activation_fn is not None:
            x = activation_fn(x)

        return utils.collect_named_outputs(tf.GraphKeys.ACTIVATIONS,
                                           sc.original_name_scope, x)