def fully_connected(input_tensor: tf.Tensor,
                    num_outputs: int,
                    dtype: Optional[Any] = tf.float16,
                    name: Optional[str] = 'fc',
                    *_) -> tf.Tensor:
    """Applies a fully connected layer to `input_tensor`.

    Args:
        input_tensor: 2D Tensor of dimensions [batch, in_units]
        num_outputs: Number of output units
        dtype: Data type of parameters
        name: Optional name for this operation

    Returns:
        A 2-D Tensor computing matmul(x, weights) + biases, dimensions
        [batch, num_outputs]
    """
    num_inputs = input_tensor.get_shape()[1]
    w_init = contrib.layers.xavier_initializer(dtype=dtype)
    b_init = tf.constant_initializer(0.0, dtype=dtype)
    with tf.variable_scope(name):
        weights = tf.get_variable('kernel',
                                  shape=[num_inputs, num_outputs],
                                  initializer=w_init,
                                  dtype=dtype)
        biases = tf.get_variable('bias',
                                 shape=[num_outputs],
                                 initializer=b_init,
                                 dtype=dtype)
    return tf.nn.xw_plus_b(input_tensor, weights, biases, name=name)

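def _example_fully_connected():
    # Minimal usage sketch (illustrative only, not part of the original
    # module): assumes TF 1.x graph mode and fp16 inputs to match the
    # default dtype above; tensor names and sizes are made up.
    x = tf.placeholder(tf.float16, shape=[32, 128])          # [batch, in_units]
    logits = fully_connected(x, num_outputs=10, name='fc_out')
    return logits                                            # shape [32, 10]
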
def embedding_with_vars(spec: MatmulSpec,
                        indices: tf.Tensor,
                        matmul_options: dict,
                        trainable_nz: tf.Variable,
                        metainfo: tf.Variable,
                        embedding_grad_scale: float) -> tf.Tensor:
    """Returns the tensor containing the embedded sequence using the variables
    from an existing sparse fully connected layer.

    :param spec: Matmul specification that contains shape and block size for
        the tied fc layer.
    :param indices: The tensor holding the embedding indices.
    :param matmul_options: Options for the sparse matmul operation of the tied
        fc layer.
    :param trainable_nz: The trainable nonzero values variable for the tied fc
        layer.
    :param metainfo: The metainfo variable corresponding to the tied fc layer.
    :param embedding_grad_scale: Scalar value with which to scale the gradient.
    """
    num_tokens = indices.get_shape()[0]
    result_shape = tf.TensorShape([num_tokens, spec.input_size])
    outputs = {
        "output_types": [spec.data_type],
        "output_shapes": [result_shape],
    }
    json_args = get_json_args(spec, matmul_options, embedding_grad_scale)
    inputs = [indices, metainfo, trainable_nz]
    with_grads = [2]  # No grads wanted for indices or metainfo
    return ipu.custom_ops.precompiled_user_op(
        inputs,
        library_path=get_lib_path("sparse_embedding"),
        outs=outputs,
        inputs_with_gradients=with_grads,
        attributes=json_args,
        gradient_attributes=json_args,
        separate_gradients=True)

def correct_pad(inputs: tf.Tensor, kernel_size):
    """Returns a tuple for zero-padding for 2D convolution with downsampling.

    Args:
        inputs: Input tensor in channels_last (NHWC) format.
        kernel_size: An integer or tuple/list of 2 integers.

    Returns:
        A tuple of 2 (pad_before, pad_after) tuples for the height and width
        dimensions.
    """
    input_size = inputs.get_shape().as_list()[1:3]
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    if input_size[0] is None:
        adjust = (1, 1)
    else:
        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
    correct = (kernel_size[0] // 2, kernel_size[1] // 2)
    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))

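def _example_correct_pad():
    # Minimal usage sketch (illustrative only): pad the input explicitly with
    # the returned offsets so that a subsequent stride-2 'VALID' convolution
    # downsamples like the usual 'SAME' convolution. Assumes TF 1.x graph mode.
    x = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
    pad_h, pad_w = correct_pad(x, kernel_size=3)             # ((0, 1), (0, 1))
    x = tf.pad(x, [[0, 0], list(pad_h), list(pad_w), [0, 0]])
    return x                                                 # shape [1, 225, 225, 3]
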
def conv2d(input: tf.Tensor,
           output_dim: int,
           kernel_width: int = 5,
           kernel_height: int = 5,
           horizontal_stride: int = 2,
           vertical_stride: int = 2,
           weight_initializer: Optional[Initializer] = None,
           bias_initializer: Optional[Initializer] = None,
           name: str = "conv2d"):
    """
    Apply a 2D-convolution to a tensor.

    Parameters
    ----------
    input: tf.Tensor
        The tensor to which the convolution should be applied. Must be of
        shape [batch_size, height, width, channels]
    output_dim: int
        The number of convolutional filters
    kernel_width: int, optional
        The width of the convolutional filters (default 5)
    kernel_height: int, optional
        The height of the convolutional filters (default 5)
    horizontal_stride: int, optional
        The horizontal stride of the convolutional filters (default 2)
    vertical_stride: int, optional
        The vertical stride of the convolutional filters (default 2)
    weight_initializer: tf.Initializer, optional
        A custom initializer for the weight matrices of the filters
    bias_initializer: tf.Initializer, optional
        A custom initializer for the bias vectors of the filters
    name: str, optional
        A name for the operation (default "conv2d")

    Returns
    -------
    tf.Tensor
        The result of applying a 2D-convolution to the input tensor.
    """
    shape = input.get_shape().as_list()
    with tf.variable_scope(name):
        weights = tf.get_variable(
            name="weights",
            shape=[kernel_height, kernel_width, shape[-1], output_dim],
            initializer=weight_initializer)
        bias = tf.get_variable(name="bias",
                               shape=[output_dim],
                               initializer=bias_initializer)
        conv = tf.nn.conv2d(input,
                            filter=weights,
                            strides=[1, vertical_stride, horizontal_stride, 1],
                            padding='SAME')
        conv = tf.nn.bias_add(conv, bias)
        return conv

def crop(input_tensor: tf.Tensor,
         cropping: Tuple[Tuple[int, int], Tuple[int, int]]):
    """Crop input along width and height dimensions, assumes channels_last.

    Args:
        input_tensor: Input to be cropped.
        cropping: Number of rows/columns to remove from the (start, end) of
            the height and width dimensions respectively.

    Returns:
        Cropped tensor.
    """
    _, rows, cols, _ = input_tensor.get_shape().as_list()
    return input_tensor[:,
                        cropping[0][0]:rows - cropping[0][1],
                        cropping[1][0]:cols - cropping[1][1],
                        :]

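def _example_crop():
    # Minimal usage sketch (illustrative only): remove 2 rows from the top,
    # 3 from the bottom, and 1 column from each side of a channels_last
    # feature map. Assumes TF 1.x graph mode.
    x = tf.placeholder(tf.float32, shape=[4, 32, 32, 16])
    y = crop(x, cropping=((2, 3), (1, 1)))
    return y                                                 # shape [4, 27, 30, 16]
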
def se_block(input_feature: tf.Tensor, name: str, ratio: int = 8) -> tf.Tensor:
    """Implementation of Squeeze-and-Excitation (SE) block as described in
    https://arxiv.org/abs/1709.01507.

    Args:
        input_feature: tf.Tensor input to SE block.
        name: str defining name of SE block.
        ratio: int defining size of the bottleneck layer.

    Returns:
        output: tf.Tensor after feature recalibration using SE block.
    """
    kernel_initializer = tf.variance_scaling_initializer()
    bias_initializer = tf.constant_initializer(value=0.0)

    with tf.variable_scope(name):
        channel = input_feature.get_shape()[-1]
        # Spatial squeeze
        squeeze = tf.reduce_mean(input_feature, axis=[1, 2], keepdims=False)
        assert squeeze.get_shape()[1:] == (channel,)
        # Excitation
        excitation = slim.fully_connected(inputs=squeeze,
                                          num_outputs=int(channel // ratio),
                                          activation_fn=tf.nn.relu,
                                          weights_initializer=kernel_initializer,
                                          biases_initializer=bias_initializer,
                                          scope='bottleneck_fc')
        assert excitation.get_shape()[1:] == (channel // ratio,)
        excitation = slim.fully_connected(inputs=excitation,
                                          num_outputs=int(channel),
                                          activation_fn=tf.nn.sigmoid,
                                          weights_initializer=kernel_initializer,
                                          biases_initializer=bias_initializer,
                                          scope='recover_fc')
        assert excitation.get_shape()[1:] == (channel,)
        # Broadcast the per-channel scales back to [batch, 1, 1, channel]
        excitation = tf.expand_dims(excitation, axis=1)
        excitation = tf.expand_dims(excitation, axis=1)
        assert excitation.get_shape()[1:] == (1, 1, channel)
        output = input_feature * excitation
    return output

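def _example_se_block():
    # Minimal usage sketch (illustrative only, assumes TF 1.x and
    # tf.contrib.slim are available): recalibrate a [batch, H, W, C] feature
    # map with an SE block; shapes are made up.
    feature_map = tf.placeholder(tf.float32, shape=[8, 14, 14, 64])
    recalibrated = se_block(feature_map, name='se_1', ratio=8)
    return recalibrated                                      # same shape as the input
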
def linear(input: tf.Tensor,
           output_size: int,
           weight_initializer: Optional[Initializer] = None,
           bias_initializer: Optional[Initializer] = None,
           name: str = "linear") -> tf.Tensor:
    """
    Apply a linear transformation to a tensor.

    Parameters
    ----------
    input: tf.Tensor
        The tensor which should be linearly transformed
    output_size: int
        The desired output size of the linear transformation
    weight_initializer: tf.Initializer, optional
        A custom initializer for the weight matrix of the linear transformation
    bias_initializer: tf.Initializer, optional
        A custom initializer for the bias vector of the linear transformation
    name: str, optional
        A name for the operation (default "linear")

    Returns
    -------
    tf.Tensor
        The linearly transformed input tensor
    """
    shape = input.get_shape().as_list()
    with tf.variable_scope(name):
        weights = tf.get_variable(name="weights",
                                  shape=[shape[-1], output_size],
                                  dtype=tf.float32,
                                  initializer=weight_initializer)
        bias = tf.get_variable(name="bias",
                               shape=[output_size],
                               initializer=bias_initializer)
        return tf.matmul(input, weights) + bias

def _create_dense(input_tensor: tf.Tensor,
                  net_size: list,
                  start_layer_id: int,
                  layer_name: str,
                  w_dir: dict,
                  b_dir: dict):
    """
    Create a stack of dense layers with the given structure (ReLU is used in
    every layer by default). The output layer has to be created separately.

    :param input_tensor: network input; expected to be a tensor of shape [1, n]
    :param net_size: layer sizes, excluding the input layer
    :param start_layer_id: id of the first created layer, used in variable and
        scope names
    :param layer_name: prefix for the variable scope of each layer
    :param w_dir: keyword arguments forwarded to tf.get_variable for the weights
    :param b_dir: keyword arguments forwarded to tf.get_variable for the biases
    :return: tensor holding the output of the last created layer
    """
    this_input = input_tensor
    input_node_num = input_tensor.get_shape().as_list().pop()
    layer_id = start_layer_id
    for this_node_num in net_size:
        with tf.variable_scope(layer_name + str(layer_id)):
            w = tf.get_variable('w' + str(layer_id),
                                [input_node_num, this_node_num], **w_dir)
            b = tf.get_variable('b' + str(layer_id),
                                [1, this_node_num], **b_dir)
            layer = tf.nn.relu(tf.matmul(this_input, w) + b)
        this_input = layer
        input_node_num = this_node_num
        layer_id += 1
    return this_input

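def _example_create_dense():
    # Minimal usage sketch (illustrative only): w_dir / b_dir are
    # keyword-argument dicts forwarded to tf.get_variable; initializers and
    # layer sizes here are made up. Assumes TF 1.x graph mode.
    x = tf.placeholder(tf.float32, shape=[1, 10])
    w_dir = {'initializer': tf.truncated_normal_initializer(stddev=0.1)}
    b_dir = {'initializer': tf.constant_initializer(0.1)}
    hidden = _create_dense(x, net_size=[64, 32], start_layer_id=1,
                           layer_name='dense', w_dir=w_dir, b_dir=b_dir)
    return hidden                      # shape [1, 32]; output layer added separately
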
def depthwise_conv(input_tensor: tf.Tensor,
                   kernel_size: Union[int, Tuple[int, int]],
                   filters_out: Optional[int] = None,
                   stride: Optional[int] = 1,
                   padding: Optional[str] = 'SAME',
                   add_bias: Optional[bool] = True,
                   dtype: Optional[Any] = tf.float16,
                   name: Optional[str] = None,
                   *_):
    """Apply depthwise conv and optional bias on input tensor with Tensorflow.

    Performs a depthwise convolution.

    Args:
        input_tensor: Input data
        kernel_size: Filter size (assumes equal height and width)
        filters_out: Number of output filters
        stride: Stride of the filter
        padding: Type of padding to use
        add_bias: Should bias be added
        dtype: Data type of parameters
        name: Optional name for this op

    Returns:
        Output of convolution operator.
    """
    # Assumes input in NHWC format.
    filters_in = input_tensor.get_shape()[-1]
    if isinstance(kernel_size, int):
        depthwise_kernel_shape = [kernel_size, kernel_size, filters_in, 1]
    else:
        depthwise_kernel_shape = kernel_size + (filters_in, 1)

    w_init = contrib.layers.xavier_initializer(dtype=dtype)
    if name is None:
        # Fall back to a unique default name, mirroring `conv` below;
        # prefixing the enclosing name scope would otherwise fail on None.
        name = unique_object_name("depthwise_conv2d", zero_based=True)
    name_scope = tf.get_default_graph().get_name_scope()
    if name_scope not in ["", None]:
        name = name_scope + "/" + name

    with tf.get_default_graph().as_default():
        with tf.variable_scope(name):
            depthwise_kernel = tf.get_variable('depthwise_kernel',
                                               shape=depthwise_kernel_shape,
                                               initializer=w_init,
                                               dtype=dtype)
    output_tensor = tf.nn.depthwise_conv2d(input_tensor,
                                           depthwise_kernel,
                                           strides=[1, stride, stride, 1],
                                           padding=padding.upper())

    if add_bias:
        if filters_out:
            b_shape = [filters_out]
        else:
            b_shape = [filters_in]
        b_init = tf.zeros_initializer()
        with tf.variable_scope(name):
            biases = tf.get_variable('conv/bias',
                                     shape=b_shape,
                                     initializer=b_init,
                                     dtype=dtype)
        output_tensor += biases
    return output_tensor

def conv(input_tensor: tf.Tensor,
         kernel_size: Union[int, Tuple[int, int]],
         filters_out: int,
         stride: Optional[int] = 1,
         padding: Optional[str] = 'SAME',
         add_bias: Optional[bool] = True,
         dtype: Optional[Any] = tf.float16,
         name: Optional[str] = None,
         weight_suffix: Optional[str] = "kernel",
         bias_suffix: Optional[str] = "conv/bias",
         *_):
    """Apply conv and optional bias on input tensor with Tensorflow.

    Args:
        input_tensor: Input data
        kernel_size: Filter size (assumes equal height and width)
        filters_out: Number of output filters
        stride: Stride of the filter
        padding: Type of padding to use
        add_bias: Should bias be added
        dtype: Data type of parameters
        name: Optional name for this op
        weight_suffix: Name of the weight variable inside this op's scope
        bias_suffix: Name of the bias variable inside this op's scope

    Returns:
        Output of convolution operator.
    """
    # Assumes input in NHWC format.
    filters_in = input_tensor.get_shape()[-1]
    if isinstance(kernel_size, int):
        w_shape = [kernel_size, kernel_size, filters_in, filters_out]
    else:
        w_shape = kernel_size + (filters_in, filters_out)

    w_init = contrib.layers.xavier_initializer(dtype=dtype)
    if name is None:
        name = unique_object_name("conv2d", zero_based=True)
    name_scope = tf.get_default_graph().get_name_scope()
    if name_scope not in ["", None]:
        name = name_scope + "/" + name

    with tf.get_default_graph().as_default():
        with tf.variable_scope(name):
            weights = tf.get_variable(weight_suffix,
                                      shape=w_shape,
                                      initializer=w_init,
                                      dtype=dtype)
    output_tensor = tf.nn.conv2d(input_tensor,
                                 weights,
                                 [1, stride, stride, 1],
                                 padding=padding.upper(),
                                 name=name)

    if add_bias:
        b_shape = [filters_out]
        b_init = tf.zeros_initializer()
        with tf.variable_scope(name):
            biases = tf.get_variable(bias_suffix,
                                     shape=b_shape,
                                     initializer=b_init,
                                     dtype=dtype)
        output_tensor += biases
    return output_tensor

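def _example_conv():
    # Minimal usage sketch (illustrative only, assumes TF 1.x graph mode):
    # a 3x3, stride-2 convolution in fp16 with the default 'SAME' padding
    # and a bias; shapes and the name are made up.
    images = tf.placeholder(tf.float16, shape=[8, 224, 224, 3])
    features = conv(images, kernel_size=3, filters_out=64, stride=2,
                    name='conv1')
    return features                                          # shape [8, 112, 112, 64]
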
def stateless_dropout(x: tf.Tensor,
                      rate: float,
                      seed: tf.Tensor,
                      noise_shape: Optional[Union[Sequence[int],
                                                  tf.TensorShape]] = None,
                      name: Optional[Text] = None) -> tf.Tensor:
    """Computes dropout: randomly sets elements to zero to prevent overfitting.

    See https://www.tensorflow.org/api_docs/python/tf/nn/dropout.
    This version differs in that the seed is required if the rate is nonzero.

    Args:
        x: A floating point tensor.
        rate: A scalar `Tensor` with the same type as x. The probability that
            each element is dropped. For example, setting rate=0.1 would drop
            10% of input elements.
        seed: A shape [2] integer Tensor of seeds to the random number
            generator. Must have dtype `tf.int32` when compiling to XLA.
        noise_shape: A 1-D `Tensor` of type `int32`, representing the shape
            for randomly generated keep/drop flags.
        name: A name for this operation (optional).

    Returns:
        A `Tensor` of the same shape of `x`.

    Raises:
        ValueError: If `rate` is not in `[0, 1)` or if `x` is not a floating
            point tensor. `rate=1` is disallowed, because the output would be
            all zeros, which is likely not what was intended.
    """
    with tf.name_scope(name or 'stateless_dropout') as name:
        x = tf.convert_to_tensor(x, name='x')
        if not x.dtype.is_floating:
            raise ValueError('x has to be a floating point tensor since it\'s '
                             'going to be scaled. Got a %s tensor instead.' %
                             x.dtype)
        if isinstance(rate, numbers.Real):
            if not (rate >= 0 and rate < 1):
                raise ValueError('rate must be a scalar tensor or a float in '
                                 'the range [0, 1), got %g' % rate)
            if rate > 0.5:
                logging.log_first_n(
                    logging.WARN,
                    'Large dropout rate: %g (>0.5). In TensorFlow 2.x, '
                    'dropout() uses dropout rate instead of keep_prob. '
                    'Please ensure that this is intended.', 5, rate)

        # Early return if nothing needs to be dropped.
        if tf.get_static_value(rate) == 0:
            return x

        rate = tf.convert_to_tensor(rate, dtype=x.dtype, name='rate')
        rate.shape.assert_has_rank(0)
        noise_shape = _get_noise_shape(x, noise_shape)
        # Sample a uniform distribution on [0.0, 1.0) and select values larger
        # than rate.
        #
        # NOTE: Random uniform can actually only generate 2^23 floats on
        # [1.0, 2.0) and subtract 1.0.
        random_tensor = tf.random.stateless_uniform(noise_shape,
                                                    seed=seed,
                                                    dtype=x.dtype)
        keep_prob = 1 - rate
        scale = 1 / keep_prob
        # NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider
        # that float to be selected, hence we use a >= comparison.
        keep_mask = random_tensor >= rate
        ret = x * scale * tf.cast(keep_mask, x.dtype)
        if not tf.executing_eagerly():
            ret.set_shape(x.get_shape())
        return ret

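def _example_stateless_dropout():
    # Minimal usage sketch (illustrative only): the shape-[2] integer seed
    # makes the drop mask a deterministic function of the seed, which is what
    # XLA compilation requires; values here are made up.
    x = tf.ones([4, 8], dtype=tf.float32)
    seed = tf.constant([42, 7], dtype=tf.int32)
    y = stateless_dropout(x, rate=0.1, seed=seed)
    return y             # same shape as x; kept elements are scaled by 1 / (1 - rate)
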
def deconv2d(input: tf.Tensor,
             output_shape: Sequence[Union[int, tf.Tensor]],
             kernel_width: int = 5,
             kernel_height: int = 5,
             horizontal_stride: int = 2,
             vertical_stride: int = 2,
             weight_initializer: Optional[Initializer] = None,
             bias_initializer: Optional[Initializer] = None,
             name: str = "deconv2d"):
    """
    Applies a 2D-deconvolution to a tensor.

    Parameters
    ----------
    input: tf.Tensor
        The tensor to which a 2D-deconvolution should be applied. Must be of
        shape [batch_size, height, width, channels]
    output_shape: list of int or tf.Tensor
        The desired output shape.
    kernel_width: int, optional
        The width of the convolutional filters (default 5)
    kernel_height: int, optional
        The height of the convolutional filters (default 5)
    horizontal_stride: int, optional
        The horizontal stride of the convolutional filters (default 2)
    vertical_stride: int, optional
        The vertical stride of the convolutional filters (default 2)
    weight_initializer: tf.Initializer, optional
        A custom initializer for the weight matrices of the filters
    bias_initializer: tf.Initializer, optional
        A custom initializer for the bias vectors of the filters
    name: str, optional
        A name for the operation (default "deconv2d")

    Returns
    -------
    tf.Tensor
        The result of applying a 2D-deconvolution to the input tensor
    """
    shape = input.get_shape().as_list()
    with tf.variable_scope(name):
        # filter: [height, width, output_channels, in_channels]
        weights = tf.get_variable(
            name="weights",
            shape=[kernel_height, kernel_width, output_shape[-1], shape[-1]],
            initializer=weight_initializer)
        biases = tf.get_variable(name="bias",
                                 shape=[output_shape[-1]],
                                 initializer=bias_initializer)
        deconv = tf.nn.conv2d_transpose(
            input,
            filter=weights,
            output_shape=output_shape,
            strides=[1, vertical_stride, horizontal_stride, 1])
        deconv = tf.nn.bias_add(deconv, biases)
        deconv.set_shape([None] + output_shape[1:])
        return deconv

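def _example_deconv2d():
    # Minimal usage sketch (illustrative only, assumes TF 1.x graph mode):
    # upsample a [batch, 16, 16, 64] tensor to [batch, 32, 32, 32] with the
    # default 5x5, stride-2 kernel; shapes are made up.
    x = tf.placeholder(tf.float32, shape=[8, 16, 16, 64])
    y = deconv2d(x, output_shape=[8, 32, 32, 32])
    return y                                                 # shape [8, 32, 32, 32]
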