def call(self, inputs, training=False):
  """Computes a forward pass.

  Args:
    inputs: An input tensor.
    training: A boolean indicating whether the call is for training or not.

  Returns:
    An output tensor and a dictionary of output activations from all the
      layers, keyed by block name.
  """
  activations = {}
  x = inputs
  for block in self.blocks:
    x = block(x, training=training)
    activations[block.name] = x

  output = activations['embedder']
  if len(self.embedder_output_shape) > 1:
    output = data_utils.recursively_expand_dims(
        output, axes=[-1] * (len(self.embedder_output_shape) - 1))
    output = data_utils.reshape_by_last_dims(
        output, last_dim_shape=self.embedder_output_shape)
    activations['embedder'] = output
  return output, activations
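# A minimal, self-contained sketch (not the library's model class) of the same
# forward-pass pattern: chain named blocks, record each block's output in an
# `activations` dictionary keyed by block name, and reshape the flat
# 'embedder' output into a multi-dimensional embedding. All layer sizes and
# shapes below are hypothetical.
import tensorflow as tf

blocks = [
    tf.keras.layers.Dense(64, activation='relu', name='fc_block'),
    tf.keras.layers.Dense(48, name='embedder'),  # Flat embedding logits.
]

x = tf.zeros([8, 34])  # Hypothetical batch of flattened input features.
activations = {}
for block in blocks:
  x = block(x)
  activations[block.name] = x

# With an embedder output shape of [16, 3], the flat [8, 48] embedding is
# reshaped to [8, 16, 3], mirroring the `reshape_by_last_dims` call above.
activations['embedder'] = tf.reshape(activations['embedder'], [8, 16, 3])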
def multi_head_logits(input_features, output_sizes, name, **kwargs):
  """Builds a multi-head logit layer with potential bottleneck layer.

  Args:
    input_features: A tensor for input features. Shape = [..., sequence_length,
      feature_dim].
    output_sizes: A dictionary for output sizes in the format {output_name:
      output_size}, where `output_size` can be an integer or a list.
    name: A string for the name scope.
    **kwargs: A dictionary for additional arguments. Supported arguments
      include `num_hidden_nodes`, `weight_initializer`, `bias_initializer`,
      `weight_max_norm`, `use_batch_norm`, `dropout_rate`, `num_fcs_per_block`,
      and `num_fc_blocks`.

  Returns:
    outputs: A dictionary for the output logits.
  """
  outputs = {}
  for output_name, output_size in output_sizes.items():
    if isinstance(output_size, int):
      output_size = [output_size]
    outputs[output_name] = linear(
        input_features,
        output_size=np.prod(output_size),
        weight_max_norm=kwargs.get('weight_max_norm', 0.0),
        weight_initializer=kwargs.get('weight_initializer',
                                      tf.initializers.he_normal()),
        bias_initializer=kwargs.get('bias_initializer',
                                    tf.initializers.he_normal()),
        name=name + '/OutputLogits/' + output_name)
    if len(output_size) > 1:
      outputs[output_name] = data_utils.recursively_expand_dims(
          outputs[output_name], axes=[-1] * (len(output_size) - 1))
      outputs[output_name] = data_utils.reshape_by_last_dims(
          outputs[output_name], last_dim_shape=output_size)
  return outputs
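# A hedged usage sketch for `multi_head_logits` (head names and sizes are
# hypothetical): an integer output size produces flat logits, while a
# list-valued size is reshaped into the requested trailing dimensions.
example_features = tf.zeros([4, 8, 128])  # Shape = [batch, seq_len, feature_dim].
example_logits = multi_head_logits(
    example_features,
    output_sizes={
        'visibility': 1,          # Shape = [4, 8, 1].
        'keypoints_3d': [16, 3],  # Shape = [4, 8, 16, 3].
    },
    name='ExampleHeads')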
def test_reshape_by_last_dims(self):
  # Shape = [2, 4, 1].
  x = tf.constant([[[1], [2], [3], [4]], [[5], [6], [7], [8]]])
  # Shape = [2, 2, 2].
  reshaped_x = data_utils.reshape_by_last_dims(x, last_dim_shape=[2, 2])
  self.assertAllEqual(reshaped_x, [[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
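# For reference, a plain-TensorFlow equivalent of the reshape under test (a
# sketch, not the library implementation): keep the leading batch dimension
# and fold the trailing dimensions into the requested [2, 2] shape.
x = tf.constant([[[1], [2], [3], [4]], [[5], [6], [7], [8]]])  # Shape = [2, 4, 1].
equivalent_x = tf.reshape(x, [2, 2, 2])  # [[[1, 2], [3, 4]], [[5, 6], [7, 8]]].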
def simple_model(input_features,
                 output_sizes,
                 is_training,
                 name='SimpleModel',
                 num_bottleneck_nodes=0,
                 **kwargs):
  """Implements `simple base` model with outputs.

  Note that the code differs from the original architecture by disabling
  dropout and maximum weight norms by default.

  Args:
    input_features: A tensor for input features. Shape = [..., feature_dim].
    output_sizes: A dictionary for output sizes in the format {output_name:
      output_size}, where `output_size` can be an integer or a list.
    is_training: A boolean for whether it is in training mode.
    name: A string for the name scope.
    num_bottleneck_nodes: An integer for the size of the bottleneck layer to be
      added before the output layer(s). No bottleneck layer will be added if
      non-positive.
    **kwargs: A dictionary of additional arguments passed to `simple_base`.

  Returns:
    outputs: A dictionary for output tensors in the format {output_name:
      output_tensor}. Output tensor shape = [..., output_size].
    activations: A dictionary of additional activation tensors for pre-output
      model activations. Keys include 'base_activations' and optionally
      'bottleneck_activations'.
  """
  net = simple_base(input_features, is_training=is_training, name=name,
                    **kwargs)
  activations = {'base_activations': net}

  if num_bottleneck_nodes > 0:
    net = linear(
        net,
        output_size=num_bottleneck_nodes,
        weight_max_norm=kwargs.get('weight_max_norm', 0.0),
        weight_initializer=kwargs.get('weight_initializer',
                                      tf.initializers.he_normal()),
        bias_initializer=kwargs.get('bias_initializer',
                                    tf.initializers.he_normal()),
        name=name + '/BottleneckLogits')
    activations['bottleneck_activations'] = net

  outputs = {}
  for output_name, output_size in output_sizes.items():
    if isinstance(output_size, int):
      output_size = [output_size]
    outputs[output_name] = linear(
        net,
        output_size=np.prod(output_size),
        weight_max_norm=kwargs.get('weight_max_norm', 0.0),
        weight_initializer=kwargs.get('weight_initializer',
                                      tf.initializers.he_normal()),
        bias_initializer=kwargs.get('bias_initializer',
                                    tf.initializers.he_normal()),
        name=name + '/OutputLogits/' + output_name)
    if len(output_size) > 1:
      outputs[output_name] = data_utils.recursively_expand_dims(
          outputs[output_name], axes=[-1] * (len(output_size) - 1))
      outputs[output_name] = data_utils.reshape_by_last_dims(
          outputs[output_name], last_dim_shape=output_size)
  return outputs, activations
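# A hedged usage sketch for `simple_model` (all sizes are hypothetical, and
# `simple_base` default hyperparameters are assumed): a single 3-D pose output
# head on top of the base network, plus a 16-node bottleneck.
example_features = tf.zeros([4, 13 * 2])  # Shape = [batch, feature_dim].
example_outputs, example_activations = simple_model(
    example_features,
    output_sizes={'keypoints_3d': [13, 3]},  # Shape = [4, 13, 3].
    is_training=True,
    num_bottleneck_nodes=16)
# `example_activations['base_activations']` holds the `simple_base` features;
# `example_activations['bottleneck_activations']` holds the 16-D bottleneck.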