Example #1
  def __init__(self,
               num_hidden_layers=12,
               hidden_size=768,
               num_attention_heads=12,
               intermediate_size=3072,
               intermediate_activation="gelu",
               hidden_dropout_prob=0.0,
               attention_probs_dropout_prob=0.0,
               initializer_range=0.02,
               backward_compatible=False,
               float_type=tf.float32,
               shared_type="all",
               **kwargs):
    super(Transformer, self).__init__(**kwargs)
    self.num_hidden_layers = num_hidden_layers
    self.hidden_size = hidden_size
    self.num_attention_heads = num_attention_heads
    self.intermediate_size = intermediate_size
    self.intermediate_activation = tf_utils.get_activation(
        intermediate_activation)
    self.hidden_dropout_prob = hidden_dropout_prob
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.initializer_range = initializer_range
    self.backward_compatible = backward_compatible
    self.float_type = float_type
    self.shared_type = shared_type
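
A minimal usage sketch (not part of the source): it assumes the Transformer class above subclasses tf.keras.layers.Layer, as the super() call suggests, that tf_utils resolves the activation name, and that shared_type controls cross-layer weight sharing; the hyperparameter values and the layer name are illustrative only.

import tensorflow as tf

# Assumed: the Transformer class from Example #1 is importable in this scope.
transformer = Transformer(
    num_hidden_layers=6,              # shallower stack than the 12-layer default
    hidden_size=512,
    num_attention_heads=8,            # hidden_size should stay divisible by this
    intermediate_size=2048,
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    float_type=tf.float32,
    shared_type="all",                # presumably ALBERT-style sharing of one block across layers
    name="encoder")                   # forwarded to the Keras Layer base via **kwargs
print(transformer.hidden_size, transformer.num_attention_heads)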
Example #2
    def __init__(self,
                 hidden_size=768,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 intermediate_activation="gelu",
                 hidden_dropout_prob=0.0,
                 attention_probs_dropout_prob=0.0,
                 initializer_range=0.02,
                 backward_compatible=False,
                 float_type=tf.float32,
                 **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.intermediate_activation = tf_utils.get_activation(
            intermediate_activation)
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.backward_compatible = backward_compatible
        self.float_type = float_type

        if self.hidden_size % self.num_attention_heads != 0:
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (self.hidden_size, self.num_attention_heads))
        self.attention_head_size = int(self.hidden_size /
                                       self.num_attention_heads)
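
The constructor above rejects configurations where hidden_size is not divisible by num_attention_heads and derives the per-head width from the quotient. A standalone sketch of that check follows; the helper name is hypothetical.

def attention_head_size(hidden_size, num_attention_heads):
    """Returns the per-head width, mirroring the check in TransformerBlock."""
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))
    return hidden_size // num_attention_heads

assert attention_head_size(768, 12) == 64   # BERT-base: 12 heads, 64 dims each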
Example #3
def _get_transformer_encoder(bert_config,
                             sequence_length,
                             float_dtype=tf.float32):
    """Gets a 'TransformerEncoder' object.

    Args:
      bert_config: A 'modeling.BertConfig' object.
      sequence_length: Maximum sequence length of the training data.
      float_dtype: tf.dtype, tf.float32 or tf.float16.

    Returns:
      A networks.TransformerEncoder object.
    """
    return networks.TransformerEncoder(
        vocab_size=bert_config.vocab_size,
        hidden_size=bert_config.hidden_size,
        num_layers=bert_config.num_hidden_layers,
        num_attention_heads=bert_config.num_attention_heads,
        intermediate_size=bert_config.intermediate_size,
        activation=tf_utils.get_activation('gelu'),
        dropout_rate=bert_config.hidden_dropout_prob,
        attention_dropout_rate=bert_config.attention_probs_dropout_prob,
        sequence_length=sequence_length,
        max_sequence_length=bert_config.max_position_embeddings,
        type_vocab_size=bert_config.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
        float_dtype=float_dtype.name)
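
A usage sketch (not from the source): it assumes modeling.BertConfig.from_json_file is available, as in the TensorFlow model-garden BERT code this snippet appears to come from, and the config file path is only a placeholder.

import tensorflow as tf

# Assumed: `modeling` and `networks` are the modules already imported by Example #3.
bert_config = modeling.BertConfig.from_json_file("bert_config.json")  # placeholder path
encoder = _get_transformer_encoder(
    bert_config,
    sequence_length=128,
    float_dtype=tf.float16)  # forwarded as float_dtype.name, i.e. the string "float16"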
Example #4
    def build(self, unused_input_shapes):
        """Implements build() for the layer."""
        self.output_bias = self.add_weight(
            shape=[self.config.vocab_size],
            name='predictions/output_bias',
            initializer=tf.keras.initializers.Zeros())
        self.lm_dense = tf.keras.layers.Dense(
            self.config.embedding_size,
            activation=tf_utils.get_activation(self.config.hidden_act),
            kernel_initializer=self.initializer,
            name='predictions/transform/dense')
        self.lm_layer_norm = tf.keras.layers.LayerNormalization(
            axis=-1, epsilon=1e-12, name='predictions/transform/LayerNorm')

        # Next sentence binary classification dense layer including bias to match
        # TF1.x BERT variable shapes.
        with tf.name_scope('seq_relationship'):
            self.next_seq_weights = self.add_weight(
                shape=[self.num_next_sentence_label, self.config.hidden_size],
                name='output_weights',
                initializer=self.initializer)
            self.next_seq_bias = self.add_weight(
                shape=[self.num_next_sentence_label],
                name='output_bias',
                initializer=tf.keras.initializers.Zeros())
        super(TinyBertPretrainLayer, self).build(unused_input_shapes)
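
A sketch (not from the source) of how weights like the ones created in this build() are typically consumed in a BERT-style pretraining head: the masked-LM path ties the transformed hidden states back to the embedding table and adds output_bias, while the next-sentence path is a raw projection with the seq_relationship weights. The function name and all tensor arguments are hypothetical placeholders.

import tensorflow as tf

def pretrain_logits(lm_hidden, pooled_output,
                    embedding_table, output_bias,
                    next_seq_weights, next_seq_bias):
    """Hypothetical helper returning (masked-LM logits, next-sentence logits)."""
    # Masked LM: project onto the (tied) embedding table, then add the vocab bias.
    lm_logits = tf.nn.bias_add(
        tf.matmul(lm_hidden, embedding_table, transpose_b=True), output_bias)
    # Next-sentence prediction: a plain dense projection written with raw weights,
    # which is why build() above creates them under the 'seq_relationship' scope.
    ns_logits = tf.nn.bias_add(
        tf.matmul(pooled_output, next_seq_weights, transpose_b=True), next_seq_bias)
    return lm_logits, ns_logits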
Example #5
def get_transformer_encoder(bert_config, sequence_length):
    """get transformer encoder model
    """
    kwargs = dict(
        vocab_size=bert_config.vocab_size,
        hidden_size=bert_config.hidden_size,
        num_hidden_layers=bert_config.num_hidden_layers,
        num_attention_heads=bert_config.num_attention_heads,
        intermediate_size=bert_config.intermediate_size,
        activation=tf_utils.get_activation(bert_config.hidden_act),
        dropout_rate=bert_config.hidden_dropout_prob,
        attention_dropout_rate=bert_config.attention_probs_dropout_prob,
        sequence_length=sequence_length,
        max_sequence_length=bert_config.max_position_embeddings,
        type_vocab_size=bert_config.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range),
        name="transformer_encoder")
    if isinstance(bert_config, AlbertConfig):
        kwargs['embedding_width'] = bert_config.embedding_size
        kwargs['num_hidden_groups'] = bert_config.num_hidden_groups
        return layers.AlbertTransformerEncoder(**kwargs)
    else:
        assert isinstance(bert_config, BertConfig)
        return layers.TransformerEncoder(**kwargs)
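
A usage sketch (not from the source) of the type dispatch above; it assumes the BertConfig and AlbertConfig classes imported by the snippet accept these keyword arguments, and the hyperparameter values are illustrative.

# BERT path: a plain BertConfig yields a layers.TransformerEncoder.
bert_config = BertConfig(vocab_size=30522)
bert_encoder = get_transformer_encoder(bert_config, sequence_length=128)

# ALBERT path: AlbertConfig carries embedding_size / num_hidden_groups, so the
# same call returns a layers.AlbertTransformerEncoder instead.
albert_config = AlbertConfig(vocab_size=30000, embedding_size=128,
                             num_hidden_groups=1)
albert_encoder = get_transformer_encoder(albert_config, sequence_length=128)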