Example No. 1
  def __init__(self, params, input_ids, target_ids=None, training=None):
    """Constructor for TransformerModel.

    Args:
      params: `BigBirdConfig` dictionary.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      target_ids: (optional) int32 Tensor of shape [batch_size, output_seq_length].
      training: Boolean indicating whether the call is training or inference.
    """
    self.params = copy.deepcopy(params)
    self.scope = params["scope"]

    with tf.compat.v1.variable_scope(
        self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:
      self.embeder = utils.EmbeddingLayer(
          vocab_size=self.params["vocab_size"],
          emb_dim=self.params["hidden_size"],
          initializer=utils.create_initializer(
              self.params["initializer_range"]),
          scale_emb=self.params["rescale_embedding"],
          use_token_type=False,
          num_token_types=None,
          use_position_embeddings=True,
          max_position_embeddings=self.params["max_position_embeddings"],
          dropout_prob=self.params["hidden_dropout_prob"])

      # encoder
      # Run the inputs through the encoder layer to map the symbol
      # representations to continuous representations.
      self.encoder = encoder.EncoderStack(self.params)
      self.encoder_output, encoder_mask = self._encode(input_ids, training)

      # decoder
      self.decoder = decoder.DecoderStack(self.params)
      self.predictions = self._decode_and_predict(
          target_ids, self.encoder_output, encoder_mask, training)

      super(TransformerModel, self).__init__(name=self.scope, _scope=vs)
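
Usage sketch (a minimal, hypothetical call of the constructor above; the `params` values below are made up and only cover the keys the constructor itself reads, plus whatever `encoder.EncoderStack`/`decoder.DecoderStack` expect):

# Hypothetical instantiation; config values are illustrative only.
import tensorflow as tf

params = {
    "scope": "transformer",
    "vocab_size": 32000,
    "hidden_size": 768,
    "initializer_range": 0.02,
    "rescale_embedding": False,
    "max_position_embeddings": 4096,
    "hidden_dropout_prob": 0.1,
    # ... plus the keys read by encoder.EncoderStack and decoder.DecoderStack.
}
input_ids = tf.zeros([2, 128], dtype=tf.int32)   # dummy source ids
target_ids = tf.zeros([2, 64], dtype=tf.int32)   # dummy target ids
model = TransformerModel(params, input_ids, target_ids, training=False)
predictions = model.predictions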
Example No. 2
    def __init__(self, params):
        """Constructor for BertModel.

        Args:
          params: `BigBirdConfig` dictionary.
        """
        self.params = copy.deepcopy(params)
        self.scope = params["scope"]

        with tf.compat.v1.variable_scope(self.scope,
                                         reuse=tf.compat.v1.AUTO_REUSE) as vs:
            self.embeder = utils.EmbeddingLayer(
                vocab_size=self.params["vocab_size"],
                emb_dim=self.params["hidden_size"],
                initializer=utils.create_initializer(
                    self.params["initializer_range"]),
                scale_emb=self.params["rescale_embedding"],
                use_token_type=True,
                num_token_types=self.params["type_vocab_size"],
                use_position_embeddings=True,
                max_position_embeddings=self.params["max_position_embeddings"],
                dropout_prob=self.params["hidden_dropout_prob"])
            self.encoder = encoder.EncoderStack(self.params)
            self.pooler = tf.compat.v1.layers.Dense(
                units=self.params["hidden_size"],
                activation=tf.tanh,
                kernel_initializer=utils.create_initializer(
                    self.params["initializer_range"]),
                name="pooler/dense")
            super(BertModel, self).__init__(name=self.scope, _scope=vs)
Example No. 3
    def __init__(self, params):
        """Constructor for TransformerModel.

        Args:
          params: `BigBirdConfig` dictionary.
        """
        self.params = copy.deepcopy(params)
        self.scope = params["scope"]

        with tf.compat.v1.variable_scope(self.scope,
                                         reuse=tf.compat.v1.AUTO_REUSE) as vs:
            self.embeder = utils.EmbeddingLayer(
                vocab_size=self.params["vocab_size"],
                emb_dim=self.params["hidden_size"],
                initializer=utils.create_initializer(
                    self.params["initializer_range"]),
                scale_emb=self.params["rescale_embedding"],
                use_token_type=False,
                num_token_types=None,
                use_position_embeddings=True,
                max_position_embeddings=self.params["max_position_embeddings"],
                dropout_prob=self.params["hidden_dropout_prob"])
            self.encoder = encoder.EncoderStack(self.params)
            self.decoder = decoder.DecoderStack(self.params)
            super(TransformerModel, self).__init__(name=self.scope, _scope=vs)
Example No. 4
    def __init__(self, params):
        """Constructor for BertModel.

        Args:
          params: `BigBirdConfig` dictionary.
        """
        self.params = copy.deepcopy(params)
        self.scope = params["scope"]
        super(BertModel, self).__init__(name=self.scope)

        # validate params
        self.pad = lambda x: x
        if params["max_encoder_length"] <= 512:
            logging.info("Switching to full attention for short sequences")
            self.params["attention_type"] = "original_full"
        if self.params["attention_type"] == "simulated_sparse" or self.params[
                "attention_type"] == "block_sparse":
            if params["max_encoder_length"] % params["block_size"]:
                logging.info(
                    "Expand max_encoder_length to next multiple of block_size")
                self.params["max_encoder_length"] = (
                    params["max_encoder_length"] // params["block_size"] +
                    1) * params["block_size"]
                pad_size = self.params["max_encoder_length"] - params[
                    "max_encoder_length"]
                paddings = [[0, 0], [0, pad_size]]
                self.pad = lambda x: tf.pad(x, paddings)

        with tf.compat.v1.variable_scope(self.scope,
                                         reuse=tf.compat.v1.AUTO_REUSE):
            self.embeder = utils.EmbeddingLayer(
                vocab_size=self.params["vocab_size"],
                emb_dim=self.params["hidden_size"],
                initializer=utils.create_initializer(
                    self.params["initializer_range"]),
                scale_emb=self.params["rescale_embedding"],
                use_token_type=True,
                num_token_types=self.params["type_vocab_size"],
                use_position_embeddings=True,
                max_position_embeddings=self.params["max_position_embeddings"],
                dropout_prob=self.params["hidden_dropout_prob"])
            self.encoder = encoder.EncoderStack(self.params)
            self.pooler = utils.SimpleDenseLayer(
                input_size=self.params["hidden_size"],
                output_size=self.params["hidden_size"],
                initializer=utils.create_initializer(
                    self.params["initializer_range"]),
                activation=tf.tanh,
                name="pooler/dense")
Example No. 5
  def __init__(self, params,
               input_ids,
               token_type_ids=None,
               training=None):
    """Constructor for BertModel.

    Args:
      params: `BigBirdConfig` dictionary.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      training: Boolean indicating whether the call is training or inference.
    """
    self.params = copy.deepcopy(params)
    self.scope = params["scope"]

    with tf.compat.v1.variable_scope(
        self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:

        # If token_type_ids is not given, default every position to token type 0
        # (i.e. treat the whole input as a single segment).
        if token_type_ids is None:
            token_type_ids = tf.zeros_like(input_ids, dtype=tf.int32)

        # Build input_mask from input_ids: 1 for real tokens (id > 0), 0 for padding.
        input_mask = tf.where(input_ids > 0,
                              tf.ones_like(input_ids), tf.zeros_like(input_ids))
        # 1) Embedding process
        # 1-1) Define the embedding layer
        self.embeder = utils.EmbeddingLayer(
          vocab_size=self.params["vocab_size"],  # 50358
          emb_dim=self.params["hidden_size"],  # 768
          initializer=utils.create_initializer(
              self.params["initializer_range"]),  # truncated_normal with stddev 0.02
          scale_emb=self.params["rescale_embedding"],  # False
          use_token_type=True,
          num_token_types=self.params["type_vocab_size"],  # 2
          use_position_embeddings=True,  # enable position embeddings
          max_position_embeddings=self.params["max_position_embeddings"],  # 4096
          dropout_prob=self.params["hidden_dropout_prob"])  # 10% dropout

        # 1-2) Apply the embedding layer: token + token_type + position embeddings
        embedding_output = self.embeder.operation(
            input_ids,
            self.params["max_encoder_length"],
            token_type_ids=token_type_ids,
            training=training)

        # 2) Encoder
        # 2-1) Define the encoder stack
        self.encoder = encoder.EncoderStack(self.params)

        # 2-2) Run the encoder (sparse attention)
        self.sequence_output = self.encoder.operation(
            embedding_output, input_mask, training)

        # 3) Pooling layer

        # The "pooler" converts the encoded sequence tensor of shape
        # [batch_size, seq_length, hidden_size] to a tensor of shape
        # [batch_size, hidden_size]. This is necessary for segment-level
        # (or segment-pair-level) classification tasks where we need a fixed
        # dimensional representation of the segment.
        first_token_tensor = self.sequence_output[:, 0, :]  # [CLS] token's hidden state -> (4, 768)

        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token. We assume that this has been pre-trained
        # 마지막 768 만큼 dense  계산
        self.pooler = tf.compat.v1.layers.Dense(
          units=self.params["hidden_size"],
          activation=tf.tanh,
          kernel_initializer=utils.create_initializer(
              self.params["initializer_range"]),
          name="pooler/dense") # 결과 -> (4, 786)

        self.pooled_output = self.pooler(first_token_tensor)

        super(BertModel, self).__init__(name=self.scope, _scope=vs)
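
For reference, a hypothetical call of this constructor, assuming `params` is a `BigBirdConfig`-style dictionary like the ones above, with hidden_size=768 and max_encoder_length=4096 as the inline comments suggest:

import tensorflow as tf

input_ids = tf.zeros([4, 4096], dtype=tf.int32)        # (batch_size, seq_length)
model = BertModel(params, input_ids, training=False)   # token_type_ids defaults to all zeros

# Shapes produced inside the constructor:
#   embedding_output      -> (4, 4096, 768)
#   model.sequence_output -> (4, 4096, 768)
#   first_token_tensor    -> (4, 768)
#   model.pooled_output   -> (4, 768)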