Пример #1
0
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        """Creates the sub-modules that make up the BERT encoder.

        Inside the module's variable scope this builds, in order: a word
        embedder, a segment (token-type) embedder, a position embedder, a
        `TransformerEncoder`, and a tanh-activated `Dense` pooler layer
        (the latter under the nested `"pooler"` scope).

        Args:
            pretrained_model_name: Forwarded to
                `self.load_pretrained_config`. Defaults to `None`.
            cache_dir: Forwarded to `self.load_pretrained_config`.
                Defaults to `None`.
            hparams: Forwarded to the parent class constructor.
        """
        super(BERTEncoder, self).__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        # Alias taken only after the config-loading call, so that it refers
        # to whatever `self._hparams` is once that call has returned.
        hp = self._hparams

        with tf.variable_scope(self.variable_scope):

            # Token embedding table.
            self.word_embedder = WordEmbedder(
                vocab_size=hp.vocab_size, hparams=hp.embed)

            # Token-type (segment) embedding table.
            self.segment_embedder = WordEmbedder(
                vocab_size=hp.type_vocab_size, hparams=hp.segment_embed)

            # Position embedding table.
            self.position_embedder = PositionEmbedder(
                position_size=hp.position_size, hparams=hp.position_embed)

            # The Transformer stack that does the actual encoding.
            self.encoder = TransformerEncoder(hparams=hp.encoder)

            with tf.variable_scope("pooler"):
                # Dense layer with tanh activation, `hidden_size` units.
                pooler_hparams = {
                    "type": "Dense",
                    "kwargs": {
                        "units": hp.hidden_size,
                        "activation": tf.tanh,
                    },
                }
                self.pooler = get_layer(hparams=pooler_hparams)
Пример #2
0
    def test_infer_helpers(self):
        """Tests `GreedyEmbeddingHelper` with three kinds of embedding input.

        The helper is built from (1) the raw embedding held by the test
        case, (2) a `WordEmbedder`, and (3) a callable combining word and
        position embeddings. Each helper goes through the same checks.
        """

        def _check_helper(helper):
            # Step the helper once and check the static shapes it reports.
            dummy_outputs = tf.ones([self._batch_size, self._vocab_size])
            dummy_sample_ids = tf.ones([self._batch_size], dtype=tf.int32)
            _, next_inputs, _ = helper.next_inputs(
                time=1,
                outputs=dummy_outputs,  # Not used
                state=None,  # Not used
                sample_ids=dummy_sample_ids)

            expected_inputs_shape = tf.TensorShape(
                [self._batch_size, self._emb_dim])
            self.assertEqual(helper.sample_ids_shape, tf.TensorShape([]))
            self.assertEqual(next_inputs.get_shape(), expected_inputs_shape)

            # Drive a full decoding run with the helper inside an RNN decoder.
            decoder = BasicRNNDecoder(
                vocab_size=self._vocab_size,
                output_layer=tf.layers.Dense(self._vocab_size))
            outputs, final_state, sequence_lengths = decoder(
                helper=helper, max_decoding_length=self._max_seq_length)

            cell_dim = decoder.hparams.rnn_cell.kwargs.num_units
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                fetched = sess.run([outputs, final_state, sequence_lengths])
                outputs_, final_state_, sequence_lengths_ = fetched

                # The time dimension equals the longest decoded sequence.
                max_length = max(sequence_lengths_)
                batch_time = (self._batch_size, max_length)
                self.assertEqual(
                    outputs_.logits.shape, batch_time + (self._vocab_size,))
                self.assertEqual(outputs_.sample_id.shape, batch_time)
                self.assertEqual(
                    final_state_[0].shape, (self._batch_size, cell_dim))

        # case-(1): the embedding is passed to the helper directly.
        raw_helper = GreedyEmbeddingHelper(
            self._embedding, self._start_tokens, self._end_token)
        _check_helper(raw_helper)

        # case-(2): the embedding is wrapped in a `WordEmbedder`.
        embedder = WordEmbedder(self._embedding)
        embedder_helper = GreedyEmbeddingHelper(
            embedder, self._start_tokens, self._end_token)
        _check_helper(embedder_helper)

        # case-(3): a callable of (ids, times) summing word and position
        # embeddings.
        word_embedder = WordEmbedder(self._embedding)
        pos_embedder = PositionEmbedder(position_size=self._max_seq_length)

        def _emb_fn(ids, times):
            return word_embedder(ids) + pos_embedder(times)

        fn_helper = GreedyEmbeddingHelper(
            _emb_fn, self._start_tokens, self._end_token)
        _check_helper(fn_helper)
Пример #3
0
    def test_embedder_multi_calls(self):
        """Tests calling the same embedder multiple times.

        One `WordEmbedder` is applied first to a rank-2 and then to a rank-3
        id tensor; each output's static shape must equal the input shape
        followed by the embedding dimension(s).
        """
        embedder = WordEmbedder(
            vocab_size=100,
            hparams={
                "dim": 1024,
                "dropout_rate": 0.3,
                "dropout_strategy": "item"
            })

        # The second shape has a different rank than the first.
        for input_shape in ([64, 16], [64, 10, 20]):
            ids = tf.ones(input_shape, dtype=tf.int32)
            embedded = embedder(ids)

            # `dim` may be a scalar or a sequence; wrap a scalar in a list.
            dim = embedder.dim
            dim_list = dim if isinstance(dim, (list, tuple)) else [dim]
            self.assertEqual(embedded.shape, input_shape + dim_list)
Пример #4
0
    def test_encode_with_embedder(self):
        """Tests an encoder fed with the output of an embedder from
        :mod:`texar.tf.modules.embedders`.
        """
        batch_size, seq_len = 64, 16

        embedder = WordEmbedder(vocab_size=20, hparams={"dim": 100})
        token_ids = tf.ones([batch_size, seq_len], dtype=tf.int32)

        encoder = UnidirectionalRNNEncoder()
        embedded = embedder(token_ids)
        outputs, state = encoder(embedded)

        # The expected feature size comes from the encoder's own RNN cell
        # hyperparameters.
        cell_dim = encoder.hparams.rnn_cell.kwargs.num_units
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, state_ = sess.run([outputs, state])
            self.assertEqual(outputs_.shape, (batch_size, seq_len, cell_dim))
            self.assertEqual(state_[0].shape, (batch_size, cell_dim))
Пример #5
0
    def test_word_embedder_soft_ids(self):
        """Tests the correctness of using soft ids.

        An embedder is initialized from a 5x1 table holding the values 0..4.
        Looking up id 3 and looking up a one-hot soft id that selects
        position 3 must yield the same values.
        """
        init_value = np.expand_dims(np.arange(5), 1)
        embedder = WordEmbedder(init_value=init_value)

        ids = np.array([3])
        soft_ids = np.array([[0, 0, 0, 1, 0]])

        outputs = embedder(ids=ids)
        soft_outputs = embedder(soft_ids=soft_ids)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, soft_outputs_ = sess.run([outputs, soft_outputs])
            # Compare element-wise. `assertEqual` on NumPy arrays relies on
            # the truth value of `a == b`, which raises for arrays with more
            # than one element and silently broadcasts mismatched shapes.
            np.testing.assert_array_equal(outputs_, soft_outputs_)
Пример #6
0
    def _test_word_embedder(self, hparams):
        """Tests :class:`texar.tf.modules.WordEmbedder`.

        Checks static and run-time output shapes for both hard ids and soft
        ids, the embedder's reported `dim` and `vocab_size`, the number of
        trainable variables, and the output rank for inputs whose shape is
        unknown at graph-construction time.

        Args:
            hparams: Hyperparameters passed to `WordEmbedder`; its `"dim"`
                entry is read to derive the expected embedding dimension(s).
        """

        def _as_list(dim):
            # Leave lists/tuples untouched; wrap anything else in a list.
            return dim if isinstance(dim, (list, tuple)) else [dim]

        batch_dims = [64, 16]
        embedder = WordEmbedder(vocab_size=100, hparams=hparams)

        hard_ids = tf.ones(batch_dims, dtype=tf.int32)
        hard_out = embedder(hard_ids)

        soft_ids = tf.ones(
            batch_dims + [embedder.vocab_size], dtype=tf.float32)
        soft_out = embedder(soft_ids=soft_ids)

        emb_dim = _as_list(embedder.dim)
        hparams_dim = _as_list(hparams["dim"])

        # Static-shape and configuration checks.
        self.assertEqual(hard_out.shape, batch_dims + emb_dim)
        self.assertEqual(soft_out.shape, batch_dims + emb_dim)
        self.assertEqual(emb_dim, hparams_dim)
        self.assertEqual(embedder.vocab_size, 100)
        self.assertEqual(len(embedder.trainable_variables), 1)

        # Run-time shape checks, with the global mode fed as TRAIN.
        expected_shape = tuple(batch_dims) + tuple(emb_dim)
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            hard_out_, soft_out_ = sess.run(
                [hard_out, soft_out],
                feed_dict={global_mode(): tf.estimator.ModeKeys.TRAIN})
            self.assertEqual(hard_out_.shape, expected_shape)
            self.assertEqual(soft_out_.shape, expected_shape)

        # Tests unknown input shapes: only the output rank can be checked.
        expected_rank = 2 + len(hparams_dim)

        unknown_ids = tf.placeholder(dtype=tf.int64, shape=[None, None])
        unknown_out = embedder(unknown_ids)
        self.assertEqual(len(unknown_out.get_shape()), expected_rank)

        unknown_soft_ids = tf.placeholder(
            dtype=tf.int64, shape=[None, None, None])
        unknown_soft_out = embedder(soft_ids=unknown_soft_ids)
        self.assertEqual(len(unknown_soft_out.get_shape()), expected_rank)
Пример #7
0
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):
        """Creates the sub-modules that make up the GPT-2 encoder.

        Inside the module's variable scope this builds, in order: a word
        embedder, a position embedder, and a `TransformerEncoder`.

        Args:
            pretrained_model_name: Forwarded to
                `self.load_pretrained_config`. Defaults to `None`.
            cache_dir: Forwarded to `self.load_pretrained_config`.
                Defaults to `None`.
            hparams: Forwarded to the parent class constructor.
        """
        super(GPT2Encoder, self).__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        # Alias taken only after the config-loading call, so that it refers
        # to whatever `self._hparams` is once that call has returned.
        hp = self._hparams

        with tf.variable_scope(self.variable_scope):

            # Token embedding table.
            self.word_embedder = WordEmbedder(
                vocab_size=hp.vocab_size, hparams=hp.embed)

            # Position embedding table.
            self.position_embedder = PositionEmbedder(
                position_size=hp.position_size, hparams=hp.position_embed)

            # The Transformer stack that does the actual encoding.
            self.encoder = TransformerEncoder(hparams=hp.encoder)
Пример #8
0
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        """Creates the variables and sub-modules of the XLNet encoder.

        Inside the module's variable scope this builds, in order:

        * the bias variables `r_w_bias` and `r_r_bias`, shaped per layer
          when `untie_r` is set and shared across layers otherwise;
        * when `use_segments` is set, the `seg_embed` variable and the
          `r_s_bias` variable (otherwise both attributes are `None`);
        * a word embedder and a relative positional encoding;
        * one `RelativeMutiheadAttention` and one `PositionWiseFF` per
          layer, each pair under its own `layer_<i>` scope;
        * a dropout layer and the `mask_emb` variable.

        Args:
            pretrained_model_name: Forwarded to
                `self.load_pretrained_config`. Defaults to `None`.
            cache_dir: Forwarded to `self.load_pretrained_config`.
                Defaults to `None`.
            hparams: Forwarded to the parent class constructor.
        """
        super(XLNetEncoder, self).__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        # Alias taken only after the config-loading call, so that it refers
        # to whatever `self._hparams` is once that call has returned.
        hp = self._hparams
        num_layers = hp.num_layers
        use_segments = hp.use_segments
        untie_r = hp.untie_r

        # Untied biases carry a leading per-layer axis; tied ones do not.
        head_shape = [hp.num_heads, hp.head_dim]
        bias_shape = [num_layers] + head_shape if untie_r else head_shape

        with tf.variable_scope(self.variable_scope):

            self.r_w_bias = tf.get_variable(
                'r_w_bias', bias_shape, dtype=tf.float32)
            self.r_r_bias = tf.get_variable(
                'r_r_bias', bias_shape, dtype=tf.float32)

            if use_segments:
                # The segment embedding is always per layer, with a size-2
                # second axis.
                self.segment_embed = tf.get_variable(
                    'seg_embed', [num_layers, 2] + head_shape,
                    dtype=tf.float32)
                self.r_s_bias = tf.get_variable(
                    'r_s_bias', bias_shape, dtype=tf.float32)
            else:
                self.segment_embed = None
                self.r_s_bias = None

            # Word embedding
            self.word_embedder = WordEmbedder(
                vocab_size=hp.vocab_size, hparams={"dim": hp.hidden_dim})

            # Position embedding
            self.pos_embed = RelativePositionalEncoding(
                hparams={
                    "dim": hp.hidden_dim,
                    "max_seq_len": hp.max_seq_len
                })

            self.attn_layers = []
            self.ff_layers = []

            # Hyperparameters shared by every attention / feed-forward layer.
            rel_attn_hparams = dict_fetch(
                hp, RelativeMutiheadAttention.default_hparams())
            rel_attn_hparams["name"] = "rel_attn"

            ff_hparams = dict_fetch(hp, PositionWiseFF.default_hparams())
            ff_hparams["name"] = "ff"

            for i in range(num_layers):
                with tf.variable_scope("layer_{}".format(i)):
                    # Positional arguments for the attention layer: the two
                    # biases, then (only with segments) the segment bias and
                    # this layer's segment embedding. Untied biases are
                    # sliced per layer; tied ones are passed whole.
                    if untie_r:
                        attn_args = [self.r_r_bias[i], self.r_w_bias[i]]
                    else:
                        attn_args = [self.r_r_bias, self.r_w_bias]
                    if use_segments:
                        attn_args.append(
                            self.r_s_bias[i] if untie_r else self.r_s_bias)
                        attn_args.append(self.segment_embed[i])

                    self.attn_layers.append(
                        RelativeMutiheadAttention(
                            *attn_args, hparams=rel_attn_hparams))
                    self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

            self.dropout = get_layer(hparams={
                "type": "Dropout",
                "kwargs": {
                    "rate": hp.dropout
                }
            })

            self.mask_embed = tf.get_variable('mask_emb',
                                              [1, 1, self.hparams.hidden_dim],
                                              dtype=tf.float32)