def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    """Builds the embedders, the Transformer encoder and the pooler layer.

    Args:
        pretrained_model_name: Forwarded to ``load_pretrained_config``.
        cache_dir: Forwarded to ``load_pretrained_config``.
        hparams: Hyperparameters forwarded to the parent constructor.
    """
    super(BERTEncoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    # Bound only after load_pretrained_config() has run.
    cfg = self._hparams

    with tf.variable_scope(self.variable_scope):
        # Token (word) embedding.
        self.word_embedder = WordEmbedder(
            vocab_size=cfg.vocab_size, hparams=cfg.embed)

        # Segment embedding, one entry per token type.
        self.segment_embedder = WordEmbedder(
            vocab_size=cfg.type_vocab_size, hparams=cfg.segment_embed)

        # Position embedding.
        self.position_embedder = PositionEmbedder(
            position_size=cfg.position_size, hparams=cfg.position_embed)

        # The BERT encoder itself (a TransformerEncoder).
        self.encoder = TransformerEncoder(hparams=cfg.encoder)

        # Dense layer with tanh activation, created under the "pooler" scope.
        with tf.variable_scope("pooler"):
            self.pooler = get_layer(hparams={
                "type": "Dense",
                "kwargs": {
                    "units": cfg.hidden_size,
                    "activation": tf.tanh,
                },
            })
def test_infer_helpers(self):
    """Tests inference helpers, both stand-alone and inside an RNN decoder.
    """

    def _check_helper(helper):
        # One step of the helper; `outputs` and `state` are not used by it.
        dummy_outputs = tf.ones([self._batch_size, self._vocab_size])
        dummy_sample_ids = tf.ones([self._batch_size], dtype=tf.int32)
        _, next_inputs, _ = helper.next_inputs(
            time=1,
            outputs=dummy_outputs,
            state=None,
            sample_ids=dummy_sample_ids)

        self.assertEqual(helper.sample_ids_shape, tf.TensorShape([]))
        expected_input_shape = tf.TensorShape(
            [self._batch_size, self._emb_dim])
        self.assertEqual(next_inputs.get_shape(), expected_input_shape)

        # Run the same helper through an RNN decoder.
        projection = tf.layers.Dense(self._vocab_size)
        decoder = BasicRNNDecoder(
            vocab_size=self._vocab_size, output_layer=projection)
        fetches = decoder(
            helper=helper, max_decoding_length=self._max_seq_length)
        outputs, final_state, sequence_lengths = fetches

        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, final_state_, sequence_lengths_ = sess.run(
                [outputs, final_state, sequence_lengths])

            # Decoded length is the longest sequence in the batch.
            longest = max(sequence_lengths_)
            self.assertEqual(
                outputs_.logits.shape,
                (self._batch_size, longest, self._vocab_size))
            self.assertEqual(
                outputs_.sample_id.shape, (self._batch_size, longest))
            self.assertEqual(
                final_state_[0].shape, (self._batch_size, cell_dim))

    # case-(1): the helper receives the embedding object directly
    _check_helper(GreedyEmbeddingHelper(
        self._embedding, self._start_tokens, self._end_token))

    # case-(2): the helper receives a WordEmbedder module
    embedder = WordEmbedder(self._embedding)
    _check_helper(GreedyEmbeddingHelper(
        embedder, self._start_tokens, self._end_token))

    # case-(3): the helper receives a callable taking (ids, times)
    word_embedder = WordEmbedder(self._embedding)
    pos_embedder = PositionEmbedder(position_size=self._max_seq_length)

    def _word_plus_position(ids, times):
        return word_embedder(ids) + pos_embedder(times)

    _check_helper(GreedyEmbeddingHelper(
        _word_plus_position, self._start_tokens, self._end_token))
def test_embedder_multi_calls(self):
    """Tests calling one embedder several times with differently shaped inputs.
    """
    embedder = WordEmbedder(
        vocab_size=100,
        hparams={
            "dim": 1024,
            "dropout_rate": 0.3,
            "dropout_strategy": "item"
        })

    # The same embedder is applied first to rank-2, then to rank-3 ids.
    for input_shape in ([64, 16], [64, 10, 20]):
        ids = tf.ones(input_shape, dtype=tf.int32)
        embedded = embedder(ids)

        dim = embedder.dim
        if not isinstance(dim, (list, tuple)):
            dim = [dim]
        self.assertEqual(embedded.shape, input_shape + dim)
def test_encode_with_embedder(self):
    """Tests feeding the output of a
    :mod:`texar.tf.modules.embedders` embedder into an RNN encoder.
    """
    batch_size, seq_len = 64, 16

    embedder = WordEmbedder(vocab_size=20, hparams={"dim": 100})
    token_ids = tf.ones([batch_size, seq_len], dtype=tf.int32)

    encoder = UnidirectionalRNNEncoder()
    embedded = embedder(token_ids)
    outputs, state = encoder(embedded)

    num_units = encoder.hparams.rnn_cell.kwargs.num_units

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_val, state_val = sess.run([outputs, state])
        self.assertEqual(
            outputs_val.shape, (batch_size, seq_len, num_units))
        self.assertEqual(state_val[0].shape, (batch_size, num_units))
def test_word_embedder_soft_ids(self):
    """Tests the correctness of using soft ids.

    Embeds the hard id ``3`` and the matching one-hot soft id with the same
    embedder and checks that both lookups produce the same values.
    """
    # Embedding table with a single column holding the values 0..4.
    init_value = np.expand_dims(np.arange(5), 1)
    embedder = WordEmbedder(init_value=init_value)

    ids = np.array([3])
    soft_ids = np.array([[0, 0, 0, 1, 0]])

    outputs = embedder(ids=ids)
    soft_outputs = embedder(soft_ids=soft_ids)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_, soft_outputs_ = sess.run([outputs, soft_outputs])
        # Compare as arrays: `assertEqual` on ndarrays goes through the
        # truth value of an elementwise `==`, which is only defined for
        # one-element arrays and raises ValueError otherwise.
        np.testing.assert_array_equal(outputs_, soft_outputs_)
def _test_word_embedder(self, hparams):
    """Tests :class:`texar.tf.modules.WordEmbedder` with the given hparams.

    Args:
        hparams: Embedder hyperparameters; must contain a ``"dim"`` entry.
    """
    vocab_size = 100
    batch_shape = [64, 16]

    embedder = WordEmbedder(vocab_size=vocab_size, hparams=hparams)

    # Hard-id lookup.
    ids = tf.ones(batch_shape, dtype=tf.int32)
    outputs = embedder(ids)

    # Soft-id lookup: one weight per vocabulary entry.
    soft_ids = tf.ones(batch_shape + [embedder.vocab_size], dtype=tf.float32)
    outputs_soft = embedder(soft_ids=soft_ids)

    # Normalise both dimension specs to lists so they can be concatenated.
    emb_dim = embedder.dim
    if not isinstance(emb_dim, (list, tuple)):
        emb_dim = [emb_dim]

    if isinstance(hparams["dim"], (list, tuple)):
        hparams_dim = hparams["dim"]
    else:
        hparams_dim = [hparams["dim"]]

    self.assertEqual(outputs.shape, batch_shape + emb_dim)
    self.assertEqual(outputs_soft.shape, batch_shape + emb_dim)
    self.assertEqual(emb_dim, hparams_dim)
    self.assertEqual(embedder.vocab_size, 100)
    self.assertEqual(len(embedder.trainable_variables), 1)

    expected_shape = tuple(batch_shape) + tuple(emb_dim)
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_val, outputs_soft_val = sess.run(
            [outputs, outputs_soft],
            feed_dict={global_mode(): tf.estimator.ModeKeys.TRAIN})
        self.assertEqual(outputs_val.shape, expected_shape)
        self.assertEqual(outputs_soft_val.shape, expected_shape)

    # Tests unknown input shapes
    expected_rank = 2 + len(hparams_dim)

    unknown_ids = tf.placeholder(dtype=tf.int64, shape=[None, None])
    unknown_outputs = embedder(unknown_ids)
    self.assertEqual(len(unknown_outputs.get_shape()), expected_rank)

    unknown_soft_ids = tf.placeholder(
        dtype=tf.int64, shape=[None, None, None])
    unknown_outputs_soft = embedder(soft_ids=unknown_soft_ids)
    self.assertEqual(len(unknown_outputs_soft.get_shape()), expected_rank)
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    """Builds the word/position embedders and the Transformer encoder.

    Args:
        pretrained_model_name: Forwarded to ``load_pretrained_config``.
        cache_dir: Forwarded to ``load_pretrained_config``.
        hparams: Hyperparameters forwarded to the parent constructor.
    """
    super(GPT2Encoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    # Bound only after load_pretrained_config() has run.
    cfg = self._hparams

    with tf.variable_scope(self.variable_scope):
        # Token (word) embedding.
        word_embedder = WordEmbedder(
            vocab_size=cfg.vocab_size, hparams=cfg.embed)
        self.word_embedder = word_embedder

        # Position embedding.
        position_embedder = PositionEmbedder(
            position_size=cfg.position_size, hparams=cfg.position_embed)
        self.position_embedder = position_embedder

        # The GPT2 encoder itself (a TransformerEncoder).
        self.encoder = TransformerEncoder(hparams=cfg.encoder)
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    """Creates the XLNet variables, embedders and per-layer sub-modules.

    Args:
        pretrained_model_name: Forwarded to ``load_pretrained_config``.
        cache_dir: Forwarded to ``load_pretrained_config``.
        hparams: Hyperparameters forwarded to the parent constructor.
    """
    super(XLNetEncoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    cfg = self._hparams
    num_layers = cfg.num_layers
    use_segments = cfg.use_segments
    untie_r = cfg.untie_r

    # Shape shared by all bias variables: one copy per layer when the
    # biases are untied, a single copy otherwise.
    per_head_shape = [cfg.num_heads, cfg.head_dim]
    if untie_r:
        bias_shape = [num_layers] + per_head_shape
    else:
        bias_shape = per_head_shape

    with tf.variable_scope(self.variable_scope):
        self.r_w_bias = tf.get_variable(
            'r_w_bias', list(bias_shape), dtype=tf.float32)
        self.r_r_bias = tf.get_variable(
            'r_r_bias', list(bias_shape), dtype=tf.float32)

        if not use_segments:
            self.segment_embed = None
            self.r_s_bias = None
        else:
            # The segment embedding always carries a leading layer axis.
            self.segment_embed = tf.get_variable(
                'seg_embed',
                [num_layers, 2] + per_head_shape,
                dtype=tf.float32)
            self.r_s_bias = tf.get_variable(
                'r_s_bias', list(bias_shape), dtype=tf.float32)

        # Word embedding
        self.word_embedder = WordEmbedder(
            vocab_size=cfg.vocab_size,
            hparams={"dim": cfg.hidden_dim})

        # Position embedding
        self.pos_embed = RelativePositionalEncoding(hparams={
            "dim": cfg.hidden_dim,
            "max_seq_len": cfg.max_seq_len
        })

        self.attn_layers = []
        self.ff_layers = []

        rel_attn_hparams = dict_fetch(
            cfg, RelativeMutiheadAttention.default_hparams())
        rel_attn_hparams["name"] = "rel_attn"

        ff_hparams = dict_fetch(cfg, PositionWiseFF.default_hparams())
        ff_hparams["name"] = "ff"

        for layer_idx in range(num_layers):
            with tf.variable_scope("layer_{}".format(layer_idx)):
                # Positional arguments for the attention layer, in the
                # order (r_r_bias, r_w_bias[, r_s_bias, segment_embed]).
                # Untied biases are sliced per layer; tied ones are
                # passed whole.
                if untie_r:
                    attn_args = [self.r_r_bias[layer_idx],
                                 self.r_w_bias[layer_idx]]
                    if use_segments:
                        attn_args.append(self.r_s_bias[layer_idx])
                else:
                    attn_args = [self.r_r_bias, self.r_w_bias]
                    if use_segments:
                        attn_args.append(self.r_s_bias)

                if use_segments:
                    # The segment embedding is sliced per layer in both
                    # the tied and the untied case.
                    attn_args.append(self.segment_embed[layer_idx])

                self.attn_layers.append(
                    RelativeMutiheadAttention(
                        *attn_args, hparams=rel_attn_hparams))
                self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

        self.dropout = get_layer(hparams={
            "type": "Dropout",
            "kwargs": {
                "rate": cfg.dropout
            }
        })

        self.mask_embed = tf.get_variable(
            'mask_emb', [1, 1, self.hparams.hidden_dim], dtype=tf.float32)