def transform(self, inputs, mode):
    embeddings = tf.get_variable(
        "w_char_embs",
        shape=[self.vocabulary_size, self.embedding_size],
        dtype=self.dtype)
    outputs = embedding_lookup(embeddings, inputs)
    outputs = tf.layers.dropout(
        outputs,
        rate=self.dropout,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # Merge batch and sequence timesteps dimensions.
    outputs = tf.reshape(outputs, [-1, tf.shape(inputs)[-1], self.embedding_size])

    # Pad on both sides.
    outputs = tf.pad(outputs, [[0, 0], [self.kernel_size - 1, self.kernel_size - 1], [0, 0]])
    outputs.set_shape((None, None, self.embedding_size))

    outputs = tf.layers.conv1d(
        outputs,
        self.num_outputs,
        self.kernel_size,
        strides=self.stride)

    # Max pooling over time (the character dimension).
    outputs = tf.reduce_max(outputs, axis=1)

    # Split batch and sequence timesteps dimensions.
    outputs = tf.reshape(outputs, [-1, tf.shape(inputs)[1], self.num_outputs])

    return outputs
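# Shape walkthrough for the character convolution in transform() above (a
# sketch; the dimension names are illustrative, not taken from the original):
#   inputs            -> [batch, max_words, max_chars]           int ids
#   embedding_lookup  -> [batch, max_words, max_chars, embedding_size]
#   first reshape     -> [batch * max_words, max_chars, embedding_size]
#   pad + conv1d      -> [batch * max_words, conv_steps, num_outputs]
#   reduce_max        -> [batch * max_words, num_outputs]
#   final reshape     -> [batch, max_words, num_outputs]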
def transform(self, inputs, mode):
    try:
        embeddings = tf.get_variable("w_embs", dtype=self.dtype, trainable=self.trainable)
    except ValueError:
        # Variable does not exist yet.
        if self.embedding_file:
            pretrained = load_pretrained_embeddings(
                self.embedding_file,
                self.vocabulary_file,
                num_oov_buckets=self.num_oov_buckets,
                with_header=self.embedding_file_with_header,
                case_insensitive_embeddings=self.case_insensitive_embeddings)
            self.embedding_size = pretrained.shape[-1]
            shape = None
            initializer = tf.constant(pretrained.astype(self.dtype.as_numpy_dtype()))
        else:
            shape = [self.vocabulary_size, self.embedding_size]
            initializer = None
        embeddings = tf.get_variable(
            "w_embs",
            shape=shape,
            dtype=self.dtype,
            initializer=initializer,
            trainable=self.trainable)

    outputs = embedding_lookup(embeddings, inputs)
    outputs = tf.layers.dropout(
        outputs,
        rate=self.dropout,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    return outputs
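# Why the try/except above works under TF 1.x variable scoping (a minimal
# sketch with illustrative names, not from the original code): tf.get_variable
# called without a shape only returns an already existing variable in a reusing
# scope; for a new variable it raises ValueError, which routes execution into
# the creation branch.
with tf.variable_scope("embedder") as scope:
    created = tf.get_variable("w_embs", shape=[100, 16])  # created on first call
with tf.variable_scope(scope, reuse=True):
    reused = tf.get_variable("w_embs")  # shape may be omitted when reusing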
def encode(self, reuse=None):
    input_ids, input_length = self.input_ids, self.input_length
    input_ = embedding_lookup(self.src_emb, input_ids)
    with tf.variable_scope("encoder", reuse=reuse):
        return self.encoder.encode(input_, sequence_length=input_length, mode=self.mode)
def encode(self, positions, depth, dtype=tf.float32):
    positions = tf.minimum(positions, self.maximum_position)
    embeddings_grad_mask = tf.concat(
        [tf.zeros(shape=[1, depth], dtype=dtype),
         tf.ones(shape=[self.maximum_position, depth], dtype=dtype)],
        axis=0)
    embeddings = tf.get_variable(
        "w_embs",
        shape=[self.maximum_position + 1, depth],
        dtype=dtype,
        initializer=tf.random_normal_initializer(0, 0.1))
    # The gradient of the embedding vector at the padding position (index 0) is always zero.
    embeddings = scale_gradient(embeddings, embeddings_grad_mask)
    return embedding_lookup(embeddings, positions)
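# scale_gradient is not defined in this excerpt. A minimal sketch of one common
# implementation, assuming it scales the backward gradient element-wise by
# `scale` while leaving the forward value untouched (the original helper may
# differ):
def scale_gradient(x, scale):
    # Forward pass: x * scale + x * (1 - scale) == x.
    # Backward pass: only the first term carries gradient, so the gradient
    # w.r.t. x is multiplied by `scale` (zero rows get zero gradient).
    return x * scale + tf.stop_gradient(x * (1.0 - scale))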
def _embed(self, inputs, mode):
    embeddings = tf.get_variable(
        "w_char_embs",
        shape=[self.vocabulary_size, self.embedding_size],
        dtype=self.dtype)
    outputs = embedding_lookup(embeddings, inputs)
    outputs = tf.layers.dropout(
        outputs,
        rate=self.dropout,
        training=mode == tf.estimator.ModeKeys.TRAIN)
    return outputs
def get_embedding_fn(embedding):
    """Returns the embedding function.

    Args:
      embedding: The embedding tensor or a callable that takes word ids.

    Returns:
      A callable that takes word ids.
    """
    if callable(embedding):
        return embedding
    else:
        return lambda ids: embedding_lookup(embedding, ids)
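# Usage sketch for get_embedding_fn (illustrative names and shapes, not from
# the original code): a raw embedding matrix and a callable are normalized to
# the same interface.
word_embs = tf.get_variable("word_embs_example", shape=[100, 16])
embed_fn = get_embedding_fn(word_embs)                 # wraps embedding_lookup
ids = tf.constant([[1, 2, 3]])
vectors = embed_fn(ids)                                # shape [1, 3, 16]
custom_fn = get_embedding_fn(lambda x: embedding_lookup(word_embs, x))  # returned unchanged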
def encode(self, positions, depth, dtype=tf.float32):
    positions = tf.minimum(positions, self.maximum_position)
    embeddings = tf.get_variable(
        "w_embs",
        shape=[self.maximum_position + 1, depth],
        dtype=dtype)
    return embedding_lookup(embeddings, positions)
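# Behavior note for the position encoder above: the embedding table has
# maximum_position + 1 rows (indices 0 through maximum_position), and
# tf.minimum clips any larger position index to maximum_position, so all
# out-of-range positions share the last embedding row.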
def decode(self, encoder_outputs_tuple, output_layer=None, reuse=False):
    (encoder_outputs, encoder_state, encoder_sequence_length) = encoder_outputs_tuple
    self.encoder_outputs = encoder_outputs
    input_ids, target_ids_in, target_length_in = \
        self.input_ids, self.target_ids_in, self.target_length_in_or_out

    with tf.variable_scope("decoder", reuse=reuse):
        if output_layer is None:
            output_layer = tf.layers.Dense(self.tgt_vocab_size)
            output_layer.build([None, encoder_outputs.get_shape()[-1]])

        predictions = None
        logits = None

        if self.mode != constants.INFER:
            target_in = embedding_lookup(self.tgt_emb, target_ids_in)
            logits, _, _ = self.decoder.decode(
                target_in,
                target_length_in,
                vocab_size=self.tgt_vocab_size,
                initial_state=encoder_state,
                mode=self.mode,
                memory=encoder_outputs,
                memory_sequence_length=encoder_sequence_length,
                output_layer=output_layer)
        else:
            batch_size = tf.shape(encoder_sequence_length)[0]
            maximum_iterations = self.params.get("maximum_iterations", 30)
            start_tokens = tf.fill([batch_size], constants.START_OF_SENTENCE_ID)
            end_token = constants.END_OF_SENTENCE_ID
            decode_type = self.params.get("decode_type", constants.GREEDY)
            decode_width = self.params.get("decode_width", 1)

            if decode_type == constants.RANDOM:
                print("random decode_width:", decode_width)
                tile_start_tokens = tf.contrib.seq2seq.tile_batch(
                    start_tokens, multiplier=decode_width)
                tile_encoder_state = tf.contrib.seq2seq.tile_batch(
                    encoder_state, multiplier=decode_width)
                tile_encoder_outputs = tf.contrib.seq2seq.tile_batch(
                    encoder_outputs, multiplier=decode_width)
                tile_encoder_sequence_length = tf.contrib.seq2seq.tile_batch(
                    encoder_sequence_length, multiplier=decode_width)

                sampled_ids, _, sampled_length, log_probs, alignment = self.decoder.dynamic_decode(
                    self.tgt_emb,
                    tile_start_tokens,
                    end_token,
                    vocab_size=self.tgt_vocab_size,
                    initial_state=tile_encoder_state,
                    output_layer=output_layer,
                    maximum_iterations=maximum_iterations,
                    mode=self.mode,
                    memory=tile_encoder_outputs,
                    memory_sequence_length=tile_encoder_sequence_length,
                    return_alignment_history=True,
                    sample_from=0,
                    # penalize_previous_words=True  # True for Transformer
                )
                sampled_ids = tf.reshape(sampled_ids, (batch_size, decode_width, -1))
                sampled_length = tf.reshape(sampled_length, (batch_size, decode_width))
                log_probs = tf.reshape(log_probs, (batch_size, decode_width))
            elif decode_type == constants.BEAM:
                sampled_ids, _, sampled_length, log_probs, alignment = \
                    self.decoder.dynamic_decode_and_search(
                        self.tgt_emb,
                        start_tokens,
                        end_token,
                        vocab_size=self.tgt_vocab_size,
                        initial_state=encoder_state,
                        output_layer=output_layer,
                        beam_width=decode_width,
                        maximum_iterations=maximum_iterations,
                        mode=self.mode,
                        memory=encoder_outputs,
                        memory_sequence_length=encoder_sequence_length,
                        return_alignment_history=True)
            elif decode_type == constants.GREEDY or decode_width <= 1:
                sampled_ids, _, sampled_length, log_probs, alignment = self.decoder.dynamic_decode(
                    self.tgt_emb,
                    start_tokens,
                    end_token,
                    vocab_size=self.tgt_vocab_size,
                    initial_state=encoder_state,
                    output_layer=output_layer,
                    maximum_iterations=maximum_iterations,
                    mode=self.mode,
                    memory=encoder_outputs,
                    memory_sequence_length=encoder_sequence_length,
                    return_alignment_history=True)

            target_tokens = self.tgt_vocab_rev.lookup(tf.cast(sampled_ids, tf.int64))
            predictions = {
                "ids": sampled_ids,
                "tokens": target_tokens,
                "length": sampled_length,
                "log_probs": log_probs}

    return logits, predictions
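# Inference paths in decode() above, summarized from the code itself:
#   constants.RANDOM: start tokens, encoder state, encoder outputs, and lengths
#     are tiled decode_width times with tf.contrib.seq2seq.tile_batch, then
#     dynamic_decode runs on the tiled inputs (with sample_from=0) and the
#     results are reshaped to [batch, decode_width, ...].
#   constants.BEAM: dynamic_decode_and_search runs beam search with
#     beam_width=decode_width.
#   constants.GREEDY (or decode_width <= 1): a single greedy dynamic_decode pass.
# If decode_type matches none of these and decode_width > 1, sampled_ids is
# never assigned and the vocabulary lookup above would fail.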