def zero_state(self, batch_size, dtype):
    # For GMM attention, we only need attention_computer.
    assert self.attention_mechanism is None
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        if self.decoder_rnn_init_state is not None:
            rnn_cell_state = self.decoder_rnn_init_state
        else:
            rnn_cell_state = self.rnn_cell._cell.zero_state(batch_size, dtype)
        with ops.control_dependencies(self.check_batch_size(batch_size)):
            rnn_cell_state = nest.map_structure(
                lambda s: array_ops.identity(s, name="checked_rnn_cell_state"),
                rnn_cell_state)
        return GMMTacoDecoderCellState(
            rnn_cell_state=rnn_cell_state,
            time=array_ops.zeros([], dtype=tf.int32),
            attention=rnn_cell_impl._zero_state_tensors(
                self.attention_layer_size, batch_size, dtype),
            mu=rnn_cell_impl._zero_state_tensors(
                self.num_gmm_mixture, batch_size, dtype),
            alignment_history=tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True))
def zero_state(self, batch_size, dtype):
    """Return an initial (zero) state tuple for this `AttentionWrapper`.

    **NOTE** Please see the initializer documentation for details of how
    to call `zero_state` if using an `AttentionWrapper` with a
    `BeamSearchDecoder`.

    Args:
      batch_size: `0D` integer tensor: the batch size.
      dtype: The internal state data type.

    Returns:
      An `AttentionWrapperState` tuple containing zeroed out tensors and,
      possibly, empty `TensorArray` objects.

    Raises:
      ValueError: (or, possibly at runtime, InvalidArgument), if
        `batch_size` does not match the output size of the encoder passed
        to the wrapper object at initialization time.
    """
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        if self._initial_cell_state is not None:
            cell_state = self._initial_cell_state
        else:
            cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "When calling zero_state of AttentionWrapper %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size. Are you using "
            "the BeamSearchDecoder? If so, make sure your encoder output has "
            "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
            "the batch_size= argument passed to zero_state is "
            "batch_size * beam_width.")
        with tf.control_dependencies(
                self._batch_size_checks(batch_size, error_message)):
            cell_state = nest.map_structure(
                lambda s: tf.identity(s, name="checked_cell_state"), cell_state)
        return CoverageAttentionWrapperState(
            cell_state=cell_state,
            time=tf.zeros([], dtype=tf.int32),
            attention=_zero_state_tensors(self._attention_layer_size,
                                          batch_size, dtype),
            coverages=self._item_or_tuple(
                _zero_state_tensors(attention_mechanism.alignments_size,
                                    batch_size, dtype)
                for attention_mechanism in self._attention_mechanisms),
            alignments=self._item_or_tuple(
                attention_mechanism.initial_alignments(batch_size, dtype)
                for attention_mechanism in self._attention_mechanisms),
            # The alignment history is read several times, so clear_after_read
            # must be set to False.
            alignment_history=self._item_or_tuple(
                tf.TensorArray(dtype=dtype, size=0, clear_after_read=False,
                               dynamic_size=True)
                if self._alignment_history else ()
                for _ in self._attention_mechanisms))
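# --- Usage sketch (not taken from the snippet above) ----------------------------
# The BeamSearchDecoder note in the docstring is easy to get wrong in practice.
# This is a minimal TF 1.x sketch of tiling the encoder output and requesting the
# zero state with batch_size * beam_width, using the stock AttentionWrapper. All
# names below (encoder_outputs, encoder_final_state, source_lengths, batch_size,
# beam_width) are illustrative assumptions, not identifiers from the code above.
import tensorflow as tf

tiled_memory = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=beam_width)
tiled_lengths = tf.contrib.seq2seq.tile_batch(source_lengths, multiplier=beam_width)
tiled_enc_state = tf.contrib.seq2seq.tile_batch(encoder_final_state, multiplier=beam_width)

attention = tf.contrib.seq2seq.BahdanauAttention(
    num_units=256, memory=tiled_memory, memory_sequence_length=tiled_lengths)
attn_cell = tf.contrib.seq2seq.AttentionWrapper(tf.contrib.rnn.LSTMCell(256), attention)

# The batch size passed to zero_state must already be batch_size * beam_width,
# exactly as the error message constructed above explains.
decoder_initial_state = attn_cell.zero_state(
    batch_size=batch_size * beam_width, dtype=tf.float32)
decoder_initial_state = decoder_initial_state.clone(cell_state=tiled_enc_state)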
def zero_state(self, batch_size, dtype):
    """Return an initial (zero) state tuple for this `AttentionWrapper`.

    **NOTE** Please see the initializer documentation for details of how
    to call `zero_state` if using an `AttentionWrapper` with a
    `BeamSearchDecoder`.

    Args:
      batch_size: `0D` integer tensor: the batch size.
      dtype: The internal state data type.

    Returns:
      An `AttentionWrapperState` tuple containing zeroed out tensors and,
      possibly, empty `TensorArray` objects.

    Raises:
      ValueError: (or, possibly at runtime, InvalidArgument), if
        `batch_size` does not match the output size of the encoder passed
        to the wrapper object at initialization time.
    """
    name_scope_str = type(self).__name__ + "ZeroState"
    with tf.name_scope(name_scope_str, values=[batch_size]):
        if self._initial_cell_state is not None:
            cell_state = self._initial_cell_state
        else:
            cell_state = self._cell.zero_state(batch_size, dtype)
        initial_alignment = self._attention_mechanism.initial_alignments(
            batch_size, dtype)
        return tf.contrib.seq2seq.AttentionWrapperState(
            cell_state=cell_state,
            time=tf.zeros([], dtype=tf.int32),
            attention=rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype),
            alignments=initial_alignment,
            attention_state=self._attention_mechanism.initial_state(
                batch_size, dtype),
            alignment_history=())
def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size, state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is a
      nested list or tuple (of the same structure) of `2-D` tensors with the
      shapes `[batch_size, s]` for each s in `state_size`.
    """
    # Try to use the last cached zero_state. This is done to avoid recreating
    # zeros, especially when eager execution is enabled.
    state_size = self.state_size
    is_eager = context.in_eager_mode()
    if is_eager and hasattr(self, "_last_zero_state"):
        (last_state_size, last_batch_size, last_dtype,
         last_output) = getattr(self, "_last_zero_state")
        if (last_batch_size == batch_size and last_dtype == dtype and
                last_state_size == state_size):
            return last_output
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        output = _zero_state_tensors(state_size, batch_size, dtype)
    if is_eager:
        self._last_zero_state = (state_size, batch_size, dtype, output)
    return output
def _create(self, encoder_output, decoder_state_size, **kwargs):
    """Creates the decoder's initial RNN states according to
    `decoder_state_size`.

    Passes the final state of the encoder to each layer in the decoder.

    Args:
        encoder_output: An instance of `collections.namedtuple`
          from `Encoder.encode()`.
        decoder_state_size: RNN decoder state size.
        **kwargs:

    Returns: The decoder states with the structure determined
      by `decoder_state_size`.

    Raises:
        ValueError: if the structure of the encoder RNN state does not
          match the structure of the decoder RNN state.
    """
    batch_size = tf.shape(encoder_output.attention_length)[0]
    # Final state of the encoder, of type LSTMStateTuple.
    enc_final_state = _final_state(
        encoder_output.final_states, direction=self.params["direction"])
    assert_state_is_compatible(
        rnn_cell_impl._zero_state_tensors(
            decoder_state_size[0], batch_size, tf.float32),
        enc_final_state)
    if nest.is_sequence(decoder_state_size):
        return tuple([enc_final_state for _ in decoder_state_size])
    return enc_final_state
def make_decoder_cell(rnn_size, num_layers, encoder_output, source_seq_len,
                      keep_prob, batch_size, encoder_state):
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            single_cell = tf.contrib.rnn.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.contrib.rnn.DropoutWrapper(
                single_cell, input_keep_prob=keep_prob)

    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size, encoder_output, source_seq_len,
        normalize=False, name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
        dec_cell, attention_mechanism, rnn_size)

    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        encoder_state[0],
        _zero_state_tensors(rnn_size, batch_size, tf.float32))

    return dec_cell, initial_state
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size,
                   inputs_length, targets_length, max_target_length, rnn_size,
                   TEXT_2_INT, keep_prob, batch_size, num_layers, direction):
    with tf.name_scope("RNN_Decoder_Cell"):
        for layer in range(num_layers):
            with tf.variable_scope('decoder_{}'.format(layer)):
                lstm = tf.contrib.rnn.LSTMCell(rnn_size)
                dec_cell = tf.contrib.rnn.DropoutWrapper(
                    lstm, input_keep_prob=keep_prob)

    output_layer = Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size, enc_output, inputs_length,
        normalize=False, name='BahdanauAttention')

    with tf.name_scope("Attention_Wrapper"):
        dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
            dec_cell, attn_mech, rnn_size)

    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        enc_state,
        _zero_state_tensors(rnn_size, batch_size, tf.float32))

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, targets_length,
                                                  dec_cell, initial_state,
                                                  output_layer, vocab_size,
                                                  max_target_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(embeddings, TEXT_2_INT['<GO>'],
                                                    TEXT_2_INT['<EOS>'], dec_cell,
                                                    initial_state, output_layer,
                                                    max_target_length, batch_size)

    return training_logits, inference_logits
def zero_state(self, batch_size, dtype):
    """Return an initial (zero) state tuple for this `AttentionWrapper`.

    Args:
      batch_size: `0D` integer tensor: the batch size.
      dtype: The internal state data type.

    Returns:
      A `TacotronDecoderCellState` tuple containing zeroed out tensors and,
      possibly, empty `TensorArray` objects.

    Raises:
      ValueError: (or, possibly at runtime, InvalidArgument), if
        `batch_size` does not match the output size of the encoder passed
        to the wrapper object at initialization time.
    """
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "When calling zero_state of TacotronDecoderCell %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size.")
        with ops.control_dependencies(
                self._batch_size_checks(batch_size, error_message)):
            cell_state = nest.map_structure(
                lambda s: array_ops.identity(s, name="checked_cell_state"),
                cell_state)
        return TacotronDecoderCellState(
            cell_state=cell_state,
            time=array_ops.zeros([], dtype=tf.int32),
            attention=rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype),
            alignments=self._attention_mechanism.initial_alignments(
                batch_size, dtype),
            alignment_history=tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True))
def zero_state(self, batch_size, dtype):
    # Return a zero state (adapted from AttentionWrapper).
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "When calling zero_state of TacotronDecoderCell %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size.")
        with ops.control_dependencies(
                self._batch_size_checks(batch_size, error_message)):
            cell_state = nest.map_structure(
                lambda s: array_ops.identity(s, name="checked_cell_state"),
                cell_state)
        return tf.contrib.seq2seq.AttentionWrapperState(
            cell_state=cell_state,
            time=array_ops.zeros([], dtype=tf.int32),
            attention=rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype),
            alignments=self._attention_mechanism.initial_alignments(
                batch_size, dtype),
            alignment_history=tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True),
            attention_state=tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True))
def zero_state(self, batch_size, dtype):
    """Initialize the memory to the key values."""
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        sent1 = tf.reshape(self.sent1, [-1, self.sent1_length * self.dim])
        sent2 = tf.reshape(self.sent2, [-1, self.sent2_length * self.dim])
        rh = _zero_state_tensors([self.num_units], batch_size, dtype=tf.float32)
        state_list = [sent1, sent2, rh[0]]
        return DoubleStateTuple(*state_list)
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size,
                   text_length, summary_length, max_summary_length, rnn_size,
                   vocab_to_int, keep_prob, batch_size, num_layers):
    '''Create the decoding cell and attention for the training and inference
    decoding layers.'''
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.contrib.rnn.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

    output_layer = Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size, enc_output, text_length,
        normalize=False, name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(dec_cell, attn_mech, rnn_size)

    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        enc_state[0],
        _zero_state_tensors(rnn_size, batch_size, tf.float32))

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, summary_length,
                                                  dec_cell, initial_state,
                                                  output_layer, vocab_size,
                                                  max_summary_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(embeddings, vocab_to_int['<GO>'],
                                                    vocab_to_int['<EOS>'], dec_cell,
                                                    initial_state, output_layer,
                                                    max_summary_length, batch_size)

    return training_logits, inference_logits
def zero_state(self, batch_size, dtype):
    """Initialize the memory to the key values."""
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        sent1 = tf.reshape(self.sent1, [-1, self.sent1_length * self.dim])
        sent2 = tf.reshape(self.sent2, [-1, self.sent2_length * self.dim])
        sent3 = tf.reshape(self.sent3, [-1, self.sent3_length * self.dim])
        rh = _zero_state_tensors([self.num_units] * 3, batch_size, dtype)
        # rh = [tf.tile(tf.expand_dims(self.keys[i], axis=0), [batch_size, 1])
        #       for i in range(3)]
        state_list = [sent1, sent2, sent3] + rh
        return state_list
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size,
                   text_length, summary_length, max_summary_length, rnn_size,
                   vocab_to_int, keep_prob, batch_size, num_layers):
    '''Create the decoding cell and attention for the training and inference
    decoding layers.'''
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.contrib.rnn.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

    output_layer = Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size, enc_output, text_length,
        normalize=False, name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(dec_cell, attn_mech, rnn_size)

    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        enc_state[0],
        _zero_state_tensors(rnn_size, batch_size, tf.float32))

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, summary_length,
                                                  dec_cell, initial_state,
                                                  output_layer, vocab_size,
                                                  max_summary_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(embeddings, vocab_to_int['<GO>'],
                                                    vocab_to_int['<EOS>'], dec_cell,
                                                    initial_state, output_layer,
                                                    max_summary_length, batch_size)

    return training_logits, inference_logits
def zero_state(self, batch_size, dtype):
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        cell_state = self._cell.zero_state(batch_size, dtype)
        with ops.control_dependencies(self._batch_size_checks(batch_size)):
            cell_state = nest.map_structure(
                lambda s: array_ops.identity(s, name="checked_cell_state"),
                cell_state)
        return TacotronDecoderCellState(
            cell_state=cell_state,
            time=array_ops.zeros([], dtype=tf.int32),
            attention=rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype),
            alignments=self._attention_mechanism.initial_alignments(
                batch_size, dtype),
            alignment_history=tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True))
def _create(self, decoder_state_size, **kwargs):
    """Creates the decoder's initial RNN states according to
    `decoder_state_size`.

    If `decoder_state_size` is an int or an LSTMStateTuple(int, int), return a
    Tensor of shape [batch_size, int] or an
    LSTMStateTuple([batch_size, int], [batch_size, int]).
    If `decoder_state_size` is a tuple of ints/LSTMStateTuples, return a tuple
    whose elements' structures match `decoder_state_size` respectively.

    Args:
        decoder_state_size: RNN decoder state size.
        **kwargs:

    Returns: The decoder states with the structure determined
      by `decoder_state_size`.
    """
    batch_size = self.batch_size
    return rnn_cell_impl._zero_state_tensors(
        decoder_state_size, batch_size, tf.float32)
def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is a
      nested list or tuple (of the same structure) of `2-D` tensors with the
      shapes `[batch_size x s]` for each s in `state_size`.
    """
    # Keep scope for backwards compatibility.
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        return rnn_cell_impl._zero_state_tensors(  # pylint: disable=protected-access
            self.state_size, batch_size, dtype)
def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is a
      nested list or tuple (of the same structure) of `2-D` tensors with the
      shapes `[batch_size x s]` for each s in `state_size`.
    """
    # Keep scope for backwards compatibility.
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        return rnn_cell_impl._zero_state_tensors(  # pylint: disable=protected-access
            self.state_size, batch_size, dtype)
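# --- Usage sketch (illustrative, TF 1.x graph mode) ------------------------------
# As the docstring above describes, the result mirrors the structure of
# `state_size`. The cell types and sizes below are assumptions for illustration,
# not part of the snippets in this collection.
import tensorflow as tf

cell = tf.contrib.rnn.LSTMCell(num_units=128)
zero = cell.zero_state(batch_size=32, dtype=tf.float32)
# LSTMCell's state_size is LSTMStateTuple(c=128, h=128), so `zero` is an
# LSTMStateTuple of two [32, 128] zero tensors.

multi = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.GRUCell(64) for _ in range(2)])
nested_zero = multi.zero_state(batch_size=32, dtype=tf.float32)
# For a nested state_size the zero state keeps the same nesting:
# here a 2-tuple of [32, 64] zero tensors.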
def _create(self, encoder_output, decoder_state_size, **kwargs):
    """Creates the decoder's initial RNN states according to
    `decoder_state_size`.

    If `decoder_state_size` is an int or an LSTMStateTuple(int, int), return a
    Tensor of shape [batch_size, int] or an
    LSTMStateTuple([batch_size, int], [batch_size, int]).
    If `decoder_state_size` is a tuple of ints/LSTMStateTuples, return a tuple
    whose elements' structures match `decoder_state_size` respectively.

    Args:
        encoder_output: An instance of `collections.namedtuple`
          from `Encoder.encode()`.
        decoder_state_size: RNN decoder state size.
        **kwargs:

    Returns: The decoder states with the structure determined
      by `decoder_state_size`.
    """
    batch_size = tf.shape(encoder_output.attention_length)[0]
    return rnn_cell_impl._zero_state_tensors(
        decoder_state_size, batch_size, tf.float32)
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size,
                   TEXT_LENGTH, SUMMARY_LENGTH, MAX_SUMMARY_LENGTH, RNN_SIZE,
                   VOCAB_TO_INT, KEEP_PROB, BATCH_SIZE, NUM_LAYERS):
    """Create the decoding cell and attention for the training and inference
    decoding layers."""
    for layer in range(NUM_LAYERS):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.contrib.rnn.LSTMCell(
                RNN_SIZE,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=KEEP_PROB)

    output_layer = Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        RNN_SIZE, enc_output, TEXT_LENGTH,
        normalize=False, name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
        dec_cell, attn_mech, RNN_SIZE)

    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        enc_state[0],
        _zero_state_tensors(RNN_SIZE, BATCH_SIZE, tf.float32))

    with tf.variable_scope("decode"):
        TRAINING_LOGITS = training_decoding_layer(dec_embed_input, SUMMARY_LENGTH,
                                                  dec_cell, initial_state,
                                                  output_layer, vocab_size,
                                                  MAX_SUMMARY_LENGTH)
    with tf.variable_scope("decode", reuse=True):
        INFERENCE_LOGITS = inference_decoding_layer(embeddings, VOCAB_TO_INT['<GO>'],
                                                    VOCAB_TO_INT['<EOS>'], dec_cell,
                                                    initial_state, output_layer,
                                                    MAX_SUMMARY_LENGTH, BATCH_SIZE)

    return TRAINING_LOGITS, INFERENCE_LOGITS
def decoding_layer(self, input, encoder_output, encoder_state):
    for i in range(self.num_layers):
        with tf.variable_scope('decoder_{}'.format(i)):
            decoder_cell = rnn.LSTMCell(
                self.cell_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            decoder_cell = rnn.DropoutWrapper(
                decoder_cell, input_keep_prob=self.keep_prob)

    output_layer = Dense(
        self.vocab_length,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attention_mech = seq2seq.BahdanauAttention(
        self.cell_size, encoder_output, self.in_length, normalize=False)

    decoder_cell = seq2seq.DynamicAttentionWrapper(
        decoder_cell, attention_mech, self.cell_size)

    zero_state = _zero_state_tensors(self.cell_size, self.batch_size, tf.float32)
    initial_state = seq2seq.DynamicAttentionWrapperState(
        encoder_state[0], zero_state)

    with tf.variable_scope("decode"):
        train_logits = self.train_decoding_layer(
            input, decoder_cell, initial_state, output_layer)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = self.inference_decoding_layer(
            self.embeddings, decoder_cell, initial_state, output_layer)

    return train_logits, inference_logits
def initial_alignments(self, batch_size, dtype):
    '''Returns initial alignments with all probability mass on the first block.'''
    max_time = self._alignments_size
    alignments = _zero_state_tensors(max_time - 1, batch_size, dtype)
    return tf.concat([tf.fill([batch_size, 1], 1.0), alignments], 1)
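# --- Worked example (standalone sketch, values chosen for illustration) ----------
# Reproduces the construction above outside the class, assuming
# `_alignments_size` is the memory length: every batch row starts with all
# probability mass on the first position.
import tensorflow as tf

batch_size, max_time = 2, 5
alignments = tf.zeros([batch_size, max_time - 1], tf.float32)
initial = tf.concat([tf.fill([batch_size, 1], 1.0), alignments], 1)
# initial == [[1., 0., 0., 0., 0.],
#             [1., 0., 0., 0., 0.]]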
def zero_state(self, batch_size, dtype):
    cell_state = self._cell.zero_state(batch_size, dtype)
    attention = rnn_cell_impl._zero_state_tensors(
        self.state_size.attention, batch_size, tf.float32)
    return SeqMatchSeqAttentionState(cell_state=cell_state, attention=attention)
def zero_input(self, batch_size, dtype):
    with tf.name_scope(type(self).__name__ + "ZeroInput", values=[batch_size]):
        output = rnn_cell_impl._zero_state_tensors(
            self._input_shape, batch_size, dtype)
    return output
def _create(s, d): return rnn_cell_impl._zero_state_tensors(s, batch_size, d)
def _init_decoder(self):
    data_y = process_decoding_input(self.data_y, self.vocab_to_int_y,
                                    self.batch_size)
    self.dec_embeddings = tf.Variable(
        tf.random_uniform([self.vocab_size_y, self.embedding_size], -1.0, 1.0),
        dtype=tf.float32)
    dec_embedded = tf.nn.embedding_lookup(self.dec_embeddings, data_y)

    with tf.variable_scope("decoder"):
        dec_cell = rnn_cell(self.cell_size, self.dec_num_layers,
                            self.dec_keep_prob)
        out_layer = Dense(
            self.vocab_size_y,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
        att_mechanism = seq2seq.BahdanauAttention(
            self.cell_size, self.enc_outputs, self.x_length, normalize=False)
        dec_cell = seq2seq.DynamicAttentionWrapper(
            dec_cell, att_mechanism, attention_size=self.cell_size)
        init_state = seq2seq.DynamicAttentionWrapperState(
            cell_state=self.enc_states[0],
            attention=_zero_state_tensors(self.cell_size, self.batch_size,
                                          tf.float32))

    with tf.variable_scope("decoding"):
        train_helper = seq2seq.TrainingHelper(
            dec_embedded, sequence_length=self.y_length, time_major=False)
        train_decoder = seq2seq.BasicDecoder(dec_cell, train_helper,
                                             init_state, out_layer)
        train_out, _ = seq2seq.dynamic_decode(
            train_decoder, output_time_major=False, impute_finished=True,
            maximum_iterations=self.max_length, swap_memory=True)
        self.decoder_train = train_out.rnn_output

    with tf.variable_scope("decoding", reuse=True):
        start_tokens = tf.tile(
            tf.constant([self.vocab_to_int_y[START]], dtype=tf.int32),
            [self.batch_size])
        infer_helper = seq2seq.GreedyEmbeddingHelper(
            embedding=self.dec_embeddings,
            start_tokens=start_tokens,
            end_token=self.vocab_to_int_y[STOP])
        infer_decoder = seq2seq.BasicDecoder(dec_cell, infer_helper,
                                             init_state, out_layer)
        infer_out, _ = seq2seq.dynamic_decode(
            infer_decoder, output_time_major=False, impute_finished=True,
            maximum_iterations=self.max_length)
        self.decoder_inference = infer_out.sample_id

    tf.identity(self.decoder_train, 'decoder_train')
    tf.identity(self.decoder_inference, 'decoder_inference')
def add_decoder(self):
    with tf.variable_scope('Decoder') as scope:
        with tf.device('/cpu:0'):
            self.dec_Wemb = tf.get_variable(
                'embedding',
                initializer=tf.random_uniform([dec_vocab_size + 2, self.dec_emb_size]),
                dtype=tf.float32)

        # get dynamic batch_size
        batch_size = tf.shape(self.enc_inputs)[0]

        dec_cell = self.cell(self.hidden_size)

        attn_mech = tf.contrib.seq2seq.LuongAttention(
            num_units=self.attn_size,
            memory=self.enc_outputs,
            memory_sequence_length=self.enc_sequence_length,
            normalize=False,
            name='LuongAttention')

        dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
            cell=dec_cell,
            attention_mechanism=attn_mech,
            attention_size=self.attn_size,
            # attention_history=False (in ver 1.2)
            name='Attention_Wrapper')

        initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
            cell_state=self.enc_last_state,
            attention=_zero_state_tensors(self.attn_size, batch_size, tf.float32))

        # output projection (replacing `OutputProjectionWrapper`)
        output_layer = Dense(dec_vocab_size + 2, name='output_projection')

        if self.mode == 'training':
            # maximum unrollings in current batch = max(dec_sent_len) + 1 (GO symbol)
            self.max_dec_len = tf.reduce_max(self.dec_sequence_length + 1,
                                             name='max_dec_len')

            self.dec_emb_inputs = tf.nn.embedding_lookup(
                self.dec_Wemb, self.dec_inputs, name='emb_inputs')

            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.dec_emb_inputs,
                sequence_length=self.dec_sequence_length + 1,
                time_major=False,
                name='training_helper')

            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=dec_cell,
                helper=training_helper,
                initial_state=initial_state,
                output_layer=output_layer)

            self.train_dec_outputs, train_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                training_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_dec_len)

            # dec_outputs: collections.namedtuple(rnn_outputs, sample_id)
            # dec_outputs.rnn_output: [batch_size x max(dec_sequence_len) x dec_vocab_size+2], tf.float32
            # dec_outputs.sample_id: [batch_size], tf.int32

            # logits: [batch_size x max_dec_len x dec_vocab_size+2]
            self.logits = tf.identity(self.train_dec_outputs.rnn_output, name='logits')

            # targets: [batch_size x max_dec_len x dec_vocab_size+2]
            self.targets = tf.slice(self.dec_inputs, [0, 0],
                                    [-1, self.max_dec_len], 'targets')

            # masks: [batch_size x max_dec_len]
            # => ignore outputs after `dec_sequence_length + 1` when calculating loss
            self.masks = tf.sequence_mask(self.dec_sequence_length + 1,
                                          self.max_dec_len,
                                          dtype=tf.float32,
                                          name='masks')

            # Control loss dimensions with `average_across_timesteps` and `average_across_batch`
            # internal: `tf.nn.sparse_softmax_cross_entropy_with_logits`
            self.batch_loss = tf.contrib.seq2seq.sequence_loss(
                logits=self.logits,
                targets=self.targets,
                weights=self.masks,
                name='batch_loss')

            # prediction sample for validation
            self.valid_predictions = tf.identity(self.train_dec_outputs.sample_id,
                                                 name='valid_preds')

            # List of training variables
            # self.training_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

        elif self.mode == 'inference':
            start_tokens = tf.tile(tf.constant([self.start_token], dtype=tf.int32),
                                   [batch_size],
                                   name='start_tokens')

            inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=self.dec_Wemb,
                start_tokens=start_tokens,
                end_token=self.end_token)

            inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=dec_cell,
                helper=inference_helper,
                initial_state=initial_state,
                output_layer=output_layer)

            infer_dec_outputs, infer_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                inference_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=dec_sentence_length)

            # [batch_size x dec_sentence_length], tf.int32
            self.predictions = tf.identity(infer_dec_outputs.sample_id,
                                           name='predictions')
def initial_alignments(self, batch_size, dtype):
    max_time = self._alignments_size
    return rnn_cell_impl._zero_state_tensors(max_time, batch_size, dtype)
def initial_state(self, batch_size, dtype):
    state_size_ = self.state_size
    return rnn_cell_impl._zero_state_tensors(state_size_, batch_size, dtype)
def _init_decoder(self, forward_only):
    with tf.variable_scope("decoder") as scope:
        def output_fn(outputs):
            return tf.contrib.layers.linear(outputs, self.target_vocab_size,
                                            scope=scope)

        # attention_states: size [batch_size, max_time, num_units]
        # attention_states = tf.transpose(self.encoder_outputs, [1, 0, 2])

        self.batch_size = tf.shape(self.encoder_inputs)[0]

        self.attn_mech = tf.contrib.seq2seq.LuongAttention(
            num_units=self.dec_hidden_size,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length,
            normalize=False,
            name='LuongAttention')

        self.dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
            cell=self.decoder_cell,
            attention_mechanism=self.attn_mech,
            attention_size=self.dec_hidden_size,
            # attention_history=False (in ver 1.2)
            name='Attention_Wrapper')

        self.initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
            cell_state=self.encoder_state,
            attention=_zero_state_tensors(self.dec_hidden_size, self.batch_size,
                                          tf.float32))

        self.output_layer = Dense(self.target_vocab_size + 2,
                                  name='output_projection')

        if forward_only:
            start_tokens = tf.tile(tf.constant([model_config.PAD_ID], dtype=tf.int32),
                                   [self.batch_size],
                                   name='start_tokens')

            inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=self.dec_embedding_matrix,
                start_tokens=start_tokens,
                end_token=model_config.EOS_ID)

            inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=self.dec_cell,
                helper=inference_helper,
                initial_state=self.initial_state,
                output_layer=self.output_layer)

            infer_dec_outputs, infer_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                inference_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.target_vocab_size)

            # [batch_size x dec_sentence_length], tf.int32
            self.predictions = tf.identity(infer_dec_outputs.sample_id,
                                           name='predictions')
        else:
            # maximum unrollings in current batch = max(dec_sent_len) + 1 (GO symbol)
            self.max_dec_len = tf.reduce_max(self.decoder_inputs_length + 1,
                                             name='max_dec_len')

            self.training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.decoder_inputs_embedded,
                sequence_length=self.decoder_inputs_length + 1,
                time_major=False,
                name='training_helper')

            self.training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=self.dec_cell,
                helper=self.training_helper,
                initial_state=self.initial_state,
                output_layer=self.output_layer)

            self.decoder_outputs, self.decoder_state = tf.contrib.seq2seq.dynamic_decode(
                self.training_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_dec_len)

            # logits: [batch_size x max_dec_len x dec_vocab_size+2]
            self.logits = tf.identity(self.decoder_outputs.rnn_output, name='logits')

            # targets: [batch_size x max_dec_len x dec_vocab_size+2]
            self.targets = tf.slice(self.decoder_inputs, [0, 0],
                                    [-1, self.max_dec_len], 'targets')

            # masks: [batch_size x max_dec_len]
            # => ignore outputs after `dec_sequence_length + 1` when calculating loss
            self.masks = tf.sequence_mask(self.decoder_inputs_length + 1,
                                          self.max_dec_len,
                                          dtype=tf.float32,
                                          name='masks')

            # internal: `tf.nn.sparse_softmax_cross_entropy_with_logits`
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=self.logits,
                targets=self.targets,
                weights=self.masks,
                name='batch_loss')
cell_decode = []
for a in range(SIZE_RNN_LAYER):
    cell = rnn.BasicLSTMCell(SIZE_RNN_STATE)
    cell = rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cell_decode.append(cell)
multi_rnn_decode = rnn.MultiRNNCell(cell_decode, state_is_tuple=True)

dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
    cell=multi_rnn_decode,
    attention_mechanism=attn_luong,
    attention_size=SIZE_ATTN,
    name="attention_wrapper")

initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
    cell_state=state_enc,
    attention=_zero_state_tensors(SIZE_ATTN, batch_size, tf.float32))

output_layer = Dense(
    voc_size_kor,
    kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

# train mode
with tf.variable_scope("decoder_layer"):
    train_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=embed_dec, sequence_length=dec_pad_len, time_major=False)
    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell, train_helper, initial_state, output_layer)
    output_train_dec, state_train_dec = tf.contrib.seq2seq.dynamic_decode(
        decoder=train_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=padded_kor_len)