def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=dtypes.float32,
                          scope=None,
                          beam_search=True,
                          beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode the
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an RNN decoder,
  initialized with the last encoder state, on the embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".
    beam_search: Boolean; if True, decode with beam search instead of
      greedily.
    beam_size: Integer; the beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. Each
        item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder: wrap the cell so integer ids are embedded before the RNN runs.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder: project RNN outputs to the vocabulary unless the caller
    # supplies an explicit output projection (e.g. for sampled softmax).
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    return embedding_rnn_decoder(
        decoder_inputs,
        encoder_state,
        cell,
        num_decoder_symbols,
        embedding_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        beam_search=beam_search,
        beam_size=beam_size)
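# A minimal call sketch for the variant above, assuming TF 1.x and
# hypothetical toy sizes. `embedding_rnn_seq2seq` here is the function just
# defined (with the extra beam_search/beam_size arguments), not the stock
# tf.contrib.legacy_seq2seq version.
import tensorflow as tf

ENC_LEN, DEC_LEN, VOCAB, EMB, UNITS = 10, 12, 4000, 128, 256  # illustrative

encoder_inputs = [tf.placeholder(tf.int32, [None], name='enc%d' % i)
                  for i in range(ENC_LEN)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name='dec%d' % i)
                  for i in range(DEC_LEN)]
cell = tf.nn.rnn_cell.GRUCell(UNITS)

outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=VOCAB,
    num_decoder_symbols=VOCAB,
    embedding_size=EMB,
    feed_previous=True,   # decode from the GO symbol at inference time
    beam_search=False)    # greedy decoding for this sketch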
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder. Deep-copy the cell so the encoder does not share the decoder's
    # cell instance (and hence its variables).
    encoder_cell = copy.deepcopy(cell)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        cell,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention,
        beam_search=beam_search,
        beam_size=beam_size)
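# The top_states/attention_states step above turns T per-step encoder outputs
# of shape [batch, size] into one [batch, T, size] tensor the attention reads
# from. A standalone sketch of just that shape manipulation, with toy shapes:
import tensorflow as tf

# Three per-step encoder outputs, each [batch=2, size=4].
steps = [tf.ones([2, 4]) * i for i in range(3)]

# Reshape each to [batch, 1, size], then concatenate along the time axis.
top_states = [tf.reshape(e, [-1, 1, 4]) for e in steps]
attention_states = tf.concat(top_states, 1)   # shape [2, 3, 4]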
def embedding_attention_bidirectional_seq2seq(self,
                                              encoder_inputs,
                                              decoder_inputs,
                                              input_cell1,
                                              input_cell2,
                                              output_cell,
                                              num_encoder_symbols,
                                              num_decoder_symbols,
                                              embedding_size,
                                              num_heads=4,
                                              output_projection=None,
                                              feed_previous=False,
                                              dtype=None,
                                              scope=None,
                                              initial_state_attention=False):
  with tf.variable_scope(
      scope or "embedding_attention_bidirectional_seq2seq") as scope:
    # Encoder: one embedding-wrapped cell per direction.
    encoder_cell1 = core_rnn_cell.EmbeddingWrapper(
        input_cell1,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_cell2 = core_rnn_cell.EmbeddingWrapper(
        input_cell2,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state1, encoder_state2 = (
        core_rnn.static_bidirectional_rnn(
            encoder_cell1, encoder_cell2, encoder_inputs, dtype=tf.float32))

    # First calculate a concatenation of encoder outputs to put attention on.
    # Each bidirectional output is the forward and backward outputs
    # concatenated, hence the summed feature size.
    top_states = [
        array_ops.reshape(
            e, [-1, 1, input_cell1.output_size + input_cell2.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Combine the final states of both encoders (tuple concatenation for
    # multi-layer tuple states).
    encoder_state = encoder_state1 + encoder_state2

    # Decoder.
    output_size = None
    if output_projection is None:
      output_cell = core_rnn_cell.OutputProjectionWrapper(
          output_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    assert isinstance(feed_previous, bool)
    return seq2seq.embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        output_cell,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention)
def my_encoder(encoder_inputs,
               cell,
               num_encoder_symbols,
               embedding_size,
               dtype=None,
               scope=None):
  # Reuses the "embedding_attention_seq2seq" scope name so the encoder
  # variables line up with those created by the full model.
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)
    return encoder_outputs, encoder_state
def __init__(self, size, num_layers, vocab_size, buckets):
  self.__name__ = 'StepGAN'
  self.cell = tf.nn.rnn_cell.MultiRNNCell(
      [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
  self.enc_cell = tf.nn.rnn_cell.MultiRNNCell(
      [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
  self.enc_cell = core_rnn_cell.EmbeddingWrapper(
      cell=self.enc_cell, embedding_classes=vocab_size, embedding_size=size)
  self.embedding = variable_scope.get_variable('embedding',
                                               [vocab_size, size])
  self.D_W = tf.Variable(xavier_init([size * num_layers, 1]))
  self.D_b = tf.Variable(tf.zeros(shape=[1]))
  self.real_data = [
      tf.placeholder(tf.int32, shape=[None], name='realdata{0}'.format(i))
      for i in range(buckets[-1][1])
  ]
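# The snippet above calls a `xavier_init` helper that is not shown. A common
# definition from GAN tutorials, offered here only as an assumption about
# what the author used:
import tensorflow as tf

def xavier_init(shape):
  """Xavier/Glorot-style initial values (assumed implementation of the
  helper referenced above)."""
  in_dim = shape[0]
  stddev = 1.0 / tf.sqrt(in_dim / 2.0)
  return tf.random_normal(shape=shape, stddev=stddev)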
def embedding_rnn_encoder(encoder_inputs,
                          cell,
                          num_symbols,
                          embedding_size,
                          scope=None,
                          dtype=None):
  with variable_scope.variable_scope(
      scope or "embedding_rnn_encoder", dtype=dtype) as scope:
    dtype = scope.dtype
    # Note that we use a deep copy of the original cell.
    encoder_cell = copy.deepcopy(cell)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)
    return encoder_state, attention_states
def my_inf_encoder(decoder_inputs,
                   cell,
                   num_encoder_symbols,
                   embedding_size,
                   dtype=None,
                   scope=None):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype,
      reuse=True) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    # Don't share the parameters with the prior encoder.
    with variable_scope.variable_scope(
        "embedding_inf_network", dtype=dtype) as scope:
      dtype = scope.dtype
      encoder_outputs, encoder_state = rnn.static_rnn(
          encoder_cell, decoder_inputs, dtype=dtype)
      return encoder_outputs, encoder_state
def __init__(self,
             method,
             model,
             kdim,
             edim,
             kbembed_size,
             triples_num,
             size,
             num_layers,
             vocab_size,
             buckets,
             hops_num=1,    # TODO
             kgpath_len=1,  # TODO
             learning_rate=0.5,
             learning_rate_decay_factor=0.99,
             max_gradient_norm=5.0,
             feed_prev=False,
             batch_size=32,
             dtype=tf.float32):
  model_funcs = importlib.import_module('models.' + model)
  globals().update(model_funcs.__dict__)

  # Knowledge-graph hyperparameters.
  self.kdim = kdim
  self.edim = edim
  self.kbembed_size = kbembed_size
  self.triples_num = triples_num
  self.hops_num = hops_num      # TODO
  self.kgpath_len = kgpath_len  # TODO

  # Basic hyperparameters.
  self.size = size
  self.num_layers = num_layers
  self.vocab_size = vocab_size
  print('VOCABSIZE:{}'.format(vocab_size))
  self.buckets = buckets
  self.feed_prev = feed_prev
  self.batch_size = batch_size
  self.learning_rate = tf.Variable(
      float(learning_rate), trainable=False, dtype=dtype)
  self.op_lr_decay = self.learning_rate.assign(
      self.learning_rate * learning_rate_decay_factor)
  self.global_step = tf.Variable(0, trainable=False)

  # Main model: a multi-layer GRU decoder cell and an embedding-wrapped
  # multi-layer GRU encoder cell.
  self.cell = tf.nn.rnn_cell.MultiRNNCell(
      [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
  self.enc_cell = tf.nn.rnn_cell.MultiRNNCell(
      [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
  self.enc_cell = core_rnn_cell.EmbeddingWrapper(
      cell=self.enc_cell, embedding_classes=vocab_size, embedding_size=size)

  # Input embedding.
  self.embedding = variable_scope.get_variable('embedding',
                                               [vocab_size, size])

  # Encoder placeholders.
  self.encoder_inputs = []
  for bid in range(buckets[-1][0]):
    self.encoder_inputs.append(
        tf.placeholder(tf.int32, shape=[None],
                       name='encoder{0}'.format(bid)))
  self.seq_len = tf.placeholder(tf.int32, shape=[None], name='enc_seq_len')

  # Decoder placeholders.
  self.decoder_inputs = []
  self.targets = []
  self.target_weights = []
  self.masks = []
  for bid in range(buckets[-1][1] + 1):
    self.decoder_inputs.append(
        tf.placeholder(tf.int32, shape=[None],
                       name='decoder{0}'.format(bid)))
    self.targets.append(
        tf.placeholder(tf.int32, shape=[None],
                       name='target{0}'.format(bid)))
    self.target_weights.append(
        tf.placeholder(tf.float32, shape=[None],
                       name='weight{0}'.format(bid)))
    self.masks.append(
        tf.placeholder(tf.float32, shape=[None],
                       name='mask_unit{0}'.format(bid)))

  # TODO passed args funcs
  self.output_projection = build_out_proj(size, vocab_size, kdim)
  self.kg_projection = build_kg_proj(size, kdim)
  self.memA, self.memC = build_memnet(size, num_layers, kbembed_size,
                                      xavier_init)
  self.Tpred_W, self.Tpred_b = build_transit_mat(size, kdim, edim,
                                                 xavier_init)
  self.S, self.neA = hold_graph(kdim, edim, dtype)
  self.facts = hold_facts(triples_num, kbembed_size, dtype)
  self.kg_indices = hold_kg_indices()

  more_args = (self.Tpred_W, self.Tpred_b, self.kdim, self.edim, self.neA,
               self.S, self.hops_num, self.kgpath_len, self.kg_projection)
  mem_args = (self.batch_size, self.size, self.num_layers, self.hops_num,
              self.facts, self.kg_indices, self.memA, self.memC)

  if method == 'TRAIN':
    self.enc_state = []
    self.losses = []
    self.logits = []
    self.decKB_losses = []
    self.decN_losses = []
    self.ptr_losses = []
    self.outputs = []
    self.a1s = []
    self.kdists = []
    self.Ndists = []
    self.Rdebugs = []
    for j, bucket in enumerate(buckets):
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(),
          reuse=True if j > 0 else None):
        _, enc_state = encode(self.enc_cell,
                              self.encoder_inputs[:bucket[0]],
                              self.seq_len)
        enc_state = enc_state_transform(enc_state, mem_args)
        logits, hiddens, dec_state = decode(
            self.cell, enc_state,
            self.vocab_size, self.embedding,
            self.decoder_inputs[:bucket[1]],
            self.output_projection,
            bucket[1] + 1, more_args,
            None, feed_prev=False,
            copy_transform=copy_transform)
        outputs, a1s, kdists, Ndists, final_logits, Rdebug = copymech(
            logits, self.output_projection, self.vocab_size, self.kdim,
            more_args, mem_args, copy_transform)
        loss = compute_loss(final_logits, self.targets[:bucket[1]],
                            self.target_weights[:bucket[1]],
                            self.output_projection, self.vocab_size)
        self.enc_state.append(enc_state)
        self.losses.append(loss)
        self.logits.append(logits)
        self.outputs.append(outputs)
        self.a1s.append(a1s)
        self.kdists.append(kdists)
        self.Ndists.append(Ndists)
        self.Rdebugs.append(Rdebug)

    # TODO check
    self.softmax_outputs, self.argmax_outputs = to_check(
        self.logits, self.outputs, self.output_projection)

    # Update method: clipped-gradient SGD, one update op per bucket.
    self.op_update = []
    optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    params = tf.trainable_variables()
    print(params)
    for j in range(len(self.buckets)):
      gradients = tf.gradients(self.losses[j], params)
      clipped_gradients, _ = tf.clip_by_global_norm(gradients,
                                                    max_gradient_norm)
      self.op_update.append(
          optimizer.apply_gradients(zip(clipped_gradients, params),
                                    global_step=self.global_step))

  elif method == 'TEST':
    self.enc_state = []
    self.argmax_outputs = []
    self.logits = []
    self.a1s = []
    self.kdists = []
    self.Ndists = []
    self.Rdebugs = []
    for j, bucket in enumerate(buckets):
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(),
          reuse=True if j > 0 else None):
        _, enc_state = encode(self.enc_cell,
                              self.encoder_inputs[:bucket[0]],
                              self.seq_len)
        enc_state = enc_state_transform(enc_state, mem_args)
        logits, argmax_outputs, hiddens, a1s, kdists, Ndists, Rdebugs = decode(
            self.cell, enc_state,
            self.vocab_size, self.embedding,
            self.decoder_inputs[:bucket[1]],
            self.output_projection,
            bucket[1], more_args,
            mem_args, feed_prev=True,
            loop_function=loop_function,
            copy_transform=copy_transform)
        self.enc_state.append(enc_state)
        self.argmax_outputs.append(argmax_outputs)
        self.logits.append(logits)
        self.a1s.append(a1s)
        self.kdists.append(kdists)
        self.Ndists.append(Ndists)
        self.Rdebugs.append(Rdebugs)
    params = tf.trainable_variables()
    print(params)

  # Saver.
  self.saver = tf.train.Saver(var_list=tf.trainable_variables(),
                              max_to_keep=None, sharded=True)
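# The per-bucket training op above uses the standard TF 1.x clipped-gradient
# recipe. Isolated into a helper with hypothetical names for clarity:
import tensorflow as tf

def clipped_sgd_update(loss, learning_rate, max_gradient_norm, global_step):
  """Build one training op: gradients, global-norm clipping, then apply.

  A standalone sketch of the pattern used above; the function name and
  arguments are illustrative, not from the original source.
  """
  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  params = tf.trainable_variables()
  gradients = tf.gradients(loss, params)
  clipped, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
  return optimizer.apply_gradients(zip(clipped, params),
                                   global_step=global_step)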
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode the
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on the embedded decoder_inputs and
  attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can
  be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell defining the encoder cell function and
      size.
    dec_cell: tf.nn.rnn_cell.RNNCell defining the decoder cell function and
      size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = enc_cell
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      dec_cell = core_rnn_cell.OutputProjectionWrapper(dec_cell,
                                                       num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return tf.contrib.legacy_seq2seq.embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = tf.contrib.legacy_seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
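# When feed_previous arrives as a Tensor, the function above builds both
# decoder graphs over one set of variables and lets tf.cond pick one at run
# time. A minimal, self-contained sketch of that reuse pattern (toy
# computation, TF 1.x):
import tensorflow as tf

feed_previous = tf.placeholder(tf.bool, shape=[])

def branch(flag):
  # The first branch built creates the variables; the second reuses them.
  reuse = None if flag else True
  with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
    w = tf.get_variable('w', shape=[4], initializer=tf.ones_initializer())
    return tf.reduce_sum(w) * (2.0 if flag else 3.0)

# Both branches are constructed at graph-build time; only one executes
# per session run, selected by the feed_previous placeholder.
result = tf.cond(feed_previous, lambda: branch(True), lambda: branch(False))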
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode the
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an RNN decoder,
  initialized with the last encoder state, on the embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: core_rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense
        representation of predicted tokens). It is of shape
        [batch_size x num_decoder_symbols] when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. Each
        item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or
                                     "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = copy.deepcopy(cell)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs,
            encoder_state,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
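# tf.cond branches must return flat lists of Tensors, which is why the code
# above flattens nested decoder states going in and rebuilds them coming out.
# A small round-trip sketch of that mechanism (TF 1.x; `nest` is
# tensorflow.python.util.nest, as used above):
import tensorflow as tf
from tensorflow.python.util import nest

# A toy nested "state", e.g. what a two-layer MultiRNNCell would return.
state = (tf.zeros([8, 32]), tf.zeros([8, 32]))

flat = nest.flatten(state)        # a flat list: [Tensor, Tensor]
rebuilt = nest.pack_sequence_as(  # back to the original nesting
    structure=state, flat_sequence=flat)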
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=dtypes.float32,
                         scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and
  multiple decoders. A reference for multi-task sequence-to-sequence learning
  can be found here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to the
      corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to
      an integer specifying the number of symbols for the corresponding
      decoder; len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      RNN cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the
        same length as decoder_inputs_dict[name]; each element in the list is
        a 2D Tensor with shape [batch_size x num_decoder_symbols_dict[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of
        the corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}

  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder: one per task, each in its own variable scope.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]

      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(cell,
                                                        num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs,
              encoder_state,
              decoder_cell,
              num_decoder_symbols,
              embedding_size,
              feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs,
                  encoder_state,
                  decoder_cell,
                  num_decoder_symbols,
                  embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]

          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
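# A call sketch for the multi-task model above, with hypothetical task names
# and sizes (the two decoders share the encoder but not each other's
# parameters):
import tensorflow as tf

ENC_LEN, DEC_LEN = 10, 12  # illustrative lengths
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(ENC_LEN)]
dec_a = [tf.placeholder(tf.int32, [None]) for _ in range(DEC_LEN)]
dec_b = [tf.placeholder(tf.int32, [None]) for _ in range(DEC_LEN)]

cell = tf.nn.rnn_cell.GRUCell(256)
outputs_dict, state_dict = one2many_rnn_seq2seq(
    encoder_inputs,
    decoder_inputs_dict={'pos_tags': dec_a, 'translation': dec_b},
    cell=cell,
    num_encoder_symbols=4000,
    num_decoder_symbols_dict={'pos_tags': 50, 'translation': 8000},
    embedding_size=128)
# outputs_dict['translation'] is a list of DEC_LEN logit tensors, one per step.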
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                dec_cell,
                                bi_lstm,
                                attent,
                                beam_search,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN (or a
  bidirectional RNN when bi_lstm is True) to encode the embedded
  encoder_inputs into a state vector. It keeps the outputs of this RNN at
  every step to use for attention later. Next, it embeds decoder_inputs by
  another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on the embedded decoder_inputs and
  attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can
  be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: core_rnn_cell.RNNCell defining the encoder cell function and size.
    dec_cell: core_rnn_cell.RNNCell defining the decoder cell function and
      size.
    bi_lstm: Boolean; if True, encode with a bidirectional LSTM and
      concatenate the forward and backward final states.
    attent: Flag forwarded to embedding_attention_decoder.
    beam_search: Flag forwarded to embedding_attention_decoder.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    # Encoder.
    if bi_lstm:
      encoder_fw_cell = copy.deepcopy(cell)
      encoder_fw_cell = core_rnn_cell.EmbeddingWrapper(
          encoder_fw_cell,
          embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)
      encoder_bw_cell = copy.deepcopy(cell)
      encoder_bw_cell = core_rnn_cell.EmbeddingWrapper(
          encoder_bw_cell,
          embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)
      encoder_outputs, output_state_fw, output_state_bw = (
          tf.contrib.rnn.static_bidirectional_rnn(
              encoder_fw_cell, encoder_bw_cell, encoder_inputs, dtype=dtype))

      # Bidirectional outputs are forward/backward concatenations, hence the
      # doubled feature size.
      top_states = [
          array_ops.reshape(e, [-1, 1, cell.output_size * 2])
          for e in encoder_outputs
      ]
      attention_states = array_ops.concat(top_states, 1)

      # Concatenate the forward and backward final LSTM states.
      encoder_final_state_c = tf.concat(
          (output_state_fw.c, output_state_bw.c), 1)
      encoder_final_state_h = tf.concat(
          (output_state_fw.h, output_state_bw.h), 1)
      encoder_state = LSTMStateTuple(
          c=encoder_final_state_c, h=encoder_final_state_h)
    else:
      encoder_cell = copy.deepcopy(cell)
      encoder_cell = core_rnn_cell.EmbeddingWrapper(
          encoder_cell,
          embedding_classes=num_encoder_symbols,
          embedding_size=embedding_size)
      encoder_outputs, encoder_state = core_rnn.static_rnn(
          encoder_cell, encoder_inputs, dtype=dtype)
      top_states = [
          array_ops.reshape(e, [-1, 1, cell.output_size])
          for e in encoder_outputs
      ]
      attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      # Make a new decoder cell with an output projection.
      cell = core_rnn_cell.OutputProjectionWrapper(dec_cell,
                                                   num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        cell,
        dec_cell,
        attent,
        beam_search,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention)
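# The LSTMStateTuple merge above assumes a single-layer LSTM, whose final
# state is one (c, h) pair. If `cell` were a MultiRNNCell, the forward and
# backward states would be tuples of such pairs and would need merging per
# layer. A sketch of that generalization (an assumption, not part of the
# snippet above):
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMStateTuple

def merge_bidirectional_states(output_state_fw, output_state_bw):
  """Concatenate per-layer (c, h) pairs from a multi-layer bi-LSTM."""
  return tuple(
      LSTMStateTuple(c=tf.concat([fw.c, bw.c], 1),
                     h=tf.concat([fw.h, bw.h], 1))
      for fw, bw in zip(output_state_fw, output_state_bw))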
def embedding_attention_sampled_seq2seq(encoder_inputs,
                                        decoder_inputs,
                                        cell,
                                        num_encoder_symbols,
                                        num_decoder_symbols,
                                        embedding_size,
                                        num_heads=1,
                                        output_projection=None,
                                        feed_previous=False,
                                        dtype=None,
                                        scope=None,
                                        initial_state_attention=False):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell_1,
                                cell_2,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode the
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an attention decoder,
  initialized with the last encoder state, on the embedded decoder_inputs and
  attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell_1: rnn_cell.RNNCell defining the encoder cell function and size.
    cell_2: rnn_cell.RNNCell defining the decoder cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    beam_search: Boolean; if True, decode with beam search instead of
      greedily.
    beam_size: Integer; the beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell_1,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell_1.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(axis=1, values=top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell_2 = rnn_cell.OutputProjectionWrapper(cell_2, num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        cell_2,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention,
        beam_search=beam_search,
        beam_size=beam_size)
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  with variable_scope.variable_scope(scope or
                                     "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs,
                                      dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          scope=scope)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs,
            encoder_state,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state