def embedding_attention_decoder(decoder_inputs,
                                initial_state,
                                attention_states,
                                cell,
                                num_symbols,
                                embedding_size,
                                num_heads=1,
                                output_size=None,
                                output_projection=None,
                                feed_previous=False,
                                scope=None,
                                initial_state_attention=False):
  with variable_scope.variable_scope(scope or "embedding_attention_decoder"):
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_symbols)
      output_size = num_symbols
    return tf_embedding_attention_decoder(
        decoder_inputs,
        initial_state,
        attention_states,
        cell,
        num_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention)
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=dtypes.float32,
                          scope=None,
                          beam_search=True,
                          beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an RNN decoder,
  initialized with the last encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".
    beam_search: Boolean; if True, decode with beam search.
    beam_size: Integer; beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. Each
        item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    return embedding_rnn_decoder(
        decoder_inputs,
        encoder_state,
        cell,
        num_decoder_symbols,
        embedding_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        beam_search=beam_search,
        beam_size=beam_size)
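# --- Illustration (not from the original sources): a minimal usage sketch of
# the embedding_rnn_seq2seq variant above, assuming TF 1.x graph mode and that
# the beam_search/beam_size kwargs are threaded through to its
# embedding_rnn_decoder. Vocabulary sizes and names are placeholders.
import tensorflow as tf

enc_len, dec_len, batch = 10, 12, 32
encoder_inputs = [
    tf.placeholder(tf.int32, [batch], name="enc%d" % t) for t in range(enc_len)
]
decoder_inputs = [
    tf.placeholder(tf.int32, [batch], name="dec%d" % t) for t in range(dec_len)
]
cell = tf.contrib.rnn.GRUCell(256)

outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=8000,
    num_decoder_symbols=8000,
    embedding_size=128,
    feed_previous=True,   # inference: feed each output back as the next input
    beam_search=False)    # greedy decoding; beam_size only matters when True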
def get_enc_cell(self, cell_size, vocab_size):
  cell = core_rnn_cell.GRUCell(cell_size)
  # TODO
  # if self.is_training:
  #   cell = core_rnn_cell.DropoutWrapper(cell, 0.5, 0.5)
  cell = core_rnn_cell.InputProjectionWrapper(cell, cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, cell_size)
  return cell
def get_enc_cell(self, cell_size, vocab_size):
  cell = core_rnn_cell.GRUCell(cell_size)
  if self.phase_train:
    cell = core_rnn_cell.DropoutWrapper(
        cell, input_keep_prob=0.5, output_keep_prob=0.5)
  cell = core_rnn_cell.InputProjectionWrapper(cell, cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, vocab_size)
  return cell
def get_pretrain_enc_cell(self):
  cell = gru_ops.GRUBlockCell(1024)
  if self.is_training:
    cell = core_rnn_cell.DropoutWrapper(
        cell, input_keep_prob=0.5, output_keep_prob=0.5)
  cell = core_rnn_cell.InputProjectionWrapper(cell, 1024)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, 1024)
  cell = core_rnn_cell.DeviceWrapper(cell, device='/gpu:0')
  return cell
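# --- Illustration (not from the original sources): the wrapper-composition
# pattern used by the cell constructors above. Wrappers nest outermost-first
# on the input path: with the order below, a step input is first projected
# (InputProjectionWrapper), then input dropout is applied, then the GRU runs;
# on the way out, output dropout is applied before the output projection.
# Hypothetical helper; `gru_ops` and `core_rnn_cell` are the contrib modules
# the snippets above already import.
def make_wrapped_cell(cell_size, out_size, is_training):
  cell = gru_ops.GRUBlockCell(cell_size)
  if is_training:
    cell = core_rnn_cell.DropoutWrapper(
        cell, input_keep_prob=0.5, output_keep_prob=0.5)
  cell = core_rnn_cell.InputProjectionWrapper(cell, cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, out_size)
  return cell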
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = copy.deepcopy(cell)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        cell,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention,
        beam_search=beam_search,
        beam_size=beam_size)
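# --- Illustration (not from the original sources): the attention variant is
# called the same way as the plain RNN version in the earlier sketch; with
# beam_search=True the decoder keeps beam_size hypotheses instead of decoding
# greedily. A hedged sketch reusing the placeholders and cell defined above.
outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=8000,
    num_decoder_symbols=8000,
    embedding_size=128,
    num_heads=1,                     # a single attention read head
    feed_previous=True,              # decode from the GO symbol
    initial_state_attention=False,   # initial attentions are zero
    beam_search=True, beam_size=10)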
def get_cell(self, out_proj=True):
  # cell = tf.nn.rnn_cell.BasicLSTMCell(self._num_lstm_units)
  cell = tf.contrib.rnn.LayerNormBasicLSTMCell(self._num_lstm_units)
  if self._phase_train:
    cell = tf.nn.rnn_cell.DropoutWrapper(
        cell,
        input_keep_prob=self._dropout_keep_prob,
        output_keep_prob=self._dropout_keep_prob)
  if out_proj:
    cell = core_rnn_cell.OutputProjectionWrapper(cell, self._num_lstm_units)
  return cell
def do_job(self):
  first_layer_outputs = []
  num_splits = 15
  context_frames = SampleRandomSequence(model_input, num_frames, 50)
  cell = gru_ops.GRUBlockCell(1024)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, vocab_size)
  with tf.variable_scope("EncLayer0"):
    cell = gru_ops.GRUBlockCell(1024)
    for i in xrange(num_splits):
      if i > 0:
        tf.get_variable_scope().reuse_variables()
      # `context_frames` is the sampled input sequence from above.
      enc_outputs, enc_state = tf.nn.dynamic_rnn(
          cell, context_frames, scope="enc0")
      enc_state = moe_layer(
          enc_state, 1024, 4, act_func=None, l2_penalty=1e-12)
      if is_training:
        enc_state = tf.nn.dropout(enc_state, 0.5)
      first_layer_outputs.append(enc_state)
  with tf.variable_scope("EncLayer1"):
    cell = gru_ops.GRUBlockCell(1024)
    first_layer_outputs = tf.stack(first_layer_outputs, axis=1)
    enc_outputs, enc_state = tf.nn.dynamic_rnn(
        cell, first_layer_outputs, scope="enc1")
    flatten_outputs = tf.reduce_mean(enc_outputs, axis=1)
  with tf.variable_scope("FC0"):
    flatten_outputs = moe_layer(
        flatten_outputs, 1024, 2, act_func=tf.nn.relu, l2_penalty=1e-8)
    if is_training:
      flatten_outputs = tf.nn.dropout(flatten_outputs, 0.5)
  with tf.variable_scope("FC1"):
    logits = moe_layer(
        flatten_outputs, vocab_size, 2, act_func=tf.nn.sigmoid,
        l2_penalty=1e-8)
    logits = tf.clip_by_value(logits, 0., 1.)
  return {"predictions": logits}
def get_enc_cell(self, cell_size, vocab_size):
  # cell = cudnn_rnn_ops.CudnnGRU(1, cell_size, (1024+128))
  cells = []
  cell = gru_ops.GRUBlockCell(cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, cell_size)
  cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=0.5)
  cells.append(cell)
  cell = gru_ops.GRUBlockCell(cell_size)
  cells.append(cell)
  cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)
  return cell
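# --- Illustration (not from the original sources): a hedged note on
# state_is_tuple. With state_is_tuple=True, MultiRNNCell.zero_state returns a
# tuple of per-layer states; with state_is_tuple=False (as above) the layer
# states are concatenated along axis 1 into one 2D tensor, which legacy code
# that slices or reshapes encoder_state directly tends to assume.
import tensorflow as tf

cells = [tf.contrib.rnn.GRUCell(256) for _ in range(2)]
stacked = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)
init_state = stacked.zero_state(batch_size=32, dtype=tf.float32)
# init_state is a single Tensor of shape [32, 512] rather than a 2-tuple.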
def testBasicRNNSeq2Seq(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5)):
      inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
      dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
      cell = core_rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
      dec, mem = seq2seq_lib.basic_rnn_seq2seq(inp, dec_inp, cell)
      sess.run([variables.global_variables_initializer()])
      res = sess.run(dec)
      self.assertEqual(3, len(res))
      self.assertEqual((2, 4), res[0].shape)
      res = sess.run([mem])
      self.assertEqual((2, 2), res[0].shape)
def my_stack_embedding_attention_seq2seq(encoder_outputs,
                                         encoder_state,
                                         decoder_inputs,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         num_heads=1,
                                         output_projection=None,
                                         feed_previous=True,
                                         dtype=None,
                                         scope=None,
                                         initial_state_attention=False,
                                         epsilon=0.5,
                                         mode=None):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    if isinstance(feed_previous, bool):
      return my_embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)
    else:
      # A bare `assert '...'` on a non-empty string always passes; raise
      # instead so a tensor-valued feed_previous fails loudly.
      raise TypeError('Please check the data type of feed_previous!')
def testRNNDecoder(self):
  with self.test_session() as sess:
    with variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5)):
      inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
      _, enc_state = rnn.static_rnn(
          rnn_cell.GRUCell(2), inp, dtype=dtypes.float32)
      dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
      cell = core_rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
      dec, mem = seq2seq_lib.rnn_decoder(dec_inp, enc_state, cell)
      sess.run([variables.global_variables_initializer()])
      res = sess.run(dec)
      self.assertEqual(3, len(res))
      self.assertEqual((2, 4), res[0].shape)
      res = sess.run([mem])
      self.assertEqual((2, 2), res[0].shape)
def advanced_rnn_seq2seq(encoder_inputs,
                         decoder_inputs,
                         cell,
                         num_decoder_symbols,
                         output_projection=None,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
  with variable_scope.variable_scope(scope or "advanced_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = copy.deepcopy(cell)  # different weights, so use deepcopy here
    _, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)
    print('encoder state', encoder_state[-1].h, type(encoder_state[-1]),
          sep='\n')
    # print(encoder_state)

    # Decoder.
    # If projection weights are not provided, add them automatically with a
    # wrapper.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    return advanced_rnn_decoder(
        decoder_inputs,
        encoder_state,
        cell,
        num_decoder_symbols,
        output_projection=output_projection,
        feed_previous=feed_previous)
def embedding_rnn_decoder(decoder_inputs,
                          initial_state,
                          cell,
                          num_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          scope=None):
  with variable_scope.variable_scope(scope or "embedding_rnn_decoder"):
    # Note that we use the original cell, wrapping it only to add an output
    # projection when none is provided.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_symbols)
    return tf_embedding_rnn_decoder(
        decoder_inputs,
        initial_state,
        cell,
        num_symbols,
        embedding_size,
        output_projection=output_projection,
        feed_previous=feed_previous)
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                pretrained_embedding_path=None):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can
  be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    pretrained_embedding_path: Path to a GloVe embedding saved as a .npy file.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = copy.deepcopy(cell)
    init = tf.constant(np.load(pretrained_embedding_path))  # use GloVe
    encoder_cell = rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=300,  # the GloVe vectors here are 300-dimensional
        initializer=init)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs an RNN decoder,
  initialized with the last encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: core_rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense
        representation of predicted tokens). It is of shape
        [batch_size x num_decoder_symbols] when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step. Each
        item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = copy.deepcopy(cell)
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_rnn_decoder(
            decoder_inputs,
            encoder_state,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(
        feed_previous, lambda: decoder(True), lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
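# --- Illustration (not from the original sources): the decoder()/cond dance
# above, stripped to its skeleton. Both branches build the same variables
# (the second under reuse=True), and the possibly-nested state is flattened
# so that cond sees one flat list with identical structure in both branches;
# the caller re-packs the state afterwards. `build_decoder` is a hypothetical
# stand-in for embedding_rnn_decoder and friends.
def cond_decode(feed_previous, build_decoder):
  def branch(feed_previous_bool):
    reuse = None if feed_previous_bool else True
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope(), reuse=reuse):
      outputs, state = build_decoder(feed_previous_bool)
    state_list = nest.flatten(state) if nest.is_sequence(state) else [state]
    return outputs + state_list  # one flat list, same length in both branches

  # The True branch is traced first and creates the variables; the False
  # branch reuses them, so only one set of weights exists in the graph.
  return control_flow_ops.cond(
      feed_previous, lambda: branch(True), lambda: branch(False))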
def __init__(self,
             architecture,
             source_seq_len,
             target_seq_len,
             rnn_size,
             num_layers,
             max_gradient_norm,
             batch_size,
             learning_rate,
             learning_rate_decay_factor,
             loss_to_use,
             optimizer_to_use,
             number_of_actions,
             cmu_data,
             alpha,
             beta,
             gamma,
             x_s,
             one_hot=True,
             residual_velocities=False,
             d_layers=2,
             dtype=tf.float32):
  if not cmu_data:
    self.HUMAN_SIZE = 54  # a different dataset may give a different human size
  else:
    self.HUMAN_SIZE = 62  # this is from .asf; 70 is from .bvh
  self.input_size = (
      self.HUMAN_SIZE + number_of_actions if one_hot else self.HUMAN_SIZE)
  # print("One hot is ", one_hot)
  # print("Input size is %d" % self.input_size)
  self.decoder = AEDecoder(0.01, d_layers, self.HUMAN_SIZE)
  self.source_seq_len = source_seq_len
  self.target_seq_len = target_seq_len
  self.rnn_size = rnn_size
  self.batch_size = batch_size
  self.learning_rate = tf.Variable(
      float(learning_rate), trainable=False, dtype=dtype)
  self.sampling_rate = tf.placeholder(dtype=dtype, shape=())

  # === Decay ===
  self.learning_rate_decay_op = self.learning_rate.assign(
      self.learning_rate * learning_rate_decay_factor)
  self.global_step = tf.Variable(0, trainable=False)
  self.x_s = x_s
  # print("the mode is: ", x_s)

  # === Transform the inputs ===
  with tf.name_scope("inputs"):
    enc_in = tf.placeholder(
        dtype, shape=[None, source_seq_len - 1, self.input_size],
        name="enc_in")
    dec_in = tf.placeholder(
        dtype, shape=[None, target_seq_len, self.input_size], name="dec_in")
    dec_out = tf.placeholder(
        dtype, shape=[None, target_seq_len, self.input_size], name="dec_out")
    self.encoder_inputs = enc_in
    self.decoder_inputs = dec_in
    self.decoder_outputs = dec_out
    enc_in = tf.transpose(enc_in, [1, 0, 2])
    dec_in = tf.transpose(dec_in, [1, 0, 2])
    dec_out = tf.transpose(dec_out, [1, 0, 2])
    enc_in = tf.reshape(enc_in, [-1, self.input_size])
    dec_in = tf.reshape(dec_in, [-1, self.input_size])
    dec_out = tf.reshape(dec_out, [-1, self.input_size])
    enc_in = tf.split(enc_in, source_seq_len - 1, axis=0)
    dec_in = tf.split(dec_in, target_seq_len, axis=0)
    dec_out = tf.split(dec_out, target_seq_len, axis=0)
  self.is_training = tf.placeholder(tf.bool)

  # Arrange the cell and transform the input.
  only_cell = tf.contrib.rnn.GRUCell(self.rnn_size)
  # print(len(enc_in), enc_in[0].get_shape())
  for index, item in enumerate(enc_in):
    if index == 0:
      enc_in_list = item
    else:
      enc_in_list = tf.concat([enc_in_list, item], axis=1)
  enc_in_list = tf.concat([enc_in_list, dec_in[0]], axis=1)
  outputs = []
  outputs_GT = []
  outputs_s = []

  # Define the loss function and architecture type.
  def lf(prev, i):
    return prev

  def lrelu(x, leak=0.2, name="lrelu"):
    return tf.maximum(x, leak * x)

  sp_decoder = self.decoder
  loop_function = lf
  only_cell = core_rnn_cell.OutputProjectionWrapper(only_cell, self.rnn_size)
  output_size = self.rnn_size
  batch_size = array_ops.shape(dec_in[0])[0]
  state = only_cell.zero_state(batch_size=batch_size, dtype=dtype)
  initial_state = state
  state_GT = state
  state_t = state
  keep_prob_ = 0.8
  initializer_weight = tf.random_uniform_initializer(
      minval=-0.04, maxval=0.04)
  initializer_bias = tf.random_uniform_initializer(minval=-0.04, maxval=0.04)

  def my_drop_out(output):
    return tf.where(
        self.is_training,
        tcl.dropout(output, keep_prob=keep_prob_, is_training=True),
        output)

  def my_fc(input_, output, scope, reuse=None):
    return tcl.fully_connected(
        input_,
        output,
        scope=scope,
        activation_fn=None,
        weights_initializer=initializer_weight,
        biases_initializer=initializer_bias,
        reuse=reuse)

  dim_1 = 128
  dim_2 = 256
  with vs.variable_scope("attention_decoder", dtype=dtype) as scope:
    prev = None
    for i, inp in enumerate(dec_in):
      if i > 0:
        vs.get_variable_scope().reuse_variables()
      inp_GT = inp
      if loop_function is not None and prev is not None:
        with vs.variable_scope("loop_function", reuse=True):
          inp = lf(prev, i)  # inp is for T-RNN
      with vs.variable_scope("RNN"):
        cell_output, state = only_cell(inp, state)
        vs.get_variable_scope().reuse_variables()
        cell_output_GT, state_GT = only_cell(inp_GT, state_GT)
      with vs.variable_scope("t_decoder"):
        clear_output = sp_decoder.forward(
            single_input=cell_output, is_training=self.is_training)
        output = clear_output + inp
        vs.get_variable_scope().reuse_variables()
        clear_output_GT = sp_decoder.forward(
            single_input=cell_output_GT, is_training=self.is_training)
        output_GT = clear_output_GT + inp_GT
      prev = output
      outputs.append(output)
      outputs_GT.append(output_GT)

  self.outputs = outputs  # GX: temporal output
  self.dec_out = dec_out
  # GX: scope mechanism: Decoder's w1 and d1, Encoder's w1 and d1
  with tf.name_scope("loss_angles"):
    loss_angles = tf.reduce_mean(tf.square(tf.subtract(dec_out, outputs)))
    loss_angles_GT = tf.reduce_mean(
        tf.square(tf.subtract(dec_out, outputs_GT)))
  self.loss_t = alpha * loss_angles_GT + beta * loss_angles
  # print('current alpha and beta are: ', alpha, beta)
  self.loss_summary = tf.summary.scalar('loss/loss', self.loss_t)

  # Regularizers.
  params = tf.trainable_variables()
  opt_t = tf.train.AdamOptimizer(self.learning_rate)
  RNN_var = [var_ for var_ in params if "RNN" in var_.name]
  t_dec_var = [var_ for var_ in params if "t_decoder" in var_.name]
  # print("================= variable ===================================")
  for reg_var in params:
    shp = reg_var.get_shape().as_list()
    # print("- {} shape:{} size:{}".format(reg_var.name, shp, np.prod(shp)))
  # print("=================TRNN variable (loss_t)=======================")
  # for reg_var in RNN_var:
  #   shp = reg_var.get_shape().as_list()
  #   print("- {} shape:{} size:{}".format(reg_var.name, shp, np.prod(shp)))
  # print("================T-decoder variable (loss_t)===================")
  self.reg_t = 0
  scale = 0.01
  count = 0
  for reg_var in t_dec_var:
    shp = reg_var.get_shape().as_list()
    # print("- {} shape:{} size:{}".format(reg_var.name, shp, np.prod(shp)))
    count = count + np.prod(shp)
    self.reg_t = self.reg_t + scale * tf.nn.l2_loss(reg_var)
  # print("total number is ", count)
  # print("the scale is ", scale)
  # print("===========================================")
  self.loss_t = self.loss_t + self.reg_t
  gradients_t = tf.gradients(self.loss_t, RNN_var + t_dec_var)
  clipped_gradients_t, self.gradient_norms = tf.clip_by_global_norm(
      gradients_t, max_gradient_norm)
  self.updates_t = opt_t.apply_gradients(
      zip(clipped_gradients_t, RNN_var + t_dec_var),
      global_step=self.global_step)
  self.updates = self.updates_t

  # Keep track of the learning rate.
  self.learning_rate_summary = tf.summary.scalar(
      'learning_rate/learning_rate', self.learning_rate)
  self.saver = tf.train.Saver(
      tf.global_variables(), max_to_keep=10000)  # better for drawing plots
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=dtypes.float32,
                         scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and
  multiple decoders. A reference for multi-task sequence-to-sequence learning
  can be found here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to the
      corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to
      an integer specifying the number of symbols for the corresponding
      decoder; len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the
        same length as decoder_inputs_dict[name]; each element in the list is
        a 2D Tensor with shape [batch_size x num_decoder_symbol_list[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of
        the corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}
  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]
      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(
            cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs,
              encoder_state,
              decoder_cell,
              num_decoder_symbols,
              embedding_size,
              feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs,
                  encoder_state,
                  decoder_cell,
                  num_decoder_symbols,
                  embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]

          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]
      outputs_dict[name] = outputs
      state_dict[name] = state
  return outputs_dict, state_dict
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               output_projection=None,
                               feed_previous=False,
                               dtype=dtypes.float32,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs an RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
    proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
    proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with variable_scope.variable_scope(scope or "embedding_tied_rnn_seq2seq"):
    with ops.device("/cpu:0"):
      embedding = variable_scope.get_variable(
          "embedding", [num_symbols, embedding_size])
    emb_encoder_inputs = [
        embedding_ops.embedding_lookup(embedding, x) for x in encoder_inputs
    ]
    emb_decoder_inputs = [
        embedding_ops.embedding_lookup(embedding, x) for x in decoder_inputs
    ]
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(
          emb_encoder_inputs,
          emb_decoder_inputs,
          cell,
          loop_function=loop_function,
          dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection,
          False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs,
            emb_decoder_inputs,
            cell,
            loop_function=loop_function,
            dtype=dtype)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(
        feed_previous, lambda: decoder(True), lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
def get_cell():
  cell_size = 1024
  cell = core_rnn_cell.GRUCell(cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, fea_size)
  return cell
def __init__(self,
             architecture,
             source_seq_len,
             target_seq_len,
             rnn_size,
             num_layers,
             max_gradient_norm,
             batch_size,
             learning_rate,
             learning_rate_decay_factor,
             summaries_dir,
             loss_to_use,
             optimizer_to_use,
             number_of_actions,
             cmu_data,
             long_t,
             alpha,
             beta,
             gamma,
             x_s,
             one_hot=True,
             residual_velocities=False,
             d_layers=2,
             dtype=tf.float32):
  if not cmu_data:
    self.HUMAN_SIZE = 54  # a different dataset may give a different human size
  else:
    self.HUMAN_SIZE = 62  # this is from .asf; 70 is from .bvh
  self.input_size = (
      self.HUMAN_SIZE + number_of_actions if one_hot else self.HUMAN_SIZE)
  self.decoder = AEDecoder(0.01, d_layers, self.HUMAN_SIZE)
  self.source_seq_len = source_seq_len
  self.target_seq_len = target_seq_len
  self.rnn_size = rnn_size
  self.batch_size = batch_size
  self.learning_rate = tf.Variable(
      float(learning_rate), trainable=False, dtype=dtype)
  self.sampling_rate = tf.placeholder(dtype=dtype, shape=())

  # === Decay ===
  self.learning_rate_decay_op = self.learning_rate.assign(
      self.learning_rate * learning_rate_decay_factor)
  self.global_step = tf.Variable(0, trainable=False)
  self.x_s = x_s

  # === Transform the inputs ===
  with tf.name_scope("inputs"):
    enc_in = tf.placeholder(
        dtype, shape=[None, source_seq_len - 1, self.input_size],
        name="enc_in")
    dec_in = tf.placeholder(
        dtype, shape=[None, target_seq_len, self.input_size], name="dec_in")
    dec_out = tf.placeholder(
        dtype, shape=[None, target_seq_len, self.input_size], name="dec_out")
    self.encoder_inputs = enc_in
    self.decoder_inputs = dec_in
    self.decoder_outputs = dec_out
    enc_in = tf.transpose(enc_in, [1, 0, 2])
    dec_in = tf.transpose(dec_in, [1, 0, 2])
    dec_out = tf.transpose(dec_out, [1, 0, 2])
    enc_in = tf.reshape(enc_in, [-1, self.input_size])
    dec_in = tf.reshape(dec_in, [-1, self.input_size])
    dec_out = tf.reshape(dec_out, [-1, self.input_size])
    enc_in = tf.split(enc_in, source_seq_len - 1, axis=0)
    dec_in = tf.split(dec_in, target_seq_len, axis=0)
    dec_out = tf.split(dec_out, target_seq_len, axis=0)
  self.is_training = tf.placeholder(tf.bool)

  # Arrange the cell and transform the input.
  only_cell = tf.contrib.rnn.GRUCell(self.rnn_size)
  for index, item in enumerate(enc_in):
    if index == 0:
      enc_in_list = item
    else:
      enc_in_list = tf.concat([enc_in_list, item], axis=1)
  enc_in_list = tf.concat([enc_in_list, dec_in[0]], axis=1)
  outputs = []

  # Define the loss function and architecture type.
  def lf(prev, i):
    return prev

  def lrelu(x, leak=0.2, name="lrelu"):
    return tf.maximum(x, leak * x)

  sp_decoder = self.decoder
  loop_function = lf
  only_cell = core_rnn_cell.OutputProjectionWrapper(only_cell, self.rnn_size)
  output_size = self.rnn_size
  batch_size = array_ops.shape(dec_in[0])[0]
  state = only_cell.zero_state(batch_size=batch_size, dtype=dtype)
  initial_state = state
  state_GT = state
  state_t = state
  keep_prob_ = 0.8
  initializer_weight = tf.random_uniform_initializer(
      minval=-0.04, maxval=0.04)
  initializer_bias = tf.random_uniform_initializer(minval=-0.04, maxval=0.04)

  def my_drop_out(output):
    return tf.where(
        self.is_training,
        tcl.dropout(output, keep_prob=keep_prob_, is_training=True),
        output)

  def my_fc(input_, output, scope, reuse=None):
    return tcl.fully_connected(
        input_,
        output,
        scope=scope,
        activation_fn=None,
        weights_initializer=initializer_weight,
        biases_initializer=initializer_bias,
        reuse=reuse)

  dim_1 = 128
  dim_2 = 256
  with vs.variable_scope("attention_decoder", dtype=dtype) as scope:
    prev = None
    for i, inp in enumerate(dec_in):
      if i > 0:
        vs.get_variable_scope().reuse_variables()
      if loop_function is not None and prev is not None:
        with vs.variable_scope("loop_function", reuse=True):
          inp = lf(prev, i)  # inp is for T-RNN
      h0 = inp
      with vs.variable_scope("s_decoder"):
        # GX: I don't know why I can't have a closed form of this.
        # Per-body-part branches: right leg, left leg, trunk, left arm,
        # right arm, each a 3-layer fully-connected stack.
        output_r_l = my_drop_out(
            lrelu(my_fc(h0[:, :14], dim_1, scope="r_l/fc1")))
        output_l_l = my_drop_out(
            lrelu(my_fc(h0[:, 14:22], dim_1, scope="l_l/fc1")))
        output_trunk = my_drop_out(
            lrelu(my_fc(h0[:, 22:34], dim_1, scope="trunk/fc1")))
        output_l_u = my_drop_out(
            lrelu(my_fc(h0[:, 34:44], dim_1, scope="l_u/fc1")))
        output_r_u = my_drop_out(
            lrelu(my_fc(h0[:, 44:54], dim_1, scope="r_u/fc1")))
        output_r_l = my_drop_out(
            lrelu(my_fc(output_r_l, dim_2, scope="r_l/fc2")))
        output_l_l = my_drop_out(
            lrelu(my_fc(output_l_l, dim_2, scope="l_l/fc2")))
        output_trunk = my_drop_out(
            lrelu(my_fc(output_trunk, dim_2, scope="trunk/fc2")))
        output_l_u = my_drop_out(
            lrelu(my_fc(output_l_u, dim_2, scope="l_u/fc2")))
        output_r_u = my_drop_out(
            lrelu(my_fc(output_r_u, dim_2, scope="r_u/fc2")))
        output_r_l = my_drop_out(
            lrelu(my_fc(output_r_l, dim_1, scope="r_l/fc3")))
        output_l_l = my_drop_out(
            lrelu(my_fc(output_l_l, dim_1, scope="l_l/fc3")))
        output_trunk = my_drop_out(
            lrelu(my_fc(output_trunk, dim_1, scope="trunk/fc3")))
        output_l_u = my_drop_out(
            lrelu(my_fc(output_l_u, dim_1, scope="l_u/fc3")))
        output_r_u = my_drop_out(
            lrelu(my_fc(output_r_u, dim_1, scope="r_u/fc3")))
        output_s = tf.concat(
            [output_r_l, output_l_l, output_trunk, output_l_u, output_r_u],
            axis=1)
        output_s = my_fc(output_s, self.HUMAN_SIZE, scope="fc4")
        output_s = my_drop_out(lrelu(output_s))
        output_s = output_s + inp
      enc_in_list = tf.concat(
          [enc_in_list[:, self.HUMAN_SIZE:], output_s], axis=1)
      output = output_s
      prev = output
      outputs.append(output)

  self.outputs = outputs  # GX: used for training
  self.dec_out = dec_out
  loss_angles = tf.reduce_mean(tf.square(tf.subtract(dec_out, outputs)))
  self.loss = loss_angles
  self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

  # Regularizers.
  params = tf.trainable_variables()
  opt_s = tf.train.GradientDescentOptimizer(1e-2)
  s_dec_var = [var_ for var_ in params if "s_decoder" in var_.name]
  print("================= variable ===================================")
  for reg_var in params:
    shp = reg_var.get_shape().as_list()
    print("- {} shape:{} size:{}".format(reg_var.name, shp, np.prod(shp)))
  print("===============decoder variable (loss_s)======================")
  self.reg = 0
  scale = 0.001
  count = 0
  for reg_var in s_dec_var:
    shp = reg_var.get_shape().as_list()
    print("- {} shape:{} size:{}".format(reg_var.name, shp, np.prod(shp)))
    count = count + np.prod(shp)
    self.reg = self.reg + scale * tf.nn.l2_loss(reg_var)
  print("total number is ", count)
  print("the scale is ", scale)
  print("===========================================")
  self.loss = self.loss + self.reg

  # Update all the trainable parameters.
  gradients = tf.gradients(self.loss, s_dec_var)
  clipped_gradients, norm = tf.clip_by_global_norm(
      gradients, max_gradient_norm)
  self.gradient_norms = norm
  self.updates = opt_s.apply_gradients(
      zip(clipped_gradients, s_dec_var), global_step=self.global_step)
  self.learning_rate_summary = tf.summary.scalar(
      'learning_rate/learning_rate', self.learning_rate)
  self.saver = tf.train.Saver(
      tf.global_variables(), max_to_keep=10000)  # better for drawing plots
def get_enc_cell1(self, cell_size):
  cell = gru_ops.GRUBlockCell(cell_size)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, 1024)
  return cell
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell_1,
                                cell_2,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds
  decoder_inputs by another newly created embedding (of shape
  [num_decoder_symbols x input_size]). Then it runs attention decoder,
  initialized with the last encoder state, on embedded decoder_inputs and
  attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell_1: rnn_cell.RNNCell defining the encoder cell function and size.
    cell_2: rnn_cell.RNNCell defining the decoder cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has shape
      [num_decoder_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    beam_search: Boolean; if True, decode with beam search.
    beam_size: Integer; beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell_1,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)  # reuse=tf.get_variable_scope().reuse
    encoder_outputs, encoder_state = core_rnn.static_rnn(
        encoder_cell,
        encoder_inputs,
        # scope='embedding_attention_decoder/attention_decoder',
        dtype=dtype)
    print('####### embedding_attention_seq2seq scope: {}'.format(encoder_cell))
    print("Symbols")
    print(num_encoder_symbols)
    print(num_decoder_symbols)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell_1.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(axis=1, values=top_states)
    print(attention_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell_2 = rnn_cell.OutputProjectionWrapper(cell_2, num_decoder_symbols)
      output_size = num_decoder_symbols

    return embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        cell_2,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous,
        initial_state_attention=initial_state_attention,
        beam_search=beam_search,
        beam_size=beam_size)
def reader(self, query, key_bank, val_bank, num_slots=0):
  tile = True
  if tile:
    key_bank3d = tf.reshape(key_bank, [1, -1, self._img_cats_mem_size])
    key_bank3d = tf.tile(key_bank3d, [self.run_time_batch_size, 1, 1])
  else:
    key_bank3d = tf.reshape(key_bank,
                            [-1, num_slots, self._img_cats_mem_size])
  query_size = key_bank3d.get_shape().as_list()[2]
  cell = tf.contrib.rnn.LayerNormBasicLSTMCell(self._num_lstm_units)
  if self._phase_train:
    cell = tf.nn.rnn_cell.DropoutWrapper(
        cell,
        input_keep_prob=self._dropout_keep_prob,
        output_keep_prob=self._dropout_keep_prob)
  cell = core_rnn_cell.OutputProjectionWrapper(cell, self._num_lstm_units)
  state = cell.zero_state(self.run_time_batch_size, dtype=tf.float32)
  num_read_heads = 5
  rs, val_out = [], []
  for rid in xrange(num_read_heads):
    r = tf.zeros([self.run_time_batch_size, self._img_cats_mem_size],
                 dtype=tf.float32)
    rs.append(r)
  for rid in xrange(num_read_heads):
    v = tf.zeros([self.run_time_batch_size, self._desc_cats_mem_size],
                 dtype=tf.float32)
    val_out.append(v)
  num_steps = self._num_answer_candidates
  val_outs = []
  if tile:
    val_bank = tf.reshape(val_bank, [1, -1, self._desc_cats_mem_size])
    val_bank = tf.tile(val_bank, [self.run_time_batch_size, 1, 1])
  else:
    val_bank = tf.reshape(val_bank,
                          [-1, num_slots, self._desc_cats_mem_size])
  num_slots = tf.shape(val_bank)[1]
  input_labels = tf.split(
      axis=1,
      num_or_size_splits=self._num_answer_candidates,
      value=self._visual_target_label)
  target_labels = tf.split(
      axis=1,
      num_or_size_splits=self._num_answer_candidates,
      value=self._target_labels)
  target_weights = tf.split(
      axis=1,
      num_or_size_splits=self._num_answer_candidates,
      value=self._labels_target_weight)
  losses, eval_score = [], []
  num_symbols = self._query_vocab_size
  embedding_matrix = tf.get_variable("embedding",
                                     [num_symbols, self._num_lstm_units])
  prev = None
  loop_function = None
  if not self._phase_train:
    loop_function = _extract_argmax_and_embed(embedding_matrix, None, False)
  with tf.variable_scope('LSTM_reader'):
    for x in xrange(num_steps):
      if x > 0:
        tf.get_variable_scope().reuse_variables()
      inp = _linear([query] + rs, self._num_lstm_units, True)
      output, state = cell(inp, state)
      state_flattened = utils.flatten(state)
      rs, val_out = [], []
      for rid in xrange(num_read_heads):
        a = similarity.softmax_similarity(
            state_flattened, key_bank, query_size,
            scope="softmax_similarity_%d" % rid)
        r = tf.matmul(tf.expand_dims(a, 1), key_bank3d)
        r = tf.reshape(r, [-1, self._img_cats_mem_size])
        rs.append(r)
        a3d = tf.reshape(a, [-1, 1, num_slots])
        val_out.append(
            tf.reshape(tf.matmul(a3d, val_bank),
                       [-1, self._desc_cats_mem_size]))
      q = query
      mem_out = tf.concat(axis=1, values=rs + val_out + [q])
      mem_out = slim.fully_connected(
          mem_out, 1024, activation_fn=tf.nn.relu, scope="fc0")
      mem_out = slim.dropout(
          mem_out, self._dropout_keep_prob, is_training=self._phase_train)
      logits = slim.fully_connected(
          mem_out, self._query_vocab_size, activation_fn=None,
          scope="target_W")
      if loop_function is not None:
        prev = logits
      if self._phase_train:
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(target_labels[-1], [-1]))
        losses.append(loss)
      else:
        eval_score.append(tf.nn.softmax(logits))
  if self._phase_train:
    loss = tf.add_n(losses) / num_steps
    self._cls_loss = tf.reduce_mean(loss, name='cross_entropy')
  else:
    self.eval_score = eval_score
    self.eval_score = tf.add_n(eval_score)
def hs2s(encoder_inputs,
         decoder_inputs,
         lstm_size,
         cells,
         model_size,
         batch_size,
         embedding_size,
         num_input_symbols,
         num_decoder_symbols,
         num_heads=1,
         feed_previous=False,
         output_projection=None,
         initial_state_attention=False,
         dtype=dtypes.float32,
         scope=None):
  # Run the encoder.
  encoder_state, attention_states = encoder(
      encoder_inputs,
      lstm_size,
      cells,
      model_size,
      batch_size,
      embedding_size,
      num_input_symbols,
      dtype=dtype,
      scope=scope)

  # Run the decoder.
  output_size = None
  if output_projection is None:
    cell = rnn_cell.OutputProjectionWrapper(cells['decoder_h1'],
                                            num_decoder_symbols)
    output_size = num_decoder_symbols

  if isinstance(feed_previous, bool):
    outputs, decoder_state, attns = embed_attn_decoder(
        decoder_inputs,
        attention_states,
        encoder_state,
        cells,
        model_size,
        lstm_size,
        batch_size,
        embedding_size,
        num_decoder_symbols,
        num_heads=num_heads,
        feed_previous=feed_previous,
        output_size=output_size,
        output_projection=output_projection,
        initial_state_attention=initial_state_attention)
    return outputs, decoder_state, attns

  # If feed_previous is a Tensor, construct 2 graphs and use cond.
  def decoder(feed_previous_bool):
    reuse = None if feed_previous_bool else True
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope(), reuse=reuse):
      outputs, decoder_state, attns = embed_attn_decoder(
          decoder_inputs,
          attention_states,
          encoder_state,
          cells,
          model_size,
          lstm_size,
          batch_size,
          embedding_size,
          num_decoder_symbols,
          num_heads=num_heads,
          feed_previous=feed_previous_bool,
          output_size=output_size,
          output_projection=output_projection,
          initial_state_attention=initial_state_attention,
          update_embedding_for_previous=False)
      return outputs + [decoder_state], attns

  outputs_and_state, attns = control_flow_ops.cond(
      feed_previous, lambda: decoder(True), lambda: decoder(False))
  return outputs_and_state[:-1], outputs_and_state[-1], attns
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None,
                               feed_previous=False,
                               dtype=None,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of
  shape [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs an RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs. The decoder output is over
  symbols from 0 to num_decoder_symbols - 1 if num_decoder_symbols is not
  None; otherwise it is over 0 to num_symbols - 1.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each
      symbol.
    num_decoder_symbols: Integer; number of output symbols for decoder. If
      provided, the decoder output is over symbols 0 to
      num_decoder_symbols - 1. Otherwise, decoder output is over symbols 0 to
      num_symbols - 1. Note that this assumes that the vocabulary is set up
      such that the first num_decoder_symbols of num_symbols are part of
      decoding.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in
      embedding_rnn_decoder). If False, decoder_inputs are used as given (the
      standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_symbols] containing the generated outputs
        where output_symbols = num_decoder_symbols if num_decoder_symbols is
        not None, otherwise output_symbols = num_symbols.
      state: The state of each decoder cell at the final time-step. It is a
        2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  with variable_scope.variable_scope(
      scope or "embedding_tied_rnn_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    if output_projection is not None:
      proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
      proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
      proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
      proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    embedding = variable_scope.get_variable(
        "embedding", [num_symbols, embedding_size], dtype=dtype)
    emb_encoder_inputs = [
        embedding_ops.embedding_lookup(embedding, x) for x in encoder_inputs
    ]
    emb_decoder_inputs = [
        embedding_ops.embedding_lookup(embedding, x) for x in decoder_inputs
    ]

    output_symbols = num_symbols
    if num_decoder_symbols is not None:
      output_symbols = num_decoder_symbols
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, output_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(
          emb_encoder_inputs,
          emb_decoder_inputs,
          cell,
          loop_function=loop_function,
          dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection,
          False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs,
            emb_decoder_inputs,
            cell,
            loop_function=loop_function,
            dtype=dtype)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(
        feed_previous, lambda: decoder(True), lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]

    # Calculate zero-state to know its structure.
    static_batch_size = encoder_inputs[0].get_shape()[0]
    for inp in encoder_inputs[1:]:
      static_batch_size.merge_with(inp.get_shape()[0])
    batch_size = static_batch_size.value
    if batch_size is None:
      batch_size = array_ops.shape(encoder_inputs[0])[0]
    zero_state = cell.zero_state(batch_size, dtype)
    if nest.is_sequence(zero_state):
      state = nest.pack_sequence_as(
          structure=zero_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, enc_cell, dec_cell, num_encoder_symbols, num_decoder_symbols, embedding_size, num_heads=1, output_projection=None, feed_previous=False, dtype=None, scope=None, initial_state_attention=False): """Embedding sequence-to-sequence model with attention. This model first embeds encoder_inputs by a newly created embedding (of shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode embedded encoder_inputs into a state vector. It keeps the outputs of this RNN at every step to use for attention later. Next, it embeds decoder_inputs by another newly created embedding (of shape [num_decoder_symbols x input_size]). Then it runs attention decoder, initialized with the last encoder state, on embedded decoder_inputs and attending to encoder outputs. Warning: when output_projection is None, the size of the attention vectors and variables will be made proportional to num_decoder_symbols, can be large. Args: encoder_inputs: A list of 1D int32 Tensors of shape [batch_size]. decoder_inputs: A list of 1D int32 Tensors of shape [batch_size]. cell: tf.nn.rnn_cell.RNNCell defining the cell function and size. num_encoder_symbols: Integer; number of symbols on the encoder side. num_decoder_symbols: Integer; number of symbols on the decoder side. embedding_size: Integer, the length of the embedding vector for each symbol. num_heads: Number of attention heads that read from attention_states. output_projection: None or a pair (W, B) of output projection weights and biases; W has shape [output_size x num_decoder_symbols] and B has shape [num_decoder_symbols]; if provided and feed_previous=True, each fed previous output will first be multiplied by W and added B. feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of decoder_inputs will be used (the "GO" symbol), and all other decoder inputs will be taken from previous outputs (as in embedding_rnn_decoder). If False, decoder_inputs are used as given (the standard decoder case). dtype: The dtype of the initial RNN state (default: tf.float32). scope: VariableScope for the created subgraph; defaults to "embedding_attention_seq2seq". initial_state_attention: If False (default), initial attentions are zero. If True, initialize the attentions from the initial state and attention states. Returns: A tuple of the form (outputs, state), where: outputs: A list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x num_decoder_symbols] containing the generated outputs. state: The state of each decoder cell at the final time-step. It is a 2D Tensor of shape [batch_size x cell.state_size]. """ with variable_scope.variable_scope( scope or "embedding_attention_seq2seq", dtype=dtype) as scope: dtype = scope.dtype # Encoder. encoder_cell = enc_cell encoder_cell = core_rnn_cell.EmbeddingWrapper( encoder_cell, embedding_classes=num_encoder_symbols, embedding_size=embedding_size) encoder_outputs, encoder_state = rnn.static_rnn( encoder_cell, encoder_inputs, dtype=dtype) # First calculate a concatenation of encoder outputs to put attention on. top_states = [ array_ops.reshape(e, [-1, 1, encoder_cell.output_size]) for e in encoder_outputs ] attention_states = array_ops.concat(top_states, 1) # Decoder. 
    output_size = None
    if output_projection is None:
      dec_cell = core_rnn_cell.OutputProjectionWrapper(dec_cell,
                                                       num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return tf.contrib.legacy_seq2seq.embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = tf.contrib.legacy_seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
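# Usage sketch (illustrative, not part of the original file): wires the
# attention model above with separate encoder and decoder cells, using
# feed_previous=True so decoding after the "GO" symbol is driven by the
# model's own previous outputs. Vocabulary sizes and dimensions are
# arbitrary placeholders; assumes graph-mode TF 1.x.
def _example_embedding_attention_seq2seq():
  batch_size, enc_len, dec_len = 32, 12, 10
  enc_cell = tf.nn.rnn_cell.GRUCell(128)
  dec_cell = tf.nn.rnn_cell.GRUCell(128)
  encoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(enc_len)]
  decoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(dec_len)]
  outputs, state = embedding_attention_seq2seq(
      encoder_inputs, decoder_inputs, enc_cell, dec_cell,
      num_encoder_symbols=10000, num_decoder_symbols=10000,
      embedding_size=64, num_heads=1, feed_previous=True)
  # Each entry of `outputs` has shape [batch_size, num_decoder_symbols].
  return outputs, state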
# Variant of embedding_attention_seq2seq that shares a single cell between
# encoder and decoder; judging by the name, it is meant to be used with an
# explicit output_projection so a sampled-softmax loss can reuse the
# projection weights (the function itself does not enforce this).
def embedding_attention_sampled_seq2seq(encoder_inputs,
                                        decoder_inputs,
                                        cell,
                                        num_encoder_symbols,
                                        num_decoder_symbols,
                                        embedding_size,
                                        num_heads=1,
                                        output_projection=None,
                                        feed_previous=False,
                                        dtype=None,
                                        scope=None,
                                        initial_state_attention=False):
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
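# Usage sketch (illustrative): supplies an explicit output_projection, the
# configuration the "sampled" variant appears intended for. The projection
# pair (w, b) has shapes [cell.output_size, vocab] and [vocab], matching the
# W/B convention documented above; vocabulary size and dimensions are
# placeholders. Assumes graph-mode TF 1.x.
def _example_embedding_attention_sampled_seq2seq():
  batch_size, seq_len, vocab = 32, 10, 20000
  cell = tf.nn.rnn_cell.GRUCell(256)
  w = tf.get_variable("proj_w", [256, vocab])
  b = tf.get_variable("proj_b", [vocab])
  encoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(seq_len)]
  decoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(seq_len)]
  outputs, state = embedding_attention_sampled_seq2seq(
      encoder_inputs, decoder_inputs, cell,
      num_encoder_symbols=vocab, num_decoder_symbols=vocab,
      embedding_size=64, output_projection=(w, b), feed_previous=False)
  # With output_projection set, `outputs` stay at cell.output_size and the
  # same (w, b) can be fed to a sampled-softmax loss during training.
  return outputs, state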
# NOTE: this redefinition shadows the beam-search-capable
# embedding_rnn_seq2seq defined earlier in this file; unlike that version,
# it supports feed_previous given as a scalar Boolean Tensor.
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  with variable_scope.variable_scope(
      scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs,
                                      dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          scope=scope)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = embedding_rnn_decoder(
            decoder_inputs,
            encoder_state,
            cell,
            num_decoder_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
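# Usage sketch (illustrative): passes feed_previous as a scalar Boolean
# Tensor, exercising the control_flow_ops.cond branch above so one graph
# can switch between teacher forcing (training) and greedy decoding
# (inference) at runtime. Symbol counts and sizes are placeholders;
# assumes graph-mode TF 1.x.
def _example_embedding_rnn_seq2seq():
  batch_size, seq_len = 32, 10
  cell = tf.nn.rnn_cell.GRUCell(128)
  encoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(seq_len)]
  decoder_inputs = [tf.placeholder(tf.int32, [batch_size])
                    for _ in range(seq_len)]
  feed_previous = tf.placeholder(tf.bool, [])  # Fed True/False per session run.
  outputs, state = embedding_rnn_seq2seq(
      encoder_inputs, decoder_inputs, cell,
      num_encoder_symbols=5000, num_decoder_symbols=5000,
      embedding_size=64, feed_previous=feed_previous)
  return outputs, state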