import tensorflow as tf
from tensorflow.models.rnn import rnn, seq2seq


def seq2seq_f(cell, encoder_inputs, decoder_inputs, loop_output):
    '''The seq2seq neural network structure.

    Args:
        cell: the RNNCell object
        encoder_inputs: a list of Tensors to feed the encoder
        decoder_inputs: a list of Tensors to feed the decoder
        loop_output: True to use loop_func to construct each next
            decoder input element from the previous output element

    Returns:
        outputs: a list of Tensors generated by the decoder
        states: the hidden states at the final step of the encoder
    '''
    if loop_output:
        def loop_func(prev, i):
            # Simplest construction: use the previous output as the next input.
            return prev
        # Use rnn() directly for the modified decoder.
        _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=tf.float32)
        # Note that the returned states are all hidden states, not just the last one.
        outputs, states = seq2seq.rnn_decoder(decoder_inputs, enc_states[-1],
                                              cell, loop_func)
    else:
        # Use the given decoder inputs.
        outputs, states = seq2seq.basic_rnn_seq2seq(
            encoder_inputs, decoder_inputs, cell)
    # One way to bound the outputs to [-1, 1], not used here:
    # outputs = [tf.tanh(x) for x in outputs]
    # The returned states are just the last element of all hidden states.
    return outputs, states
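A minimal usage sketch for seq2seq_f (not part of the original snippet). The sizes enc_steps, dec_steps, batch_size and input_dim below are hypothetical, and num_units is chosen equal to input_dim so that the previous output can be fed back as the next decoder input when loop_output is True.

# Hypothetical sizes, for illustration only.
enc_steps, dec_steps, batch_size, input_dim = 5, 5, 32, 16

cell = rnn_cell.BasicLSTMCell(input_dim)
encoder_inputs = [tf.placeholder(tf.float32, shape=(batch_size, input_dim))
                  for _ in range(enc_steps)]
decoder_inputs = [tf.placeholder(tf.float32, shape=(batch_size, input_dim))
                  for _ in range(dec_steps)]

# Feed each decoder step with the previous step's output.
outputs, states = seq2seq_f(cell, encoder_inputs, decoder_inputs,
                            loop_output=True)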
def __init__(self, vocab_size, sequence_length, num_units, max_gradient_norm,
             batch_size, learning_rate, learning_rate_decay_factor):
    self.vocab_size = vocab_size
    self.sequence_length = sequence_length
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # Output projection from cell outputs to vocabulary logits.
    w = training.utils.gaussian_weights_variable([num_units, self.vocab_size])
    b = tf.Variable(tf.zeros([self.vocab_size]))
    lstm_cell = rnn_cell.LSTMCell(num_units, vocab_size)

    # One placeholder per time step for encoder inputs, decoder inputs
    # and target weights.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for _ in range(sequence_length):
        self.encoder_inputs.append(tf.placeholder(
            tf.float32, shape=(batch_size, self.vocab_size)))
        self.decoder_inputs.append(tf.placeholder(
            tf.float32, shape=(batch_size, self.vocab_size)))
        self.target_weights.append(tf.placeholder(
            tf.float32, shape=(batch_size,)))

    # The decoder has one extra cell because it starts with the GO symbol,
    # and the targets are shifted by one.
    # Not sure this is actually useful, as it is always set to 0.
    # As this is inspired by TensorFlow seq2seq models, there might be
    # something dodgy in there.
    self.decoder_inputs.append(tf.placeholder(
        tf.float32, shape=(batch_size, self.vocab_size)))
    self.target_weights.append(np.ones((batch_size,)))

    # Targets used by the sequence loss must be integer indices.
    targets = [tf.cast(tf.argmax(i, 1), dtype=tf.int32)
               for i in self.decoder_inputs[1:]]

    outputs, self.state = seq2seq.basic_rnn_seq2seq(
        self.encoder_inputs, self.decoder_inputs, lstm_cell)
    self.logits = [tf.nn.xw_plus_b(o, w, b) for o in outputs]
    self.loss = seq2seq.sequence_loss(
        self.logits[:self.sequence_length], targets,
        self.target_weights[:self.sequence_length], self.vocab_size)

    # Clipped-gradient SGD update.
    params = tf.trainable_variables()
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(self.loss, params)
    clipped_gradients, self.gradient_norms = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.updates = opt.apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)
    self.saver = tf.train.Saver(tf.all_variables())
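A hedged sketch of how one training step for this model might be driven (not from the original source). session is a tf.Session; encoder_batch, decoder_batch and weight_batch are hypothetical lists of numpy arrays matching the placeholder shapes above, with decoder_batch holding sequence_length + 1 arrays because of the extra GO-symbol slot.

def step(self, session, encoder_batch, decoder_batch, weight_batch):
    # Map each per-step placeholder onto its numpy batch.
    feed = {}
    for placeholder, value in zip(self.encoder_inputs, encoder_batch):
        feed[placeholder] = value
    for placeholder, value in zip(self.decoder_inputs, decoder_batch):
        feed[placeholder] = value
    # Only the first sequence_length weights are placeholders; the last
    # entry is the constant np.ones array appended in __init__.
    for placeholder, value in zip(self.target_weights[:self.sequence_length],
                                  weight_batch):
        feed[placeholder] = value
    # One gradient update; returns the current loss.
    _, loss = session.run([self.updates, self.loss], feed_dict=feed)
    return loss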
def testBasicRNNSeq2Seq(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
            dec_inp = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]
            cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
            dec, mem = seq2seq.basic_rnn_seq2seq(inp, dec_inp, cell)
            sess.run([tf.initialize_all_variables()])
            # Three decoder steps, each projected from size 2 up to size 4.
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 4))
            # mem holds one state per decoder step plus the initial
            # (encoder-final) state, hence length 4.
            res = sess.run(mem)
            self.assertEqual(len(res), 4)
            self.assertEqual(res[0].shape, (2, 2))
weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]

# Decoder input: prepend a "GO" token and drop the final
# token of the encoder input.
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.float32, name="GO")]
           + enc_inp[:-1])

# Initial memory value for the recurrence.
# prev_mem = tf.zeros((batch_size, memory_dim))

print("shapes", np.array(enc_inp).shape, np.array(dec_inp).shape,
      np.array(labels).shape)

cell = rnn_cell.GRUCell(memory_dim)
dec_outputs, dec_memory = seq2seq.basic_rnn_seq2seq(enc_inp, dec_inp, cell)

labels_t = tf.reshape(labels, [5, 100])
print(labels_t)
print(dec_outputs)
loss = seq2seq.sequence_loss(dec_outputs, labels_t, weights, vocab_size)
tf.scalar_summary("loss", loss)

# magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1])))
# tf.scalar_summary("magnitude at t=1", magnitude)

summary_op = tf.merge_all_summaries()

learning_rate = 0.05
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)
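A hedged sketch of a training loop for the graph above (not in the original snippet). It assumes enc_inp and labels were created earlier as lists of tf.placeholder, and generate_batch is a hypothetical helper returning numpy arrays shaped like those placeholders.

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for step in range(1000):
        enc_batch, label_batch = generate_batch()  # hypothetical helper
        feed = dict(zip(enc_inp, enc_batch))
        feed.update(dict(zip(labels, label_batch)))
        _, loss_val = sess.run([train_op, loss], feed_dict=feed)
        if step % 100 == 0:
            print("step", step, "loss", loss_val)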
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.input_size = input_size = config.input_size
    self.num_classes = num_classes = config.num_classes
    self.vid_per_batch = config.vid_per_batch
    size = config.hidden_size
    self.cls_weight = config.cls_weight
    self.bbox_weight = config.bbox_weight
    self.ending_weight = config.ending_weight
    self.iter_epoch = config.iter_epoch
    self.momentum = config.momentum

    # Placeholders for inputs and outputs.
    self._input_data = inputs = tf.placeholder(
        tf.float32, [batch_size, num_steps, input_size])
    self._cls_targets = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._bbox_targets = tf.placeholder(
        tf.float32, [batch_size, num_steps, num_classes * 4])
    self._bbox_weights = tf.placeholder(
        tf.float32, [batch_size, num_steps, num_classes * 4])
    self._end_targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Original inputs: batch_size x num_steps x input_size.
    # After processing: num_steps * [batch_size, input_size].
    inputs = [tf.squeeze(input_, [1])
              for input_ in tf.split(1, num_steps, inputs)]

    self.type = config.type
    if self.type == 'residual':
        lstm_cell = ResLSTMCell(size)
    elif self.type == 'basic':
        lstm_cell = tf.models.rnn.rnn_cell.BasicLSTMCell(size)
    else:
        raise ValueError('Unknown LSTM cell type: {}.'.format(self.type))

    if is_training and config.keep_prob < 1:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    # TODO: decide initial state
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Sequence autoencoder: decode the inputs in reverse order,
    # then flip the outputs back to the original order.
    outputs_rev, state = basic_rnn_seq2seq(inputs, inputs[::-1], cell)
    outputs = outputs_rev[::-1]

    # output: (num_steps * batch_size) x hidden_size
    output = tf.reshape(tf.concat(0, outputs), [-1, size])

    self._small_lr_vars = []

    # Build losses.
    # Class score.
    if config.cls_init:
        # Use pre-trained weights to initialize.
        with open(config.cls_init, 'rb') as f:
            log.info("Loading classification params from {}".format(config.cls_init))
            cls_w, cls_b = cPickle.load(f)
        softmax_w = tf.get_variable("softmax_w", initializer=tf.constant(cls_w))
        softmax_b = tf.get_variable("softmax_b", initializer=tf.constant(cls_b))
        self._small_lr_vars.append(softmax_w.name)
        self._small_lr_vars.append(softmax_b.name)
    else:
        softmax_w = tf.get_variable("softmax_w", [size, num_classes])
        softmax_b = tf.get_variable("softmax_b", [num_classes],
                                    initializer=tf.constant_initializer(0.))
    logits = tf.matmul(output, softmax_w) + softmax_b
    self._cls_scores = tf.nn.softmax(logits, name='cls_scores')
    # Transpose cls_targets to make num_steps the leading axis.
    cls_targets = tf.reshape(tf.transpose(self._cls_targets), [-1])
    loss_cls_score = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, cls_targets, name='loss_cls_score')
    self._cls_cost = cls_cost = (
        tf.reduce_sum(loss_cls_score) / batch_size / num_steps)

    # Bounding box regression: L2 loss.
    if config.bbox_init:
        with open(config.bbox_init, 'rb') as f:
            log.info("Loading bbox regression params from {}".format(config.bbox_init))
            bbox_w, bbox_b = cPickle.load(f)
        bbox_w = tf.get_variable("bbox_w", initializer=tf.constant(bbox_w))
        bbox_b = tf.get_variable("bbox_b", initializer=tf.constant(bbox_b))
        self._small_lr_vars.append(bbox_w.name)
        self._small_lr_vars.append(bbox_b.name)
    else:
        bbox_w = tf.get_variable("bbox_w", [size, num_classes * 4])
        bbox_b = tf.get_variable("bbox_b", [num_classes * 4])
    self._bbox_pred = bbox_pred = tf.matmul(output, bbox_w) + bbox_b
    # Permute num_steps and batch_size.
    bbox_targets = tf.reshape(tf.transpose(self._bbox_targets, (1, 0, 2)),
                              [-1, 4 * num_classes])
    self._bbox_cost = bbox_cost = (
        tf.nn.l2_loss(bbox_pred - bbox_targets) / batch_size / num_steps / 4.)
    # self._bbox_cost = bbox_cost = tf.constant(0.)

    # Ending signal.
    end_w = tf.get_variable("end_w", [size, 1])
    end_b = tf.get_variable("end_b", [1],
                            initializer=tf.constant_initializer(0.))
    end_pred = tf.matmul(output, end_w) + end_b
    end_targets = tf.reshape(tf.transpose(self._end_targets), [-1, 1])
    self._end_probs = tf.nn.sigmoid(end_pred, name='end_probs')
    loss_ending = tf.nn.sigmoid_cross_entropy_with_logits(
        end_pred, end_targets, name='loss_ending')
    self._end_cost = end_cost = (
        tf.reduce_sum(loss_ending) / batch_size / num_steps)

    self._cost = cost = (cls_cost * self.cls_weight
                         + bbox_cost * self.bbox_weight
                         + end_cost * self.ending_weight)
    self._final_state = state

    if not is_training:
        return

    self._lr = tf.Variable(1.0, trainable=False)
    tvars = tf.trainable_variables()
    n_tvars = []
    s_tvars = []
    for tvar in tvars:
        if tvar.name in self._small_lr_vars:
            s_tvars.append(tvar)
        else:
            n_tvars.append(tvar)
    # Pre-initialized variables train with a 100x smaller learning rate.
    s_grads, global_norm = tf.clip_by_global_norm(
        tf.gradients(cost, s_tvars), config.max_grad_norm)
    n_grads, global_norm = tf.clip_by_global_norm(
        tf.gradients(cost, n_tvars), config.max_grad_norm)
    n_optimizer = tf.train.MomentumOptimizer(self.lr, self.momentum)
    s_optimizer = tf.train.MomentumOptimizer(self.lr * 0.01, self.momentum)
    self._train_op = tf.group(
        n_optimizer.apply_gradients(zip(n_grads, n_tvars)),
        s_optimizer.apply_gradients(zip(s_grads, s_tvars)))
    self.global_norm = global_norm
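The call basic_rnn_seq2seq(inputs, inputs[::-1], cell) above runs the model as a sequence autoencoder: the decoder reproduces the inputs back-to-front, and reversing outputs_rev restores frame order. A minimal, self-contained sketch of the same pattern, using a stock GRUCell instead of ResLSTMCell and hypothetical sizes:

import tensorflow as tf
from tensorflow.models.rnn import rnn_cell, seq2seq

num_steps, batch_size, size = 4, 8, 32  # hypothetical sizes
inputs = [tf.placeholder(tf.float32, [batch_size, size])
          for _ in range(num_steps)]
cell = rnn_cell.GRUCell(size)

# Decode the sequence back-to-front, then restore the original order.
outputs_rev, state = seq2seq.basic_rnn_seq2seq(inputs, inputs[::-1], cell)
outputs = outputs_rev[::-1]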
weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]

# Decoder input: prepend a "GO" token and drop the final
# token of the encoder input.
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.float32, name="GO")]
           + enc_inp[:-1])

# Initial memory value for the recurrence.
prev_mem = tf.zeros((batch_size, memory_dim))

cell = rnn_cell.BasicLSTMCell(memory_dim)
# enc_inp = np.tile(enc_inp, 2).tolist()
logits, state = seq2seq.basic_rnn_seq2seq(enc_inp, dec_inp, cell)

for i, inp in enumerate(enc_inp):
    print(i, inp)
print("logits", logits)
print('labels', labels)

loss = seq2seq.sequence_loss(logits, labels, weights)
summary_op = tf.scalar_summary("loss", loss)

# Magnitude of the final hidden state, as a training diagnostic.
square = tf.square(state)
sum_sq = tf.reduce_sum(square)  # renamed from `sum` to avoid shadowing the builtin
magnitude = tf.sqrt(sum_sq)
tf.scalar_summary("magnitude at t=1", magnitude)

learning_rate = 0.05
momentum = 0.9
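The snippet stops after defining learning_rate and momentum. By analogy with the GRU example earlier in this section, a plausible continuation (an assumption, not part of the source) would attach a momentum optimizer to the loss:

optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)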