def stack_rnn_seq2seq_with_bottle_memory(encoder_inputs,
                                         decoder_inputs,
                                         cell,
                                         stack_num=3,
                                         dtype=dtypes.float32,
                                         scope=None):
    """Stacked RNN seq2seq model that also returns the encoder state.

    A shallow copy of `cell` is made for every layer. The first encoder layer
    reads `encoder_inputs`; each further encoder layer reads the outputs of
    the layer below it. The first decoder layer is `seq2seq.rnn_decoder`
    seeded with the final encoder state; further decoder layers are plain
    `core_rnn.static_rnn` passes over the previous decoder outputs.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        cell: core_rnn_cell.RNNCell defining the cell function and size.
        stack_num: Controls how many layers are stacked on each side.
        dtype: The dtype of the initial state of the RNN cell
            (default: tf.float32).
        scope: Optional variable scope. When given, it is used for every
            layer instead of the per-layer default names.

    Returns:
        A tuple (outputs, enc_state, dec_state):
        outputs: A list of the same length as decoder_inputs of 2D Tensors
            with shape [batch_size x output_size] containing the outputs of
            the last decoder layer.
        enc_state: The final state of the last encoder layer.
        dec_state: The final state of the last decoder layer.
    """
    # NOTE(review): range(2, stack_num) adds stack_num - 2 layers on top of
    # the first one, i.e. stack_num - 1 layers per side (two for the default
    # of 3). Confirm whether that is the intended depth.
    # NOTE(review): a non-None `scope` is opened again for every layer;
    # confirm that this is intended.
    upper_layers = range(2, stack_num)

    # Encoder stack.
    with variable_scope.variable_scope(scope or "stack_rnn_enc_1"):
        encoder_cell = copy.copy(cell)
        enc_output, enc_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)
    for layer in upper_layers:
        encoder_scope = scope or "stack_rnn_encoder_%d" % layer
        with variable_scope.variable_scope(encoder_scope):
            encoder_cell = copy.copy(cell)
            enc_output, enc_state = core_rnn.static_rnn(
                encoder_cell, enc_output, dtype=dtype)

    # Decoder stack; only the first layer is seeded with the encoder state.
    with variable_scope.variable_scope(scope or "stack_rnn_dec_1"):
        decoder_cell = copy.copy(cell)
        dec_output, dec_state = seq2seq.rnn_decoder(
            decoder_inputs, enc_state, decoder_cell)
    for layer in upper_layers:
        decoder_scope = scope or "stack_rnn_decoder_%d" % layer
        with variable_scope.variable_scope(decoder_scope):
            decoder_cell = copy.copy(cell)
            dec_output, dec_state = core_rnn.static_rnn(
                decoder_cell, dec_output, dtype=dtype)

    return dec_output, enc_state, dec_state
def custom_rnn_seq2seq(encoder_inputs,
                       decoder_inputs,
                       enc_cell,
                       dec_cell,
                       dtype=dtypes.float32,
                       initial_state=None,
                       use_previous=False,
                       scope=None,
                       num_units=0):
    """Encode with `enc_cell`, then decode from a projected encoder state.

    The final encoder state is passed through two tanh projections
    (variables "v" and "v_prime") and the results are packed into an
    `LSTMStateTuple` that seeds the decoder.

    Args:
        encoder_inputs: Inputs handed to `core_rnn.static_rnn`.
        decoder_inputs: Inputs handed to `seq2seq.rnn_decoder`; only used
            when `use_previous` is False.
        enc_cell: RNN cell used by the encoder.
        dec_cell: RNN cell used by the decoder.
        dtype: dtype forwarded to the encoder RNN.
        initial_state: Optional initial state for the encoder RNN.
        use_previous: When False, decode with `seq2seq.rnn_decoder`;
            otherwise delegate to `infer`.
        scope: Optional variable scope; defaults to "custom_rnn_seq2seq".
        num_units: Forwarded to `infer` when `use_previous` is True.

    Returns:
        The result of `seq2seq.rnn_decoder` when `use_previous` is False,
        otherwise the result of `infer`.
    """
    # `dim_hidden` and `infer` are not defined in this function; they are
    # resolved from the enclosing/module scope.
    with variable_scope.variable_scope(scope or "custom_rnn_seq2seq"):
        _, enc_state = core_rnn.static_rnn(
            enc_cell,
            encoder_inputs,
            dtype=dtype,
            scope=scope,
            initial_state=initial_state)
        # Call get_shape(): printing the bound method only shows its repr,
        # not the static shape of the encoder state.
        print(enc_state.get_shape())

        c = tf.tanh(
            tf.matmul(tf.get_variable("v", [dim_hidden, dim_hidden]),
                      enc_state))
        h_prime_init = tf.tanh(
            tf.matmul(tf.get_variable("v_prime", [dim_hidden, dim_hidden]),
                      c))

        if not use_previous:
            return seq2seq.rnn_decoder(
                decoder_inputs,
                LSTMStateTuple(c, h_prime_init),
                dec_cell,
                scope=scope)
        return infer(LSTMStateTuple(c, h_prime_init), dec_cell, num_units)
def RNN(x, weights, biases):
    """Run a single-layer LSTM over `x` and project its last step output.

    `n_input`, `n_steps` and `n_hidden` are read from the enclosing/module
    scope.

    Args:
        x: Rank-3 input tensor (it is transposed with a 3-element
            permutation).
        weights: Mapping with an 'out' entry used as the projection matrix.
        biases: Mapping with an 'out' entry added after the projection.

    Returns:
        `matmul(last_step_output, weights['out']) + biases['out']`.
    """
    # NOTE(review): the original comments describe the input as
    # (batch_size, n_steps, n_input) and this step as swapping batch and
    # step axes, but perm=[2, 0, 1] moves the LAST axis to the front.
    # Confirm the actual input layout against the caller.
    permuted = tf.transpose(x, [2, 0, 1])

    # Flatten to rows of width n_input, then cut into n_steps pieces so the
    # static RNN receives one tensor per time step.
    flattened = tf.reshape(permuted, [-1, n_input])
    step_inputs = tf.split(axis=0, num_or_size_splits=n_steps,
                           value=flattened)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(lstm_cell, step_inputs,
                                     dtype=tf.float32)

    # Linear activation on the output of the final time step only.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def basic_rnn_seq2seq(encoder_inputs,
                      decoder_inputs,
                      cell,
                      dtype=dtypes.float32,
                      scope=None):
    """Basic RNN sequence-to-sequence model.

    Runs an RNN over `encoder_inputs` to obtain a final state, then runs
    `rnn_decoder` on `decoder_inputs` starting from that state. The same
    `cell` object is handed to both the encoder and the decoder.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        cell: core_rnn_cell.RNNCell defining the cell function and size.
        dtype: The dtype of the initial state of the RNN cell
            (default: tf.float32).
        scope: VariableScope for the created subgraph;
            default: "basic_rnn_seq2seq".

    Returns:
        Whatever `rnn_decoder` returns; per the original documentation a
        tuple (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors
            with shape [batch_size x output_size].
        state: The state of each decoder cell in the final time-step, a 2D
            Tensor of shape [batch_size x cell.state_size].
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with variable_scope.variable_scope(scope_name):
        # Only the final encoder state is needed; per-step outputs are
        # discarded.
        _unused_outputs, final_encoder_state = core_rnn.static_rnn(
            cell, encoder_inputs, dtype=dtype)
        return rnn_decoder(decoder_inputs, final_encoder_state, cell)
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
    """Run the wrapped cell over `inputs`, preserving the input container.

    Depending on `self._use_dynamic_rnn` the cell is unrolled with
    `rnn.dynamic_rnn` (called with `time_major=True`) or with
    `contrib_rnn.static_rnn`. A Python list input yields a list of outputs;
    any other input yields stacked outputs.

    Args:
        inputs: Either a Python list of per-step inputs or a single object
            that `array_ops.unstack` can split (presumably a time-major
            tensor - confirm against callers).
        initial_state: Optional initial state forwarded to the RNN call.
        dtype: Optional dtype forwarded to the RNN call.
        sequence_length: Optional lengths forwarded to the RNN call.
        scope: Optional variable scope forwarded to the RNN call.

    Returns:
        A pair (outputs, state) as produced by the underlying RNN call, with
        `outputs` converted back to the container type of `inputs`.
    """
    given_list = isinstance(inputs, list)

    if not self._use_dynamic_rnn:
        # Static unrolling consumes a list of per-step inputs.
        step_inputs = inputs if given_list else array_ops.unstack(inputs)
        outputs, state = contrib_rnn.static_rnn(
            self._cell,
            step_inputs,
            initial_state=initial_state,
            dtype=dtype,
            sequence_length=sequence_length,
            scope=scope)
        if not given_list:
            # Convert outputs back to tensor.
            outputs = array_ops.stack(outputs)
        return outputs, state

    # Dynamic unrolling consumes one stacked input.
    stacked_inputs = array_ops.stack(inputs) if given_list else inputs
    outputs, state = rnn.dynamic_rnn(
        self._cell,
        stacked_inputs,
        sequence_length=sequence_length,
        initial_state=initial_state,
        dtype=dtype,
        time_major=True,
        scope=scope)
    if given_list:
        # Convert outputs back to list.
        outputs = array_ops.unstack(outputs)
    return outputs, state
def benchmarkTfRNNLSTMTraining(self):
    """Benchmark a training step of a stacked `LSTMCell` static RNN.

    For every configuration from `self._GetTestConfig()` a fresh graph is
    built on "/gpu:0", the gradients of the RNN outputs and final state with
    respect to all trainable variables are grouped into one op, and that op
    is handed to `self._BenchmarkOp`.
    """
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]
            initializer = init_ops.random_uniform_initializer(
                -0.01, 0.01, seed=127)

            # Build one LSTMCell per layer. The previous code created a
            # single cell instance and then invoked it with no arguments as
            # if it were a factory, which fails before the benchmark runs.
            cells = [
                core_rnn_cell_impl.LSTMCell(
                    num_units=num_units,
                    initializer=initializer,
                    state_is_tuple=True) for _ in range(num_layers)
            ]
            multi_cell = core_rnn_cell_impl.MultiRNNCell(cells)

            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)
            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)
            self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" %
                              (config_name, self._GetConfigDesc(config)))
def benchmarkTfRNNLSTMBlockCellTraining(self):
    """Benchmark a training step of a stacked `LSTMBlockCell` static RNN.

    For every configuration from `self._GetTestConfig()` a fresh graph is
    built on "/gpu:0", gradients of the RNN outputs and final state with
    respect to all trainable variables are grouped into one op, and that op
    is passed to `self._BenchmarkOp`.
    """
    for config_name, config in self._GetTestConfig().items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            zero_step = array_ops.zeros([batch_size, num_units],
                                        dtypes.float32)
            inputs = seq_length * [zero_step]

            # One independent block cell per layer.
            layer_cells = [
                lstm_ops.LSTMBlockCell(num_units=num_units)
                for _ in range(num_layers)
            ]
            multi_cell = core_rnn_cell_impl.MultiRNNCell(layer_cells)

            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)

            label = "tf_rnn_lstm_block_cell %s %s" % (
                config_name, self._GetConfigDesc(config))
            self._BenchmarkOp(training_op, label)
def __init__(self, embedding, size, num_layers, max_length, dtype, **kwargs):
    """Build the encoder graph: placeholders, embedding lookup and GRU RNN.

    Args:
        embedding: Embedding parameters passed to
            `embedding_ops.embedding_lookup`.
        size: Number of units given to each `GRUCell`.
        num_layers: Number of GRU layers; values above 1 stack the cells in
            a `MultiRNNCell`.
        max_length: Number of encoder steps, before the two extra slots
            added for _GO and _EOS.
        dtype: dtype forwarded to `static_rnn`.
        **kwargs: Optional `lengths`, `inputs` and `weights` entries that
            replace the default placeholders.
    """
    self.embedding = embedding
    self.size = size
    self.num_layers = num_layers

    # Build one GRUCell per layer. Repeating a single cell instance
    # ([cell] * n) hands the same object to every layer, unlike the other
    # multi-layer code here, which constructs a separate cell for each one.
    if self.num_layers > 1:
        self.cell = tf.contrib.rnn.MultiRNNCell(
            [GRUCell(self.size) for _ in range(self.num_layers)])
    else:
        self.cell = GRUCell(self.size)

    max_length += 2  # account for _GO and _EOS

    # The default placeholders are created even when the caller supplies a
    # replacement through kwargs, because the default expression is
    # evaluated before `kwargs.get` runs.
    self.lengths = kwargs.get(
        'lengths',
        tf.placeholder(tf.int32, shape=[None], name="encoder_lengths"))
    self.inputs = kwargs.get('inputs', [
        tf.placeholder(
            tf.int32, shape=[None], name="encoder_input{0}".format(i))
        for i in xrange(max_length)
    ])
    self.weights = kwargs.get('weights', [
        tf.placeholder(
            tf.float32, shape=[None], name="encoder_weight{0}".format(i))
        for i in xrange(max_length)
    ])

    inputs = [
        embedding_ops.embedding_lookup(embedding, i) for i in self.inputs
    ]
    self.outputs, self.state = static_rnn(
        self.cell, inputs, sequence_length=self.lengths, dtype=dtype)

    # Give every step output a middle axis of size 1 and concatenate along
    # that axis to form the attention states.
    top_states = [
        array_ops.reshape(e, [-1, 1, self.cell.output_size])
        for e in self.outputs
    ]
    self.attention_states = array_ops.concat(top_states, 1)
def basic_rnn_seq2seq_with_bottle_memory(encoder_inputs,
                                         decoder_inputs,
                                         cell,
                                         dtype=dtypes.float32,
                                         scope=None):
    """Basic RNN seq2seq model that also returns the final encoder state.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        cell: core_rnn_cell.RNNCell defining the cell function and size; the
            same object is used for the encoder and the decoder.
        dtype: The dtype of the initial state of the RNN cell
            (default: tf.float32).
        scope: VariableScope for the created subgraph;
            default: "basic_rnn_seq2seq".

    Returns:
        A tuple (outputs, enc_state, dec_state):
        outputs: A list of the same length as decoder_inputs of 2D Tensors
            with shape [batch_size x output_size] containing the generated
            outputs.
        enc_state: The encoder state in the final time-step.
        dec_state: The decoder state in the final time-step.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with variable_scope.variable_scope(scope_name):
        # Encoder pass: keep only the final state.
        _unused_outputs, enc_state = core_rnn.static_rnn(
            cell, encoder_inputs, dtype=dtype)
        # Decoder pass seeded with the encoder state.
        decoded = seq2seq.rnn_decoder(decoder_inputs, enc_state, cell)
        outputs, dec_state = decoded
        return outputs, enc_state, dec_state
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
    """Unroll the wrapped cell and return outputs in the caller's format.

    The cell is unrolled with `rnn.dynamic_rnn` (using `time_major=True`)
    when `self._use_dynamic_rnn` is set, otherwise with
    `contrib_rnn.static_rnn`. List inputs produce list outputs; non-list
    inputs produce stacked outputs.

    Args:
        inputs: A Python list of per-step inputs, or a single object that
            `array_ops.unstack` can split into steps.
        initial_state: Optional initial state for the RNN.
        dtype: Optional dtype for the RNN.
        sequence_length: Optional sequence lengths for the RNN.
        scope: Optional variable scope for the RNN.

    Returns:
        A pair (outputs, state) from the underlying RNN call.
    """
    # Arguments common to both unrolling strategies.
    rnn_kwargs = dict(
        initial_state=initial_state,
        dtype=dtype,
        sequence_length=sequence_length,
        scope=scope)
    is_list = isinstance(inputs, list)

    if self._use_dynamic_rnn:
        # dynamic_rnn works on a stacked input, so lists are stacked on the
        # way in and unstacked on the way out.
        if is_list:
            inputs = array_ops.stack(inputs)
        outputs, state = rnn.dynamic_rnn(
            self._cell, inputs, time_major=True, **rnn_kwargs)
        if is_list:
            outputs = array_ops.unstack(outputs)
    else:
        # static_rnn works on a list, so non-list inputs are unstacked on
        # the way in and stacked again on the way out.
        if not is_list:
            inputs = array_ops.unstack(inputs)
        outputs, state = contrib_rnn.static_rnn(
            self._cell, inputs, **rnn_kwargs)
        if not is_list:
            outputs = array_ops.stack(outputs)

    return outputs, state
def testEmbeddingAttentionDecoder(self):
    """Check output and state shapes of `embedding_attention_decoder`."""
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            cell_fn = lambda: core_rnn_cell_impl.GRUCell(2)
            cell = cell_fn()
            enc_outputs, enc_state = core_rnn.static_rnn(
                cell, inp, dtype=dtypes.float32)
            # Give every encoder output a middle axis of size 1 and
            # concatenate along it to form the attention states.
            attn_states = array_ops.concat([
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in enc_outputs
            ], 1)
            dec_inp = [
                constant_op.constant(i, dtypes.int32, shape=[2])
                for i in range(3)
            ]
            # Use a new cell instance since the attention decoder uses a
            # different variable scope than the encoder.
            dec, mem = seq2seq_lib.embedding_attention_decoder(
                dec_inp,
                enc_state,
                attn_states,
                cell_fn(),
                num_symbols=4,
                embedding_size=2,
                output_size=3)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 3), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def __init__(self, is_training, config):
    """Build the language-model graph and, when training, the train op.

    Args:
        is_training: Python flag; when falsy the constructor returns after
            the cost is defined, without weight decay or a train op. Note
            that `self.is_training` is a separate boolean placeholder that
            scales the dropout rates at run time.
        config: Object providing batch_size, num_steps, num_layers,
            vocab_size, hidden_sizes, drop_x, drop_o, tied, weight_decay and
            max_grad_norm.
    """
    batch_size = config.batch_size
    num_steps = config.num_steps
    num_layers = config.num_layers
    vocab_size = config.vocab_size
    in_size = config.hidden_sizes[0]
    out_size = config.hidden_sizes[-1]
    self.batch_size = batch_size
    self.num_steps = num_steps
    self.num_layers = num_layers
    self.in_size = in_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])
    self.is_training = tf.placeholder(dtype=tf.bool, shape=[])

    # Keep probabilities become 1 whenever the placeholder is fed False.
    keep_prob_x = 1 - (tf.to_float(self.is_training) * config.drop_x)
    keep_prob_o = 1 - (tf.to_float(self.is_training) * config.drop_o)

    # Dropout is applied to the embedding matrix with one mask entry per
    # vocabulary row (noise_shape=[vocab_size, 1]).
    embedding = tf.get_variable("embedding", [vocab_size, in_size])
    embedding = tf.nn.dropout(
        embedding, keep_prob_x, noise_shape=[vocab_size, 1])
    inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    def rancell(size):
        return tf.contrib.rnn.DropoutWrapper(RANCell(size), keep_prob_o)

    layer_sizes = config.hidden_sizes[1:]
    cell = tf.contrib.rnn.MultiRNNCell([rancell(s) for s in layer_sizes])

    inputs = tf.unstack(inputs, num=num_steps, axis=1)
    self._initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, self._final_state = static_rnn(cell, inputs,
                                            self._initial_state)
    stacked_outputs = tf.stack(outputs, axis=1)
    output = tf.reshape(stacked_outputs, [-1, out_size])

    # With tied weights the (dropped-out) embedding doubles as the softmax
    # matrix; otherwise a separate variable is created.
    if config.tied:
        softmax_w = tf.transpose(embedding)
    else:
        softmax_w = tf.get_variable("softmax_w", [out_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b

    flat_targets = tf.reshape(self._targets, [-1])
    unit_weights = tf.ones([batch_size * num_steps])
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [unit_weights])
    pred_loss = tf.reduce_sum(loss) / batch_size
    self._cost = cost = pred_loss

    if not is_training:
        return

    # Training only: add L2 weight decay over all trainable variables.
    tvars = tf.trainable_variables()
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars])
    self._cost = cost = pred_loss + config.weight_decay * l2_loss
    self._lr = tf.Variable(0.0, trainable=False)

    # Print each variable's name and shape and total up the element counts.
    param_total = None
    for var in tvars:
        sh = var.get_shape().as_list()
        print(var.name, sh)
        count = np.prod(sh)
        param_total = count if param_total is None else param_total + count
    self._nvars = param_total
    print(self._nvars, 'total variables')

    raw_grads = tf.gradients(cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
    # `self.lr` is not defined in this method; presumably a property of the
    # enclosing class exposing `self._lr` - confirm.
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def testGrid3LSTMCellReLUWithRNN(self):
    """Unroll a ReLU `Grid3LSTMCell` with static_rnn; check shapes/values."""
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    const_init = init_ops.constant_initializer(0.5)
    with variable_scope.variable_scope('root', initializer=const_init):
        cell = grid_rnn_cell.Grid3LSTMCell(
            num_units=num_units, non_recurrent_fn=nn_ops.relu)

        # The same placeholder object is used for every time step.
        step_input = array_ops.placeholder(
            dtypes.float32, shape=(batch_size, input_size))
        inputs = max_length * [step_input]

        outputs, state = core_rnn.static_rnn(
            cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    self.assertEqual(state.get_shape(), (batch_size, 8))

    for out, inp in zip(outputs, inputs):
        out_shape = out.get_shape()
        self.assertEqual(out_shape[0], inp.get_shape()[0])
        self.assertEqual(out_shape[1], num_units)
        self.assertEqual(out.dtype, inp.dtype)

    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())

        input_value = np.ones((batch_size, input_size))
        fetches = outputs + [state]
        values = sess.run(fetches, feed_dict={inputs[0]: input_value})
        for v in values:
            self.assertTrue(np.all(np.isfinite(v)))
def testGrid1LSTMCellWithRNN(self):
    """Unroll a `Grid1LSTMCell` with static_rnn and check shapes/values."""
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    const_init = init_ops.constant_initializer(0.5)
    with variable_scope.variable_scope('root', initializer=const_init):
        cell = grid_rnn_cell.Grid1LSTMCell(num_units=num_units)

        # for 1-LSTM, we only feed the first step; the remaining steps all
        # share one zero tensor.
        first_step = array_ops.placeholder(
            dtypes.float32, shape=(batch_size, input_size))
        zero_steps = (max_length - 1) * [
            array_ops.zeros([batch_size, input_size])
        ]
        inputs = [first_step] + zero_steps

        outputs, state = core_rnn.static_rnn(
            cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    self.assertEqual(state.get_shape(), (batch_size, 4))

    for out, inp in zip(outputs, inputs):
        self.assertEqual(out.get_shape(), (3, num_units))
        self.assertEqual(out.dtype, inp.dtype)

    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())

        input_value = np.ones((batch_size, input_size))
        fetches = outputs + [state]
        values = sess.run(fetches, feed_dict={inputs[0]: input_value})
        for v in values:
            self.assertTrue(np.all(np.isfinite(v)))
def testEmbeddingRNNDecoder(self):
    """Check output and LSTM state shapes of `embedding_rnn_decoder`."""
    with self.test_session() as sess:
        const_init = init_ops.constant_initializer(0.5)
        with variable_scope.variable_scope("root", initializer=const_init):
            inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            cell_fn = lambda: core_rnn_cell_impl.BasicLSTMCell(2)
            encoder_cell = cell_fn()
            _, enc_state = core_rnn.static_rnn(
                encoder_cell, inp, dtype=dtypes.float32)

            dec_inp = []
            for symbol in range(3):
                dec_inp.append(
                    constant_op.constant(symbol, dtypes.int32, shape=[2]))

            # Use a new cell instance since the embedding RNN decoder uses a
            # different variable scope than the encoder.
            dec, mem = seq2seq_lib.embedding_rnn_decoder(
                dec_inp,
                enc_state,
                cell_fn(),
                num_symbols=4,
                embedding_size=2)
            sess.run([variables.global_variables_initializer()])

            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 2), res[0].shape)

            res = sess.run([mem])
            self.assertEqual(1, len(res))
            final_state = res[0]
            self.assertEqual((2, 2), final_state.c.shape)
            self.assertEqual((2, 2), final_state.h.shape)
def testAttentionDecoder2(self):
    """Check `attention_decoder` shapes when two attention heads are used."""
    with self.test_session() as sess:
        const_init = init_ops.constant_initializer(0.5)
        with variable_scope.variable_scope("root", initializer=const_init):
            cell_fn = lambda: core_rnn_cell_impl.GRUCell(2)
            encoder_cell = cell_fn()
            inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            enc_outputs, enc_state = core_rnn.static_rnn(
                encoder_cell, inp, dtype=dtypes.float32)

            # Give every encoder output a middle axis of size 1, then
            # concatenate along that axis.
            per_step = []
            for e in enc_outputs:
                per_step.append(
                    array_ops.reshape(e, [-1, 1, encoder_cell.output_size]))
            attn_states = array_ops.concat(per_step, 1)

            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp,
                enc_state,
                attn_states,
                cell_fn(),
                output_size=4,
                num_heads=2)
            sess.run([variables.global_variables_initializer()])

            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def testDynamicAttentionDecoderStateIsTuple(self):
    """Check `attention_decoder` shapes with a two-layer tuple-state LSTM."""
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            single_cell = lambda: core_rnn_cell_impl.BasicLSTMCell(  # pylint: disable=g-long-lambda
                2, state_is_tuple=True)
            cell_fn = lambda: core_rnn_cell_impl.MultiRNNCell(  # pylint: disable=g-long-lambda
                cells=[single_cell() for _ in range(2)], state_is_tuple=True)
            cell = cell_fn()
            # NOTE(review): `inp` is a single rank-3 constant, not a list of
            # per-step tensors - confirm static_rnn accepts this form.
            inp = constant_op.constant(0.5, shape=[2, 2, 2])
            enc_outputs, enc_state = core_rnn.static_rnn(
                cell, inp, dtype=dtypes.float32)
            attn_states = array_ops.concat([
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in enc_outputs
            ], 1)
            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
            # Use a new cell instance since the attention decoder uses a
            # different variable scope than the encoder.
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp, enc_state, attn_states, cell_fn(), output_size=4)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)

            res = sess.run([mem])
            self.assertEqual(2, len(res[0]))
            self.assertEqual((2, 2), res[0][0].c.shape)
            self.assertEqual((2, 2), res[0][0].h.shape)
            self.assertEqual((2, 2), res[0][1].c.shape)
            self.assertEqual((2, 2), res[0][1].h.shape)
def benchmarkTfRNNLSTMBlockCellTraining(self):
    """Benchmark one training step of a stacked `LSTMBlockCell` RNN.

    Each configuration from `self._GetTestConfig()` gets its own graph on
    "/gpu:0". The op being benchmarked groups the gradients of the RNN
    outputs and final state with respect to every trainable variable.
    """
    test_configs = self._GetTestConfig()
    for config_name in test_configs:
        config = test_configs[config_name]
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]

            def make_cell(units=num_units):
                # `units` is bound at definition time, so the factory does
                # not depend on the loop variable later on.
                return lstm_ops.LSTMBlockCell(num_units=units)

            stacked_cells = []
            for _ in range(num_layers):
                stacked_cells.append(make_cell())
            multi_cell = rnn_cell.MultiRNNCell(stacked_cells)

            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)

            self._BenchmarkOp(training_op, "tf_rnn_lstm_block_cell %s %s" %
                              (config_name, self._GetConfigDesc(config)))
def benchmarkTfRNNLSTMTraining(self):
    """Benchmark a training step of a stacked `LSTMCell` static RNN.

    For every configuration from `self._GetTestConfig()` a fresh graph is
    built on "/gpu:0", the gradients of the RNN outputs and final state with
    respect to all trainable variables are grouped into one op, and that op
    is handed to `self._BenchmarkOp`.
    """
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]
            initializer = init_ops.random_uniform_initializer(
                -0.01, 0.01, seed=127)

            # Build a separate LSTMCell per layer instead of repeating one
            # instance ([cell] * num_layers), matching the block-cell
            # benchmark, which also constructs one cell per layer.
            cells = [
                core_rnn_cell_impl.LSTMCell(
                    num_units=num_units,
                    initializer=initializer,
                    state_is_tuple=True) for _ in range(num_layers)
            ]
            multi_cell = core_rnn_cell_impl.MultiRNNCell(cells)

            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)
            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)
            self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" %
                              (config_name, self._GetConfigDesc(config)))
def testEmbeddingAttentionDecoder(self):
    """Check output and state shapes of `embedding_attention_decoder`."""
    with self.test_session() as sess:
        const_init = init_ops.constant_initializer(0.5)
        with variable_scope.variable_scope("root", initializer=const_init):
            inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            cell_fn = lambda: core_rnn_cell_impl.GRUCell(2)
            encoder_cell = cell_fn()
            enc_outputs, enc_state = core_rnn.static_rnn(
                encoder_cell, inp, dtype=dtypes.float32)

            # Give every encoder output a middle axis of size 1, then
            # concatenate along that axis.
            per_step = []
            for e in enc_outputs:
                per_step.append(
                    array_ops.reshape(e, [-1, 1, encoder_cell.output_size]))
            attn_states = array_ops.concat(per_step, 1)

            dec_inp = []
            for symbol in range(3):
                dec_inp.append(
                    constant_op.constant(symbol, dtypes.int32, shape=[2]))

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.embedding_attention_decoder(
                dec_inp,
                enc_state,
                attn_states,
                cell_fn(),
                num_symbols=4,
                embedding_size=2,
                output_size=3)
            sess.run([variables.global_variables_initializer()])

            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 3), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def testDynamicAttentionDecoderStateIsTuple(self):
    """Check `attention_decoder` shapes with a two-layer LSTM cell."""
    with self.test_session() as sess:
        const_init = init_ops.constant_initializer(0.5)
        with variable_scope.variable_scope("root", initializer=const_init):

            def cell_fn():
                # Two-layer LSTM stack; every call builds fresh cells.
                return core_rnn_cell_impl.MultiRNNCell(
                    cells=[
                        core_rnn_cell_impl.BasicLSTMCell(2) for _ in range(2)
                    ])

            encoder_cell = cell_fn()
            # NOTE(review): `inp` is a single rank-3 constant, not a list of
            # per-step tensors - confirm static_rnn accepts this form.
            inp = constant_op.constant(0.5, shape=[2, 2, 2])
            enc_outputs, enc_state = core_rnn.static_rnn(
                encoder_cell, inp, dtype=dtypes.float32)

            per_step = []
            for e in enc_outputs:
                per_step.append(
                    array_ops.reshape(e, [-1, 1, encoder_cell.output_size]))
            attn_states = array_ops.concat(per_step, 1)

            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp, enc_state, attn_states, cell_fn(), output_size=4)
            sess.run([variables.global_variables_initializer()])

            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)

            res = sess.run([mem])
            layer_states = res[0]
            self.assertEqual(2, len(layer_states))
            self.assertEqual((2, 2), layer_states[0].c.shape)
            self.assertEqual((2, 2), layer_states[0].h.shape)
            self.assertEqual((2, 2), layer_states[1].c.shape)
            self.assertEqual((2, 2), layer_states[1].h.shape)
def testAttentionDecoder1(self):
    """Check output and state shapes of `attention_decoder` with a GRU."""
    with self.test_session() as sess:
        const_init = init_ops.constant_initializer(0.5)
        with variable_scope.variable_scope("root", initializer=const_init):
            cell_fn = lambda: core_rnn_cell_impl.GRUCell(2)
            encoder_cell = cell_fn()
            inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            enc_outputs, enc_state = core_rnn.static_rnn(
                encoder_cell, inp, dtype=dtypes.float32)

            # Give every encoder output a middle axis of size 1, then
            # concatenate along that axis.
            target_shape = [-1, 1, encoder_cell.output_size]
            reshaped = [
                array_ops.reshape(e, target_shape) for e in enc_outputs
            ]
            attn_states = array_ops.concat(reshaped, 1)

            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

            # Create a new cell instance for the decoder, since it uses a
            # different variable scope.
            decoder_cell = cell_fn()
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp, enc_state, attn_states, decoder_cell, output_size=4)
            sess.run([variables.global_variables_initializer()])

            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=dtypes.float32,
                          scope=None,
                          beam_search=True,
                          beam_size=10):
    """Embedding RNN sequence-to-sequence model.

    The encoder wraps `cell` in an `EmbeddingWrapper` and runs it over
    `encoder_inputs` to obtain a final state. The decoder is
    `embedding_rnn_decoder`, started from that state. When no
    `output_projection` is supplied, the decoder cell is wrapped in an
    `OutputProjectionWrapper` of size `num_decoder_symbols`.

    Args:
        encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
        decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        num_encoder_symbols: Integer; number of symbols on the encoder side.
        num_decoder_symbols: Integer; number of symbols on the decoder side.
        embedding_size: Integer, the length of the embedding vector for each
            symbol.
        output_projection: None or a pair (W, B) of output projection weights
            and biases; W has shape [output_size x num_decoder_symbols] and B
            has shape [num_decoder_symbols]; if provided and
            feed_previous=True, each fed previous output will first be
            multiplied by W and added B.
        feed_previous: Boolean or scalar Boolean Tensor; if True, only the
            first of decoder_inputs will be used (the "GO" symbol), and all
            other decoder inputs will be taken from previous outputs (as in
            embedding_rnn_decoder). If False, decoder_inputs are used as
            given (the standard decoder case).
        dtype: The dtype of the initial state for both the encoder and
            decoder rnn cells (default: tf.float32).
        scope: VariableScope for the created subgraph; defaults to
            "embedding_rnn_seq2seq".
        beam_search: Forwarded unchanged to `embedding_rnn_decoder`.
        beam_size: Forwarded unchanged to `embedding_rnn_decoder`.

    Returns:
        Whatever `embedding_rnn_decoder` returns. Per the original
        documentation this is a tuple (outputs, state), where outputs is a
        list of the same length as decoder_inputs of 2D Tensors with shape
        [batch_size x num_decoder_symbols] and state is the decoder state.
    """
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder: project to the vocabulary inside the cell unless the
        # caller supplied an explicit projection.
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)
        else:
            decoder_cell = cell

        decoder_options = dict(
            output_projection=output_projection,
            feed_previous=feed_previous,
            beam_search=beam_search,
            beam_size=beam_size)
        return embedding_rnn_decoder(decoder_inputs, encoder_state,
                                     decoder_cell, num_decoder_symbols,
                                     embedding_size, **decoder_options)
def get_RNN_from_words(model, word_idxs, reuse, scope=None):
    """Encode a word-index sequence with a GRU and return its final state.

    Args:
        model: Object providing `word_emb` (embedding parameters) and
            `embed_size` (GRU unit count).
        word_idxs: Indices looked up in `model.word_emb`; the looked-up
            vectors are unstacked along axis 1 into RNN steps.
        reuse: Passed as `reuse` to the variable scope.
        scope: Optional variable scope; defaults to 'RNN_abstraction'.

    Returns:
        A pair (encoder_state, [word_vecs]): the final GRU state and a
        one-element list holding the looked-up word vectors.
    """
    scope_name = scope or 'RNN_abstraction'
    with variable_scope.variable_scope(scope_name, reuse=reuse):
        # Look up one embedding vector per word index.
        word_vecs = tf.nn.embedding_lookup(model.word_emb, word_idxs)

        gru = tf.contrib.rnn.GRUCell(model.embed_size)
        step_inputs = tf.unstack(word_vecs, axis=1)
        # Per-step outputs are not needed; only the final state is returned.
        _, final_state = core_rnn.static_rnn(
            gru, step_inputs, dtype=dtypes.float32)

    return final_state, [word_vecs]
def __init__(self, config):
    """Build an LSTM sequence classifier graph.

    Args:
        config: Mapping with the keys 'num_layers', 'hidden_size',
            'max_grad_norm', 'batch_size', 'sl', 'learning_rate' and
            'num_classes'.
    """
    num_layers = config['num_layers']
    hidden_size = config['hidden_size']
    max_grad_norm = config['max_grad_norm']
    self.batch_size = config['batch_size']
    sl = config['sl']
    learning_rate = config['learning_rate']
    num_classes = config['num_classes']

    # Placeholders.
    self.input = tf.placeholder(tf.float32, [None, sl], name='input')
    self.labels = tf.placeholder(tf.int64, [None], name='labels')
    self.keep_prob = tf.placeholder("float", name='Drop_out_keep_prob')

    with tf.name_scope("LSTM_setup"):

        def single_cell():
            return tf.contrib.rnn.DropoutWrapper(
                LSTMCell(hidden_size), output_keep_prob=self.keep_prob)

        layers = [single_cell() for _ in range(num_layers)]
        cell = tf.contrib.rnn.MultiRNNCell(layers)
        # The zero state is created but not passed to static_rnn below,
        # which is given `dtype` instead.
        initial_state = cell.zero_state(self.batch_size, tf.float32)

    # Add a trailing axis of size 1 and split along axis 1 so every time
    # step becomes one RNN input.
    expanded_input = tf.expand_dims(self.input, axis=2)
    input_list = tf.unstack(expanded_input, axis=1)
    outputs, _ = core_rnn.static_rnn(cell, input_list, dtype=tf.float32)

    # Only the last step's output is classified.
    output = outputs[-1]

    with tf.name_scope("Softmax"):
        with tf.variable_scope("Softmax_params"):
            softmax_w = tf.get_variable("softmax_w",
                                        [hidden_size, num_classes])
            softmax_b = tf.get_variable("softmax_b", [num_classes])
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        # Use sparse softmax because the classes are mutually exclusive.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.labels, name='softmax')
        self.cost = tf.reduce_sum(loss) / self.batch_size

    with tf.name_scope("Evaluating_accuracy"):
        predicted = tf.argmax(logits, 1)
        correct_prediction = tf.equal(predicted, self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # The summary handles are not used directly; the summaries are
        # gathered by merge_all() below.
        h1 = tf.summary.scalar('accuracy', self.accuracy)
        h2 = tf.summary.scalar('cost', self.cost)

    # Optimizer.
    with tf.name_scope("Optimizer"):
        tvars = tf.trainable_variables()
        # Clip the gradients to prevent explosion.
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, max_grad_norm)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.merged = tf.summary.merge_all()
    self.init_op = tf.global_variables_initializer()
    print('Finished computation graph')
def __init__(self, num_layers, hidden_features, max_grad_norm, batch_size,
             sl, learning_rate, num_classes):
    """Build an LSTM graph trained with a mean squared error objective.

    Args:
        num_layers: Number of stacked LSTM layers.
        hidden_features: Number of units in each `LSTMCell`.
        max_grad_norm: Global-norm clipping threshold for the gradients.
        batch_size: Batch size used to create the (unused) zero state.
        sl: Width of the input placeholder's second axis.
        learning_rate: Learning rate handed to the Adam optimizer.
        num_classes: Width of the label placeholder and of the logits.
    """
    self.data_input = tf.placeholder(tf.float32, [None, sl], name='input')
    self.data_labels = tf.placeholder(
        tf.float32, [None, num_classes], name='labels')
    self.dropout_probability = tf.placeholder(
        "float", name="Dropout_Keep_Probability")

    with tf.name_scope("LSTM_Setup"):

        def single_cell():
            # The placeholder is used as the output keep probability.
            return tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.LSTMCell(hidden_features),
                output_keep_prob=self.dropout_probability)

        layers = [single_cell() for _ in range(num_layers)]
        cell = tf.contrib.rnn.MultiRNNCell(layers)
        # The zero state is created but not passed to static_rnn below,
        # which is given `dtype` instead.
        initial_state = cell.zero_state(batch_size, tf.float32)

    # Add a trailing axis of size 1 and split along axis 1 so every time
    # step becomes one RNN input.
    expanded_input = tf.expand_dims(self.data_input, axis=2)
    input_list = tf.unstack(expanded_input, axis=1)
    outputs, _ = core_rnn.static_rnn(cell, input_list, dtype=tf.float32)

    # Only the last step's output feeds the output layer.
    self.output = outputs[-1]

    with tf.name_scope("Softmax"):
        with tf.variable_scope("Softmax_params"):
            softmax_w = tf.get_variable("softmax_w",
                                        [hidden_features, num_classes])
            softmax_b = tf.get_variable("softmax_b", [num_classes])
        self.logits = tf.nn.xw_plus_b(self.output, softmax_w, softmax_b)
        # Squared error between the raw logits and the labels; no softmax
        # is applied despite the scope name.
        residual = self.logits - self.data_labels
        loss = tf.pow(residual, 2)
        self.cost = tf.reduce_mean(loss)

    with tf.name_scope("Evaluating_self.accuracy"):
        # `accuracy` is the same mean squared error as `cost`, built as a
        # second op.
        self.accuracy = tf.reduce_mean(loss)
        h1 = tf.summary.scalar('self.accuracy', self.accuracy)
        h2 = tf.summary.scalar('self.cost', self.cost)

    with tf.name_scope("Optimizer"):
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, max_grad_norm)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.merged = tf.summary.merge_all()
    self.init_op = tf.global_variables_initializer()
    print('FINISHED GRAPH')
def seq2seq(encoder_inputs, decoder_inputs, scope=None):
    """Builds basic encoder-decoder model and returns list of output tensors.

    `self` is not a parameter here; it is resolved from the enclosing scope
    and supplies `state_size`, `vocab_size` and `is_chatting`.

    Args:
        encoder_inputs: Inputs for the embedding-wrapped GRU encoder.
        decoder_inputs: Iterable of per-step decoder inputs.
        scope: Optional variable scope; defaults to "seq2seq".

    Returns:
        A list with one decoder output per entry of `decoder_inputs`.
    """
    with tf.variable_scope(scope or "seq2seq"):
        # Encoder: embed raw inputs -> GRU -> final encoder state.
        encoder_cell = tf.contrib.rnn.EmbeddingWrapper(
            tf.contrib.rnn.GRUCell(self.state_size), self.vocab_size,
            self.state_size)
        _, encoder_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=tf.float32)

        with tf.variable_scope("decoder"):

            def loop_function(x):
                # Embed the argmax (over axis 1) of `x`.
                with tf.variable_scope("loop_function"):
                    params = tf.get_variable(
                        "embed_tensor", [self.vocab_size, self.state_size])
                    return embedding_ops.embedding_lookup(
                        params, tf.argmax(x, 1))

            embedded_gru = tf.contrib.rnn.EmbeddingWrapper(
                tf.contrib.rnn.GRUCell(self.state_size), self.vocab_size,
                self.state_size)
            # OutputProjectionWrapper takes `reuse` rather than a scope, so
            # the current scope's reuse flag is passed explicitly.
            decoder_cell = tf.contrib.rnn.OutputProjectionWrapper(
                embedded_gru,
                self.vocab_size,
                reuse=tf.get_variable_scope().reuse)

            decoder_outputs = []
            # NOTE(review): `prev` is never reassigned below, so the
            # `is_chatting` branch cannot execute as written - confirm
            # whether it was meant to track the previous output.
            prev = None
            # The first step starts from the encoder state; later steps use
            # the state returned by the previous step.
            decoder_state = encoder_state
            for step, dec_inp in enumerate(decoder_inputs):
                if self.is_chatting and prev is not None:
                    dec_inp = loop_function(tf.reshape(prev, [1, 1]))
                if step > 0:
                    # Variables are created on the first step and reused
                    # afterwards.
                    tf.get_variable_scope().reuse_variables()
                output, decoder_state = decoder_cell(
                    dec_inp, decoder_state, scope=tf.get_variable_scope())
                decoder_outputs.append(output)
            return decoder_outputs
def testCompatibleNames(self):
    """Check that three LSTM implementations create matching variables.

    LSTMCell, LSTMBlockCell and LSTMBlockFusedCell are each built in a fresh
    graph, with and without peepholes; the resulting {variable name: shape}
    maps must be equal.
    """

    def trainable_shapes():
        # Snapshot of the current default graph's trainable variables.
        return {
            var.name: var.get_shape()
            for var in variables.trainable_variables()
        }

    # Reference implementation driven through static_rnn.
    with self.test_session(use_gpu=self._use_gpu, graph=ops.Graph()):
        plain = core_rnn_cell_impl.LSTMCell(10)
        peeping = core_rnn_cell_impl.LSTMCell(10, use_peepholes=True)
        steps = [array_ops.zeros([4, 5])] * 6
        core_rnn.static_rnn(plain, steps, dtype=dtypes.float32, scope="basic")
        core_rnn.static_rnn(
            peeping, steps, dtype=dtypes.float32, scope="peephole")
        basic_names = trainable_shapes()

    # Block cell, also driven through static_rnn.
    with self.test_session(use_gpu=self._use_gpu, graph=ops.Graph()):
        plain = lstm_ops.LSTMBlockCell(10)
        peeping = lstm_ops.LSTMBlockCell(10, use_peephole=True)
        steps = [array_ops.zeros([4, 5])] * 6
        core_rnn.static_rnn(plain, steps, dtype=dtypes.float32, scope="basic")
        core_rnn.static_rnn(
            peeping, steps, dtype=dtypes.float32, scope="peephole")
        block_names = trainable_shapes()

    # Fused cell consumes the whole sequence itself, so it is called directly
    # with the scope path the other two implementations end up using.
    with self.test_session(use_gpu=self._use_gpu, graph=ops.Graph()):
        plain = lstm_ops.LSTMBlockFusedCell(10)
        peeping = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
        steps = [array_ops.zeros([4, 5])] * 6
        plain(steps, dtype=dtypes.float32, scope="basic/lstm_cell")
        peeping(steps, dtype=dtypes.float32, scope="peephole/lstm_cell")
        fused_names = trainable_shapes()

    self.assertEqual(basic_names, block_names)
    self.assertEqual(basic_names, fused_names)
def seq_predict_model(X, w, b, time_step_size, vector_size):
    """Run a 10-unit BasicRNNCell over X and project the last output.

    The previous version built the zero initial state from a `batch_size`
    name that is neither a parameter nor a local of this function. Passing
    `dtype` instead lets `static_rnn` create the same all-zero float32 state
    sized from the input itself, so any batch size works.

    Args:
        X: input tensor, [batch_size, time_step_size, vector_size].
        w: weight matrix applied to the last RNN output.
        b: bias added after the matmul.
        time_step_size: number of time steps X is split into.
        vector_size: size of the feature vector at each time step.

    Returns:
        A tuple (prediction, state_size): `matmul(last_output, w) + b` and
        the cell's state size.
    """
    # Transpose X to [time_step_size, batch_size, vector_size].
    X = tf.transpose(X, [1, 0, 2])
    # Flatten to [time_step_size * batch_size, vector_size] (-1 = inferred).
    X = tf.reshape(X, [-1, vector_size])
    # Split along axis 0 into time_step_size tensors of
    # [batch_size, vector_size].
    X = tf.split(X, time_step_size, 0)
    # state_size is the hidden-layer size, i.e. 10.
    cell = core_rnn_cell.BasicRNNCell(num_units=10)
    # No explicit initial_state: static_rnn zero-initialises it as float32.
    outputs, _states = core_rnn.static_rnn(cell, X, dtype=tf.float32)
    return tf.matmul(outputs[-1], w) + b, cell.state_size
def seq_predict_model(X, w, b, time_step_size, vector_size):
    """Run a 10-unit BasicLSTMCell over X and project the last output.

    Args:
        X: input tensor; it is transposed with perm [1, 0, 2], so it has
            three dimensions with time on axis 1.
        w: weight matrix applied to the last LSTM output.
        b: bias added after the matmul.
        time_step_size: number of time steps X is split into.
        vector_size: size of the feature vector at each time step.

    Returns:
        A tuple (prediction, state_size): `matmul(last_output, w) + b` and
        the cell's state size.
    """
    # Move time to the leading axis: [time_step_size, batch_size, vector_size].
    time_major = tf.transpose(X, [1, 0, 2])
    # Collapse to [time_step_size * batch_size, vector_size]; -1 is inferred.
    flattened = tf.reshape(time_major, [-1, vector_size])
    # One [batch_size, vector_size] tensor per time step.
    step_inputs = tf.split(flattened, time_step_size, 0)

    lstm = core_rnn_cell.BasicLSTMCell(
        num_units=10, forget_bias=1.0, state_is_tuple=True)
    step_outputs, _ = core_rnn.static_rnn(lstm, step_inputs, dtype=tf.float32)

    prediction = tf.matmul(step_outputs[-1], w) + b
    return prediction, lstm.state_size
def seq_predict_model(X, w, b, time_step_size, vector_size):
    """Feed X through a 10-unit BasicLSTMCell and apply a linear layer.

    Args:
        X: input tensor of shape [batch_size, time_step_size, vector_size].
        w: weight matrix of the final linear layer.
        b: bias of the final linear layer.
        time_step_size: number of time steps to unroll.
        vector_size: size of the feature vector at each time step.

    Returns:
        A tuple (prediction, state_size) where prediction is the linear
        projection of the last LSTM output.
    """
    # [batch, time, vec] -> [time, batch, vec] -> [time * batch, vec].
    stacked = tf.reshape(tf.transpose(X, [1, 0, 2]), [-1, vector_size])
    # List of time_step_size tensors, each [batch_size, vector_size].
    per_step = tf.split(stacked, time_step_size, 0)

    # LSTM with state_size = 10.
    lstm_cell = core_rnn_cell.BasicLSTMCell(
        num_units=10, forget_bias=1.0, state_is_tuple=True)
    rnn_outputs, _ = core_rnn.static_rnn(lstm_cell, per_step, dtype=tf.float32)

    # Linear activation on the last time step only.
    last_output = rnn_outputs[-1]
    return tf.matmul(last_output, w) + b, lstm_cell.state_size
def seq_predict_model(X, w, b, time_step_size, vector_size):
    """Run a 10-unit BasicRNNCell over X and apply a linear layer.

    The previous version built the zero initial state from a `batch_size`
    name that is neither a parameter nor a local of this function. Passing
    `dtype` instead lets `static_rnn` create the same all-zero float32 state
    sized from the input itself, so any batch size works.

    Args:
        X: input tensor of shape [batch_size, time_step_size, vector_size].
        w: weight matrix of the final linear layer.
        b: bias of the final linear layer.
        time_step_size: number of time steps to unroll.
        vector_size: size of the feature vector at each time step.

    Returns:
        A tuple (prediction, state_size): `matmul(last_output, w) + b` and
        the cell's state size.
    """
    # transpose X to [time_step_size, batch_size, vector_size]
    X = tf.transpose(X, [1, 0, 2])
    # reshape X to [time_step_size * batch_size, vector_size]
    X = tf.reshape(X, [-1, vector_size])
    # split X, array[time_step_size], shape: [batch_size, vector_size]
    X = tf.split(X, time_step_size, 0)
    cell = core_rnn_cell.BasicRNNCell(num_units=10)
    # No explicit initial_state: static_rnn zero-initialises it as float32.
    outputs, _states = core_rnn.static_rnn(cell, X, dtype=tf.float32)
    # Linear activation
    return tf.matmul(outputs[-1], w) + b, cell.state_size
def testRNNDecoder(self):
    """rnn_decoder yields one [2, 4] output per step and a [2, 2] state."""
    const_init = init_ops.constant_initializer(0.5)
    with self.test_session() as sess:
        with variable_scope.variable_scope("root", initializer=const_init):
            # Encoder: two identical steps through a 2-unit GRU.
            enc_inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
            encoder = core_rnn_cell_impl.GRUCell(2)
            _, enc_state = core_rnn.static_rnn(
                encoder, enc_inp, dtype=dtypes.float32)

            # Decoder: three steps through a GRU projected to 4 outputs.
            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
            decoder = core_rnn_cell_impl.OutputProjectionWrapper(
                core_rnn_cell_impl.GRUCell(2), 4)
            dec, mem = seq2seq_lib.rnn_decoder(dec_inp, enc_state, decoder)

            sess.run([variables.global_variables_initializer()])

            dec_values = sess.run(dec)
            self.assertEqual(3, len(dec_values))
            self.assertEqual((2, 4), dec_values[0].shape)

            mem_values = sess.run([mem])
            self.assertEqual((2, 2), mem_values[0].shape)
def testGrid2LSTMCellWithRNNAndDynamicBatchSize(self):
    """Test for #4296: Grid2LSTMCell in static_rnn with unknown batch size."""
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    const_init = init_ops.constant_initializer(0.5)
    with variable_scope.variable_scope('root', initializer=const_init):
        cell = grid_rnn_cell.Grid2LSTMCell(num_units=num_units)
        # The same placeholder object is reused for every time step, so
        # feeding inputs[0] below feeds all steps.
        shared_input = array_ops.placeholder(
            dtypes.float32, shape=(None, input_size))
        inputs = [shared_input] * max_length
        outputs, state = core_rnn.static_rnn(
            cell, inputs, dtype=dtypes.float32)

    # Static checks: one output per input, batch dimension left unknown.
    self.assertEqual(len(outputs), len(inputs))
    for out, inp in zip(outputs, inputs):
        self.assertEqual(len(out), 1)
        first = out[0]
        self.assertTrue(first.get_shape()[0].value is None)
        self.assertEqual(first.get_shape()[1], num_units)
        self.assertEqual(first.dtype, inp.dtype)

    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        input_value = np.ones((3, input_size))
        fetched = sess.run(
            outputs + [state], feed_dict={inputs[0]: input_value})
        step_values, state_value = fetched[:-1], fetched[-1]

        # Every output tensor must be finite ...
        for step in step_values:
            for value in step:
                self.assertTrue(np.all(np.isfinite(value)))
        # ... and so must every tensor nested inside the final state.
        for group in state_value:
            for sub_state in group:
                for value in sub_state:
                    self.assertTrue(np.all(np.isfinite(value)))
def __init__(self, config):
    """Build the TF1 graph of a stacked-LSTM sequence classifier.

    Args:
        config: mapping read for the keys 'num_layers', 'hidden_size',
            'max_grad_norm', 'batch_size', 'sl', 'learning_rate' and
            'num_classes'.
    """
    num_layers = config['num_layers']
    hidden_size = config['hidden_size']
    max_grad_norm = config['max_grad_norm']
    self.batch_size = config['batch_size']
    sl = config['sl']
    learning_rate = config['learning_rate']
    num_classes = config['num_classes']

    # Placeholders
    self.input = tf.placeholder(tf.float32, [None, sl], name='input')
    self.labels = tf.placeholder(tf.int64, [None], name='labels')
    self.keep_prob = tf.placeholder("float", name='Drop_out_keep_prob')

    with tf.name_scope("LSTM_setup"):

        def single_cell():
            # One LSTM layer with dropout applied to its output.
            return tf.contrib.rnn.DropoutWrapper(
                LSTMCell(hidden_size), output_keep_prob=self.keep_prob)

        layers = [single_cell() for _ in range(num_layers)]
        cell = tf.contrib.rnn.MultiRNNCell(layers)
        # Result is not used below; the call is kept so the graph is unchanged.
        cell.zero_state(self.batch_size, tf.float32)

    # [batch, sl] -> [batch, sl, 1] -> list of `sl` tensors of [batch, 1].
    input_list = tf.unstack(tf.expand_dims(self.input, axis=2), axis=1)
    outputs, _ = core_rnn.static_rnn(cell, input_list, dtype=tf.float32)

    # Classify from the last cell output only. This is where time-series
    # classification differs from sequence-to-sequence modelling: the
    # softmax is applied at the last time step alone.
    output = outputs[-1]

    with tf.name_scope("Softmax"):
        with tf.variable_scope("Softmax_params"):
            softmax_w = tf.get_variable(
                "softmax_w", [hidden_size, num_classes])
            softmax_b = tf.get_variable("softmax_b", [num_classes])
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        # Sparse softmax because the classes are mutually exclusive.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.labels, name='softmax')
        self.cost = tf.reduce_sum(loss) / self.batch_size

    with tf.name_scope("Evaluating_accuracy"):
        predicted = tf.argmax(logits, 1)
        correct_prediction = tf.equal(predicted, self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('cost', self.cost)

    # Optimizer
    with tf.name_scope("Optimizer"):
        tvars = tf.trainable_variables()
        # Clip the gradients to prevent explosion.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tvars), max_grad_norm)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        # Per-variable histograms of values, gradients and gradient norms
        # are deliberately not recorded.

    # Final ops for TensorBoard and initialisation.
    self.merged = tf.summary.merge_all()
    self.init_op = tf.global_variables_initializer()
    print('Finished computation graph')
def testBasicRNNFusedWrapper(self):
    """This test checks that using a wrapper for BasicRNN works as expected.

    The same BasicRNNCell is run three ways on the same constant input:
    unrolled by static_rnn, through FusedRNNCellAdaptor, and through
    FusedRNNCellAdaptor with use_dynamic_rnn=True. Outputs, final state and
    input gradients of both fused variants must match the static_rnn run;
    weight gradients are compared with rtol/atol of 1e-2.
    """
    with self.test_session() as sess:
        # Same seeded initializer for every scope.
        initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212)
        cell = core_rnn_cell_impl.BasicRNNCell(10)
        batch_size = 5
        input_size = 20
        timelen = 15
        # Single tensor of shape (timelen, batch_size, input_size).
        inputs = constant_op.constant(
            np.random.randn(timelen, batch_size, input_size))

        # Reference run: unstack along axis 0 and unroll with static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            unpacked_inputs = array_ops.unstack(inputs)
            outputs, state = core_rnn.static_rnn(cell, unpacked_inputs, dtype=dtypes.float64)
            # Re-stack so the result is a single tensor like the fused output.
            packed_outputs = array_ops.stack(outputs)
            basic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("basic/")
            ]
            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_state = sess.run([packed_outputs, state])
            basic_grads = sess.run(
                gradients_impl.gradients(packed_outputs, inputs))
            basic_wgrads = sess.run(
                gradients_impl.gradients(packed_outputs, basic_vars))

        # Fused adaptor with its default (non-dynamic) setting; it takes the
        # whole `inputs` tensor directly.
        with variable_scope.variable_scope("fused_static", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(cell)
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_static_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_static/")
            ]
            # The initializer is run again after this scope's variables exist.
            sess.run([variables.global_variables_initializer()])
            fused_static_outputs, fused_static_state = sess.run(
                [outputs, state])
            fused_static_grads = sess.run(
                gradients_impl.gradients(outputs, inputs))
            fused_static_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_static_vars))

        self.assertAllClose(basic_outputs, fused_static_outputs)
        self.assertAllClose(basic_state, fused_static_state)
        self.assertAllClose(basic_grads, fused_static_grads)
        # Weight gradients get a looser tolerance than the other comparisons.
        for basic, fused in zip(basic_wgrads, fused_static_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

        # Fused adaptor again, this time with use_dynamic_rnn=True.
        with variable_scope.variable_scope("fused_dynamic", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
                cell, use_dynamic_rnn=True)
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_dynamic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_dynamic/")
            ]
            sess.run([variables.global_variables_initializer()])
            fused_dynamic_outputs, fused_dynamic_state = sess.run(
                [outputs, state])
            fused_dynamic_grads = sess.run(
                gradients_impl.gradients(outputs, inputs))
            fused_dynamic_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_dynamic_vars))

        self.assertAllClose(basic_outputs, fused_dynamic_outputs)
        self.assertAllClose(basic_state, fused_dynamic_state)
        self.assertAllClose(basic_grads, fused_dynamic_grads)
        for basic, fused in zip(basic_wgrads, fused_dynamic_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def __init__(self, config):
    """Assemble the TF1 graph for an LSTM time-series classifier.

    Args:
        config: mapping read for the keys 'num_layers', 'hidden_size',
            'max_grad_norm', 'batch_size', 'sl', 'learning_rate' and
            'num_classes'.
    """
    num_layers = config['num_layers']
    hidden_size = config['hidden_size']
    max_grad_norm = config['max_grad_norm']
    self.batch_size = config['batch_size']
    sl = config['sl']
    learning_rate = config['learning_rate']
    num_classes = config['num_classes']

    # Placeholders
    self.input = tf.placeholder(tf.float32, [None, sl], name='input')
    self.labels = tf.placeholder(tf.int64, [None], name='labels')
    self.keep_prob = tf.placeholder("float", name='Drop_out_keep_prob')

    with tf.name_scope("LSTM_setup"):

        def single_cell():
            # One LSTM layer whose output passes through dropout.
            base_cell = LSTMCell(hidden_size)
            return tf.contrib.rnn.DropoutWrapper(
                base_cell, output_keep_prob=self.keep_prob)

        stacked_cells = [single_cell() for _ in range(num_layers)]
        cell = tf.contrib.rnn.MultiRNNCell(stacked_cells)
        # Result is not used below; the call is kept so the graph is unchanged.
        cell.zero_state(self.batch_size, tf.float32)

    # [batch, sl] -> [batch, sl, 1] -> list of `sl` tensors of [batch, 1].
    expanded_input = tf.expand_dims(self.input, axis=2)
    input_list = tf.unstack(expanded_input, axis=1)
    outputs, _ = core_rnn.static_rnn(cell, input_list, dtype=tf.float32)

    # Classify from the last cell output only. This is where time-series
    # classification differs from sequence-to-sequence modelling: the
    # softmax is applied at the last time step alone.
    output = outputs[-1]

    with tf.name_scope("Softmax"):
        with tf.variable_scope("Softmax_params"):
            softmax_w = tf.get_variable(
                "softmax_w", [hidden_size, num_classes])
            softmax_b = tf.get_variable("softmax_b", [num_classes])
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        # Sparse softmax because the classes are mutually exclusive.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.labels, name='softmax')
        self.cost = tf.reduce_sum(loss) / self.batch_size

    with tf.name_scope("Evaluating_accuracy"):
        hits = tf.equal(tf.argmax(logits, 1), self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(hits, "float"))
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('cost', self.cost)

    # Optimizer
    with tf.name_scope("Optimizer"):
        tvars = tf.trainable_variables()
        unclipped = tf.gradients(self.cost, tvars)
        # Clip the gradients to prevent explosion.
        grads, _ = tf.clip_by_global_norm(unclipped, max_grad_norm)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        # Per-variable histograms of values, gradients and gradient norms
        # are deliberately not recorded.

    # Final ops for TensorBoard and initialisation.
    self.merged = tf.summary.merge_all()
    self.init_op = tf.global_variables_initializer()
    print('Finished computation graph')
def testBasicRNNFusedWrapper(self):
    """This test checks that using a wrapper for BasicRNN works as expected.

    The same BasicRNNCell is run three ways on the same constant input:
    unrolled by static_rnn, through FusedRNNCellAdaptor, and through
    FusedRNNCellAdaptor with use_dynamic_rnn=True. Outputs, final state and
    input gradients of both fused variants must match the static_rnn run;
    weight gradients are compared with rtol/atol of 1e-2.
    """
    with self.test_session() as sess:
        # Same seeded initializer for every scope.
        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890212)
        cell = core_rnn_cell_impl.BasicRNNCell(10)
        batch_size = 5
        input_size = 20
        timelen = 15
        # Single tensor of shape (timelen, batch_size, input_size).
        inputs = constant_op.constant(
            np.random.randn(timelen, batch_size, input_size))

        # Reference run: unstack along axis 0 and unroll with static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            unpacked_inputs = array_ops.unstack(inputs)
            outputs, state = core_rnn.static_rnn(
                cell, unpacked_inputs, dtype=dtypes.float64)
            # Re-stack so the result is a single tensor like the fused output.
            packed_outputs = array_ops.stack(outputs)
            basic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("basic/")
            ]
            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_state = sess.run([packed_outputs, state])
            basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
            basic_wgrads = sess.run(
                gradients_impl.gradients(packed_outputs, basic_vars))

        # Fused adaptor with its default (non-dynamic) setting; it takes the
        # whole `inputs` tensor directly.
        with variable_scope.variable_scope(
                "fused_static", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(cell)
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_static_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_static/")
            ]
            # The initializer is run again after this scope's variables exist.
            sess.run([variables.global_variables_initializer()])
            fused_static_outputs, fused_static_state = sess.run([outputs, state])
            fused_static_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_static_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_static_vars))

        self.assertAllClose(basic_outputs, fused_static_outputs)
        self.assertAllClose(basic_state, fused_static_state)
        self.assertAllClose(basic_grads, fused_static_grads)
        # Weight gradients get a looser tolerance than the other comparisons.
        for basic, fused in zip(basic_wgrads, fused_static_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

        # Fused adaptor again, this time with use_dynamic_rnn=True.
        with variable_scope.variable_scope(
                "fused_dynamic", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
                cell, use_dynamic_rnn=True)
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_dynamic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_dynamic/")
            ]
            sess.run([variables.global_variables_initializer()])
            fused_dynamic_outputs, fused_dynamic_state = sess.run([outputs, state])
            fused_dynamic_grads = sess.run(
                gradients_impl.gradients(outputs, inputs))
            fused_dynamic_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_dynamic_vars))

        self.assertAllClose(basic_outputs, fused_dynamic_outputs)
        self.assertAllClose(basic_state, fused_dynamic_state)
        self.assertAllClose(basic_grads, fused_dynamic_grads)
        for basic, fused in zip(basic_wgrads, fused_dynamic_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def testLSTMFusedSequenceLengths(self):
    """Verify proper support for sequence lengths in LSTMBlockFusedCell.

    A BasicLSTMCell run through static_rnn with per-example sequence lengths
    is the reference. It is compared against LSTMBlockFusedCell called once
    on the whole sequence, and against LSTMBlockFusedCell called one time
    step at a time with the state threaded through by hand.
    """
    with self.test_session(use_gpu=self._use_gpu) as sess:
        batch_size = 3
        input_size = 4
        cell_size = 5
        max_sequence_length = 6
        # One random [batch_size, input_size] float32 tensor per time step.
        inputs = []
        for _ in range(max_sequence_length):
            inp = ops.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=dtypes.float32)
            inputs.append(inp)
        # One length per batch entry, all shorter than max_sequence_length.
        seq_lengths = constant_op.constant([3, 4, 5])
        # Same seeded initializer for every scope.
        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890213)

        # Reference: BasicLSTMCell unrolled by static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            cell = core_rnn_cell_impl.BasicLSTMCell(cell_size, state_is_tuple=True)
            outputs, state = core_rnn.static_rnn(
                cell, inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
            sess.run([variables.global_variables_initializer()])
            # Only the first element of the state tuple is compared.
            basic_outputs, basic_state = sess.run([outputs, state[0]])
            basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            # Gradients w.r.t. every trainable variable that exists at this
            # point in the test.
            basic_wgrads = sess.run(
                gradients_impl.gradients(outputs, variables.trainable_variables()))

        # Fused cell applied once to the full list of inputs.
        with variable_scope.variable_scope("fused", initializer=initializer):
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=False)
            outputs, state = cell(
                inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
            sess.run([variables.global_variables_initializer()])
            fused_outputs, fused_state = sess.run([outputs, state[0]])
            fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused/")
            ]
            fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

        self.assertAllClose(basic_outputs, fused_outputs)
        self.assertAllClose(basic_state, fused_state)
        self.assertAllClose(basic_grads, fused_grads)
        # Weight gradients get a looser tolerance than the other comparisons.
        for basic, fused in zip(basic_wgrads, fused_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

        # Verify that state propagation works if we turn our sequence into
        # tiny (single-time) subsequences, i.e. unfuse the cell
        with variable_scope.variable_scope(
                "unfused", initializer=initializer) as vs:
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=False)
            outputs = []
            # None on the first call; afterwards the state returned by the
            # previous single-step call.
            state = None
            for i, inp in enumerate(inputs):
                # Per-example length for this one-step call: 1 while step i
                # is still inside that example's sequence, 0 afterwards.
                lengths = [int(i < l) for l in seq_lengths.eval()]
                output, state = cell(
                    [inp],
                    initial_state=state,
                    dtype=dtypes.float32,
                    sequence_length=lengths)
                # Later iterations reuse the variables created by the first.
                vs.reuse_variables()
                outputs.append(output[0])
            outputs = array_ops.stack(outputs)
            sess.run([variables.global_variables_initializer()])
            unfused_outputs, unfused_state = sess.run([outputs, state[0]])
            unfused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            unfused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("unfused/")
            ]
            unfused_wgrads = sess.run(
                gradients_impl.gradients(outputs, unfused_vars))

        self.assertAllClose(basic_outputs, unfused_outputs)
        self.assertAllClose(basic_state, unfused_state)
        self.assertAllClose(basic_grads, unfused_grads)
        for basic, unfused in zip(basic_wgrads, unfused_wgrads):
            self.assertAllClose(basic, unfused, rtol=1e-2, atol=1e-2)
def testLSTMBasicToBlockPeeping(self):
    """Compare a peephole LSTMCell against block_lstm and LSTMBlockFusedCell.

    An LSTMCell with use_peepholes=True run through static_rnn is the
    reference. Its outputs and input gradients must match those of a direct
    block_lstm call and of LSTMBlockFusedCell (both with use_peephole=True);
    the fused cell's state is compared too. Weight gradients are compared
    with rtol/atol of 1e-2.
    """
    with self.test_session(use_gpu=self._use_gpu) as sess:
        batch_size = 2
        input_size = 3
        cell_size = 4
        sequence_length = 5
        # One random [batch_size, input_size] float32 tensor per time step.
        inputs = []
        for _ in range(sequence_length):
            inp = ops.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=dtypes.float32)
            inputs.append(inp)
        # Same seeded initializer for every scope.
        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890212)

        # Reference: peephole LSTMCell unrolled by static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            cell = core_rnn_cell_impl.LSTMCell(
                cell_size, use_peepholes=True, state_is_tuple=True)
            outputs, state = core_rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
            sess.run([variables.global_variables_initializer()])
            # Only the first element of the state tuple is compared.
            basic_outputs, basic_state = sess.run([outputs, state[0]])
            basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            # Gradients w.r.t. every trainable variable that exists at this
            # point in the test.
            basic_wgrads = sess.run(
                gradients_impl.gradients(outputs, variables.trainable_variables()))

        # Direct block_lstm call with hand-created variables.
        with variable_scope.variable_scope("block", initializer=initializer):
            w = variable_scope.get_variable(
                "w",
                shape=[input_size + cell_size, cell_size * 4],
                dtype=dtypes.float32)
            # The bias overrides the scope initializer with zeros.
            b = variable_scope.get_variable(
                "b",
                shape=[cell_size * 4],
                dtype=dtypes.float32,
                initializer=init_ops.zeros_initializer())
            # Three [cell_size] vectors passed as block_lstm's wci/wcf/wco
            # arguments.
            wci = variable_scope.get_variable(
                "wci", shape=[cell_size], dtype=dtypes.float32)
            wcf = variable_scope.get_variable(
                "wcf", shape=[cell_size], dtype=dtypes.float32)
            wco = variable_scope.get_variable(
                "wco", shape=[cell_size], dtype=dtypes.float32)
            # block_lstm returns seven values; only the last one is used.
            _, _, _, _, _, _, outputs = block_lstm(
                ops.convert_to_tensor(
                    sequence_length, dtype=dtypes.int64),
                inputs,
                w,
                b,
                wci=wci,
                wcf=wcf,
                wco=wco,
                cell_clip=0,
                use_peephole=True)
            sess.run([variables.global_variables_initializer()])
            block_outputs = sess.run(outputs)
            block_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            block_wgrads = sess.run(
                gradients_impl.gradients(outputs, [w, b, wci, wcf, wco]))

        self.assertAllClose(basic_outputs, block_outputs)
        self.assertAllClose(basic_grads, block_grads)
        # Weight gradients get a looser tolerance than the other comparisons.
        for basic, block in zip(basic_wgrads, block_wgrads):
            self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)

        # Fused cell applied once to the full list of inputs.
        with variable_scope.variable_scope("fused", initializer=initializer):
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=True)
            outputs, state = cell(inputs, dtype=dtypes.float32)
            sess.run([variables.global_variables_initializer()])
            fused_outputs, fused_state = sess.run([outputs, state[0]])
            fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused/")
            ]
            fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

        self.assertAllClose(basic_outputs, fused_outputs)
        self.assertAllClose(basic_state, fused_state)
        self.assertAllClose(basic_grads, fused_grads)
        for basic, fused in zip(basic_wgrads, fused_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)