def testGRUCell(self):
    """Smoke-test GRUCell output for equal and unequal input/unit sizes.

    Both cases build the cell under a variable scope whose default
    initializer is the constant 0.5 and compare against recorded values.
    """
    with self.test_session() as sess:
        # Case 1: input width (2) equals the number of units (2).
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 2])
            state = array_ops.zeros([1, 2])
            output, _ = rnn_cell_impl.GRUCell(2)(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs.name: np.array([[1., 1.]]),
                state.name: np.array([[0.1, 0.1]]),
            }
            (value,) = sess.run([output], feed)
            # Smoke test
            self.assertAllClose(value, [[0.175991, 0.175991]])

        # Case 2: input width (3) differs from the number of units (2).
        with variable_scope.variable_scope(
                "other", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 3])
            state = array_ops.zeros([1, 2])
            output, _ = rnn_cell_impl.GRUCell(2)(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs.name: np.array([[1., 1., 1.]]),
                state.name: np.array([[0.1, 0.1]]),
            }
            (value,) = sess.run([output], feed)
            # Smoke test
            self.assertAllClose(value, [[0.156736, 0.156736]])
def testMultiRNNCellWithStateTuple(self):
    """Check MultiRNNCell with state_is_tuple=True on bad and good states.

    A single concatenated state tensor must be rejected with a ValueError;
    a tuple of per-layer states must produce the recorded smoke-test values.
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 2])
            flat_state = array_ops.zeros([1, 4])
            tuple_state = (array_ops.zeros([1, 2]), array_ops.zeros([1, 2]))

            # Test incorrectness of state
            with self.assertRaisesRegexp(ValueError,
                                         "Expected state .* a tuple"):
                rnn_cell_impl.MultiRNNCell(
                    [rnn_cell_impl.GRUCell(2) for _ in range(2)],
                    state_is_tuple=True)(inputs, flat_state)

            stacked = rnn_cell_impl.MultiRNNCell(
                [rnn_cell_impl.GRUCell(2) for _ in range(2)],
                state_is_tuple=True)
            _, new_states = stacked(inputs, tuple_state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs.name: np.array([[1., 1.]]),
                tuple_state[0].name: np.array([[0.1, 0.1]]),
                tuple_state[1].name: np.array([[0.1, 0.1]]),
            }
            state_values = sess.run(new_states, feed)

            # The numbers in results were not calculated, this is just a
            # smoke test.  However, these numbers should match those of
            # the test testMultiRNNCell.
            self.assertAllClose(state_values[0], [[0.175991, 0.175991]])
            self.assertAllClose(state_values[1], [[0.13248, 0.13248]])
def testResidualWrapperWithSlice(self):
    """Check ResidualWrapper with a custom residual function.

    The input is wider (5) than the cell output (3), so the residual
    function adds only the first three input columns to the output.
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 5])
            state = array_ops.zeros([1, 3])
            base_cell = rnn_cell_impl.GRUCell(3)
            plain_out, plain_state = base_cell(inputs, state)
            # The wrapped call below must share the variables created above.
            variable_scope.get_variable_scope().reuse_variables()

            def residual_with_slice_fn(inp, out):
                # Keep only the first 3 columns of the input before adding.
                return array_ops.slice(inp, [0, 0], [-1, 3]) + out

            wrapped = rnn_cell_impl.ResidualWrapper(
                base_cell, residual_with_slice_fn)
            res_out, res_state = wrapped(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs: np.array([[1., 1., 1., 1., 1.]]),
                state: np.array([[0.1, 0.1, 0.1]]),
            }
            fetches = [plain_out, res_out, plain_state, res_state]
            (plain_out_v, res_out_v,
             plain_state_v, res_state_v) = sess.run(fetches, feed)

            # Residual connections
            self.assertAllClose(res_out_v, plain_out_v + [1., 1., 1.])
            # States are left untouched
            self.assertAllClose(plain_state_v, res_state_v)
def testResidualWrapperWithSlice(self):
    """Check the v2 ResidualWrapper with a custom residual function.

    The input is wider (5) than the cell output (3), so the residual
    function adds only the first three input columns to the output.
    """
    inputs = ops.convert_to_tensor_v2_with_dispatch(
        np.array([[1., 1., 1., 1., 1.]]), dtype="float32")
    state = ops.convert_to_tensor_v2_with_dispatch(
        np.array([[0.1, 0.1, 0.1]]), dtype="float32")
    base_cell = rnn_cell_impl.GRUCell(
        3,
        kernel_initializer=init_ops.constant_initializer(0.5),
        bias_initializer=init_ops.constant_initializer(0.5))
    plain_out, plain_state = base_cell(inputs, state)

    def residual_with_slice_fn(inp, out):
        # Keep only the first 3 columns of the input before adding.
        return array_ops.slice(inp, [0, 0], [-1, 3]) + out

    wrapped = rnn_cell_wrapper_v2.ResidualWrapper(
        base_cell, residual_with_slice_fn)
    res_out, res_state = wrapped(inputs, state)
    self.evaluate([variables_lib.global_variables_initializer()])
    fetches = [plain_out, res_out, plain_state, res_state]
    plain_out_v, res_out_v, plain_state_v, res_state_v = self.evaluate(
        fetches)

    # Residual connections
    self.assertAllClose(res_out_v, plain_out_v + [1., 1., 1.])
    # States are left untouched
    self.assertAllClose(plain_state_v, res_state_v)
def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Check that a GPU DeviceWrapper keeps all GRU nodes off the CPU.

    The dynamic RNN is built under a "/cpu:0" device scope while the cell
    itself is wrapped for the GPU; a full trace of one run must then show
    "gru_cell" nodes in the GPU stats and none in the CPU stats.
    """
    if not test.is_gpu_available():
        # Report the test as skipped instead of silently passing when the
        # hardware needed to exercise it is missing.
        self.skipTest("Can't perform this test w/o a GPU")

    gpu_dev = test.gpu_device_name()
    with self.test_session(use_gpu=True) as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            x = array_ops.zeros([1, 1, 3])
            cell = rnn_cell_impl.DeviceWrapper(
                rnn_cell_impl.GRUCell(3), gpu_dev)
            with ops.device("/cpu:0"):
                outputs, _ = rnn.dynamic_rnn(
                    cell=cell, inputs=x, dtype=dtypes.float32)
            run_metadata = config_pb2.RunMetadata()
            opts = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)

            sess.run([variables_lib.global_variables_initializer()])
            _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

        cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
        # No GRU node may have run on the CPU ...
        self.assertFalse(
            [s for s in cpu_stats if "gru_cell" in s.node_name])
        # ... and at least one must have run on the GPU.
        self.assertTrue(
            [s for s in gpu_stats if "gru_cell" in s.node_name])
def testResidualWrapper(self):
    """Check ResidualWrapper output, state and checkpoint dependency.

    The wrapper must track the wrapped cell as its only checkpoint
    dependency (named "cell"), add the input to the cell output, and
    leave the new state identical to the unwrapped cell's state.
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 3])
            state = array_ops.zeros([1, 3])
            base_cell = rnn_cell_impl.GRUCell(3)
            plain_out, plain_state = base_cell(inputs, state)
            # The wrapped call below must share the variables created above.
            variable_scope.get_variable_scope().reuse_variables()

            wrapper = rnn_cell_impl.ResidualWrapper(base_cell)
            # Exactly one dependency is expected; unpacking fails otherwise.
            [(dep_name, dep_obj)] = wrapper._checkpoint_dependencies
            wrapper.get_config()  # Should not throw an error
            self.assertIs(dep_obj, base_cell)
            self.assertEqual("cell", dep_name)

            res_out, res_state = wrapper(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs: np.array([[1., 1., 1.]]),
                state: np.array([[0.1, 0.1, 0.1]]),
            }
            plain_out_v, res_out_v, plain_state_v, res_state_v = sess.run(
                [plain_out, res_out, plain_state, res_state], feed)

            # Residual connections
            self.assertAllClose(res_out_v, plain_out_v + [1., 1., 1.])
            # States are left untouched
            self.assertAllClose(plain_state_v, res_state_v)
def testRNNCellSerialization(self):
    """Round-trip each rnn_cell_impl cell through a Keras RNN layer config.

    For every cell, a model is built and run, the layer is rebuilt from
    its config, the weights are copied over, and both models must
    produce (nearly) the same predictions for the same input.
    """
    cells = [
        rnn_cell_impl.LSTMCell(32, use_peepholes=True, cell_clip=True),
        rnn_cell_impl.BasicLSTMCell(32, dtype=dtypes.float32),
        rnn_cell_impl.BasicRNNCell(
            32, activation="relu", dtype=dtypes.float32),
        rnn_cell_impl.GRUCell(32, dtype=dtypes.float32),
    ]
    # The custom_objects is important here since rnn_cell_impl is
    # not visible as a Keras layer, and also has a name conflict with
    # keras.LSTMCell and GRUCell.
    custom_objects = {
        "BasicRNNCell": rnn_cell_impl.BasicRNNCell,
        "GRUCell": rnn_cell_impl.GRUCell,
        "LSTMCell": rnn_cell_impl.LSTMCell,
        "BasicLSTMCell": rnn_cell_impl.BasicLSTMCell,
    }

    for cell in cells:
        with self.cached_session():
            inputs = keras.Input((None, 5))
            rnn_layer = keras.layers.RNN(cell)
            outputs = rnn_layer(inputs)
            model = keras.models.Model(inputs, outputs)
            model.compile(optimizer="rmsprop", loss="mse")

            # Test basic case serialization.
            batch = np.random.random((6, 5, 5))
            expected = model.predict(batch)
            saved_weights = model.get_weights()
            layer_config = rnn_layer.get_config()

            rnn_layer = keras.layers.RNN.from_config(
                layer_config, custom_objects=custom_objects)
            outputs = rnn_layer(inputs)
            model = keras.models.Model(inputs, outputs)
            model.set_weights(saved_weights)
            actual = model.predict(batch)
            self.assertAllClose(expected, actual, atol=1e-4)
def testDeviceWrapper(self):
    """Check that DeviceWrapper's output reports the requested device."""
    with variable_scope.variable_scope(
            "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 3])
        m = array_ops.zeros([1, 3])
        cell = rnn_cell_impl.DeviceWrapper(
            rnn_cell_impl.GRUCell(3), "/cpu:14159")
        outputs, _ = cell(x, m)
        # assertIn prints both operands on failure, unlike assertTrue(a in b)
        # which only reports "False is not true".
        self.assertIn("cpu:14159", outputs.device.lower())
def testDeviceWrapper(self):
    """Check the v2 DeviceWrapper's device placement and dependency tracking.

    The wrapper must track the wrapped cell as its only checkpoint
    dependency (named "cell"), and its output must report the requested
    device.
    """
    inputs = array_ops.zeros([1, 3])
    state = array_ops.zeros([1, 3])
    inner_cell = rnn_cell_impl.GRUCell(3)
    wrapper = rnn_cell_wrapper_v2.DeviceWrapper(inner_cell, "/cpu:0")

    # Exactly one dependency is expected; unpacking fails otherwise.
    [(dep_name, dep_obj)] = wrapper._checkpoint_dependencies
    wrapper.get_config()  # Should not throw an error
    self.assertIs(dep_obj, inner_cell)
    self.assertEqual("cell", dep_name)

    result, _ = wrapper(inputs, state)
    self.assertIn("cpu:0", result.device.lower())
def testDeviceWrapper(self):
    """Check DeviceWrapper's device placement and dependency tracking.

    The wrapper must track the wrapped cell as its only checkpoint
    dependency (named "cell"), and its output must report the requested
    device.
    """
    with variable_scope.variable_scope(
            "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 3])
        m = array_ops.zeros([1, 3])
        wrapped = rnn_cell_impl.GRUCell(3)
        cell = rnn_cell_impl.DeviceWrapper(wrapped, "/cpu:14159")
        # Exactly one dependency is expected; unpacking fails otherwise.
        (name, dep), = cell._checkpoint_dependencies
        self.assertIs(dep, wrapped)
        self.assertEqual("cell", name)

        outputs, _ = cell(x, m)
        # assertIn prints both operands on failure, unlike assertTrue(a in b)
        # which only reports "False is not true".
        self.assertIn("cpu:14159", outputs.device.lower())
def testOutputProjectionWrapper(self):
    """Smoke-test OutputProjectionWrapper around a 3-unit GRUCell.

    The output is projected to width 2 while the state keeps shape (1, 3).
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 3])
            state = array_ops.zeros([1, 3])
            projected_cell = contrib_rnn.OutputProjectionWrapper(
                rnn_cell_impl.GRUCell(3), 2)
            output, new_state = projected_cell(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs.name: np.array([[1., 1., 1.]]),
                state.name: np.array([[0.1, 0.1, 0.1]]),
            }
            output_v, new_state_v = sess.run([output, new_state], feed)

            self.assertEqual(new_state_v.shape, (1, 3))
            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(output_v, [[0.231907, 0.231907]])
def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
    """Build a (possibly stacked, possibly dropout-wrapped) RNN cell.

    Args:
      cell_type: "gru" selects `GRUCell`; any other value selects `LSTMCell`
        (without peepholes, forget bias 1.0).
      cell_size: number of units in each layer's cell.
      keep_prob: output keep probability; each layer is wrapped in a
        `DropoutWrapper` only when this is below 1.0.
      num_layer: number of layers. Values above 1 produce a `MultiRNNCell`
        with tuple state; otherwise a single cell is returned.

    Returns:
      The constructed cell.
    """

    def _build_layer():
        # One fresh cell (plus optional dropout wrapper) per call, so that
        # stacked layers never share a cell object.
        if cell_type == "gru":
            layer = rnn_cell.GRUCell(cell_size)
        else:
            layer = rnn_cell.LSTMCell(
                cell_size, use_peepholes=False, forget_bias=1.0)
        if keep_prob < 1.0:
            layer = rnn_cell.DropoutWrapper(layer, output_keep_prob=keep_prob)
        return layer

    if num_layer > 1:
        # `[cell] * num_layer` would put the SAME cell instance in every
        # layer; each layer needs its own instance.
        return rnn_cell.MultiRNNCell(
            [_build_layer() for _ in range(num_layer)], state_is_tuple=True)
    return _build_layer()
def testMultiRNNCell(self):
    """Smoke-test a two-layer GRU MultiRNNCell with a concatenated state."""
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 2])
            # state_is_tuple=False: both layers' states live in one tensor.
            state = array_ops.zeros([1, 4])
            stacked = rnn_cell_impl.MultiRNNCell(
                [rnn_cell_impl.GRUCell(2) for _ in range(2)],
                state_is_tuple=False)
            _, new_state = stacked(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs.name: np.array([[1., 1.]]),
                state.name: np.array([[0.1, 0.1, 0.1, 0.1]]),
            }
            new_state_v = sess.run(new_state, feed)
            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(
                new_state_v, [[0.175991, 0.175991, 0.13248, 0.13248]])
def testEmbeddingWrapper(self):
    """Smoke-test EmbeddingWrapper around a 2-unit GRUCell.

    The wrapper takes int32 ids as input, reports output_size 2 and
    returns a state of shape (1, 2).
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            ids = array_ops.zeros([1, 1], dtype=dtypes.int32)
            state = array_ops.zeros([1, 2])
            wrapped = contrib_rnn.EmbeddingWrapper(
                rnn_cell_impl.GRUCell(2),
                embedding_classes=3,
                embedding_size=2)
            self.assertEqual(wrapped.output_size, 2)

            output, new_state = wrapped(ids, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                ids.name: np.array([[1]]),
                state.name: np.array([[0.1, 0.1]]),
            }
            output_v, new_state_v = sess.run([output, new_state], feed)

            self.assertEqual(new_state_v.shape, (1, 2))
            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(output_v, [[0.17139, 0.17139]])
def testResidualWrapper(self):
    """Check that ResidualWrapper adds the input to the cell output.

    The wrapped cell's output must equal the plain cell's output plus the
    input, while the new state must match the plain cell's new state.
    """
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            inputs = array_ops.zeros([1, 3])
            state = array_ops.zeros([1, 3])
            base_cell = rnn_cell_impl.GRUCell(3)
            plain_out, plain_state = base_cell(inputs, state)
            # The wrapped call below must share the variables created above.
            variable_scope.get_variable_scope().reuse_variables()

            wrapped = rnn_cell_impl.ResidualWrapper(base_cell)
            res_out, res_state = wrapped(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            feed = {
                inputs: np.array([[1., 1., 1.]]),
                state: np.array([[0.1, 0.1, 0.1]]),
            }
            plain_out_v, res_out_v, plain_state_v, res_state_v = sess.run(
                [plain_out, res_out, plain_state, res_state], feed)

            # Residual connections
            self.assertAllClose(res_out_v, plain_out_v + [1., 1., 1.])
            # States are left untouched
            self.assertAllClose(plain_state_v, res_state_v)
def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
    """Build a (possibly stacked, possibly dropout-wrapped) RNN cell.

    Args:
      cell_type: "gru" selects `GRUCell`; any other value selects `LSTMCell`
        (without peepholes, forget bias 1.0).
      cell_size: number of units in each layer's cell.
      keep_prob: output keep probability; each layer is wrapped in a
        `DropoutWrapper` only when this is below 1.0.
      num_layer: number of layers to build. More than one layer yields a
        `MultiRNNCell` with tuple state; exactly one yields that single cell.

    Returns:
      The constructed cell.
    """
    # thanks for this solution from @dimeldo

    def _build_layer():
        # A separate cell object is created for every layer.
        if cell_type == "gru":
            layer = rnn_cell.GRUCell(cell_size)
        else:
            layer = rnn_cell.LSTMCell(
                cell_size, use_peepholes=False, forget_bias=1.0)
        if keep_prob < 1.0:
            layer = rnn_cell.DropoutWrapper(layer, output_keep_prob=keep_prob)
        return layer

    layers = [_build_layer() for _ in range(num_layer)]
    if num_layer > 1:
        return rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
    return layers[0]
def testResidualWrapper(self):
    """Check the v2 ResidualWrapper output, state and checkpoint dependency.

    The wrapper must track the wrapped cell as its only checkpoint
    dependency (named "cell"), add the input to the cell output, and
    leave the new state identical to the unwrapped cell's state.
    """
    inputs = ops.convert_to_tensor(np.array([[1., 1., 1.]]))
    state = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
    base_cell = rnn_cell_impl.GRUCell(
        3,
        kernel_initializer=init_ops.constant_initializer(0.5),
        bias_initializer=init_ops.constant_initializer(0.5))
    plain_out, plain_state = base_cell(inputs, state)

    wrapper = rnn_cell_wrapper_v2.ResidualWrapper(base_cell)
    # Exactly one dependency is expected; unpacking fails otherwise.
    [(dep_name, dep_obj)] = wrapper._checkpoint_dependencies
    wrapper.get_config()  # Should not throw an error
    self.assertIs(dep_obj, base_cell)
    self.assertEqual("cell", dep_name)

    res_out, res_state = wrapper(inputs, state)
    self.evaluate([variables_lib.global_variables_initializer()])
    plain_out_v, res_out_v, plain_state_v, res_state_v = self.evaluate(
        [plain_out, res_out, plain_state, res_state])

    # Residual connections
    self.assertAllClose(res_out_v, plain_out_v + [1., 1., 1.])
    # States are left untouched
    self.assertAllClose(plain_state_v, res_state_v)
def __init__(self, encoder_masks, encoder_inputs_tensor, decoder_inputs,
             target_weights, target_vocab_size, buckets,
             target_embedding_size, attn_num_layers, attn_num_hidden,
             forward_only, use_gru):
    """Create the model.

    Args:
      encoder_masks: sequence of masks; the first `seq_length` of them are
        multiplied with the bidirectional encoder outputs.
      encoder_inputs_tensor: encoder inputs, forwarded to
        `model_with_buckets`.
      decoder_inputs: indexable sequence of decoder inputs; the targets are
        these inputs shifted by one position.
      target_weights: target weights, forwarded to `model_with_buckets`.
      target_vocab_size: size of the target vocabulary; used as both the
        number of symbols and the output size of the decoder.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded
        accordingly. We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      target_embedding_size: embedding size passed to the decoder.
      attn_num_layers: number of stacked decoder cells; the encoder LSTM
        cells use `attn_num_layers * attn_num_hidden` units.
      attn_num_hidden: number of units in each decoder cell.
      forward_only: if set, the decoder is built with `feed_previous=True`,
        otherwise with `feed_previous=False`.
      use_gru: if true, the decoder uses GRU cells instead of basic LSTM
        cells.
    """
    self.encoder_inputs_tensor = encoder_inputs_tensor
    self.decoder_inputs = decoder_inputs
    self.target_weights = target_weights
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.encoder_masks = encoder_masks

    # Create the internal multi-layer cell for our RNN.
    def make_decoder_cell():
        # Returns a NEW cell on every call so stacked layers never share
        # one cell object.
        if use_gru:
            return rnn_cell_impl.GRUCell(attn_num_hidden)
        return rnn_cell_impl.BasicLSTMCell(
            attn_num_hidden, forget_bias=0.0, state_is_tuple=False)

    if use_gru:
        print("using GRU CELL in decoder")

    if attn_num_layers > 1:
        # `[single_cell] * n` would reuse one cell instance for all layers.
        cell = rnn_cell_impl.MultiRNNCell(
            [make_decoder_cell() for _ in range(attn_num_layers)],
            state_is_tuple=False)
    else:
        cell = make_decoder_cell()

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):
        num_hidden = attn_num_layers * attn_num_hidden
        # Forward direction cell
        lstm_fw_cell = rnn_cell_impl.BasicLSTMCell(
            num_hidden, forget_bias=0.0, state_is_tuple=False)
        # Backward direction cell
        lstm_bw_cell = rnn_cell_impl.BasicLSTMCell(
            num_hidden, forget_bias=0.0, state_is_tuple=False)

        pre_encoder_inputs, output_state_fw, output_state_bw = (
            tf.contrib.rnn.static_bidirectional_rnn(
                lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                initial_state_fw=None, initial_state_bw=None,
                dtype=tf.float32, sequence_length=None, scope=None))

        # Apply the masks to the first `seq_length` encoder outputs.
        encoder_inputs = [
            e * f
            for e, f in zip(pre_encoder_inputs, encoder_masks[:seq_length])
        ]
        # Each output carries both directions, hence num_hidden * 2.
        top_states = [
            array_ops.reshape(e, [-1, 1, num_hidden * 2])
            for e in encoder_inputs
        ]
        attention_states = array_ops.concat(top_states, 1)
        initial_state = tf.concat(
            axis=1, values=[output_state_fw, output_state_bw])
        outputs, _, attention_weights_history = embedding_attention_decoder(
            decoder_inputs, initial_state, attention_states, cell,
            num_symbols=target_vocab_size,
            embedding_size=target_embedding_size,
            num_heads=1,
            output_size=target_vocab_size,
            output_projection=None,
            feed_previous=do_decode,
            initial_state_attention=False,
            attn_num_hidden=attn_num_hidden)
        return outputs, attention_weights_history

    # Our targets are decoder inputs shifted by one.
    targets = [decoder_inputs[i + 1]
               for i in range(len(decoder_inputs) - 1)]

    # default to tf.nn.sparse_softmax_cross_entropy_with_logits
    softmax_loss_function = None

    # Training outputs and losses.  The two original branches differed only
    # in the literal True/False handed to seq2seq_f, so pass the flag itself.
    do_decode = bool(forward_only)
    (self.outputs, self.losses,
     self.attention_weights_histories) = model_with_buckets(
         encoder_inputs_tensor, decoder_inputs, targets,
         self.target_weights, buckets,
         lambda x, y, z: seq2seq_f(x, y, z, do_decode),
         softmax_loss_function=softmax_loss_function)