def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
    """Build a stacked GRU/LSTM cell with optional output dropout."""
    # Thanks for this solution from @dimeldo.
    cells = []
    for _ in range(num_layer):
        if cell_type == "gru":
            cell = rnn_cell.GRUCell(cell_size)
        else:
            cell = rnn_cell.LSTMCell(
                cell_size, use_peepholes=False, forget_bias=1.0)
        if keep_prob < 1.0:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        cells.append(cell)
    if num_layer > 1:
        cell = rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
    else:
        cell = cells[0]
    return cell
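# A minimal usage sketch (illustrative, not part of the original source). It
# assumes the `rnn_cell` module above is TF 1.x's tf.nn.rnn_cell and that a
# static graph is being built; the sizes below are arbitrary.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
rnn_cell = tf.nn.rnn_cell

stacked = get_rnncell("lstm", cell_size=128, keep_prob=0.8, num_layer=2)
inputs = tf.placeholder(tf.float32, [None, 20, 50])  # [batch, time, features]
outputs, state = tf.nn.dynamic_rnn(stacked, inputs, dtype=tf.float32)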
def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None):
    mode = rnn.rnn_mode
    num_units = rnn.num_units
    num_layers = rnn.num_layers

    # To reuse cuDNN-trained models, we must use cuDNN-compatible RNN cells.
    if mode == CUDNN_LSTM:
        single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units)
    elif mode == CUDNN_GRU:
        single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units)
    elif mode == CUDNN_RNN_TANH:
        single_cell = lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh)
    elif mode == CUDNN_RNN_RELU:
        single_cell = lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu)
    else:
        raise ValueError("%s is not supported!" % mode)

    if not is_bidi:
        cell = rnn_cell_impl.MultiRNNCell(
            [single_cell() for _ in range(num_layers)])
        return rnn_lib.dynamic_rnn(
            cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope)
    else:
        cells_fw = [single_cell() for _ in range(num_layers)]
        cells_bw = [single_cell() for _ in range(num_layers)]
        (outputs, output_state_fw,
         output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn(
            cells_fw, cells_bw, inputs,
            dtype=dtypes.float32, time_major=True, scope=scope)
        return outputs, (output_state_fw, output_state_bw)
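# Illustrative usage sketch (not from the original source). `CUDNN_LSTM` and
# the cudnn_rnn_ops/rnn_cell_impl modules are assumed to be the TF 1.x names
# the function above already relies on; `FakeCudnnRNN` is a hypothetical
# stand-in exposing only the three attributes the function reads.
import collections

FakeCudnnRNN = collections.namedtuple(
    "FakeCudnnRNN", ["rnn_mode", "num_units", "num_layers"])

rnn = FakeCudnnRNN(rnn_mode=CUDNN_LSTM, num_units=64, num_layers=2)
# `inputs` must be time-major: [max_time, batch_size, input_size].
outputs, state = _CreateCudnnCompatibleCanonicalRNN(rnn, inputs)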
def __init__(self, name=None):
    super(KerasNetworkTFRNNs, self).__init__(name=name)
    self._cell = rnn_cell_impl.MultiRNNCell(
        [rnn_cell_impl.LSTMCell(1) for _ in range(2)])
def testBasicLSTMCell(self):
    for dtype in [dtypes.float16, dtypes.float32]:
        np_dtype = dtype.as_numpy_dtype
        with self.test_session(graph=ops.Graph()) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 2], dtype=dtype)
                m = array_ops.zeros([1, 8], dtype=dtype)
                cell = rnn_cell_impl.MultiRNNCell(
                    [rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
                     for _ in range(2)],
                    state_is_tuple=False)
                self.assertEqual(cell.dtype, None)
                g, out_m = cell(x, m)
                # Layer infers the input type.
                self.assertEqual(cell.dtype, dtype.name)
                expected_variable_names = [
                    "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0"
                    % rnn_cell_impl._BIAS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0"
                    % rnn_cell_impl._BIAS_VARIABLE_NAME,
                ]
                self.assertEqual(expected_variable_names,
                                 [v.name for v in cell.trainable_variables])
                self.assertFalse(cell.non_trainable_variables)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run([g, out_m], {
                    x.name: np.array([[1., 1.]]),
                    m.name: 0.1 * np.ones([1, 8]),
                })
                self.assertEqual(len(res), 2)
                variables = variables_lib.global_variables()
                self.assertEqual(expected_variable_names,
                                 [v.name for v in variables])
                # The numbers in results were not calculated, this is just a
                # smoke test.
                self.assertAllClose(
                    res[0], np.array([[0.240, 0.240]], dtype=np_dtype), 1e-2)
                expected_mem = np.array(
                    [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
                    dtype=np_dtype)
                self.assertAllClose(res[1], expected_mem, 1e-2)
            with variable_scope.variable_scope(
                    "other", initializer=init_ops.constant_initializer(0.5)):
                # Test BasicLSTMCell with input_size != num_units.
                x = array_ops.zeros([1, 3], dtype=dtype)
                m = array_ops.zeros([1, 4], dtype=dtype)
                g, out_m = rnn_cell_impl.BasicLSTMCell(
                    2, state_is_tuple=False)(x, m)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run([g, out_m], {
                    x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                    m.name: 0.1 * np.ones([1, 4], dtype=np_dtype),
                })
                self.assertEqual(len(res), 2)
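# Note (added for clarity; not part of the original test): with
# state_is_tuple=False the per-layer (c, h) states are concatenated along the
# feature axis, which is why the combined state `m` above has shape
# [1, 8] = 2 layers * 2 state parts (c and h) * 2 units.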
def _CreateStackedLstmCell(*cell_sizes):
    subcells = [rnn_cell_impl.LSTMCell(cell_size) for cell_size in cell_sizes]
    return rnn_cell_impl.MultiRNNCell(subcells)
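# Illustrative usage (not from the original source): a three-layer LSTM stack
# whose layers have different, arbitrary widths.
cell = _CreateStackedLstmCell(128, 64, 32)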
def __init__(self, encoder_masks, encoder_inputs_tensor, decoder_inputs,
             target_weights, target_vocab_size, buckets,
             target_embedding_size, attn_num_layers, attn_num_hidden,
             forward_only, use_gru):
    """Create the model.

    Args:
        encoder_masks: list of masks, one per encoder time step, used to zero
            out padded positions in the encoder outputs.
        encoder_inputs_tensor: tensor of inputs fed to the encoder.
        decoder_inputs: list of decoder input tensors, one per time step.
        target_weights: list of weight tensors used to mask padded targets in
            the loss.
        target_vocab_size: size of the target vocabulary.
        buckets: a list of pairs (I, O), where I specifies maximum input
            length that will be processed in that bucket, and O specifies
            maximum output length. Training instances that have inputs longer
            than I or outputs longer than O will be pushed to the next bucket
            and padded accordingly. We assume that the list is sorted, e.g.,
            [(2, 4), (8, 16)].
        target_embedding_size: size of the target-symbol embedding vectors.
        attn_num_layers: number of layers in the attention decoder cell.
        attn_num_hidden: number of hidden units in each layer.
        forward_only: if set, we do not construct the backward pass in the
            model.
        use_gru: if true, we use GRU cells instead of LSTM cells.
    """
    self.encoder_inputs_tensor = encoder_inputs_tensor
    self.decoder_inputs = decoder_inputs
    self.target_weights = target_weights
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.encoder_masks = encoder_masks

    # Create the internal multi-layer cell for our RNN. Each layer needs its
    # own cell instance: the original [single_cell] * attn_num_layers reused
    # one instance, which makes TensorFlow attempt to reuse the same
    # variables across layers and raise an error.
    if use_gru:
        print("using GRU CELL in decoder")
        make_cell = lambda: rnn_cell_impl.GRUCell(attn_num_hidden)
    else:
        make_cell = lambda: rnn_cell_impl.BasicLSTMCell(
            attn_num_hidden, forget_bias=0.0, state_is_tuple=False)
    cell = make_cell()
    if attn_num_layers > 1:
        cell = rnn_cell_impl.MultiRNNCell(
            [make_cell() for _ in range(attn_num_layers)],
            state_is_tuple=False)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):
        num_hidden = attn_num_layers * attn_num_hidden
        # Forward- and backward-direction cells for the bidirectional encoder.
        lstm_fw_cell = rnn_cell_impl.BasicLSTMCell(
            num_hidden, forget_bias=0.0, state_is_tuple=False)
        lstm_bw_cell = rnn_cell_impl.BasicLSTMCell(
            num_hidden, forget_bias=0.0, state_is_tuple=False)
        pre_encoder_inputs, output_state_fw, output_state_bw = (
            tf.contrib.rnn.static_bidirectional_rnn(
                lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                initial_state_fw=None, initial_state_bw=None,
                dtype=tf.float32, sequence_length=None, scope=None))
        # Zero out padded encoder positions before attention.
        encoder_inputs = [
            e * f
            for e, f in zip(pre_encoder_inputs, encoder_masks[:seq_length])
        ]
        top_states = [
            array_ops.reshape(e, [-1, 1, num_hidden * 2])
            for e in encoder_inputs
        ]
        attention_states = array_ops.concat(top_states, 1)
        initial_state = tf.concat(
            axis=1, values=[output_state_fw, output_state_bw])
        outputs, _, attention_weights_history = embedding_attention_decoder(
            decoder_inputs, initial_state, attention_states, cell,
            num_symbols=target_vocab_size,
            embedding_size=target_embedding_size,
            num_heads=1,
            output_size=target_vocab_size,
            output_projection=None,
            feed_previous=do_decode,
            initial_state_attention=False,
            attn_num_hidden=attn_num_hidden)
        return outputs, attention_weights_history

    # Our targets are decoder inputs shifted by one.
    targets = [decoder_inputs[i + 1] for i in range(len(decoder_inputs) - 1)]
    # Defaults to tf.nn.sparse_softmax_cross_entropy_with_logits.
    softmax_loss_function = None

    # Training outputs and losses.
    if forward_only:
        self.outputs, self.losses, self.attention_weights_histories = (
            model_with_buckets(
                encoder_inputs_tensor, decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y, z: seq2seq_f(x, y, z, True),
                softmax_loss_function=softmax_loss_function))
    else:
        self.outputs, self.losses, self.attention_weights_histories = (
            model_with_buckets(
                encoder_inputs_tensor, decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y, z: seq2seq_f(x, y, z, False),
                softmax_loss_function=softmax_loss_function))
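# Side note (illustrative, not from the original source): the two branches
# above differ only in the `do_decode` flag passed to seq2seq_f, so they can
# be collapsed into a single call:
#
#     self.outputs, self.losses, self.attention_weights_histories = (
#         model_with_buckets(
#             encoder_inputs_tensor, decoder_inputs, targets,
#             self.target_weights, buckets,
#             lambda x, y, z: seq2seq_f(x, y, z, forward_only),
#             softmax_loss_function=softmax_loss_function))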
def testIndyLSTMCell(self):
    for dtype in [dtypes.float16, dtypes.float32]:
        np_dtype = dtype.as_numpy_dtype
        with self.session(graph=ops.Graph()) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 2], dtype=dtype)
                state_0 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
                state_1 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
                cell = rnn_cell_impl.MultiRNNCell(
                    [contrib_rnn_cell.IndyLSTMCell(2) for _ in range(2)])
                self.assertEqual(cell.dtype, None)
                self.assertEqual("cell-0",
                                 cell._checkpoint_dependencies[0].name)
                self.assertEqual("cell-1",
                                 cell._checkpoint_dependencies[1].name)
                cell.get_config()  # Should not throw an error.
                g, (out_state_0, out_state_1) = cell(x, (state_0, state_1))
                # Layer infers the input type.
                self.assertEqual(cell.dtype, dtype.name)
                expected_variable_names = [
                    "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_w:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_u:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s:0"
                    % rnn_cell_impl._BIAS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_w:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_u:0"
                    % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                    "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s:0"
                    % rnn_cell_impl._BIAS_VARIABLE_NAME,
                ]
                self.assertEqual(expected_variable_names,
                                 [v.name for v in cell.trainable_variables])
                self.assertFalse(cell.non_trainable_variables)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run(
                    [g, out_state_0, out_state_1], {
                        x.name: np.array([[1., 1.]]),
                        state_0[0].name: 0.1 * np.ones([1, 2]),
                        state_0[1].name: 0.1 * np.ones([1, 2]),
                        state_1[0].name: 0.1 * np.ones([1, 2]),
                        state_1[1].name: 0.1 * np.ones([1, 2]),
                    })
                self.assertEqual(len(res), 3)
                variables = variables_lib.global_variables()
                self.assertEqual(expected_variable_names,
                                 [v.name for v in variables])
                # Only check the range of outputs as this is just a smoke test.
                self.assertAllInRange(res[0], -1.0, 1.0)
                self.assertAllInRange(res[1], -1.0, 1.0)
                self.assertAllInRange(res[2], -1.0, 1.0)
            with variable_scope.variable_scope(
                    "other", initializer=init_ops.constant_initializer(0.5)):
                # Test IndyLSTMCell with input_size != num_units.
                x = array_ops.zeros([1, 3], dtype=dtype)
                state = (array_ops.zeros([1, 2], dtype=dtype),) * 2
                g, out_state = contrib_rnn_cell.IndyLSTMCell(2)(x, state)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run(
                    [g, out_state], {
                        x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                        state[0].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
                        state[1].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
                    })
                self.assertEqual(len(res), 2)
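# Note (added for clarity; not part of the original test): IndyLSTMCell keeps
# a dense input kernel (the "..._w" variables) but a per-unit, element-wise
# recurrent kernel (the "..._u" variables), which is why each layer above
# exposes two weight variables plus a bias instead of the single fused kernel
# of BasicLSTMCell.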