def BiRNN(x):
    # Input shape: (batch_size, n_steps, n_input). bidirectional_dynamic_rnn
    # consumes the batch-major 3-D tensor directly, so no unstacking is needed.
    # Forward and backward cells. CudnnCompatibleLSTMCell uses the same weight
    # layout as CudnnLSTM, so GPU-trained checkpoints restore into it;
    # tf.nn.rnn_cell.LSTMCell or BasicLSTMCell are drop-in alternatives.
    lstm_fw_cell = rnn.DropoutWrapper(
        cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden), self.keep_prob2)
    lstm_bw_cell = rnn.DropoutWrapper(
        cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden), self.keep_prob2)
    # Get the LSTM cell output.
    try:
        outputs, _, _ = tf.nn.bidirectional_dynamic_rnn(
            lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    except Exception:
        # Older TensorFlow versions return (outputs, states) only.
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    # Concatenate the forward and backward outputs along the feature axis.
    return tf.concat(outputs, 2)
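For reference, a minimal standalone sketch of the same pattern without the class plumbing; the layer sizes and the `keep_prob` placeholder here are illustrative assumptions, not taken from the snippet above:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn, rnn

n_hidden, n_steps, n_input = 128, 50, 39          # illustrative sizes
keep_prob = tf.placeholder_with_default(1.0, [])  # stand-in for self.keep_prob2

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
fw = rnn.DropoutWrapper(cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden), keep_prob)
bw = rnn.DropoutWrapper(cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden), keep_prob)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw, bw, x, dtype=tf.float32)
features = tf.concat(outputs, 2)  # (batch, n_steps, 2 * n_hidden)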
def lstm_layer(inputs,
               batch_size,
               num_units,
               lengths=None,
               stack_size=1,
               use_cudnn=False,
               rnn_dropout_drop_amt=0,
               is_training=True,
               bidirectional=True):
  """Create a LSTM layer using the specified backend."""
  if use_cudnn:
    return cudnn_lstm_layer(inputs, batch_size, num_units, lengths, stack_size,
                            rnn_dropout_drop_amt, is_training, bidirectional)
  else:
    assert rnn_dropout_drop_amt == 0
    cells_fw = [
        contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
        for _ in range(stack_size)
    ]
    cells_bw = [
        contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
        for _ in range(stack_size)
    ]
    # The scope is named 'cudnn_lstm' so variables line up with checkpoints
    # written by the CudnnLSTM backend.
    with tf.variable_scope('cudnn_lstm'):
      (outputs, unused_state_f,
       unused_state_b) = contrib_rnn.stack_bidirectional_dynamic_rnn(
           cells_fw,
           cells_bw,
           inputs,
           dtype=tf.float32,
           sequence_length=lengths,
           parallel_iterations=1)
    return outputs
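A hedged usage sketch for the non-cuDNN path above; the shapes and the contrib import aliases are assumptions mirroring what the snippet expects to find at module level:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import rnn as contrib_rnn

spec = tf.placeholder(tf.float32, [8, 100, 229])  # batch x time x features
lengths = tf.fill([8], 100)
outputs = lstm_layer(spec, batch_size=8, num_units=128,
                     lengths=lengths, stack_size=2, use_cudnn=False)
# outputs: [8, 100, 256] -- forward and backward halves concatenated.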
def rnn(rnn_in_4d, n_cell=4, num_hidden_shrinkage=1):
    # BxHxWxC -> BxWxHxC: treat the width axis as time.
    rnn_in_4d = tf.transpose(rnn_in_4d, [0, 2, 1, 3])
    third_dim = rnn_in_4d.shape[2]
    fourth_dim = rnn_in_4d.shape[3]
    # Collapse the trailing two axes into one feature axis: BxTxF.
    rnn_in_3d = tf.reshape(
        rnn_in_4d,
        [-1, rnn_in_4d.shape[1], np.prod([third_dim, fourth_dim])])

    # Basic cells used to build the RNN.
    num_hidden = int((third_dim * fourth_dim).value / num_hidden_shrinkage)
    fourth_dim = int(fourth_dim.value / num_hidden_shrinkage)
    fw_cells = [
        cudnn_rnn.CudnnCompatibleLSTMCell(num_units=num_hidden)
        for _ in range(n_cell)
    ]
    bw_cells = [
        cudnn_rnn.CudnnCompatibleLSTMCell(num_units=num_hidden)
        for _ in range(n_cell)
    ]

    # Stack the basic cells.
    fw_stacked = tf.nn.rnn_cell.MultiRNNCell(fw_cells, state_is_tuple=True)
    bw_stacked = tf.nn.rnn_cell.MultiRNNCell(bw_cells, state_is_tuple=True)

    # Bidirectional RNN: BxTxF -> BxTxH per direction.
    ((fw, bw), _) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=fw_stacked, cell_bw=bw_stacked, inputs=rnn_in_3d,
        dtype=rnn_in_3d.dtype)

    # BxTxH + BxTxH -> BxTx2H, then unflatten the feature axis. The last
    # dimension must be the shrunk fourth_dim * 2 (not num_hidden * 2) for
    # the element count to match: third_dim * (fourth_dim * 2) == 2H.
    rnn_out_4d = tf.reshape(
        tf.concat([fw, bw], -1),
        [-1, rnn_in_4d.shape[1], third_dim, fourth_dim * 2])
    rnn_out_4d = tf.transpose(rnn_out_4d, [0, 2, 1, 3])
    return rnn_out_4d
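A quick shape check of the round trip under the corrected reshape; the sizes are illustrative assumptions:

import numpy as np
import tensorflow as tf
from tensorflow.contrib import cudnn_rnn

x = tf.placeholder(tf.float32, [4, 16, 32, 8])  # B x H x W x C
y = rnn(x, n_cell=2, num_hidden_shrinkage=2)    # num_hidden = 16 * 8 / 2 = 64
print(y.shape)  # (4, 16, 32, 8): 16 * (4 * 2) == 2 * 64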
def bilstm_with_c(self, x, seq_len, lstm_output_dims=None, lstm_layer_count=1,
                  keep_prob=1.0, name="bilstm"):
    x_shape = x.get_shape()
    input_dims = int(x_shape[-1])
    max_seq_len = int(x_shape[-2])
    # Each direction gets half the output width unless overridden.
    u = int(input_dims / 2) if lstm_output_dims is None else lstm_output_dims
    contexts = []
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        if len(x_shape) >= 4:
            # Fold any leading axes into the batch axis for dynamic_rnn.
            x = tf.reshape(x, [-1, max_seq_len, input_dims])
            seq_len = tf.reshape(seq_len, [-1])
        for i in range(lstm_layer_count):
            with tf.variable_scope("lstm_layer_" + str(i + 1),
                                   reuse=tf.AUTO_REUSE):
                cell_fw = cudnn_rnn.CudnnCompatibleLSTMCell(num_units=u)
                cell_bw = cudnn_rnn.CudnnCompatibleLSTMCell(num_units=u)
                if keep_prob < 1.0 and self.is_training:
                    cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_fw, output_keep_prob=keep_prob)
                    cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_bw, output_keep_prob=keep_prob)
                outputs, state = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, x, sequence_length=seq_len,
                    dtype=tf.float32)
                # Keep the final cell state (c) of both directions.
                contexts.append(tf.concat([state[0].c, state[1].c], axis=-1))
                x = tf.concat(outputs, axis=-1)
        if len(x_shape) >= 4:
            # Restore the folded leading axes.
            outer = [-1 if s is None else s for s in x_shape.as_list()[:-2]]
            return (tf.reshape(x, outer + [max_seq_len, u * 2]),
                    [tf.reshape(c, outer + [u * 2]) for c in contexts])
        else:
            return x, contexts
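Since this is a method, a standalone call needs an owner object carrying `is_training`; a hedged sketch with a stub class and illustrative shapes:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn

class _Stub(object):
    is_training = True
    bilstm_with_c = bilstm_with_c  # reuse the method defined above

m = _Stub()
x = tf.placeholder(tf.float32, [None, 3, 20, 64])  # rank >= 4 path
seq_len = tf.placeholder(tf.int32, [None, 3])
out, ctxs = m.bilstm_with_c(x, seq_len, keep_prob=0.8)
# out: [batch, 3, 20, 64]; ctxs[i]: [batch, 3, 64] (u = 32 per direction)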
def _single_lstm(input_emb, input_len, hidden_size, is_fwd, use_cudnn):
  """Compute the outputs of a single LSTM (subroutine of stacked_bilstm).

  Be careful if used anywhere outside of stacked_bilstm, which converts the
  sequences to the time-major format expected by this function.

  Args:
    input_emb: <float32> [sequence_length, batch_size, emb]
    input_len: <int32> [batch_size]
    hidden_size: Number of units in the LSTM cell.
    is_fwd: Boolean indicating the directionality of the LSTM.
    use_cudnn: Boolean indicating whether to use cuDNN.

  Returns:
    output_emb: <float32> [sequence_length, batch_size, emb]
  """
  if not is_fwd:
    # Run the backward pass by reversing each sequence up to its true length.
    input_emb = tf.reverse_sequence(
        input_emb, input_len, seq_axis=0, batch_axis=1)
  if use_cudnn:
    lstm = contrib_cudnn_rnn.CudnnLSTM(
        num_layers=1,
        num_units=hidden_size,
        input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
        direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
    lstm.build(input_emb.shape)
    output_emb, _ = lstm(input_emb)
  else:
    cell = contrib_cudnn_rnn.CudnnCompatibleLSTMCell(hidden_size)
    cell = contrib_rnn.MultiRNNCell([cell])
    output_emb, _ = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=input_emb,
        sequence_length=input_len,
        dtype=tf.float32,
        time_major=True)
  if not is_fwd:
    output_emb = tf.reverse_sequence(
        output_emb, input_len, seq_axis=0, batch_axis=1)
  return output_emb
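A hedged sketch of how a caller might combine the two directions (stacked_bilstm itself is not shown in this excerpt); the scope names and sizes are assumptions:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn as contrib_cudnn_rnn
from tensorflow.contrib import rnn as contrib_rnn

emb = tf.placeholder(tf.float32, [40, 8, 300])  # [seq_len, batch, emb]
lens = tf.fill([8], 40)
with tf.variable_scope('fw'):
    fwd = _single_lstm(emb, lens, hidden_size=128, is_fwd=True,
                       use_cudnn=False)
with tf.variable_scope('bw'):
    bwd = _single_lstm(emb, lens, hidden_size=128, is_fwd=False,
                       use_cudnn=False)
bi = tf.concat([fwd, bwd], axis=-1)  # [40, 8, 256]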
def make_cudnn(inputs,
               rnn_layer_sizes,
               batch_size,
               mode,
               dropout_keep_prob=1.0,
               residual_connections=False):
  """Builds a sequence of cuDNN LSTM layers from the given hyperparameters.

  Args:
    inputs: A tensor of RNN inputs.
    rnn_layer_sizes: A list of integer sizes (in units) for each layer of
        the RNN.
    batch_size: The number of examples per batch.
    mode: 'train', 'eval', or 'generate'. For 'generate',
        CudnnCompatibleLSTMCell will be used.
    dropout_keep_prob: The float probability to keep the output of any given
        sub-cell.
    residual_connections: Whether or not to use residual connections.

  Returns:
    outputs: A tensor of RNN outputs, with shape
        `[batch_size, inputs.shape[1], rnn_layer_sizes[-1]]`.
    initial_state: The initial RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
    final_state: The final RNN states, a tuple with length
        `len(rnn_layer_sizes)` of LSTMStateTuples.
  """
  cudnn_inputs = tf.transpose(inputs, [1, 0, 2])

  if len(set(rnn_layer_sizes)) == 1 and not residual_connections:
    initial_state = tuple(
        contrib_rnn.LSTMStateTuple(
            h=tf.zeros([batch_size, num_units], dtype=tf.float32),
            c=tf.zeros([batch_size, num_units], dtype=tf.float32))
        for num_units in rnn_layer_sizes)

    if mode != 'generate':
      # We can make a single call to CudnnLSTM since all layers are the same
      # size and we aren't using residual connections.
      cudnn_initial_state = state_tuples_to_cudnn_lstm_state(initial_state)
      cell = contrib_cudnn_rnn.CudnnLSTM(
          num_layers=len(rnn_layer_sizes),
          num_units=rnn_layer_sizes[0],
          direction='unidirectional',
          dropout=1.0 - dropout_keep_prob)
      cudnn_outputs, cudnn_final_state = cell(
          cudnn_inputs, initial_state=cudnn_initial_state,
          training=mode == 'train')
      final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)
    else:
      # At generation time we use CudnnCompatibleLSTMCell.
      cell = contrib_rnn.MultiRNNCell([
          contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
          for num_units in rnn_layer_sizes
      ])
      cudnn_outputs, final_state = tf.nn.dynamic_rnn(
          cell, cudnn_inputs, initial_state=initial_state, time_major=True,
          scope='cudnn_lstm/rnn')

  else:
    # We need to make multiple calls to CudnnLSTM, keeping the initial and
    # final states at each layer.
    initial_state = []
    final_state = []

    for i in range(len(rnn_layer_sizes)):
      # If we're using residual connections and this layer is not the same
      # size as the previous layer, we need to project into the new size so
      # the (projected) input can be added to the output.
      if residual_connections:
        if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
          cudnn_inputs = contrib_layers.linear(cudnn_inputs,
                                               rnn_layer_sizes[i])

      layer_initial_state = (contrib_rnn.LSTMStateTuple(
          h=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32),
          c=tf.zeros([batch_size, rnn_layer_sizes[i]], dtype=tf.float32)),)

      if mode != 'generate':
        cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
            layer_initial_state)
        cell = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=1,
            num_units=rnn_layer_sizes[i],
            direction='unidirectional',
            dropout=1.0 - dropout_keep_prob)
        cudnn_outputs, cudnn_final_state = cell(
            cudnn_inputs, initial_state=cudnn_initial_state,
            training=mode == 'train')
        layer_final_state = cudnn_lstm_state_to_state_tuples(
            cudnn_final_state)
      else:
        # At generation time we use CudnnCompatibleLSTMCell.
        cell = contrib_rnn.MultiRNNCell(
            [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(rnn_layer_sizes[i])])
        cudnn_outputs, layer_final_state = tf.nn.dynamic_rnn(
            cell, cudnn_inputs, initial_state=layer_initial_state,
            time_major=True,
            scope='cudnn_lstm/rnn' if i == 0 else 'cudnn_lstm_%d/rnn' % i)

      if residual_connections:
        cudnn_outputs += cudnn_inputs

      cudnn_inputs = cudnn_outputs

      initial_state += layer_initial_state
      final_state += layer_final_state

  outputs = tf.transpose(cudnn_outputs, [1, 0, 2])
  return outputs, tuple(initial_state), tuple(final_state)
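A hedged usage sketch; `state_tuples_to_cudnn_lstm_state` and `cudnn_lstm_state_to_state_tuples` are companion helpers from the same module and are not shown in this excerpt, and the shapes are illustrative:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [64, 32, 38])  # batch x time x depth
outputs, initial_state, final_state = make_cudnn(
    inputs, rnn_layer_sizes=[256, 256], batch_size=64, mode='train',
    dropout_keep_prob=0.9)
# outputs: [64, 32, 256]; both states are length-2 tuples of LSTMStateTuple.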
def get_lstm_cell(num_hidden):
    # `reuse_variables` is presumably a closure variable from the enclosing
    # graph builder.
    return cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden,
                                             reuse=reuse_variables)
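Pinning that closure variable makes the helper usable standalone; a sketch, with an assumed two-layer stack:

import tensorflow as tf
from tensorflow.contrib import cudnn_rnn

reuse_variables = tf.AUTO_REUSE  # stand-in for the enclosing scope's flag
stacked = tf.nn.rnn_cell.MultiRNNCell(
    [get_lstm_cell(256) for _ in range(2)])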