def encode(self, x=None):
    if x is None:
        x = CharLSTMEmbeddings.create_placeholder(self.name)
    self.x = x
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
        # Character embedding table, initialized from the supplied weights
        Wch = tf.get_variable(
            "Wch",
            initializer=tf.constant_initializer(self.weights, dtype=tf.float32, verify_shape=True),
            shape=[self.vsz, self.dsz],
            trainable=True
        )
        # Write zeros into the PAD row of the embedding table
        ech0 = tf.scatter_update(Wch, tf.constant(Offsets.PAD, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, self.dsz]))

        shape = tf.shape(x)
        B = shape[0]
        T = shape[1]
        W = shape[2]
        # Collapse batch and time so each row holds one word's character ids
        flat_chars = tf.reshape(x, [-1, W])
        # Per-word sequence lengths for the dynamic RNN, derived from the PAD positions
        word_lengths = tf.reduce_sum(tf.cast(tf.equal(flat_chars, Offsets.PAD), tf.int32), axis=1)
        # Ensure the PAD row is zeroed before any lookup runs
        with tf.control_dependencies([ech0]):
            embed_chars = tf.nn.embedding_lookup(Wch, flat_chars)

        # Stacked BLSTM over the characters of each word
        fwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
        bwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
        _, rnn_state = tf.nn.bidirectional_dynamic_rnn(fwd_lstm, bwd_lstm, embed_chars, sequence_length=word_lengths, dtype=tf.float32)

        # Concatenate the final forward and backward hidden states, then restore [B, T, H]
        result = tf.concat([rnn_state[0][-1].h, rnn_state[1][-1].h], axis=1)
        return tf.reshape(result, [B, T, self.lstmsz])
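A minimal usage sketch for `encode` (not from the original source): the input is an int32 tensor of character ids shaped [B, T, W] (batch, words per sentence, characters per word) and the output is one `lstmsz`-wide vector per word. The instance name `char_embed` and the maximum word length of 30 are illustrative assumptions.

chars = tf.placeholder(tf.int32, shape=[None, None, 30], name='char')  # [B, T, W] character ids
word_vecs = char_embed.encode(chars)                                   # -> [B, T, char_embed.lstmsz]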
def pool(self, word_embeddings, dsz, init, **kwargs):
    """LSTM with dropout yielding a final-state as output

    :param word_embeddings: The input word embeddings
    :param dsz: The input word embedding depth
    :param init: The tensorflow initializer to use (currently ignored)
    :param kwargs: See below

    :Keyword Arguments:
    * *hsz* -- (``int``) The number of hidden units (defaults to `100`)
    * *cmotsz* -- (``int``) An alias for `hsz`

    :return:
    """
    hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
    if type(hsz) is list:
        hsz = hsz[0]

    rnntype = kwargs.get('rnntype', 'lstm')
    nlayers = int(kwargs.get('layers', 1))

    if rnntype == 'blstm':
        rnnfwd = stacked_lstm(hsz, self.pkeep, nlayers)
        rnnbwd = stacked_lstm(hsz, self.pkeep, nlayers)
        ((_, _), (fw_final_state, bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
            rnnfwd, rnnbwd, word_embeddings, sequence_length=self.lengths, dtype=tf.float32)
        # The output of the BRNN function needs to be joined on the H axis
        output_state = fw_final_state[-1].h + bw_final_state[-1].h
        out_hsz = hsz
    else:
        rnnfwd = stacked_lstm(hsz, self.pkeep, nlayers)
        (_, (output_state)) = tf.nn.dynamic_rnn(rnnfwd, word_embeddings, sequence_length=self.lengths, dtype=tf.float32)
        output_state = output_state[-1].h
        out_hsz = hsz

    combine = tf.reshape(output_state, [-1, out_hsz])
    return combine
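A hedged call sketch for this version of `pool` (illustrative values, not from the source): `model` stands for the classifier instance that provides `self.lengths`, and `word_embeddings` for a [B, T, dsz] tensor built upstream.

pooled = model.pool(word_embeddings, dsz=300, init=None, rnntype='blstm', hsz=200, layers=1)
# pooled: [B, 200] -- the summed forward/backward final hidden states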
def pool(self, word_embeddings, dsz, init, **kwargs):
    """LSTM with dropout yielding a final-state as output

    :param word_embeddings: The input word embeddings
    :param dsz: The input word embedding depth
    :param init: The tensorflow initializer to use (currently ignored)
    :param kwargs: See below

    :Keyword Arguments:
    * *rnnsz* -- (``int``) The number of hidden units (defaults to `hsz`)
    * *hsz* -- (``int``) backoff for `rnnsz`, typically a result of stacking params.  This keeps things simple so
      it's easy to do things like residual connections between LSTM and post-LSTM stacking layers

    :return:
    """
    hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
    vdrop = bool(kwargs.get('variational_dropout', False))
    if type(hsz) is list:
        hsz = hsz[0]

    rnntype = kwargs.get('rnn_type', kwargs.get('rnntype', 'lstm'))
    nlayers = int(kwargs.get('layers', 1))

    if rnntype == 'blstm':
        rnnfwd = stacked_lstm(hsz // 2, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
        rnnbwd = stacked_lstm(hsz // 2, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
        ((_, _), (fw_final_state, bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
            rnnfwd, rnnbwd, word_embeddings, sequence_length=self.lengths, dtype=tf.float32)
        # The output of the BRNN function needs to be joined on the H axis
        output_state = tf.concat([fw_final_state[-1].h, bw_final_state[-1].h], -1)
        out_hsz = hsz
    else:
        rnnfwd = stacked_lstm(hsz, self.pdrop_value, nlayers, variational=vdrop, training=TRAIN_FLAG())
        (_, (output_state)) = tf.nn.dynamic_rnn(rnnfwd, word_embeddings, sequence_length=self.lengths, dtype=tf.float32)
        output_state = output_state[-1].h
        out_hsz = hsz

    combine = tf.reshape(output_state, [-1, out_hsz])
    return combine
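The main behavioral difference between the two `pool` versions is how the bidirectional final states are merged; a minimal sketch, with `fw_h` and `bw_h` standing in for `fw_final_state[-1].h` and `bw_final_state[-1].h`:

merged_sum = fw_h + bw_h                       # older version: elementwise sum of two hsz-wide states
merged_cat = tf.concat([fw_h, bw_h], axis=-1)  # newer version: concat of two hsz//2-wide states, width hsz

The newer version also halves the per-direction width so the merged output stays `hsz` wide, and threads variational dropout and `TRAIN_FLAG()` through `stacked_lstm` instead of the old `pkeep` value.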
def encode(self, x=None):
    if x is None:
        x = CharLSTMEmbeddings.create_placeholder(self.name)
    self.x = x
    # Note: unlike the version above, the variable scope here is opened without reuse=tf.AUTO_REUSE
    with tf.variable_scope(self.scope):
        Wch = tf.get_variable(
            "Wch",
            initializer=tf.constant_initializer(self.weights, dtype=tf.float32, verify_shape=True),
            shape=[self.vsz, self.dsz],
            trainable=True)
        # Write zeros into the PAD row of the embedding table
        ech0 = tf.scatter_update(
            Wch,
            tf.constant(Offsets.PAD, dtype=tf.int32, shape=[1]),
            tf.zeros(shape=[1, self.dsz]))

        shape = tf.shape(x)
        B = shape[0]
        T = shape[1]
        W = shape[2]
        flat_chars = tf.reshape(x, [-1, W])
        word_lengths = tf.reduce_sum(tf.cast(tf.equal(flat_chars, Offsets.PAD), tf.int32), axis=1)
        with tf.control_dependencies([ech0]):
            embed_chars = tf.nn.embedding_lookup(Wch, flat_chars)

        fwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
        bwd_lstm = stacked_lstm(self.lstmsz // 2, self.pdrop, self.layers)
        _, rnn_state = tf.nn.bidirectional_dynamic_rnn(
            fwd_lstm, bwd_lstm, embed_chars, sequence_length=word_lengths, dtype=tf.float32)

        result = tf.concat([rnn_state[0][-1].h, rnn_state[1][-1].h], axis=1)
        return tf.reshape(result, [B, T, self.lstmsz])
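Both `encode` versions rely on the same zero-PAD trick: `tf.scatter_update` overwrites the PAD row of the embedding table with zeros, and the `tf.control_dependencies` block forces that write to run before the lookup. A self-contained TF 1.x graph-mode sketch of that pattern, with illustrative sizes (`vsz=100`, `dsz=8`, `PAD=0`) that are not taken from the original:

import tensorflow as tf

PAD, vsz, dsz = 0, 100, 8
W = tf.get_variable("W_demo", shape=[vsz, dsz])                          # embedding table
zero_pad = tf.scatter_update(W, tf.constant([PAD]), tf.zeros([1, dsz]))  # zero out the PAD row
ids = tf.constant([[PAD, 3, 5]])
with tf.control_dependencies([zero_pad]):                                # make the zeroing happen first
    looked_up = tf.nn.embedding_lookup(W, ids)                           # PAD position comes back as zeros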