def _build_bidi_rnn_fused(self, inputs, sequence_length, hparams, dtype):
  """Build a bidirectional encoder layer using fused LSTM block cells."""
  if (not np.isclose(hparams.dropout, 0.) and
      self.mode == tf.contrib.learn.ModeKeys.TRAIN):
    inputs = tf.nn.dropout(inputs, keep_prob=1 - hparams.dropout)

  fwd_cell = block_lstm.LSTMBlockFusedCell(
      hparams.num_units, hparams.forget_bias, dtype=dtype)
  fwd_encoder_outputs, (fwd_final_c, fwd_final_h) = fwd_cell(
      inputs, dtype=dtype, sequence_length=sequence_length)

  # Run the backward direction on time-reversed inputs, then reverse the
  # outputs back so both directions are aligned in time.
  inputs_r = tf.reverse_sequence(
      inputs, sequence_length, batch_axis=1, seq_axis=0)
  bak_cell = block_lstm.LSTMBlockFusedCell(
      hparams.num_units, hparams.forget_bias, dtype=dtype)
  bak_encoder_outputs, (bak_final_c, bak_final_h) = bak_cell(
      inputs_r, dtype=dtype, sequence_length=sequence_length)
  bak_encoder_outputs = tf.reverse_sequence(
      bak_encoder_outputs, sequence_length, batch_axis=1, seq_axis=0)

  bi_encoder_outputs = tf.concat(
      [fwd_encoder_outputs, bak_encoder_outputs], axis=-1)
  fwd_state = tf.nn.rnn_cell.LSTMStateTuple(fwd_final_c, fwd_final_h)
  bak_state = tf.nn.rnn_cell.LSTMStateTuple(bak_final_c, bak_final_h)
  bi_encoder_state = (fwd_state, bak_state)

  # Masks aren't applied to the outputs, but the final states are
  # post-masking.
  return bi_encoder_outputs, bi_encoder_state
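
# Hedged usage sketch (not from the original source; encoder_emb_inp is a
# hypothetical name): with time-major inputs of shape
# [max_time, batch_size, input_dim] and a [batch_size] sequence_length vector,
# the call would look roughly like
#
#   bi_outputs, bi_state = self._build_bidi_rnn_fused(
#       encoder_emb_inp, sequence_length, hparams, dtype=tf.float32)
#
# where bi_outputs has depth 2 * hparams.num_units (forward and backward
# outputs concatenated on the last axis) and bi_state is a
# (forward LSTMStateTuple, backward LSTMStateTuple) pair.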
def _build_unidi_rnn_fused(self, inputs, state, sequence_length, hparams,
                           dtype):
  """Build one unidirectional layer using a fused LSTM block cell."""
  if (not np.isclose(hparams.dropout, 0.) and
      self.mode == tf.contrib.learn.ModeKeys.TRAIN):
    inputs = tf.nn.dropout(inputs, keep_prob=1 - hparams.dropout)

  cell = block_lstm.LSTMBlockFusedCell(
      hparams.num_units, hparams.forget_bias, dtype=dtype)
  outputs, (final_c, final_h) = cell(
      inputs, state, dtype=dtype, sequence_length=sequence_length)

  # Masks aren't applied to the outputs, but the final states are
  # post-masking.
  return outputs, tf.nn.rnn_cell.LSTMStateTuple(final_c, final_h)
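
# Hedged usage sketch (hypothetical names, not from the original source):
# layers can be stacked by threading each layer's outputs into the next call,
# with an optional LSTMStateTuple as the initial state:
#
#   outputs, state = self._build_unidi_rnn_fused(
#       layer_inputs, None, sequence_length, hparams, dtype=tf.float32)
#
# Passing None for state lets LSTMBlockFusedCell start from a zero state of
# the given dtype.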
def _build_encoder_layers_unidi(self, inputs, sequence_length,
                                num_uni_layers, hparams, dtype):
  """Build encoder layers all at once."""
  encoder_outputs = None
  encoder_state = tuple()

  if hparams.use_fused_lstm:
    for i in range(num_uni_layers):
      if (not np.isclose(hparams.dropout, 0.) and
          self.mode == tf.contrib.learn.ModeKeys.TRAIN):
        cell_inputs = tf.nn.dropout(inputs, keep_prob=1 - hparams.dropout)
      else:
        cell_inputs = inputs

      cell = block_lstm.LSTMBlockFusedCell(
          hparams.num_units, hparams.forget_bias, dtype=dtype)
      encoder_outputs, (final_c, final_h) = cell(
          cell_inputs, dtype=dtype, sequence_length=sequence_length)
      encoder_state += (tf.nn.rnn_cell.LSTMStateTuple(final_c, final_h),)

      if i >= num_uni_layers - self.num_encoder_residual_layers:
        # Add the pre-dropout inputs: the residual wrapper is applied after
        # the dropout wrapper.
        encoder_outputs += inputs
      inputs = encoder_outputs
  elif hparams.use_cudnn_lstm:
    # Stack single-layer cuDNN RNNs; dropout isn't applied inside the kernel,
    # so it is applied to the layer inputs here.
    for i in range(num_uni_layers):
      if (not np.isclose(hparams.dropout, 0.) and
          self.mode == tf.contrib.learn.ModeKeys.TRAIN):
        inputs = tf.nn.dropout(inputs, keep_prob=1 - hparams.dropout)

      encoder_outputs, encoder_states = self._build_unidi_rnn_cudnn(
          inputs,
          None,  # initial_state
          sequence_length,
          dtype,
          hparams,
          1,  # num_layer
          is_fwd=True)
      encoder_state += (tf.nn.rnn_cell.LSTMStateTuple(encoder_states.c,
                                                      encoder_states.h),)

      if i >= num_uni_layers - self.num_encoder_residual_layers:
        encoder_outputs += inputs
      inputs = encoder_outputs
  else:
    uni_cell = model_helper.create_rnn_cell(
        unit_type=hparams.unit_type,
        num_units=hparams.num_units,
        num_layers=num_uni_layers,
        num_residual_layers=self.num_encoder_residual_layers,
        forget_bias=hparams.forget_bias,
        dropout=hparams.dropout,
        dtype=dtype,
        mode=self.mode,
        single_cell_fn=self.single_cell_fn,
        use_block_lstm=hparams.use_block_lstm)

    if hparams.use_dynamic_rnn:
      encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
          uni_cell,
          inputs,
          dtype=dtype,
          sequence_length=sequence_length,
          time_major=self.time_major)
    else:
      encoder_outputs, encoder_state = tf.contrib.recurrent.functional_rnn(
          uni_cell,
          inputs,
          dtype=dtype,
          sequence_length=sequence_length,
          time_major=self.time_major,
          use_tpu=False)

  return encoder_state, encoder_outputs
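
# Hedged usage sketch (hypothetical names, not from the original source):
# after the bidirectional layer, the remaining unidirectional encoder layers
# are built in one call:
#
#   encoder_state, encoder_outputs = self._build_encoder_layers_unidi(
#       bi_outputs, sequence_length, num_uni_layers, hparams, dtype)
#
# Note the return order here is (encoder_state, encoder_outputs), the reverse
# of the (outputs, state) order used by the per-layer helpers above.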