def __init__(self, params):
  super(MTEncoderV1, self).__init__(params)
  p = self.params
  assert not p.packed_input, ('Packed inputs are not yet supported for '
                              'MTEncoderV1.')

  with tf.variable_scope(p.name):
    if p.cc_schedule is not None:
      self.CreateChild('cc_schedule', p.cc_schedule)

    self.CreateChild('emb', p.emb)

    rnn_layers_params = []

    # L0 is a bi-directional LSTM.

    # L0's forward LSTM cell.
    params = (p.lstm_tpl.Copy()
              if p.lstm_tpl_bidi is None else p.lstm_tpl_bidi.Copy())
    params.name = 'L0_rnn_fwd'
    params.num_input_nodes = p.emb.embedding_dim
    params.num_output_nodes = p.lstm_cell_size
    forward_lstm = params

    # L0's backward LSTM cell.
    params = params.Copy()
    params.name = 'L0_rnn_bak'
    backward_lstm = params

    # L0 layer.
    params = model_helper.CreateBidirectionalRNNParams(
        self.params, forward_lstm, backward_lstm)
    params.name = 'L0'
    rnn_layers_params.append(params)

    # The remaining layers are all uni-directional LSTMs.
    input_size = 2 * p.lstm_cell_size
    for i in range(1, p.num_lstm_layers):
      # Forward LSTM cell.
      cell = (p.lstm_tpl.Copy()
              if p.lstm_tpl_uni is None else p.lstm_tpl_uni.Copy())
      cell.name = 'L%d_rnn' % i
      cell.num_input_nodes = input_size
      cell.num_output_nodes = p.lstm_cell_size
      # Forward LSTM layer.
      params = model_helper.CreateUnidirectionalRNNParams(self.params, cell)
      params.name = 'L%d' % i
      rnn_layers_params.append(params)
      input_size = p.lstm_cell_size

    self.CreateChildren('rnn', rnn_layers_params)

    dropout_p = layers.DropoutLayer.Params().Set(
        name='dropout_layer',
        keep_prob=1.0 - p.dropout_prob,
        random_seed=p.random_seed + 84828474 if p.random_seed else None)
    self.CreateChild('dropout', dropout_p)
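# A minimal configuration sketch for MTEncoderV1, assuming the standard Lingvo
# Params()/Instantiate() pattern and an embedding template that exposes
# vocab_size and embedding_dim. The concrete values are illustrative, not
# defaults taken from this file.
#
#   enc_p = MTEncoderV1.Params()
#   enc_p.name = 'mt_encoder'
#   enc_p.emb.vocab_size = 32000      # hypothetical vocabulary size
#   enc_p.emb.embedding_dim = 1024    # feeds L0's num_input_nodes
#   enc_p.lstm_cell_size = 1024       # L0 is bi-dir, so L1 sees 2 * 1024
#   enc_p.num_lstm_layers = 4         # 1 bi-directional + 3 uni-directional
#   enc_p.dropout_prob = 0.2
#   enc = enc_p.Instantiate()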
def __init__(self, params):
  super(MTEncoderBiRNN, self).__init__(params)
  p = self.params

  with tf.variable_scope(p.name):
    if p.cc_schedule is None:
      self.cc_schedule = None
    else:
      self.CreateChild('cc_schedule', p.cc_schedule)

    self.CreateChild('emb', p.emb)

    rnn_layers_params = []
    for i in range(p.num_lstm_layers):
      params = p.lstm_tpl.Copy()
      params.name = 'L%d_rnn_fwd' % i
      if i == 0:
        params.num_input_nodes = p.emb.embedding_dim
      else:
        params.num_input_nodes = 2 * p.lstm_cell_size
      params.num_output_nodes = p.lstm_cell_size
      params.reset_cell_state = p.packed_input
      forward_lstm = params

      params = params.Copy()
      params.name = 'L%d_rnn_bak' % i
      params.reset_cell_state = p.packed_input
      backward_lstm = params

      params = model_helper.CreateBidirectionalRNNParams(
          self.params, forward_lstm, backward_lstm)
      params.packed_input = p.packed_input
      params.name = 'L%d' % i
      rnn_layers_params.append(params)
    self.CreateChildren('rnn', rnn_layers_params)

    if p.lstm_cell_size * 2 != p.encoder_out_dim:
      # Project the encoder output to the desired dim.
      proj_p = p.proj_tpl.Copy().Set(
          name='proj',
          batch_norm=False,
          input_dim=p.lstm_cell_size * 2,
          output_dim=p.encoder_out_dim)
      if p.cc_schedule is not None:
        proj_p.has_bias = False
        proj_p.activation = 'TANH'
      else:
        proj_p.has_bias = True
        proj_p.activation = 'NONE'
      self.CreateChild('final_proj', proj_p)

    dropout_p = layers.DropoutLayer.Params().Set(
        name='dropout_layer',
        keep_prob=1.0 - p.dropout_prob,
        random_seed=p.random_seed + 827366448 if p.random_seed else None)
    self.CreateChild('dropout', dropout_p)

    if p.is_transparent:
      transparent_params = p.transparent_merger_tpl.Copy()
      transparent_params.name = 'transparent'
      transparent_params.num_sources = p.num_lstm_layers
      self.CreateChild('transparent_merger', transparent_params)
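# A usage sketch for MTEncoderBiRNN under the same assumptions. Note that
# 'final_proj' is only created when encoder_out_dim differs from the
# concatenated bi-directional output width (2 * lstm_cell_size); the numbers
# below are hypothetical and chosen to trigger that projection.
#
#   enc_p = MTEncoderBiRNN.Params()
#   enc_p.name = 'bi_encoder'
#   enc_p.emb.vocab_size = 32000
#   enc_p.emb.embedding_dim = 512
#   enc_p.lstm_cell_size = 512        # bi-dir output width is 2 * 512 = 1024
#   enc_p.encoder_out_dim = 768       # != 1024, so final_proj is created
#   enc_p.num_lstm_layers = 2
#   enc_p.packed_input = True         # also sets reset_cell_state on each cell
#   enc = enc_p.Instantiate()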
def CreateBidirectionalRNNParams(self, forward_p, backward_p):
  return model_helper.CreateBidirectionalRNNParams(self.params, forward_p,
                                                   backward_p)
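# This thin wrapper presumably serves as an override hook: a subclass can
# customize how the bi-directional RNN params are assembled without calling
# model_helper directly. A hypothetical override might look like:
#
#   class MyEncoder(MTEncoderBiRNN):  # hypothetical subclass
#
#     def CreateBidirectionalRNNParams(self, forward_p, backward_p):
#       params = super(MyEncoder, self).CreateBidirectionalRNNParams(
#           forward_p, backward_p)
#       params.packed_input = False  # e.g., force unpacked inputs
#       return params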