def __init__(self, config: AttendedInputConfig, is_training, features, init_embedding=None):
    super(AttendedInputModel, self).__init__()
    input_ids = features["input_ids"]
    input_dicts = features["input_dicts"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.dict = input_dicts
    self.seq_length = seq_length
    self.is_training = is_training

    input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
    self.batch_size = input_shape[0]
    self.max_length = input_shape[1]
    self.window_size = input_shape[2]
    dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
    self.dict_dim = dict_shape[2]

    # Disable dropout at eval/predict time.
    if not is_training:
        config.embedding_dropout_prob = 0.0
        config.hidden_dropout_prob = 0.0

    if init_embedding is None:
        self.embedding = tf.get_variable(
            shape=[config.vocab_size, config.embedding_size],
            dtype=tf.float32,
            name='embedding',
            initializer=tf.truncated_normal_initializer(stddev=0.02))
    else:
        self.embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding')

    with tf.variable_scope('embedding'):
        x = tf.nn.embedding_lookup(self.embedding, self.input_ids)

    def lstm_cell(dim):
        cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
        cell = rnn.DropoutWrapper(cell, output_keep_prob=1.0 - config.hidden_dropout_prob)
        return cell

    # Encode the dictionary features with their own BiLSTM.
    with tf.variable_scope('dict'):
        self.dict = tf.cast(self.dict, dtype=tf.float32)
        (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell(config.dict_hidden_size),
            cell_bw=lstm_cell(config.dict_hidden_size),
            inputs=self.dict,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        dict_output = tf.concat([forward_output, backword_output], axis=2)

    # Use the dictionary encoding to gate each position of the character window.
    with tf.variable_scope('input_attention'):
        feat_size = self.window_size
        input_attention = layers.fully_connected(
            inputs=dict_output,
            num_outputs=feat_size,
            activation_fn=tf.sigmoid)
        # [B, L, F, 1] * [B, L, F, E] -> [B, L, F, E]
        input_attention = tf.expand_dims(input_attention, -1)
        attend_input = tf.multiply(x, input_attention)
        attend_input = tf.reshape(
            attend_input, [self.batch_size, -1, feat_size * config.embedding_size])
        attend_input = model_utils.dropout(attend_input, config.embedding_dropout_prob)

    # Character-level BiLSTM over the attended embeddings.
    with tf.variable_scope('character'):
        (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell(config.hidden_size),
            cell_bw=lstm_cell(config.hidden_size),
            inputs=attend_input,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backword_output], axis=2)

    # Project to per-class scores and decode with a CRF layer.
    with tf.variable_scope('output'):
        output = tf.concat([dict_output, output], axis=2)
        scores = layers.fully_connected(
            inputs=output,
            num_outputs=config.num_classes,
            activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param, self.seq_length)

    with tf.variable_scope('loss'):
        # CRF negative log-likelihood.
        self.log_likelihood, _ = crf.crf_log_likelihood(
            scores, self.label_ids, self.seq_length, transition_param)
        self.loss = tf.reduce_mean(-self.log_likelihood)
def __init__(self, config: BaselineConfig, is_training, features, init_embedding=None):
    """Constructor for BaselineModel.

    Args:
      config: `BaselineConfig` instance.
      is_training: bool. True for training model, False for eval model. Controls
        whether dropout will be applied.
      features: dict of input tensors:
        "input_ids": int64 Tensor of shape [batch_size, seq_length, feat_size].
        "label_ids": int64 Tensor of shape [batch_size, seq_length].
        "seq_length": int64 Tensor of shape [batch_size].
      init_embedding: (optional) pretrained embedding matrix used to initialize
        the embedding table.

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """
    super(BaselineModel, self).__init__()
    input_ids = features["input_ids"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.seq_length = seq_length
    self.is_training = is_training

    input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
    self.batch_size = input_shape[0]
    self.max_length = input_shape[1]
    self.window_size = input_shape[2]

    # Disable dropout at eval/predict time.
    if not is_training:
        config.embedding_dropout_prob = 0.0
        config.hidden_dropout_prob = 0.0

    if init_embedding is None:
        self.embedding = tf.get_variable(
            shape=[config.vocab_size, config.embedding_size],
            dtype=tf.float32,
            name='embedding',
            initializer=tf.truncated_normal_initializer(stddev=0.02))
    else:
        self.embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding')

    with tf.variable_scope('embedding'):
        x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
        feat_size = self.window_size
        # Flatten the character window into a single feature vector per position.
        x = tf.reshape(x, [self.batch_size, -1, feat_size * config.embedding_size])
        x = model_utils.dropout(x, config.embedding_dropout_prob)

    def lstm_cell(dim):
        def single_cell():
            cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
            return rnn.DropoutWrapper(cell, output_keep_prob=1.0 - config.hidden_dropout_prob)
        # Build a distinct cell object per layer; replicating one object across layers
        # makes MultiRNNCell try to reuse the same variables and fails at graph build time.
        return tf.nn.rnn_cell.MultiRNNCell(
            [single_cell() for _ in range(config.num_hidden_layers)])

    with tf.variable_scope('rnn'):
        (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell(config.hidden_size),
            cell_bw=lstm_cell(config.hidden_size),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backword_output], axis=2)

    # Project to per-class scores and decode with a CRF layer.
    with tf.variable_scope('output'):
        scores = layers.fully_connected(
            inputs=output,
            num_outputs=config.num_classes,
            activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param, self.seq_length)

    with tf.variable_scope('loss'):
        # CRF negative log-likelihood.
        self.log_likelihood, _ = crf.crf_log_likelihood(
            scores, self.label_ids, self.seq_length, transition_param)
        self.loss = tf.reduce_mean(-self.log_likelihood)
def __init__(self, config: BiLSTMConfig, is_training, input_ids, label_ids, seq_length, init_embedding=None):
    """Constructor for the BiLSTM model.

    Args:
      config: `BiLSTMConfig` instance.
      is_training: bool. True for training model, False for eval model. Controls
        whether dropout will be applied.
      input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size].
      label_ids: (optional) int64 Tensor of shape [batch_size, seq_length].
      seq_length: (optional) int64 Tensor of shape [batch_size].
      init_embedding: (optional) pretrained embedding matrix used to initialize
        the embedding table.

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.seq_length = seq_length
    self.is_training = is_training

    input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
    batch_size = input_shape[0]
    max_length = input_shape[1]
    window_size = input_shape[2]

    # Disable dropout at eval/predict time.
    if not is_training:
        config.embedding_dropout_prob = 0.0
        config.hidden_dropout_prob = 0.0

    if init_embedding is None:
        embedding = tf.get_variable(
            shape=[config.vocab_size, config.embedding_size],
            dtype=tf.float32,
            name='embedding',
            initializer=tf.truncated_normal_initializer(stddev=0.02))
    else:
        embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding')

    with tf.variable_scope('embedding'):
        x = tf.nn.embedding_lookup(embedding, input_ids)
        feat_size = window_size
        # Flatten the character window into a single feature vector per position.
        x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
        x = model_utils.dropout(x, config.embedding_dropout_prob)

    with tf.variable_scope('rnn_cell'):
        def single_cell():
            if config.rnn_cell == 'lstm':
                cell = tf.nn.rnn_cell.LSTMCell(config.hidden_size, name='basic_lstm_cell')
            else:
                cell = rnn.GRUCell(config.hidden_size)
            return rnn.DropoutWrapper(cell, output_keep_prob=1.0 - config.hidden_dropout_prob)

        # Build a distinct cell object per layer and direction; replicating one object
        # makes MultiRNNCell try to reuse the same variables and fails at graph build time.
        fw_multi_cell = rnn.MultiRNNCell([single_cell() for _ in range(config.num_hidden_layers)])
        bw_multi_cell = rnn.MultiRNNCell([single_cell() for _ in range(config.num_hidden_layers)])

    with tf.variable_scope('rnn'):
        if config.bi_direction:
            (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=fw_multi_cell,
                cell_bw=bw_multi_cell,
                inputs=x,
                sequence_length=seq_length,
                dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)
        else:
            forward_output, _ = tf.nn.dynamic_rnn(
                cell=fw_multi_cell,
                inputs=x,
                sequence_length=seq_length,
                dtype=tf.float32)
            output = forward_output

    with tf.variable_scope('output'):
        logits = layers.fully_connected(
            inputs=output,
            num_outputs=config.num_classes,
            activation_fn=None)
        self.prediction = tf.argmax(logits, axis=-1)

    with tf.variable_scope('loss'):
        # Mask padding positions so they do not contribute to the loss.
        weight = tf.sequence_mask(seq_length, dtype=tf.float32)
        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits=logits,
            targets=self.label_ids,
            weights=weight,
            average_across_timesteps=True,
            average_across_batch=True)
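# Hedged usage sketch (illustrative, not part of the original source). It shows how the
# tensor-argument constructor above might be wired up in TF 1.x. The class name
# `BiLSTMModel` is an assumption inferred from `BiLSTMConfig`, and the window size of 3
# is a placeholder; only the argument order comes from the signature above.
def _example_build_bilstm(config):
    import tensorflow as tf

    # Placeholder inputs matching the docstring shapes.
    input_ids = tf.placeholder(tf.int64, shape=[None, None, 3], name='input_ids')
    label_ids = tf.placeholder(tf.int64, shape=[None, None], name='label_ids')
    seq_length = tf.placeholder(tf.int64, shape=[None], name='seq_length')

    model = BiLSTMModel(config, is_training=True, input_ids=input_ids,
                        label_ids=label_ids, seq_length=seq_length)
    # model.prediction holds the argmax tag ids; model.loss is the masked sequence loss.
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(model.loss)
    return model, train_op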
def __init__(self, config: DictHyperConfig, is_training, features, init_embedding=None):
    super(DictHyperModel, self).__init__()
    input_ids = features["input_ids"]
    input_dicts = features["input_dicts"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.dict = input_dicts
    self.seq_length = seq_length
    self.is_training = is_training

    input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
    self.batch_size = input_shape[0]
    self.max_length = input_shape[1]
    self.window_size = input_shape[2]

    # Disable dropout at eval/predict time.
    if not is_training:
        config.embedding_dropout_prob = 0.0
        config.hidden_dropout_prob = 0.0

    if init_embedding is None:
        self.embedding = tf.get_variable(
            shape=[config.vocab_size, config.embedding_size],
            dtype=tf.float32,
            name='embedding',
            initializer=tf.truncated_normal_initializer(stddev=0.02))
    else:
        self.embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding')

    with tf.variable_scope('embedding'):
        x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
        feat_size = self.window_size
        # Flatten the character window into a single feature vector per position.
        x = tf.reshape(x, [self.batch_size, -1, feat_size * config.embedding_size])
        x = model_utils.dropout(x, config.embedding_dropout_prob)

    def hyperlstm_cell(dim, input_main_dim, input_hyper_dim):
        cell = HyperLSTMCell(
            num_units=dim,
            input_main_dim=input_main_dim,
            input_hyper_dim=input_hyper_dim,
            forget_bias=1.0,
            use_recurrent_dropout=False,
            dropout_keep_prob=1.0,
            use_layer_norm=False,
            hyper_num_units=config.dict_hidden_size,
            hyper_embedding_size=config.hyper_embedding_size,
            hyper_use_recurrent_dropout=False)
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=1.0 - config.hidden_dropout_prob)
        return cell

    # Character embeddings and dictionary features are concatenated along the feature
    # axis; input_main_dim / input_hyper_dim tell the hyper-LSTM cell where to split
    # them back into its main and hyper-network inputs.
    with tf.variable_scope('hyper'):
        self.dict = tf.cast(self.dict, dtype=tf.float32)
        input_main_dim = model_utils.get_shape_list(x, expected_rank=3)[2]
        input_hyper_dim = model_utils.get_shape_list(self.dict, expected_rank=3)[2]
        x = tf.concat([x, self.dict], axis=2)
        (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=hyperlstm_cell(config.hidden_size, input_main_dim, input_hyper_dim),
            cell_bw=hyperlstm_cell(config.hidden_size, input_main_dim, input_hyper_dim),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backword_output], axis=2)

    # Project to per-class scores and decode with a CRF layer.
    with tf.variable_scope('output'):
        scores = layers.fully_connected(
            inputs=output,
            num_outputs=config.num_classes,
            activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param, self.seq_length)

    with tf.variable_scope('loss'):
        # CRF negative log-likelihood.
        self.log_likelihood, _ = crf.crf_log_likelihood(
            scores, self.label_ids, self.seq_length, transition_param)
        self.loss = tf.reduce_mean(-self.log_likelihood)
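# Hedged usage sketch (illustrative, not part of the original source). It shows the
# `features` dictionary layout the dict-aware constructors above expect. The dtypes and
# the window size (3) / dictionary feature dimension (8) are assumptions based on how the
# fields are used: ids are embedded, dictionary features are cast to float32, and labels
# feed the CRF log-likelihood.
def _example_build_dict_hyper(config):
    import tensorflow as tf

    features = {
        "input_ids": tf.placeholder(tf.int64, shape=[None, None, 3], name='input_ids'),
        "input_dicts": tf.placeholder(tf.int64, shape=[None, None, 8], name='input_dicts'),
        "seq_length": tf.placeholder(tf.int64, shape=[None], name='seq_length'),
        "label_ids": tf.placeholder(tf.int64, shape=[None, None], name='label_ids'),
    }
    model = DictHyperModel(config, is_training=True, features=features)
    # model.prediction holds the CRF-decoded tag ids; model.loss is the mean negative
    # CRF log-likelihood minimized during training.
    return model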