def _apply_rule(condition, inputs, gmr_mask, grammar, name=None):
    """apply_rule.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size].
                           At inference time, max_len is always 1.
        gmr_mask (TYPE): NULL
        grammar (TYPE): NULL

    Returns:
        TODO

    Raises:
        NULL
    """
    fc_name = None
    if name is not None:
        fc_name = name + '_apply_rule_fc'
    condition = layers.cast(condition, dtype='float32')

    gmr_output = layers.fc(inputs,
                           size=grammar.grammar_size,
                           **nn_utils.param_attr(fc_name, INIT_SCALE, need_bias=True))
    gmr_output_masked = layers.elementwise_add(gmr_output, gmr_mask)

    zeros = layers.fill_constant_batch_size_like(
        gmr_output_masked,
        shape=[-1, grammar.MAX_TABLE + grammar.MAX_COLUMN + grammar.MAX_VALUE],
        dtype='float32',
        value=-INF)
    final_output = tensor.concat([gmr_output_masked, zeros], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    return true_final_output
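# Illustrative sketch (not part of the model): a minimal NumPy re-creation of
# the masking logic in _apply_rule, assuming gmr_mask holds 0 for legal rules
# and -INF for illegal ones, and condition is a per-example 0/1 indicator of
# whether a grammar rule is expected at this step. Sizes are made up.
import numpy as np

_INF = 1e9
grammar_size, n_extra = 4, 3                        # n_extra ~ MAX_TABLE + MAX_COLUMN + MAX_VALUE
logits = np.random.randn(2, grammar_size)           # stand-in for the layers.fc output
gmr_mask = np.array([[0., 0., -_INF, -_INF],        # example 0: rules 0/1 legal
                     [0., -_INF, 0., -_INF]])       # example 1: rules 0/2 legal
masked = logits + gmr_mask                          # illegal rules pushed to -INF
# table/column/value slots are always -INF when a grammar rule is expected
full = np.concatenate([masked, np.full((2, n_extra), -_INF)], axis=-1)
condition = np.array([1., 0.])                      # which examples apply a rule
out = full * condition[:, None]                     # zero out non-rule examples
print(out.shape)                                    # (2, grammar_size + n_extra)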
def call(self, step_input, cell_state, attn_k, attn_v, padding_mask):
    """one step call

    Args:
        step_input (Variable): [batch_size, hidden_size]
        cell_state (tuple): (Variable, Variable)

    Returns:
        tuple same as input: (Variable, (Variable, Variable))

    Raises:
        NULL
    """
    step_feed, step_state = cell_state
    step_input = layers.concat([step_input, step_feed], 1)
    step_out, new_state = self.rnn_cell(step_input, step_state)

    decode_attn = models.Attention('dot_prod', name=self._name + '_attn')
    attn_out = decode_attn.forward(step_out, attn_k, attn_v, padding_mask=padding_mask)
    output = layers.fc(layers.concat([step_out, attn_out], axis=-1),
                       size=self._hidden_size,
                       num_flatten_dims=1,
                       act='tanh',
                       name=self._name + '_out_fc',
                       **nn_utils.param_attr(self._name + '_out_fc', self._init_scale, need_bias=False))
    if self._dropout > 0.:
        output = layers.dropout(x=output,
                                dropout_prob=self._dropout,
                                dropout_implementation="upscale_in_train")
    return output, [output, new_state]
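# Illustrative sketch (not part of the model): the input-feeding pattern used
# by call() above, in plain NumPy with a stand-in "RNN cell". cell_state packs
# (previous attentional output, recurrent state); each step concatenates the
# previous attentional output onto the token embedding, and the new output is
# fed back for the next step. Shapes and the toy cell are invented.
import numpy as np

batch, emb, hid = 2, 4, 4

def toy_rnn_cell(x, h):                   # stand-in for self.rnn_cell
    new = np.tanh(x[:, :hid] + h)
    return new, new

feed = np.zeros((batch, hid))             # initial attentional feed
state = np.zeros((batch, hid))            # initial recurrent state
for step_emb in np.random.randn(3, batch, emb):
    step_input = np.concatenate([step_emb, feed], axis=1)
    step_out, state = toy_rnn_cell(step_input, state)
    output = step_out                     # attention + output fc omitted in this toy
    feed = output                         # fed back: cell_state = (output, state)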
def _table_to_lf_input(ori_encoding):
    """project ori_encoding to the size of lf_embedding"""
    output = layers.fc(input=ori_encoding,
                       size=self.lf_emb_size,
                       num_flatten_dims=2,
                       **nn_utils.param_attr('fc_table2lf_input', self.init_scale, need_bias=False))
    return output
def _feature_embedder(self, one_hot_fea, name):
    """feature embedder

    Args:
        one_hot_fea (Variable): shape=[batch_size, max_len, feature_dim], dtype=float32
        name (str): layers name

    Returns:
        TODO

    Raises:
        NULL
    """
    output = layers.fc(input=one_hot_fea,
                       size=self.hidden_size,
                       num_flatten_dims=2,
                       **nn_utils.param_attr(name, self.init_scale, need_bias=True))
    return output
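# Illustrative note (not part of the model): an fc layer applied to a one-hot
# feature vector is just a row lookup in the weight matrix plus the bias, i.e.
# a learned feature embedding. A NumPy check, with made-up sizes:
import numpy as np

feature_dim, hidden_size = 5, 3
W = np.random.randn(feature_dim, hidden_size)
b = np.random.randn(hidden_size)
one_hot = np.eye(feature_dim)[2]                     # feature id 2
assert np.allclose(one_hot @ W + b, W[2] + b)        # fc(one_hot) == W[id] + b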
def _decoder(self, enc_output, enc_state, mode="train", beam_size=1):
    """decoder

    Args:
        enc_output (TYPE): NULL
        enc_state (TYPE): NULL
        mode (string): running mode: train|infer. default is "train"
        beam_size (int): default is 1

    Returns:
        TODO

    Raises:
        NULL
    """
    output_layer = functools.partial(gmr_models.grammar_output, name='decoder_output')
    decode_cell = models.RNNDecodeCell(self.hidden_size,
                                       dropout=self.dropout,
                                       init_scale=self.init_scale)
    dec_vocab = gmr_models.DecoderDynamicVocab(
        self.tname_encoding, self.tname_item_lens,
        self.cname_encoding, self.cname_item_lens,
        self.value_encoding, self.value_item_lens,
        self.column2table_mask)

    dec_attn_key = layers.fc(self.question_encoding,
                             size=self.hidden_size,
                             num_flatten_dims=2,
                             **nn_utils.param_attr('dec_attn_key', self.init_scale, need_bias=True))
    init_state0 = layers.fc(enc_state[0],
                            size=self.hidden_size,
                            num_flatten_dims=1,
                            act='tanh',
                            **nn_utils.param_attr('dec_init_state0_fc', self.init_scale, need_bias=True))
    init_state1 = layers.fc(enc_state[1],
                            size=self.hidden_size,
                            num_flatten_dims=1,
                            act='tanh',
                            **nn_utils.param_attr('dec_init_state1_fc', self.init_scale, need_bias=True))
    #dec_init_zero = layers.zeros_like(init_state1)
    init_state = [
        decode_cell.get_initial_states(batch_ref=self.question_encoding, shape=[self.hidden_size]),
        [init_state0, init_state1],
    ]
    dec_cell_params = {
        "attn_k": dec_attn_key,
        "attn_v": self.question_encoding,
        "padding_mask": self.question_mask - 1.0
    }
    if mode == "train":
        ## decoder-side vocabulary embedding ##
        self.train_label_emb = self._lf_embedder(self.train_label, self.label_lens)
        dec_output, dec_state = fluid.layers.rnn(cell=decode_cell,
                                                 inputs=self.train_label_emb,
                                                 initial_states=init_state,
                                                 sequence_length=None,
                                                 **dec_cell_params)
        outputs, _ = output_layer(dec_output, self.infer_actions, self.infer_gmr_mask,
                                  self.valid_table_mask, dec_vocab, self.grammar)
        return layers.elementwise_mul(outputs, self.label_mask, axis=0)
    elif mode == "infer":
        gmr_infer_decoder = gmr_models.GrammarInferDecoder(decode_cell,
                                                           beam_size=self.beam_size,
                                                           grammar=self.grammar,
                                                           fn_embedding=self._lf_embedder,
                                                           fn_output=output_layer)
        outputs, _ = gmr_models.decode_with_grammar(gmr_infer_decoder,
                                                    inits=init_state,
                                                    decode_vocab=dec_vocab,
                                                    max_step_num=self.max_infer_step,
                                                    **dec_cell_params)
        return outputs
    else:
        raise ValueError("unsupported running mode: %s" % (mode))
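# Illustrative sketch (not part of the model): the two decode modes above, in
# plain Python with a toy cell. fluid.layers.rnn unrolls the cell over the
# gold label embeddings (teacher forcing), while the infer path feeds each
# step's own prediction back in; grammar constraints and beam search are
# omitted here, and all names/sizes are invented for the demo.
import numpy as np

def toy_cell(x, state):                  # stand-in for decode_cell
    new = np.tanh(x + state)
    return new, new

def train_unroll(label_embs, state):     # ~ fluid.layers.rnn(...)
    outs = []
    for x in label_embs:                 # gold inputs at every step
        out, state = toy_cell(x, state)
        outs.append(out)
    return np.stack(outs)

def infer_decode(start_emb, state, embed, n_steps):  # ~ decode_with_grammar(...)
    x, outs = start_emb, []
    for _ in range(n_steps):
        out, state = toy_cell(x, state)
        outs.append(out)
        x = embed(out.argmax(-1))        # feed back own prediction
    return np.stack(outs)

_ = train_unroll(np.random.randn(3, 2, 4), np.zeros((2, 4)))
_ = infer_decode(np.zeros((2, 4)), np.zeros((2, 4)),
                 lambda ids: np.eye(4)[ids], n_steps=3)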
def _table_encoder(self, inputs, input_lens, name_lens, name_pos, name_tok_len,
                   inputs_fea, name, question_encoding=None, q_padding_mask=None):
    """table encoder.

    Args:
        inputs (TYPE): NULL
        input_lens (TYPE): NULL
        name_lens (TYPE): NULL
        name_pos (TYPE): NULL
        name_tok_len (TYPE): NULL
        inputs_fea (TYPE): NULL
        name (str/list): NULL
        question_encoding (Variable): NULL
        q_padding_mask (Variable): NULL

    Returns:
        TODO

    Raises:
        NULL
    """
    if type(name) is tuple or type(name) is list:
        assert len(name) == 3, "name tuple's len must equal to 3"
        enc_name, attn_name, fc_name = name
    else:   # type(name) is str
        enc_name = name + '_rnn'
        attn_name = name + '_attn'
        fc_name = name + '_out_fc'

    if self.table_enc_type == 'birnn':
        encoder = models.Sequence2DEncoder(self.table_enc_type,
                                           dropout=self.dropout,
                                           init_scale=self.init_scale,
                                           name=enc_name,
                                           num_layers=self.encoder_layers,
                                           hidden_size=self.hidden_size // 2,
                                           bidirectional=True)
    elif self.table_enc_type == 'simple_sum':
        encoder = models.Sequence2DEncoder(self.table_enc_type,
                                           dropout=self.dropout,
                                           init_scale=self.init_scale,
                                           name=name)
    else:
        raise ValueError("unsupported table encoder type: %s" % (self.table_enc_type))

    enc_output, _ = encoder.forward(inputs, input_lens, name_lens, name_pos, name_tok_len)

    if self.table_attention is not None and question_encoding is not None:
        attn = models.Attention(score_type=self.table_attention, name=attn_name)
        ctx = attn.forward(enc_output, question_encoding, padding_mask=q_padding_mask)
        #enc_output_attn = layers.elementwise_add(enc_output, ctx)
        enc_output = layers.concat([enc_output, ctx], axis=-1)
    if inputs_fea is not None:
        #enc_output = layers.elementwise_add(enc_output, inputs_fea)
        enc_output = layers.concat([enc_output, inputs_fea], axis=-1)

    final_output = layers.fc(enc_output,
                             size=self.hidden_size,
                             num_flatten_dims=2,
                             **nn_utils.param_attr(fc_name, self.init_scale, need_bias=True))
    return final_output, None
def _ernie_encoder(self, slots_dict):
    """use ernie to encode question, tables/columns/values

    Args:
        slots_dict (TYPE): NULL

    Returns:
        TODO

    Raises:
        NULL
    """
    batch_instance = slots_dict["question_tokens"][C.RECORD_ID]
    input_qtc_src = batch_instance[DName.QTC_IDS]
    input_qtc_pos = batch_instance[DName.QTC_POS_IDS]
    input_qtc_sent = batch_instance[DName.QTC_SENTENCE_IDS]
    input_qtc_mask = batch_instance[DName.QTC_MASK_IDS]
    input_qtc_task = batch_instance[DName.QTC_TASK_IDS]
    input_qv_src = batch_instance[DName.QV_IDS]
    input_qv_pos = batch_instance[DName.QV_POS_IDS]
    input_qv_sent = batch_instance[DName.QV_SENTENCE_IDS]
    input_qv_mask = batch_instance[DName.QV_MASK_IDS]
    input_qv_task = batch_instance[DName.QV_TASK_IDS]
    input_q_pos = batch_instance[DName.Q_POS]
    input_t_pos = batch_instance[DName.T_POS]
    input_c_pos = batch_instance[DName.C_POS]
    input_v_pos = batch_instance[DName.V_POS]
    q_span_lens = batch_instance[DName.Q_LEN]
    self.tname_item_lens = batch_instance[DName.T_LEN]
    self.cname_item_lens = batch_instance[DName.C_LEN]
    self.value_item_lens = batch_instance[DName.V_LEN]
    q_span_tok_lens = batch_instance[DName.Q_SPAN_LEN]
    tname_token_lens = batch_instance[DName.T_TOKS_LEN]
    cname_token_lens = batch_instance[DName.C_TOKS_LEN]
    value_token_lens = batch_instance[DName.V_TOKS_LEN]
    self.all_inputs_name += [
        input_qtc_src.name, input_qtc_pos.name, input_qtc_sent.name,
        input_qtc_mask.name, input_qtc_task.name,
        input_qv_src.name, input_qv_pos.name, input_qv_sent.name,
        input_qv_mask.name, input_qv_task.name,
        input_q_pos.name, input_t_pos.name, input_c_pos.name, input_v_pos.name,
        q_span_lens.name, q_span_tok_lens.name,
        self.tname_item_lens.name, self.cname_item_lens.name, self.value_item_lens.name,
        tname_token_lens.name, cname_token_lens.name, value_token_lens.name
    ]

    config_path = self.encoder_params.get("config_path")
    use_fp16 = self.encoder_params.get("use_fp16", False)
    ernie_config = ErnieConfig(config_path)
    ernie_qtc = ErnieModel(src_ids=input_qtc_src,
                           position_ids=input_qtc_pos,
                           sentence_ids=input_qtc_sent,
                           task_ids=input_qtc_task,
                           input_mask=input_qtc_mask,
                           config=ernie_config,
                           use_fp16=use_fp16)
    qtc_enc_output = ernie_qtc.get_sequence_output()
    qtc_enc_output = layers.fc(qtc_enc_output,
                               size=self.hidden_size,
                               num_flatten_dims=2,
                               **nn_utils.param_attr('ernie_output', self.init_scale, need_bias=True))
    ernie_qv = ErnieModel(src_ids=input_qv_src,
                          position_ids=input_qv_pos,
                          sentence_ids=input_qv_sent,
                          task_ids=input_qv_task,
                          input_mask=input_qv_mask,
                          config=ernie_config,
                          use_fp16=use_fp16)
    qv_enc_output = ernie_qv.get_sequence_output()
    qv_enc_output = layers.fc(qv_enc_output,
                              size=self.hidden_size,
                              num_flatten_dims=2,
                              **nn_utils.param_attr('ernie_output', self.init_scale, need_bias=True))

    output_state = layers.dropout(
        x=ernie_qtc.get_pooled_output() + ernie_qv.get_pooled_output(),
        dropout_prob=self.dropout,
        dropout_implementation="upscale_in_train")
    output_state = layers.fc(output_state,
                             size=self.hidden_size,
                             num_flatten_dims=1,
                             **nn_utils.param_attr('all_state', self.init_scale, need_bias=True))

    #question_enc = nn_utils.batch_gather(qtc_enc_output, input_q_pos)
    question_enc, _ = self._table_encoder(qtc_enc_output, None, q_span_lens,
                                          input_q_pos, q_span_tok_lens,
                                          self.question_fea_emb, name='question_enc')
    max_q_span_len = input_q_pos.shape[1]
    self.question_mask = layers.sequence_mask(q_span_lens, maxlen=max_q_span_len, dtype='float32')
    q_padding_mask = self.question_mask - 1.0
    table_enc, _ = self._table_encoder(qtc_enc_output, None, self.tname_item_lens,
                                       input_t_pos, tname_token_lens, self.table_fea_emb,
                                       name=('table_enc', 'table_enc_attn', 'tab_enc_out'),
                                       question_encoding=question_enc,
                                       q_padding_mask=q_padding_mask)
    column_enc, _ = self._table_encoder(qtc_enc_output, None, self.cname_item_lens,
                                        input_c_pos, cname_token_lens, self.column_fea_emb,
                                        name=('table_enc', 'table_enc_attn', 'col_enc_out'),
                                        question_encoding=question_enc,
                                        q_padding_mask=q_padding_mask)
    value_enc, _ = self._table_encoder(qv_enc_output, None, self.value_item_lens,
                                       input_v_pos, value_token_lens, self.value_fea_emb,
                                       name=('table_enc', 'table_enc_attn', 'val_enc_out'),
                                       question_encoding=question_enc,
                                       q_padding_mask=q_padding_mask)
    return [[output_state, output_state], question_enc, table_enc, column_enc, value_enc]
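# Illustrative note (not part of the model): the padding-mask convention used
# throughout -- layers.sequence_mask yields 1.0 at valid positions and 0.0 at
# padding, so question_mask - 1.0 is 0.0 at valid positions and -1.0 at
# padding. Attention later multiplies it by INF to push padded scores to -INF.
# A NumPy equivalent with made-up lengths:
import numpy as np

lens, max_len = np.array([2, 3]), 4
question_mask = (np.arange(max_len)[None, :] < lens[:, None]).astype('float32')
q_padding_mask = question_mask - 1.0
print(question_mask)        # [[1,1,0,0],[1,1,1,0]]
print(q_padding_mask)       # [[0,0,-1,-1],[0,0,0,-1]]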
def forward(self, q, v, mask=None):
    """forward

    Args:
        q (Variable): shape = [batch_size, seq_len1, hidden_size] or
            [batch_size, hidden_size]. dtype = float32
        v (Variable): shape = [batch_size, seq_len2, hidden_size]. dtype = float32
        mask (Variable): shape = [batch_size, seq_len2]. dtype = v.dtype. Default is None

    Returns:
        Variable: shape = [batch_size, seq_len1, seq_len2], dtype = float32.
            If q is 2-D, the seq_len1 axis is squeezed and the shape is
            [batch_size, seq_len2].

    Raises:
        RuntimeError: while giving unsupported score_type.
    """
    input_dim = len(q.shape)
    if input_dim == 2:
        q = layers.unsqueeze(q, [1])

    if self._score_type == 'dot_prod':
        ptr_score = layers.matmul(q, v, transpose_y=True)
    elif self._score_type == 'affine':
        q_tmp = layers.fc(q,
                          size=v.shape[2],
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name, self._init_scale, need_bias=True))
        ptr_score = layers.matmul(q_tmp, v, transpose_y=True)
    elif self._score_type == 'std':
        if self._hidden_size <= 0:
            raise ValueError("hidden_size should be greater than 0")
        q_tmp = layers.fc(q,
                          size=self._hidden_size,
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name + '_q', self._init_scale, need_bias=True))
        v_tmp = layers.fc(v,
                          size=self._hidden_size,
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name + '_k', self._init_scale, need_bias=True))
        # shape = [batch_size, seq_len1, seq_len2, hidden_size]
        q_tmp_expand = layers.expand(layers.unsqueeze(q_tmp, [2]), [1, 1, v_tmp.shape[1], 1])
        # shape = [batch_size, 1, seq_len2, hidden_size]
        v_tmp_expand = layers.unsqueeze(v_tmp, [1])
        ptr_score = layers.fc(layers.elementwise_add(q_tmp_expand, v_tmp_expand, act='tanh'),
                              size=1,
                              num_flatten_dims=3,
                              **nn_utils.param_attr(self._name + '_w', self._init_scale, need_bias=True))
        ptr_score = layers.squeeze(ptr_score, [3])
    else:
        raise RuntimeError('Supported score types: dot_prod/affine/std, but got %s' % (self._score_type))

    if mask is not None:
        score_for_mask = layers.transpose(ptr_score, [1, 0, 2])
        ptr_score_masked = layers.elementwise_add(score_for_mask, (mask - 1.0) * INF, axis=-1)
        ptr_score = layers.transpose(ptr_score_masked, [1, 0, 2])

    if input_dim == 2:
        ptr_score = layers.squeeze(ptr_score, [1])
    return ptr_score
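# Illustrative sketch (not part of the model): the broadcasting trick behind
# the 'std' (additive) score above, in NumPy with made-up sizes. q is expanded
# along seq_len2 and v is broadcast along seq_len1 so every (query, key) pair
# is summed, then a final projection reduces the hidden axis to one score per
# pair.
import numpy as np

batch, len1, len2, hid = 2, 3, 5, 4
q_tmp = np.random.randn(batch, len1, hid)
v_tmp = np.random.randn(batch, len2, hid)
w = np.random.randn(hid, 1)
pair = np.tanh(q_tmp[:, :, None, :] + v_tmp[:, None, :, :])  # [b, len1, len2, hid]
score = (pair @ w).squeeze(-1)                               # [b, len1, len2]
assert score.shape == (batch, len1, len2)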
def forward(self, q, k, v=None, padding_mask=None, num_heads=1):
    """forward

    Args:
        q (Variable): shape = [batch_size, seq_len1, hidden_size_q]
        k (Variable): shape = [batch_size, seq_len2, hidden_size_k]
        v (Variable): shape = [batch_size, seq_len2, hidden_size_v]. Defaults to k
        padding_mask (Variable): shape = [batch_size, seq_len2]; 0.0 at valid
            positions and -1.0 at padding (e.g. question_mask - 1.0). Default is None
        num_heads (int): currently only 1 is supported. Default is 1

    Returns:
        TODO

    Raises:
        NULL
    """
    q_shape = q.shape
    if len(q_shape) == 2:
        q = layers.unsqueeze(q, [1])
    if v is None:
        v = k

    if self._score_type == 'dot_prod':
        # [batch_size, q_lens, k_lens]
        attn_score = layers.matmul(q, k, transpose_y=True)
    elif self._score_type == 'affine':
        k_tmp = layers.fc(k,
                          size=q.shape[2],
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name, self._init_scale, need_bias=True))
        attn_score = layers.matmul(q, k_tmp, transpose_y=True)
    elif self._score_type == 'std':
        if self._hidden_size <= 0:
            raise ValueError("hidden_size should be greater than 0")
        q_tmp = layers.fc(q,
                          size=self._hidden_size,
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name + '_q', self._init_scale, need_bias=True))
        k_tmp = layers.fc(k,
                          size=self._hidden_size,
                          num_flatten_dims=2,
                          **nn_utils.param_attr(self._name + '_k', self._init_scale, need_bias=True))
        # shape = [batch_size, seq_len1, seq_len2, hidden_size]
        q_tmp_expand = layers.expand(layers.unsqueeze(q_tmp, [2]), [1, 1, v.shape[1], 1])
        # shape = [batch_size, 1, seq_len2, hidden_size]
        k_tmp_expand = layers.unsqueeze(k_tmp, [1])
        attn_score = layers.fc(layers.elementwise_add(q_tmp_expand, k_tmp_expand, act='tanh'),
                               size=1,
                               num_flatten_dims=3,
                               **nn_utils.param_attr(self._name + '_w', self._init_scale, need_bias=True))
        attn_score = layers.squeeze(attn_score, [3])
    else:
        raise RuntimeError('Supported score types: dot_prod/affine/std, but got %s' % (self._score_type))

    if padding_mask is not None:
        attn_for_mask = layers.transpose(attn_score, [1, 0, 2])
        attn_score_masked = layers.elementwise_add(attn_for_mask, padding_mask * INF, axis=-1)
        attn_score = layers.transpose(attn_score_masked, [1, 0, 2])

    weight = layers.softmax(attn_score)
    attn = layers.matmul(weight, v)
    if len(q_shape) == 2:
        attn = layers.squeeze(attn, [1])
    return attn
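# Illustrative sketch (not part of the model): the full attention read in
# NumPy, reusing the 0/-1 padding_mask convention shown earlier. Padded keys
# get score -INF, so softmax assigns them ~0 weight and they contribute
# nothing to the context vector. Sizes are made up for the demo.
import numpy as np

_INF = 1e9
batch, len1, len2, hid = 2, 3, 4, 5
q = np.random.randn(batch, len1, hid)
k = v = np.random.randn(batch, len2, hid)
padding_mask = np.array([[0., 0., -1., -1.],      # 0 = valid, -1 = padding
                         [0., 0., 0., -1.]])
score = q @ k.transpose(0, 2, 1) + padding_mask[:, None, :] * _INF
weight = np.exp(score - score.max(-1, keepdims=True))
weight /= weight.sum(-1, keepdims=True)           # softmax over keys
attn = weight @ v                                 # [batch, len1, hid]
assert np.allclose(weight[0, :, 2:], 0.0)         # padded keys get ~0 weight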