import copy

import numpy as np
import tensorflow as tf

# NOTE: `attention`, `sequence_encoder`, and helpers referenced below (e.g.
# `misc`, `conv_char_embedding`, `highway_network`) are project-local modules
# assumed to be imported elsewhere in this file.


def coattention_layer(seq1, seq1_length, seq2, seq2_length,
                      attn_type='diagonal_bilinear', scaled=True,
                      with_sentinel=False, seq2_to_seq1=None, num_layers=1,
                      encoder=None, **kwargs):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        attn_fun = attention.bilinear_attention
    elif attn_type == 'dot':
        attn_fun = attention.dot_attention
    elif attn_type == 'diagonal_bilinear':
        attn_fun = attention.diagonal_bilinear_attention
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    _, _, attn_states1, attn_states2, co_attn_state = attention.coattention(
        seq1, seq1_length, seq2, seq2_length, scaled, with_sentinel, attn_fun)

    if num_layers < 2:
        out = tf.concat([attn_states1, co_attn_state], 2)
    else:
        # Keep per-layer outputs in lists so all of them can be concatenated
        # into the final representation.
        seq1, attn_states1, attn_states2, co_attn_state = \
            [], [attn_states1], [attn_states2], [co_attn_state]
        for i in range(1, num_layers):
            with tf.variable_scope(str(i)):
                enc_1 = sequence_encoder.encoder(
                    attn_states1[-1], seq1_length, name='encoder1', **encoder)
                enc_2 = sequence_encoder.encoder(
                    attn_states2[-1], seq2_length, name='encoder2', **encoder)
                seq1.append(enc_1)
                _, _, new_attn_states1, new_attn_states2, new_co_attn_state = \
                    attention.coattention(
                        enc_1, seq1_length, enc_2, seq2_length, scaled,
                        with_sentinel, attn_fun, seq2_to_seq1=seq2_to_seq1)
                attn_states1.append(new_attn_states1)
                attn_states2.append(new_attn_states2)
                co_attn_state.append(new_co_attn_state)
        out = tf.concat(seq1 + attn_states1 + co_attn_state, 2)
    return out
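
# Hedged usage sketch (an assumption, not part of the original module): fusing
# a question encoding into a support encoding with two stacked coattention
# layers. Placeholder shapes and the encoder config are illustrative only.
def _coattention_usage_sketch():
    support = tf.placeholder(tf.float32, [None, None, 100], 'support')
    support_length = tf.placeholder(tf.int32, [None], 'support_length')
    question = tf.placeholder(tf.float32, [None, None, 100], 'question')
    question_length = tf.placeholder(tf.int32, [None], 'question_length')
    # Result: per-token support states concatenated with coattention features.
    return coattention_layer(support, support_length, question, question_length,
                             attn_type='diagonal_bilinear', num_layers=2,
                             encoder={'module': 'lstm', 'repr_dim': 100})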
def answer_layer(encoded_question, question_length, encoded_support,
                 support_length, support2question, answer2support, is_eval,
                 correct_start=None, topk=1, max_span_size=10000,
                 encoder=None, module='bilinear', repr_dim=100, **kwargs):
    if module == 'bilinear':
        return bilinear_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support,
            support_length, support2question, answer2support, is_eval,
            topk, max_span_size)
    elif module == 'mlp':
        return mlp_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support,
            support_length, support2question, answer2support, is_eval,
            topk, max_span_size)
    elif module == 'conditional':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support,
            support_length, correct_start, support2question, answer2support,
            is_eval, topk, max_span_size)
    elif module == 'conditional_bilinear':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support,
            support_length, correct_start, support2question, answer2support,
            is_eval, topk, max_span_size, bilinear=True)
    elif module == 'san':
        return san_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support,
            support_length, support2question, answer2support, is_eval,
            topk, max_span_size, **kwargs)
    elif module == 'bidaf':
        if 'repr_dim' not in encoder:
            encoder['repr_dim'] = repr_dim
        encoded_support_end = sequence_encoder.encoder(
            encoded_support, support_length, name='encoded_support_end',
            **encoder)
        encoded_support_end = tf.concat(
            [encoded_support, encoded_support_end], 2)
        # Forward the configured beam size and span limit to the BiDAF layer.
        return bidaf_answer_layer(encoded_support, encoded_support_end,
                                  support_length, support2question,
                                  answer2support, is_eval,
                                  topk=topk, max_span_size=max_span_size)
    else:
        raise ValueError("Unknown answer layer type: %s" % module)
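
# Hedged usage sketch (an assumption, not part of the original module): how the
# answer_layer dispatcher might be invoked. Per create_output below, the span
# layers return start scores, end scores, and doc/start/end pointers.
def _answer_layer_usage_sketch(encoded_question, question_length,
                               encoded_support, support_length,
                               support2question, answer2support, is_eval):
    start_scores, end_scores, doc_idx, start_pointer, end_pointer = answer_layer(
        encoded_question, question_length, encoded_support, support_length,
        support2question, answer2support, is_eval,
        module='bilinear', repr_dim=100, topk=1, max_span_size=16)
    return start_scores, end_scores, doc_idx, start_pointer, end_pointer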
def encode_module(module):
    module_type = module['module']
    reuse = module['name'] in seen_layer
    seen_layer.add(module['name'])
    if module_type == 'repeat':
        reuse = module.get('reuse')
        for k in range(module['num']):
            prefix = module['name'] + '/' if reuse else '%s_%d/' % (
                module['name'], k)
            for j, inner_module in enumerate(module['encoder']):
                # Copy this configuration so repeated applications do not
                # mutate the shared module definition.
                inner_module = copy.deepcopy(inner_module)
                if 'name' not in inner_module:
                    inner_module['name'] = _unique_module_name(inner_module, j)
                inner_module['name'] = prefix + inner_module['name']
                encode_module(inner_module)
        return
    try:
        key = module['input']
        out_key = module.get('output', key)
        if module['module'] in ['concat', 'add', 'mul', 'weighted_add', 'sub']:
            outputs_length[out_key] = outputs_length[key[0]]
            outputs_mapping[out_key] = outputs_mapping.get(key[0])
        if module['module'] == 'concat':
            outputs[out_key] = tf.concat([outputs[k] for k in key], 2,
                                         name=module['name'])
            return
        if module['module'] == 'add':
            outputs[out_key] = tf.add_n([outputs[k] for k in key],
                                        name=module['name'])
            return
        if module['module'] == 'sub':
            outputs[out_key] = tf.subtract(outputs[key[0]], outputs[key[1]],
                                           name=module['name'])
            return
        if module['module'] == 'mul':
            o = outputs[key[0]]
            for k in key[1:-1]:
                o *= outputs[k]
            outputs[out_key] = tf.multiply(o, outputs[key[-1]],
                                           name=module['name'])
            return
        if module['module'] == 'weighted_add':
            bias = module.get('bias', 0.0)
            g = tf.layers.dense(
                tf.concat([outputs[k] for k in key], 2),
                outputs[key[0]].get_shape()[-1].value, tf.sigmoid,
                bias_initializer=tf.constant_initializer(bias))
            # Convex combination of the two inputs, gated elementwise by g.
            outputs[out_key] = tf.identity(
                g * outputs[key[0]] + (1.0 - g) * outputs[key[1]],
                name=module['name'])
            return
        if 'repr_dim' not in module:
            module['repr_dim'] = default_repr_dim
        if 'dependent' in module:
            dep_key = module['dependent']
            outputs[out_key] = interaction_layer(
                outputs[key], outputs_length[key],
                outputs[dep_key], outputs_length[dep_key],
                outputs_mapping.get(key), outputs_mapping.get(dep_key),
                reuse=reuse, **module)
        else:
            if module.get('dropout') is True:
                # Replace the boolean flag with the configured default dropout.
                module['dropout'] = dropout
            outputs[out_key] = encoder(outputs[key], outputs_length[key],
                                       reuse=reuse, is_eval=is_eval, **module)
        outputs_length[out_key] = outputs_length[key]
        outputs_mapping[out_key] = outputs_mapping.get(key)
    except Exception as e:
        logger.error('Creating module %s failed.', module['name'])
        raise e
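
# Hedged illustration (not part of the original code): the kind of modular
# encoder configuration encode_module consumes. Each entry names its module
# type, input/output keys, and a unique 'name'; the keys and dims here are
# made-up examples.
_EXAMPLE_ENCODER_CONFIG = [
    {'module': 'lstm', 'input': 'text', 'output': 'rnn1',
     'name': 'lstm1', 'repr_dim': 100},
    {'module': 'lstm', 'input': 'rnn1', 'output': 'rnn2',
     'name': 'lstm2', 'repr_dim': 100},
    # 'weighted_add' gates elementwise between its two inputs, so both must
    # share their last dimension.
    {'module': 'weighted_add', 'input': ['rnn1', 'rnn2'],
     'output': 'gated', 'name': 'gate'},
    {'module': 'concat', 'input': ['text', 'gated'],
     'output': 'encoded', 'name': 'cat'},
]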
def create_output(self, shared_resources, input_tensors):
    tensors = TensorPortTensors(input_tensors)
    with tf.variable_scope(
            "fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
        # Some helpers
        batch_size = tf.shape(tensors.question_length)[0]
        max_question_length = tf.reduce_max(tensors.question_length)
        support_mask = misc.mask_for_lengths(tensors.support_length)

        input_size = shared_resources.embeddings.shape[-1]
        size = shared_resources.config["repr_dim"]
        with_char_embeddings = shared_resources.config.get(
            "with_char_embeddings", False)

        # set shapes for inputs
        tensors.emb_question.set_shape([None, None, input_size])
        tensors.emb_support.set_shape([None, None, input_size])

        emb_question = tensors.emb_question
        emb_support = tensors.emb_support
        if with_char_embeddings:
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding(
                len(shared_resources.char_vocab), size, tensors.word_chars,
                tensors.word_char_length,
                [tensors.question_batch_words, tensors.support_batch_words])

            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

        # compute encoder features
        question_features = tf.ones(
            tf.stack([batch_size, max_question_length, 2]))

        v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                 initializer=tf.constant_initializer(1.0))

        wiq_w = tf.matmul(
            tf.gather(emb_question * v_wiqw, tensors.support2question),
            emb_support, adjoint_b=True)
        wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

        question_binary_mask = tf.gather(
            tf.sequence_mask(tensors.question_length, dtype=tf.float32),
            tensors.support2question)
        wiq_w = tf.reduce_sum(
            tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])

        # [B, L, 2]
        support_features = tf.stack([tensors.word_in_question, wiq_w], 2)

        # highway layer to allow for interaction between concatenated embeddings
        if with_char_embeddings:
            with tf.variable_scope("char_embeddings") as vs:
                emb_question = tf.layers.dense(
                    emb_question, size, name="embeddings_projection")
                emb_question = highway_network(emb_question, 1)
                vs.reuse_variables()
                emb_support = tf.layers.dense(
                    emb_support, size, name="embeddings_projection")
                emb_support = highway_network(emb_support, 1)

        keep_prob = 1.0 - shared_resources.config.get("dropout", 0.0)
        emb_question, emb_support = tf.cond(
            tensors.is_eval,
            lambda: (emb_question, emb_support),
            lambda: (tf.nn.dropout(
                         emb_question, keep_prob,
                         noise_shape=[1, 1, emb_question.get_shape()[-1].value]),
                     tf.nn.dropout(
                         emb_support, keep_prob,
                         noise_shape=[1, 1, emb_question.get_shape()[-1].value])))

        # extend embeddings with features
        emb_question_ext = tf.concat([emb_question, question_features], 2)
        emb_support_ext = tf.concat([emb_support, support_features], 2)

        # encode question and support
        encoder_type = shared_resources.config.get('encoder', 'lstm').lower()
        if encoder_type in ['lstm', 'sru', 'gru']:
            # widen SRU states to allow for use of residual connections
            size = size + 2 if encoder_type == 'sru' else size
            encoded_question = encoder(emb_question_ext,
                                       tensors.question_length, size,
                                       module=encoder_type)
            encoded_support = encoder(emb_support_ext,
                                      tensors.support_length, size,
                                      module=encoder_type, reuse=True)
            projection_initializer = tf.constant_initializer(
                np.concatenate([np.eye(size), np.eye(size)]))
            encoded_question = tf.layers.dense(
                encoded_question, size, tf.tanh, use_bias=False,
                kernel_initializer=projection_initializer,
                name='projection_q')
            encoded_support = tf.layers.dense(
                encoded_support, size, tf.tanh, use_bias=False,
                kernel_initializer=projection_initializer,
                name='projection_s')
        else:
            raise ValueError(
                "Only RNN ('lstm', 'sru', 'gru') encoders are allowed for FastQA!")

        answer_layer = shared_resources.config.get('answer_layer',
                                                   'conditional').lower()

        topk = tf.get_variable(
            'topk', initializer=shared_resources.config.get('topk', 1),
            dtype=tf.int32, trainable=False)
        topk_p = tf.placeholder(tf.int32, [], 'beam_size_setter')
        topk_assign = topk.assign(topk_p)
        self._topk_assign = lambda k: self.tf_session.run(
            topk_assign, {topk_p: k})

        if answer_layer == 'conditional':
            start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                conditional_answer_layer(
                    size, encoded_question, tensors.question_length,
                    encoded_support, tensors.support_length,
                    tensors.correct_start, tensors.support2question,
                    tensors.answer2support, tensors.is_eval, topk=topk,
                    max_span_size=shared_resources.config.get(
                        "max_span_size", 10000))
        elif answer_layer == 'conditional_bilinear':
            start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                conditional_answer_layer(
                    size, encoded_question, tensors.question_length,
                    encoded_support, tensors.support_length,
                    tensors.correct_start, tensors.support2question,
                    tensors.answer2support, tensors.is_eval, topk=topk,
                    max_span_size=shared_resources.config.get(
                        "max_span_size", 10000),
                    bilinear=True)
        elif answer_layer == 'bilinear':
            start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                bilinear_answer_layer(
                    size, encoded_question, tensors.question_length,
                    encoded_support, tensors.support_length,
                    tensors.support2question, tensors.answer2support,
                    tensors.is_eval, topk=topk,
                    max_span_size=shared_resources.config.get(
                        "max_span_size", 10000))
        else:
            raise ValueError("Unknown answer layer: %s" % answer_layer)

        span = tf.stack(
            [doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

        return TensorPort.to_mapping(self.output_ports,
                                     (start_scores, end_scores, span))
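
# Usage note (a sketch, assuming access to an instance of this model module):
# because `topk` is a non-trainable variable with a placeholder-backed assign
# op, the beam size can be changed at inference time without rebuilding the
# graph, e.g.:
#
#   model_module._topk_assign(5)  # hypothetical handle to this class instance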