Exemplo n.º 1
0
    def calc_multi_head_similarity_fn(h, u, h_mask, u_mask, scope=None, num_units=None):
        """Multi-head scaled dot-product attention of queries `u` over keys/values `h`.

        `num_heads`, `input_keep_prob`, `cfg` and `is_train` are not parameters;
        they are read from the enclosing scope.

        No dropout, residual connection or normalisation is applied to the result.

        Args:
            h: rank-3 tensor used for keys and values, (N, T_k, C).
            u: rank-3 tensor used for queries, (N, T_q, C).
            h_mask: boolean mask for `h`, (N, T_k).
            u_mask: boolean mask for `u`, (N, T_q).
            scope: optional variable-scope name; defaults to
                'multi_head_attention'.
            num_units: width of the Q/K/V projections; defaults to the static
                last dimension of `h`.

        Returns:
            The per-head outputs of `softsel`, concatenated back along the last
            axis; presumably (N, T_q, num_units) -- confirm against `softsel`.
        """
        if num_units is None:
            num_units = h.get_shape().as_list()[-1]
        with tf.variable_scope(scope or 'multi_head_attention'):
            # Linear projections
            Q = F(u, num_units, activation=tf.identity, input_keep_prob=input_keep_prob, wd=cfg.wd, is_train=is_train, scope='Q') # (N, T_q, C)
            K = F(h, num_units, activation=tf.identity, input_keep_prob=input_keep_prob, wd=cfg.wd, is_train=is_train, scope='K') # (N, T_k, C)
            V = F(h, num_units, activation=tf.identity, input_keep_prob=input_keep_prob, wd=cfg.wd, is_train=is_train, scope='V') # (N, T_k, C)

            # Split the feature axis into heads and stack the heads on the batch axis.
            Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h)
            K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h)
            V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h)

            # Dot-product scores between every query and every key.
            outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k)

            # Scale by sqrt of the per-head key width.
            outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

            # Repeat the values for every query position so that softsel can
            # select over the T_k axis.
            aug_V = tf.tile(tf.expand_dims(V_, 1), [1, tf.shape(Q_)[1], 1, 1]) # (h*N, T_q, T_k, C/h)

            # A (query, key) pair is valid only when both positions are valid;
            # the masks are tiled num_heads times to follow the stacked batch axis.
            mask = tf.tile(tf.expand_dims(h_mask, 1), [num_heads, tf.shape(Q_)[1], 1]) \
                        & tf.tile(tf.expand_dims(u_mask, 2), [num_heads, 1, tf.shape(K_)[1]]) # (h*N, T_q, T_k)

            # NOTE(review): softsel presumably applies the masked softmax over
            # T_k and the weighted sum of aug_V in one step -- confirm against
            # its definition.
            outputs = softsel(aug_V, outputs, mask=mask)

            # Restore shape: move the heads back from the batch axis to the feature axis.
            outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2) # (N, T_q, C)

            return outputs
Exemplo n.º 2
0
 def __call__(self, inputs, state, scope=None):
     """Run one step of the wrapped cell on attention-augmented inputs.

     The controller scores the flattened memory, `softsel` picks from the
     memory using those scores and the flat mask, and the mapper combines
     the selection with `inputs`. The wrapped cell is then called with the
     mapped inputs and the *incoming* `state`.

     Args:
         inputs: step inputs, forwarded to the controller and the mapper.
         state: current state, forwarded to the controller, the mapper and
             the wrapped cell.
         scope: optional variable-scope name; defaults to "AttentionCell".

     Returns:
         Whatever the wrapped cell returns.
     """
     cell_scope = scope or "AttentionCell"
     with tf.variable_scope(cell_scope):
         logits = self._controller(inputs, state, self._flat_memory)
         selected = softsel(self._flat_memory, logits, mask=self._flat_mask)  # [N, m]
         # The state returned by the mapper is discarded; only the mapped
         # inputs are passed on.
         mapped_inputs, _ = self._mapper(inputs, state, selected)
         return self._cell(mapped_inputs, state)
Exemplo n.º 3
0
def self_attention(config, is_train, p, p_mask, scope=None):
    """Attend every position of `p` over all positions of the same sequence.

    Args:
        config: object providing `wd` and `att_func`, both forwarded to
            `get_logits`.
        is_train: forwarded to `get_logits`.
        p: rank-3 sequence tensor, [N, PL, d].
        p_mask: boolean mask for `p`, [N, PL].
        scope: optional variable-scope name; defaults to "self_attention".

    Returns:
        The result of `softsel` over the pairwise logits; presumably
        [N, PL, d] -- confirm against `softsel`.
    """
    with tf.variable_scope(scope or "self_attention"):
        PL = tf.shape(p)[1]

        # Pair every position i with every position j of the same sequence.
        p_aug_1 = tf.tile(tf.expand_dims(p, 2), [1, 1, PL, 1])  # [N, PL, PL, d], entry (i, j) = p[i]
        p_aug_2 = tf.tile(tf.expand_dims(p, 1), [1, PL, 1, 1])  # [N, PL, PL, d], entry (i, j) = p[j]

        # A pair (i, j) is valid only when both positions are valid.
        p_mask_aug_1 = tf.tile(tf.expand_dims(p_mask, 2), [1, 1, PL])
        p_mask_aug_2 = tf.tile(tf.expand_dims(p_mask, 1), [1, PL, 1])
        self_mask = p_mask_aug_1 & p_mask_aug_2  # [N, PL, PL]

        h_logits = get_logits([p_aug_1, p_aug_2], None, True, wd=config.wd, mask=self_mask,
                              is_train=is_train, func=config.att_func, scope='h_logits')  # [N, PL, PL]
        self_att = softsel(p_aug_2, h_logits, mask=self_mask, scope='self_att')

        return self_att
Exemplo n.º 4
0
 def calc_multi_perspective_similarity_fn(h, u, u_f, h_mask, u_mask, num_perspectives=2, keep_rate=1.0, scope=None):
     """Compare every position of `h` with `u` using four matching strategies.

     The four results, each with `num_perspectives` features per position of
     `h`, are concatenated along the last axis:

       h1: `h` against `u_f`.
       h2: `h` against every position of `u`, max-pooled over `u`.
       h3: `h` against the `softsel` (similarity-weighted) summary of `u`.
       h4: `h` against the single position of `u` with the highest similarity.

     Args:
         h: rank-3 tensor, [N, JX, d].
         u: rank-3 tensor, [N, JQ, d].
         u_f: summary of `u`; presumably [N, d] (e.g. a final state) -- TODO
             confirm against callers.
         h_mask: boolean mask for `h`, [N, JX].
         u_mask: boolean mask for `u`, [N, JQ].
         num_perspectives: number of perspectives passed to `match_fn`.
         keep_rate: accepted for interface compatibility; not used here.
         scope: optional variable-scope name; defaults to 'multi_perspective'.

     Returns:
         Tensor of shape [N, JX, 4 * num_perspectives].
     """
     with tf.variable_scope(scope or 'multi_perspective'):
         N, JX, JQ = tf.shape(h)[0], tf.shape(h)[1], tf.shape(u)[1]
         d = h.get_shape().as_list()[-1]
         l = num_perspectives
         # A (h position, u position) pair is valid only when both are valid.
         h_u_mask = tf.logical_and(tf.tile(tf.expand_dims(h_mask, -1), [1, 1, JQ]),
                                     tf.tile(tf.expand_dims(u_mask, 1), [1, JX, 1])) # [N, JX, JQ]

         # h1: match against the summary vector of u.
         h1 = match_fn(h, tf.expand_dims(u_f, 1), num_perspectives=num_perspectives, scope='h1')
         h1 = tf.reshape(h1, [N, JX, l])

         # h2: match against every position of u, then max-pool over u.
         h2 = match_fn(h, u, num_perspectives=num_perspectives, scope='h2') # [N, JX, JQ, l]
         h2 = tf.reduce_max(exp_mask(h2, tf.tile(tf.expand_dims(h_u_mask, 3), [1, 1, 1, l])), 2) # [N, JX, l]

         h_u_similarity = calc_similarity_fn(h, u, logit_type='dot', scope='h_u_similarity') # [N, JX, JQ]
         aug_u = tf.tile(tf.expand_dims(u, 1), [1, JX, 1, 1]) # [N, JX, JQ, d]

         # h3: match against the similarity-weighted summary of u.
         u_mean = softsel(aug_u, h_u_similarity, mask=h_u_mask, scope='u_mean') # [N, JX, d]
         h3 = match_fn(tf.reshape(h, [-1, 1, d]), tf.reshape(u_mean, [-1, 1, d]), num_perspectives=num_perspectives, scope='h3')
         h3 = tf.reshape(h3, [N, JX, l])

         # h4: match against the single most similar position of u.
         # NOTE(review): the argmax is taken over the unmasked similarity, so a
         # padded position of u can be selected -- confirm whether it should be
         # masked first.
         max_idx = tf.argmax(h_u_similarity, axis=2) # [N, JX]
         max_one_hot = tf.one_hot(max_idx, JQ, dtype='float') # [N, JX, JQ]
         u_max_mean = tf.reduce_sum(tf.tile(tf.expand_dims(max_one_hot, 3), [1, 1, 1, d]) * aug_u, 2) # [N, JX, d]
         # Previously this matched against u_mean again, which left u_max_mean
         # unused and made h4 the same comparison as h3.
         h4 = match_fn(tf.reshape(h, [-1, 1, d]), tf.reshape(u_max_mean, [-1, 1, d]), num_perspectives=num_perspectives, scope='h4')
         h4 = tf.reshape(h4, [N, JX, l])

         out = tf.concat([h1, h2, h3, h4], 2) # [N, JX, 4*l]
         return out
    def build_network(self):
        """Build the word -> sentence -> document classification graph.

        Reads `self.config`, `self.docs`, `self.wordmask`, `self.sentencemask`
        and `self.is_train`. Stores the class logits in `self.logits`. When
        `config.debug` is set, also creates `self.tensor_dict` and records the
        embedded documents and the word-level RNN output in it.

        Shape comments use N = documents, M = sentences per document,
        JX = words per sentence, dw = embedding size, d = hidden size.
        """
        cfg = self.config

        if cfg.debug:
            self.tensor_dict = {}

        vocab_size = cfg.n_voc
        emb_dim = cfg.emb_dim
        hidden = cfg.hidden_dim

        docs_shape = tf.shape(self.docs)
        n_docs, n_sents, n_words = docs_shape[0], docs_shape[1], docs_shape[2]

        # Keyword arguments shared by the recurrent and the dense helper calls.
        rnn_kwargs = dict(dropout_keep_prob=cfg.input_keep_prob,
                          wd=cfg.wd,
                          is_train=self.is_train)
        dense_kwargs = dict(input_keep_prob=cfg.input_keep_prob,
                            wd=cfg.wd,
                            is_train=self.is_train)

        with tf.variable_scope("emb"):
            with tf.variable_scope('emb_var'), tf.device('/cpu:0'):
                embedding_table = tf.get_variable(
                    'word_emb_mat',
                    shape=[vocab_size, emb_dim],
                    dtype='float',
                    initializer=get_initializer(cfg.embeddings),
                    trainable=cfg.tune_embedding)
                embedded = tf.nn.embedding_lookup(embedding_table, self.docs)  # [N, M, JX, dw]

        if cfg.debug:
            self.tensor_dict['docs_emb'] = embedded

        # Fold the sentence axis into the batch axis so that every sentence
        # is encoded independently by the word-level RNN.
        n_flat = n_docs * n_sents
        word_mask = tf.reshape(self.wordmask, [n_flat, n_words])
        word_counts = tf.reduce_sum(tf.to_int32(word_mask), 1)  # [N*M]
        sentence_counts = tf.reduce_sum(tf.to_int32(self.sentencemask), 1)  # [N]
        embedded = tf.reshape(embedded, [n_flat, n_words, emb_dim])

        with tf.variable_scope("wordlstm"):
            word_states, _ = rnn(cfg.rnn_type, embedded, word_counts, hidden,
                                 scope='document', **rnn_kwargs)  # [N*M, JX, 2*d]
        if cfg.debug:
            self.tensor_dict['document'] = word_states

        # Pool the words of each sentence into one vector.
        with tf.variable_scope("pooling_layer"):
            word_logits = linear(word_states, 1, True, scope='logits', squeeze=True,
                                 **dense_kwargs)  # [N*M, JX]
            sentence_vecs = softsel(word_states, word_logits, word_mask, scope='document')
            sentence_vecs = tf.reshape(sentence_vecs, [n_docs, n_sents, 2 * hidden])

        with tf.variable_scope('sentencelstm'):
            sentence_states, _ = rnn(cfg.rnn_type, sentence_vecs, sentence_counts, hidden,
                                     scope='document', **rnn_kwargs)  # [N, M, 2*d]

        # Pool the sentences of each document into one vector.
        with tf.variable_scope("pooling_layer2"):
            sentence_logits = linear(sentence_states, 1, True, scope='logits', squeeze=True,
                                     **dense_kwargs)  # [N, M]
            doc_vec = softsel(sentence_states, sentence_logits, self.sentencemask, 'document')

        with tf.variable_scope("hidden_layer"):
            # The hidden layer keeps the width of the pooled document vector.
            feature_dim = doc_vec.get_shape().as_list()[-1]
            doc_vec = F(doc_vec, feature_dim, activation=tf.nn.tanh, scope='document',
                        **dense_kwargs)

        with tf.variable_scope("logits_layer"):
            class_logits = linear(doc_vec, cfg.n_classes, True,
                                  wd=cfg.wd, scope='logits')  # [N, n_classes]

        self.logits = class_logits
Exemplo n.º 6
0
def basic_block(config, p, h, p_mask, h_mask, input_keep_prob=1.0, is_train=None, scope=None):
    """Cross-attend, self-attend, project and re-encode a pair of sequences.

    Steps, all with variables shared between the two sequences:
      1. cross attention between `p` and `h`;
      2. self attention over each sequence;
      3. fusion of each sequence with its cross-attended counterpart
         (element-wise product and absolute difference), then a ReLU projection;
      4. a single RNN run over both sequences stacked on the batch axis;
      5. max pooling plus attentive pooling over time.

    Args:
        config: object providing `wd`, `att_func`, `input_keep_prob`,
            `rnn_type` and `hidden_size`.
        p: rank-3 tensor, [N, JX, d].
        h: rank-3 tensor, [N, JQ, d]. Because `p` and `h` are concatenated on
            the batch axis before the RNN, JX must equal JQ.
        p_mask: boolean mask for `p`, [N, JX].
        h_mask: boolean mask for `h`, [N, JQ].
        input_keep_prob: accepted for interface compatibility; the body reads
            `config.input_keep_prob` instead.
        is_train: forwarded to the helper layers.
        scope: optional variable-scope name; defaults to "basic_block".

    Returns:
        Tuple `(p, h, p_max, h_max)`: the re-encoded, masked sequences and their
        pooled summaries (max pooling concatenated with attentive pooling).
    """
    JX, JQ = tf.shape(p)[1], tf.shape(h)[1]
    d = p.get_shape().as_list()[-1]
    # A (p position, h position) pair is valid only when both are valid.
    aug_p_mask = tf.tile(tf.expand_dims(p_mask, 2), [1, 1, JQ])
    aug_h_mask = tf.tile(tf.expand_dims(h_mask, 1), [1, JX, 1])
    mask = aug_p_mask & aug_h_mask # [N, JX, JQ]
    with tf.variable_scope(scope or "basic_block"):
        with tf.variable_scope("att"):
            aug_p = tf.tile(tf.expand_dims(p, 2), [1, 1, JQ, 1]) # [N, JX, JQ, d]
            aug_h = tf.tile(tf.expand_dims(h, 1), [1, JX, 1, 1]) # [N, JX, JQ, d]

            similarity = get_logits([aug_p, aug_h], None, True,
                                    wd=config.wd, mask=mask,
                                    is_train=is_train,
                                    func=config.att_func,
                                    scope='similarity') # [N, JX, JQ]

            # h summarised for every position of p, and p summarised for every
            # position of h (same similarity matrix, transposed).
            h_att = softsel(aug_h, similarity, mask=mask, scope='h_att') # [N, JX, d]
            p_att = softsel(tf.transpose(aug_p, [0, 2, 1, 3]), # [N, JQ, JX, d]
                            tf.transpose(similarity, [0, 2, 1]),
                            mask=tf.transpose(mask, [0, 2, 1]),
                            scope='p_att') # [N, JQ, d]

        with tf.variable_scope("self_att"):
            p = self_attention_layer(config, is_train, p, p_mask=p_mask, scope="self_attention") # [N, JX, d]
            # The second call reuses the variables created by the first.
            tf.get_variable_scope().reuse_variables()
            h = self_attention_layer(config, is_train, h, p_mask=h_mask, scope="self_attention") # [N, JQ, d]

        p = tf.concat([p * h_att, tf.abs(p - h_att)], -1) # [N, JX, 2d]
        h = tf.concat([h * p_att, tf.abs(h - p_att)], -1) # [N, JQ, 2d]

        # The scope name is misspelled but kept as-is: renaming it would change
        # the variable names and break existing checkpoints.
        with tf.variable_scope("projecion"):
            p = F(p, d, activation=tf.nn.relu, scope='p', input_keep_prob=config.input_keep_prob, is_train=is_train, wd=config.wd)
            tf.get_variable_scope().reuse_variables()
            h = F(h, d, activation=tf.nn.relu, scope='p', input_keep_prob=config.input_keep_prob, is_train=is_train)

        with tf.variable_scope("rnn"):
            p_len = tf.reduce_sum(tf.cast(p_mask, tf.int32), 1) # [N]
            h_len = tf.reduce_sum(tf.cast(h_mask, tf.int32), 1) # [N]

            # Both sequences go through one RNN call, stacked on the batch axis
            # (2N rows). The lengths must be stacked the same way; passing only
            # p_len supplied N lengths for 2N rows and ignored h's lengths.
            ph_len = tf.concat([p_len, h_len], 0) # [2N]
            ph, _ = rnn(config.rnn_type, tf.concat([p, h], 0), ph_len, config.hidden_size, scope='p_rnn',
                    dropout_keep_prob=config.input_keep_prob, is_train=is_train, wd=config.wd)
            p, h = tf.split(ph, 2)

            # Zero out the padded positions.
            p = p * tf.to_float(tf.expand_dims(p_mask, -1)) # [N, JX, ...]
            h = h * tf.to_float(tf.expand_dims(h_mask, -1)) # [N, JQ, ...]

        p_max = tf.reduce_max(p, 1)
        h_max = tf.reduce_max(h, 1)

        # Hard-coded switch: attentive pooling is always appended to max pooling.
        attentive = True
        if attentive:
            p_att_logits = linear(p, 1, True, scope='p_att_logits', squeeze=True,
                                input_keep_prob=config.input_keep_prob, is_train=is_train, wd=config.wd)
            p_att = softsel(p, p_att_logits, mask=p_mask, scope='p_att')
            # Share the pooling weights between p and h.
            tf.get_variable_scope().reuse_variables()
            h_att_logits = linear(h, 1, True, scope='p_att_logits', squeeze=True,
                                input_keep_prob=config.input_keep_prob, is_train=is_train)
            h_att = softsel(h, h_att_logits, mask=h_mask, scope='h_att')

            p_max = tf.concat([p_max, p_att], -1)
            h_max = tf.concat([h_max, h_att], -1)

    return p, h, p_max, h_max