Example #1
def att_match(mid, pat, mid_mask, pat_mask, hidden, keep_prob, is_train):
    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)
    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
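
The helpers attention, cosine, and dropout are assumed by att_match but are not part of this excerpt. Below is a minimal sketch of what the first two might look like, inferred only from the call sites (masked attention pooling over time, and pairwise cosine similarity between the pooled vectors; the weighted flag that appears in a later example is accepted but ignored here):

import tensorflow as tf

def attention(inputs, hidden, mask=None, scope="attention"):
    # Masked attention pooling: score each time step, mask out padding,
    # return normalized weights of shape [batch, seq_len].
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        u = tf.layers.dense(inputs, hidden, activation=tf.nn.tanh)
        scores = tf.squeeze(tf.layers.dense(u, 1, use_bias=False), axis=2)
        if mask is not None:
            scores -= (1.0 - tf.cast(mask, tf.float32)) * 1e30
        return tf.nn.softmax(scores)

def cosine(a, b, weighted=True):
    # Pairwise cosine similarity between every row of a and every row of b,
    # consistent with the argmax-over-patterns usage in later examples.
    a_n = tf.nn.l2_normalize(a, axis=-1)
    b_n = tf.nn.l2_normalize(b, axis=-1)
    return tf.matmul(a_n, b_n, transpose_b=True)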
Example #2
def mean_match(mid, pat, mid_mask, pat_mask, keep_prob, is_train):
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_v = mean(mid, mask=mid_mask)
    pat_v = mean(pat, mask=pat_mask)
    pat_v_d = mean(pat_d, mask=pat_mask)
    sur_sim = cosine(mid_v, pat_v, weighted=False)
    pat_sim = cosine(pat_v, pat_v_d, weighted=False)
    return sur_sim, pat_sim
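
mean here is a masked average over the time dimension; its implementation is not shown, but a sketch consistent with the signature used above:

def mean(inputs, mask=None):
    # Masked mean over time: count only unmasked (non-padding) positions.
    if mask is None:
        return tf.reduce_mean(inputs, axis=1)
    m = tf.cast(mask, tf.float32)
    summed = tf.reduce_sum(inputs * tf.expand_dims(m, axis=2), axis=1)
    counts = tf.maximum(tf.reduce_sum(m, axis=1, keepdims=True), 1.0)
    return summed / counts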
Example #3
def lstm_match(mid, pat, mid_mask, pat_mask, mid_len, pat_len, hidden,
               keep_prob, is_train):

    rnn = Cudnn_RNN(num_layers=1, num_units=hidden // 2)
    mid, _ = rnn(mid, seq_len=mid_len, concat_layers=False)
    pat, _ = rnn(pat, seq_len=pat_len, concat_layers=False)

    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)

    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
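
All three matching functions rely on a shared dropout(args, keep_prob, is_train, mode=...) wrapper whose body is not included in these excerpts. The sketch below shows what such a wrapper typically looks like in R-Net-style codebases; this is an assumption, and it further assumes keep_prob is a Python float:

def dropout(args, keep_prob, is_train, mode="recurrent"):
    # Apply dropout only at training time. In "recurrent" mode one noise
    # mask is reused at every time step (variational dropout); in
    # "embedding" mode whole rows share one flag (intended for a 2-D
    # embedding matrix) and the result is rescaled by keep_prob;
    # mode=None falls back to ordinary element-wise dropout.
    if keep_prob >= 1.0:
        return args
    noise_shape = None
    scale = 1.0
    shape = tf.shape(args)
    if mode == "embedding":
        noise_shape = [shape[0], 1]
        scale = keep_prob
    elif mode == "recurrent" and len(args.get_shape().as_list()) == 3:
        noise_shape = [shape[0], 1, shape[-1]]
    return tf.cond(
        tf.cast(is_train, tf.bool),
        lambda: tf.nn.dropout(args, keep_prob, noise_shape=noise_shape) * scale,
        lambda: args)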
Example #4
    def __init__(self,
                 cell,
                 num_layers,
                 num_units,
                 batch_size,
                 input_size,
                 keep_prob=1.0,
                 is_train=None,
                 scope="native_rnn"):
        self.num_layers = num_layers
        self.cell_type = cell
        self.inits = []
        self.dropout_mask = []
        self.num_units = num_units
        self.scope = scope
        for layer in range(num_layers):
            input_size_ = input_size if layer == 0 else 2 * num_units
            init_fw = rnn.get_cell(cell, num_units).get_init_state(
                shape=[batch_size], scope="fw_{}".format(layer))
            init_bw = rnn.get_cell(cell, num_units).get_init_state(
                shape=[batch_size], scope="bw_{}".format(layer))
            mask_fw = dropout(tf.ones([batch_size, 1, input_size_],
                                      dtype=tf.float32),
                              keep_prob=keep_prob,
                              is_train=is_train,
                              mode=None)
            mask_bw = dropout(tf.ones([batch_size, 1, input_size_],
                                      dtype=tf.float32),
                              keep_prob=keep_prob,
                              is_train=is_train,
                              mode=None)
            self.inits.append((init_fw, init_bw))
            self.dropout_mask.append((mask_fw, mask_bw))
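
A hedged sketch of how the per-layer initial states and dropout masks built above could be consumed by a __call__ method. This is hypothetical: the real class may run the two directions separately (so that mask_bw is also applied) and may cache its cells instead of rebuilding them:

    def __call__(self, inputs, seq_len, concat_layers=True):
        # Hypothetical companion to the constructor above: variational
        # dropout (one mask reused at every time step) on the input of a
        # bidirectional dynamic RNN, layer by layer.
        outputs = [inputs]
        with tf.variable_scope(self.scope):
            for layer in range(self.num_layers):
                init_fw, init_bw = self.inits[layer]
                mask_fw, _mask_bw = self.dropout_mask[layer]
                cell_fw = rnn.get_cell(self.cell_type, self.num_units)
                cell_bw = rnn.get_cell(self.cell_type, self.num_units)
                with tf.variable_scope("layer_{}".format(layer)):
                    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw, cell_bw, outputs[-1] * mask_fw,
                        sequence_length=seq_len,
                        initial_state_fw=init_fw,
                        initial_state_bw=init_bw,
                        dtype=tf.float32)
                outputs.append(tf.concat([out_fw, out_bw], axis=2))
        return tf.concat(outputs[1:], axis=2) if concat_layers else outputs[-1]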
Example #5
    def ready(self):
        config = self.config
        x, senti, neg_senti, negation = self.x, self.senti, self.neg_senti, self.negation

        word_mat, op_word_mat = self.word_mat, self.op_word_mat

        score_scale = config.score_scale

        with tf.variable_scope("encoder"):
            x = tf.nn.embedding_lookup(word_mat, x)
            x = tf.expand_dims(x, -1)
            x = dropout(x, keep_prob=config.keep_prob, is_train=self.is_train)

            pooled_outputs = []
            for f_size in self.filter_sizes:
                conv = tf.layers.conv2d(x,
                                        filters=self.num_filters,
                                        kernel_size=[f_size, self.emb_dim],
                                        strides=(1, 1),
                                        padding='VALID',
                                        activation=tf.nn.relu)

                pool = tf.layers.max_pooling2d(
                    conv,
                    pool_size=[config.max_len - f_size + 1, 1],
                    strides=(1, 1),
                    padding='VALID')

                pooled_outputs.append(pool)

            h_pool = tf.concat(pooled_outputs, 3)
            h_pool_flat = tf.reshape(
                h_pool, [-1, self.num_filters * len(self.filter_sizes)])
            h_drop = dropout(h_pool_flat,
                             config.keep_prob,
                             is_train=self.is_train)

        with tf.variable_scope("predict"):
            logit = tf.layers.dense(h_drop,
                                    config.score_scale,
                                    activation=None)
            self.prob = tf.nn.softmax(logit)
            self.pred = tf.argmax(self.prob, axis=-1)
            self.golden = self.y
            self.loss = tf.reduce_mean(
                tf.reduce_sum(-self.golden * tf.log(self.prob + 1e-6), axis=1))

        with tf.variable_scope("decoder"):
            senti_emb = tf.nn.embedding_lookup(op_word_mat, senti)
            self.senti_emb = senti_emb

            neg_senti_emb = tf.nn.embedding_lookup(op_word_mat, neg_senti)
            self.neg_senti_emb = neg_senti_emb

            self.vae_loss, entropy_term, self.W_decoder, self.u, self.u_neg_sample, self.log_u, self.log_u_neg_sample = selectional_preference(
                senti_emb, neg_senti_emb, negation, self.prob, score_scale)
            self.entropy_term_loss = tf.multiply(self.alpha,
                                                 entropy_term,
                                                 name="entropy_term")

            opinion_reg, self.similarity, self.b_x_b_mean, self.b_x_b_min, self.b_x_b_max = get_regularizer_score_pairwise(
                config, self.prob, senti_emb, negation)
            self.opinion_reg_loss = tf.multiply(
                self.beta, opinion_reg, name="opinion_words_regulazation")
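
A side note on the loss in the "predict" scope above: assuming each row of self.golden sums to 1 (one-hot or soft labels), the hand-rolled tf.reduce_mean(tf.reduce_sum(-golden * log(prob + 1e-6), axis=1)) is, up to the 1e-6 smoothing, ordinary cross entropy and could equivalently be computed from the logits:

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logit, labels=tf.stop_gradient(self.golden)))

This is only an equivalent formulation, not the author's code; computing the loss from logits avoids taking the log of an explicitly smoothed softmax.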
Example #6
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len) #[10, ?,300]

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("content_modeling"):

            logits4, c_semantics = content_model(init, match, config.hidden)

        with tf.variable_scope("cross_passage_attention"):
            self.query_num = int(config.batch_size/config.passage_num)
            c_semantics = tf.reshape(c_semantics, shape=[self.query_num, config.passage_num, -1])
            attnc_key = tf.tile(tf.expand_dims(c_semantics, axis=2), [1, 1, config.passage_num, 1])
            attnc_mem = tf.tile(tf.expand_dims(c_semantics, axis=1), [1, config.passage_num, 1, 1])
            attnc_w = tf.reduce_sum(attnc_key*attnc_mem, axis=-1)
            attnc_mask = tf.ones([config.passage_num, config.passage_num])-tf.diag([1.0]*config.passage_num)
            attnc_w = tf.nn.softmax(attnc_w*attnc_mask, axis=-1)
            attncp = tf.reduce_sum(tf.tile(tf.expand_dims(attnc_w, axis=-1), [1, 1, 1, 2*config.hidden])*attnc_mem, axis= 2)
        
        
        with tf.variable_scope("pseudo_label"):
            self.is_select = tf.reshape(tf.squeeze(self.is_select), shape=[self.query_num, config.passage_num])
            self.is_select = self.is_select/tf.tile(tf.reduce_sum(self.is_select, axis=-1, keepdims=True), [1, config.passage_num])
            sim_matrix = attnc_w
            lb_matrix = tf.tile(tf.expand_dims(self.is_select, axis=1), [1, config.passage_num, 1])
            self.pse_is_select = tf.reduce_sum(sim_matrix*lb_matrix, axis=-1) + tf.constant([0.00000001]*config.passage_num, dtype=tf.float32)    # avoid all zero
            self.pse_is_select = self.pse_is_select/tf.tile(tf.reduce_sum(self.pse_is_select, axis=-1, keepdims=True), [1,config.passage_num])
            alpha = 0.7
            self.fuse_label = alpha*self.is_select + (1-alpha)*tf.stop_gradient(self.pse_is_select)
        

        with tf.variable_scope("predict_passage"):
            init = tf.reshape(init, shape=[self.query_num, config.passage_num, -1])
            attn_concat = tf.concat([init, attncp, c_semantics], axis=-1)
            d1 = tf.layers.dense(attn_concat, 2 * config.hidden,
                                 activation=tf.nn.leaky_relu,
                                 bias_initializer=tf.glorot_uniform_initializer())  # 150
            d2 = tf.layers.dense(d1, config.hidden,
                                 activation=tf.nn.leaky_relu,
                                 bias_initializer=tf.glorot_uniform_initializer())  # 75
            logits3 = tf.squeeze(tf.layers.dense(
                d2, 1, activation=None,
                bias_initializer=tf.glorot_uniform_initializer()))
        
        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 30)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            #logits3 = tf.reduce_max(tf.reduce_max(outer, axis=2), axis=1)
            self.is_select_p = tf.nn.sigmoid(logits3)

            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
           
            weighted_losses = weighted_loss(config, 0.000001, self.y1, losses) #0.01
            weighted_losses2 = weighted_loss(config, 0.000001, self.y2, losses2) #0.01
            
            losses3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits3, labels=tf.stop_gradient(self.fuse_label)))
            
            in_answer_weight = tf.ones_like(self.in_answer) + 3*self.in_answer
            
            losses4 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits4, labels=tf.stop_gradient(self.in_answer))*in_answer_weight, axis=-1)

            weighted_losses4 = weighted_loss(config, 0.000001, self.in_answer, losses4)
            
            self.loss_dict = {'pos_s loss':losses, 'pos_e loss':losses2, 'select loss':losses3, 'in answer':losses4}
            for key, values in self.loss_dict.items():
                self.loss_dict[key] = tf.reduce_mean(values)
            
            self.loss = tf.reduce_mean(weighted_losses + weighted_losses2 + losses3+ weighted_losses4)
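
The "predict" scope in this example (and in several of the later ones) decodes an answer span from the start and end distributions with an outer product restricted by matrix_band_part. A small self-contained illustration of that trick, with toy shapes chosen only for the example:

import tensorflow as tf

p_start = tf.nn.softmax(tf.random_normal([2, 50]))   # [batch, positions]
p_end = tf.nn.softmax(tf.random_normal([2, 50]))
# Joint probability of every (start, end) pair, restricted to spans with
# 0 <= end - start <= 30, exactly as in the code above.
outer = tf.matmul(tf.expand_dims(p_start, axis=2),
                  tf.expand_dims(p_end, axis=1))
outer = tf.matrix_band_part(outer, 0, 30)
start_pred = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
end_pred = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)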
Example #7
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        gi = []
        att_vP = []

        for i in range(config.max_para):
            print(i)
            with tf.variable_scope("emb" + str(i)):
                with tf.variable_scope("char" + str(i)):
                    #CL = tf.Print(CL,[CL],message="CL:")
                    #PL = tf.Print(PL,[PL],message="PL:")
                    #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr.get_shape()],message="ch_pr:")
                    self.ch_pr_ = self.ch_pr[:, i * 400:(i + 1) * 400, :]
                    print(self.ch_pr_.get_shape())
                    #self.c_pr = tf.reshape(self.c_pr, [N, 12, PL])
                    #print(self.ch.get_shape())
                    #print(self.ch_pr.get_shape())
                    #print(self.c.get_shape())
                    #print(self.c_pr.get_shape())
                    #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr[:,2:,:]],message="ch_pr")
                    ch_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.ch_pr_),
                        [N * PL, CL, dc])
                    #   self.char_mat, self.ch), [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
                    #qh_emb = tf.Print(qh_emb,[qh_emb],message="qh_emb")
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    #state_fw = tf.Print(state_fw,[state_fw],message="state_fw")
                    #state_bw = tf.Print(state_bw,[state_bw],message="state_bw")
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
                    #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
                with tf.name_scope("word" + str(i)):
                    c_emb = tf.nn.embedding_lookup(
                        self.word_mat, self.c_pr[:, i * 400:(i + 1) * 400])
                    q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            with tf.variable_scope("encoding" + str(i)):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=c_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                c = rnn(c_emb, seq_len=self.c_len)
                q = rnn(q_emb, seq_len=self.q_len)

            with tf.variable_scope("attention" + str(i)):
                qc_att = dot_attention(c,
                                       q,
                                       mask=self.q_mask,
                                       hidden=d,
                                       keep_prob=config.keep_prob,
                                       is_train=self.is_train)
                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=qc_att.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                att = rnn(qc_att, seq_len=self.c_len)
                # att is the v_P
                if i == 0:
                    att_vP = att
                else:
                    att_vP = tf.concat([att_vP, att], axis=1)
                #att = tf.Print(att,[att],message="att:")
                print("att:", att.get_shape().as_list())
                print("att_vP:", att_vP.get_shape().as_list())
            #att_vP = tf.Print(att_vP,[tf.shape(att_vP)],message="att_vP:")
            """
			with tf.variable_scope("match"):
				self_att = dot_attention(
					att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
				rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				match = rnn(self_att, seq_len=self.c_len)
			"""
        with tf.variable_scope("pointer"):

            # r_Q:
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            print("rQ:", init.get_shape().as_list())
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, att, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            #losses1_2 = tf.reduce_mean(losses1_2, axis=0)
            self.loss = tf.reduce_mean(losses + losses2)

            # print losses
            #condition = tf.greater(self.loss, 11)
            #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
            #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        if config.with_passage_ranking:
            gi = None
            for i in range(config.max_para):
                # Passage ranking
                with tf.variable_scope("passage-ranking-attention" + str(i)):

                    #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:")
                    vj_P = att_vP[:, i * 400:(i + 1) * 400, :]
                    pr_att = pr_attention(
                        batch=N,
                        hidden=init.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
                    r_P = pr_att(init, vj_P, d, self.c_mask)
                    #r_P = tf.Print(r_P,[r_P],message="r_p")
                    # Wg
                    concatenate = tf.concat([init, r_P], axis=1)
                    g = tf.nn.tanh(
                        dense(concatenate,
                              hidden=d,
                              use_bias=False,
                              scope="g" + str(i)))
                    g_ = dense(g, 1, use_bias=False, scope="g_" + str(i))
                    #g = tf.Print(g,[g],message="g")
                    if i == 0:
                        gi = tf.reshape(g_, [N, 1])
                    else:
                        gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1)
            #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32)
            #self.gi = tf.nn.softmax(gi_)
            #self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
            #			logits=gi_, labels=tf.reshape(self.pr,[-1,1]))
            self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
                logits=gi, labels=self.pr)
            #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3),
            #	tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:")
            self.pr_loss = tf.reduce_mean(self.losses3)
            #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss])
            self.r = tf.constant(0.8)
            self.e_loss1 = tf.multiply(self.r, self.loss)
            self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r),
                                       self.pr_loss)
            self.e_loss = tf.add(self.e_loss1, self.e_loss2)
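
The question summary r_Q used to initialize the pointer network comes from the summ helper, which is not included in these excerpts. A hedged sketch consistent with its call sites (attention pooling over the question, reusing the dropout wrapper sketched earlier); the repo's own version may differ in detail:

def summ(memory, hidden, mask, keep_prob=1.0, is_train=None, scope="summ"):
    # Score each question position, mask out padding, and return the
    # attention-weighted sum of memory as a fixed-size summary vector.
    with tf.variable_scope(scope):
        d_memory = dropout(memory, keep_prob=keep_prob, is_train=is_train)
        s = tf.layers.dense(tf.nn.tanh(tf.layers.dense(d_memory, hidden)),
                            1, use_bias=False)
        s = tf.squeeze(s, axis=2) - (1.0 - tf.cast(mask, tf.float32)) * 1e30
        a = tf.expand_dims(tf.nn.softmax(s), axis=2)
        return tf.reduce_sum(a * memory, axis=1)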
Example #8
    def ready(self):
        config = self.config
        x, w_mask, w_len, num_sent, senti, weight, neg_senti = self.x, self.w_mask, self.w_len, self.sent_num, self.senti, self.weight, self.neg_senti
        word_mat, asp_word_mat, query_mat = self.word_mat, self.asp_word_mat, self.query_mat

        num_aspect = self.num_aspect
        score_scale = config.score_scale
        batch = tf.floordiv(tf.shape(x)[0], num_sent)
        query_mat = tf.reshape(query_mat,
                               [config.num_aspects, -1, config.emb_dim])

        with tf.variable_scope("word_level"):
            x = dropout(tf.nn.embedding_lookup(word_mat, x),
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
            x = cudnn_lstm(x, config.hidden // 2, sequence_length=w_len)
            query = tf.tanh(dense(query_mat, config.hidden))

            doc = tf.expand_dims(x, axis=0)
            mask = tf.expand_dims(w_mask, axis=0)

            att = iter_attention(query, doc, mask, hop=config.hop_word)
            att = tf.reshape(att, [
                num_aspect * batch, num_sent, config.hidden * config.hop_word
            ])

        with tf.variable_scope("sent_level"):
            att = dropout(att,
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
            att = cudnn_lstm(att, config.hidden // 2)
            query = tf.tanh(dense(query_mat, config.hidden))

            doc = tf.reshape(att, [num_aspect, batch, num_sent, config.hidden])
            att = iter_attention(query, doc, hop=config.hop_sent)

        with tf.variable_scope("predict"):
            probs = []
            att = dropout(att,
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
            aspects = [config.aspect] if config.unsupervised else list(
                range(num_aspect))
            for i in aspects:
                with tf.variable_scope("aspect_{}".format(i)):
                    probs.append(tf.nn.softmax(dense(att[i], score_scale)))
            self.prob = tf.stack(probs, axis=0)
            self.pred = tf.argmax(self.prob, axis=2)

            self.golden = self.y if config.overall else self.ay
            self.loss = tf.reduce_sum(
                tf.reduce_mean(tf.reduce_sum(-self.golden *
                                             tf.log(self.prob + 1e-6),
                                             axis=2),
                               axis=1))

        with tf.variable_scope("decoder"):
            sent_emb = tf.nn.embedding_lookup(asp_word_mat, senti)
            neg_sent_emb = tf.nn.embedding_lookup(asp_word_mat, neg_senti)
            self.r_loss, self.u_loss = selectional_preference(
                sent_emb,
                neg_sent_emb,
                weight,
                self.prob[0],
                score_scale,
                alpha=config.alpha)
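
cudnn_lstm above is used as a bidirectional wrapper whose output width is twice its unit count (config.hidden // 2 per direction gives config.hidden overall). A hypothetical, CPU-friendly stand-in matching the call signatures used in this example:

def cudnn_lstm(inputs, num_units, sequence_length=None, scope="cudnn_lstm"):
    # Single bidirectional LSTM layer; outputs have width 2 * num_units.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        cell_fw = tf.contrib.rnn.LSTMCell(num_units)
        cell_bw = tf.contrib.rnn.LSTMCell(num_units)
        (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, inputs, sequence_length=sequence_length,
            dtype=tf.float32)
        return tf.concat([out_fw, out_bw], axis=2)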
Example #9
    def get_vp(self, i):
        config = self.config

        gru = cudnn_gru if config.use_cudnn else native_gru
        opt = True
        MPL = config.single_para_limit

        zero = tf.constant(0)
        i_ = tf.constant(i)
        start = i * MPL
        end = (i + 1) * MPL
        c_pr = self.c_pr[:, start:end]
        ch_pr = self.ch_pr[:, start:end, :]

        # local masks
        c_mask = tf.cast(c_pr, tf.bool)
        q_mask = tf.cast(self.q, tf.bool)
        c_len = tf.reduce_sum(tf.cast(c_mask, tf.int32), axis=1)
        q_len = tf.reduce_sum(tf.cast(q_mask, tf.int32), axis=1)
        """
        ### this line will replace the c_len with values 8, as it is some
        # unnecessary padding from examples that do not have as many
        # passages as the maximum number of passages in the batch
        eight_indexes = tf.not_equal(c_len, tf.constant(8, dtype=tf.int32))
        eight_indexes = tf.cast(eight_indexes, tf.int32)
        c_len = c_len * eight_indexes
        """

        if opt:
            N, CL = config.batch_size, config.char_limit
            c_maxlen = tf.reduce_max(c_len)
            q_maxlen = tf.reduce_max(q_len)
            c_pr = tf.slice(c_pr, [0, 0], [N, c_maxlen])
            q = tf.slice(self.q, [0, 0], [N, q_maxlen])
            c_mask = tf.slice(c_mask, [0, 0], [N, c_maxlen])
            q_mask = tf.slice(q_mask, [0, 0], [N, q_maxlen])
            ch_pr = tf.slice(ch_pr, [0, 0, 0], [N, c_maxlen, CL])
            qh = tf.slice(self.qh, [0, 0, 0], [N, q_maxlen, CL])
            y1 = tf.slice(self.y1, [0, 0], [N, c_maxlen])
            y2 = tf.slice(self.y2, [0, 0], [N, c_maxlen])

            seq_mask = tf.sequence_mask(c_len, maxlen=c_maxlen)
        else:
            self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit

        ch_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(ch_pr, tf.bool), tf.int32), axis=2),
            [-1])
        qh_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(qh, tf.bool), tf.int32), axis=2),
            [-1])

        N, PL, QL, CL, d, dc, dg = config.batch_size, c_maxlen, q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, ch_pr),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(self.char_mat, qh),
                                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)

                #self.cell_fw = tf.contrib.rnn.GRUCell(dg)
                #self.cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    self.cell_fw,
                    self.cell_bw,
                    ch_emb,
                    ch_len,
                    dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    self.cell_fw,
                    self.cell_bw,
                    qh_emb,
                    qh_len,
                    dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, c_pr)
                q_emb = tf.nn.embedding_lookup(self.word_mat, q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            #gru1 = lambda: gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            #	).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            #self.rnn1 = tf.cond(tf.equal(i_,zero), gru1, lambda: self.rnn1)
            #c = self.rnn1(c_emb, seq_len=c_len)
            #q = self.rnn1(q_emb, seq_len=q_len)

            if i == 0:
                self.rnn1 = gru(num_layers=3,
                                num_units=d,
                                batch_size=N,
                                input_size=c_emb.get_shape().as_list()[-1],
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)
                self.q_enc = self.rnn1(q_emb, seq_len=q_len)
            c = self.rnn1(c_emb, seq_len=c_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   self.q_enc,
                                   mask=q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   name_scope="attention_layer")

            #gru2 = lambda: gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            #	).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            #self.rnn2 = tf.cond(tf.equal(i_,zero), gru2, lambda: self.rnn2)
            #att = self.rnn2(qc_att, seq_len=c_len)

            if i == 0:
                self.rnn2 = gru(num_layers=1,
                                num_units=d,
                                batch_size=N,
                                input_size=qc_att.get_shape().as_list()[-1],
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)
            att = self.rnn2(qc_att, seq_len=c_len)
        return att, c_len, c_mask, y1, y2, seq_mask
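
get_vp builds the shared encoders (self.rnn1, self.rnn2, self.q_enc) only on the first paragraph and reuses them afterwards. A hypothetical driver method, purely illustrative, showing how the per-paragraph outputs could be collected:

    def merge_paragraphs(self):
        # Hypothetical caller of get_vp (not part of the original code):
        # the shared encoders are created when i == 0, then reused; the
        # per-paragraph representations are concatenated along time.
        config = self.config
        att_list, mask_list = [], []
        for i in range(config.max_para):
            att_i, _c_len, _c_mask, _y1, _y2, seq_mask_i = self.get_vp(i)
            att_list.append(att_i)
            mask_list.append(seq_mask_i)
        return tf.concat(att_list, axis=1), tf.concat(mask_list, axis=1)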
Example #10
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent)[0]
        sent_mask = tf.cast(self.sent, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = tf.reduce_max(sent_len)
        sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
        sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

        mid_mask = tf.cast(self.mid, tf.bool)
        mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
        mid_maxlen = tf.reduce_max(mid_len)
        mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
        mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

        pat_mask = tf.cast(self.pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope("embedding"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
            sent_emb = dropout(sent_emb,
                               keep_prob=config.word_keep_prob,
                               is_train=self.is_train,
                               mode="embedding")
            pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

        with tf.variable_scope("encoder"):
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)

            cont_d = dropout(cont,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            pat_d = dropout(pat,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)

        with tf.variable_scope("attention"):
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            pat_a = self.pat_a = attention(pat_d,
                                           config.att_hidden,
                                           mask=pat_mask)

        with tf.variable_scope("sim"):
            sim, pat_sim = att_match(mid_emb,
                                     pat_emb,
                                     mid_mask,
                                     pat_mask,
                                     d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)

            neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
            pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
            pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
            pat_neg = tf.square(tf.maximum(pat_sim, 0.))
            pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
            l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

            with tf.variable_scope("pred"):
                att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d,
                                       axis=1)
                pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d,
                                       axis=1)

                logit = self.logit = dense(att2_d,
                                           config.num_class,
                                           use_bias=False)
                pred = tf.nn.softmax(logit)
                l_a = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size],
                        labels=self.rel[:config.batch_size]),
                    axis=0)

                xsim = tf.stop_gradient(sim[config.batch_size:])
                pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                bound = tf.reduce_max(xsim, axis=1)
                weight = tf.nn.softmax(10 * bound)
                l_u = tf.reduce_sum(
                    weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel),
                    axis=0)

                logit = dense(pat2_d, config.num_class, use_bias=False)
                l_pat = self.pat_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit, labels=self.rels),
                    axis=0)

        self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
        self.pred = tf.argmax(pred, axis=1)

        self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
        self.sim_pred = tf.argmax(tf.gather(self.rels,
                                            tf.argmax(self.sim, axis=1)),
                                  axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        self.gold = tf.argmax(self.rel, axis=1)
        self.max_logit = tf.reduce_max(self.logit, axis=1)
Example #11
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            self.c_emb = tf.stop_gradient(c_emb)
            self.q_emb = tf.stop_gradient(q_emb)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            self.c_rnn = c = rnn(c_emb, seq_len=self.c_len)
            self.q_rnn = q = rnn(q_emb, seq_len=self.q_len)

            c = tf.stop_gradient(c)
            q = tf.stop_gradient(q)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            
            self.att = [rnn(qc_att, seq_len=self.c_len)]
            self.att += [self.att[-1][:,-1,:]]
        
        with tf.variable_scope("binary"):
            for _ in range(3):
                self.att += [tf.nn.dropout(tf.keras.layers.Dense(300)(self.att[-1]), keep_prob=config.keep_prob)]

        with tf.variable_scope("badptr"):
            init = self.att[-1]
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, self.att[0], d, self.c_mask)

        with tf.variable_scope("badptr_predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1_distrib = tf.reduce_max(outer, axis=2)
            self.yp2_distrib = tf.reduce_max(outer, axis=1)
            self.yp1 = tf.argmax(self.yp1_distrib, axis=1)
            self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(losses + losses2)
Example #12
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            c_emb = tf.stop_gradient(c_emb)
            q_emb = tf.stop_gradient(q_emb)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            self.c_rnn = c = rnn(c_emb, seq_len=self.c_len)
            self.q_rnn = q = rnn(q_emb, seq_len=self.q_len)

            c = tf.stop_gradient(c)
            q = tf.stop_gradient(q)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            
            self.att = [rnn(qc_att, seq_len=self.c_len)[:,-1,:]]

        #self.att = [tf.concat([self.c_rnn[:,-1,:], self.q_rnn[:,-1,:]], 1)]
            
        #self.att += [tf.stop_gradient(self.att[-1])]
        
        with tf.variable_scope("binary"):
            for _ in range(3):
                self.att += [tf.nn.dropout(tf.keras.layers.Dense(300, activation='relu')(self.att[-1]), keep_prob=config.keep_prob)]

            self.prediction = tf.keras.layers.Dense(2)(self.att[-1])

        #self.loss = tf.reduce_mean(tf.squared_difference(self.prediction, tf.cast(self.y_target, tf.float32)))
        self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction, labels=tf.stop_gradient(self.y_target))
Example #13
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])

                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)

                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)

                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            # encode the embeddings with a 3-layer RNN
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            # with shape (batch_size, max_len, hidden_dim)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("relation analysis"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            g_theta_layers = [256, 128, 1]  # attention component
            md = Relation_Module(config, self.c_maxlen, self.q_maxlen,
                                 g_theta_layers)
            # r: attention-weighted context combined with the question summary (init)
            r, alpha = md.hop_2(c,
                                init,
                                phase=self.is_train,
                                activation=tf.nn.relu)
            c = r[-1]

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        # obtain r_Q from the embedded question q
        with tf.variable_scope("pointer"):
            # init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
            #             keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            self.start_logits = tf.nn.softmax(logits1)
            self.stop_logits = tf.nn.softmax(logits2)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
Example #14
    def ready(self):
        config = self.config
        N, PL, QL, CL, BL, d, dc, dg, dbpe, dbpeh = config.batch_size, self.c_maxlen, self.q_maxlen, \
                                                   config.char_limit, config.bpe_limit, config.hidden, \
                                                   config.glove_dim if config.pretrained_char else config.char_dim, config.char_hidden, \
                                                   config.bpe_glove_dim if config.pretrained_bpe_emb else config.bpe_dim, config.bpe_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            if config.use_char:
                with tf.variable_scope("char"):
                    ch_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.ch),
                        [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            if config.use_bpe:
                with tf.variable_scope("bpe"):
                    cb_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.bpe_mat, self.cb),
                        [N * PL, BL, dbpe])
                    qb_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.bpe_mat, self.qb),
                        [N * QL, BL, dbpe])
                    cb_emb = dropout(cb_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    qb_emb = dropout(qb_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dbpeh)
                    cell_bw = tf.contrib.rnn.GRUCell(dbpeh)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        cb_emb,
                        self.cb_len,
                        dtype=tf.float32)
                    cb_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qb_emb,
                        self.qb_len,
                        dtype=tf.float32)
                    qb_emb = tf.concat([state_fw, state_bw], axis=1)
                    qb_emb = tf.reshape(qb_emb, [N, QL, 2 * dbpeh])
                    cb_emb = tf.reshape(cb_emb, [N, PL, 2 * dbpeh])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            if config.use_char:
                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            if config.use_bpe:
                c_emb = tf.concat([c_emb, cb_emb], axis=2)
                q_emb = tf.concat([q_emb, qb_emb], axis=2)

            if config.use_pos:
                cp_emb = tf.nn.embedding_lookup(self.pos_mat, self.cp)
                qp_emb = tf.nn.embedding_lookup(self.pos_mat, self.qp)
                c_emb = tf.concat([c_emb, cp_emb], axis=2)
                q_emb = tf.concat([q_emb, qp_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
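            # outer[b, i, j] = P(start=i) * P(end=j); the band keeps only spans with 0 <= j - i <= 15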
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
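For reference, a minimal NumPy sketch of the span-decoding rule used in the predict block above: take the outer product of the start and end probability vectors, keep only spans that end at or after their start and run at most 16 tokens (what tf.matrix_band_part(outer, 0, 15) does), and pick the argmax. The function and variable names below are illustrative, not part of the model.

import numpy as np

def decode_span(p_start, p_end, max_span=15):
    # p_start, p_end: [seq_len] probability vectors for a single example
    outer = np.outer(p_start, p_end)        # outer[i, j] = P(start=i) * P(end=j)
    outer = np.triu(outer)                  # the end must not come before the start
    outer = np.tril(outer, k=max_span)      # span length at most max_span + 1 tokens
    start, end = np.unravel_index(outer.argmax(), outer.shape)
    return int(start), int(end)

p_start = np.array([0.1, 0.6, 0.2, 0.1])
p_end = np.array([0.1, 0.1, 0.5, 0.3])
print(decode_span(p_start, p_end))          # (1, 2)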
Exemplo n.º 15
0
    def __init__(self,
                 config,
                 batch,
                 word_mat=None,
                 char_mat=None,
                 trainable=True,
                 opt=True):
        self.config = config
        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.c, self.q, self.ch, self.qh, self.y1, self.y2, self.qa_id = batch.get_next(
        )
        self.emb_keep_prob = tf.get_variable(
            "emb_keep_prob",
            shape=[],
            dtype=tf.float32,
            trainable=False,
            initializer=tf.constant_initializer(config.emb_keep_prob))
        self.keep_prob = tf.get_variable("keep_prob",
                                         shape=[],
                                         dtype=tf.float32,
                                         trainable=False,
                                         initializer=tf.constant_initializer(
                                             config.keep_prob))
        self.ptr_keep_prob = tf.get_variable(
            "ptr_keep_prob",
            shape=[],
            dtype=tf.float32,
            trainable=False,
            initializer=tf.constant_initializer(config.ptr_keep_prob))
        self.is_train = tf.get_variable("is_train",
                                        shape=[],
                                        dtype=tf.bool,
                                        trainable=False)
        self.word_mat = dropout(tf.get_variable(
            "word_mat",
            initializer=tf.constant(word_mat, dtype=tf.float32),
            trainable=False),
                                keep_prob=self.emb_keep_prob,
                                is_train=self.is_train,
                                mode="embedding")
        self.char_mat = dropout(tf.get_variable(
            "char_mat", initializer=tf.constant(char_mat, dtype=tf.float32)),
                                keep_prob=self.emb_keep_prob,
                                is_train=self.is_train,
                                mode="embedding")

        self.c_mask = tf.cast(self.c, tf.bool)
        self.q_mask = tf.cast(self.q, tf.bool)
        self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
        self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

        if opt:
            N, CL = config.batch_size, config.char_limit
            self.c_maxlen = tf.reduce_max(self.c_len)
            self.q_maxlen = tf.reduce_max(self.q_len)
            self.c = tf.slice(self.c, [0, 0], [N, self.c_maxlen])
            self.q = tf.slice(self.q, [0, 0], [N, self.q_maxlen])
            self.c_mask = tf.slice(self.c_mask, [0, 0], [N, self.c_maxlen])
            self.q_mask = tf.slice(self.q_mask, [0, 0], [N, self.q_maxlen])
            self.ch = tf.slice(self.ch, [0, 0, 0], [N, self.c_maxlen, CL])
            self.qh = tf.slice(self.qh, [0, 0, 0], [N, self.q_maxlen, CL])
            self.y1 = tf.slice(self.y1, [0, 0], [N, self.c_maxlen])
            self.y2 = tf.slice(self.y2, [0, 0], [N, self.c_maxlen])
        else:
            self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit

        self.ch_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.ch, tf.bool), tf.int32),
                          axis=2), [-1])
        self.qh_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.qh, tf.bool), tf.int32),
                          axis=2), [-1])

        self.ready()

        if trainable:
            self.lr = tf.get_variable("lr",
                                      shape=[],
                                      dtype=tf.float32,
                                      trainable=False)
            self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.lr,
                                                  epsilon=1e-6)
            grads = self.opt.compute_gradients(self.loss)
            gradients, variables = zip(*grads)
            capped_grads, _ = tf.clip_by_global_norm(gradients,
                                                     config.grad_clip)
            self.train_op = self.opt.apply_gradients(
                zip(capped_grads, variables), global_step=self.global_step)
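A small, self-contained sketch (toy values) of the dynamic-padding trick in the opt branch of this constructor: batches arrive padded to a global limit, and slicing every tensor down to the batch's own maximum length keeps the RNNs from running over columns that are padding for all examples.

import tensorflow as tf

c = tf.constant([[3, 5, 0, 0, 0],
                 [7, 2, 9, 0, 0]])         # two sequences padded to a global limit of 5
c_mask = tf.cast(c, tf.bool)               # token id 0 marks padding
c_len = tf.reduce_sum(tf.cast(c_mask, tf.int32), axis=1)
c_maxlen = tf.reduce_max(c_len)            # longest sequence in this batch
c = tf.slice(c, [0, 0], [2, c_maxlen])     # drop the all-padding columns

with tf.Session() as sess:
    print(sess.run(c))                     # [[3 5 0], [7 2 9]]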
Exemplo n.º 16
0
    def ready(self):
        config = self.config
        N, QL, CL, d, dc, dg = config.batch_size, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru
        SN, SL = self.c_s_maxnum, self.c_s_maxlen
        W = config.glove_dim
        print('embedding part')
        with tf.variable_scope("emb"):
            # with tf.variable_scope("char"):
            #         ch_emb = tf.reshape(tf.nn.embedding_lookup(
            #             self.char_mat, self.csh_slice), [N, SN * SL, CL, dc], name='char_reshape')
            #         qh_emb = tf.reshape(tf.nn.embedding_lookup(
            #             self.char_mat, self.qh_slice), [N, QL, CL, dc])
            #         ch_emb = dropout(
            #             ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            #         qh_emb = dropout(
            #             qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            # ch_emb_char = tf.unstack(ch_emb, axis=0)
            # qh_emb_char = tf.unstack(qh_emb, axis=0)
            '''

            filter_size = [3, 4, 5]
            att_char = []
            merge_char = []
            q_merge_char = []
            for filter in filter_size:
                with tf.variable_scope("char-cnnencoder-%s" % filter):
                    step_merge_char = []
                    step_att_char = []
                    q_step_merge_char = []
                    q_step_att_char = []
                    for i in range(2):
                        if i==0:
                            input_char=ch_emb
                        else:
                            input_char=qh_emb
                        conv_branch_char = tf.layers.conv2d(
                            inputs=input_char,
                            # use as many filters as the hidden size
                            filters=50,
                            kernel_size=filter,
                            use_bias=True,
                            activation=tf.nn.relu,
                            trainable=True,
                            padding='SAME',
                            name = 'conv_char_' + str(filter),
                            reuse = tf.AUTO_REUSE,
                            data_format='channels_last'
                        )
                        if i ==0:
                            step_att_char.append(conv_branch_char)
                            # pool over the words to obtain: [first_dim x 1* hidden_size]
                            pool_branch_char = tf.reduce_max(conv_branch_char, axis=2)
                            merge_char.append(pool_branch_char)
                        else:
                            q_step_att_char.append(conv_branch_char)
                            # pool over the words to obtain: [first_dim x 1* hidden_size]
                            q_pool_branch_char = tf.reduce_max(conv_branch_char, axis=2)
                            q_merge_char.append(q_pool_branch_char)
                    # batch_merge = tf.stack(step_merge_char, axis=0)
                    # merge_char.append(batch_merge)
                    # batch_merge_q = tf.stack(q_step_merge_char, axis=0)
                    # q_merge_char.append(batch_merge_q)
            ch_con = tf.concat(merge_char, axis=-1)
            ch_con = tf.reshape(ch_con,[N,SN,SL,150])
            qh_con = tf.concat(q_merge_char,axis=-1)
            '''
            # if(use_char):
            #     with tf.variable_scope("char"):
            #         ch_emb = tf.reshape(tf.nn.embedding_lookup(
            #             self.char_mat, self.csh), [N * SN * SL, CL, dc], name='char_reshape')
            #         qh_emb = tf.reshape(tf.nn.embedding_lookup(
            #             self.char_mat, self.qh), [N * QL, CL, dc])
            #         ch_emb = dropout(
            #             ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            #         qh_emb = dropout(
            #             qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            #         cell_fw = tf.contrib.rnn.GRUCell(dg)
            #         cell_bw = tf.contrib.rnn.GRUCell(dg)
            #         _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            #             cell_fw, cell_bw, ch_emb, self.csh_len, dtype=tf.float32)
            #         ch_emb = tf.concat([state_fw, state_bw], axis=1)
            #         _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            #             cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            #         qh_emb = tf.concat([state_fw, state_bw], axis=1)
            #         qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            #         ch_emb = tf.reshape(ch_emb, [N, SN, SL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.cs_slice)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q_slice)

            with tf.name_scope("softemb"):
                c_emb_linear = tf.nn.relu(
                    dense(c_emb, d, use_bias=True, scope="c_emb_linear"))
                q_emb_linear = tf.nn.relu(
                    dense(q_emb, d, use_bias=True, scope="q_emb_linear"))
                c_emb_linear = tf.reshape(
                    c_emb_linear, [N, self.c_s_maxnum * self.c_s_maxlen, d])
                align_cq = tf.matmul(c_emb_linear,
                                     tf.transpose(q_emb_linear, [0, 2, 1]))

                cq_mask = tf.tile(tf.expand_dims(self.q_mask, axis=1),
                                  [1, self.c_s_maxnum * self.c_s_maxlen, 1])
                self.align_cq = tf.nn.softmax(softmax_mask(align_cq, cq_mask))
                align_c_emb = tf.matmul(self.align_cq, q_emb_linear)
                align_c_emb = tf.reshape(
                    align_c_emb, [N, self.c_s_maxnum, self.c_s_maxlen, d])
            c_emb = tf.concat(
                [c_emb, align_c_emb, self.ce_slice, self.ct_slice], axis=3)
            c_emb = tf.reshape(
                c_emb, [N, self.c_s_maxnum, self.c_s_maxlen, W + d + 3 + 19],
                name='c_emb_reshape')

            q_emb = tf.concat([q_emb, self.qt_slice], axis=2)
            self.c_emb = c_emb
            self.q_emb = q_emb
            # c_emb = tf.reshape(c_emb, [N,self.c_s_maxnum,self.c_s_maxlen,W+self.q_maxlen])

        print('encode-part')
        # c_s_len = tf.unstack(self.c_s_len, axis=1)

        cnn_out = []
        c_s_emb = tf.unstack(c_emb, axis=0)
        # q_s_emb = tf.expand_dims(q_emb, axis=1)
        # q_sample_emb = tf.unstack(q_s_emb, axis = 0)

        filter_size = [3, 4, 5]
        att = []
        merge = []
        q_merge = []
        with tf.variable_scope("cnnencoder"):
            for filter in filter_size:
                step_merge = []
                step_att = []
                q_step_merge = []
                q_step_att = []
                with tf.variable_scope("cnnencoder-%s" % filter):
                    for i in range(N):
                        conv_branch = tf.layers.conv1d(
                            inputs=c_s_emb[i],
                            # use as many filters as the hidden size
                            filters=100,
                            kernel_size=[filter],
                            use_bias=True,
                            activation=tf.nn.relu,
                            trainable=True,
                            padding='SAME',
                            name='conv_' + str(filter),
                            reuse=tf.AUTO_REUSE)
                        # tf.get_variable_scope().reuse_variables()
                        step_att.append(conv_branch)
                        # pool over the words to obtain: [first_dim x 1* hidden_size]
                        pool_branch = tf.reduce_max(conv_branch, axis=1)
                        pool_branch = dropout(pool_branch,
                                              keep_prob=config.keep_prob,
                                              is_train=self.is_train)
                        step_merge.append(pool_branch)

                batch_merge = tf.stack(step_merge, axis=0)
                merge.append(batch_merge)
                # batch_merge_q = tf.stack(q_step_merge, axis = 0)
                # q_merge.append(batch_merge_q)

                con = tf.concat(merge, axis=-1)
                # q_con = tf.concat(q_merge, axis = -1)
                #
                # attention_vis = tf.stack(att, axis=0)
                # attention_vis = tf.reduce_mean(attention_vis, axis=0)
                # cnn_out.append(con)
                # c_sen_emb = tf.concat(con, axis = 0)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=con.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            print('passage-encoder')
            c_s = rnn(con, seq_len=self.c_p_len)
            # q = rnn(q_emb, seq_len=self.q_len)
        with tf.variable_scope("qencode"):
            with tf.variable_scope("encoding"):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=q_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)

                q = rnn(q_emb, seq_len=self.q_len)
        self.q_enc = q
        print('qc_att')

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c_s,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)

            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            self.att_s = rnn(qc_att, seq_len=self.c_p_len)

        # print('pointer')
        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train,
                              is_sentence=True)

            logits1 = pointer(init, self.att_s, d, self.c_p_mask)
            self.lo = logits1
        with tf.variable_scope("predict"):
            self.outer = tf.nn.softmax(logits1)
            self.yp = tf.argmax(self.outer, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y_slice))
            self.out1 = tf.nn.top_k(self.outer, config.k).values
            self.policy = tf.nn.top_k(self.outer, 1).values
            self.policy = tf.reduce_sum(tf.nn.top_k(self.outer,
                                                    config.k).values,
                                        axis=-1,
                                        keepdims=True)
            self.policy_log_part = tf.log(self.policy)
            #self.loss = tf.reduce_mean(-1 * self.policy_log_part * self.reward)
            reward = self.advantage
            reward_mean, reward_var = tf.nn.moments(reward, axes=[0])

            reward_std = tf.sqrt(reward_var) + 1e-6
            self.reward_mean = reward_mean
            self.reward_var = reward_std
            reward = tf.div(reward - reward_mean, reward_std)

            self.final_reward = reward - self.baseline
            self.loss = tf.reduce_mean(-1 * self.policy_log_part *
                                       self.advantage)
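A minimal sketch (toy tensors) of the policy-gradient surrogate assembled in the predict block above: the summed top-k sentence probability acts as the policy term and an externally supplied advantage scales its log, so gradients raise the probability of high-advantage selections. Shapes and values are illustrative only.

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0],
                      [0.1, 1.5, 0.3]])              # per-sentence scores for a batch of 2
advantage = tf.constant([[1.0], [-0.5]])             # externally computed reward signal

probs = tf.nn.softmax(logits)                        # policy over candidate sentences
policy = tf.reduce_sum(tf.nn.top_k(probs, k=1).values, axis=-1, keepdims=True)
loss = tf.reduce_mean(-tf.log(policy) * advantage)   # REINFORCE-style surrogate loss

with tf.Session() as sess:
    print(sess.run(loss))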
Exemplo n.º 17
0
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent_word)[0]
        sent_mask = tf.cast(self.sent_word, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = config.length

        sent = self.sent_word

        pretrain_sent_mask = tf.cast(self.pretrain_sents, tf.bool)
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2,
                        keep_prob=config.keep_prob, is_train=self.is_train)
        label_mat, _ = FIND_module(sent, self.raw_pats, self.word_mat, config,
                                   tf.constant(False, tf.bool), rnn)
        label_mat = tf.sigmoid(label_mat) * tf.tile(
            tf.reshape(tf.cast(sent_mask, tf.float32),
                       [batch_size, sent_maxlen, 1]),
            [1, 1, self.raw_pats.get_shape()[0]])

        # label_mat = tf.cast(tf.greater(label_mat,0.7),tf.float32)

        _,keywords_sim= FIND_module(sent,self.pats,self.word_mat,config,self.is_train,rnn)
        # keywords_sim = tf.sigmoid(keywords_sim)

        pretrain_pred_labels,_ = FIND_module(self.pretrain_sents,self.pretrain_pats,self.word_mat,config,self.is_train,rnn)
        pretrain_pred_labels = tf.transpose(pretrain_pred_labels,[0,2,1])
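        # gather_order holds [i, i] index pairs, so the gather_nd below takes the diagonal:
        # the score of pretraining pattern i evaluated on pretraining sentence i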
        gather_order = tf.tile(tf.reshape(tf.range(max(config.pretrain_size,config.pretrain_size_together)), [-1, 1]),[1,2])
        pretrain_pred_labels = tf.gather_nd(pretrain_pred_labels,gather_order)
        self.pretrain_loss = tf.reduce_mean(
            tf.reduce_sum(
                tf.nn.weighted_cross_entropy_with_logits(
                    targets=self.pretrain_labels,
                    logits=pretrain_pred_labels,
                    pos_weight=config.pos_weight) *
                tf.cast(pretrain_sent_mask, tf.float32),
                axis=1) /
            tf.reduce_sum(tf.cast(pretrain_sent_mask, tf.float32), axis=1))
        # tf.losses.mean_squared_error(labels=self.pretrain_labels, predictions=pretrain_pred_labels)

        self.prt_loss = tf.nn.weighted_cross_entropy_with_logits(
            targets=self.pretrain_labels,
            logits=pretrain_pred_labels,
            pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32)
        self.prt_pred = tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32)
        self.pretrain_pred_labels = tf.reshape(
            tf.cast(
                tf.greater(
                    tf.sigmoid(pretrain_pred_labels) *
                    tf.cast(pretrain_sent_mask, tf.float32),
                    config.pretrain_threshold), tf.int32), [-1])

        neg_idxs = tf.matmul(self.keywords_rels, tf.transpose(self.keywords_rels, [1, 0]))
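        # neg_idxs[i, j] is nonzero when keywords i and j share a relation label (assuming
        # one-hot keywords_rels); same-relation pairs are pulled together and
        # different-relation pairs are pushed apart by the margin terms below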
        pat_pos = tf.square(tf.maximum(0.9 - keywords_sim, 0.))
        pat_pos = tf.reduce_max(pat_pos - tf.cast(1 - neg_idxs,tf.float32)*tf.constant(1e30,tf.float32), axis=1)

        pat_neg = tf.square(tf.maximum(keywords_sim, 0.))
        pat_neg = tf.reduce_max(pat_neg - tf.constant(1e30,tf.float32) * tf.cast(neg_idxs,tf.float32), axis=1)
        pat_simloss = tf.reduce_mean(pat_pos + pat_neg,axis=0)

        # clustering loss
        self.sim_loss = sim_loss = pat_simloss

        self.pretrain_loss_v2 = self.pretrain_loss+self.pretrain_alpha*self.sim_loss

        sim_raw = []

        for i, soft_labeling_function in enumerate(self.labeling_functions_soft):
            try:
                sim_raw.append(soft_labeling_function(label_mat, self.raw_keyword_dict, self.mask_mat)(
                    self.phrases_input) * self.type_restrict(i))
            except:
                print(i)
                sim_raw.append(tf.cast(tf.reshape(0*self.phrases_input[:,0],[1,-1]),tf.float32))

        self.sim = sim = tf.transpose(tf.concat(sim_raw, axis=0), [1, 0])  # num_functions tensors of shape [1, batch_size] -> [batch_size, num_functions]
        with tf.variable_scope("classifier"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob, is_train=self.is_train, mode="embedding")
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2, keep_prob=config.keep_prob, is_train=self.is_train)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
            logit = dense(att2_d, config.num_class, use_bias=False)
            pred = tf.nn.softmax(logit)
            with tf.variable_scope("pred"):

                if not self.pseudo:

                    sent_loss = self.sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=self.rel), axis=0)
                else:

                    self.hard_train_loss = sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

                    lsim = sim[:config.batch_size]
                    index_tensor = tf.reshape(tf.constant(np.arange(config.batch_size),tf.int32),[config.batch_size,1])
                    select_tensor = tf.reshape(self.hard_match_func_idx,[config.batch_size,1])
                    probs = tf.reshape(tf.gather_nd(lsim,tf.concat([index_tensor,select_tensor],axis=1)),[config.batch_size,1])
                    self.labeled_loss = labeled_loss = tf.reduce_mean(tf.square((1-probs)))

                    xsim = tf.stop_gradient(sim[config.batch_size:])

                    pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                    bound = tf.reduce_max(xsim, axis=1)
                    weight = tf.nn.softmax(10.0 * bound)

                    self.unlabeled_loss = unlabeled_loss = tf.reduce_sum(weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

                    sent_loss = self.sent_loss = (sent_loss + self.gamma * unlabeled_loss +
                                                  self.alpha * self.pretrain_loss)  # + self.alpha * labeled_loss

        # compute entropy, used to decide no_relation
        self.max_val = entropy = tf.reduce_sum(pred * -log(pred), axis=1)
        # pred is used at test time
        self.pred = tf.argmax(pred, axis=1)
        self.loss = sent_loss + self.beta * sim_loss
        # predictions produced by the similarity model
        self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        #true label
        self.gold = tf.argmax(self.rel, axis=1)
        self.entropy = tf.reduce_mean(entropy, axis=0)
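A minimal sketch (toy similarities) of the hardest-pair margin loss used for the keyword clusters above: entries outside the pair set of interest are pushed down by 1e30 before the max, the same masking trick the code uses instead of boolean indexing.

import tensorflow as tf

sim = tf.constant([[1.0, 0.8, 0.1],
                   [0.8, 1.0, 0.2],
                   [0.1, 0.2, 1.0]])          # pairwise keyword similarities
same_rel = tf.constant([[1., 1., 0.],
                        [1., 1., 0.],
                        [0., 0., 1.]])        # 1 where two keywords share a relation

# hardest positive: same-relation pair with the lowest similarity (margin 0.9)
pos = tf.square(tf.maximum(0.9 - sim, 0.))
pos = tf.reduce_max(pos - (1. - same_rel) * 1e30, axis=1)
# hardest negative: different-relation pair with the highest similarity
neg = tf.square(tf.maximum(sim, 0.))
neg = tf.reduce_max(neg - same_rel * 1e30, axis=1)
loss = tf.reduce_mean(pos + neg, axis=0)

with tf.Session() as sess:
    print(sess.run(loss))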
Exemplo n.º 18
0
    def ready(self):
        config = self.config
        N, PL, QL, d_gl, dc = config.batch_size, self.c_maxlen, self.q_maxlen, config.glove_dim, config.char_dim
        gru = cudnn_gru if config.use_cudnn else native_gru
        logging.info("feature embedding")
        # glove
        c_emb = tf.reshape(tf.nn.embedding_lookup(
            self.word_mat, self.c), [N, PL, d_gl])
        q_emb = tf.reshape(tf.nn.embedding_lookup(
            self.word_mat, self.q), [N, QL, d_gl])
        c_emb = tf.reshape(dropout(
            c_emb, keep_prob=config.keep_prob, is_train=self.is_train),[N,PL,d_gl])
        q_emb = tf.reshape(dropout(
            q_emb, keep_prob=config.keep_prob, is_train=self.is_train),[N,QL,d_gl])
        # cove
        ch_emb = tf.reshape(tf.nn.embedding_lookup(
            self.char_mat, self.c), [N, PL, dc])
        qh_emb = tf.reshape(tf.nn.embedding_lookup(
            self.char_mat, self.q), [N, QL, dc])
        ch_emb = tf.reshape(dropout(
            ch_emb, keep_prob=config.keep_prob, is_train=self.is_train),[N,PL,dc])
        qh_emb = tf.reshape(dropout(
            qh_emb, keep_prob=config.keep_prob, is_train=self.is_train),[N,QL,dc])


        # new_feature
        c_f = tf.reshape(tf.cast(self.context_feature, tf.float32), [N, PL, config.feature_dim])

        logging.info("Word level infusion")
        # fused_a = fuse(para_glove, ques_glove, attention_dim, 'test')
        # high_level
        para_q_fused_glove = word_fusion(c_emb, q_emb,
                                         self.c_mask, self.q_mask)
        # low_level
        para_w_rep = tf.concat([c_emb, ch_emb, c_f], axis=2)

        # low_level
        ques_w_rep = tf.concat([q_emb, qh_emb],axis=2)

        # enhanced input vector for context
        para_enhanced_rep = tf.concat([para_w_rep, para_q_fused_glove], axis=2)
        # ---------------------reading
        logging.info("Building Reading section")
        # change LSTM to GRU
        with tf.variable_scope("Reading"):
            # hQh
            f_read_q_low = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)  # in paper 125
            b_read_q_low = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            inp = dropout(ques_w_rep, keep_prob=config.keep_prob, is_train=self.is_train)
            ques_low_h, _ = birnn(cell_fw=f_read_q_low, cell_bw=b_read_q_low,
                                  inputs=inp, dtype=tf.float32,
                                  scope='ques_low_under',
                                  sequence_length=self.q_len)
            ques_low_h = tf.concat(ques_low_h, axis=2)

            # Hqh
            f_read_q_high = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            b_read_q_high = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            inp = dropout(ques_low_h, keep_prob=config.keep_prob, is_train=self.is_train)
            ques_high_h, _ = birnn(cell_fw=f_read_q_high,
                                   cell_bw=b_read_q_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='ques_high_under',
                                   sequence_length=self.q_len)
            ques_high_h = tf.concat(ques_high_h, axis=2)

            # Hcl
            f_read_p_low = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            b_read_p_low = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            inp = dropout(para_enhanced_rep, keep_prob=config.keep_prob, is_train=self.is_train)
            para_low_h, _ = birnn(cell_fw=f_read_p_low,
                                  cell_bw=b_read_p_low,
                                  inputs=inp,
                                  dtype=tf.float32,
                                  scope='para_low_under',
                                  sequence_length=self.c_len)
            para_low_h = tf.concat(para_low_h, axis=2)

            # Hch
            f_read_p_high = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            b_read_p_high = tf.contrib.rnn.LSTMCell(config.reading_rep_dim // 2)
            inp = dropout(para_low_h, keep_prob=config.keep_prob, is_train=self.is_train)
            para_high_h, _ = birnn(cell_fw=f_read_p_high,
                                   cell_bw=b_read_p_high,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='para_high_under',
                                   sequence_length=self.c_len)
            para_high_h = tf.concat(para_high_h, axis=2)

        logging.info("Final Question Understanding")

        with tf.variable_scope("final_q_und"):
            f_uq = tf.contrib.rnn.LSTMCell(config.final_ques_under_dim // 2)
            b_uq = tf.contrib.rnn.LSTMCell(config.final_ques_under_dim // 2)
            inp = tf.concat([ques_low_h, ques_high_h], axis=2)
            inp = dropout(inp, keep_prob=config.keep_prob, is_train=self.is_train)
            final_q_und, _ = birnn(cell_fw=f_uq,
                                   cell_bw=b_uq,
                                   inputs=inp,
                                   dtype=tf.float32,
                                   scope='final_q_und',
                                   sequence_length=self.q_len)
            final_q_und = tf.concat(final_q_und, axis=2)

        logging.info("Fusion High level")
        with tf.variable_scope("high_level_fusion"):
            para_HoW = tf.concat([c_emb, ch_emb,
                                  para_low_h, para_high_h],
                                 axis=2)
            ques_HoW = tf.concat([q_emb, qh_emb,
                                  ques_low_h, ques_high_h],
                                 axis=2)
            para_fused_l = fuse(para_HoW, ques_HoW,
                                self.c_mask, self.q_mask,
                                config.sl_att_dim,
                                B=ques_low_h,
                                scope='low_level_fusion')
            para_fused_h = fuse(para_HoW, ques_HoW,
                                self.c_mask, self.q_mask,
                                config.sh_att_dim,
                                B=ques_high_h,
                                scope='high_level_fusion')
            para_fused_u = fuse(para_HoW, ques_HoW,
                                self.c_mask, self.q_mask,
                                config.su_att_dim,
                                B=final_q_und,
                                scope='understanding_fusion')

            inp = tf.concat([para_low_h, para_high_h,
                             para_fused_l, para_fused_h,
                             para_fused_u], axis=2)
            inp = dropout(inp, keep_prob=config.keep_prob, is_train=self.is_train)

            f_vc = tf.contrib.rnn.LSTMCell(config.fully_fused_para_dim // 2)
            b_vc = tf.contrib.rnn.LSTMCell(config.fully_fused_para_dim // 2)
            ff_para, _ = birnn(cell_fw=f_vc, cell_bw=b_vc, inputs=inp,
                               dtype=tf.float32, scope='full_fused_para',
                               sequence_length=self.c_len)
            ff_para = tf.concat(ff_para, axis=2)
        logging.info("Self boosting fusion")

        with tf.variable_scope("self_boosting_fusion"):
            para_HoW = tf.concat([c_emb, ch_emb,
                                  para_low_h, para_high_h,
                                  para_fused_l, para_fused_h,
                                  para_fused_u, ff_para],
                                 axis=2)
            ff_fused_para = fuse(para_HoW, para_HoW,
                                 self.c_mask, self.q_mask,
                                 config.selfboost_att_dim,
                                 B=ff_para,
                                 scope='self_boosted_fusion')
            f_sb = tf.contrib.rnn.LSTMCell(config.selfboost_rep_dim // 2)
            b_sb = tf.contrib.rnn.LSTMCell(config.selfboost_rep_dim // 2)
            inp = tf.concat([ff_para, ff_fused_para], axis=2)
            inp = dropout(inp, keep_prob=config.keep_prob, is_train=self.is_train)
            final_para_rep, _ = birnn(cell_fw=f_sb, cell_bw=b_sb, inputs=inp,
                                      dtype=tf.float32, scope='self_boosted')
            final_para_rep = tf.concat(final_para_rep, axis=2)

        logging.info("Fusion Net construction complete")
        logging.info("SQuAD specific construction begins")
        # now we have U_c, U_q = final_para_rep, final_q_und
        # The rest of the network is for SQuAD
        # TODO: This part is a little confusing
        logging.info("Sumarized question understanding vector")

        with tf.variable_scope("summarized_question"):
            w = tf.get_variable("W", shape=(config.final_ques_under_dim, 1),
                                dtype=tf.float32)
            uq_s = tf.unstack(final_q_und, axis=1)
            attention_weight = []
            for i, uq in enumerate(tqdm(uq_s, desc='Question Summary Vector')):
                s = tf.matmul(uq, w)
                attention_weight.append(s)
            attention_weight = tf.nn.softmax(tf.stack(attention_weight, axis=1),
                                             axis=1)  # normalize over question positions, not the singleton last axis
            summarized_question = tf.reduce_sum(tf.multiply(final_q_und,
                                                            attention_weight), axis=1)

        logging.info("Span generation")
        # obtain rQ from the question embedding
        with tf.variable_scope("pointer"):
            pointer = ptr_net(batch=N, hidden=summarized_question.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)

            logits1, logits2 = pointer(summarized_question,
                                       final_para_rep,
                                       summarized_question.get_shape().as_list()[-1],
                                       self.c_mask)

        with tf.variable_scope("predict"):
            self.start_logits = tf.nn.softmax(logits1)
            self.stop_logits = tf.nn.softmax(logits2)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            # change
            self.loss = losses + losses2
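A small, self-contained sketch of the summarized-question attention above in vectorized form, without the per-timestep unstack loop: score every question position with a learned vector, softmax over time, and take the weighted sum. The name w_summ and the toy shapes are illustrative.

import tensorflow as tf

final_q_und = tf.random_normal([2, 4, 6])            # [batch, ques_len, dim], toy values
w = tf.get_variable("w_summ", shape=[6, 1], dtype=tf.float32)

scores = tf.einsum('bqd,dk->bqk', final_q_und, w)    # [batch, ques_len, 1]
attention_weight = tf.nn.softmax(scores, axis=1)     # normalize over question positions
summary = tf.reduce_sum(final_q_und * attention_weight, axis=1)  # [batch, dim]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(summary).shape)                   # (2, 6)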
Exemplo n.º 19
0
# sent: [batch, maxlength_sent]   pats: [num_pats, maxlength_pat]   word embeddings: [batch, maxlength_sent, dim]
def FIND_module(sent, pats, word_mat, config, is_train, rnn, scope='Find_module'):
    with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):
        keep_prob = config.keep_prob
        d = config.hidden
        batch_size = tf.shape(sent)[0]
        maxlength_sent = tf.shape(sent)[1]
        dim = tf.shape(word_mat)[1]
        num_pats = tf.shape(pats)[0]

        sent_mask = tf.cast(sent, tf.bool)

        pat_mask = tf.cast(pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope('embedding'):
            sent_emb = tf.nn.embedding_lookup(word_mat, sent)
            sent_emb_d = dropout(sent_emb, keep_prob=config.word_keep_prob, is_train=is_train, mode="embedding")
            pat_emb = tf.nn.embedding_lookup(word_mat, pats)
            pat_emb_d = dropout(pat_emb, keep_prob=config.word_keep_prob, is_train=is_train,mode='embedding')

        with tf.variable_scope('stack'):
            pad = tf.zeros([batch_size,1,dim],tf.float32)

            sent_emb_pad = tf.concat([pad,sent_emb,pad],axis=1)
            sent_emb_stack_2 = tf.reshape(sent_emb_pad,[batch_size,maxlength_sent+2,1,dim])
            sent_emb_stack_2 = tf.concat([sent_emb_stack_2[:,0:-1,:],sent_emb_stack_2[:,1:,:]],axis=2)
            sent_emb_stack_2 = tf.reshape(sent_emb_stack_2,[batch_size*(maxlength_sent+1),2,dim])

            sent_emb_pad2 = tf.concat([pad,pad,sent_emb,pad,pad],axis=1)
            sent_emb_stack_3 = tf.reshape(sent_emb_pad2,[batch_size,maxlength_sent+4,1,dim])
            sent_emb_stack_3 = tf.concat([sent_emb_stack_3[:, 0:-2, :], sent_emb_stack_3[:, 1:-1, :], sent_emb_stack_3[:, 2:, :]], axis=2)
            sent_emb_stack_3 = tf.reshape(sent_emb_stack_3,[batch_size*(maxlength_sent+2),3,dim])

            sent_emb_stack_1 = tf.reshape(sent_emb,[batch_size*maxlength_sent,1,dim])

        with tf.variable_scope('stack_d'):
            pad = tf.zeros([batch_size,1,dim],tf.float32)

            sent_emb_pad_d = tf.concat([pad,sent_emb_d,pad],axis=1)
            sent_emb_stack_2_d = tf.reshape(sent_emb_pad_d,[batch_size,maxlength_sent+2,1,dim])
            sent_emb_stack_2_d = tf.concat([sent_emb_stack_2_d[:,0:-1,:],sent_emb_stack_2_d[:,1:,:]],axis=2)
            sent_emb_stack_2_d = tf.reshape(sent_emb_stack_2_d,[batch_size*(maxlength_sent+1),2,dim])

            sent_emb_pad2_d = tf.concat([pad,pad,sent_emb_d,pad,pad],axis=1)
            sent_emb_stack_3_d = tf.reshape(sent_emb_pad2_d,[batch_size,maxlength_sent+4,1,dim])
            sent_emb_stack_3_d = tf.concat([sent_emb_stack_3_d[:, 0:-2, :], sent_emb_stack_3_d[:, 1:-1, :], sent_emb_stack_3_d[:, 2:, :]], axis=2)
            sent_emb_stack_3_d = tf.reshape(sent_emb_stack_3_d,[batch_size*(maxlength_sent+2),3,dim])

            sent_emb_stack_1_d = tf.reshape(sent_emb_d,[batch_size*maxlength_sent,1,dim])

        with tf.variable_scope("encoder"):
            with tf.variable_scope('encode_pat'):
                pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)       #[numpats,d]
                pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=is_train)
            with tf.variable_scope('encode_sent'):
                cont_stack_3, _ = rnn(
                    sent_emb_stack_3,
                    seq_len=3 * tf.ones([batch_size * (maxlength_sent + 2)], tf.int32),
                    concat_layers=False)
                cont_stack_2, _ = rnn(
                    sent_emb_stack_2,
                    seq_len=2 * tf.ones([batch_size * (maxlength_sent + 1)], tf.int32),
                    concat_layers=False)  # [batch_size*(maxlength_sent+1), d]
                cont_stack_1, _ = rnn(
                    sent_emb_stack_1,
                    seq_len=tf.ones([batch_size * maxlength_sent], tf.int32),
                    concat_layers=False)  # [batch_size*maxlength_sent, d]
                cont_stack_3_d = dropout(cont_stack_3, keep_prob=keep_prob, is_train=is_train)
                cont_stack_2_d = dropout(cont_stack_2, keep_prob=keep_prob, is_train=is_train)
                cont_stack_1_d = dropout(cont_stack_1, keep_prob=keep_prob, is_train=is_train)

        with tf.variable_scope('attention'):
            pat_d_a = attention(pat_d,config.att_hidden, mask=pat_mask)
            cont_stack_2_d_a = attention(cont_stack_2_d,config.att_hidden)
            cont_stack_3_d_a = attention(cont_stack_3_d,config.att_hidden)

            cont_stack_3_att = tf.reduce_sum(tf.expand_dims(cont_stack_3_d_a, axis=2) * cont_stack_3, axis=1)
            cont_stack_2_att = tf.reduce_sum(tf.expand_dims(cont_stack_2_d_a, axis=2) * cont_stack_2, axis=1)
            pat_d_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat_d, axis=1)
            pat_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat, axis=1)
            cont_stack_1_att = tf.squeeze(cont_stack_1)
        with tf.variable_scope('emb_attention'):
            pat_emb_d_a = attention(pat_emb_d, config.att_hidden, mask=pat_mask)
            pat_emb_d_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb_d, axis=1)
            pat_emb_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb, axis=1)

            sent_emb_stack_3_d_a = attention(sent_emb_stack_3_d, config.att_hidden)
            sent_emb_stack_3_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_3_d_a, axis=2) * sent_emb_stack_3, axis=1)

            sent_emb_stack_2_d_a = attention(sent_emb_stack_2_d, config.att_hidden)
            sent_emb_stack_2_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_2_d_a, axis=2) * sent_emb_stack_2, axis=1)

            sent_emb_stack_1_att = tf.squeeze(sent_emb_stack_1)

        with tf.variable_scope('Score'):
            scores_stack_2 = cosine(cont_stack_2_att,pat_d_att,weighted=False)
            scores_stack_1 = cosine(cont_stack_1_att,pat_d_att,weighted=False)
            scores_stack_3 = cosine(cont_stack_3_att, pat_d_att, weighted=False)

            scores_stack_3 = tf.reshape(scores_stack_3, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2 = tf.reshape(scores_stack_2,[batch_size,1,maxlength_sent+1,num_pats])
            scores_stack_1 = tf.reshape(scores_stack_1,[batch_size,1,maxlength_sent,num_pats])
            scores_sim = cosine(pat_att, pat_d_att, weighted=False)

        with tf.variable_scope('emb_Score'):
            scores_stack_3_emb = cosine(sent_emb_stack_3_att,pat_emb_d_att)
            scores_stack_2_emb = cosine(sent_emb_stack_2_att,pat_emb_d_att)
            scores_stack_1_emb = cosine(sent_emb_stack_1_att,pat_emb_d_att)

            scores_stack_3_emb = tf.reshape(scores_stack_3_emb, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2_emb = tf.reshape(scores_stack_2_emb,[batch_size,1,maxlength_sent+1,num_pats])
            scores_stack_1_emb = tf.reshape(scores_stack_1_emb,[batch_size,1,maxlength_sent,num_pats])

            phi = 0
            scores_stack_3 = phi * scores_stack_3_emb + (1 - phi) * scores_stack_3
            scores_stack_2 = phi*scores_stack_2_emb+(1-phi)*scores_stack_2
            scores_stack_1 = phi*scores_stack_1_emb+(1-phi)*scores_stack_1

            scores = tf.concat([
                scores_stack_3[:, :, 0:-2, :], scores_stack_3[:, :, 1:-1, :],
                scores_stack_3[:, :, 2:, :], scores_stack_2[:, :, 0:-1, :],
                scores_stack_2[:, :, 1:, :], scores_stack_1
            ], axis=1)
            scores = tf.reshape(scores, [batch_size, 6, maxlength_sent, num_pats])
            scores = tf.transpose(scores, [0, 3, 1, 2])
            scores = tf.reshape(scores, [batch_size * num_pats, 6, maxlength_sent])

            scores_sim_emb = cosine(pat_emb_att, pat_emb_d_att)
            scores_sim = phi*scores_sim_emb+(1-phi)*scores_sim

        with tf.variable_scope('SeqLabel'):
            seq = tf.layers.dense(tf.transpose(scores,[0,2,1]),1)
            seq = tf.squeeze(seq)
            seq = tf.reshape(seq,[batch_size,num_pats,maxlength_sent])
            #seq = tf.reshape(tf.reduce_max(scores,axis=1),[batch_size,num_pats,maxlength_sent])
            seq = tf.transpose(seq,[0,2,1])
            seq = seq*tf.tile(tf.cast(tf.reshape(sent_mask,[batch_size,maxlength_sent,1]),tf.float32),[1,1,num_pats])

        return seq,scores_sim
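A minimal sketch of the window "stacking" used in FIND_module above, run on token ids rather than embeddings so the 2-gram windows are easy to read (3-grams are built the same way with two pads on each side). The ids are toy values.

import tensorflow as tf

sent = tf.constant([[11, 12, 13, 14]])      # [batch=1, maxlen=4] token ids
sent = tf.expand_dims(sent, axis=2)         # [1, 4, 1] so ids stand in for word vectors
pad = tf.zeros([1, 1, 1], tf.int32)

sent_pad = tf.concat([pad, sent, pad], axis=1)                          # [1, 6, 1]
stack_2 = tf.expand_dims(sent_pad, axis=2)                              # [1, 6, 1, 1]
stack_2 = tf.concat([stack_2[:, 0:-1, :], stack_2[:, 1:, :]], axis=2)   # pair each id with its right neighbour
stack_2 = tf.reshape(stack_2, [1 * 5, 2])                               # one row per 2-gram window

with tf.Session() as sess:
    print(sess.run(stack_2))
    # [[ 0 11] [11 12] [12 13] [13 14] [14  0]]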
Exemplo n.º 20
0
    def __init__(self,
                 config,
                 word_mat=None,
                 char_mat=None,
                 trainable=True,
                 opt=True):
        N = config.batch_size * 4
        self.article_maxlen, self.question_maxlen, self.opt_maxlen = config.para_limit, config.ques_limit, config.opt_limit
        self.config = config
        self.article_input = tf.placeholder(tf.int32,
                                            name='article',
                                            shape=[N, self.article_maxlen])
        self.question_input = tf.placeholder(tf.int32,
                                             name='question',
                                             shape=[N, self.question_maxlen])
        self.option_input = tf.placeholder(tf.int32,
                                           name='option',
                                           shape=[N, self.opt_maxlen])
        self.labels_input = tf.placeholder(tf.int32, name='label', shape=[N])
        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.article = self.article_input
        self.question = self.question_input
        self.option = self.option_input
        self.labels = self.labels_input
        self.question = tf.concat([self.question, self.option], axis=1)
        # concat question and option
        self.emb_keep_prob = tf.get_variable(
            "emb_keep_prob",
            shape=[],
            dtype=tf.float32,
            trainable=False,
            initializer=tf.constant_initializer(config.emb_keep_prob))
        self.keep_prob = tf.get_variable("keep_prob",
                                         shape=[],
                                         dtype=tf.float32,
                                         trainable=False,
                                         initializer=tf.constant_initializer(
                                             config.keep_prob))
        self.is_train = tf.get_variable("is_train",
                                        shape=[],
                                        dtype=tf.bool,
                                        trainable=False)
        self.word_mat = dropout(tf.get_variable(
            "word_mat",
            initializer=tf.constant(word_mat, dtype=tf.float32),
            trainable=False),
                                keep_prob=self.emb_keep_prob,
                                is_train=self.is_train,
                                mode="embedding")

        self.article_mask = tf.cast(self.article, tf.bool)
        self.question_mask = tf.cast(self.question, tf.bool)
        self.labels = tf.cast(self.labels, tf.float32)

        self.article_len = tf.reduce_sum(tf.cast(self.article_mask, tf.int32),
                                         axis=1)
        self.question_len = tf.reduce_sum(tf.cast(self.question_mask,
                                                  tf.int32),
                                          axis=1)

        self.article_maxlen = tf.reduce_max(self.article_len)
        self.question_maxlen = tf.reduce_max(self.question_len)

        # self.article = tf.slice(self.article, [0, 0], [N, self.article_maxlen])
        # self.question = tf.slice(self.question, [0, 0], [N, self.question_maxlen])
        # self.article_mask = tf.slice(self.article_mask, [0, 0], [N, self.article_maxlen])
        # self.question_mask = tf.slice(self.question_mask, [0, 0], [N, self.question_maxlen])
        # self.labels = tf.slice(self.labels, [0], [N])
        self.define_model()

        if trainable:
            self.opt = tf.train.AdadeltaOptimizer(config.learning_rate)
            self.train_op = self.opt.minimize(self.loss)
            # self.lr = tf.get_variable("lr", shape=[], dtype=tf.float32, trainable=False)
            # self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.lr, epsilon=1e-6)
            grads = self.opt.compute_gradients(self.loss)
            for grad, var in grads:
                tf.summary.histogram(var.name, var)
                tf.summary.histogram(var.name + '/gradient', grad)
            # gradients, variables = zip(*grads)
            # capped_grads, _ = tf.clip_by_global_norm(gradients, config.grad_clipping)
            # self.train_op = self.opt.apply_gradients(zip(capped_grads, variables), global_step=self.global_step)
        self.merged_summary_op = tf.summary.merge_all()
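A small, self-contained sketch (toy variable and log directory) of the summary plumbing at the end of this constructor: histogram summaries are registered per variable, merged once with merge_all, and written out during training.

import tensorflow as tf

v = tf.get_variable("toy_weight", shape=[4], initializer=tf.zeros_initializer())
tf.summary.histogram("toy_weight", v)
merged_summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("/tmp/toy_logs", sess.graph)
    summary = sess.run(merged_summary_op)
    writer.add_summary(summary, global_step=0)
    writer.close()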
Exemplo n.º 21
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru
        with tf.variable_scope("emb"):
            # with tf.variable_scope("char"):
            # ch_emb = tf.reshape(tf.nn.embedding_lookup(
            #     self.char_mat, self.ch), [N * PL, CL, dc])
            # qh_emb = tf.reshape(tf.nn.embedding_lookup(
            #     self.char_mat, self.qh), [N * QL, CL, dc])
            #
            # ch_emb = dropout(
            #     ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            # qh_emb = dropout(
            #     qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            #
            # cell_fw = tf.contrib.rnn.GRUCell(dg)
            # cell_bw = tf.contrib.rnn.GRUCell(dg)
            #
            # _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            #     cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
            # ch_emb = tf.concat([state_fw, state_bw], axis=1)
            # _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            #     cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            # qh_emb = tf.concat([state_fw, state_bw], axis=1)
            # qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            # ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
            embedding = tf.get_variable(
                'embedding', [config.vocab_size, config.embedding_size],
                initializer=tf.random_uniform_initializer(minval=-0.05,
                                                          maxval=0.05))

            self.regularizer = tf.nn.l2_loss(embedding)

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(embedding, self.c)
                q_emb = tf.nn.embedding_lookup(embedding, self.q)
            c_emb = dropout(c_emb,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)
            q_emb = dropout(q_emb,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)
            c_emb = tf.reshape(c_emb, [N, PL, config.embedding_size])
            q_emb = tf.reshape(q_emb, [N, QL, config.embedding_size])
            #     c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            #     q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)
            # c_emb = tf.concat([c_emb, ch_emb], axis=2)
            # q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            # encode with a single-layer RNN
            rnn_c = gru(num_layers=1,
                        num_units=d,
                        batch_size=N,
                        input_size=c_emb.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
            rnn_q = gru(num_layers=1,
                        num_units=d,
                        batch_size=N,
                        input_size=q_emb.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
            c = rnn_c(c_emb, seq_len=self.c_len)
            q = rnn_q(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            M = tf.matmul(c, q, adjoint_b=True)
            M_mask = tf.to_float(
                tf.matmul(tf.cast(tf.expand_dims(self.c_mask, -1), tf.int32),
                          tf.cast(tf.expand_dims(self.q_mask, 1), tf.int32)))
            alpha = softmax(M, 1, M_mask)  # (batch_size,M,N)
            beta = softmax(M, 2, M_mask)  # (batch_size,M,N)
            # query_importance = tf.expand_dims(tf.reduce_mean(beta, reduction_indices=1), -1)
            query_importance = tf.expand_dims(
                tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(PL, -1)),
                -1)  # (batch_size,N,1)
            s = tf.squeeze(tf.matmul(alpha, query_importance),
                           [2])  # (batch_size,M)
            #unpacked_s = zip(tf.unstack(s, config.batch_size), tf.unstack(self.c, config.batch_size))
            # y_hat = (batch_size, config.vocab_size)  (the probability of each word being the answer)
            #y_hat = tf.stack([tf.unsorted_segment_sum(attentions, sentence_ids, config.vocab_size) for (attentions, sentence_ids) in unpacked_s])
            match = c * tf.reshape(s, [-1, PL, 1])  #(batch_size,max_c_len,dim)
        # obtain rQ from the question embedding
        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            self.start_logits = tf.nn.softmax(logits1)
            self.stop_logits = tf.nn.softmax(logits2)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(
                losses + losses2) + config.l2_reg * self.regularizer
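A minimal sketch of the masked softmax behind alpha and beta above: masked positions are pushed to a large negative value before normalizing, so padded context/question words receive effectively zero attention. The helper name and toy tensors are illustrative, not the codebase's own softmax.

import tensorflow as tf

def masked_softmax(logits, axis, mask):
    # mask: 1.0 for real tokens, 0.0 for padding, broadcastable against logits
    return tf.nn.softmax(logits - (1.0 - mask) * 1e30, axis=axis)

M = tf.constant([[[0.5, 1.0, 2.0],
                  [1.5, 0.2, 0.3]]])        # [batch=1, c_len=2, q_len=3]
M_mask = tf.constant([[[1., 1., 0.],
                       [1., 1., 0.]]])      # the last question position is padding

alpha = masked_softmax(M, 2, M_mask)        # attention over question words
beta = masked_softmax(M, 1, M_mask)         # attention over context words

with tf.Session() as sess:
    print(sess.run(alpha))
    print(sess.run(beta))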
Exemplo n.º 22
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope('emb'):
            with tf.variable_scope('char'):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope('word'):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope('encoding'):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope('attention'):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope('match'):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope('pointer'):
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope('predict'):
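            # Span decoding: the outer product of the start and end distributions
            # is restricted by matrix_band_part to spans with end - start <= 15,
            # and the most probable (start, end) pair is read off with argmax.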
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(losses + losses2)
Exemplo n.º 23
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = \
            config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, \
            config.char_dim, config.char_hidden
        gru = CudnnGRU if config.use_cudnn else NativeGRU

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)  # representation of paragraph
            q = rnn(q_emb, seq_len=self.q_len)  # representation of question

        with tf.variable_scope(
                "attention"
        ):  # gated att rnn (using dot att from Attention is All You Need actually)
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):  # self-matching rnn
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
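            # The question encoding is pooled into a single summary vector r_Q,
            # which initializes the pointer network that emits start and end
            # logits over the passage.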
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = PointerNet(batch=N,
                                 hidden=init.get_shape().as_list()[-1],
                                 keep_prob=config.ptr_keep_prob,
                                 is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
Exemplo n.º 24
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        gi = []
        att_vP = []
        for i in range(config.max_para):
            with tf.variable_scope("emb"):
                with tf.variable_scope("char"):
                    ch_emb = tf.reshape(tf.nn.embedding_lookup(\
                     self.char_mat, self.pr_ch), [N * PL, CL, dc])
                    #	self.char_mat, self.ch), [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

                with tf.name_scope("word"):
                    c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                    q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            with tf.variable_scope("encoding"):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=c_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                c = rnn(c_emb, seq_len=self.c_len)
                q = rnn(q_emb, seq_len=self.q_len)

            with tf.variable_scope("attention"):
                qc_att = dot_attention(c,
                                       q,
                                       mask=self.q_mask,
                                       hidden=d,
                                       keep_prob=config.keep_prob,
                                       is_train=self.is_train)
                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=qc_att.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                att = rnn(qc_att, seq_len=self.c_len)
                # att is the v_P
                att_vP.append(att)
            """
			with tf.variable_scope("match"):
				self_att = dot_attention(
					att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
				rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				match = rnn(self_att, seq_len=self.c_len)
			"""
            with tf.variable_scope("pointer"):

                # r_Q:
                init = summ(q[:, :, -2 * d:],
                            d,
                            mask=self.q_mask,
                            keep_prob=config.ptr_keep_prob,
                            is_train=self.is_train)

                pointer = ptr_net(batch=N,
                                  hidden=init.get_shape().as_list()[-1],
                                  keep_prob=config.ptr_keep_prob,
                                  is_train=self.is_train)
                logits1, logits2 = pointer(init, att, d, self.c_mask)

            with tf.variable_scope("predict"):
                outer = tf.matmul(
                    tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                    tf.expand_dims(tf.nn.softmax(logits2), axis=1))
                outer = tf.matrix_band_part(outer, 0, 15)
                self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
                self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits1, labels=self.y1)
                losses2 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits2, labels=self.y2)
                self.loss = tf.reduce_mean(losses + losses2)

                # print losses
                #condition = tf.greater(self.loss, 11)
                #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
                #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        for i in range(config.max_para):
            # Passage ranking
            with tf.variable_scope("passage-ranking-attention"):
                vj_P = dropout(att, keep_prob=config.keep_prob,
                               is_train=self.is_train)
                r_Q = dropout(init, keep_prob=config.keep_prob,
                              is_train=self.is_train)
                r_P = attention(r_Q,
                                vj_P,
                                mask=self.c_mask,
                                hidden=d,
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)

                #rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=pr_att.get_shape(
                #).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
                #att_rp = rnn(qc_att, seq_len=self.c_len)

                # Wg: concatenate r_Q (init) with the pooled passage vector r_P
                # from the attention above (assuming both are [N, dim], so the
                # feature axis is 1)
                concatenate = tf.concat([init, r_P], axis=1)
                g = tf.nn.tanh(
                    dense(concatenate, hidden=d, use_bias=False, scope="g"))
                g_ = dense(g, 1, use_bias=False, scope="g_")
                gi.append(g_)
        gi_ = tf.convert_to_tensor(gi)
        gi = tf.nn.softmax(gi_)
        # softmax_cross_entropy_with_logits expects unnormalized scores, so the
        # raw gi_ (not the softmaxed gi) is used as the logits here.
        self.pr_loss = tf.nn.softmax_cross_entropy_with_logits(logits=gi_,
                                                               labels=self.pr)
    def ready(self):
        config = self.config
        N, PL, QL, d = config.batch_size, self.c_maxlen, self.q_maxlen, config.hidden
        keep_prob, is_train = config.keep_prob, config.is_train
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.name_scope("word"):
                c = tf.nn.embedding_lookup(self.word_mat, self.c)
                q = tf.nn.embedding_lookup(self.word_mat, self.q)
            c_emb = tf.concat([c, self.fs, self.fe], axis=2)
            q_emb = q

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c_enc, bw_final_state_c = rnn(c_emb, seq_len=self.c_len)
            q_enc, bw_final_state_q = rnn(q_emb, seq_len=self.q_len)

            encoder_outputs = tf.concat([c_enc, q_enc], axis=1)
            bw_final_state = (bw_final_state_c, bw_final_state_q)

        with tf.variable_scope("attention"):
            bi_final_hidden = dropout(bw_final_state,
                                      keep_prob=keep_prob,
                                      is_train=is_train)
            source_sequence_length = tf.add(PL, QL)
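            # target_sequence_length, target_input and embedding_decoder are
            # expected to be defined elsewhere on the model; they are not built
            # in this snippet.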

            logits, sample_id, final_context_state = _build_decoder(
                encoder_outputs, bi_final_hidden, config, is_train,
                source_sequence_length, target_sequence_length, target_input,
                embedding_decoder)
            """
			
			qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
				keep_prob=config.keep_prob, is_train=self.is_train,
				name_scope="attention_layer")
			rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
			).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
			att = rnn(qc_att, seq_len=self.c_len)
			# att is the v_P
			if i==0:
				att_vP = att
			else:
				att_vP = tf.concat([att_vP, att], axis=1)
			#att = tf.Print(att,[att],message="att:")
			print("att:",att.get_shape().as_list())
			print("att_vP:",att_vP.get_shape().as_list())
			"""

        with tf.variable_scope("pointer"):

            # r_Q:
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            print("rQ:", init.get_shape().as_list())
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, att_vP, d, self.c_pr_mask)
            tf.summary.histogram('rQ_init', init)
            tf.summary.histogram('pointer_logits_1', logits1)
            tf.summary.histogram('pointer_logits_2', logits2)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1_pr)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2_pr)
            #losses1_2 = tf.reduce_mean(losses1_2, axis=0)
            self.loss = tf.reduce_mean(losses + losses2)

            # print losses
            #condition = tf.greater(self.loss, 11)
            #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
            #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        if config.with_passage_ranking:
            gi = None
            for i in range(config.max_para):
                # Passage ranking: score the i-th paragraph representation
                # against the question summary r_Q (weights are shared across
                # paragraphs via scope reuse).
                with tf.variable_scope("passage-ranking-attention",
                                       reuse=(i > 0)):
                    #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:")
                    vj_P = att_vP[:, i * 400:(i + 1) * 400, :]
                    pr_att = pr_attention(
                        batch=N,
                        hidden=init.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train,
                        name_scope="passage_ranking_att_layer")
                    r_P = pr_att(init, vj_P, d, self.c_mask)
                    tf.summary.histogram('r_P_' + str(i), r_P)
                    #r_P = tf.Print(r_P,[r_P],message="r_p")
                    # Wg
                    concatenate = tf.concat([init, r_P], axis=1)
                    g = tf.nn.tanh(
                        dense(concatenate,
                              hidden=d,
                              use_bias=False,
                              scope="g",
                              name_scope="dense_pr_att_layer_1"))
                    g_ = dense(g,
                               1,
                               use_bias=False,
                               scope="g_",
                               name_scope="dense_pr_att_layer_2")
                    #g = tf.Print(g,[g],message="g")
                    if i == 0:
                        gi = tf.reshape(g_, [N, 1])
                    else:
                        gi = tf.concat([gi, tf.reshape(g_, [N, 1])],
                                       axis=1)
            tf.summary.histogram('gi', gi)
            #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32)
            #self.gi = tf.nn.softmax(gi_)
            #self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
            #			logits=gi_, labels=tf.reshape(self.pr,[-1,1]))
            self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
                logits=gi, labels=self.pr)
            #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3),
            #	tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:")
            self.pr_loss = tf.reduce_mean(self.losses3)
            #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss])
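            # Final objective: a fixed interpolation of the span loss and the
            # passage-ranking loss, e_loss = r * loss + (1 - r) * pr_loss with r = 0.8.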
            self.r = tf.constant(0.8)
            self.e_loss1 = tf.multiply(self.r, self.loss)
            self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r),
                                       self.pr_loss)
            self.e_loss = tf.add(self.e_loss1, self.e_loss2)
    def get_vP(self, i, att_vP, q_, answer_info, y1, y2, c_pr_mask, cmax_c,
               clen_c):
        # max para limit
        config = self.config

        opt = True
        MPL = config.para_limit
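        # MPL is the per-paragraph token limit; the i-th paragraph (tokens and
        # characters) is sliced out of the concatenated multi-paragraph inputs below.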
        zero = tf.constant(0, dtype=tf.int32)
        j = tf.constant(0, dtype=tf.int32)

        c = self.c_pr[:, i * MPL:(i + 1) * MPL]
        ch = self.ch_pr[:, i * MPL:(i + 1) * MPL, :]
        qh = self.qh
        q = self.q

        c_mask = tf.cast(c, tf.bool)
        q_mask = self.q_mask

        # passage ranking line:
        #self.pr_mask = tf.cast(self.p, tf.bool)

        c_len = tf.reduce_sum(tf.cast(c_mask, tf.int32), axis=1)
        c_len_int = tf.reshape(c_len, [config.batch_size, 1])
        q_len = self.q_len

        if opt:
            N, CL = config.batch_size, config.char_limit
            c_maxlen = tf.reduce_max(c_len)
            c_maxlen_int = tf.reshape(tf.reduce_max(c_len_int), [1])
            q_maxlen = q_len
            c = tf.slice(c, [0, 0], [N, c_maxlen])
            c_mask = tf.slice(c_mask, [0, 0], [N, c_maxlen])
            q_mask = self.q_mask
            ch = tf.slice(ch, [0, 0, 0], [N, c_maxlen, CL])
            qh = self.qh

            temp = self.y2[:, i * MPL:(i + 1) * MPL]
            #self.y1 = tf.Print(self.y1,["y1:",tf.shape(self.y1)])
            #self.y2 = tf.Print(self.y2,["y2:",tf.shape(self.y2)])
            y1__ = tf.slice(self.y1, [0, i * MPL], [N, c_maxlen])
            #y1__ = tf.Print(y1__,["y1__:",tf.shape(y1__)])

            y2__ = tf.slice(self.y2, [0, i * MPL], [N, c_maxlen])
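            # For the first paragraph (i == 0) the mask and length tensors are
            # used as-is; for later paragraphs they are concatenated onto the
            # running multi-paragraph versions via the tf.cond branches below.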

            def b1():
                return c_mask

            def b2():
                return tf.concat([c_pr_mask, c_mask], axis=1)

            c_pr_mask = tf.cond(tf.equal(i, zero), b1, b2)

            def b3():
                return c_maxlen_int, c_len_int

            def b4():
                print(clen_c.get_shape(), c_len_int.get_shape())
                a = tf.concat([cmax_c, c_maxlen_int], axis=0)
                b = tf.concat([clen_c, c_len_int], axis=1)
                return a, b

            cmax_c, clen_c = tf.cond(tf.equal(i, zero), b3, b4)
            # passage ranking
            #print(self.ch_pr.get_shape())
            #print(self.c_pr.get_shape())
            #c_pr_mask = tf.cast(self.c_pr, tf.bool)
            #c_pr_mask = tf.slice(self.c_pr_mask, [0, i*MPL], [N, c_maxlen])
            ###
            ###
            #ch_pr = tf.slice(self.ch_pr, [0, i*MPL, 0], [N, c_maxlen, CL])
        else:
            self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit

        ch_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(ch, tf.bool), tf.int32), axis=2),
            [-1])
        qh_len = self.qh_len

        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, c_maxlen, self.q_maxlen, \
         config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn_gru else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                #CL = tf.Print(CL,[CL],message="CL:")
                #PL = tf.Print(PL,[PL],message="PL:")
                #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr.get_shape()],message="ch_pr:")
                #self.c_pr = tf.reshape(self.c_pr, [N, 12, PL])
                #print(self.ch.get_shape())
                #print(self.ch_pr.get_shape())
                #print(self.c.get_shape())
                #print(self.c_pr.get_shape())
                #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr[:,2:,:]],message="ch_pr")
                ch_emb = tf.reshape(tf.nn.embedding_lookup(\
                 self.char_mat, ch), [N * PL, CL, dc])
                #	self.char_mat, self.ch), [N * PL, CL, dc])
                print(ch.shape, PL)
                print(qh.shape, QL)
                qh_emb = tf.reshape(tf.nn.embedding_lookup(\
                 self.char_mat, qh), [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
                #qh_emb = tf.Print(qh_emb,[qh_emb],message="qh_emb")
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    self.cell_fw,
                    self.cell_bw,
                    ch_emb,
                    ch_len,
                    dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    self.cell_fw,
                    self.cell_bw,
                    qh_emb,
                    qh_len,
                    dtype=tf.float32)
                #state_fw = tf.Print(state_fw,[state_fw],message="state_fw")
                #state_bw = tf.Print(state_bw,[state_bw],message="state_bw")
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
                #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding", reuse=tf.AUTO_REUSE):
            """
			def f1():
				self.rnn1 = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				return self.rnn1(c_emb, seq_len=self.c_len)
			def f2():
				return self.rnn1(c_emb, seq_len=self.c_len)
			c = tf.cond(tf.equal(i, zero), f1, f2)
			#q = tf.cond(tf.equal(i, zero), f1, f2)
			#c = rnn(c_emb, seq_len=self.c_len)
			q = self.rnn1(q_emb, seq_len=self.q_len)
			self.q_enc = q
			#self.rnn1 = rnn
			"""
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            c = rnn(c_emb, seq_len=c_len)
            q = rnn(q_emb, seq_len=q_len)
            #c_len = tf.Print(c_len,[c_len,tf.shape(c)],message="C:")
            #self.q_enc = q
            q__ = q

        with tf.variable_scope("attention", reuse=tf.AUTO_REUSE):
            qc_att = dot_attention(c,
                                   q,
                                   mask=q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   name_scope="attention_layer")
            """
			print("qc_att:",qc_att.shape)
			def f3():
				self.rnn2 = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				return self.rnn2(qc_att, seq_len=self.c_len)
			def f4():
				return self.rnn2(qc_att, seq_len=self.c_len)
			att = tf.cond(tf.equal(self.i, zero), f3, f4)
			"""
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=c_len)

            ###
            #att = tf.Print(att,[tf.greater(tf.cast(tf.shape(att)[1],tf.int64),y1_),
            #	tf.shape(att)],message="att:")
            def f5():
                return att

            def f6():
                return tf.concat([att_vP, att], axis=1)

            #att = rnn(qc_att, seq_len=self.c_len)
            #self.rnn2 = rnn
            # att is the v_P
            att_vP = tf.cond(tf.equal(i, zero), f5, f6)

        def f7():
            return y1__, y2__

        def f8():
            return tf.concat([y1, y1__], axis=1), tf.concat([y2, y2__], axis=1)

        y1, y2 = tf.cond(tf.equal(i, zero), f7, f8)

        return tf.add(i, tf.constant(
            1)), att_vP, q__, answer_info, y1, y2, c_pr_mask, cmax_c, clen_c
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   name_scope="attention_layer")
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)
            tf.summary.histogram('vt_P', att)
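            # dot_attention is assumed to register its softmax logits and matmul
            # outputs in these graph collections so they can be fetched for inspection.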
            self.att_logits = tf.get_collection('Softmax_logits')[0]
            self.att_outputs = tf.get_collection('MatMul_outputs')[0]

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train,
                                     name_scope="match_layer")
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)
            tf.summary.histogram('self_match', match)
            self.match_logits = tf.get_collection('Softmax_logits')[1]
            self.match_outputs = tf.get_collection('MatMul_outputs')[1]

        with tf.variable_scope("pointer"):
            # r_Q:
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)

            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)
            tf.summary.histogram('rQ_init', init)
            tf.summary.histogram('pointer_logits_1', logits1)
            tf.summary.histogram('pointer_logits_2', logits2)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
            ####
            self.predict_outer_start = tf.reduce_max(outer, axis=2)
            self.predict_outer_end = tf.reduce_max(outer, axis=1)
Exemplo n.º 28
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
            ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
            )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
Exemplo n.º 29
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = native_rnn

        c_elmo_features = self.elmo(self.c_elmo)
        q_elmo_features = self.elmo(self.q_elmo)

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_elmo_emb = weight_layers('embedding',
                                       c_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']
            tf.get_variable_scope().reuse_variables()
            q_elmo_emb = weight_layers('embedding',
                                       q_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']
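            # The passage and question use the same learned scalar mixture over
            # the ELMo layers (the 'embedding' weights), shared via reuse_variables().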

            c_elmo_emb = dropout(c_elmo_emb,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)
            q_elmo_emb = dropout(q_elmo_emb,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)

            c_emb = tf.concat([c_emb, ch_emb, c_elmo_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb, q_elmo_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(config.cell,
                      num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(config.cell,
                      num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(config.cell,
                      num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

            c_elmo_enc = weight_layers('encoding',
                                       c_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']
            tf.get_variable_scope().reuse_variables()
            q_elmo_enc = weight_layers('encoding',
                                       q_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']

            c_elmo_enc = dropout(c_elmo_enc,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)
            q_elmo_enc = dropout(q_elmo_enc,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)

            match = tf.concat([match, c_elmo_enc], -1)
            q = tf.concat([q, q_elmo_enc], -1)

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(losses + losses2)
Exemplo n.º 30
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            self.c_emb = c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)
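            # Frozen (stop_gradient) copies of the embeddings feed the "badptr"
            # pre-pass below so it does not backpropagate into the shared embeddings.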

            bad_c_emb = tf.stop_gradient(c_emb)
            bad_q_emb = tf.stop_gradient(q_emb)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=bad_c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            self.c_rnn = rnn(bad_c_emb, seq_len=self.c_len)
            self.q_rnn = rnn(bad_q_emb, seq_len=self.q_len)

            badptr_c = tf.stop_gradient(self.c_rnn)
            badptr_q = tf.stop_gradient(self.q_rnn)
            old_rnn = rnn

        with tf.variable_scope("badptr_attention"):
            qc_att, self.badptr_qc_att = dot_attention(
                badptr_c,
                badptr_q,
                mask=self.q_mask,
                hidden=d,
                keep_prob=config.keep_prob,
                is_train=self.is_train,
                give=True)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            self.att = [rnn(qc_att, seq_len=self.c_len)]
            self.att += [self.att[-1][:, -1, :]]

        with tf.variable_scope("badptr_dense"):
            for _ in range(3):
                self.att += [
                    tf.nn.dropout(tf.keras.layers.Dense(300)(self.att[-1]),
                                  keep_prob=config.keep_prob)
                ]

        with tf.variable_scope("badptr"):
            init = self.att[-1]
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, self.att[0], d, self.c_mask)

        with tf.variable_scope("badptr_predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.bad_yp1_distrib = tf.reduce_max(outer, axis=2)
            self.bad_yp2_distrib = tf.reduce_max(outer, axis=1)
            self.bad_yp1 = tf.argmax(self.bad_yp1_distrib, axis=1)
            self.bad_yp2 = tf.argmax(self.bad_yp2_distrib, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.bad_y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.bad_y2))
            self.loss = tf.reduce_mean(losses + losses2)

        # recompute c with bitmask
        left = tf.sequence_mask(self.bad_yp1, tf.shape(c_emb)[1])
        right = tf.logical_not(
            tf.sequence_mask(self.bad_yp2 + 1,
                             tf.shape(c_emb)[1]))
        self.combo = combo = tf.logical_or(left, right)
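        # combo is True for tokens outside the predicted "bad" span
        # [bad_yp1, bad_yp2]; those tokens are kept and the span itself is
        # removed from the passage below.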

        ### FOR TESTING ###
        ## self.combo = combo = tf.cast(tf.ones_like(combo), tf.bool)

        def adjust(c_emb_combo):
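            # Per example: drop the tokens masked out by combo and right-pad the
            # remainder back to the original length; the tf.cond leaves the
            # example unchanged when the removed span is exactly the first token.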
            c_emb, combo = c_emb_combo
            foo = c_emb
            bar = tf.boolean_mask(foo, combo)

            return tf.cond(
                tf.logical_and(tf.equal(combo[0], False),
                               tf.equal(combo[1], True)),
                false_fn=lambda: tf.pad(
                    bar, [[0, tf.shape(foo)[0] - tf.shape(bar)[0]], [0, 0]]),
                true_fn=lambda: foo)

        self.c_emb_new = c_emb_new = tf.map_fn(adjust, (c_emb, combo),
                                               dtype=(tf.float32))
        self.c_len = tf.reduce_sum(tf.cast(
            tf.logical_and(self.c_mask, self.combo), tf.int32),
                                   axis=-1)
        self.c_mask = tf.sequence_mask(
            tf.reduce_sum(tf.cast(tf.logical_and(self.c_mask, self.combo),
                                  tf.int32),
                          axis=-1),
            tf.shape(self.c_mask)[1])

        with tf.variable_scope("encoding", reuse=True):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train,
                      super_hacky_reload=True)
            #### SEQ LEN HAS TO BE FIXED!!!! ####
            c = rnn(c_emb_new, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        self.c_ck = c
        self.q_ck = q

        ### MAKE SURE THESE ARE RUN!!! ###
        print('RUN ASSIGN TRICK OPS (model.assign_trick_ops)!!')
        self.assign_trick_ops = []
        for i in range(len(rnn.init_fw)):
            self.assign_trick_ops += [
                tf.assign(rnn.init_fw[i], old_rnn.init_fw[i])
            ]
            self.assign_trick_ops += [
                tf.assign(rnn.init_bw[i], old_rnn.init_bw[i])
            ]

        with tf.variable_scope("attention"):
            qc_att, self.qc_att = dot_attention(c,
                                                q,
                                                mask=self.q_mask,
                                                hidden=d,
                                                keep_prob=config.keep_prob,
                                                is_train=self.is_train,
                                                give=True)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        self.att_ck = att

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        self.match_ck = match

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1_distrib = tf.reduce_max(outer, axis=2)
            self.yp2_distrib = tf.reduce_max(outer, axis=1)
            self.yp1 = tf.argmax(self.yp1_distrib, axis=1)
            self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
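
        # --- Hedged usage sketch (not part of the original snippet) ---
        # A minimal session-side flow, assuming `model`, `saver`,
        # `checkpoint_path` and `feed` exist in the caller's code:
        #
        #   with tf.Session() as sess:
        #       saver.restore(sess, checkpoint_path)
        #       sess.run(model.assign_trick_ops)  # copy init states from old_rnn
        #       p1, p2 = sess.run([model.yp1, model.yp2], feed_dict=feed)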
Exemplo n.º 31
0
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
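                # Character-level features: each token's character sequence is
                # encoded by a shared (reused) GRU; the 2 * dg-dim output is
                # reshaped back to one vector per token below.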
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                _, qh_emb = stacked_gru(qh_emb,
                                        dg,
                                        num_layers=1,
                                        seq_len=self.qh_len,
                                        keep_prob=self.keep_prob,
                                        is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                _, ch_emb = stacked_gru(ch_emb,
                                        dg,
                                        num_layers=1,
                                        seq_len=self.ch_len,
                                        keep_prob=self.keep_prob,
                                        is_train=self.is_train)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
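            # Encode passage and question with a 3-layer GRU; reuse_variables()
            # makes the two passes share weights.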
            c, _ = stacked_gru(c_emb,
                               d,
                               batch=N,
                               num_layers=3,
                               seq_len=self.c_len,
                               keep_prob=self.keep_prob,
                               is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            q, _ = stacked_gru(q_emb,
                               d,
                               batch=N,
                               num_layers=3,
                               seq_len=self.q_len,
                               keep_prob=self.keep_prob,
                               is_train=self.is_train)

        with tf.variable_scope("attention"):
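            # Question-aware passage: dot-product attention from the passage
            # over the question, followed by a single-layer GRU.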
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=self.keep_prob,
                                   is_train=self.is_train)
            att, _ = stacked_gru(qc_att,
                                 d,
                                 num_layers=1,
                                 seq_len=self.c_len,
                                 keep_prob=self.keep_prob,
                                 is_train=self.is_train)

        with tf.variable_scope("match"):
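            # Self-matching attention of the passage over itself, followed by
            # another single-layer GRU.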
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=self.keep_prob,
                                     is_train=self.is_train)
            match, _ = stacked_gru(self_att,
                                   d,
                                   num_layers=1,
                                   seq_len=self.c_len,
                                   keep_prob=self.keep_prob,
                                   is_train=self.is_train)

        with tf.variable_scope("pointer"):
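            # Bidirectional pointer: start/end logits are predicted in both
            # orders (start-then-end and end-then-start) and averaged below.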
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=self.ptr_keep_prob,
                        is_train=self.is_train)
            d_match = dropout(match,
                              keep_prob=self.ptr_keep_prob,
                              is_train=self.is_train)
            hidden = init.get_shape().as_list()[-1]
            cell_fw = GRUCell(hidden)
            cell_bw = GRUCell(hidden)
            with tf.variable_scope("fw"):
                inp, logits1_fw = pointer(d_match, init, d, mask=self.c_mask)
                _, state = cell_fw(inp, init)
                tf.get_variable_scope().reuse_variables()
                _, logits2_fw = pointer(d_match, state, d, mask=self.c_mask)
            with tf.variable_scope("bw"):
                inp, logits2_bw = pointer(d_match, init, d, mask=self.c_mask)
                _, state = cell_bw(inp, init)
                tf.get_variable_scope().reuse_variables()
                _, logits1_bw = pointer(d_match, state, d, mask=self.c_mask)
            logits1 = (logits1_fw + logits1_bw) / 2.
            logits2 = (logits2_fw + logits2_bw) / 2.

        with tf.variable_scope("predict"):
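            # Span prediction and loss: outer product of the start/end softmax
            # distributions, band-limited so the end is at most 15 positions
            # after the start; the loss is cross-entropy on both boundaries.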
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)