Example #1
class Graph:
    def __init__(self, is_training=True):
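        """Build a Transformer-style encoder/decoder graph with a clue-mask
        channel on the encoder input and a copy mechanism over source positions."""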
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.xloc, self.yloc, self.m, self.num_batch = get_batch_data(
                )  # (N, T)
            else:  # inference
                self.x = tf.placeholder(tf.int32, shape=(None, hp.x_maxlen))
                self.y = tf.placeholder(tf.int32, shape=(None, hp.y_maxlen))
                self.xloc = tf.placeholder(tf.int32, shape=(None, hp.x_maxlen))
                self.yloc = tf.placeholder(tf.int32, shape=(None, hp.y_maxlen))
                self.m = tf.placeholder(tf.int32, shape=(None, hp.x_maxlen))

            # define decoder inputs
            self.decoder_inputs = tf.concat(
                (tf.ones_like(self.y[:, :1]) * 2, self.y[:, :-1]), -1)  # 2:<S>

            # Load vocabulary
            src2idx, idx2src = load_src_vocab()
            des2idx, idx2des = load_des_vocab()

            self.hidden_units = hp.hidden_units

            # Encoder
            with tf.variable_scope("encoder"):
                ## Embedding
                self.enc = embedding(self.x,
                                     vocab_size=len(src2idx),
                                     num_units=self.hidden_units,
                                     scale=True,
                                     scope="enc_embed")
                clue_level = tf.random_poisson(shape=[1],
                                               lam=1,
                                               dtype=tf.int32)
                #clue_level = tf.Print(clue_level, [clue_level])
                #self.enc_mask = tf.expand_dims(tf.cast(tf.equal(self.m, 1), tf.float32), 2)
                self.enc_mask = tf.expand_dims(
                    tf.cast(
                        tf.logical_and(tf.greater_equal(self.m, 1),
                                       tf.less_equal(self.m, clue_level)),
                        tf.float32), 2)
                self.enc = tf.concat([self.enc, self.enc_mask], axis=2)
                self.hidden_units += 1

                ## Positional Encoding
                if hp.sinusoid:
                    self.enc += positional_encoding(
                        self.x,
                        num_units=self.hidden_units,
                        zero_pad=False,
                        scale=False,
                        scope="enc_pe")
                else:
                    self.enc += embedding(tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0),
                        [tf.shape(self.x)[0], 1]),
                                          vocab_size=hp.x_maxlen,
                                          num_units=self.hidden_units,
                                          zero_pad=False,
                                          scale=False,
                                          scope="enc_pe")

                tf.add_to_collection('explain_input', self.enc)

                ## Dropout
                self.enc = tf.layers.dropout(
                    self.enc,
                    rate=hp.dropout_rate,
                    training=tf.convert_to_tensor(is_training))

                ## Blocks
                for i in range(hp.num_blocks):
                    with tf.variable_scope("num_blocks_{}".format(i)):
                        ### Multihead Attention
                        self.enc = multihead_attention(
                            queries=self.enc,
                            keys=self.enc,
                            num_units=self.hidden_units,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            causality=False)

                        ### Feed Forward
                        self.enc = feedforward(self.enc,
                                               num_units=[
                                                   4 * self.hidden_units,
                                                   self.hidden_units
                                               ])

            # Decoder
            with tf.variable_scope("decoder"):
                ## Embedding
                self.dec = embedding(self.decoder_inputs,
                                     vocab_size=len(des2idx),
                                     num_units=self.hidden_units,
                                     scale=True,
                                     scope="dec_embed")

                ## Positional Encoding
                if hp.sinusoid:
                    self.dec += positional_encoding(
                        self.decoder_inputs,
                        num_units=self.hidden_units,
                        zero_pad=False,
                        scale=False,
                        scope="dec_pe")
                else:
                    self.dec += embedding(tf.tile(
                        tf.expand_dims(
                            tf.range(tf.shape(self.decoder_inputs)[1]), 0),
                        [tf.shape(self.decoder_inputs)[0], 1]),
                                          vocab_size=hp.y_maxlen,
                                          num_units=self.hidden_units,
                                          zero_pad=False,
                                          scale=False,
                                          scope="dec_pe")

                tf.add_to_collection('explain_input', self.dec)
                ## Dropout
                self.dec = tf.layers.dropout(
                    self.dec,
                    rate=hp.dropout_rate,
                    training=tf.convert_to_tensor(is_training))

                ## Blocks
                for i in range(hp.num_blocks):
                    with tf.variable_scope("num_blocks_{}".format(i)):
                        ## Multihead Attention (self-attention)
                        self.dec = multihead_attention(
                            queries=self.dec,
                            keys=self.dec,
                            num_units=self.hidden_units,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            causality=True,
                            scope="self_attention")

                        ## Multihead Attention (vanilla attention)
                        self.dec = multihead_attention(
                            queries=self.dec,
                            keys=self.enc,
                            num_units=self.hidden_units,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            causality=False,
                            scope="vanilla_attention")

                        ## Feed Forward
                        with tf.variable_scope(
                                "num_blocks_fc_dec_{}".format(i)):
                            self.dec = feedforward(self.dec,
                                                   num_units=[
                                                       4 * self.hidden_units,
                                                       self.hidden_units
                                                   ])

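            # Copy mechanism: an attention-style matrix scores every source
            # position (encoder states) against every decoder step; these
            # scores become the per-position copy logits.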
            self.loc_enc = self.enc
            self.loc_logits = attention_matrix(queries=self.loc_enc,
                                               keys=self.dec,
                                               num_units=self.hidden_units,
                                               dropout_rate=hp.dropout_rate,
                                               is_training=is_training,
                                               causality=False,
                                               scope="copy_matrix")

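            # xloc/yloc assign a shared id to words occurring in both source and
            # target (-1 otherwise). The product of their one-hot encodings gives
            # loc_label[n, t_y, t_x] = 1 where target step t_y can be copied from
            # source position t_x; the exclusive cumulative sum records which
            # source positions were already copied before each decoding step.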
            xloc_vec = tf.one_hot(self.xloc,
                                  depth=hp.y_maxlen,
                                  dtype=tf.float32)
            yloc_vec = tf.one_hot(self.yloc,
                                  depth=hp.y_maxlen,
                                  dtype=tf.float32)
            loc_label = tf.matmul(yloc_vec, tf.transpose(xloc_vec, [0, 2, 1]))
            self.loc_label_history = tf.cumsum(loc_label,
                                               axis=1,
                                               exclusive=True)

            # Final linear projection
            self.loc_logits = tf.transpose(self.loc_logits, [0, 2, 1])

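            # Stack the raw copy scores with the copy history and mix them through
            # a learned 1-unit dense layer, so each copy logit can take into
            # account whether that source position was copied before.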
            self.loc_logits = tf.stack(
                [self.loc_logits, self.loc_label_history], axis=3)
            self.loc_logits = tf.squeeze(tf.layers.dense(self.loc_logits, 1),
                                         axis=[3])

            x_masks = tf.tile(tf.expand_dims(tf.equal(self.x, 0), 1),
                              [1, hp.y_maxlen, 1])
            #y_masks = tf.tile(tf.expand_dims(tf.equal(self.y, 0), -1), [1, 1, hp.x_maxlen])
            paddings = tf.ones_like(self.loc_logits) * (-1e6)
            self.loc_logits = tf.where(x_masks, paddings,
                                       self.loc_logits)  # (N, T_q, T_k)
            #self.loc_logits = tf.where(y_masks, paddings, self.loc_logits) # (N, T_q, T_k)
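            # Final output distribution: target-vocabulary logits concatenated with
            # the per-source-position copy logits, so the argmax ranges jointly
            # over "generate word w" and "copy source position t".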
            self.logits = tf.layers.dense(self.dec, len(des2idx))
            self.final_logits = tf.concat([self.logits, self.loc_logits],
                                          axis=2)
            tf.add_to_collection('explain_output', self.final_logits)
            #self.final_logits = tf.Print(self.final_logits, [self.final_logits[0][0][-3:]], message="final_logits_last")
            #self.final_logits = tf.Print(self.final_logits, [self.final_logits[0][0][:3]], message="final_logits_first")

            self.preds = tf.to_int32(tf.argmax(self.final_logits, axis=-1))
            self.istarget = tf.to_float(tf.not_equal(self.y, 0))

            if is_training:
                label = tf.one_hot(self.y,
                                   depth=len(des2idx),
                                   dtype=tf.float32)
                # Special case: when copying is enabled, a target position whose
                # gold token is <UNK> but which can be copied from the source
                # should not keep its <UNK> label.
                unk_pos = label[:, :, 1]  # 1 where the gold token is <UNK>
                copy_pos = tf.sign(tf.reduce_sum(loc_label, axis=2))  # 1 where a copy source exists
                fix_pos = unk_pos * copy_pos  # <UNK> positions that are copyable
                #fix_pos = tf.Print(fix_pos, [tf.reduce_sum(unk_pos, axis=-1), tf.shape(unk_pos)], message="\nunk_pos", summarize=16)
                #fix_pos = tf.Print(fix_pos, [tf.reduce_sum(fix_pos, axis=-1), tf.shape(fix_pos)], message="\nfix_pos", summarize=16)
                fix_label = tf.expand_dims(label[:, :, 1] - fix_pos, axis=2)
                label = tf.concat(
                    [label[:, :, :1], fix_label, label[:, :, 2:]], axis=-1)

                self.final_label = tf.concat([label, loc_label], axis=2)
                #self.final_label = tf.Print(self.final_label, [self.final_label[0][0][-3:]], message="final_label")
                # Loss
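                # Several labels can be valid at a step (the vocabulary word and/or
                # any copyable source position). Pick the valid label the model
                # currently scores highest and train only toward that one.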
                self.min_logit_loc = min_logit_loc = tf.argmax(
                    self.final_logits + (-1e6) * (1.0 - self.final_label),
                    axis=-1)
                #min_logit_loc = tf.Print(min_logit_loc, [min_logit_loc[0]], message="min_logit_loc")
                self.min_label = tf.one_hot(min_logit_loc,
                                            depth=len(des2idx) + hp.x_maxlen,
                                            dtype=tf.float32)

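                # Number of candidate outputs actually available per example: the
                # full target vocabulary plus the non-padded source positions,
                # presumably so label_smoothing_mask spreads the smoothing mass
                # only over real candidates.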
                vocab_count = len(des2idx) + hp.x_maxlen - tf.reduce_sum(
                    tf.cast(tf.equal(self.x, 0), dtype=tf.int32), axis=-1)
                #vocab_count = tf.Print(vocab_count, [vocab_count[0]], message="vocab_count")
                self.y_smoothed = label_smoothing_mask(self.min_label,
                                                       vocab_count)
                #self.final_logits = tf.Print(self.final_logits, [self.final_logits[0][1][min_logit_loc[0][1]]], message="final_logits")
                #self.y_smoothed = tf.Print(self.y_smoothed, [self.y_smoothed[0][1][min_logit_loc[0][1]]], message="y_smoothed")
                self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.final_logits, labels=self.y_smoothed)
                #self.loss = tf.Print(self.loss, [self.final_label[0][1][min_logit_loc[0][1]]], message="final_label")
                #self.loss = tf.Print(self.loss, [self.loss[0][-3:]], message="loss_last")
                #self.loss = tf.Print(self.loss, [self.loss[0][:3]], message="loss_first")
                self.mean_loss = tf.reduce_sum(
                    self.loss * self.istarget) / (tf.reduce_sum(self.istarget))

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
Example #2
def eval(stage='test', checkpoint_file=None, is_dedup=False, clue_level=1):
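    """Greedily decode the dev/test split, write hypotheses with a per-token
    generate/copy analysis to result_dir, and report BLEU scores."""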
    # Load graph
    g = Graph(is_training=False, clue_level=clue_level)
    print("Graph loaded")

    # Load data
    if stage == 'test':
        X, XLoc, M, Sources, Targets = load_test_data()
    else:
        X, XLoc, M, Sources, Targets = load_dev_data()

    src2idx, idx2src = load_src_vocab()
    des2idx, idx2des = load_des_vocab()

    # X, Sources, Targets = X[:33], Sources[:33], Targets[:33]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    num_gen = 0
    num_copy = 0
    num_unk_copy = 0
    max_batch = 10

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=config) as sess:
            if not checkpoint_file:
                checkpoint_file = tf.train.latest_checkpoint(hp.logdir)

            ## Restore parameters
            sv.saver.restore(sess, checkpoint_file)
            print("Restored! {}".format(checkpoint_file))

            ## Get model name
            #mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1] # model name
            mname = checkpoint_file.split('/')[1]

            ## Inference
            if not os.path.exists(result_dir): os.mkdir(result_dir)
            with codecs.open(
                    result_dir + "/" + mname + '.level{}'.format(clue_level) +
                    '.' + stage, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                for i in tqdm(range(min(max_batch, len(X) // hp.batch_size))):

                    ### Get mini-batches
                    x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
                    xloc = XLoc[i * hp.batch_size:(i + 1) * hp.batch_size]
                    m = M[i * hp.batch_size:(i + 1) * hp.batch_size]
                    sources = Sources[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]
                    targets = Targets[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]

                    ### Autoregressive inference
                    preds = np.zeros((hp.batch_size, hp.y_maxlen), np.int32)
                    preds_unk = np.zeros((hp.batch_size, hp.y_maxlen),
                                         np.int32)
                    preds_xloc = np.zeros(
                        (hp.batch_size, hp.x_maxlen), np.int32) - 1
                    preds_yloc = np.zeros(
                        (hp.batch_size, hp.y_maxlen), np.int32) - 1
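                    # Greedy autoregressive decoding: at each step feed the partial
                    # hypothesis back in (indices beyond the target vocabulary are
                    # mapped to id 1, <UNK>, since the decoder embedding only covers
                    # the target vocabulary) together with xloc/yloc alignments
                    # recomputed from words shared by the source and the hypothesis.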
                    for j in range(hp.y_maxlen):
                        _preds, loc_logits = sess.run(
                            [g.preds, g.loc_logits], {
                                g.x: x,
                                g.y: preds_unk,
                                g.m: m,
                                g.xloc: preds_xloc,
                                g.yloc: preds_yloc
                            })
                        preds[:, j] = _preds[:, j]

                        preds_unk[:, j] = _preds[:, j]
                        preds_unk[preds_unk >= len(idx2des)] = 1

                        for b in range(hp.batch_size):
                            xloc_b = np.zeros(hp.x_maxlen, dtype=np.int32) - 1
                            yloc_b = np.zeros(hp.y_maxlen, dtype=np.int32) - 1

                            source_words = sources[b].split()
                            target_words = []
                            for idx in preds[b]:
                                if idx in idx2des:
                                    target_words.append(idx2des[idx])
                                elif idx - len(idx2des) == len(source_words):
                                    target_words.append('</S>')
                                else:
                                    cp_word_idx = idx - len(idx2des)
                                    cp_word = source_words[cp_word_idx]
                                    target_words.append(cp_word)
                            source_sent_np = np.array(source_words)
                            target_sent_np = np.array(target_words)
                            source_wset = set(source_words)
                            target_wset = set(target_words)
                            for loc_id, w in enumerate(target_wset
                                                       & source_wset):
                                xloc_b[np.where(source_sent_np == w)] = loc_id
                                yloc_b[np.where(target_sent_np == w)] = loc_id
                            preds_xloc[b] = xloc_b
                            preds_yloc[b] = yloc_b
                        #print(loc_logits.shape)
                        #print(loc_logits[0][j][:20])
                        #input()

                    ### Write to file
                    for source, target, m_, pred in zip(
                            sources, targets, m, preds):  # sentence-wise
                        got_display = []
                        got = []
                        source_words = np.array(source.split())
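                        # Indices below len(idx2des) are vocabulary words; anything
                        # beyond is a copy action whose source position is
                        # idx - len(idx2des).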
                        for idx in pred:
                            if idx in idx2des:
                                num_gen += 1
                                got.append(idx2des[idx])
                                got_display.append(idx2des[idx] +
                                                   '[{}]'.format(idx))
                            else:
                                num_copy += 1
                                cp_word_idx = idx - len(idx2des)
                                cp_word = source_words[cp_word_idx]
                                got.append(cp_word)
                                got_display.append(cp_word + '[{},{}]'.format(
                                    cp_word_idx, m_[cp_word_idx]))
                                if cp_word not in des2idx:
                                    num_unk_copy += 1

                        if is_dedup:
                            got = remove_dup(got)
                            got_display = remove_dup(got_display)

                        got = " ".join(got).split("</S>")[0].strip()
                        got_display = " ".join(got_display).split(
                            "</S>")[0].strip()
                        #if got.count('</S>'):
                        #    last_char = got.index('</S>')
                        #else:
                        #    last_char = len(got)
                        #got = " ".join(got[:last_char+1])
                        #got_display = " ".join(got_display)
                        fout.write("- source: " + source + "\n")
                        fout.write("- expected: " + target + "\n")
                        fout.write("- got: " + got + "\n")
                        fout.write("- analyse: " + got_display + "\n\n")
                        fout.flush()

                        # bleu score
                        ref = target.split()
                        hypothesis = got.split()
                        if len(ref) > 3 and len(hypothesis) > 3:
                            list_of_refs.append([ref])
                            hypotheses.append(hypothesis)

                ## Calculate corpus BLEU plus the individual 1- to 4-gram precisions (BLEU@1-4)
                score = corpus_bleu(list_of_refs, hypotheses)
                fout.write("Bleu Score = " + str(100 * score) + "\n")
                score = corpus_bleu(list_of_refs,
                                    hypotheses,
                                    weights=(1, 0, 0, 0))
                fout.write("Bleu@1 Score = " + str(100 * score) + "\n")
                score = corpus_bleu(list_of_refs,
                                    hypotheses,
                                    weights=(0, 1, 0, 0))
                fout.write("Bleu@2 Score = " + str(100 * score) + "\n")
                score = corpus_bleu(list_of_refs,
                                    hypotheses,
                                    weights=(0, 0, 1, 0))
                fout.write("Bleu@3 Score = " + str(100 * score) + "\n")
                score = corpus_bleu(list_of_refs,
                                    hypotheses,
                                    weights=(0, 0, 0, 1))
                fout.write("Bleu@4 Score = " + str(100 * score) + "\n")
                fout.write("Generate / Copy / UNK Copy = {} / {} / {}".format(
                    num_gen, num_copy, num_unk_copy))
Example #3
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()


if __name__ == '__main__':
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Load vocabulary
    src2idx, idx2src = load_src_vocab()
    des2idx, idx2des = load_des_vocab()

    # Construct graph
    g = Graph("train")
    print("Graph loaded")
    #with tf.Session(graph = g.graph) as sess:
    #    tf.train.export_meta_graph('explain.meta')
    #exit()

    # Start session
    sv = tf.train.Supervisor(graph=g.graph,
                             logdir=hp.logdir,
                             save_model_secs=0)
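    # save_model_secs=0 disables the Supervisor's periodic checkpointing;
    # checkpoints must be saved explicitly via sv.saver.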
    with sv.managed_session(config=config) as sess:
        for epoch in range(1, hp.num_epochs + 1):
            if sv.should_stop(): break