def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()

    task_num = 1
    name = "des" if task_num == 1 else "opr"

    # train_f = "./data/exp0803/training_dynamic_data.txt"
    # dev_f = "./data/exp0803/validation_dynamic_data.txt"
    # test_f = "./data/exp0803/test_dynamic_data.txt"
    # prepara_dynamic_tensor(inputH, train_f, dev_f, test_f, FLAGS.max_sequence_len, FLAGS.max_sequence_len2)

    time_gen = "0823"
    data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt")
    data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt")
    data_file_val = data_file_test
    # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt")
    prepara_tensor_y_seperate(inputH, data_file, data_file_val, data_file_test, "\t", FLAGS.max_sequence_len, name,
                              task_num)

def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()

    date_f = "0823"
    train_f = "./data/exp" + date_f + "/data_augment_train.txt"
    test_f = "./data/exp" + date_f + "/data_augment_test.txt"
    dev_f = test_f

    our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len) + "/"
    x_train_tensor = np.load(our_dir + "train_des.npy")
    # x_dev_tensor = np.load(our_dir + "dev_des.npy")
    x_test_tensor = np.load(our_dir + "test_des.npy")
    x_dev_tensor = x_test_tensor

    our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len2) + "/"
    x_train_tensor_o = np.load(our_dir + "train_opr.npy")
    # x_dev_tensor_o = np.load(our_dir + "dev_opr.npy")
    x_test_tensor_o = np.load(our_dir + "test_opr.npy")
    x_dev_tensor_o = x_test_tensor_o

    def normalize(a):
        amin, amax = a.min(), a.max()  # min and max over the whole array
        a = (a - amin) / (amax - amin)  # rescale every element to [0, 1]
        return a

    def normalize_tensor(t):
        t[:, :, :, 0] = normalize(t[:, :, :, 0])
        t[:, :, :, 1] = normalize(t[:, :, :, 1])
        t[:, :, :, 2] = normalize(t[:, :, :, 2])
        t[:, :, :, 3] = normalize(t[:, :, :, 3])
        return t
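
    # A guarded variant of normalize() (a sketch only, not used by the original pipeline):
    # it avoids a divide-by-zero when a channel happens to be constant. `eps` is an
    # assumed tolerance, not a parameter from the original code.
    def normalize_safe(a, eps=1e-8):
        amin, amax = a.min(), a.max()
        return (a - amin) / max(amax - amin, eps)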

    # Only the 4th channel (index 3) of each tensor is min-max normalized here;
    # the commented-out normalize_tensor calls below would normalize all four channels.
    x_test_tensor[:, :, :, 3] = normalize(x_test_tensor[:, :, :, 3])
    x_train_tensor[:, :, :, 3] = normalize(x_train_tensor[:, :, :, 3])
    x_test_tensor_o[:, :, :, 3] = normalize(x_test_tensor_o[:, :, :, 3])
    x_train_tensor_o[:, :, :, 3] = normalize(x_train_tensor_o[:, :, :, 3])
    # x_test_tensor = normalize_tensor(x_test_tensor)
    # x_test_tensor_o = normalize_tensor(x_test_tensor_o)
    # x_train_tensor = normalize_tensor(x_train_tensor)
    # x_train_tensor_o = normalize_tensor(x_train_tensor_o)

    sep = "\t"
    x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels(train_f, sep,
                                                                                                 FLAGS.max_sequence_len)
    # x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels(dev_f, sep, FLAGS.max_sequence_len)
    x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels(test_f, sep,
                                                                                           FLAGS.max_sequence_len)
    x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = x1_test, x2_test, x3_test, x4_test, y_test, y2_test

    des_e_names, des_opr_map = load_coocurrence_matrix("coorrence_file.txt")
    N_default = 0.01
    co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test, N_default)
    co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train, x4_train, N_default)
    # co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev)
    co_arr_val = co_arr_test

    with tf.Graph().as_default():

        sess = tf.Session()
        with sess.as_default():
            cnn = MultiTask_MultiGranModel(
                max_len1=FLAGS.max_sequence_len,
                max_len2=FLAGS.max_sequence_len2,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                filter_sizes2=list(map(int, FLAGS.filter_sizes2.split(","))),
                pool_sizes=list(map(int, FLAGS.pool_sizes.split(","))),
                pool_sizes2=list(map(int, FLAGS.pool_sizes2.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                constraint_lambda=FLAGS.con_lambda,
                alpha=FLAGS.alpha,
                type_CNN=FLAGS.type_CNN,
                view_num=FLAGS.view_num,
                view_nums=list(map(int, FLAGS.view_nums.split(",")))
            )

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)

            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            saver = tf.train.Saver(tf.all_variables(), max_to_keep=20)

            # Keep track of gradient values and sparsity (optional)
            for g, v in grads_and_vars:
                if g is not None:
                    tf.summary.histogram("grad_hist/{}".format(v.name), g)
                    tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g))
                    tf.summary.histogram(v.name, v)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "alpha_E", "multitask" + timestamp))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            print("Writing to {}\n".format(out_dir))

            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            constraint_summary = tf.summary.scalar("constraints", cnn.constraints)
            acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d)
            acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o)

            # Train Summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary1, acc_summary2, constraint_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr):
                feed_dict = {
                    cnn.input_tensor: x_batch,
                    cnn.input_y_description: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.input_tensor_o: x_batch2,
                    cnn.input_y_operation: y_batch2,
                    cnn.matrix: co_arr,
                }
                _, step, summaries, loss, accuracy1, accuracy2 = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if step % 10 == 0:
                    print(
                        "{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(time_str, step, loss, accuracy1,
                                                                              accuracy2))
                train_summary_writer.add_summary(summaries, step)
                return accuracy1, accuracy2, loss

            def dev_step(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr, writer=None):
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.matrix: co_arr
                }
                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o],
                    feed_dict)
                if writer:
                    writer.add_summary(summaries, step)
                return loss, accuracy1, accuracy2

            def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr):
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.matrix: co_arr
                }

                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o],
                    feed_dict)

                eval_file = open(out_dir + "/evaluation.txt", "w+")
                right_file = open(out_dir + "/right_cases.txt", "w+")
                right_file2 = open(out_dir + "/right_cases_operation.txt", "w+")
                wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
                wrong_file2 = open(out_dir + "/wrong_cases_operation.txt", "w+")

                eval_file.write("Accu1: " + str(accuracy1) + "\n")
                eval_file.write("Accu2: " + str(accuracy2) + "\n")

                predictions1 = np.argmax(pres1, 1)
                predictions2 = np.argmax(pres2, 1)
                labels1 = np.argmax(y_batch_dev, 1)
                labels2 = np.argmax(y_batch_dev2, 1)
                write_evaluation_file(eval_file, right_file, wrong_file, labels1, predictions1, x1_test, x2_test)
                write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, predictions2, x3_test, x4_test)

                eval_file.write("Parameters:")
                for attr, value in sorted(FLAGS.__flags.items()):
                    eval_file.write("{}={}".format(attr.upper(), value) + "\n")

                return loss, accuracy1, accuracy2

            def dev_whole(x_dev, y_dev, x_dev2, y_dev2, co_arr, writer=None):
                batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev, co_arr)), FLAGS.batch_size, 1, shuffle=False)
                batches_dev2 = inputH.batch_iter(list(zip(x_dev2, y_dev2)), FLAGS.batch_size, 1, shuffle=False)
                losses = []
                accuracies1 = []
                accuracies2 = []

                batches = zip(batches_dev, batches_dev2)

                for batches_dev, batches_dev2 in batches:
                    x_batch, y_batch, co_arr_ = zip(*batches_dev)
                    x_batch2, y_batch2 = zip(*batches_dev2)
                    loss, accuracy1, accuracy2 = dev_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_)
                    losses.append(loss)
                    accuracies1.append(accuracy1)
                    accuracies2.append(accuracy2)

                return np.mean(np.array(losses)), np.mean(np.array(accuracies1)), np.mean(np.array(accuracies2))

            def overfit(dev_loss, accu):
                # Early-stopping check: report overfitting when the current metric `accu`
                # fails to improve on the recent history recorded in `dev_loss`.
                num = FLAGS.early_stop_num
                n = len(dev_loss)
                if n < num:
                    return False

                for i in xrange(n - num, n - 1):
                    if dev_loss[i] < accu:
                        return False
                print(dev_loss)
                print(accu)
                return True

            # Generate batches
            batches = inputH.batch_iter(list(zip(x_train_tensor, y_train, x_train_tensor_o, y2_train, co_arr_train)),
                                        FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_loss = []
            train_loss = []

            train_accu = []
            train_accu2 = []
            dev_accu = []
            dev_accu2 = []
            # batch_d_o = zip(batches, batches2)
            optimum_accu1 = 0
            optimum_accu2 = 0
            data_num = len(y_train)
            num_batches_per_epoch = int(data_num / FLAGS.batch_size)
            # t = num_batches_per_epoch / 2
            optimum_loss = 1000

            for batch in batches:
                x_batch, y_batch, x_batch2, y_batch2, co_arr_batch = zip(*batch)

                acc1, acc2, loss_train = train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_batch)
                train_accu.append(acc1)
                train_accu2.append(acc2)
                train_loss.append(loss_train)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % num_batches_per_epoch == 0:

                    print("\nEvaluation:")
                    loss, accuracy1, accuracy2 = dev_whole(x_dev_tensor, y_dev, x_dev_tensor_o, y2_dev, co_arr_val,
                                                           writer=dev_summary_writer)

                    summary = tf.Summary()

                    summary.value.add(tag="Accuracy_Dev", simple_value=accuracy1)
                    summary.value.add(tag="Accuracy2_Dev", simple_value=accuracy2)
                    summary.value.add(tag="Loss_Dev", simple_value=loss)
                    dev_summary_writer.add_summary(summary, current_step)

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(time_str, loss, accuracy1, accuracy2))
                    dev_accu.append(accuracy1)
                    dev_accu2.append(accuracy2)
                    dev_loss.append(loss)
                    print("\nRecently accuracy:")
                    print dev_accu[-10:]
                    print dev_accu2[-10:]

                    # if loss < optimum_loss:
                    #     optimum_loss = loss
                    #     stop_early = 0
                    #     optimum_accu1 = accuracy1
                    #     optimum_accu2 = accuracy2
                    #     path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    #     print("Saved model checkpoint to {}\n".format(path))
                    # else:
                    #     stop_early += 1
                    #     if stop_early == 10:
                    #         break
                    if FLAGS.early_stop:
                        if overfit(dev_accu, accuracy1) or overfit(dev_accu2, accuracy2):
                            print('Overfit!!')
                            print(current_step)
                            print(current_step / num_batches_per_epoch)
                            break
                        print("")

                    if accuracy1 > optimum_accu1 and accuracy2 > optimum_accu2:
                        optimum_accu1 = accuracy1
                        optimum_accu2 = accuracy2
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))

                    print("Optimum_accu1: " + str(optimum_accu1))
                    print("Optimum_accu2: " + str(optimum_accu2))

    print("Optimum_accu1: " + str(optimum_accu1))
    print("Optimum_accu2: " + str(optimum_accu2))

    import matplotlib.pyplot as plt
    # def plot_plots(y1, y2, name_task, type_eval):
    #     x1 = np.arange(len(y1))
    #     x2 = np.arange(len(y2))
    #     p1, = plt.plot(x1, y1, 'b', label="Validation")
    #     p2, = plt.plot(x2, y2, 'r', label="Train")
    #     plt.legend(handles=[p1, p2], numpoints=1)  # make legend
    #     plt.title(name_task + "_" + type_eval)
    #     plt.savefig(os.path.join(out_dir, name_task + "_" + type_eval + ".png"))
    #
    # plot_plots(dev_accu, train_accu, "Disease", "Accu")
    # plot_plots(dev_accu2, train_accu2, "Operation", "Accu")
    # plot_plots(dev_loss, train_loss, "MTL", "Loss")

    # evaluate the result with the best model
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    checkpoint_file = ckpt.model_checkpoint_path
    graph = tf.Graph()

    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            sess.run(tf.initialize_all_variables())
            saver.restore(sess, checkpoint_file)

            cooccur = graph.get_operation_by_name("cooccurence").outputs[0]
            input_t1 = graph.get_operation_by_name("input_tensor_description").outputs[0]
            input_t2 = graph.get_operation_by_name("input_tensor_operation").outputs[0]
            input_y1 = graph.get_operation_by_name("input_y_description").outputs[0]
            input_y2 = graph.get_operation_by_name("input_y_operation").outputs[0]
            prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            loss_opr = graph.get_operation_by_name("loss/loss").outputs[0]
            prediction = graph.get_operation_by_name("output/predictions1").outputs[0]
            prediction2 = graph.get_operation_by_name("output/predictions2").outputs[0]
            accu = graph.get_operation_by_name("accuracy/accuracy_d").outputs[0]
            accu2 = graph.get_operation_by_name("accuracy/accuracy_o").outputs[0]

            loss, pres1, pres2, accuracy1, accuracy2 = sess.run(
                [loss_opr, prediction, prediction2, accu, accu2],
                {input_t1: x_test_tensor, input_y1: y_test, cooccur: co_arr_test,
                 input_t2: x_test_tensor_o, input_y2: y2_test, prob: 1})

            eval_file = open(out_dir + "/evaluation.txt", "w+")
            right_file = open(out_dir + "/right_cases.txt", "w+")
            wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
            right_file2 = open(out_dir + "/right_cases_opr.txt", "w+")
            wrong_file2 = open(out_dir + "/wrong_cases_opr.txt", "w+")

            eval_file.write("Accu1: " + str(accuracy1) + "\n")
            eval_file.write("Accu2: " + str(accuracy2) + "\n")
            # eval_file.write("Stopped at: " + str(int(current_step / num_batches_per_epoch)) + "\n")
            eval_file.write("Default: " + str(N_default) + "\n")

            labels1 = np.argmax(y_test, 1)
            labels2 = np.argmax(y2_test, 1)
            write_evaluation_file(eval_file, right_file, wrong_file, labels1, pres1, x1_test, x2_test)
            write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, pres2, x3_test, x4_test)

            eval_file.write("Parameters:")
            for attr, value in sorted(FLAGS.__flags.items()):
                eval_file.write("{}={}".format(attr.upper(), value) + "\n")

            print("loss:" + str(loss))
            print("accuracy1:" + str(accuracy1))
            print("accuracy2:" + str(accuracy2))
    # Plot activation maps for one sample at each granularity
    # (string, character-embedding, word-embedding and sentence-embedding views).
    k = plot_activation(sample_idnex, k, row_n, col_n, conv1, "Str")
    k = plot_activation(sample_idnex, k, row_n, col_n, conv2,
                        "Character Embedding")
    k = plot_activation(sample_idnex, k, row_n, col_n, conv3, "Word Embedding")
    plot_activation(sample_idnex, k, row_n, col_n, conv4, "Sentence Embedding")

    # plt.colorbar()
    plt.savefig(dir_ + str(sample_idnex) + "activations_color" + ".jpg")


tf.flags.DEFINE_string("train_dir", "./", "Training dir root")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

task_num = 1
inpH = InputHelper()
max_document_length = 10
dir_ = "fig/"

model_dir = "./runs/NewExp/Single_task11503543419"
checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path
#
# mydir = "./Length" + str(max_document_length) + "/"
# x_test_tensor = np.load(mydir + "test_des" + ".npy")
x_test_tensor = np.load("./Tensor_files/0823/Length10/test_des.npy")
graph = tf.Graph()

with graph.as_default():
Example #4
import tensorflow as tf
import os
from DLDisambiguation.util.input_helpers import InputHelper
from tensor import Tensor
import numpy as np
from util.util import write_evaluation_file

tf.flags.DEFINE_string("train_dir", "./", "Training dir root")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

task_num = 2
inpH = InputHelper()
max_document_length = 20

name = "des" if task_num == 1 else "opr"

# load in model
model_dir = "./runs/Single_task21501595265"
checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path

lstm_dir = "Description1500991322" if task_num == 1 else "Operation1501000120"
lstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir)

# load data
load_Tensor = True
Example #5
from DLDisambiguation.util.input_helpers import InputHelper
from DLDisambiguation.util.preprocess import MyVocabularyProcessor
import tensorflow as tf
import os

# tf.flags.DEFINE_string("test_file", "../data/validation_data_0724_opr.txt", "training file (default: None)")
tf.flags.DEFINE_string("test_file", "../data/test_data_0816_des.txt",
                       "training file (default: None)")

FLAGS = tf.flags.FLAGS
inpH = InputHelper()
max_document_length = 10
y_is_value = True

model_dir = "./Exp/runs/Description1502955472"
# model_dir = "./runs/Description1500991322"  # 0.760
# model_dir = "./runs/1500428748" # 0.69
# model_dir = "./runs/Description1500983617"  # 0.767
# model_dir = "./runs/Description1501058401" # 0.754
# model_dir = "./runs/Operation1501000120" # 0.809

checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path

vocab_file = os.path.join(checkpoint_dir, "vocab")
# load vocabulary model
vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0)
vocab_processor = vocab_processor.restore(vocab_file)
Example #6
def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()

    train_f = os.path.join(FLAGS.train_dir,
                           'data/exp0803/training_dynamic_data.txt')
    dev_f = os.path.join(FLAGS.train_dir,
                         'data/exp0803/validation_dynamic_data.txt')
    test_f = os.path.join(FLAGS.train_dir,
                          'data/exp0803/test_dynamic_data.txt')

    our_dir = "./Tensor_files/0803_dynamic/"
    # our_dir = "./Length" + str(FLAGS.max_sequence_len) + "/"
    x_train_tensor = np.load(our_dir + "train_des.npy")
    x_dev_tensor = np.load(our_dir + "dev_des.npy")
    x_test_tensor = np.load(our_dir + "test_des.npy")

    # our_dir = "./Length" + str(FLAGS.max_sequence_len2) + "/"
    x_train_tensor_o = np.load(our_dir + "train_opr.npy")
    x_dev_tensor_o = np.load(our_dir + "dev_opr.npy")
    x_test_tensor_o = np.load(our_dir + "test_opr.npy")

    x_train_indi_o = 1 - np.load(our_dir + "train_indi_opr.npy")
    x_dev_indi_o = 1 - np.load(our_dir + "dev_indi_opr.npy")
    x_test_indi_o = 1 - np.load(our_dir + "test_indi_opr.npy")

    sep = "\t"
    i1, x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels_Dyna(
        train_f, sep, FLAGS.max_sequence_len)
    i2, x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels_Dyna(
        dev_f, sep, FLAGS.max_sequence_len)
    i3, x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels_Dyna(
        test_f, sep, FLAGS.max_sequence_len)

    des_e_names, des_opr_map = load_coocurrence_matrix("result.txt")
    co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test)
    co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train,
                                   x4_train)
    co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev)

    with tf.Graph().as_default():

        sess = tf.Session()
        with sess.as_default():
            cnn = MT_Dynamic_MultiGranModel(
                max_len1=FLAGS.max_sequence_len,
                max_len2=FLAGS.max_sequence_len2,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
            )

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)

            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            saver = tf.train.Saver(tf.all_variables(), max_to_keep=20)

            # Keep track of gradient values and sparsity (optional)
            for g, v in grads_and_vars:
                if g is not None:
                    tf.summary.histogram("grad_hist/{}".format(v.name), g)
                    tf.summary.scalar("grad_sparsity/{}".format(v.name),
                                      tf.nn.zero_fraction(g))
                    tf.summary.histogram(v.name, v)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(FLAGS.train_dir, "runs", "multitask" + timestamp))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            print("Writing to {}\n".format(out_dir))

            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d)
            acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o)

            # Train Summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge(
                [loss_summary, acc_summary1, acc_summary2])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)

                feed_dict = {
                    cnn.input_tensor: x_batch,
                    cnn.input_y_description: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.input_tensor_o: x_batch2,
                    cnn.input_y_operation: y_batch2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }
                _, step, summaries, loss, accuracy1, accuracy2 = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy_d, cnn.accuracy_o
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if step % 10 == 0:
                    print(
                        "{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(
                            time_str, step, loss, accuracy1, accuracy2))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_dev,
                         y_batch_dev,
                         x_dev2,
                         y_batch_dev2,
                         indi,
                         co_arr,
                         writer=None):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }
                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy_d,
                        cnn.accuracy_o, cnn.scores_d, cnn.scores_o
                    ], feed_dict)
                if writer:
                    writer.add_summary(summaries, step)
                return loss, accuracy1, accuracy2

            def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, indi,
                         co_arr):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }

                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy_d,
                        cnn.accuracy_o, cnn.scores_d, cnn.scores_o
                    ], feed_dict)

                eval_file = open(out_dir + "/evaluation.txt", "w+")
                right_file = open(out_dir + "/right_cases.txt", "w+")
                right_file2 = open(out_dir + "/right_cases_operation.txt",
                                   "w+")
                wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
                wrong_file2 = open(out_dir + "/wrong_cases_operation.txt",
                                   "w+")

                eval_file.write("Accu1: " + str(accuracy1) + "\n")
                eval_file.write("Accu2: " + str(accuracy2) + "\n")

                predictions1 = np.argmax(pres1, 1)
                predictions2 = np.argmax(pres2, 1)
                labels1 = np.argmax(y_batch_dev, 1)
                labels2 = np.argmax(y_batch_dev2, 1)

                def process(indi, tensor):
                    tmp = []
                    ll = len(indi)
                    for i in range(ll):
                        if indi[i] == 0:
                            tmp.append(tensor[i])
                    return np.asarray(tmp)

                write_evaluation_file(eval_file, right_file, wrong_file,
                                      labels1, predictions1, x1_test, x2_test)
                write_evaluation_file(eval_file, right_file2, wrong_file2,
                                      labels2, predictions2, x3_test, x4_test,
                                      indi)

                eval_file.write("Parameters:")
                for attr, value in sorted(FLAGS.__flags.items()):
                    eval_file.write("{}={}".format(attr.upper(), value) + "\n")

                return loss, accuracy1, accuracy2

            def dev_whole(x_dev,
                          y_dev,
                          x_dev2,
                          y_dev2,
                          indi,
                          co_dev_arr,
                          writer=None):
                batches_dev = inputH.batch_iter(list(
                    zip(x_dev, y_dev, co_dev_arr)),
                                                FLAGS.batch_size,
                                                1,
                                                shuffle=False)
                batches_dev2 = inputH.batch_iter(list(zip(
                    x_dev2, y_dev2, indi)),
                                                 FLAGS.batch_size,
                                                 1,
                                                 shuffle=False)
                losses = []
                accuracies1 = []
                accuracies2 = []

                batches = zip(batches_dev, batches_dev2)

                for batches_dev, batches_dev2 in batches:
                    x_batch, y_batch, co_arr = zip(*batches_dev)
                    x_batch2, y_batch2, indi = zip(*batches_dev2)
                    loss, accuracy1, accuracy2 = dev_step(
                        x_batch, y_batch, x_batch2, y_batch2, indi, co_arr,
                        writer)
                    losses.append(loss)
                    accuracies1.append(accuracy1)
                    accuracies2.append(accuracy2)
                return np.mean(np.array(losses)), np.mean(
                    np.array(accuracies1)), np.mean(np.array(accuracies2))

            def overfit(dev_loss):
                # Early-stopping check: report overfitting when the tracked dev metric
                # has been non-increasing over the last few evaluations.
                n = len(dev_loss)
                if n < 5:
                    return False
                for i in xrange(n - 4, n):
                    if dev_loss[i] > dev_loss[i - 1]:
                        return False
                return True

            # Generate batches
            batches = inputH.batch_iter(
                list(
                    zip(x_train_tensor, y_train, x_train_tensor_o, y2_train,
                        x_train_indi_o, co_arr_train)), FLAGS.batch_size,
                FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_loss = []
            dev_loss2 = []
            # batch_d_o = zip(batches, batches2)
            for batch in batches:
                x_batch, y_batch, x_batch2, y_batch2, indi, co_arr = zip(
                    *batch)

                train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every == 0:

                    print("\nEvaluation:")
                    loss, accuracy1, accuracy2 = dev_whole(
                        x_dev_tensor,
                        y_dev,
                        x_dev_tensor_o,
                        y2_dev,
                        x_dev_indi_o,
                        co_arr_val,
                        writer=dev_summary_writer)

                    time_str = datetime.datetime.now().isoformat()
                    print(
                        "{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(
                            time_str, loss, accuracy1, accuracy2))
                    dev_loss.append(accuracy1)
                    dev_loss2.append(accuracy2)

                    print("\nRecently accuracy:")
                    print dev_loss[-10:]
                    print dev_loss2[-10:]

                    if overfit(dev_loss):
                        print('Overfit!! in task1')
                        break
                    if overfit(dev_loss2):
                        print('Overfit!! in task2')
                        break
                    print("")

                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

            loss, accuracy1, accuracy2 = evaluate(x_test_tensor, y_test,
                                                  x_test_tensor_o, y2_test,
                                                  x_test_indi_o, co_arr_test)
            print(loss)
            print(accuracy1)
            print(accuracy2)
Example #7
    def __init__(self):
        # maximum length of a description/operation segment: shorter segments are padded, and pairs longer than this are dropped
        max_document_length = 20
        inpH = InputHelper()
        y_is_value = True  # flag indicating whether y is a scalar label (0/1) rather than a one-hot array ([0, 1] / [1, 0])
        # train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets_File(FLAGS.training_files, "\t",
        #                                                                           max_document_length,
        #                                                                           10,  # 10---percent_dev
        #                                                                           FLAGS.batch_size, y_value=y_is_value)

        # test_x1, test_x2, test_y = inpH.getTestDataSet(FLAGS.test_file, "\t\t", vocab_processor, max_document_length, y_is_value)

        task_num = 2
        d_type = "Description" if task_num == 1 else "Operation"
        embedding_dir = "../data/word2vec/character_model.txt" if task_num == 1 \
            else "../data/operation/character_model.txt"
        name = "des" if task_num == 1 else "opr"

        time_gen = "0823"
        data_file = os.path.join(
            FLAGS.train_dir,
            "data/train_data_" + time_gen + "_" + name + ".txt")
        data_file_test = os.path.join(
            FLAGS.train_dir,
            "data/test_data_" + time_gen + "_" + name + ".txt")
        # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt")

        sep = "\t"
        train_x1, train_x2, train_y = inpH.getTsvTestData(
            data_file, sep, max_document_length, y_is_value)
        test_x1, test_x2, test_y = inpH.getTsvTestData(data_file_test, sep,
                                                       max_document_length,
                                                       y_is_value)
        # dev_x1, dev_x2, dev_y = inpH.getTsvTestData(data_file_val, sep, max_document_length, y_is_value)
        dev_x1, dev_x2, dev_y = test_x1, test_x2, test_y
        sum_no_of_batches = len(train_y) // FLAGS.batch_size

        vocab_processor = MyVocabularyProcessor(max_document_length,
                                                min_frequency=0)
        vocab_processor.fit_transform(
            np.concatenate((train_x1, train_x2, dev_x1, dev_x2)))
        # vocab_processor.fit_transform(np.concatenate((train_x1, train_x2, test_x1, test_x2, dev_x1, dev_x2)))
        print("Length of loaded vocabulary ={}".format(
            len(vocab_processor.vocabulary_)))

        train_set = inpH.get_data(vocab_processor, train_x1, train_x2, train_y,
                                  max_document_length)
        dev_set = inpH.get_data(vocab_processor, dev_x1, dev_x2, dev_y,
                                max_document_length)
        test_set = inpH.get_data(vocab_processor, test_x1, test_x2, test_y,
                                 max_document_length)

        # load in word2vec model
        embedding_matrix = self.getEmbeddingMatrix(embedding_dir,
                                                   vocab_processor)

        sess = tf.Session()
        with sess.as_default():
            siameseModel = SiameseLSTM(sequence_length=max_document_length,
                                       vocab_processer=vocab_processor,
                                       embedding_size=FLAGS.embedding_dim,
                                       hidden_unit_size=FLAGS.hidden_units,
                                       l2_reg_lambda=FLAGS.l2_reg_lambda,
                                       batch_size=FLAGS.batch_size,
                                       embedding_arr=embedding_matrix)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            print("initialized siameseModel object")

            grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
            tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                                  global_step=global_step)
            print("defined training_ops")

            # Keep track of variables, gradient values and sparsity
            for g, v in grads_and_vars:
                if g is not None:
                    tf.summary.histogram("grad_hist/{}".format(v.name), g)
                    tf.summary.histogram("grad_sparsity/{}".format(v.name),
                                         tf.nn.zero_fraction(g))
                    tf.summary.histogram(v.name, v)
            print("defined gradient summaries")

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "Exp" + time_gen, "runs",
                             d_type + timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", siameseModel.loss)
            acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)

            # Train Summaries
            train_summary_merged = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables(), max_to_keep=100)

            # Write vocabulary
            vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.initialize_all_variables())
            print("init all variables")

            graph_def = tf.get_default_graph().as_graph_def()
            graphpb_txt = str(graph_def)
            with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
                f.write(graphpb_txt)

            def train_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m,
                           y_batch):

                feed_dict = {
                    siameseModel.input_x1: x1_batch,
                    siameseModel.input_x2: x2_batch,
                    siameseModel.mask_x1: x1_batch_m,
                    siameseModel.mask_x2: x2_batch_m,
                    siameseModel.input_y: y_batch,
                }

                _, step, summaries, loss, accuracy, dist = sess.run([
                    tr_op_set, global_step, train_summary_merged,
                    siameseModel.loss, siameseModel.accuracy,
                    siameseModel.distance
                ], feed_dict)

                time_str = datetime.datetime.now().isoformat()
                # Threshold the Siamese distances into hard predictions:
                # dist < 0.5 -> 1 (match), dist >= 0.5 -> 0 (via the 999.0 placeholder).
                d = np.copy(dist)
                d[d >= 0.5] = 999.0
                d[d < 0.5] = 1
                d[d > 1.0] = 0
                accuracy_t = np.mean(y_batch == d)
                print("TRAIN {}: step {}, loss {:g}, acc {:g}, acc_t {:g}".
                      format(time_str, step, loss, accuracy, accuracy_t))
                print(y_batch)
                print(dist)
                print(d)
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch):

                feed_dict = {
                    siameseModel.input_x1: x1_batch,
                    siameseModel.input_x2: x2_batch,
                    siameseModel.mask_x1: x1_batch_m,
                    siameseModel.mask_x2: x2_batch_m,
                    siameseModel.input_y: y_batch,
                }

                step, summaries, loss, accuracy, dist = sess.run([
                    global_step, dev_summary_op, siameseModel.loss,
                    siameseModel.accuracy, siameseModel.distance
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                d = np.copy(dist)
                d[d >= 0.5] = 999.0
                d[d < 0.5] = 1
                d[d > 1.0] = 0
                accuracy_t = np.mean(y_batch == d)
                print(
                    "DEV {}: step {}, loss {:g}, acc {:g}, acc_t {:g}".format(
                        time_str, step, loss, accuracy, accuracy_t))
                print(y_batch)
                print(dist)
                print(d)
                dev_summary_writer.add_summary(summaries, step)
                return accuracy

            def overfit(dev_loss, accu):
                num = 6
                n = len(dev_loss)
                if n < num:
                    return False
                for i in xrange(n - num, n):
                    if dev_loss[i] < accu:
                        return False
                print(dev_loss)
                print(accu)
                return True

            def evaluate(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch,
                         mention, entity):

                feed_dict = {
                    siameseModel.input_x1: x1_batch,
                    siameseModel.input_x2: x2_batch,
                    siameseModel.mask_x1: x1_batch_m,
                    siameseModel.mask_x2: x2_batch_m,
                    siameseModel.input_y: y_batch,
                }

                loss, accuracy, dist = sess.run([
                    siameseModel.loss, siameseModel.accuracy,
                    siameseModel.distance
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("Test {}: loss {:g}, acc {:g}".format(
                    time_str, loss, accuracy))
                print(dist)

                eval_file = open(out_dir + "/evaluation.txt", "w+")
                right_file = open(out_dir + "/right_cases.txt", "w+")
                wrong_file = open(out_dir + "/wrong_cases.txt", "w+")

                eval_file.write("Accu: " + str(accuracy) + "\n")
                eval_file.write("Dataset: " + data_file + "\n")
                eval_file.write("Early Stopped at: " + str(stop_p) + "\n")

                d = np.copy(dist)
                d[d >= 0.5] = 999.0
                d[d < 0.5] = 1
                d[d > 1.0] = 0

                predictions = d
                write_evaluation_file(eval_file, right_file, wrong_file,
                                      y_batch, predictions, mention, entity)
                return accuracy

            # Generate batches
            batches = inpH.batch_iter(
                list(
                    zip(train_set[0], train_set[1], train_set[2], train_set[3],
                        train_set[4])), FLAGS.batch_size, FLAGS.num_epochs)

            max_validation_acc = 0.0
            num_batches_per_epoch = int(len(train_set[0]) / FLAGS.batch_size)
            print(num_batches_per_epoch)
            max_accu = 0
            stop_p = FLAGS.num_epochs  # epoch count reported if early stopping never triggers
            dev_accu = []

            for nn in xrange(sum_no_of_batches * FLAGS.num_epochs):
                batch = batches.next()
                if len(batch) < 1:
                    continue
                x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch = zip(
                    *batch)
                if len(y_batch) < 1:
                    continue

                train_step(x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch)

                current_step = tf.train.global_step(
                    sess, global_step)  # get the global step.
                sum_acc = 0.0
                tmp = []

                if current_step % num_batches_per_epoch == 0:
                    print("\nEvaluation:")
                    # dev_batches = inpH.batch_iter(list(zip(dev_set[0], dev_set[1], dev_set[2], dev_set[3], dev_set[4])),
                    #                               FLAGS.batch_size, 1)
                    # for db in dev_batches:
                    #     if len(db) < 1:
                    #         continue
                    #     x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b = zip(*db)
                    #     if len(y_dev_b) < 1:
                    #         continue
                    #     acc = dev_step(x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b)
                    #     sum_acc = sum_acc + acc
                    #     tmp.append(acc)
                    #
                    # acc_mean = np.mean(tmp)
                    acc_mean = dev_step(dev_set[0], dev_set[1], dev_set[2],
                                        dev_set[3], dev_set[4])
                    dev_accu.append(acc_mean)

                    if overfit(dev_accu, acc_mean):
                        print('Overfit!!')
                        print("Optimum" + str(max_accu))
                        print(current_step)
                        stop_p = current_step / num_batches_per_epoch
                        print(stop_p)
                        break

                    if acc_mean >= max_accu:
                        max_accu = acc_mean
                        saver.save(
                            sess, checkpoint_prefix,
                            global_step=current_step)  # save checkpoints
                        tf.train.write_graph(sess.graph.as_graph_def(),
                                             checkpoint_prefix,
                                             "graph" + str(nn) + ".pb",
                                             as_text=False)  # save graph_def
                        print(
                            "Saved model {} with dev accuracy={} checkpoint to {}\n"
                            .format(nn, max_accu, checkpoint_prefix))

            evaluate(test_set[0], test_set[1], test_set[2], test_set[3],
                     test_set[4], test_x1, test_x2)


import tensorflow as tf
import os
import numpy as np


def writeToFile(m, file):
    l = len(m)
    for i in range(l):
        l_col = len(m[i])
        file.write("[")
        for j in range(l_col):
            file.write(str(m[i][j]) + " ")
        file.write("]\n")
    file.write("\n")

inpH = InputHelper()
max_document_length = 10
y_is_value = True


def get_data(vocab_processor, inpH, train_x1, train_x2, train_y, max_document_length):
    train_x1_i = np.asarray(list(vocab_processor.transform(train_x1)))
    train_x2_i = np.asarray(list(vocab_processor.transform(train_x2)))

    mask_train_x1 = np.zeros([len(train_x1_i), max_document_length])
    mask_train_x2 = np.zeros([len(train_x2_i), max_document_length])

    new_mask_x1, new_mask_x2 = inpH.padding_and_generate_mask(train_x1, train_x2, mask_train_x1, mask_train_x2)
    return (train_x1_i, train_x2_i, new_mask_x1, new_mask_x2, train_y)