Example No. 1
# The snippet starts mid-block; this guard and split are assumed, following
# the pattern of the field checks below.
if ctf_challs["connection"]:
    connections = ctf_challs["connection"].split(',')
    conn = []
    for connection in connections:
        conn.append(
            connection.replace('nc', '').strip(' ').replace(':',
                                                            ' ').split(' '))
else:
    conn = ''
if ctf_challs["files"]:
    files = ctf_challs["files"].split(',')
else:
    files = [""]
if ctf_challs["category"]:
    category = ctf_challs["category"]
else:
    category = ''
if ctf_challs["points"]:
    points = ctf_challs["points"]
else:
    points = ''
if ctf_challs["links"]:
    links = helper.sanitise(ctf_challs["links"])
else:
    links = ''
if ctf_challs["tags"]:
    tags = helper.sanitise(ctf_challs["tags"])
else:
    tags = ''

Readme = helper.genWriteup(name, conn, files, desc, category, points, links,
                           tags)
helper.prepare(name, Readme, category, files, conn)
Example No. 2
def dev_point_wise():
    if FLAGS.data == 'TREC' or FLAGS.data == 'sst2':
        train, dev, test = load_trec_sst2(FLAGS.data)
    else:
        train, dev = load(FLAGS.data)
    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    print(q_max_sent_length)
    print(len(train))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('dev length', len(dev))
    if FLAGS.data == 'TREC' or FLAGS.data == 'sst2':
        alphabet, embeddings = prepare([train, dev, test],
                                       max_sent_length=q_max_sent_length,
                                       dim=FLAGS.embedding_dim,
                                       is_embedding_needed=True,
                                       fresh=True)
    else:
        alphabet, embeddings = prepare([train, dev],
                                       max_sent_length=q_max_sent_length,
                                       dim=FLAGS.embedding_dim,
                                       is_embedding_needed=True,
                                       fresh=True)
    print('alphabet:', len(alphabet))
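    # Build the TF1 graph; allow_soft_placement lets unsupported ops fall
    # back to CPU, and allow_growth stops TF from grabbing all GPU memory.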
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            session_conf = tf.ConfigProto()
            session_conf.allow_soft_placement = FLAGS.allow_soft_placement
            session_conf.log_device_placement = FLAGS.log_device_placement
            session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        now = int(time.time())
        timeArray = time.localtime(now)
        timeStamp1 = time.strftime("%Y%m%d%H%M%S", timeArray)
        timeDay = time.strftime("%Y%m%d", timeArray)
        print(timeStamp1)
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            cnn = CNN(max_input_left=q_max_sent_length,
                      vocab_size=len(alphabet),
                      embeddings=embeddings,
                      embedding_size=FLAGS.embedding_dim,
                      batch_size=FLAGS.batch_size,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda,
                      is_Embedding_Needed=True,
                      trainable=FLAGS.trainable,
                      dataset=FLAGS.data,
                      extend_feature_dim=FLAGS.extend_feature_dim)
            cnn.build_graph()
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            sess.run(tf.global_variables_initializer())
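            # Epoch loop: train on point-wise batches, then evaluate on train
            # and dev; checkpoint whenever dev accuracy improves.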
            acc_max = 0.0
            for i in range(FLAGS.num_epochs):
                datas = batch_gen_with_point_wise(train,
                                                  alphabet,
                                                  FLAGS.batch_size,
                                                  q_len=q_max_sent_length)
                for data in datas:
                    feed_dict = {
                        cnn.question: data[0],
                        cnn.input_y: data[1],
                        cnn.q_position: data[2],
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, loss, accuracy = sess.run(
                        [train_op, global_step, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}  ".format(
                        time_str, step, loss, accuracy))
                predicted = predict(sess, cnn, train, alphabet,
                                    FLAGS.batch_size, q_max_sent_length)
                predicted_label = np.argmax(predicted, 1)
                acc_train = accuracy_score(predicted_label, train['flag'])
                predicted_dev = predict(sess, cnn, dev, alphabet,
                                        FLAGS.batch_size, q_max_sent_length)
                predicted_label = np.argmax(predicted_dev, 1)
                acc_dev = accuracy_score(predicted_label, dev['flag'])
                if acc_dev > acc_max:
                    tf.train.Saver().save(sess,
                                          "model_save/model",
                                          write_meta_graph=True)
                    acc_max = acc_dev
                print("{}:train epoch:acc {}".format(i, acc_train))
                print("{}:dev epoch:acc {}".format(i, acc_dev))
                line2 = " {}:epoch: acc{}".format(i, acc_dev)
                log.write(line2 + '\n')
                log.flush()
            acc_flod.append(acc_max)
            log.close()
Example No. 3
logger = logging.getLogger(program)
#%%  Get configuration
FLAGS = config.flags.FLAGS
opts = FLAGS.flag_values_dict()
for item in opts:
    logger.info('{} : {}'.format(item, opts[item]))
#%%
############## Data loading #################
logger.info('load data ...........')
train, dev, test = helper.load(opts['data_dir'])

max_sent_length = max(map(lambda x: len(x), train['question'].str.split()))
max_sent_length = 33  # hard-coded override of the computed maximum
#%%
############## Data preprocessing ###############
alphabet, embeddings = helper.prepare([train, test, dev],
                                       dim=opts['embedding_dim'])

#%%
############## Model #################

opts['embeddings'] = embeddings
opts['max_input_sentence'] = max_sent_length
opts['vocab_size'] = len(alphabet)


def embedding_look_up(data, embedding):
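    # Replace each token id in every sequence with its embedding vector;
    # returns an array of shape (batch, seq_len, embedding_dim).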
    new_data = []
    for _, d in enumerate(data):
        emb = [embedding[i] for i in d]
        new_data.append(emb)
    return np.array(new_data)
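
# A minimal usage sketch; "ids" and "table" are hypothetical stand-ins for
# id sequences and an embedding matrix.
ids = [[1, 2, 3], [4, 5, 6]]
table = np.random.rand(10, 50)          # vocab of 10, dimension 50
batch = embedding_look_up(ids, table)   # -> shape (2, 3, 50)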
Example No. 4

if __name__ == '__main__':
    if FLAGS.data == 'TREC' or FLAGS.data == 'sst2':
        for attr, value in sorted(FLAGS.__flags.items()):
            print(("{}={}".format(attr.upper(), value)))
        dev_point_wise()
        ckpt = tf.train.get_checkpoint_state("model_save" + '/')
        saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path +
                                           '.meta')
        train, dev, test = load_trec_sst2(FLAGS.data)
        q_max_sent_length = max(
            map(lambda x: len(x), train['question'].str.split()))
        alphabet, embeddings = prepare([train, test, dev],
                                       max_sent_length=q_max_sent_length,
                                       dim=FLAGS.embedding_dim,
                                       is_embedding_needed=True,
                                       fresh=True)
        with tf.Session() as sess:
            saver.restore(sess, ckpt.model_checkpoint_path)
            graph = tf.get_default_graph()
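            # Recover the input placeholders from the restored graph by the
            # names they were given when the graph was built.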
            scores = []
            question = graph.get_operation_by_name('input_question').outputs[0]
            q_position = graph.get_operation_by_name('q_position').outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                'dropout_keep_prob').outputs[0]
            for data in batch_gen_with_single(test, alphabet, FLAGS.batch_size,
                                              q_max_sent_length):
                feed_dict = {
                    question.name: data[0],
                    q_position.name: data[1],
                    # assumed completion: the snippet is truncated here
                    dropout_keep_prob.name: 1.0
                }
Example No. 5
def test_pair_wise(dns=FLAGS.dns):
    train, test, dev = load(FLAGS.data, filter=FLAGS.clean)
    # train = train[:10000]
    # test = test[:10000]
    # dev = dev[:10000]
    # submit = submit[:1000]
    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split()))
    print('q_question_length:{} a_question_length:{}'.format(
        q_max_sent_length, a_max_sent_length))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))
    alphabet, embeddings = prepare([train, test, dev],
                                   dim=FLAGS.embedding_dim,
                                   is_embedding_needed=True,
                                   fresh=FLAGS.fresh)
    # alphabet,embeddings = prepare_300([train,test,dev])
    print('alphabet:', len(alphabet))
    with tf.Graph().as_default(), tf.device("/gpu:" + str(FLAGS.gpu)):
        # with tf.device("/cpu:0"):
        session_conf = tf.ConfigProto()
        session_conf.allow_soft_placement = FLAGS.allow_soft_placement
        session_conf.log_device_placement = FLAGS.log_device_placement
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            folder = 'runs/' + timeDay + '/' + timeStamp + '/'
            out_dir = folder + FLAGS.data
            if not os.path.exists(folder):
                os.makedirs(folder)
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            print "start build model"
            cnn = QA_RNN_extend(max_input_left=q_max_sent_length,
                                max_input_right=a_max_sent_length,
                                batch_size=FLAGS.batch_size,
                                vocab_size=len(alphabet),
                                embedding_size=FLAGS.embedding_dim,
                                filter_sizes=list(
                                    map(int, FLAGS.filter_sizes.split(","))),
                                num_filters=FLAGS.num_filters,
                                dropout_keep_prob=FLAGS.dropout_keep_prob,
                                embeddings=embeddings,
                                l2_reg_lambda=FLAGS.l2_reg_lambda,
                                overlap_needed=FLAGS.overlap_needed,
                                learning_rate=FLAGS.learning_rate,
                                trainable=FLAGS.trainable,
                                extend_feature_dim=FLAGS.extend_feature_dim,
                                pooling=FLAGS.pooling,
                                position_needed=FLAGS.position_needed,
                                conv=FLAGS.conv)
            cnn.build_graph()

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
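            # Summary writers for TensorBoard; the train writer also logs the
            # graph definition.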
            train_writer = tf.summary.FileWriter(log_dir + '/train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(log_dir + '/test')
            # Initialize all variables
            print "build over"
            sess.run(tf.global_variables_initializer())
            print "variables_initializer"

            map_max = 0.65
            for i in range(FLAGS.num_epochs):
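                # Dynamic negative sampling: use the current model to pick
                # hard negatives before building the pairwise batches.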
                if FLAGS.dns:
                    samples = dns_sample(train,
                                         alphabet,
                                         q_max_sent_length,
                                         a_max_sent_length,
                                         sess,
                                         cnn,
                                         FLAGS.batch_size,
                                         neg_sample_num=10)
                    datas = batch_gen_with_pair_dns(samples, FLAGS.batch_size)
                    print('load dns data')
                    for data in datas:
                        feed_dict = {
                            cnn.question: data[0],
                            cnn.answer: data[1],
                            cnn.answer_negative: data[2]
                        }
                        _, step, loss, accuracy, score12, score13 = sess.run([
                            cnn.train_op, cnn.global_step, cnn.loss,
                            cnn.accuracy, cnn.score12, cnn.score13
                        ], feed_dict)
                        time_str = datetime.datetime.now().isoformat()
                        print(
                            "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}"
                            .format(time_str, step, loss, accuracy,
                                    np.mean(score12), np.mean(score13)))
                        line = "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}".format(
                            time_str, step, loss, accuracy, np.mean(score12),
                            np.mean(score13))
                else:
                    d = get_overlap_dict(train,
                                         alphabet,
                                         q_len=q_max_sent_length,
                                         a_len=a_max_sent_length)
                    datas = batch_gen_with_pair_overlap(
                        train,
                        alphabet,
                        FLAGS.batch_size,
                        q_len=q_max_sent_length,
                        a_len=a_max_sent_length,
                        fresh=FLAGS.fresh,
                        overlap_dict=d)
                    print "load data"
                    for data in datas:
                        feed_dict = {
                            cnn.question: data[0],
                            cnn.answer: data[1],
                            cnn.answer_negative: data[2],
                            cnn.q_pos_overlap: data[3],
                            cnn.q_neg_overlap: data[4],
                            cnn.a_pos_overlap: data[5],
                            cnn.a_neg_overlap: data[6],
                            cnn.q_position: data[7],
                            cnn.a_pos_position: data[8],
                            cnn.a_neg_position: data[9]
                        }
                        _, summary, step, loss, accuracy, score12, score13 = sess.run(
                            [
                                cnn.train_op, cnn.merged, cnn.global_step,
                                cnn.loss, cnn.accuracy, cnn.score12,
                                cnn.score13
                            ], feed_dict)
                        train_writer.add_summary(summary, i)
                        time_str = datetime.datetime.now().isoformat()
                        print(
                            "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}"
                            .format(time_str, step, loss, accuracy,
                                    np.mean(score12), np.mean(score13)))
                        line = "{}: step {}, loss {:g}, acc {:g} ,positive {:g},negative {:g}".format(
                            time_str, step, loss, accuracy, np.mean(score12),
                            np.mean(score13))
                        # print loss
                if i % 1 == 0:
                    predicted_dev = predict(sess, cnn, dev, alphabet,
                                            FLAGS.batch_size,
                                            q_max_sent_length,
                                            a_max_sent_length)
                    map_mrr_dev = evaluation.evaluationBypandas(
                        dev, predicted_dev)
                    predicted_test = predict(sess, cnn, test, alphabet,
                                             FLAGS.batch_size,
                                             q_max_sent_length,
                                             a_max_sent_length)
                    map_mrr_test = evaluation.evaluationBypandas(
                        test, predicted_test)

                    print "{}:epoch:dev map mrr {}".format(i, map_mrr_dev)
                    print "{}:epoch:test map mrr {}".format(i, map_mrr_test)
                    line = " {}:epoch: map_dev{}-------map_mrr_test{}".format(
                        i, map_mrr_dev[0], map_mrr_test)
                    if map_mrr_dev[0] > map_max:
                        map_max = map_mrr_dev[0]
                        # timeStamp = time.strftime("%Y%m%d%H%M%S", time.localtime(int(time.time())))

                        save_path = saver.save(sess, out_dir)
                        print "Model saved in file: ", save_path

                log.write(line + '\n')
                log.flush()
            print('train over')
            saver.restore(sess, out_dir)
            predicted = predict(sess, cnn, train, alphabet, FLAGS.batch_size,
                                q_max_sent_length, a_max_sent_length)
            train['predicted'] = predicted
            train['predicted'].to_csv('train.QApair.TJU_IR_QA2017_train.score',
                                      index=False,
                                      sep='\t')
            map_mrr_train = evaluation.evaluationBypandas(train, predicted)

            predicted_dev = predict(sess, cnn, dev, alphabet, FLAGS.batch_size,
                                    q_max_sent_length, a_max_sent_length)
            dev['predicted'] = predicted_dev
            dev['predicted'].to_csv('train.QApair.TJU_IR_QA2017_dev.score',
                                    index=False,
                                    sep='\t')
            map_mrr_dev = evaluation.evaluationBypandas(dev, predicted_dev)

            predicted_test = predict(sess, cnn, test, alphabet,
                                     FLAGS.batch_size, q_max_sent_length,
                                     a_max_sent_length)

            test['predicted'] = predicted_test
            test['predicted'].to_csv('train.QApair.TJU_IR_QA2017.score',
                                     index=False,
                                     sep='\t')
            map_mrr_test = evaluation.evaluationBypandas(test, predicted_test)

            print('map_mrr train', map_mrr_train)
            print('map_mrr dev', map_mrr_dev)
            print('map_mrr test', map_mrr_test)
            log.write(str(map_mrr_train) + '\n')
            log.write(str(map_mrr_test) + '\n')
            log.write(str(map_mrr_dev) + '\n')
            predict(sess, cnn, train[:100], alphabet, 20, q_max_sent_length,
                    a_max_sent_length)
Example No. 6
def test_point_wise():
    train, dev, test = load(FLAGS.data, filter=FLAGS.clean)  # wiki
    # train, test, dev = load(FLAGS.data, filter=FLAGS.clean) #trec
    q_max_sent_length = max(map(lambda x: len(x),
                                test['question'].str.split()))
    a_max_sent_length = 2
    print(q_max_sent_length)
    print(len(train))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))

    alphabet, embeddings = prepare([train, test, dev],
                                   max_sent_length=q_max_sent_length,
                                   dim=FLAGS.embedding_dim,
                                   is_embedding_needed=True,
                                   fresh=True)
    print('alphabet:', len(alphabet))
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            # session_conf = tf.ConfigProto(
            #     allow_soft_placement=FLAGS.allow_soft_placement,
            #     log_device_placement=FLAGS.log_device_placement)

            session_conf = tf.ConfigProto()
            session_conf.allow_soft_placement = FLAGS.allow_soft_placement
            session_conf.log_device_placement = FLAGS.log_device_placement
            session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        now = int(time.time())
        timeArray = time.localtime(now)
        timeStamp1 = time.strftime("%Y%m%d%H%M%S", timeArray)
        timeDay = time.strftime("%Y%m%d", timeArray)
        print(timeStamp1)
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            cnn = BiLSTM(max_input_left=q_max_sent_length,
                         vocab_size=len(alphabet),
                         embeddings=embeddings,
                         embedding_size=FLAGS.embedding_dim,
                         batch_size=FLAGS.batch_size,
                         filter_sizes=list(
                             map(int, FLAGS.filter_sizes.split(","))),
                         num_filters=FLAGS.num_filters,
                         l2_reg_lambda=FLAGS.l2_reg_lambda,
                         is_Embedding_Needed=True,
                         trainable=FLAGS.trainable,
                         overlap_needed=FLAGS.overlap_needed,
                         position_needed=FLAGS.position_needed,
                         pooling=FLAGS.pooling,
                         hidden_num=FLAGS.hidden_num,
                         extend_feature_dim=FLAGS.extend_feature_dim)
            cnn.build_graph()
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
            sess.run(tf.global_variables_initializer())
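            # Track the best test MAP; a checkpoint directory is only created
            # when it improves (the actual save is commented out below).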
            map_max = 0.65
            for i in range(FLAGS.num_epochs):
                datas = batch_gen_with_point_wise(train,
                                                  alphabet,
                                                  FLAGS.batch_size,
                                                  q_len=q_max_sent_length,
                                                  a_len=a_max_sent_length)
                for data in datas:
                    feed_dict = {
                        cnn.question: data[0],
                        cnn.input_y: data[1],
                        cnn.q_position: data[2],
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, loss, accuracy = sess.run(
                        [train_op, global_step, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}  ".format(
                        time_str, step, loss, accuracy))
                now = int(time.time())
                timeArray = time.localtime(now)
                timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
                timeDay = time.strftime("%Y%m%d", timeArray)
                print(timeStamp1)
                print(timeStamp)
                predicted = predict(sess, cnn, train, alphabet,
                                    FLAGS.batch_size, q_max_sent_length,
                                    a_max_sent_length)
                predicted_label = np.argmax(predicted, 1)
                map_mrr_train = evaluation.evaluationBypandas_f1_acc(
                    train, predicted[:, -1], predicted_label)
                predicted_test = predict(sess, cnn, test, alphabet,
                                         FLAGS.batch_size, q_max_sent_length,
                                         a_max_sent_length)
                predicted_label = np.argmax(predicted_test, 1)
                map_mrr_test = evaluation.evaluationBypandas_f1_acc(
                    test, predicted_test[:, -1], predicted_label)
                if map_mrr_test[0] > map_max:
                    map_max = map_mrr_test[0]
                    timeStamp = time.strftime("%Y%m%d%H%M%S",
                                              time.localtime(int(time.time())))
                    folder = 'runs/' + timeDay
                    out_dir = folder + '/' + timeStamp + \
                        '__' + FLAGS.data + str(map_mrr_test[0])
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                    #save_path = saver.save(sess, out_dir)
                print("{}:train epoch:map mrr {}".format(i, map_mrr_train))
                print("{}:test epoch:map mrr {}".format(i, map_mrr_test))
                line2 = " {}:epoch: map_test{}".format(i, map_mrr_test)
                log.write(line2 + '\n')
                log.flush()
            log.close()
Example No. 7
logger = logging.getLogger(program)

# Get configuration
FLAGS = config.flags.FLAGS
# FLAGS._parse_flags()
opts = FLAGS.flag_values_dict()
for item in opts:
    logger.info('{} : {}'.format(item, opts[item]))
############## Data loading #################
logger.info('load data ...........')
train, dev, test = helper.load(opts['data_dir'])

max_sent_length = max(map(lambda x: len(x), train['question'].str.split()))
max_sent_length = 33  # hard-coded override of the computed maximum
############## Data preprocessing ###############
alphabet, embeddings, embeddings_complex = helper.prepare(
    [train, test, dev], dim=FLAGS.embedding_dim)
############## Model prediction #################

opts['embeddings'] = embeddings
opts['max_input_sentence'] = max_sent_length
opts['vocab_size'] = len(alphabet)

with tf.Graph().as_default():
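    # Build the model in a fresh graph and run the training epochs.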
    model = Fasttext(opts)
    model._model_stats()

    for i in range(opts['num_epochs']):
        data_gen = helper.batch_iter(train, opts['batch_size'], alphabet,
                                     True, sen_len=max_sent_length)
        model._train(data_gen,i)

        # test code
Example No. 8
def test_point_wise():
    train, test, dev = load(FLAGS.data, filter=FLAGS.clean)
    train = train.fillna('')
    test = test.fillna('')
    dev = dev.fillna('')
    # submit = submit.fillna('')
    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split()))
    # train = train[:1000]
    # test = test[:1000]
    # dev = dev[:1000]
    # submit = dev[:100]
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))

    alphabet, embeddings = prepare([train, test, dev],
                                   dim=FLAGS.embedding_dim,
                                   is_embedding_needed=True,
                                   fresh=True)
    print('alphabet:', len(alphabet))
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            # session_conf = tf.ConfigProto(
            #     allow_soft_placement=FLAGS.allow_soft_placement,
            #     log_device_placement=FLAGS.log_device_placement)

            session_conf = tf.ConfigProto()
            session_conf.allow_soft_placement = FLAGS.allow_soft_placement
            session_conf.log_device_placement = FLAGS.log_device_placement
            session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default(), open(precision, "w") as log:
            log.write(str(FLAGS.__flags) + '\n')
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            cnn = QA(max_input_left=q_max_sent_length,
                     max_input_right=a_max_sent_length,
                     vocab_size=len(alphabet),
                     embedding_size=FLAGS.embedding_dim,
                     batch_size=FLAGS.batch_size,
                     embeddings=embeddings,
                     dropout_keep_prob=FLAGS.dropout_keep_prob,
                     filter_sizes=list(map(int,
                                           FLAGS.filter_sizes.split(","))),
                     num_filters=FLAGS.num_filters,
                     l2_reg_lambda=FLAGS.l2_reg_lambda,
                     is_Embedding_Needed=True,
                     trainable=FLAGS.trainable,
                     overlap_needed=FLAGS.overlap_needed,
                     position_needed=FLAGS.position_needed,
                     pooling=FLAGS.pooling,
                     extend_feature_dim=FLAGS.extend_feature_dim)
            cnn.build_graph()
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            starter_learning_rate = 0.001
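            # Exponential decay: multiply the learning rate by 0.96 every 100
            # global steps (smoothly, since staircase defaults to False).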
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step, 100, 0.96)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # seq_process(train, alphabet)
            # seq_process(test, alphabet)
            map_max = 0.65
            for i in range(30):
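                # Precompute word-overlap features once per epoch and share
                # them across the generated point-wise batches.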
                d = get_overlap_dict(train,
                                     alphabet,
                                     q_len=q_max_sent_length,
                                     a_len=a_max_sent_length)
                datas = batch_gen_with_point_wise(train,
                                                  alphabet,
                                                  FLAGS.batch_size,
                                                  overlap_dict=d,
                                                  q_len=q_max_sent_length,
                                                  a_len=a_max_sent_length)
                for data in datas:
                    feed_dict = {
                        cnn.question: data[0],
                        cnn.answer: data[1],
                        cnn.input_y: data[2],
                        cnn.q_overlap: data[3],
                        cnn.a_overlap: data[4],
                        cnn.q_position: data[5],
                        cnn.a_position: data[6]
                    }
                    _, step, loss, accuracy, pred, scores, see = sess.run([
                        train_op, global_step, cnn.loss, cnn.accuracy,
                        cnn.predictions, cnn.scores, cnn.see
                    ], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}  ".format(
                        time_str, step, loss, accuracy))

                    # print loss

                # predicted = predict(sess,cnn,train,alphabet,FLAGS.batch_size,q_max_sent_length,a_max_sent_length)
                # map_mrr_train = evaluation.evaluationBypandas(train,predicted[:,-1])
                predicted = predict(sess, cnn, dev, alphabet, FLAGS.batch_size,
                                    q_max_sent_length, a_max_sent_length)
                map_mrr_dev = evaluation.evaluationBypandas(
                    dev, predicted[:, -1])
                predicted_test = predict(sess, cnn, test, alphabet,
                                         FLAGS.batch_size, q_max_sent_length,
                                         a_max_sent_length)
                map_mrr_test = evaluation.evaluationBypandas(
                    test, predicted_test[:, -1])
                if map_mrr_dev[0] > map_max:
                    map_max = map_mrr_dev[0]
                    timeStamp = time.strftime("%Y%m%d%H%M%S",
                                              time.localtime(int(time.time())))
                    folder = 'runs/' + timeDay
                    out_dir = folder + '/' + timeStamp + '__' + FLAGS.data + str(
                        map_mrr_dev[0])
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                    save_path = saver.save(sess, out_dir)
                    print "Model saved in file: ", save_path
                # predicted = predict(sess,cnn,dev,alphabet,FLAGS.batch_size,q_max_sent_length,a_max_sent_length)
                # map_mrr_dev = evaluation.evaluationBypandas(dev,predicted[:,-1])
                # map_mrr_train = evaluation.evaluationBypandas(train,predicted_train[:,-1])
                # print evaluation.evaluationBypandas(train,predicted_train[:,-1])
                # print "{}:train epoch:map mrr {}".format(i,map_mrr_train)
                print "{}:dev epoch:map mrr {}".format(i, map_mrr_dev)
                print "{}:test epoch:map mrr {}".format(i, map_mrr_test)
                # line = " {}:epoch: map_train{}----map_test{}----map_dev{}".format(i,map_mrr_train[0],map_mrr_test[0],map_mrr_dev[0])
                line = " {}:epoch: map_dev{}----map_test{}".format(
                    i, map_mrr_dev[0], map_mrr_test[0])
                log.write(line + '\n')
                log.flush()
            log.close()