# Standard-library and TensorFlow imports assumed by this excerpt; the
# project-local helpers (read_file, make_instances, BertEncoder, DataStream,
# AnswerUnderstander, evaluation, split_train_dev, ...) come from project
# modules not shown on the original page.
import json
import pickle

import tensorflow as tf
def test():
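    """Evaluate the trained judge model on the dev set and print its accuracy."""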
    with open('config_judge.json', encoding='utf-8') as infile:
        config = json.load(infile)

    int2bool = {1: True, 0: False}

    sentiment_words_path = config["train_config"]["SENTIMENT_WORDS_PATH"]
    batch_size = config["train_config"]["BATCH_SIZE"]
    is_loop = int2bool[config["train_config"]["Is_LOOP"]]
    is_sort = int2bool[config["train_config"]["IS_SORT"]]
    dropout_rate = config["train_config"]["DROPOUT_RATE"]
    nb_classes = config["train_config"]["NB_CLASSES"]
    attention_dim = config["train_config"]["ATTENTION_DIM"]
    nb_hops = config["train_config"]["NB_HOPS"]
    drop_template_path = config["train_config"]["DROP_JUDGE_TEMPLATE_PATH"]
    use_bert = int2bool[config["train_config"]["USE_BERT"]]
    optimizer = config["train_config"]["OPTIMIZER"]
    learning_rate = config["train_config"]["LEARNING_RATE"]
    grad_clipper = config["train_config"]["GRAD_CLIPPER"]
    drop_judge_dev_path = config["train_config"]["DROP_JUDGE_DEV_PATH"]
    best_path = config["train_config"]["BEST_PATH"]
    question2targets_path = config["train_config"]["QUESTION2TARGETS_PATH"]
    use_extra_feature = config["train_config"]["USE_EXTRA_FEATURE"]
    ner_dict_size = config["train_config"]["NER_DICT_SIZE"]
    pos_dict_size = config["train_config"]["POS_DICT_SIZE"]
    extra_feature_dim = config["train_config"]["EXTRA_FEATURE_DIM"]
    ner_dict_path = config["train_config"]["NER_DICT_PATH"]
    pos_dict_path = config["train_config"]["POS_DICT_PATH"]
    rnn_dim = config["train_config"]["RNN_DIM"]
    lambda_l2 = config["train_config"]["LAMBDA_L2"]
    ans_max_len = config["train_config"]["ANS_MAX_LEN"]
    que_max_len = config["train_config"]["QUE_MAX_LEN"]
    sentiment_polarity_multiple = config["train_config"]["POLARITY_MULTIPLE"]
    # BERT and word2vec embeddings are mutually exclusive.
    use_w2v = not use_bert

    char_voc_path = config["w2v_config"]["CHAR_VOC_PATH"]
    char_embedding_matrix_path = config["w2v_config"][
        "CHAR_EMBEDDING_MATRIX_PATH"]
    word_voc_path = config["w2v_config"]["WORD_VOC_PATH"]
    word_embedding_matrix_path = config["w2v_config"][
        "WORD_EMBEDDING_MATRIX_PATH"]

    bert_model_path = config["bert_config"]["BERT_MODEL_PATH"]
    bert_config_file = config["bert_config"]["CONFIG_FILE"]
    bert_checkpoint_path = config["bert_config"]["INIT_CHECKPOINT"]
    bert_voc_path = config["bert_config"]["VOC_FILE"]
    sen2id_path = config["bert_config"]["SEN2ID_PATH"]

    judge_samples, _, _ = read_file(drop_judge_dev_path)
    judge_template, _, _ = read_file(drop_template_path)
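    # Cap the BERT input length at the longest question or answer in the dev set.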
    max_sequence_len = max(
        max([len(sample['question']) for sample in judge_samples]),
        max([len(sample['answer']) for sample in judge_samples]))
    with open(char_voc_path, 'rb') as infile:
        char_voc = pickle.load(infile)
    with open(word_voc_path, 'rb') as infile:
        word_voc = pickle.load(infile)
    # `question2targets` is passed to make_instances below but was never
    # loaded in the original snippet; assuming it is pickled like the
    # vocabularies above.
    with open(question2targets_path, 'rb') as infile:
        question2targets = pickle.load(infile)

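    # Build the sentence encoder from the pre-trained BERT checkpoint;
    # vec_dim=768 matches the BERT-Base hidden size.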
    bert_encoder = BertEncoder(model_root=bert_model_path,
                               bert_config_file=bert_config_file,
                               init_checkpoint=bert_checkpoint_path,
                               vocab_file=bert_voc_path,
                               max_sequence_len=max_sequence_len,
                               embedding_batch=3,
                               embedding_matrix_path=None,
                               sen2id_path=sen2id_path,
                               vec_dim=768)

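    # Turn raw samples into model-ready instances; the NER/POS dictionaries
    # and sentiment words supply extra features when use_extra_feature is on.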
    instances_judge_dev = make_instances(judge_samples,
                                         char_voc,
                                         word_voc,
                                         sentiment_words_path,
                                         ner_dict_path=ner_dict_path,
                                         pos_dict_path=pos_dict_path,
                                         use_extra_feature=use_extra_feature,
                                         question2targets=question2targets,
                                         is_training=False,
                                         need_augment=False)

    instances_judge_dev_with_match_result(instances_judge_dev)

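    # Batch the dev instances; shuffling is disabled so evaluation order is
    # stable across runs.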
    data_stream_judge_dev = DataStream(instances=instances_judge_dev,
                                       is_shuffle=False,
                                       is_loop=is_loop,
                                       batch_size=batch_size,
                                       ans_max_len=ans_max_len,
                                       que_max_len=que_max_len,
                                       use_bert=use_bert,
                                       bert_encoder=bert_encoder,
                                       is_sort=is_sort)
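    # Rebuild the inference graph with the training-time hyperparameters,
    # then restore the best checkpoint and evaluate.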
    with tf.Graph().as_default():
        with tf.variable_scope("Model",
                               reuse=False,
                               initializer=tf.glorot_uniform_initializer()):
            answer_understander_dev = AnswerUnderstander(
                use_bert=use_bert,
                use_w2v=use_w2v,
                rnn_unit='lstm',
                dropout_rate=dropout_rate,
                optimizer=optimizer,
                learning_rate=learning_rate,
                grad_clipper=grad_clipper,
                global_step=None,
                attention_dim=attention_dim,
                nb_hops=nb_hops,
                rnn_dim=rnn_dim,
                lambda_l2=lambda_l2,
                is_training=False,
                sentiment_polarity_multiple=sentiment_polarity_multiple,
                nb_classes=nb_classes,
                use_extra_feature=use_extra_feature,
                ner_dict_size=ner_dict_size,
                pos_dict_size=pos_dict_size,
                extra_feature_dim=extra_feature_dim,
                ans_max_len=ans_max_len,
                que_max_len=que_max_len,
                char_w2v_embedding_matrix_path=char_embedding_matrix_path,
                word_w2v_embedding_matrix_path=word_embedding_matrix_path)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Restoring the checkpoint overwrites every variable, so the
            # original's separate global-variable initialization run is
            # unnecessary and has been dropped.
            saver.restore(sess, best_path)
            # The original built the result file name from an undefined
            # `loop_index`; a fixed name keeps the snippet runnable.
            judge_acc = evaluation(sess, answer_understander_dev,
                                   data_stream_judge_dev,
                                   'result_judge.txt')
            print("the final judge accuracy: {}".format(judge_acc))
            return judge_acc
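# Hypothetical entry point (not part of the original example): run the
# evaluation directly.
if __name__ == '__main__':
    test()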
Example #3
    # ins_template = make_instances(samples_template,
    #                               w2v_encoder.char_voc,
    #                               w2v_encoder.word_voc,
    #                               sentiment_words_path,
    #                               ner_dict_path=ner_dict_path,
    #                               pos_dict_path=pos_dict_path,
    #                               use_extra_feature=use_extra_feature,
    #                               question2targets=question2targets,
    #                               is_training=True,
    #                               need_augment=True)
    instances = make_instances(samples,
                               w2v_encoder.char_voc,
                               w2v_encoder.word_voc,
                               sentiment_words_path,
                               ner_dict_path=ner_dict_path,
                               pos_dict_path=pos_dict_path,
                               use_extra_feature=use_extra_feature,
                               question2targets=question2targets,
                               is_training=True,
                               need_augment=True)
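    # Hold out 20% of the instances as a dev set, dump the splits to Excel
    # for inspection, then carve a further 10% off the training set for
    # validation.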
    instances_train, instances_dev = split_train_dev(instances, dev_size=0.2)
    drop_instances_to_excel(instances_train, drop_train_path)
    drop_instances_to_excel(instances_dev, drop_path)
    drop_instances_to_excel(instances, 'temp.xlsx')
    # drop_instances_to_excel(ins_template, drop_template_path)
    instances_train, instances_valid = split_train_dev(instances_train,
                                                       dev_size=0.1)

    # data_stream_train = DataStream(instances=instances_train,
    #                                is_shuffle=is_shuffle,
    #                                is_loop=is_loop,