Example #1
def predict_class(text):
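    # Clean the input text, tokenize and pad it, rebuild the model with GloVe
    # embedding weights, load the trained weights, and map the argmax of the
    # prediction to a main-product label.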
    logging.info(f"Input text: {text}")
    logging.info("cleaning input text")
    text = clean_text(text)
    sentence = [text]
    tokenizer = load_tokenizer()
    logging.info("trained tokenizer loaded")
    word_index = tokenizer.word_index
    vocab_size = len(word_index)
    text_sequences = tokenizer.texts_to_sequences(sentence)
    text_padded = pad_sequences(text_sequences,
                                padding=PADDING_TYPE,
                                truncating=TRUNC_TYPE,
                                maxlen=MAX_LENGTH)

    logging.info("creating embedding matrix using Glove Embeddings")
    embedding_matrix = embedding_matrix_glove(word_index)
    logging.info(f"Embeddings Weights created {embedding_matrix.shape}")

    logging.info("getting pre-trained model")
    model = create_model(vocab_size, EMBEDDING_DIM, MAX_LENGTH,
                         embedding_matrix)
    model.summary()
    logging.info("loading model weights")
    model.load_weights(MODEL)

    predict = model.predict(text_padded)
    predict = np.argmax(predict)

    predicted_main_product = get_main_product(predict)
    logging.info(f'Predicted Main Product: {predicted_main_product}')
Example #2
def main():
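    # Parse command-line arguments, load the image data, restore the latest
    # saved model parameters, and evaluate the model on the train and test splits.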
    parser = argparse.ArgumentParser(description='model')
    parser.add_argument('--input_image_paths',
                        dest='input_image_paths',
                        default='inputs_128_128_1',
                        nargs='+',
                        help='input image paths, separate by space')
    parser.add_argument('--max_input_count',
                        dest='max_input_count',
                        type=int,
                        default=10000,
                        help='max input image count for train')
    parser.add_argument('--model_parameter_path',
                        dest='model_parameter_path',
                        default='model_parameter')
    parser.add_argument('--dump_detail',
                        dest='dump_detail',
                        default=False,
                        action='store_true')

    args = parser.parse_args()

    train_x, train_y, test_x, test_y = load_image_data(args.input_image_paths,
                                                       args.max_input_count)
    model = create_model(0.0001)
    load_latest_model_parameter(model, args.model_parameter_path)
    test_model(model, train_x, train_y, args.dump_detail)
    test_model(model, test_x, test_y, args.dump_detail)
Example #3
def evaluate_line():
    config = utils.load_config(FLAGS.config_file)  # read the config file
    log_path = os.path.join("evl_log", FLAGS.log_test)  # ./log/train.log
    logger = utils.get_logger(log_path)  # logger, given the log file name and path
    # limit GPU memory
    tf_config = tf.ConfigProto()  # TensorFlow session configuration
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:  # map_file stores the char<->id and tag<->id mappings
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        # char_to_id maps each character to its id; id_to_char is the inverse mapping
        # print('char_to_id: ', char_to_id)
        # print('tag_to_id: ', tag_to_id)

    with tf.Session(config=tf_config) as sess:
        model = utils.create_model(sess, Model, FLAGS.ckpt_path,
                                   data_utils.load_word2vec, config,
                                   id_to_char, logger)
        while True:
            try:
                line = input("请输入测试句子:")
                if line == 'exit':
                    break
                result = model.evaluate_line(
                    sess, data_utils.input_from_line(line, char_to_id),
                    id_to_tag)
                print(result)
                logger.debug(result)
            except Exception as e:
                logger.info(e)
Example #4
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger, False)
        # while True:
        #     # try:
        #     #     line = input("请输入测试句子:")
        #     #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #     #     print(result)
        #     # except Exception as e:
        #     #     logger.info(e)
        #
        #         line = input("请输入测试句子:")
        #         result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #         print(result)
        line = u"香港的房价已经到达历史巅峰,乌溪沙地铁站上盖由新鸿基地产公司开发的银湖天峰,现在的尺价已经超过一万五千港币。"
        line = u"这是测试语句,国务院加入测试"
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(result)
Example #5
def evaluate_line():
    config = load_config(FLAGS.config_file)  # read configuration data from config_file
    #{'model_type': 'idcnn', 'num_chars': 3538, 'char_dim': 100, 'num_tags': 51, 'seg_dim': 20, 'lstm_dim': 100, 'batch_size': 20, 'emb_file': 'E:\\pythonWork3.6.2\\NERuselocal\\NERuselocal\\data\\vec.txt', 'clip': 5, 'dropout_keep': 0.5, 'optimizer': 'adam', 'lr': 0.001, 'tag_schema': 'iobes', 'pre_emb': True, 'zeros': True, 'lower': False}
    logger = get_logger(FLAGS.log_file)  # write logs to the file train.log
    # limit GPU memory
    tf_config = tf.ConfigProto()  # session config object, used when creating the session to set its parameters
    tf_config.gpu_options.allow_growth = True  # 1) allocate GPU memory on demand; 2) to cap GPU usage instead, set config.gpu_options.per_process_gpu_memory_fraction = 0.4
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Example #6
def main():

    # load parameters for run...
    parameters = yaml.load(open(PARAM_FILE_DIRECTORY))
    db_defs = parameters['database']
    solver_param = parameters['solver_param']
    model_param = parameters['model_param']
    model_specific_params = parameters[model_param['model_type']]

    print('Initialize DB connection....')
    db, dictionary, reverse_dictionary = utils.initialize_db_connection(
        db_defs)
    #target_words, context, dictionary, reverse_dictionary = collect_data(db_defs,5000)
    #data, count, dictionary, reverse_dictionary = collect_data2(vocabulary_size=model_param['vocabulary_size'])
    #target_words, context = generate_batch(data, 500, 2, 2)

    #data, count, dictionary, reverse_dictionary = collect_data2(vocabulary_size=model_param['vocabulary_size'])
    print('Done Collect Data.')

    print('try to import model')
    # build model...
    model = create_model(model_param, model_specific_params)
    print('Model drawn')

    # Initialize the solver object.
    solver = Solver(model)

    # train model....
    solver.train(db, db_defs, dictionary, reverse_dictionary, solver_param)

    #grab embeddings for some sample data.
    embedding = solver.run(np.array([1, 2, 3, 12], dtype=np.int32),
                           solver_param)
    print('done!')
Example #7
 def train(self):
     self.get_sentences_dict()
     self.get_batch_data()
     logger, config = self.get_config()
     tf_config = tf.ConfigProto()
     tf_config.gpu_options.allow_growth = True  # limit GPU memory
     steps_per_epoch = self.train_batch_manager.len_data  # number of batches per epoch
     with tf.Session(config=tf_config) as sess:
         model = create_model(sess, Model, FLAGS.ckpt_path, config,
                              self.id_to_char, logger)
         logger.info("start training")
         loss = []
         for i in range(FLAGS.max_epoch):
             for batch in self.train_batch_manager.iter_batch(shuffle=True):
                 step, batch_loss = model.run_step(sess, True, batch)
                 loss.append(batch_loss)
                 if step % FLAGS.steps_check == 0:
                     iteration = step // steps_per_epoch + 1
                     logger.info("iteration:{} step:{}/{}, "
                                 "NER loss:{:>9.6f}".format(
                                     iteration, step % steps_per_epoch,
                                     steps_per_epoch, np.mean(loss)))
                     loss = []
             # predict and evaluate on the dev set
             best = self.evaluate(sess, model, "dev",
                                  self.dev_batch_manager, self.id_to_tag,
                                  logger)
             if best:
                 save_model(sess, model, FLAGS.ckpt_path, logger)
Example #8
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        # txt = input("请输入文件:")
        # with open(txt, encoding='u8') as test_file:
        #     for line in test_file.readlines():
        #         line = line.split(',')
        #         result = model.evaluate_line(sess, input_from_line(line[1], char_to_id), id_to_tag)
        #         print(result)

        while True:
            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Example #9
def evaluate_file(file, target):
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        with open(file, encoding='u8') as fr, open(target,
                                                   mode='w',
                                                   encoding='u8') as fw:
            for line in fr:
                result = model.evaluate_line(
                    sess, input_from_line(line.strip(), char_to_id), id_to_tag)
                print(result)
                fw.write(json.dumps(result, ensure_ascii=False))
                fw.write("\n")
                fw.flush()

        while True:
            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Example #10
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    user_path = os.path.expanduser("~")

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        with open(
                user_path +
                '/share/deep_learning/data/knowledge_graph/entity_relation/corpus.yml'
        ) as f:
            corpus = yaml.load(f)
            for key in corpus:
                sentences = corpus[key]
                sentences = list(set(sentences))
                for sen in sentences:
                    sen_strip = sen.replace(' ', '')
                    result = model.evaluate_line(
                        sess, input_from_line(sen_strip, char_to_id),
                        id_to_tag)
                    extract_entity(result, key)
Example #11
File: main.py Project: wshzd/NER
def main(_):

    if FLAGS.train:
        if FLAGS.clean:
            clean(FLAGS)
        train()
    else:
        # evaluate the model on the test data below
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        log_path = os.path.join("log", FLAGS.log_file)
        config = load_config(FLAGS.config_file)
        logger = get_logger(log_path)
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   log_device_placement=True)
        test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower,
                                        FLAGS.zeros)
        test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                    FLAGS.lower)
        test_manager = BatchManager(test_data, 100)
        with tf.Session(config=tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                                 config, id_to_char, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Example #12
def evaluate_line(sents):
    global static_model
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    graph = tf.Graph()
    # tf_config = tf.ConfigProto()
    # tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

        # with tf.Session(config=tf_config) as sess:

    sess = tf.InteractiveSession(graph=graph)

    print(" start  create model")
    static_model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                                config, id_to_char, logger)
    print(" end  create model")

    result = static_model.evaluate_line(sess,
                                        input_from_line(sents,
                                                        char_to_id), id_to_tag)
    sess.close()

    return result
Example #13
 def predict():
     """
     Run named entity recognition over a dataset
     :return:
     """
     config = load_config(FLAGS.config_file)
     logger = get_logger(FLAGS.log_file)
     tf_config = tf.ConfigProto()
     tf_config.gpu_options.allow_growth = True  # limit GPU memory
     # restore the mapping dictionaries from the map_file generated during training
     with open(FLAGS.map_file, "rb") as f:
         char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
     test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower,
                                     FLAGS.zeros)
     test_data = prepare_dataset(test_sentences,
                                 char_to_id,
                                 tag_to_id,
                                 FLAGS.lower,
                                 train=False)
     test_manager = BatchManager(test_data, 1)
     with tf.Session(config=tf_config) as sess:
         model = create_model(sess, Model, FLAGS.ckpt_path, config,
                              id_to_char, logger)
         logger.info("predict data......")
         ner_results = model.predict(sess, test_manager, id_to_tag)
         result_write_evaluate(ner_results, FLAGS.result_path, "test")
Example #14
def evaluate_line():
    # hard-coded paths
    FLAGS.config_file = 'forum_config/config_file'
    FLAGS.log_file = 'forum_config/log/train.log'
    FLAGS.ckpt_path = 'forum_ckpt/'
    # FLAGS.ckpt_path = 'ckpt/'
    FLAGS.map_file = 'forum_config/maps.pkl'

    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            print(result)
Example #15
def __test():
    """
    test interface
    :return:
    """
    log_path = os.path.join(".", config['log_path'])
    config['ckpt_path'] = "Pretrain/" + domain + "/"
    logger = get_logger(log_path)
    id2mid, id2p, entity_entity_sim_Matrix, entity_relation_Adj, truth_label, train_entity_list, \
    test_entity_list, valid_entity_list = data_reader(logger, config=config, domain=domain,
                                                      entity_entity_topk=entity_knn_number)
    test_data = (entity_relation_Adj, entity_entity_sim_Matrix,
                 test_entity_list, truth_label)
    test_manager = BatchManager(test_data, len(test_entity_list), "test")

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, config['ckpt_path'], config, logger)
        logger.info("start test")
        test_precision, test_ndcg, test_map, test_ndcg_topall = evaluate(
            sess, model, "test", test_manager, logger, id2mid, id2p)
        print("test precision at {} :{:>.5f}".format(k, test_precision))
        print("test ndcg at {} :{:>.5f}".format(k, test_ndcg))
        print("test map :{:>.5f}".format(test_map))
        print("test ndcg_topall :{:>.5f}".format(test_ndcg_topall))
Example #16
def evaluate_ht():
    submit_path_ht = 'submit_sample/hetong.csv'
    submit_path_file = open(submit_path_ht, 'a+', encoding='gbk')
    submit_path_file.write('公告id,甲方,乙方,项目名称,合同名称,合同金额上限,合同金额下限,联合体成员\n')
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        rootdir = '/home/utopia/corpus/FDDC_part2_data/FDDC_announcements_round1_test_a_20180605/重大合同/html/'
        list = os.listdir(rootdir)  # list all files and directories under the folder
        for i in range(0, len(list)):
            htmlpath = os.path.join(rootdir, list[i])
            if os.path.isfile(htmlpath):
                print(htmlpath)
                s_arr = levelText_withtable(htmlpath)
                candidates = []
                for j in range(len(s_arr)):
                    sen = s_arr[j]
                    result = model.evaluate_line(
                        sess, input_from_line(sen, char_to_id), id_to_tag)
                    entities = result.get('entities')
                    if len(entities) > 0:
                        for en in entities:
                            en['sid'] = j
                            en['pid'] = list[i]
                            candidates.append(en)
                org_ht(candidates, submit_path_file)
                print('-------------------------------------------------')
Example #17
def predict():
    batcher = get_batcher()

    config = load_config(FLAGS.config_file)
    logger = get_logger(os.path.join('log', FLAGS.log_file))
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, tag_to_id, id_to_tag = pickle.load(f)

    def get_test_data(char2id):
        sentences = []
        with open('data/test.txt', 'r', encoding='utf-8') as f:
            for line in f:
                words = line.strip().split('_')
                ids = [
                    char2id[char if char in char2id else '<UNK>']
                    for char in words
                ]
                sentences.append([words, ids])
        return sentences

    test_data = get_test_data(char_to_id)
    with tf.Session(config=tf_config) as sess:
        elmo_model = load_elmo()
        model = create_model(sess, Model, FLAGS.ckpt_path, elmo_model, config,
                             logger)
        results = model.predict_batch(sess,
                                      data=test_data,
                                      id_to_tag=id_to_tag,
                                      batcher=batcher,
                                      batch_size=FLAGS.batch_size)
        result_to_file(results)
Example #18
def semisupervised_training(unsup, layers, data, epochs):
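    # Alternate between supervised training (sup) and autoencoder reconstruction
    # (unsup), copying weights between the two models each round, then fine-tune
    # the supervised model for the remaining epochs.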

    X_train, y_train, X_test, y_test = data[0], data[1], data[2], data[3]

    sup = create_model(layers, 0, 0, 0, 'relu', 'softmax',
                       'binary_crossentropy', 'adam')

    for i in range(3):
        sup.fit(X_train,
                y_train,
                validation_data=(X_test, y_test),
                epochs=1,
                batch_size=200,
                verbose=2)

        unsup = change_weights(sup, unsup)

        unsup.fit(X_train,
                  X_train,
                  validation_data=(X_test, X_test),
                  epochs=1,
                  batch_size=200,
                  verbose=2)

        sup = change_weights(unsup, sup)

    for j in range(epochs):
        sup.fit(X_train,
                y_train,
                validation_data=(X_test, y_test),
                epochs=1,
                batch_size=200,
                verbose=2)

    return sup
Example #19
def content_ner():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        for i in range(302):
            newslist = []
            m = i + 1
            f = open('newsLists/%d.txt' % m, 'r')
            for line in f.readlines():
                newslist.append(int(line))
            fout = codecs.open('content_ner/%d.json' % m,
                               'w',
                               encoding='utf-8')
            for i in range(len(newslist)):
                day = newslist[i]
                f = codecs.open('D:/PycharmProjects/news_data/%d.json' % day,
                                encoding='utf-8')
                f_d = json.load(f)
                content = f_d["content"]
                content = strip_tags(content)
                result = model.evaluate_line(
                    sess, input_from_line(content, char_to_id), id_to_tag)
                dicObj = json.dumps(result)
                fout.write(dicObj)
                fout.write("\n")
            fout.close()
Example #20
def ensemble_val_data():
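    # Load each saved weight file, run validation predictions with the matching
    # model, and average the raw predictions across models into an ensemble.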
    preds_raw = []
    labels = []

    for match_str in w_file_matcher:
        os.chdir(MODEL_DIR)
        w_files = glob.glob(match_str)
        for w_file in w_files:
            full_w_file = MODEL_DIR + '/' + w_file
            mname = w_file.split('_')[0]
            print(full_w_file)
            model = create_model(mname)
            model.load_state_dict(torch.load(full_w_file))

            pred, y = make_preds_val(model)
            #pred = np.array(pred)
            preds_raw.append(pred)
            labels.append(y)
            del model

    save_array(PRED_VAL_RAW, preds_raw)
    preds = np.mean(preds_raw, axis=0)
    save_array(PRED_VAL, preds)
    save_array(VAL_LABELS, labels)
    return preds, labels
Example #21
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        # while True:
        # try:
        #     line = input("请输入测试句子:")
        #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
        #     print(result)
        # except Exception as e:
        #     logger.info(e)

        # line = input("请输入测试句子:")
        line = "哮喘古代文献也称“鼻息”、“肩息”、“上气”等。"
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(line)
        print([(x["word"], x["type"]) for x in result["entities"]])

        line = "喘病是指由于外感或内伤,导致肺失宣降,肺气上逆或气无所主,肾失摄纳,以致呼吸困难,甚则张口抬肩,鼻翼煽动,不能平卧等为主要临床特征的一种病证。 "
        result = model.evaluate_line(sess, input_from_line(line, char_to_id),
                                     id_to_tag)
        print(line)
        print([(x["word"], x["type"]) for x in result["entities"]])
Example #22
def main():
    # For GPU memory efficiency
    gpu_options = tf.GPUOptions(allow_growth=True)

    # create log dir
    if not os.path.exists(FLAGS.logdir):
        os.makedirs(FLAGS.logdir)
    model_path = os.path.join(FLAGS.logdir, FLAGS.task_name)

    # Train or Inference
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if FLAGS.is_train:
            # load training dataset
            print ("Start Loading Dataset...")
            mnist = input_data.read_data_sets(FLAGS.data_path, one_hot=False, validation_size=5000)
            train_data = mnist.train
            val_data = mnist.validation
            print ("Loading done.")

            print ("Initialize the network")
            if not os.path.exists(model_path) or FLAGS.reset:
                # Create model if not exist or reset the model.
                model = create_model(sess, FLAGS, mode="train")
            else:
                model = load_model(sess, model_path, mode="train")
                # then we restore the trained model.

            train(sess, train_data, val_data, model, FLAGS)
            print ("Training Done.")
        else:
            print ("The inference mode is removed from inference.py")
Example #23
def get_slot_dl(text):
    """
    Get the slot_table for a sentence
    :param text: the sentence entered by the user
    # :param tf_sess: the TensorFlow session
    :return:
    """
    tf.reset_default_graph()   # FIXME: ensures repeated inputs work (but is slow); when integrating the code, consider separating NLU and DM and using unified file management.
    FLAGS.config_file = '/forum_config/config_file'
    FLAGS.log_file = '/forum_config/log/train.log'
    FLAGS.ckpt_path = '/forum_ckpt/'
    FLAGS.map_file = '/forum_config/maps.pkl'
    
    file_path = os.path.dirname(__file__)
    config = load_config(file_path+FLAGS.config_file)
    logger = get_logger(file_path+FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    with open(file_path+FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        result = model.evaluate_line(sess, input_from_line(text, char_to_id), id_to_tag)
    return result
Example #24
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # the map_file needs to be changed
    with open(FLAGS.map_file, "rb+") as f:
        # pkllf=pickle.load(f)
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

                # change this to file-in / file-out
                line = input("请输入测试句子:")
                result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)

                print(result)
Example #25
def test_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char = pickle.load(f)
        with tf.Session(config=tf_config) as sess:
            model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                                 config, id_to_char, logger)
            logger.info("start testing")
            data = input_pro()
            for line in data:
                data = json.loads(line)
                print(data)
                id = data["id"]
                doc_txt = data["doc"]
                golden_event = data["golden_event"]
                chunk_result = data["chunk_result"]
                result_entity = data["result"]
                for result in result_entity:
                    str_token = result["string"]
                    entities = result["entities"]
                    test_data = input_from_line(str_token, char_to_id)
                    classfy_result = model.evaluate_line(sess, test_data)
                    print(classfy_result)
                    result["mention_classify"] = classfy_result[0]
                write_to_file(data)
Example #26
def main():

    # read in the parameters defining the model.
    parameters = yaml.load(open(PARAM_FILE_DIRECTORY))

    # read in all the parameters defining the model and
    # how it will be trained.
    solver_param = parameters['solver_param']
    model_param = parameters['model_param']
    model_specific_params = parameters[model_param['model_type']]
    print("Training data using %s model" % (model_param['model_type']))

    print('Loading Data set...')
    data = utils.load_data()

    print('Drawing model...')
    # initialize the model
    model = utils.create_model(model_param, model_specific_params)

    # build it with feature, label sizes...
    model.build(data['features'], data['categories'])
    print('Model drawn.')

    # Initialize the solver object.
    solver = Solver(model)

    # train model....
    solver.train(data['X_train'], data['y_train'], solver_param, data['X_val'],
                 data['y_val'])

    print('done!')
Example #27
def evaluate_predictsamples():
    config = load_config(FLAGS.config_file)
    logger = get_logger('log/test.log')
    print_config(config, logger)
    # limit GPU memory
    logger.info("start predict")
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        index = 1
        with open('data/predict.txt', 'w', encoding='utf-8') as f_write:
            read_file = 'data/test.txt'
            for line in open(read_file, 'r', encoding='utf-8-sig').readlines():
                result = model.evaluate_line(sess,
                                             input_from_line(line, char_to_id),
                                             id_to_tag)
                print(result)
                content = str(index) + ','
                if result is not None:
                    entities = result['entities']
                    for entity in entities:
                        if entity:
                            print(entity['word'] + '\t' + entity['type'])
                            # content += entity['word'] + '\t' + str(entity['start']) + '\t' + str(entity['end']) + '\t' + entity['type']+ ';'
                            content += entity['word'] + '\t' + entity[
                                'type'] + ';'
                f_write.write(content + '\n')
                index += 1
Example #28
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        f = codecs.open(os.path.join(FLAGS.test_filepath, "127_9.txt"), "r",
                        "utf-8")
        s = f.read()
        line = []
        sent = ''
        for i in range(len(s)):
            if s[i] != '。':
                sent += s[i]
            else:
                sent += s[i]
                line.append(sent)
                sent = ''

        # line = input("请输入测试句子:")  # overwriting 'line' here would discard the sentences read from the file above
        for info in line:
            print(info)
            result = model.evaluate_line(sess,
                                         input_from_line(info, char_to_id),
                                         id_to_tag)
            for info1 in result['entities']:
                print(info1)
Example #29
    def stacking(self):
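        # Train each base model on a random 70% sample of the training data,
        # collect its predictions on the full training set, then fit a "super
        # model" on the stacked predictions.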

        N, M = len(self.models), len(self.y_train)

        super_model = create_model([N, 500, 500, 1], 0.8, 1, 'elu', 'sigmoid',
                                   'binary_crossentropy')

        predictions = np.zeros(shape=(M, N))

        new_models = []

        for i in range(N):
            model = self.models[i]

            indices = np.array(range(M))
            sample = np.random.choice(indices,
                                      round(len(indices) * 0.7),
                                      replace=False)

            X, Y = self.X_train[sample], self.y_train[sample]

            model.fit(X, Y, batch_size=5000, verbose=1)

            new_models.append(model)

            probs = model.predict(self.X_train)

            predictions[:, i] = probs.reshape(M, )

        #fit the super model to predictions:

        super_model.fit(predictions, self.y_train)

        return super_model, new_models
Example #30
def run_test():
    model = create_model(model_type=model_type,
                         pretrained=pretrained,
                         n_classes=n_classes,
                         input_size=input_size,
                         checkpoint=checkpoint)
    model = model.to(device)
    print(model)
    # count_flops(model, device=device)

    test_dataset = EvalDataset('./data/stanford-dogs/Processed/test')
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False)

    model.eval()
    loss_func = nn.CrossEntropyLoss()
    acc_list, loss_list = [], []
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(tqdm(test_dataloader)):
            inputs, labels = inputs.float().to(device), labels.to(device)
            preds = model(inputs)
            pred_idx = preds.max(1).indices
            acc = (pred_idx == labels).sum().item() / labels.size(0)
            acc_list.append(acc)
            loss = loss_func(preds, labels).item()
            loss_list.append(loss)

    final_loss = np.array(loss_list).mean()
    final_acc = np.array(acc_list).mean()
    print('Test loss: {}\nTest accuracy: {}'.format(final_loss, final_acc))
Example #31
    def setUp(self):
        admin_opts = {}
        fields = {'name': models.CharField(max_length=255)}
        self.dynamic_model = create_model('DynamicModel', fields=fields, app_label='context_admin', admin_opts={})

        fields = {  'name': models.CharField(max_length=255),
                    'dynamic_model': models.ForeignKey(self.dynamic_model)
                    }

        self.dynamic_inner_model = create_model('DynamicInnerModel', fields=fields, app_label='context_admin', admin_opts={})
        

        install(self.dynamic_model)
        install(self.dynamic_inner_model)

        instance = self.dynamic_model()
        instance.save()
Example #32
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

                line = input("请输入测试句子:")
                result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
                print(result)
Example #33
def train(data_path, config):
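    # Load the vocabulary, build train/valid batchers, create a training model and
    # a validation model that share variables, attach monitoring hooks, run the
    # Trainer, and finally save the model together with its embedding variable.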
    with tf.Graph().as_default(), tf.Session() as session:
        word_to_id_path = os.path.join(data_path, config.vocab_file)
        with open(word_to_id_path, "rb") as f:
            word_to_id = pickle.load(f)

        vocab_size = len(word_to_id)
        print("Vocab size: %d" % vocab_size)
        sys.stdout.flush()

        train_pattern = config.data_pattern.replace("{-type-}", "train") + ".part*"
        valid_pattern = config.data_pattern.replace("{-type-}", "valid") + ".part*"

        train_files = get_file_list(config, data_path, train_pattern, "train")
        valid_files = get_file_list(config, data_path, valid_pattern, "valid")

        if config.copy_temp:
            temp_dir = tempfile.mkdtemp()
            print("Copying data files to %s" % temp_dir)
            train_files = copy_temp_files(train_files, temp_dir)
            valid_files = copy_temp_files(valid_files, temp_dir)

        config.vocab_size = vocab_size

        train_batcher = PreBatched(train_files, config.batch_size, description="train") if config.use_prebatched \
            else QueuedSequenceBatcher(train_files, config.seq_length, config.batch_size, description="train",
                                       attns=config.attention)
        valid_batcher = PreBatched(valid_files, config.batch_size, description="valid") if config.use_prebatched \
            else QueuedSequenceBatcher(valid_files, config.seq_length, config.batch_size, description="valid",
                                       attns=config.attention)

        t0 = datetime.datetime.now()
        initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = create_model(config, True)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = create_model(config, False)

        summary_writer = tf.train.SummaryWriter(config.events_path, graph=session.graph)
        valid_perplexity = PerplexityHook(summary_writer, mvalid, valid_batcher)

        hooks = [
            SpeedHook(summary_writer, config.status_iterations, config.batch_size),
            LossHook(summary_writer, config.status_iterations),
            valid_perplexity,
            SaveModelHook(config.checkpoint_path, 1, config.__dict__, 5)
        ]
        t1 = datetime.datetime.now()
        print("Building models took: %s" % (t1 - t0))

        def load_func():
            if config.model_path is not None:
                load_model(session, config.model_path)
                print("Continuing training from model: %s" % config.model_path)
            if config.embedding_path is not None:
                load_variables(session, os.path.join(config.embedding_path, "embedding.tf"),
                               [m.embedding_variable])
                print("Loading embedding vectors from: %s" % config.embedding_path)

        trainer = Trainer(m.optimizer, config.epochs, hooks, m, m.train_op)
        trainer(train_batcher, m.loss, session, config.learning_rate, config.lr_decay, load_func)

        saver = tf.train.Saver(tf.trainable_variables())
        embedding_saver = tf.train.Saver([m.embedding_variable])
        print("Saving model...")
        out_path = save_model(saver, session, config.save_path, m.predict, config.__dict__)
        embedding_saver.save(session, os.path.join(out_path, "embedding.tf"))

        if config.copy_temp:
            shutil.rmtree(temp_dir)
Example #34
def train():
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences])
                )
            )
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(
        train_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    dev_data = prepare_dataset(
        dev_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    test_data = prepare_dataset(
        test_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    print("%i / %i / %i sentences in train / dev / test." % (
        len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        logger.info("start training")
        loss = []

        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                #print batch
                step, batch_loss = model.run_step(sess, True, batch)
                #print step
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                        iteration, step%steps_per_epoch, steps_per_epoch, np.mean(loss)))
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)