Example #1
    def __init__(self,
                 model_path,
                 type_filter=False,
                 save_label=False,
                 batch=32,
                 save_expand_subject=True):
        # mention -> entity_json_line
        self.subject_id_dict = subject_id_dict
        self._model = BLSTMCRFModel.load_model(model_path)
        # self._model = DDDDModel.load_model(model_path)
        self.type_filter = type_filter
        self.batch = batch
        self.save_label = save_label
        self.save_expand_subject = save_expand_subject
Example #2
import os
from configparser import ConfigParser
from kashgari.corpus import CoNLL2003Corpus             # assumed import path
from kashgari.tasks.seq_labeling import BLSTMCRFModel   # import path as in Example #11


def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    model_fold = cp["EVALUATION"].get("model_fold")
    output_dir = os.path.join('experiments', model_fold)

    test_x, test_y = CoNLL2003Corpus.get_sequence_tagging_data('test')

    model_path = os.path.join(output_dir, 'model')
    model = BLSTMCRFModel.load_model(model_path)
    report_evaluate = model.evaluate(test_x, test_y, debug_info=True)

    with open(os.path.join(output_dir, 'report_evaluate.log'), 'w') as f:
        f.write(f"The evaluation report is:\n{str(report_evaluate)}\n")
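
Examples #1 and #2 only load a model directory that some earlier training run produced. For context, here is a minimal training-and-saving sketch, assuming only the kashgari calls already shown in the later examples (BERTEmbedding, BLSTMCRFModel, fit, save); the kashgari.embeddings import path, the toy data and the experiments/demo_fold output folder are assumptions for illustration.

import os

from kashgari.embeddings import BERTEmbedding           # assumed import path
from kashgari.tasks.seq_labeling import BLSTMCRFModel    # import path as in Example #11

# Toy character-level NER data, purely illustrative.
train_x = [list('我爱北京'), list('他在上海工作')]
train_y = [['O', 'O', 'B-LOC', 'I-LOC'], ['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O']]

# BLSTM-CRF tagger on top of a BERT embedding, mirroring Example #9.
embedding = BERTEmbedding("bert-base-chinese", sequence_length=128)
model = BLSTMCRFModel(embedding)
model.fit(train_x, train_y, epochs=5, batch_size=32)

# Save into the layout that Example #2 loads later: experiments/<model_fold>/model.
model.save(os.path.join('experiments', 'demo_fold', 'model'))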
Example #3
import logging
import os
from configparser import ConfigParser
from kashgari.tasks.seq_labeling import BLSTMCRFModel  # import path as in Example #11


def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    model_fold = cp["TEST"].get("model_fold")
    output_dir = os.path.join('experiments', model_fold)

    model_path = os.path.join(output_dir, 'model')
    model = BLSTMCRFModel.load_model(model_path)
    sentence = 'China and the United States are about the same size'
    sentence_list = sentence.split()
    result = model.predict(sentence_list)
    result_dict = model.predict(sentence_list, output_dict=True)
    print(f'the sentence is {sentence}')
    print(f'the result is {result}')
    print(f'the result as a dict is {result_dict}')
    logging.info('test predict: {} -> {}'.format(sentence_list, result))

    with open(os.path.join(output_dir, 'result_predict.log'), 'w') as f:
        f.write(f"The prediction result is: {str(result)}\n")
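
As a follow-up to Example #3, a small sketch that pairs each input token with its predicted tag, assuming that predict on a single tokenized sentence returns one tag per token (the zip in Example #7 relies on the same shape); the helper name and the model path are hypothetical.

from kashgari.tasks.seq_labeling import BLSTMCRFModel  # import path as in Example #11


def print_token_tags(model, sentence):
    # Hypothetical helper: align tokens with the tags returned by model.predict.
    tokens = sentence.split()
    tags = model.predict(tokens)  # assumption: one tag per token
    for token, tag in zip(tokens, tags):
        print(f'{token}\t{tag}')


if __name__ == '__main__':
    model = BLSTMCRFModel.load_model('experiments/demo_fold/model')  # hypothetical path
    print_token_tags(model, 'China and the United States are about the same size')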
Example #4
log_filepath = r"D:\data\biendata\ccks2019_el\ner\m0.1log"

# emn_path = r'D:\data\bert\chinese_L-12_H-768_A-12'
emn_path = r'D:\data\bert\chinese-bert_chinese_wwm_L-12_H-768_A-12'

# check_point = ModelCheckpoint(model_path, monitor="val_loss", verbose=1, save_best_only=True, mode="min")

early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=1)
# early_stop = EarlyStopping(monitor="val_crf_accuracy", mode="max", patience=2)

log = TensorBoard(log_dir=log_filepath,
                  write_images=False,
                  write_graph=True,
                  histogram_freq=0)

model = BLSTMCRFModel.load_model(model_path_o)

model.fit(train_x,
          train_y,
          x_validate=validate_x,
          y_validate=validate_y,
          epochs=40,
          batch_size=512,
          labels_weight=True,
          fit_kwargs={'callbacks': [early_stop, log]})

model.evaluate(test_x, test_y)

model.save(model_path_n)
"""
Continue training
Example #5
    def __init__(self, model_path, type_filter=False):
        # mention -> entity_json_line
        self.subject_id_dict = subject_id_dict
        self._model = BLSTMCRFModel.load_model(model_path)
        self.type_filter = type_filter
Example #6
    def __init__(self, model_path):
        # mention -> entity_json_line
        self.subject_id_dict = subject_id_dict
        self._model = BLSTMCRFModel.load_model(model_path)
Example #7
                x = []
        return datas

    def model_predict(self, model, text):
        x_test = self.build_input(text)
        result = model.predict(x_test)
        chars = [i for i in text]
        tags = []
        for i in range(len(result)):
            tags = tags + result[i]
        res = list(zip(chars, tags))
        print(res)
        return res


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--option', type=str, default='predict')
    args = parser.parse_args()
    config = Config()

    bertner = BERTNER()
    if args.option == 'train':
        bertner.train_model()
    else:
        model = BLSTMCRFModel.load_model(bertner.model_path)
        while True:
            s = input('enter a sentence: ').strip()
            bertner.model_predict(model, s)
Example #8
    def __init__(self, model_path):
        with tf.device('/gpu:0'):
            # mention -> entity_json_line
            self.subject_dic = super().get_kb_dic()

            self._model = BLSTMCRFModel.load_model(model_path)
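
Examples #1, #5, #6 and #8 all wrap load_model inside a class __init__. A self-contained sketch of such a wrapper, with a hypothetical class name and tag method; only the load_model and predict calls shown elsewhere in these examples are used.

from kashgari.tasks.seq_labeling import BLSTMCRFModel  # import path as in Example #11


class EntityTagger:  # hypothetical wrapper class
    def __init__(self, model_path, type_filter=False):
        self.type_filter = type_filter
        self._model = BLSTMCRFModel.load_model(model_path)

    def tag(self, tokens):
        # assumption: predict on one tokenized sentence returns one tag per token
        return self._model.predict(tokens)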
Example #9
    print('train start')
    train_x, train_y = get_train_data('data/train_text.txt')
    embedding = BERTEmbedding("bert-base-chinese", sequence_length=512)
    model = BLSTMCRFModel(embedding)
    length = int(len(train_x) * 0.9)
    print(len(train_x[:length]), len(train_y[:length]))
    model.fit(train_x[:length], train_y[:length], train_x[length:], train_y[length:], epochs=5, batch_size=20)
    # model.fit(train_x[:length], train_y[:length], train_x[length:], train_y[length:], epochs=5, batch_size=128,
    #           labels_weight=True, default_labels_weight=100)
    valid_x = train_x[length:]
    valid_y = train_y[length:]
    model.save('models')
    print('train end')
    print('predict start')
    try:
        model = BLSTMCRFModel.load_model('models')
    except Exception:
        print('Failed to load the model')
    newsId_set = set()
    try:
        with open('data/result_bert.txt', 'r', encoding='utf-8') as file:
            for line in file:
                newsId_set.add(line.split('\t')[0])
    except IOError:
        print('File does not exist')

    test_data = loadData('data/coreEntityEmotion_test_stage1.txt')
    test_data += loadData('data/coreEntityEmotion_train.txt')
    with open('data/result_bert.txt', 'a', encoding='utf-8') as file:
        for news in tqdm(test_data):
            if news['newsId'] in newsId_set:
Example #10
def eval_crf():
    model_path = r"D:\data\biendata\ccks2019_el\ner_model"
    model = BLSTMCRFModel.load_model(model_path)
    validate_x, validate_y = dload.load_json_data('validate')
    model.evaluate(validate_x, validate_y)
Example #11
## Script-oriented programming

from kashgari.tasks.seq_labeling import BLSTMCRFModel

from util import InputHelper

# Load the model
new_model = BLSTMCRFModel.load_model('./model')

# Read the test set
with open('./data/test', 'r', encoding='utf-8') as g:
    test_data = g.readlines()

# Predict on the test set
with open('./keywords_test', 'w', encoding='utf-8') as g_key:
    for ids, line in enumerate(test_data):
        try:
            label = InputHelper().iob_iobes(
                new_model.predict(line.replace('\t', '。')))
            result = InputHelper().result_to_json(line, label)
            line_keys = [entity['word'] for entity in result['entities']]
            g_key.write(','.join(line_keys) + '\n')
        except Exception as e:
            g_key.write('\n')

# "Artificial intelligence"
# Clean up some obvious errors and keep only the first three keywords. This is rough.
with open('./keywords_test', 'r', encoding='utf-8') as g:
    data = g.readlines()
with open('./keywords', 'w', encoding='utf-8') as f:
    for line in data: