示例#1
0
def run_lstm_crf(train=True):
    """Optionally train, then evaluate, the Bi-LSTM+CRF model.

    Loads the "train", "dev" and "test" corpora, builds the vocabularies
    from the training split, extends them for the CRF variant, and runs
    every split through ``prepocess_data_for_lstmcrf``.

    Args:
        train: When truthy, ``bilstm_crf_train`` is called on the train and
            dev splits before evaluation. When falsy, only evaluation runs.

    Returns:
        Whatever ``bilstm_crf_eval`` returns for the test split.
    """
    raw_train_words, raw_train_tags = build_corpus("train")
    raw_dev_words, raw_dev_tags = build_corpus("dev")
    raw_test_words, raw_test_tags = build_corpus("test")
    base_word2id, base_tag2id = build_vocab(raw_train_words, raw_train_tags)

    # An LSTM with a CRF layer additionally needs <start> and <end> added
    # to the maps (they are used during decoding).
    crf_word2id, crf_tag2id = extend_maps(
        base_word2id, base_tag2id, for_crf=True)

    # Some extra data preprocessing is also required for this model.
    train_words, train_tags = prepocess_data_for_lstmcrf(
        raw_train_words, raw_train_tags)
    dev_words, dev_tags = prepocess_data_for_lstmcrf(
        raw_dev_words, raw_dev_tags)
    test_words, test_tags = prepocess_data_for_lstmcrf(
        raw_test_words, raw_test_tags, test=True)

    train_pair = (train_words, train_tags)
    dev_pair = (dev_words, dev_tags)
    test_pair = (test_words, test_tags)

    if train:
        print("正在训练Bi-LSTM+CRF模型...")
        bilstm_crf_train(train_pair, dev_pair, crf_word2id, crf_tag2id)

    print("正在评估Bi-LSTM+CRF模型...")
    return bilstm_crf_eval(test_pair, crf_word2id, crf_tag2id)
示例#2
0
def run_crf(train=True):
    """Optionally train and dev-evaluate, then test, the CRF model.

    Args:
        train: When truthy, ``crf_train`` is called on the train split and
            ``crf_dev`` on the dev split before testing. When falsy, only
            the test step runs.

    Returns:
        Whatever ``crf_eval`` returns for the test split.
    """
    train_words, train_tags = build_corpus("train")
    dev_words, dev_tags = build_corpus("dev")
    test_words, test_tags = build_corpus("test")

    # NOTE(review): the vocabularies are not used by any CRF call below.
    # The call (and its two-value unpacking) is kept to preserve behavior;
    # confirm build_vocab has no side effects before removing it.
    _word2id, _tag2id = build_vocab(train_words, train_tags)

    train_pair = (train_words, train_tags)
    dev_pair = (dev_words, dev_tags)
    test_pair = (test_words, test_tags)

    if train:
        print("正在训练CRF模型...")
        crf_train(train_pair)
        print("正在评估CRF模型...")
        crf_dev(dev_pair)

    print("正在测试CRF模型...")
    return crf_eval(test_pair)
示例#3
0
def run_hmm(train=True):
    """Optionally train and dev-evaluate, then test, the HMM model.

    The vocabularies built from the training split are passed unchanged to
    every HMM call.

    Args:
        train: When truthy, ``hmm_train`` is called on the train split and
            ``hmm_dev`` on the dev split before testing. When falsy, only
            the test step runs.

    Returns:
        Whatever ``hmm_eval`` returns for the test split.
    """
    train_words, train_tags = build_corpus("train")
    dev_words, dev_tags = build_corpus("dev")
    test_words, test_tags = build_corpus("test")
    word_map, tag_map = build_vocab(train_words, train_tags)

    train_pair = (train_words, train_tags)
    dev_pair = (dev_words, dev_tags)
    test_pair = (test_words, test_tags)

    if train:
        print("正在训练HMM模型...")
        hmm_train(train_pair, word_map, tag_map)
        print("正在评估HMM模型...")
        hmm_dev(dev_pair, word_map, tag_map)

    print("正在测试HMM模型...")
    return hmm_eval(test_pair, word_map, tag_map)
示例#4
0
def run_lstm(train=True):
    """Optionally train, then evaluate, the bidirectional LSTM model.

    Args:
        train: When truthy, ``bilstm_train`` is called on the train and dev
            splits before evaluation. When falsy, only evaluation runs.

    Returns:
        Whatever ``bilstm_eval`` returns for the test split.
    """
    train_words, train_tags = build_corpus("train")
    dev_words, dev_tags = build_corpus("dev")
    test_words, test_tags = build_corpus("test")
    base_word2id, base_tag2id = build_vocab(train_words, train_tags)

    # Training the LSTM model requires PAD and UNK to be added to word2id
    # and tag2id.
    bilstm_word2id, bilstm_tag2id = extend_maps(
        base_word2id, base_tag2id, for_crf=False)

    train_pair = (train_words, train_tags)
    dev_pair = (dev_words, dev_tags)
    test_pair = (test_words, test_tags)

    if train:
        print("正在训练双向LSTM模型...")
        bilstm_train(train_pair, dev_pair, bilstm_word2id, bilstm_tag2id)

    print("正在评估双向LSTM模型...")
    return bilstm_eval(test_pair, bilstm_word2id, bilstm_tag2id)
示例#5
0
def ensemble_pred(hmm_pred, crf_pred, lstm_pred, lstmcrf_pred):
    """Evaluate the four models' predictions together on the test split.

    Reloads the "test" corpus to obtain its tag lists, then passes the
    predictions (in HMM, CRF, LSTM, LSTM+CRF order) along with those tags
    to ``ensemble_evaluate``.

    Args:
        hmm_pred: Predictions from the HMM model.
        crf_pred: Predictions from the CRF model.
        lstm_pred: Predictions from the bidirectional LSTM model.
        lstmcrf_pred: Predictions from the Bi-LSTM+CRF model.

    Returns:
        None. The result of ``ensemble_evaluate`` is not returned.
    """
    # Only the tag lists of the test split are needed here.
    _, gold_tag_lists = build_corpus("test")
    all_predictions = [hmm_pred, crf_pred, lstm_pred, lstmcrf_pred]
    ensemble_evaluate(all_predictions, gold_tag_lists)