Пример #1
0
def build_corpus_features():
    """Assemble the tagging corpus, build its extended features and save both.

    The corpus is filled, in order, with:
      * the CoNLL training file, capped by MAX_SENT_SIZE / MAX_NR_SENTENCES,
      * the CoNLL dev file (no caps),
      * one sequence list per category listed below, read through
        ``read_sequence_list_brown``.

    The corpus is then saved to MODEL_DIR and the features to
    ``MODEL_DIR + "features.txt"``.

    Returns:
        A ``(corpus, features)`` tuple.
    """
    brown_categories = (
        'adventure',
        'belles_lettres',
        'editorial',
        'fiction',
        'government',
        'hobbies',
        'humor',
        'learned',
        'lore',
        'mystery',
        'news',
        'religion',
        'reviews',
        'romance',
    )

    tag_corpus = pcc.PostagCorpus()

    # Training data: limited in sentence length and number of sentences.
    training = tag_corpus.read_sequence_list_conll(
        data.find('train-02-21.conll'),
        max_sent_len=MAX_SENT_SIZE,
        max_nr_sent=MAX_NR_SENTENCES,
    )
    tag_corpus.add_sequence_list(training)

    # Development data: read in full.
    tag_corpus.add_sequence_list(
        tag_corpus.read_sequence_list_conll(data.find('dev-22.conll')))

    # One extra sequence list per category, added in the order listed.
    for category in brown_categories:
        tag_corpus.add_sequence_list(
            tag_corpus.read_sequence_list_brown(categories=category))

    extended = exfc.ExtendedFeatures(tag_corpus)
    extended.build_features()

    # Persist both artefacts side by side under MODEL_DIR.
    tag_corpus.save_corpus(MODEL_DIR)
    extended.save_features(MODEL_DIR + "features.txt")
    return tag_corpus, extended
Пример #2
0
def load_model():
    """Restore the saved corpus, features and model from MODEL_DIR.

    Loads the corpus from MODEL_DIR, the extended features from
    ``MODEL_DIR + "features.txt"``, and then a ``spc.StructuredPercetron``
    built on both, whose state is also loaded from MODEL_DIR.

    Returns:
        A ``(corpus, features, model)`` tuple.
    """
    tag_corpus = pcc.PostagCorpus()
    tag_corpus.load_corpus(MODEL_DIR)

    # The feature object is built on the loaded corpus before being filled.
    extended = exfc.ExtendedFeatures(tag_corpus)
    features_file = MODEL_DIR + "features.txt"
    extended.load_features(features_file, tag_corpus)

    perceptron = spc.StructuredPercetron(tag_corpus, extended)
    perceptron.load_model(MODEL_DIR)

    return tag_corpus, extended, perceptron
Пример #3
0
def build_corpus_features():
    """Build a corpus from the CoNLL training file, with extended features.

    Reads ``../data/train-02-21.conll`` (capped by MAX_SENT_SIZE and
    MAX_NR_SENTENCES), adds it to a fresh corpus and builds the extended
    features. The corpus is saved to MODEL_DIR and the features to
    ``MODEL_DIR + "features.txt"``.

    Returns:
        A ``(corpus, features)`` tuple.
    """
    tag_corpus = pcc.PostagCorpus()

    training = tag_corpus.read_sequence_list_conll(
        "../data/train-02-21.conll",
        max_sent_len=MAX_SENT_SIZE,
        max_nr_sent=MAX_NR_SENTENCES,
    )
    tag_corpus.add_sequence_list(training)

    extended = exfc.ExtendedFeatures(tag_corpus)
    extended.build_features()

    # Persist both artefacts side by side under MODEL_DIR.
    tag_corpus.save_corpus(MODEL_DIR)
    features_file = MODEL_DIR + "features.txt"
    extended.save_features(features_file)

    return tag_corpus, extended
Пример #4
0
# Build the features on id_f, which is created earlier (outside this excerpt).
id_f.build_features()
# --- Disabled experiment: StructuredPercetron with the id_f features --------
# The commented-out code below would train for 20 rounds on
# train_seq.seq_list, decode train/dev/test and print the three accuracies.
# NOTE(review): the disabled print uses Python 2 statement syntax.
#sp = spc.StructuredPercetron(corpus,id_f)
#sp.nr_rounds = 20
#sp.train_supervised(train_seq.seq_list)
#
#pred_train = sp.viterbi_decode_corpus(train_seq.seq_list)
#pred_dev = sp.viterbi_decode_corpus(dev_seq.seq_list)
#pred_test = sp.viterbi_decode_corpus(test_seq.seq_list)
#
#eval_train = sp.evaluate_corpus(train_seq.seq_list,pred_train)
#eval_dev = sp.evaluate_corpus(dev_seq.seq_list,pred_dev)
#eval_test = sp.evaluate_corpus(test_seq.seq_list,pred_test)
#
#print "Structured Percetron - ID Features Accuracy Train: %.3f Dev: %.3f Test: %.3f"%(eval_train,eval_dev,eval_test)

# Build the extended feature set on the same corpus object.
ex_f = exfc.ExtendedFeatures(corpus)
ex_f.build_features()
# --- Disabled experiment: StructuredPercetron with the ex_f features --------
# Same train / decode / evaluate / print sequence as the disabled block
# above, but using ex_f instead of id_f.
#sp = spc.StructuredPercetron(corpus,ex_f)
#sp.nr_rounds = 20
#sp.train_supervised(train_seq.seq_list)
#
#pred_train = sp.viterbi_decode_corpus(train_seq.seq_list)
#pred_dev = sp.viterbi_decode_corpus(dev_seq.seq_list)
#pred_test = sp.viterbi_decode_corpus(test_seq.seq_list)
#
#eval_train = sp.evaluate_corpus(train_seq.seq_list,pred_train)
#eval_dev = sp.evaluate_corpus(dev_seq.seq_list,pred_dev)
#eval_test = sp.evaluate_corpus(test_seq.seq_list,pred_test)
#
#print "Structured Percetron - Extended Features Accuracy Train: %.3f Dev: %.3f Test: %.3f"%(eval_train,eval_dev,eval_test)
Пример #5
0
# --- Run 1: CRF using the feature_mapper built earlier (outside this excerpt) ---
crf_online = crfo.CRFOnline(corpus.word_dict, corpus.tag_dict, feature_mapper)
crf_online.num_epochs = 20
crf_online.train_supervised(train_seq)

# Decode each split with the trained model, then score against the gold data.
pred_train = crf_online.viterbi_decode_corpus(train_seq)
pred_dev = crf_online.viterbi_decode_corpus(dev_seq)
pred_test = crf_online.viterbi_decode_corpus(test_seq)
eval_train = crf_online.evaluate_corpus(train_seq, pred_train)
eval_dev = crf_online.evaluate_corpus(dev_seq, pred_dev)
eval_test = crf_online.evaluate_corpus(test_seq, pred_test)

# Parenthesised single-argument print: same output under Python 2, and valid
# syntax under Python 3 (the bare print statement is a SyntaxError there).
print("CRF - ID Features Accuracy Train: %.3f Dev: %.3f Test: %.3f" % (eval_train, eval_dev, eval_test))


# --- Run 2: same pipeline, with extended features built on train_seq ---
# feature_mapper, crf_online, pred_* and eval_* are rebound here, so the
# results of run 1 are no longer reachable after this point.
feature_mapper = exfc.ExtendedFeatures(train_seq)
feature_mapper.build_features()

crf_online = crfo.CRFOnline(corpus.word_dict, corpus.tag_dict, feature_mapper)
crf_online.num_epochs = 20
crf_online.train_supervised(train_seq)

pred_train = crf_online.viterbi_decode_corpus(train_seq)
pred_dev = crf_online.viterbi_decode_corpus(dev_seq)
pred_test = crf_online.viterbi_decode_corpus(test_seq)
eval_train = crf_online.evaluate_corpus(train_seq, pred_train)
eval_dev = crf_online.evaluate_corpus(dev_seq, pred_dev)
eval_test = crf_online.evaluate_corpus(test_seq, pred_test)

print("CRF - Extended Features Accuracy Train: %.3f Dev: %.3f Test: %.3f" % (eval_train, eval_dev, eval_test))