Пример #1
0
from simplebilty import SimpleBiltyTagger
from simplebilty import load_tagger, save_tagger

### Use --dynet-seed $SEED

# Network sizes, handed positionally to SimpleBiltyTagger below.
in_dim, h_dim, c_in_dim, h_layers = 64, 100, 100, 1
# Iteration count passed to fit().
iters = 50
# NOTE(review): `trainer` is not referenced anywhere else in this example --
# confirm whether it was meant to be forwarded to the tagger.
trainer = "sgd"

# Dataset paths (.conllu files) for the three splits.
train_data = "data/da-ud-train.conllu"
dev_data = "data/da-ud-dev.conllu"
test_data = "data/da-ud-test.conllu"

tagger = SimpleBiltyTagger(
    in_dim,
    h_dim,
    c_in_dim,
    h_layers,
    embeds_file=None,
)

# Training data is read first, then the dev split is converted to indices.
train_X, train_Y = tagger.get_train_data(train_data)
dev_X, dev_Y = tagger.get_data_as_indices(dev_data)

tagger.initialize_graph()
# The dev split is used for validation with patience=2; the model goes to "tmp".
tagger.fit(
    train_X, train_Y, iters,
    val_X=dev_X, val_Y=dev_Y,
    patience=2,
    model_path="tmp",
)

# Reload from "tmp" so that evaluation uses the patience-2 model rather than
# the in-memory tagger left over from fit().
tagger = load_tagger("tmp")

test_X, test_Y = tagger.get_data_as_indices(test_data)
correct, total = tagger.evaluate(test_X, test_Y)
accuracy = correct / total
print(correct, total, accuracy)
Пример #2
0
vocab = Vocab(vocabfile)

# Build the tagger from the config; an embeddings file is passed only when the
# config contains an "embeds" entry, otherwise embeds_file is None.
tagger = SimpleBiltyTagger(
    config.in_dim,
    config.h_dim,
    config.c_in_dim,
    config.h_layers,
    embeds_file=config.embeds if "embeds" in config else None,
    word2id=vocab.word2id,
)

# NOTE(review): the tagger constructed above is replaced right away by the
# loaded one -- confirm whether the construction is still needed.
tagger = load_tagger(model)

# Evaluate the loaded model on the test file and report its accuracy.
test_X, test_Y = tagger.get_data_as_indices(testfile)
correct, total = tagger.evaluate(test_X, test_Y)
accuracy = correct / total
print("accuracy", accuracy)

# Collect the tag sequence of every item read from the test file and hand
# them, together with the indexed inputs, to the prediction writer.
dev_test_labels = [tags for _, tags in read_conll_file(testfile)]
tagger.get_predictions_output(test_X, dev_test_labels, "dev.xxx.out")
Пример #3
0
## python run_simply.py --dynet-seed 113

from simplebilty import SimpleBiltyTagger
from simplebilty import load_tagger, save_tagger

import random
### Use --dynet-seed $SEED
seed = 113  # assume we pass this to script

# Network sizes, handed positionally to SimpleBiltyTagger below.
in_dim, h_dim, c_in_dim, h_layers = 64, 100, 100, 1
# Only two iterations are run in this example.
iters = 2
# NOTE(review): `trainer` is not referenced anywhere else in this example --
# confirm whether it was meant to be forwarded to the tagger.
trainer = "sgd"

# Here the dev file is used for training and the test file for evaluation
# (presumably to keep this quick run small -- verify).
train_data = "data/da-ud-dev.conllu"
dev_data = "data/da-ud-test.conllu"

tagger = SimpleBiltyTagger(
    in_dim,
    h_dim,
    c_in_dim,
    h_layers,
    embeds_file=None,
)
train_X, train_Y = tagger.get_train_data(train_data)
tagger.initialize_graph()
tagger.fit(train_X, train_Y, iters, seed=seed)

# Score the freshly trained tagger.
test_X, test_Y = tagger.get_data_as_indices(dev_data)
correct, total = tagger.evaluate(test_X, test_Y)
accuracy = correct / total
print(correct, total, accuracy)

# test loading/saving: write the tagger to "tmp", read it back, and score the
# reloaded copy on the same indexed data.
save_tagger(tagger, "tmp")
tagger2 = load_tagger("tmp")

correct, total = tagger2.evaluate(test_X, test_Y)
accuracy = correct / total
print(correct, total, accuracy)