Пример #1
0
def predict_a_file(test_file, out_file, model, add_o_tag):
    assert os.path.isfile(test_file)

    model = Model(model_path=model)
    parameters = model.parameters

    # Data parameters
    lower = parameters['lower']
    zeros = parameters['zeros']
    tag_scheme = model.parameters['tag_scheme']

    # Load reverse mappings
    word_to_id, char_to_id, tag_to_id = [{
        v: k
        for k, v in x.items()
    } for x in [model.id_to_word, model.id_to_char, model.id_to_tag]]

    print 'Reloading previous model...'
    _, f_eval = model.build(training=False, **parameters)
    model.reload()

    test_sentences = loader.load_sentences(test_file, lower, zeros)
    update_tag_scheme(test_sentences, tag_scheme)

    test_data = prepare_dataset2(test_sentences, word_to_id, char_to_id,
                                 tag_to_id, model.feature_maps, lower)

    print "input: ", test_file, ":", len(test_sentences), len(test_data)
    print "output: ", out_file

    predict(parameters,
            f_eval,
            test_sentences,
            test_data,
            model.id_to_tag,
            out_file,
            add_O_tags=add_o_tag)
Пример #2
0
from tabulate import tabulate
embeddings_params, model_params = model.count_parameters()
print tabulate(embeddings_params, ["name", "shape", "size"])
print tabulate(model_params, ["name", "shape", "size"])


print "Parameters: \n  *", "\n  * ".join([str(k) + " = " + str(v) for k, v in model.parameters.items()])
# print "Parameters: \n", model.parameters

test_file = opts.test_file
out_file = opts.out_file

test_sentences = loader.load_sentences(test_file, lower, zeros)
update_tag_scheme(test_sentences, tag_scheme)

test_data = prepare_dataset2(
    test_sentences, word_to_id, char_to_id, tag_to_id, model.feature_maps, lower
)

print "input: ", test_file, ":", len(test_sentences), len(test_data)
print "output: ", out_file

import datetime
t1 = datetime.datetime.now()
predict(parameters, f_eval, test_sentences, test_data, model.id_to_tag, out_file, add_O_tags=opts.add_o_tag)
t2 = datetime.datetime.now()
print "done in ", (t2-t1).total_seconds(), "seconds"


print "finish !!!"
Пример #3
0
    # NOTE(review): this is the body of a loop whose header is not visible
    # here; `f` looks like one feature description (a dict with at least the
    # keys 'name', 'column' and 'dim') -- confirm against the loop header.
    print "--------------------"
    # feature_mapping returns three objects for feature `f`, computed from the
    # training sentences; presumably a frequency dictionary plus the two
    # lookup tables (tag -> id and id -> tag) -- verify in feature_mapping.
    dico_ftag, ftag_to_id, id_to_ftag = feature_mapping(train_sentences, f)
    # Report the size of each returned mapping for this feature.
    print 'feature_name:', f['name']
    print 'dico_ftag   :', len(dico_ftag)
    print 'ftag_to_id  :', len(ftag_to_id)
    print 'id_to_ftag  :', len(id_to_ftag)
    # Store the feature's settings together with its mappings so that all of
    # them travel in a single feature_maps entry.
    feature_maps.append({
        'name': f['name'],
        'column': f['column'],
        'dim': f['dim'],
        'dico_ftag': dico_ftag,
        'ftag_to_id': ftag_to_id,
        'id_to_ftag': id_to_ftag
    })

train_data = prepare_dataset2(train_sentences, word_to_id, char_to_id,
                              tag_to_id, feature_maps, lower)

dev_data = prepare_dataset2(dev_sentences, word_to_id, char_to_id, tag_to_id,
                            feature_maps, lower)
test_data = prepare_dataset2(test_sentences, word_to_id, char_to_id, tag_to_id,
                             feature_maps, lower)

print "%i / %i / %i sentences in train / dev / test." % (
    len(train_data), len(dev_data), len(test_data))

# Save the mappings to disk
print 'Saving the mappings to disk...'
model.save_mappings(id_to_word, id_to_char, id_to_tag, feature_maps)

# Build the model
# f_train, f_eval, f_test = model.build(**parameters)