示例#1
0
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3: cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = '/tmp/ne_chunker_{0}.pickle'.format(fmt)
    print('Saving chunker to {0}...'.format(outfilename))

    with open(outfilename, 'wb') as outfile:
        pickle.dump(cp, outfile, -1)

    return cp


if __name__ == '__main__':
    # Re-import build_model through its package path instead of using the
    # copy defined in this script's __main__ namespace, so that the pickled
    # chunker records the package module as its class's home module:
    from nltk.chunk.named_entity import build_model

    # Build one model per supported format, in the same order as before.
    for fmt in ('binary', 'multiclass'):
        build_model(fmt)
示例#2
0
    print("Loading eval data...")
    eval_paths = [find("corpora/ace_data/ace.eval")]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print("Evaluating...")
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3:
            cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = "/tmp/ne_chunker_{0}.pickle".format(fmt)
    print("Saving chunker to {0}...".format(outfilename))

    with open(outfilename, "wb") as outfile:
        pickle.dump(cp, outfile, -1)

    return cp


if __name__ == "__main__":
    # Re-import build_model through its package path instead of using the
    # copy defined in this script's __main__ namespace, so that the pickled
    # chunker records the package module as its class's home module:
    from nltk.chunk.named_entity import build_model

    # Build one model per supported format, in the same order as before.
    for fmt in ("binary", "multiclass"):
        build_model(fmt)
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3: cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = '/tmp/ne_chunker_%s.pickle' % fmt
    print('Saving chunker to %s...' % outfilename)

    with open(outfilename, 'wb') as outfile:
        pickle.dump(cp, outfile, -1)

    return cp


if __name__ == '__main__':
    # Re-import build_model through its package path instead of using the
    # copy defined in this script's __main__ namespace, so that the pickled
    # chunker records the package module as its class's home module:
    from nltk.chunk.named_entity import build_model

    # Build one model per supported format, in the same order as before.
    for fmt in ('binary', 'multiclass'):
        build_model(fmt)
示例#4
0
    print("Loading eval data...")
    eval_paths = [find("corpora/ace_data/ace.eval")]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print("Evaluating...")
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3:
            cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = "/tmp/ne_chunker_%s.pickle" % fmt
    print("Saving chunker to %s..." % outfilename)

    with open(outfilename, "wb") as out:
        pickle.dump(cp, out, -1)

    return cp


if __name__ == "__main__":
    # Re-import build_model through its package path instead of using the
    # copy defined in this script's __main__ namespace, so that the pickled
    # chunker records the package module as its class's home module:
    from nltk.chunk.named_entity import build_model

    # Build one model per supported format, in the same order as before.
    for fmt in ("binary", "multiclass"):
        build_model(fmt)