Example #1
    def setUp(self):
        # Restore the preprocessor that was fitted during training.
        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))

        # Size the model configuration from the saved word and character vocabularies.
        config = ModelConfig()
        config.vocab_size = len(p.vocab_word)
        config.char_vocab_size = len(p.vocab_char)

        # Rebuild the sequence-labeling model and load the trained weights.
        model = SeqLabeling(config, ntags=len(p.vocab_tag))
        model.load(filepath=os.path.join(SAVE_ROOT, 'model_weights.h5'))

        self.tagger = anago.Tagger(model, preprocessor=p)
        self.sent = 'President Obama is speaking at the White House.'
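
A test built on this setUp would typically run the tagger over self.sent. A minimal sketch, assuming a hypothetical test method name; get_entities is the same call Example #6 uses, and the assertion only checks that something was recognized:

    def test_get_entities(self):
        # Tag the fixture sentence and make sure at least one entity comes back.
        entities = self.tagger.get_entities(self.sent)
        self.assertTrue(entities)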
Example #2
    def test_eval(self):
        test_path = os.path.join(DATA_ROOT, 'test.txt')
        x_test, y_test = load_data_and_labels(test_path)

        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))
        config = ModelConfig()
        config.vocab_size = len(p.vocab_word)
        config.char_vocab_size = len(p.vocab_char)

        model = SeqLabeling(config, ntags=len(p.vocab_tag))
        model.load(filepath=os.path.join(SAVE_ROOT, 'model_weights.h5'))

        evaluator = anago.Evaluator(model, preprocessor=p)
        evaluator.eval(x_test, y_test)
Example #3
    def setUp(self):
        SAVE_ROOT = os.path.join(os.path.dirname(__file__), '../models')

        model_config = ModelConfig()

        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))
        model_config.vocab_size = len(p.vocab_word)
        model_config.char_vocab_size = len(p.vocab_char)

        weights = 'model_weights.h5'

        self.tagger = anago.Tagger(model_config,
                                   weights,
                                   save_path=SAVE_ROOT,
                                   preprocessor=p)
        self.sent = 'President Obama is speaking at the White House.'
Example #4
    def test_load(self):
        X, y = reader.load_data_and_labels(self.filename)
        p = WordPreprocessor()
        p.fit(X, y)
        filepath = os.path.join(os.path.dirname(__file__), 'data/preprocessor.pkl')
        p.save(filepath)
        self.assertTrue(os.path.exists(filepath))

        loaded_p = WordPreprocessor.load(filepath)
        x_test1, y_test1 = p.transform(X, y)
        x_test2, y_test2 = loaded_p.transform(X, y)
        np.testing.assert_array_equal(x_test1[0], x_test2[0])  # word
        np.testing.assert_array_equal(x_test1[1], x_test2[1])  # char
        np.testing.assert_array_equal(y_test1, y_test2)
        if os.path.exists(filepath):
            os.remove(filepath)
Example #5
    def test_eval(self):
        DATA_ROOT = os.path.join(os.path.dirname(__file__),
                                 '../data/conll2003/en/tagging')
        SAVE_ROOT = os.path.join(os.path.dirname(__file__), '../models')

        model_config = ModelConfig()

        test_path = os.path.join(DATA_ROOT, 'test.txt')
        x_test, y_test = load_data_and_labels(test_path)

        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))
        model_config.vocab_size = len(p.vocab_word)
        model_config.char_vocab_size = len(p.vocab_char)

        weights = 'model_weights.h5'

        evaluator = anago.Evaluator(model_config,
                                    weights,
                                    save_path=SAVE_ROOT,
                                    preprocessor=p)
        evaluator.eval(x_test, y_test)
Example #6
import json
import os
import tornado.ioloop
import tornado.web

import anago
from anago.config import ModelConfig
from anago.data.preprocess import WordPreprocessor

SAVE_ROOT = os.path.join(os.path.dirname(__file__), '../../models')
model_config = ModelConfig()
# Restore the fitted preprocessor and size the model config from its vocabularies.
p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))
model_config.vocab_size = len(p.vocab_word)
model_config.char_vocab_size = len(p.vocab_char)
weights = 'model_weights.h5'
# Build the tagger once at module load time so every request reuses the same model.
tagger = anago.Tagger(model_config,
                      weights,
                      save_path=SAVE_ROOT,
                      preprocessor=p)


class MainHandler(tornado.web.RequestHandler):
    def get(self):
        self.render('index.html', sent='')

    def post(self):
        sent = self.get_argument('sent')
        entities = tagger.get_entities(sent)
        if entities:
            self.write(json.dumps(dict(entities)))
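
The snippet ends with the handler. A minimal sketch of how MainHandler is usually wired into a running Tornado application; the URL route, template_path, and port below are assumptions, not part of the original code:

def make_app():
    # Map the root URL to MainHandler and point Tornado at the directory
    # holding index.html, which MainHandler.get renders.
    return tornado.web.Application(
        [(r'/', MainHandler)],
        template_path=os.path.join(os.path.dirname(__file__), 'templates'),
    )


if __name__ == '__main__':
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()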