示例#1
0
 def test_unknown_word(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=False)
     p = preprocessor.fit(X, y)
     X = [['$unknownword$', 'あ']]
     y = [['O', 'O']]
     X, y = p.transform(X, y)
示例#2
0
 def test_save(self):
     preprocessor = WordPreprocessor()
     filepath = os.path.join(os.path.dirname(__file__),
                             'data/preprocessor.pkl')
     preprocessor.save(filepath)
     self.assertTrue(os.path.exists(filepath))
     if os.path.exists(filepath):
         os.remove(filepath)
示例#3
0
 def test_calc_sequence_lengths(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=True)
     p = preprocessor.fit(X, y)
     _, y = p.transform(X, y)
     y_t = np.argmax(y, -1)
     y_t = y_t.astype(np.int32)
     sequence_lengths = np.argmin(y_t, -1)
示例#4
0
 def test_transform_with_padding(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=True)
     p = preprocessor.fit(X, y)
     X = p.transform(X)
     words, chars = X
     word, char = words[0][0], chars[0][0][0]
     self.assertIsInstance(int(word), int)
     self.assertIsInstance(int(char), int)
示例#5
0
 def test_transform_only_words(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=False)
     p = preprocessor.fit(X, y)
     X = p.transform(X)
     words, chars = X
     char, word = chars[0][0][0], words[0][0]
     self.assertIsInstance(word, int)
     self.assertIsInstance(char, int)
示例#6
0
 def test_preprocessor(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=False)
     p = preprocessor.fit(X, y)
     X, y = p.transform(X, y)
     words, chars = X
     char, word = chars[0][0][0], words[0][0]
     tag = y[0][0]
     self.assertIsInstance(word, int)
     self.assertIsInstance(char, int)
     self.assertIsInstance(tag, int)
     self.assertIsInstance(p.inverse_transform(y[0])[0], str)
示例#7
0
    def setUp(self):
        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))

        config = ModelConfig()
        config.vocab_size = len(p.vocab_word)
        config.char_vocab_size = len(p.vocab_char)

        model = SeqLabeling(config, ntags=len(p.vocab_tag))
        model.load(filepath=os.path.join(SAVE_ROOT, 'model_weights.h5'))

        self.tagger = anago.Tagger(model, preprocessor=p)
        self.sent = 'President Obama is speaking at the White House.'
示例#8
0
    def load(cls, dir_path):
        self = cls()
        self.p = WordPreprocessor.load(
            os.path.join(dir_path, cls.preprocessor_file))
        config = ModelConfig.load(os.path.join(dir_path, cls.config_file))
        dummy_embeddings = np.zeros(
            (config.vocab_size, config.word_embedding_size), dtype=np.float32)
        self.model = SeqLabeling(config,
                                 dummy_embeddings,
                                 ntags=len(self.p.vocab_tag))
        self.model.load(filepath=os.path.join(dir_path, cls.weight_file))

        return self
示例#9
0
    def test_eval(self):
        test_path = os.path.join(DATA_ROOT, 'test.txt')
        x_test, y_test = load_data_and_labels(test_path)

        p = WordPreprocessor.load(os.path.join(SAVE_ROOT, 'preprocessor.pkl'))
        config = ModelConfig()
        config.vocab_size = len(p.vocab_word)
        config.char_vocab_size = len(p.vocab_char)

        model = SeqLabeling(config, ntags=len(p.vocab_tag))
        model.load(filepath=os.path.join(SAVE_ROOT, 'model_weights.h5'))

        evaluator = anago.Evaluator(model, preprocessor=p)
        evaluator.eval(x_test, y_test)
示例#10
0
    def test_load(self):
        X, y = load_data_and_labels(self.filename)
        p = WordPreprocessor()
        p.fit(X, y)
        filepath = os.path.join(os.path.dirname(__file__),
                                'data/preprocessor.pkl')
        p.save(filepath)
        self.assertTrue(os.path.exists(filepath))

        loaded_p = WordPreprocessor.load(filepath)
        x_test1, y_test1 = p.transform(X, y)
        x_test2, y_test2 = loaded_p.transform(X, y)
        np.testing.assert_array_equal(x_test1[0], x_test2[0])  # word
        np.testing.assert_array_equal(x_test1[1], x_test2[1])  # char
        np.testing.assert_array_equal(y_test1, y_test2)
        if os.path.exists(filepath):
            os.remove(filepath)
示例#11
0
    def test_vocab_init(self):
        X, y = load_data_and_labels(self.filename)
        unknown_word = 'unknownword'
        X_test, y_test = [[unknown_word]], [['O']]

        preprocessor = WordPreprocessor(padding=False)
        p = preprocessor.fit(X, y)
        X_pred, _ = p.transform(X_test, y_test)
        words = X_pred[0][1]
        self.assertEqual(words, [p.vocab_word[UNK]])

        vocab_init = {unknown_word}
        preprocessor = WordPreprocessor(vocab_init=vocab_init, padding=False)
        p = preprocessor.fit(X, y)
        X_pred, _ = p.transform(X_test, y_test)
        words = X_pred[0][1]
        self.assertNotEqual(words, [p.vocab_word[UNK]])
示例#12
0
 def test_pad_sequences(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=True)
     p = preprocessor.fit(X, y)
     X, y = p.transform(X, y)
示例#13
0
 def test_to_numpy_array(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor(padding=False)
     p = preprocessor.fit(X, y)
     X, y = p.transform(X, y)
     y = np.asarray(y)
示例#14
0
 def test_fit(self):
     X, y = load_data_and_labels(self.filename)
     preprocessor = WordPreprocessor()
     p = preprocessor.fit(X, y)