Пример #1
0
 def test_bag_dict_from_string_dict_without_tokenizer(self):
     """Check that the ``_id`` and ``~pv`` values are stored whole.

     Each of the two fields is expected to come back as a bag holding
     the original value (a string or a float) as a single entry with
     count 1.
     """
     # NOTE(review): presumably keys starting with '_' or '~' bypass the
     # tokenizer -- confirm against bag_dict.read.
     source = {
         '_id': 'テスト用のデータ001',
         '~pv': 123.0,
     }
     parsed = bag_dict().read(self.tokenizer, source)

     for field, raw_value in (('_id', 'テスト用のデータ001'), ('~pv', 123.0)):
         expected = bag_of_words()
         expected[raw_value] = 1
         self.assertEqual(expected, parsed[field])
Пример #2
0
 def test_bag_dict_from_string_dict_in_japanese(self):
     """Check that single-word Japanese fields yield one-entry bags.

     ``title`` and ``body`` each contain one word; the bag stored under
     each field name must contain exactly that word with count 1.
     """
     fields = {
         'title': 'テスト',
         'body': 'データ',
     }
     parsed = bag_dict().read(self.tokenizer, fields)

     # Build both expected bags first, then compare field by field.
     expected_title = bag_of_words()
     expected_title['テスト'] = 1
     expected_body = bag_of_words()
     expected_body['データ'] = 1

     self.assertEqual(expected_title, parsed['title'])
     self.assertEqual(expected_body, parsed['body'])
Пример #3
0
 @classmethod
 def setUpClass(cls):
     """Build the fixtures shared by every test in this class.

     Creates a ``bag_jag`` (``cls.bj``) and appends four ``bag_dict``
     documents to it, in order:

     * ``cls.bd0`` -- ``_id`` '0' with title, body and anchor fields
     * ``cls.bd1`` -- ``_id`` '1' with title and body fields
     * ``cls.bd2`` -- ``_id`` '2' with a body field only
     * ``cls.bd3`` -- ``_id`` '3' with no other fields

     Also builds ``cls.query``, a bag with one count each for the two
     terms 'テスト' and 'モニタ'.
     """
     # unittest calls setUpClass() on the class itself with no arguments,
     # so the method has to be a classmethod to receive ``cls``.
     tokenizer = Tokenizer()
     cls.bj = bag_jag()

     cls.bd0 = bag_dict().read(tokenizer, {
         '_id': '0',
         'title': 'テストデータ',
         'body': 'テスト',
         'anchor': 'モニタ',
     })
     cls.bj.append(cls.bd0)

     cls.bd1 = bag_dict().read(tokenizer, {
         '_id': '1',
         'title': 'テストデータ',
         'body': 'テスト',
     })
     cls.bj.append(cls.bd1)

     cls.bd2 = bag_dict().read(tokenizer, {
         '_id': '2',
         'body': 'テスト',
     })
     cls.bj.append(cls.bd2)

     cls.bd3 = bag_dict().read(tokenizer, {
         '_id': '3',
     })
     cls.bj.append(cls.bd3)

     cls.query = bag_of_words()
     cls.query['テスト'] = 1
     cls.query['モニタ'] = 1
Пример #4
0
 def test_bag_of_words_iadd(self):
     """Check that ``bow += bow`` doubles every term count.

     The sentence is read into a bag and then added to itself in place;
     'テスト' is expected to end at 4 and the other terms at 2.
     """
     sentence = 'テスト用のテストデータ'
     bow = bag_of_words().read(self.tokenizer, sentence)
     bow += bow

     doubled_counts = (
         ('テスト', 4),
         ('用', 2),
         ('の', 2),
         ('データ', 2),
     )
     for term, count in doubled_counts:
         self.assertEqual(count, bow[term])
Пример #5
0
 def test_bag_of_words(self):
     """Exercise basic item access on an empty ``bag_of_words``.

     Covers reading an unseen key (expected 0), incrementing a key with
     ``+=``, assigning a count directly, and the resulting ``len``.
     """
     bag = bag_of_words()

     # A key that was never stored must read back as zero.
     self.assertEqual(0, bag['unknown'])

     # In-place increment starting from the implicit zero.
     bag['test'] += 1
     self.assertEqual(1, bag['test'])

     # Direct assignment of a count.
     bag['data'] = 10
     self.assertEqual(10, bag['data'])

     # NOTE(review): 11 matches the sum of the stored counts (1 + 10);
     # presumably len() reports total occurrences -- confirm against
     # bag_of_words.__len__.
     self.assertEqual(11, len(bag))
Пример #6
0
 def test_bag_dict_reduce(self):
     """Check that ``reduce()`` returns the expected combined bag.

     Across title, body and anchor the input holds 'テスト' twice and
     'データ' twice, so the reduced bag must hold a count of 2 for each.
     """
     fields = {
         'title': 'テストデータ',
         'body': 'テスト',
         'anchor': 'データ',
     }
     parsed = bag_dict().read(self.tokenizer, fields)

     expected = bag_of_words()
     for term in ('テスト', 'データ'):
         expected[term] = 2

     self.assertEqual(expected, parsed.reduce())
Пример #7
0
 def test_bag_dict(self):
     """Check that an unseen field of a new ``bag_dict`` equals an empty bag."""
     documents = bag_dict()
     empty_bag = bag_of_words()
     missing_field = documents['unknown']
     self.assertEqual(empty_bag, missing_field)
Пример #8
0
 def test_bag_of_words_from_string_in_japanese(self):
     """Check the term counts obtained by reading a Japanese sentence.

     'テスト' occurs twice in the sentence; '用', 'の' and 'データ' are each
     expected once.
     """
     sentence = 'テスト用のテストデータ'
     bow = bag_of_words().read(self.tokenizer, sentence)

     expected_counts = (
         ('テスト', 2),
         ('用', 1),
         ('の', 1),
         ('データ', 1),
     )
     for term, count in expected_counts:
         self.assertEqual(count, bow[term])