Python DAWG示例，lexpy.dawg.DAWG Python示例

示例#1

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_word_count_greater_than_zero(self):
     """Add three distinct words, call reduce(), and check the word count.

     The count must be positive and, more precisely, equal to 3.
     """
     words = ['ash', 'ashes', 'ashley']
     dawg = self.dawg = DAWG()
     dawg.add_all(words)
     dawg.reduce()
     positive_message = "The number of words should be greater than 0"
     self.assertGreater(dawg.get_word_count(), 0, positive_message)
     self.assertEqual(3, dawg.get_word_count(), "Word count not equal")

示例#2

0

显示文件

 def test_dawg_wildcard_exception(self):
     """Check that search('#$%^a') raises InvalidWildCardExpressionError.

     Before searching, the DAWG is checked for its type and for holding
     'ash' and 'ashley'.
     """
     type_message = "Object should be of type `lexpy.dawg.DAWG`"
     self.dawg = DAWG()
     self.dawg.add_all(['ab', 'as', 'ash', 'ashley'])
     self.assertIsInstance(self.dawg, DAWG, type_message)
     for word in ('ash', 'ashley'):
         self.assertTrue(word in self.dawg, "Word should be in dawg")
     with self.assertRaises(InvalidWildCardExpressionError):
         self.dawg.search('#$%^a')

示例#3

0

显示文件

 def test_dawg_question_search(self):
     """Check that the pattern 'a?' yields exactly 'ab' and 'as'.

     Both sides are sorted before comparing, so result order is ignored.
     """
     dawg = DAWG()
     self.dawg = dawg
     dawg.add_all(['ab', 'as', 'ash', 'ashley'])
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     for word in ('ash', 'ashley'):
         self.assertTrue(word in dawg, "Word should be in dawg")
     found = sorted(dawg.search('a?'))
     wanted = sorted(['ab', 'as'])
     self.assertEqual(found, wanted, 'The lists should be equal')

示例#4

0

显示文件

 def test_word_add_all_with_number(self):
     """Feed add_all() a tuple holding two strings and the integer 3.

     The assertions expect only the two strings to be counted as words.
     """
     mixed_items = ('axe', 'kick', 3)  # tuple with one integer
     dawg = self.dawg = DAWG()
     dawg.add_all(mixed_items)
     dawg.reduce()
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     for word in ('axe', 'kick'):
         self.assertTrue(word in dawg, "Word should be in dawg")
     self.assertEqual(2, dawg.get_word_count(), "Word count not equal")

示例#5

0

显示文件

 def test_word_add_all_set(self):
     """Feed add_all() a set of two words and check both are stored."""
     word_set = {'axe', 'kick'}
     dawg = self.dawg = DAWG()
     dawg.add_all(word_set)
     dawg.reduce()
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     # Membership is checked in a fixed order, independent of set ordering.
     for word in ('axe', 'kick'):
         self.assertTrue(word in dawg, "Word should be in dawg")
     self.assertEqual(2, dawg.get_word_count(), "Word count not equal")

示例#6

0

显示文件

 def test_dawg_node_count(self):
     """Check word count (2) and len() (7) for a DAWG of 'ash' and 'ashley'."""
     words = ['ash', 'ashley']
     dawg = self.dawg = DAWG()
     dawg.add_all(words)
     dawg.reduce()
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     for word in words:
         self.assertTrue(word in dawg, "Word should be in dawg")
     self.assertEqual(2, dawg.get_word_count(), "Word count not equal")
     node_total = len(dawg)
     self.assertEqual(7, node_total, "Number of nodes")

示例#7

0

显示文件

 def test_word_add_all_file_path(self):
     """Feed add_all() the `small_dataset` source and check the loaded words.

     Three sample words must be present and the word count must be 178691.
     """
     dawg = self.dawg = DAWG()
     dawg.add_all(small_dataset)  # From a file
     dawg.reduce()
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     for word in ('AARGH', 'AARRGHH', 'AAS'):
         self.assertTrue(word in dawg, "Word should be in dawg")
     expected_total = 178691
     self.assertEqual(expected_total, dawg.get_word_count(), "Word count not equal")

示例#8

0

显示文件

 def test_dawg_prefix_search(self):
     """Check prefix lookups for 'ash' on a DAWG that lacks the word 'ash'.

     'ash' must not be reported as a word, but must be reported as a prefix,
     and search_with_prefix('ash') must return the three 'ash...' words.
     """
     dawg = self.dawg = DAWG()
     dawg.add_all(['ashlame', 'ashley', 'askoiu', 'ashlo'])
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     self.assertFalse('ash' in dawg, "Word should not be in dawg")
     self.assertTrue('ashley' in dawg, "Word should be in dawg")
     self.assertEqual(4, dawg.get_word_count(), "Word count not equal")
     self.assertTrue(dawg.contains_prefix('ash'), "Prefix should be present in DAWG")
     # Sort both sides so the comparison ignores result order.
     found = sorted(dawg.search_with_prefix('ash'))
     wanted = sorted(['ashlame', 'ashley', 'ashlo'])
     self.assertEqual(found, wanted, 'The lists should be equal')

示例#9

0

显示文件

 def test_dawg_node_prefix_not_exists(self):
     """Check that contains_prefix() is falsy for prefixes not in the DAWG.

     The DAWG holds 'ash' and 'ashley'; none of 'xmas', 'xor' or 'sh' is a
     prefix of either word, so every lookup is expected to be falsy.
     """
     self.dawg = DAWG()
     self.dawg.add_all(['ash', 'ashley'])
     self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     self.assertTrue('ash' in self.dawg, "Word should be in dawg")
     self.assertTrue('ashley' in self.dawg, "Word should be in dawg")
     self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal")
     # The failure text has to describe the expectation being checked: these
     # prefixes are expected to be absent.
     absent_message = "Prefix should not be present in DAWG"
     for prefix in ('xmas', 'xor', 'sh'):
         self.assertFalse(self.dawg.contains_prefix(prefix), absent_message)

示例#10

0

显示文件

 def test_word_add_all_gen(self):
     """Feed add_all() a generator of three words and check all are stored."""
     words = ['ash', 'ashley', 'simpson']
     word_stream = (word for word in words)  # generator
     dawg = self.dawg = DAWG()
     dawg.add_all(word_stream)
     dawg.reduce()
     self.assertIsInstance(dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`")
     for word in words:
         self.assertTrue(word in dawg, "Word should be in dawg")
     self.assertEqual(3, dawg.get_word_count(), "Word count not equal")

示例#11

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_edit_distance_search(self):
     """Check search_within_distance('arie', dist=2) on a 40-word DAWG.

     The result is compared with assertListEqual, so both content and order
     of the returned list matter.
     """
     vocabulary = [
         'abhor', 'abuzz', 'accept', 'acorn', 'agony', 'albay', 'albin',
         'algin', 'alisa', 'almug', 'altai', 'amato', 'ampyx', 'aneto',
         'arbil', 'arrow', 'artha', 'aruba', 'athie', 'auric', 'aurum',
         'cap', 'common', 'dime', 'eyes', 'foot', 'likeablelanguage',
         'lonely', 'look', 'nasty', 'pet', 'psychotic', 'quilt', 'shock',
         'smalldusty', 'sore', 'steel', 'suit', 'tank', 'thrill'
     ]
     dawg = self.dawg = DAWG()
     dawg.add_all(vocabulary)
     dawg.reduce()
     close_matches = dawg.search_within_distance('arie', dist=2)
     self.assertListEqual(close_matches, ['arbil', 'athie', 'auric'])

示例#12

0

显示文件

class TestDAWGExactWordSearch(unittest.TestCase):
    """Exact-word membership checks on a DAWG of 'ash' and 'ashley'."""

    def _load_sample(self):
        """Store a fresh DAWG in self.dawg, add the sample words, call reduce()."""
        self.dawg = DAWG()
        self.dawg.add_all(['ash', 'ashley'])
        self.dawg.reduce()

    def test_word_in_dawg(self):
        """A word that was added is reported by the `in` operator."""
        self._load_sample()
        present = 'ash' in self.dawg
        self.assertTrue(present, "Word should be in dawg")

    def test_word_not_int_dawg(self):
        """A word that was never added is not reported by the `in` operator."""
        self._load_sample()
        present = 'salary' in self.dawg
        self.assertFalse(present, "Word should not be in dawg")

示例#13

0

显示文件

文件： utils.py 项目： rohithkodali/lexpy

def _build_from_file(infile=None, _type='Trie'):
    if infile is None:
        raise ValueError("Please provide the file path")
    fsa = None
    if _type == 'Trie':
        fsa = Trie()
        fsa.add_all(infile)
    elif _type == 'DAWG':
        fsa = DAWG()
        fsa.add_all(infile)
    return fsa

示例#14

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

class TestDAWGNodeCount(unittest.TestCase):
    """Checks on len() of a DAWG after reduce() has been called."""

    def test_dawg_node_count(self):
        """'ash' + 'ashley': two words are stored and len() is 6."""
        words = ['ash', 'ashley']
        dawg = self.dawg = DAWG()
        dawg.add_all(words)
        dawg.reduce()
        self.assertIsInstance(dawg, DAWG,
                              "Object should be of type `lexpy.dawg.DAWG`")
        for word in words:
            self.assertTrue(word in dawg, "Word should be in dawg")
        self.assertEqual(2, dawg.get_word_count(), "Word count not equal")
        node_total = len(dawg)
        self.assertEqual(6, node_total, "Number of nodes")

    def test_dawg_reduced_node_count(self):
        """'tap', 'taps', 'top', 'tops': len() is 6 after reduce()."""
        dawg = self.dawg = DAWG()
        dawg.add_all(["tap", "taps", "top", "tops"])
        dawg.reduce()
        node_total = len(dawg)
        self.assertEqual(6, node_total, "Number of nodes")

示例#15

0

显示文件

文件： query.py 项目： geospatial-jeff/geohash-playground

def build_lexpy_dawg(geohash_list):
    """Build a DAWG from `geohash_list`, call reduce(), and wrap it.

    Args:
        geohash_list: Source handed unchanged to DAWG.add_all().

    Returns:
        A LexpyDawg constructed around the populated DAWG.
    """
    automaton = DAWG()
    automaton.add_all(geohash_list)
    automaton.reduce()
    wrapped = LexpyDawg(automaton)
    return wrapped

示例#16

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

class TesDAWGWordInsert(unittest.TestCase):
    """Insertion tests: add() and add_all() fed with several source kinds."""

    def _load(self, source):
        """Store a fresh DAWG in self.dawg, feed it `source`, call reduce()."""
        self.dawg = DAWG()
        self.dawg.add_all(source)
        self.dawg.reduce()

    def _check_contents(self, words, expected_count):
        """Assert type, membership of each word (in order), then word count."""
        self.assertIsInstance(self.dawg, DAWG,
                              "Object should be of type `lexpy.dawg.DAWG`")
        for word in words:
            self.assertTrue(word in self.dawg, "Word should be in dawg")
        self.assertEqual(expected_count, self.dawg.get_word_count(),
                         "Word count not equal")

    def test_word_add(self):
        """A single word inserted with add() is found; reduce() is not called."""
        dawg = self.dawg = DAWG()
        dawg.add('axe')
        self.assertIsInstance(dawg, DAWG,
                              "Object should be of type `lexpy.dawg.DAWG`")
        self.assertTrue('axe' in dawg, "Word should be in dawg")

    def test_word_add_all_list(self):
        """add_all() fed with a list."""
        self._load(['axe', 'kick'])
        self._check_contents(('axe', 'kick'), 2)

    def test_word_add_all_set(self):
        """add_all() fed with a set."""
        self._load({'axe', 'kick'})
        self._check_contents(('axe', 'kick'), 2)

    def test_word_add_all_tuple(self):
        """add_all() fed with a tuple."""
        self._load(('axe', 'kick'))
        self._check_contents(('axe', 'kick'), 2)

    def test_word_add_all_with_number(self):
        """add_all() fed with a tuple of two strings.

        NOTE(review): the test name suggests the tuple should contain an
        integer, but the input holds only strings - confirm the intent.
        """
        self._load(('axe', 'kick'))
        self._check_contents(('axe', 'kick'), 2)

    def test_word_add_all_gen(self):
        """add_all() fed with a generator."""
        words = ['ash', 'ashley', 'simpson']
        self._load(word for word in words)
        self._check_contents(words, 3)

    def test_word_add_all_file_path(self):
        """add_all() fed with `small_dataset`; 178691 words are expected."""
        self._load(small_dataset)  # From a file
        self._check_contents(('AARGH', 'AARRGHH', 'AAS'), 178691)

示例#17

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_word_add(self):
     """Insert one word with add() and check it is found with `in`."""
     type_message = "Object should be of type `lexpy.dawg.DAWG`"
     dawg = self.dawg = DAWG()
     dawg.add('axe')
     self.assertIsInstance(dawg, DAWG, type_message)
     found = 'axe' in dawg
     self.assertTrue(found, "Word should be in dawg")

示例#18

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_word_in_dawg(self):
     """A word that was added is reported by the `in` operator."""
     dawg = self.dawg = DAWG()
     dawg.add_all(['ash', 'ashley'])
     dawg.reduce()
     found = 'ash' in dawg
     self.assertTrue(found, "Word should be in dawg")

示例#19

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_word_not_int_dawg2(self):
     """A two-token string that was never added is not reported by `in`."""
     dawg = self.dawg = DAWG()
     dawg.add_all(['ash', 'ashley'])
     dawg.reduce()
     found = 'mash lolley' in dawg
     self.assertFalse(found, "Word should not be in dawg")

示例#20

0

显示文件

 def test_with_count(self):
     """search('a*', with_count=True) returns (word, count) pairs.

     'ashes' is added twice, so its expected count is 2.
     """
     dawg = DAWG()
     dawg.add_all(['ash', 'ashes', 'ashes', 'ashley'])
     dawg.reduce()
     matches = dawg.search('a*', with_count=True)
     self.assertListEqual([('ash', 1), ('ashes', 2), ('ashley', 1)], matches)

示例#21

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_word_count_zero(self):
     """An empty list fed to add_all() leaves the word count at 0."""
     no_words = []
     dawg = self.dawg = DAWG()
     dawg.add_all(no_words)
     dawg.reduce()
     total = dawg.get_word_count()
     self.assertEqual(0, total, "Word count not equal")

示例#22

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

class TestWildCardSearch(unittest.TestCase):
    """Wildcard search() tests using '*' and '?' patterns."""

    def _prepare(self, words):
        """Build and reduce self.dawg from `words`, then run the shared checks.

        The shared checks are the type assertion followed by membership of
        'ash' and 'ashley', which every test in this class performs first.
        """
        self.dawg = DAWG()
        self.dawg.add_all(words)
        self.dawg.reduce()
        self.assertIsInstance(self.dawg, DAWG,
                              "Object should be of type `lexpy.dawg.DAWG`")
        for word in ('ash', 'ashley'):
            self.assertTrue(word in self.dawg, "Word should be in dawg")

    def _assert_search(self, pattern, expected):
        """Compare search(pattern) with `expected`, ignoring result order."""
        found = sorted(self.dawg.search(pattern))
        self.assertEqual(found, sorted(expected), 'The lists should be equal')

    def test_dawg_asterisk_search(self):
        """'a*' matches both stored words."""
        self._prepare(['ash', 'ashley'])
        self._assert_search('a*', ['ash', 'ashley'])

    def test_dawg_question_search(self):
        """'a?' matches only the two-letter words."""
        self._prepare(['ab', 'as', 'ash', 'ashley'])
        self._assert_search('a?', ['ab', 'as'])

    def test_dawg_wildcard_search(self):
        """'*a******?' matches all four stored words."""
        self._prepare(['ab', 'as', 'ash', 'ashley'])
        self._assert_search('*a******?', ['ab', 'as', 'ash', 'ashley'])

    def test_dawg_wildcard_exception(self):
        """search('#$%^a') raises InvalidWildCardExpressionError."""
        self._prepare(['ab', 'as', 'ash', 'ashley'])
        with self.assertRaises(InvalidWildCardExpressionError):
            self.dawg.search('#$%^a')

示例#23

0

显示文件

 def test_without_count(self):
     """search('a*') returns each matching word once.

     'ashes' is added twice but is expected a single time in the result.
     """
     dawg = DAWG()
     dawg.add_all(['ash', 'ashes', 'ashes', 'ashley'])
     dawg.reduce()
     matches = dawg.search('a*')
     self.assertListEqual(['ash', 'ashes', 'ashley'], matches)

示例#24

0

显示文件

文件： test_dawg.py 项目： aosingh/lexpy

 def test_dawg_reduced_node_count(self):
     """'tap', 'taps', 'top', 'tops': len() is 6 after reduce()."""
     words = ["tap", "taps", "top", "tops"]
     dawg = self.dawg = DAWG()
     dawg.add_all(words)
     dawg.reduce()
     node_total = len(dawg)
     self.assertEqual(6, node_total, "Number of nodes")