示例#1
0
    def test_basic(self):
        # Smoke test: construction, item assignment / lookup / deletion,
        # membership, rejection of non-string keys and node accounting.

        # A freshly created trie already reports one node.
        self.assertEqual(fasttrie.Trie().node_count(), 1)

        # Create, drop and re-create a trie before actually using it.
        tr = fasttrie.Trie()
        del tr
        tr = fasttrie.Trie()
        tr[uni_escape("key")] = 55

        self.assertIn(uni_escape("key"), tr)

        # Neither a prefix of a stored key nor a non-string is a member.
        self.assertNotIn(uni_escape("ke"), tr)
        self.assertNotIn(5, tr)
        self.assertEqual(len(tr), 1)

        # Looking up a non-string key raises the extension's own error type.
        self.assertRaises(_fasttrie.Error, tr.__getitem__, 5)

        ucs1_string = uni_escape("testing")
        ucs2_string = uni_escape("testing\N{ARABIC LETTER ALEF}")
        ucs4_string = uni_escape("testing\N{GOTHIC LETTER AHSA}")

        tr[ucs1_string] = 4
        tr[ucs2_string] = 5
        tr[ucs4_string] = 6

        self.assertEqual(tr[ucs1_string], 4)
        self.assertEqual(tr[ucs2_string], 5)
        self.assertEqual(tr[ucs4_string], 6)
        del tr[ucs2_string]
        self.assertRaises(KeyError, tr.__getitem__, ucs2_string)

        # "tes" is only a prefix of stored keys, so deleting it must fail.
        with self.assertRaises(KeyError):
            del tr[uni_escape("tes")]

        # Assigning through a non-string key must fail as well.
        with self.assertRaises(_fasttrie.Error):
            tr[5] = 54

        del tr
        tr = self._create_trie()
        self.assertEqual(tr.node_count(), 11)

        del tr
        # Every 3-letter combination of ASCII letters, each mapped to itself.
        test_strings = sorted(''.join(key_iter) for key_iter in itertools.product(string.ascii_letters, repeat=3))
        tr = fasttrie.Trie()
        for key in test_strings:
            tr[key] = key
        self.assertEqual(len(tr.items()), 52 ** 3)
        self.assertEqual(tr.node_count(), ((52 ** 3) + (52 ** 2) + (52 ** 1) + 1))  # 52 children for each node up the tree
        sorted_keys = sorted(tr.keys())
        self.assertEqual(sorted_keys, sorted(tr.values()))
        self.assertEqual(sorted_keys, test_strings)
示例#2
0
 def test_update(self):
     # update() is fed, in turn, a list of pairs, keyword arguments, a dict
     # and another Trie; after each call the sorted items must match.
     trie = fasttrie.Trie()

     # 1) iterable of (key, value) pairs
     trie.update([('a', 1), ('b', 2)])
     after_pairs = sorted(trie.items())
     self.assertEqual(after_pairs, [('a', 1), ('b', 2)])

     # 2) keyword arguments: 'b' is overwritten, 'c' is new
     trie.update(b=3, c=4)
     after_kwargs = sorted(trie.items())
     self.assertEqual(after_kwargs, [('a', 1), ('b', 3), ('c', 4)])

     # 3) plain dict: 'c' is overwritten, 'd' is new
     trie.update({'c': 5, 'd': 6})
     after_dict = sorted(trie.items())
     self.assertEqual(after_dict, [('a', 1), ('b', 3), ('c', 5), ('d', 6)])

     # 4) another trie: 'a' is overwritten, 'e' is new
     other = fasttrie.Trie(e=7, a=0)
     trie.update(other)
     after_trie = sorted(trie.items())
     self.assertEqual(after_trie, [('a', 0), ('b', 3), ('c', 5), ('d', 6), ('e', 7)])
示例#3
0
 def test_get(self):
     # get() must return the stored value for present keys (including a
     # stored None) and, for absent keys, the given default or None.
     trie = fasttrie.Trie(a=1, b=None)

     # (positional arguments for get(), expected result)
     cases = (
         (('a',), 1),
         (('b',), None),
         (('c',), None),
         (('d', 'foo'), 'foo'),
         (('a', 'foo'), 1),
     )
     for args, expected in cases:
         self.assertEqual(trie.get(*args), expected)
示例#4
0
    def _test_corrections_with_dataset(self):
        # Checks corrections() / iter_corrections() against the key list read
        # from tests/out_keys_8859_9. The name does not start with "test", so
        # the default unittest loader does not collect this method.
        tr = fasttrie.Trie()

        lines = _read_lines(path="tests/out_keys_8859_9", encoding="iso-8859-9")
        for line in lines:
            tr[line] = 2

        # Expected sizes are fixed properties of the dataset file.
        self.assertEqual(len(tr), 82489)
        self.assertEqual(tr.node_count(), 310764)
        self.assertEqual(tr[uni_escape("ramazan")], 2)
        self.assertEqual(len(tr.corrections(uni_escape("ra"), 3)), 5639)
        self.assertEqual(len(set(tr.iter_corrections(uni_escape("ra"), 3))), 5639)
        # The lazy and the eager API must produce the same set of corrections.
        self.assertEqual(set(tr.iter_corrections(uni_escape("abe"), 3)),
            tr.corrections(uni_escape("abe"), 3))

        # For one trie element (the last one produced by iter_suffixes()),
        # check that every result of corrections(item, depth) lies within a
        # Damerau-Levenshtein distance of `depth`, for depth = 1, 2, 3.
        MAX_EDIT_DISTANCE = 4  # exclusive upper bound of the depths tried
        items = list(tr.iter_suffixes())
        item = items.pop()
        for i in range(1, MAX_EDIT_DISTANCE):
            crs = tr.corrections(item, i)
            for e in crs:
                self.assertLessEqual(damerau_levenshtein(item, e), i)
示例#5
0
 def test_copy(self):
     # copy() must produce a trie with equal items that holds its own
     # reference to every stored value, without referencing key objects.
     #
     # Refcount deltas are measured on fresh object() instances: on
     # CPython 3.12+ objects such as None and small ints are immortal
     # (PEP 683) and sys.getrefcount() reports a constant for them, so a
     # "+1 after copy" check on those values could never pass.
     key = "aqswdefr"  # String unlikely to be used elsewhere, for accurate refcount tracking
     tracked_c = object()
     tracked_d = object()
     # 'a' and 'b' keep the plain int / None values in the equality check.
     tr = fasttrie.Trie(a=1, b=None, c=tracked_c, d=tracked_d)
     tr[key] = 0
     key_refcount = sys.getrefcount(key)
     refcounts = (sys.getrefcount(tracked_c), sys.getrefcount(tracked_d))
     clone = tr.copy()
     self.assertEqual(sorted(tr.items()), sorted(clone.items()))
     # The copy adds exactly one reference per tracked value.
     self.assertEqual(refcounts, (sys.getrefcount(tracked_c) - 1, sys.getrefcount(tracked_d) - 1))
     # Keys do not get stored as Python objects, and therefore shouldn't increase refcounts
     self.assertEqual(key_refcount, sys.getrefcount(key))
示例#6
0
 def test_clear(self):
     # clear() must empty the trie and release every reference it held.
     #
     # The tracked value is a fresh object() rather than a string literal:
     # literals can be interned and, on CPython 3.12+, immortal (PEP 683),
     # in which case sys.getrefcount() would not reflect the trie's
     # references at all.
     val = object()
     init_ref_count = sys.getrefcount(val)
     tr = fasttrie.Trie(i=val, j=val, k=None)
     self.assertEqual(len(tr), 3)
     # Stored under two keys, so two extra references.
     self.assertEqual(sys.getrefcount(val), init_ref_count + 2)
     self.assertEqual(sorted(tr.items()), [('i', val), ('j', val), ('k', None)])
     # Building and discarding the items list must not leak references.
     self.assertEqual(sys.getrefcount(val), init_ref_count + 2)
     tr.clear()
     self.assertEqual(len(tr), 0)
     self.assertEqual(sys.getrefcount(val), init_ref_count)
     self.assertEqual(sorted(tr.items()), [])
示例#7
0
    def _create_trie(self):
        """Return a new trie holding eight short sample keys, each mapped to 1.

        Several keys share prefixes ("to"/"tea"/"ted"/"ten" and
        "i"/"in"/"inn"); keys are inserted in the order listed below.
        """
        tr = fasttrie.Trie()
        for key in ("A", "to", "tea", "ted", "ten", "i", "in", "inn"):
            tr[uni_escape(key)] = 1

        return tr
示例#8
0
    def _create_trie2(self):
        """
        Return a trie whose keys mix characters of different encoded sizes.

        The keys combine ASCII letters, a BMP character (U+0627) and
        supplementary-plane characters (U+10330, U+10001); the latter need
        a surrogate pair, i.e. two code units, when stored as UTF-16.
        Every key is mapped to 1.
        """
        raw_keys = (
            # utf16,utf32: 0x0627
            "\N{ARABIC LETTER ALEF}",
            "\N{ARABIC LETTER ALEF}\N{ARABIC LETTER ALEF}",
            # utf16: 0xD800 0xDF30, utf32: 0x00010330
            "\N{ARABIC LETTER ALEF}\N{GOTHIC LETTER AHSA}",
            "\N{ARABIC LETTER ALEF}\N{GOTHIC LETTER AHSA}A",
            # utf16: 0xD800 0xDC01, utf32: 0x00010001
            "\N{ARABIC LETTER ALEF}\N{LINEAR B SYLLABLE B038 E}",
            "\N{ARABIC LETTER ALEF}ABC\N{GOTHIC LETTER AHSA}",
        )

        trie = fasttrie.Trie()
        for raw_key in raw_keys:
            trie[uni_escape(raw_key)] = 1

        return trie
示例#9
0
    def test_refcount(self):
        # Tracks the reference count of a value stored in the trie while
        # outside references come and go, and checks that deleting the key
        # finalizes the value.

        def _GRC(obj):
            # sys.getrefcount() of obj minus a fixed offset of 3.
            # NOTE(review): the 3 presumably cancels the temporary references
            # created by the call itself (caller's temporary, this parameter,
            # getrefcount's argument) — confirm on the target interpreter.
            return sys.getrefcount(obj)-3

        class A:
            # Class-level flag set by __del__ so the test can observe that
            # an instance has been finalized.
            _a_destructor_called = False
            def __del__(self):
                A._a_destructor_called = True

        tr = fasttrie.Trie()
        a = A()
        tr[uni_escape("mo")] = a
        # Expected holders at this point: presumably the local `a` and the trie.
        self.assertEqual(_GRC(tr[uni_escape("mo")]), 2)
        del a
        # With the local name gone, one reference remains.
        self.assertEqual(_GRC(tr[uni_escape("mo")]), 1)
        self.assertTrue(isinstance(tr[uni_escape("mo")], A))
        # Taking and dropping another reference must leave the count unchanged.
        ae = tr[uni_escape("mo")]
        del ae
        self.assertEqual(_GRC(tr[uni_escape("mo")]), 1)
        # Removing the key must trigger A.__del__.
        del tr[uni_escape("mo")]
        self.assertTrue(A._a_destructor_called)

        # The same offset-adjusted count is checked for the trie object itself.
        self.assertEqual(_GRC(tr), 1)
示例#10
0
    def _test_suffixes(self):
        # Exercises iter_suffixes() / suffixes(): iterators created before a
        # deletion, agreement with plain iteration, re-use after a partial
        # pass, empty tries and unknown prefixes. The name does not start
        # with "test", so the default unittest loader does not collect it.

        # del suffixes after referencing
        tr = self._create_trie()
        suffixes = tr.iter_suffixes(uni_escape("in"))
        del tr[uni_escape("in")]
        del tr[uni_escape("inn")]
        # Consuming the stale iterator must fail, also on a second attempt.
        self.assertRaises(RuntimeError, list, suffixes)
        self.assertRaises(RuntimeError, list, suffixes)

        tr = self._create_trie()
        suffixes = tr.iter_suffixes(uni_escape("i"))
        del tr[uni_escape("in")]
        self.assertRaises(RuntimeError, list, suffixes)

        # trie self_iter and suffixes should be same
        suffixes = tr.iter_suffixes()
        self.assertEqual(len(list(tr)), len(list(suffixes)))

        # break iteration in the middle and test if it resets again
        # NOTE(review): "in" was deleted from this trie a few lines above, so
        # this break may never trigger — confirm which key was intended.
        for x in suffixes:
            if x == uni_escape("in"):
                break
        self.assertEqual(len(list(tr)), len(list(suffixes)))

        # 0 len iteration
        tr = fasttrie.Trie()
        for x in tr:
            pass

        # non-existent suffix iter
        tr = self._create_trie()
        self.assertEqual(len(list(tr.iter_suffixes(uni_escape("INVALID")))), 0)
        self.assertEqual(len(tr.suffixes()), len(list(tr.iter_suffixes())))
        # assertEqual's third positional argument is the failure message, so
        # len(tr) has to be compared in an assertion of its own.
        self.assertEqual(len(tr.suffixes()), len(tr))
示例#11
0
 def test_init(self):
     # A trie built from a list of (key, value) pairs and one built from
     # keyword arguments must expose the same items.
     from_pairs = fasttrie.Trie([('a', 1), ('b', 2)])
     from_kwargs = fasttrie.Trie(a=1, b=2)

     pair_items = sorted(from_pairs.items())
     kwarg_items = sorted(from_kwargs.items())
     self.assertEqual(pair_items, kwarg_items)

     # Explicitly drop both tries, first the pair-built one.
     del from_pairs, from_kwargs