def test_basic(self):
    """Exercise basic mapping behaviour: insert, lookup, delete and node counts.

    Covers, in order:
      * a freshly built trie reporting one node;
      * membership and length after a single insertion, and
        ``_fasttrie.Error`` when the integer ``5`` is used as a key;
      * keys made of ASCII, a BMP character and a non-BMP character;
      * ``KeyError`` for a deleted key and for a prefix that was never stored;
      * node counts for the ``_create_trie`` fixture and for every
        three-letter combination of ``string.ascii_letters``.
    """
    self.assertEqual(fasttrie.Trie().node_count(), 1)

    # Create a trie and drop it straight away.
    tr = fasttrie.Trie()
    del tr

    tr = fasttrie.Trie()
    tr[uni_escape("key")] = 55
    self.assertIn(uni_escape("key"), tr)
    self.assertNotIn(uni_escape("ke"), tr)
    self.assertNotIn(5, tr)
    self.assertEqual(len(tr), 1)
    self.assertRaises(_fasttrie.Error, tr.__getitem__, 5)

    ucs1_string = uni_escape("testing")
    ucs2_string = uni_escape("testing\N{ARABIC LETTER ALEF}")
    ucs4_string = uni_escape("testing\N{GOTHIC LETTER AHSA}")
    tr[ucs1_string] = 4
    tr[ucs2_string] = 5
    tr[ucs4_string] = 6
    self.assertEqual(tr[ucs1_string], 4)
    self.assertEqual(tr[ucs2_string], 5)
    self.assertEqual(tr[ucs4_string], 6)

    del tr[ucs2_string]
    self.assertRaises(KeyError, tr.__getitem__, ucs2_string)

    # "tes" is only a prefix of stored keys, so deleting it must fail.
    # assertRaises reports a missing exception as a test failure instead of
    # an error raised through a generic Exception.
    with self.assertRaises(KeyError):
        del tr[uni_escape("tes")]

    # Assigning through the integer key 5 must be rejected.
    with self.assertRaises(_fasttrie.Error):
        tr[5] = 54
    del tr

    tr = self._create_trie()
    self.assertEqual(tr.node_count(), 11)
    del tr

    test_strings = sorted(
        ''.join(key_iter)
        for key_iter in itertools.product(string.ascii_letters, repeat=3)
    )
    tr = fasttrie.Trie()
    for key in test_strings:
        tr[key] = key
    self.assertEqual(len(tr.items()), 52 ** 3)
    # 52 children for each node up the tree
    self.assertEqual(tr.node_count(), ((52 ** 3) + (52 ** 2) + (52 ** 1) + 1))

    # Every key was stored as its own value, so keys and values must match.
    sorted_keys = sorted(tr.keys())
    self.assertEqual(sorted_keys, sorted(tr.values()))
    self.assertEqual(sorted_keys, test_strings)
def test_update(self):
    """Check ``update`` with pairs, keyword arguments, a dict and another trie.

    After each call the sorted items are compared with the expected merged
    content; later updates overwrite values of keys that already exist.
    """
    trie = fasttrie.Trie()

    # Iterable of (key, value) pairs.
    trie.update([('a', 1), ('b', 2)])
    expected = [('a', 1), ('b', 2)]
    self.assertEqual(sorted(trie.items()), expected)

    # Keyword arguments: 'b' is overwritten, 'c' is added.
    trie.update(b=3, c=4)
    expected = [('a', 1), ('b', 3), ('c', 4)]
    self.assertEqual(sorted(trie.items()), expected)

    # Plain dict: 'c' is overwritten, 'd' is added.
    trie.update({'c': 5, 'd': 6})
    expected = [('a', 1), ('b', 3), ('c', 5), ('d', 6)]
    self.assertEqual(sorted(trie.items()), expected)

    # Another trie: 'a' is overwritten, 'e' is added.
    other = fasttrie.Trie(e=7, a=0)
    trie.update(other)
    expected = [('a', 0), ('b', 3), ('c', 5), ('d', 6), ('e', 7)]
    self.assertEqual(sorted(trie.items()), expected)
def test_get(self):
    """Check ``get`` for present keys, missing keys and explicit defaults."""
    trie = fasttrie.Trie(a=1, b=None)

    # (arguments passed to get, expected result), checked in this order.
    lookups = (
        (('a',), 1),              # stored value
        (('b',), None),           # stored None
        (('c',), None),           # missing key, no default given
        (('d', 'foo'), 'foo'),    # missing key, explicit default
        (('a', 'foo'), 1),        # default ignored when the key exists
    )
    for args, expected in lookups:
        self.assertEqual(trie.get(*args), expected)
def _test_corrections_with_dataset(self):
    """Check ``corrections`` / ``iter_corrections`` against a key dataset.

    Loads every line of ``tests/out_keys_8859_9`` as a key, verifies the
    expected key and node counts, compares the eager and iterator-based
    correction APIs, and finally checks that every correction returned for
    one stored key at depth ``i`` is within Damerau-Levenshtein distance
    ``i`` of that key.

    The method name does not start with ``test``, so default unittest
    discovery does not collect it.
    """
    tr = fasttrie.Trie()
    for line in _read_lines(path="tests/out_keys_8859_9", encoding="iso-8859-9"):
        tr[line] = 2
    self.assertEqual(len(tr), 82489)
    self.assertEqual(tr.node_count(), 310764)
    self.assertEqual(tr[uni_escape("ramazan")], 2)

    # The eager and the iterator API must agree, both in size and content.
    self.assertEqual(len(tr.corrections(uni_escape("ra"), 3)), 5639)
    self.assertEqual(len(set(tr.iter_corrections(uni_escape("ra"), 3))), 5639)
    self.assertEqual(
        set(tr.iter_corrections(uni_escape("abe"), 3)),
        tr.corrections(uni_escape("abe"), 3),
    )

    # For one stored key (the last one yielded by iter_suffixes(), so the
    # test stays deterministic) check that corrections(key, depth) only
    # returns strings whose DL distance to the key is at most ``depth``.
    # Depths 1, 2 and 3 are exercised; MAX_EDIT_DISTANCE is exclusive.
    MAX_EDIT_DISTANCE = 4
    items = list(tr.iter_suffixes())
    item = items.pop()
    for depth in range(1, MAX_EDIT_DISTANCE):
        for candidate in tr.corrections(item, depth):
            self.assertLessEqual(damerau_levenshtein(item, candidate), depth)
def test_copy(self):
    """Check that ``copy`` duplicates the items and adds one reference per value.

    The copy must hold the same items as the source trie, each tracked
    value must gain exactly one reference, and the reference count of the
    key string must not change.
    """
    key = "aqswdefr"  # String unlikely to be used elsewhere, for accurate refcount tracking

    # Track fresh, ordinary objects. Small ints and None are immortal on
    # CPython 3.12+ (PEP 683): sys.getrefcount() reports a fixed number for
    # them, so a "+1 after copy" check on those values cannot succeed.
    first = object()
    second = object()
    # A None value is still stored so that copying it stays covered.
    tr = fasttrie.Trie(a=first, b=second, c=None)
    tr[key] = 0

    key_refcount = sys.getrefcount(key)
    refcounts = (sys.getrefcount(first), sys.getrefcount(second))

    duplicate = tr.copy()

    # Keys are distinct, so sorting the (key, value) tuples never has to
    # order the values themselves.
    self.assertEqual(sorted(tr.items()), sorted(duplicate.items()))
    self.assertEqual(
        refcounts,
        (sys.getrefcount(first) - 1, sys.getrefcount(second) - 1),
    )
    # Keys do not get stored as Python objects, and therefore shouldn't increase refcounts
    self.assertEqual(key_refcount, sys.getrefcount(key))
def test_clear(self):
    """Check that ``clear`` empties the trie and releases its value references.

    The same value object is stored under two keys, so the trie is expected
    to hold two references to it until ``clear`` is called.
    """
    # Build the string at runtime instead of using the literal 'foo'.
    # A literal is interned and, depending on the CPython version, may be
    # immortal (PEP 683), in which case sys.getrefcount() would not move.
    # The runtime-built string still compares equal to 'foo' below.
    val = ''.join(['f', 'oo'])
    init_ref_count = sys.getrefcount(val)

    tr = fasttrie.Trie(i=val, j=val, k=None)
    self.assertEqual(len(tr), 3)
    self.assertEqual(sys.getrefcount(val), init_ref_count + 2)
    self.assertEqual(sorted(tr.items()), [('i', 'foo'), ('j', 'foo'), ('k', None)])
    # Listing the items must not leave extra references behind.
    self.assertEqual(sys.getrefcount(val), init_ref_count + 2)

    tr.clear()
    self.assertEqual(len(tr), 0)
    self.assertEqual(sys.getrefcount(val), init_ref_count)
    self.assertEqual(sorted(tr.items()), [])
def _create_trie(self):
    """Return a small fixture trie whose eight keys all map to ``1``.

    Keys are inserted in the order listed below.
    """
    tr = fasttrie.Trie()
    for raw_key in ("A", "to", "tea", "ted", "ten", "i", "in", "inn"):
        tr[uni_escape(raw_key)] = 1
    return tr
def _create_trie2(self):
    """Return a fixture trie whose keys mix characters of different widths.

    Every key starts with ARABIC LETTER ALEF and may continue with ASCII
    letters or with characters above U+FFFF. The original author's note
    says that Python 2.x uses UTF-16 internally, where characters such as
    U+10001 are mapped to two code units. All keys map to ``1``.
    """
    raw_keys = (
        # utf16,utf32: 0x0627
        "\N{ARABIC LETTER ALEF}",
        "\N{ARABIC LETTER ALEF}\N{ARABIC LETTER ALEF}",
        # utf16: 0xD800 0xDF30, utf32: 0x00010330
        "\N{ARABIC LETTER ALEF}\N{GOTHIC LETTER AHSA}",
        "\N{ARABIC LETTER ALEF}\N{GOTHIC LETTER AHSA}A",
        # utf16: 0xD800 0xDC01, utf32: 0x00010001
        "\N{ARABIC LETTER ALEF}\N{LINEAR B SYLLABLE B038 E}",
        "\N{ARABIC LETTER ALEF}ABC\N{GOTHIC LETTER AHSA}",
    )
    trie = fasttrie.Trie()
    for raw_key in raw_keys:
        trie[uni_escape(raw_key)] = 1
    return trie
def test_refcount(self):
    """Check how many references the trie keeps to a stored object.

    A locally defined class records when its destructor runs. The adjusted
    reference count of the stored instance is checked while a local name
    still refers to it, after that name is deleted, after a temporary
    lookup, and the destructor must have run once the key is deleted.
    """
    def _adjusted_refcount(obj):
        # The constant 3 is the offset the assertions below are written
        # against; it is kept exactly as in the original helper.
        return sys.getrefcount(obj) - 3

    class Probe:
        destroyed = False

        def __del__(self):
            Probe.destroyed = True

    trie = fasttrie.Trie()
    instance = Probe()
    trie[uni_escape("mo")] = instance
    # Referenced by both the local name and the trie.
    self.assertEqual(_adjusted_refcount(trie[uni_escape("mo")]), 2)

    del instance
    # Only the trie refers to the object now.
    self.assertEqual(_adjusted_refcount(trie[uni_escape("mo")]), 1)
    self.assertTrue(isinstance(trie[uni_escape("mo")], Probe))

    # Taking and dropping an extra reference must leave the count unchanged.
    extra_ref = trie[uni_escape("mo")]
    del extra_ref
    self.assertEqual(_adjusted_refcount(trie[uni_escape("mo")]), 1)

    # Removing the key must destroy the stored object.
    del trie[uni_escape("mo")]
    self.assertTrue(Probe.destroyed)
    self.assertEqual(_adjusted_refcount(trie), 1)
def _test_suffixes(self):
    """Check ``iter_suffixes`` / ``suffixes`` iteration behaviour.

    Covers consuming a suffix iterator after keys were deleted (expects
    ``RuntimeError``), agreement between iterating the trie and
    ``iter_suffixes()``, re-using an iterator after breaking out of it,
    iterating an empty trie, a prefix with no matches, and agreement
    between ``suffixes()``, ``iter_suffixes()`` and ``len``.

    The method name does not start with ``test``, so default unittest
    discovery does not collect it.
    """
    # del suffixes after referencing
    tr = self._create_trie()
    suffixes = tr.iter_suffixes(uni_escape("in"))
    del tr[uni_escape("in")]
    del tr[uni_escape("inn")]
    # The failure must repeat on a second attempt as well.
    self.assertRaises(RuntimeError, list, suffixes)
    self.assertRaises(RuntimeError, list, suffixes)

    tr = self._create_trie()
    suffixes = tr.iter_suffixes(uni_escape("i"))
    del tr[uni_escape("in")]
    self.assertRaises(RuntimeError, list, suffixes)

    # trie self_iter and suffixes should be same
    suffixes = tr.iter_suffixes()
    self.assertEqual(len(list(tr)), len(list(suffixes)))

    # break iteration in the middle and test if it resets again
    for x in suffixes:
        if x == uni_escape("in"):
            break
    self.assertEqual(len(list(tr)), len(list(suffixes)))

    # 0 len iteration
    tr = fasttrie.Trie()
    for x in tr:
        pass

    # non-existent suffix iter
    tr = self._create_trie()
    self.assertEqual(len(list(tr.iter_suffixes(uni_escape("INVALID")))), 0)

    # All three sizes must agree. The third positional argument of
    # assertEqual is the failure message, not another value to compare, so
    # len(tr) needs an assertion of its own.
    suffix_count = len(tr.suffixes())
    self.assertEqual(suffix_count, len(list(tr.iter_suffixes())))
    self.assertEqual(suffix_count, len(tr))
def test_init(self):
    """Check that building from pairs and from keyword arguments agree."""
    from_pairs = fasttrie.Trie([('a', 1), ('b', 2)])
    from_kwargs = fasttrie.Trie(a=1, b=2)

    pair_items = sorted(from_pairs.items())
    kwarg_items = sorted(from_kwargs.items())
    self.assertEqual(pair_items, kwarg_items)

    # Drop both tries explicitly, first the pair-built one.
    del from_pairs, from_kwargs