def test_save(self):
    """Check lookups, approximate matching and a save/load round trip."""
    words = trie.trie()
    words["foo"] = 1
    self.assertEqual(words.keys(), ["foo"])
    self.assertEqual(words.values(), [1])
    # A key that was never stored falls back to the supplied default.
    self.assertEqual(words.get("bar", 99), 99)
    words["hello"] = "55a"

    # Each row: (query, allowed mismatches, expected hits).
    exact_and_near = [
        ("foo", 0, [("foo", 1, 0)]),
        ("foo", 1, [("foo", 1, 0)]),
        ("foa", 0, []),
        ("foa", 1, [("foo", 1, 1)]),
    ]
    for query, budget, expected in exact_and_near:
        self.assertEqual(words.get_approximate(query, budget), expected)

    # With two mismatches "foa" reaches "foo" through three alignments:
    #   foo   foo-   foo-
    #   foa   f-oa   fo-a
    # 1. mismatch a->o
    # 2. insertion after f, deletion of o
    # 3. insertion after o, deletion of o
    near = sorted(words.get_approximate("foa", 2))
    self.assertEqual(near, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])

    # The same hit can be reported several times; count the duplicates.
    tally = {}
    for hit in words.get_approximate("foo", 4):
        tally.setdefault(hit, 0)
        tally[hit] += 1
    self.assertEqual(
        sorted(tally.items()),
        [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)],
    )

    # Serialise into an in-memory handle, rewind it and load it back.
    buffer = StringIO()
    trie.save(buffer, words)
    buffer.seek(0)
    restored = trie.load(buffer)
    stored_keys = restored.keys()
    for expected_key in ("foo", "hello"):
        self.assertTrue(expected_key in stored_keys)
    self.assertEqual(repr(restored["foo"]), "1")
    self.assertEqual(repr(restored["hello"]), "'55a'")
def test_large_save_load(self):
    """Round-trip large random tries through a temporary file.

    For each string length (100, 1000 and 10000 characters), 1000 random
    lowercase key/value pairs are inserted into the trie and mirrored in a
    reference dict. The trie is compared with the dict, written to a
    temporary binary file, read back, and compared with the dict again.
    """
    def random_word(length):
        # One random lowercase ASCII string of exactly `length` characters.
        return ''.join(random.choice(ascii_lowercase) for _ in range(length))

    trieobj = trie.trie()
    self.assertEqual(trieobj.get("foobar"), None)

    for length in (100, 1000, 10000):
        # The reference dict only tracks the pairs of the current length.
        reference = {}
        for _ in range(1000):
            key = random_word(length)
            val = random_word(length)
            trieobj[key] = val
            reference[key] = val

        # Before saving: every reference entry is retrievable.
        for key, val in reference.items():
            self.assertEqual(trieobj[key], val)

        with tempfile.TemporaryFile(mode='w+b') as handle:
            trie.save(handle, trieobj)
            handle.seek(0)
            trieobj = trie.load(handle)

        # After reloading: the same entries must still match.
        for key, val in reference.items():
            self.assertEqual(trieobj[key], val)
def test_save(self):
    """Verify basic access, fuzzy lookup and persistence of a small trie."""
    store = trie.trie()
    store["foo"] = 1
    self.assertEqual(list(store.keys()), ["foo"])
    self.assertEqual(list(store.values()), [1])
    # Unknown keys yield the caller's default.
    self.assertEqual(store.get("bar", 99), 99)
    store["hello"] = '55a'

    # Hits are (key, value, mismatches) tuples.
    foo_exact = ("foo", 1, 0)
    self.assertEqual(store.get_approximate("foo", 0), [foo_exact])
    self.assertEqual(store.get_approximate("foo", 1), [foo_exact])
    self.assertEqual(store.get_approximate("foa", 0), [])
    self.assertEqual(store.get_approximate("foa", 1), [("foo", 1, 1)])

    # Two allowed mismatches give three ways of turning "foa" into "foo":
    #   foo   foo-   foo-
    #   foa   f-oa   fo-a
    # mismatch a->o
    # insertion after f, deletion of o
    # insertion after o, deletion of o
    two_off = sorted(store.get_approximate("foa", 2))
    self.assertEqual(two_off, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])

    # Count how often each distinct hit is reported.
    occurrences = {}
    for hit in store.get_approximate("foo", 4):
        if hit in occurrences:
            occurrences[hit] += 1
        else:
            occurrences[hit] = 1
    counted = sorted(occurrences.items())
    self.assertEqual(counted, [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)])

    # Save to an in-memory binary handle and read the trie back from it.
    stream = BytesIO()
    trie.save(stream, store)
    stream.seek(0)
    reloaded = trie.load(stream)
    reloaded_keys = list(reloaded.keys())
    self.assertTrue("foo" in reloaded_keys)
    self.assertTrue("hello" in reloaded_keys)
    self.assertEqual(repr(reloaded["foo"]), '1')
    self.assertEqual(repr(reloaded["hello"]), "'55a'")
def test_large_save_load(self):
    """Save and reload a trie filled with long random strings.

    Three rounds are run, with keys and values of 100, 1000 and 10000
    lowercase characters. Each round adds 1000 random pairs to the trie and
    to a per-round reference dict, checks the trie against that dict,
    writes the trie to a temporary file, loads it back and checks again.
    """
    def assert_matches(candidate, expected):
        # Every expected key must map to the same value in the trie.
        for key in expected:
            self.assertEqual(candidate[key], expected[key])

    trieobj = trie.trie()
    self.assertEqual(trieobj.get("foobar"), None)

    for max_str_len in [100, 1000, 10000]:
        # Key first, then value, so the random draws stay in the same order.
        pairs = [
            (
                ''.join([random.choice(ascii_lowercase) for _ in range(max_str_len)]),
                ''.join([random.choice(ascii_lowercase) for _ in range(max_str_len)]),
            )
            for _ in range(1000)
        ]
        cmp_dict = {}
        for key, val in pairs:
            trieobj[key] = val
            cmp_dict[key] = val

        assert_matches(trieobj, cmp_dict)

        with tempfile.TemporaryFile(mode='w+b') as tmp:
            trie.save(tmp, trieobj)
            tmp.seek(0)
            trieobj = trie.load(tmp)

        assert_matches(trieobj, cmp_dict)
def loadIndexFromDisk():
    """Load the persisted index trie and document dictionary from disk.

    The trie is read from the module-level path ``indexFileName`` with
    ``trie.load``; the pickled object is read from the module-level path
    ``docDictionary``.

    Returns:
        A 3-tuple ``(index, docDict[0], docDict[1])``: the loaded trie
        followed by the first two items of the unpickled object.

    Raises:
        IOError/OSError: if either file cannot be opened.
    """
    # Context managers close the handles even when loading raises.
    with open(indexFileName, 'rb') as fp:
        index = trie.load(fp)

    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only use this on dictionary files written by this program itself.
    with open(docDictionary, 'rb') as fp:
        docDict = pickle.load(fp)

    return index, docDict[0], docDict[1]
# mismatch a->o # insertion after f, deletion of o # insertion after o, deletion of o x = trieobj2.get_approximate("foo", 4) y = {} for z in x: y[z] = y.get(z, 0) + 1 x = y.items() x.sort() print x # [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)] h = StringIO.StringIO() trie.save(h, trieobj2) h.seek(0) trieobj3 = trie.load(h) k = trieobj3.keys() k.sort() for m in k: # foo 1 print m, repr(trieobj3[m]) # hello '55a' # Found bug, doesn't handle insertions and deletions at end properly. trieobj = trie.trie() trieobj["hello"] = 1 print trieobj.get_approximate('he', 2) # [] print trieobj.get_approximate('he', 3) # [('hello', 1, 3)] print trieobj.get_approximate('hello me!', 3) # [] print trieobj.get_approximate('hello me!', 4) # [('hello', 1, 4)] print trieobj.get_approximate('hello me!', 5) # [('hello', 1, 4)]
def loadFromFile(self, indexFile):
    """Replace ``self.index`` with the trie stored in *indexFile*.

    Args:
        indexFile: Path of a file to be read with ``trie.load``.

    Side effects:
        Overwrites ``self.index`` and resets ``self.modified`` to False.

    Raises:
        IOError/OSError: if *indexFile* cannot be opened.
    """
    # TODO: assert that the trie is empty at this point; otherwise the
    # current in-memory index is silently lost.
    # Open in binary mode: text mode can corrupt the serialised trie
    # (newline translation, decoding). The context manager closes the
    # handle even if loading fails.
    with open(indexFile, 'rb') as fp:
        self.index = trie.load(fp)
    self.modified = False