def test_find_nosep(self):
    """Without a separator, iteration, '*' and '**' all return every entry.

    The tree is filled by ``self.paths`` (fixture defined elsewhere); the
    expected contents below are what the assertions require it to hold.
    """
    expected = {
        'binary/WEB-INF/tiles/footer/footer.jsp': 1,
        'binary/WEB-INF/tiles/form/addAccountForm.jsp': 2,
        'binary/WEB-INF/tiles/menu/menu_empty.jsp': 3,
        'binary/addAccount.jsp': 4,
        'source/dist/WEB-INF/tiles/menu/menu_empty.jsp': 5,
        'source/dist/addClient.jsp': 6,
    }
    tree = TST(sep=None)
    self.paths(tree)

    # Plain iteration first, then both wildcard spellings.
    self.assertEquals(dict(tree), expected)
    for pattern in ('*', '**'):
        self.assertEquals(dict(tree.find(pattern)), expected)
def test_find_r(self):
    """find('*/*.jsp') on the ``self.paths`` fixture yields one entry only."""
    tree = TST()
    self.paths(tree)
    matches = dict(tree.find('*/*.jsp'))
    self.assertEquals(matches, {'binary/addAccount.jsp': 4})
def __init__(self, root, host, port):
    """Record the service configuration and create an empty word tree.

    Args:
        root: directory holding a ``VERSION`` file; its first line, with
            trailing whitespace stripped, becomes the reported version.
        host: stored as-is for later use.
        port: stored as-is for later use.

    Raises:
        IOError: if ``<root>/VERSION`` cannot be opened.
    """
    version_path = '{}/VERSION'.format(root)
    with open(version_path) as version_file:
        first_line = version_file.readline()
    self._version = first_line.rstrip()

    self._root = root
    self._host = host
    self._port = port
    self._tst = TST()

    # Start time and last-activity time begin as the same instant.
    created = time.time()
    self._start_time = created
    self._last_time = created
    self._last_time_lock = threading.Lock()

    # No kill timer exists until one is scheduled.
    self._kill_timer = None
def test_iteritems(self):
    """iteritems() must yield every (key, value) pair in sorted order."""
    seen = set()
    expected = []
    tree = TST()
    for value in xrange(100):
        # Random keys of 1..10 raw bytes, base64-encoded without padding.
        key = base64.b64encode(os.urandom(value % 10 + 1)).rstrip('=')
        if key not in seen:
            seen.add(key)
            expected.append((key, value))
            tree[key] = value
    expected.sort()
    self.assertEquals(expected, list(tree.iteritems()))
def test_find_simple(self):
    """Every inserted key, used verbatim as a pattern, finds something."""
    seen = set()
    keys = []
    tree = TST()
    for value in xrange(100):
        # Random keys of 1..10 raw bytes, base64-encoded without padding.
        key = base64.b64encode(os.urandom(value % 10 + 1)).rstrip('=')
        if key not in seen:
            seen.add(key)
            keys.append(key)
            tree[key] = value
    keys.sort()
    for key in keys:
        matches = tuple(tree.find(key))
        assert bool(matches)
def test_delete(self):
    """delete() of absent and present keys, checked through len() and get()."""
    trie = TST()
    trie.put("a", "A")
    self.assertEquals(1, len(trie))
    self.assertEquals("A", trie.get("a"))

    # Deleting a key that was never stored must leave the size alone.
    trie.delete("b")
    self.assertEquals(1, len(trie))

    trie.delete("a")
    # NOTE(review): the size is still expected to be 1 after the only key
    # was deleted -- looks like delete() does not shrink len(); confirm
    # this is the intended contract of TST.delete.
    self.assertEquals(1, len(trie))
    self.assertIsNone(trie.get("a"))
def test_remove_(self):
    """Delete keys longest-first, then refill; contents must track exactly.

    ``self.insert`` is a fixture defined elsewhere; the assertions require
    it to store 'a', 'aa' and 'aaa' with values 1, 2 and 3.
    """
    tree = TST()
    self.insert(tree)

    remaining = {'a': 1, 'aa': 2, 'aaa': 3}
    for key in ('aaa', 'aa', 'a'):
        del tree[key]
        del remaining[key]
        self.assertEquals(dict(tree), remaining)
        self.assertEquals(dict(tree.find('*')), remaining)

    # The emptied tree must accept the same keys again.
    self.insert(tree)
    refilled = {'a': 1, 'aa': 2, 'aaa': 3}
    self.assertEquals(dict(tree), refilled)
    self.assertEquals(dict(tree.find('*')), refilled)
def AddDictionaries(self, dicts):
    """Load the words of every dictionary file into ``self._tst``.

    Each file is read one word per line. Consecutive words that share the
    same lower-cased first letter form one batch, and each batch is passed
    to ``self.FillTST`` on a worker thread together with the TST kept for
    that letter. When all files are processed, every per-letter TST is
    handed to ``self._tst.Take`` and the resulting size is printed.

    Args:
        dicts: iterable of paths to dictionary files. Batching assumes
            words with the same first letter are adjacent in each file.

    Raises:
        IOError: if a dictionary file cannot be opened.
    """
    import itertools  # local import: only this method needs it

    tsts = dict()
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        for fdict in dicts:
            # Read eagerly so the handle is closed before work is queued.
            with open(fdict) as source:
                words = [line.rstrip('\n') for line in source]
            # A blank line has no first letter to group on.
            words = [word for word in words if word]

            futures = list()
            batches = itertools.groupby(words, key=lambda w: w[0].lower())
            for letter, batch in batches:
                if letter not in tsts:
                    tsts[letter] = TST()
                # groupby yields a one-shot iterator; materialize it
                # before handing it to another thread.
                futures.append(executor.submit(
                    self.FillTST, tsts[letter], list(batch)))

            # Finish this file before reading the next one, so a letter's
            # TST is not filled by batches from two files at once.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    # Best effort: report the failed batch and carry on.
                    print(e)
    for tst in tsts.values():
        # TODO Add possibility to merge two tsts if they are overlapping
        self._tst.Take(tst)
    print('Added {} words'.format(self._tst.Size()))
def __init__(self, *args, **kwargs):
    '''
    Create the backing TST, then load any initial items.
    @params *args, **kwargs = forwarded unchanged to self.update, so the
        constructor can be used like a copy constructor.
        eg:
            t = SuffixTree({'ab':12, 'cd':34})
            t = SuffixTree((k, v) for k,v in {'ab':12, 'cd':34}.iteritems())
    '''
    backing = TST()
    self.tst = backing
    self.update(*args, **kwargs)
def test_q(self):
    """Check the single-character wildcard '?' on default-constructed trees.

    Expectations encoded below: '?' finds every one-character key; on keys
    '/a', '/b', '/c' the pattern '??' finds nothing while '/?' finds all;
    'd?g' finds 'dog', 'dig' and 'dug'.
    """
    t = TST()
    for key in ('a', 'b', 'c'):
        t[key] = 1
    # Was assertTrue(a, b): that treats b as the failure message and only
    # checks that a is truthy, so the comparison never happened.
    self.assertEquals(set(dict(t.find('?')).keys()), set(t.keys()))

    t = TST()
    for key in ('/a', '/b', '/c'):
        t[key] = 1
    self.assertEquals(set(dict(t.find('??')).keys()), set())
    self.assertEquals(set(dict(t.find('/?')).keys()), set(t.keys()))

    t = TST()
    for key in ('dog', 'dig', 'dug'):
        t[key] = 1
    self.assertEquals(set(dict(t.find('d?g')).keys()), set(t.keys()))
def test_match(self):
    """Wildcard '*' patterns against a separator-less tree of 'w' words."""
    tree = TST(sep=None)
    for word in ('what', 'where', 'when', 'widget', 'wizard', 'wow', 'wowo'):
        tree[word] = 1

    def check(pattern, *words):
        # Every stored value is 1, so only the matching keys vary.
        self.assertEquals(dict(tree.find(pattern)), dict.fromkeys(words, 1))

    check('w*e*', 'where', 'when', 'widget')
    check('*e', 'where')
    check('*et', 'widget')
    check('wo*', 'wow', 'wowo')
    check('*a*', 'what', 'wizard')
    check('*za*', 'wizard')

    # A key used literally as a pattern must find exactly itself.
    for key in tree.keys():
        check(key, key)

    check('*dg*', 'widget')
    check('*he*', 'when', 'where')
def test_q_nosep(self):
    """Check the single-character wildcard '?' on trees built with sep=None.

    Expectations encoded below: '?' finds every one-character key, and on
    keys '/a', '/b', '/c' both '??' and '/?' find all of them.
    """
    t = TST(sep=None)
    for key in ('a', 'b', 'c'):
        t[key] = 1
    # Was assertTrue(a, b): that treats b as the failure message and only
    # checks that a is truthy, so the comparison never happened.
    self.assertEquals(set(dict(t.find('?')).keys()), set(t.keys()))

    t = TST(sep=None)
    for key in ('/a', '/b', '/c'):
        t[key] = 1
    self.assertEquals(set(dict(t.find('??')).keys()), set(t.keys()))
    self.assertEquals(set(dict(t.find('/?')).keys()), set(t.keys()))
node = stack[0] stack = stack[1:] seen[node] = True if node == end: return path(node, parents, []) else: siblings = filter(lambda s: not seen.has_key(s) and s not in stack, tst.near_search(node, 1)) for n in siblings: parents[n] = node stack += siblings return None if __name__ == "__main__": import time tst = TST() for word in file('/usr/share/dict/american-english'): w = word.strip() if w: tst.insert(word.strip()) # simple test input = open('test/simple.in') start = input.readline().strip() end = input.readline().strip() t1 = time.time() seq = bfs(tst,start,end) print "time: %.2f" % (time.time() - t1) for word in seq: print word # another simple test input.close()
class CamelService():
    """Word-splitting service: a TST dictionary behind a threaded HTTP server.

    The service keeps a last-activity timestamp (see ``Touch``) and a timer
    that stops the server once ``SERVICE_KILL_TIME`` seconds pass without
    activity.
    """

    def __init__(self, root, host, port):
        """Record the configuration and create an empty word tree.

        Args:
            root: directory holding a ``VERSION`` file; its first line,
                stripped of trailing whitespace, is the reported version.
            host: address the HTTP server binds to in ``Start``.
            port: port the HTTP server binds to in ``Start``.

        Raises:
            IOError: if ``<root>/VERSION`` cannot be opened.
        """
        with open('{}/VERSION'.format(root)) as v:
            self._version = v.readline().rstrip()
        self._root = root
        self._host = host
        self._port = port
        self._tst = TST()
        self._start_time = time.time()
        self._last_time = self._start_time
        self._last_time_lock = threading.Lock()
        self._kill_timer = None

    def Status(self):
        """Return a dict of ``server.*`` facts about the running service."""
        return {
            'server.version': self._version,
            'server.stdout': sys.stdout.name,
            'server.stderr': sys.stderr.name,
            'server.address': '{}:{}'.format(self._host, self._port),
            'server.pid': os.getpid(),
            'server.words': self._tst.Size(),
            'server.root': self._root,
        }

    def Touch(self):
        """Mark "now" as the time of the last activity."""
        with self._last_time_lock:
            self._last_time = time.time()

    def TimerUpdate(self, delay=SERVICE_KILL_TIME):
        """(Re)arm the idle timer to run ``_CheckStatus`` after ``delay`` s."""
        if self._kill_timer is not None:
            self._kill_timer.cancel()
        self._kill_timer = threading.Timer(delay, self._CheckStatus)
        self._kill_timer.start()

    def TimerKill(self):
        """Cancel the idle timer, if one is armed."""
        if self._kill_timer:
            self._kill_timer.cancel()
            self._kill_timer = None

    def _CheckStatus(self):
        """Timer callback: stop the service if it has been idle too long."""
        # Only the timestamp read needs the lock; stopping the server while
        # holding it could block against Touch().
        with self._last_time_lock:
            diff = time.time() - self._last_time
        if diff < SERVICE_KILL_TIME:
            # Activity was seen; check again when the idle window would end.
            self.TimerUpdate(SERVICE_KILL_TIME - diff)
            return
        print('No activity for the last {} seconds'.format(
            SERVICE_KILL_TIME))
        self.Stop()

    def Start(self):
        """Start the HTTP server on its own thread and arm the idle timer."""
        print('CamelService Start')
        self._server = ThreadedHTTPServer((self._host, self._port),
                                          CamelRequestHandler)
        self._server_thread = threading.Thread(
            target=self._server.serve_forever)
        # self._server_thread.daemon = True
        self._server_thread.start()
        self.TimerUpdate()

    def Stop(self):
        """Cancel the idle timer, shut the server down and join its thread."""
        print('CamelService Stop')
        self.TimerKill()
        self._server.shutdown()
        self._server_thread.join()

    def ToCamelCase(self, string):
        """Return every CamelCase rendering of ``string`` found by splitting.

        Args:
            string: non-empty text to split into dictionary words.

        Returns:
            A list of strings, one per split produced by
            ``_BreakIntoWords``, each with its words title-cased and joined.
        """
        groups = list()
        self._BreakIntoWords(string, groups, list(), "")
        return ["".join(word.title() for word in group) for group in groups]

    def AddDictionaries(self, dicts):
        """Load the words of every dictionary file into ``self._tst``.

        Each file is read one word per line. Consecutive words that share
        the same lower-cased first letter form one batch, and each batch is
        passed to ``FillTST`` on a worker thread together with the TST kept
        for that letter. When all files are processed, every per-letter TST
        is handed to ``self._tst.Take`` and the resulting size is printed.

        Args:
            dicts: iterable of paths to dictionary files. Batching assumes
                words with the same first letter are adjacent in each file.

        Raises:
            IOError: if a dictionary file cannot be opened.
        """
        import itertools  # local import: only this method needs it

        tsts = dict()
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=5) as executor:
            for fdict in dicts:
                # Read eagerly so the handle is closed before work is queued.
                with open(fdict) as source:
                    words = [line.rstrip('\n') for line in source]
                # A blank line has no first letter to group on.
                words = [word for word in words if word]

                futures = list()
                batches = itertools.groupby(
                    words, key=lambda w: w[0].lower())
                for letter, batch in batches:
                    if letter not in tsts:
                        tsts[letter] = TST()
                    # FillTST shuffles its list in place, and groupby only
                    # yields a one-shot iterator: materialize the batch.
                    futures.append(executor.submit(
                        self.FillTST, tsts[letter], list(batch)))

                # Finish this file before reading the next one, so a
                # letter's TST is not filled from two files at once.
                for future in concurrent.futures.as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        # Best effort: report the failed batch and carry on.
                        print(e)
        for tst in tsts.values():
            # TODO Add possibility to merge two tsts if they are overlapping
            self._tst.Take(tst)
        print('Added {} words'.format(self._tst.Size()))

    def FillTST(self, tst, words):
        """Insert every word longer than one character into ``tst``.

        ``words`` is shuffled in place first. Returns ``tst``.
        """
        # This prevents worst case time for TST
        random.shuffle(words)
        for word in words:
            if len(word) > 1:
                tst.Put(word, word)
        return tst

    def _BreakIntoWords(self, string, groups, current, bad):
        """Recursively split ``string`` into dictionary words.

        Args:
            string: remaining, non-empty text to split.
            groups: output list; each complete split is appended as a list
                of words.
            current: words chosen so far on this path.
            bad: run of characters collected since the last dictionary
                match; it is emitted as one unknown word.
        """
        assert len(string) != 0, "Must not be empty"
        # Use longest matches first
        prefixes = self._tst.AllPrefixesOf(string)[::-1]
        if not prefixes:
            # No dictionary word starts here: move one character into the
            # unknown run and retry on the remainder.
            bad += string[0:1]
            string = string[1:]
            if len(string) == 0:
                current.append(bad)
                groups.append(current)
            else:
                self._BreakIntoWords(string, groups, current, bad)
            return

        for prefix in prefixes:
            # Add non matched part of string as unknown word
            if len(bad) != 0:
                current.append(bad)
                bad = ""
            # Each candidate prefix continues on its own copy of the path.
            clone = current[:]
            clone.append(prefix)
            rest = string[len(prefix):]
            if len(rest) == 0:
                groups.append(clone)
            else:
                self._BreakIntoWords(rest, groups, clone, bad)
from tst import TST

# Minimal usage example: store one value under a key, then read it back
# and print whatever get() returns for that key.
tree = TST()
tree.put("apple", 100)
print(tree.get("apple"))
#TST - A Ternary Search Trie #Author: Tim Henderson #Contact: [email protected] or [email protected] #This File: Dotty Test #Copyright (c) 2010, Tim Henderson #All rights reserved. from tst import TST tst = TST() tst['abc'] = 1 tst['abcde'] = 2 tst['abe'] = 3 tst['abefg'] = 4 tst['abce'] = 5 tst['aba'] = 6 tst['boy'] = 7 tst['bad'] = 8 tst['buster'] = 9 tst['cactus'] = 10 print tst.dotty()
seen[node] = True if node == end: return path(node, parents, []) else: siblings = filter(lambda s: not seen.has_key(s) and s not in stack, tst.near_search(node, 1)) for n in siblings: parents[n] = node stack += siblings return None if __name__ == "__main__": import time tst = TST() for word in file('/usr/share/dict/american-english'): w = word.strip() if w: tst.insert(word.strip()) # simple test input = open('test/simple.in') start = input.readline().strip() end = input.readline().strip() t1 = time.time() seq = bfs(tst, start, end) print "time: %.2f" % (time.time() - t1) for word in seq: print word # another simple test input.close()
def test_longest_prefix(self):
    """longestPrefixOf returns the longest stored key that starts the query."""
    trie = TST()
    entries = (
        ("a", "A"),
        ("anterior", "ANTERIOR"),
        ("ant", "ANT"),
        ("aunt", "AUNT"),
    )
    for key, value in entries:
        trie.put(key, value)

    # (query, expected longest stored prefix); "" means no stored prefix.
    cases = (
        ("auntie", "aunt"),
        ("ant", "ant"),
        ("", ""),
        ("b", ""),
    )
    for query, expected in cases:
        self.assertEquals(trie.longestPrefixOf(query), expected)
class SuffixTree(MutableMapping):
    '''
    A mapping whose keys can also be searched by substring.

    Items live in a backing TST (self.tst). For every item the TST holds
    START + key -> value, and, for each suffix of the key, suffix -> set of
    the START-prefixed keys having that suffix. find() walks the TST's
    node structure directly (heads, internal(), ch, l/m/r, key, accepting,
    val), so it depends on those TST internals.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Create the backing TST, then load any initial items.
        @params *args, **kwargs = forwarded unchanged to self.update, so
            the constructor can be used like a copy constructor.
            eg:
                t = SuffixTree({'ab':12, 'cd':34})
                t = SuffixTree((k, v) for k,v in d.iteritems())
        '''
        self.tst = TST()
        self.update(*args, **kwargs)

    def _accepting_nodes(self, root):
        '''
        Yield every accepting node reachable from root. Children are
        pushed right, middle, left on a stack, so the left subtree is
        visited first.
        '''
        pending = deque()
        pending.appendleft(root)
        while pending:
            n = pending.pop()
            if not n:
                continue
            if n.accepting:
                yield n
            pending.append(n.r)
            pending.append(n.m)
            pending.append(n.l)

    def find(self, substr):
        '''
        Yield (key, value) for every stored key with a suffix that starts
        with substr. An empty substr yields every item.
        @params substr = the text to look for
        '''
        if not substr:
            for k, v in self.iteritems():
                yield k, v
            # Nothing to look up; going on would index substr[0].
            return

        # Descend to the node that covers all keys starting with substr.
        root = None
        cursor = (self.tst.heads[ord(substr[0])], 1)
        while cursor:
            n, d = cursor
            if n is None:
                return
            if n.internal():
                if d == len(substr):
                    root = n
                    break
                ch = substr[d]
                if ch < n.ch:
                    cursor = (n.l, d)
                elif ch == n.ch:
                    cursor = (n.m, d + 1)
                else:
                    cursor = (n.r, d)
                continue
            if n.key[:len(substr)] == substr:
                root = n
                break
            # A non-internal node holding some other key: no match.
            return

        # now expand root
        found = set()
        for n in self._accepting_nodes(root):
            # For suffix entries val is the set of START-prefixed keys.
            found |= n.val
        for k in found:
            # Drop the leading START marker before handing the key back.
            yield k[1:], self.tst.get(k)

    def keys(self):
        '''Return the stored keys as a list.'''
        return [k for k, v in self.iteritems()]

    def iteritems(self):
        '''
        Yield (key, value) for every item, taken from the subtree under
        the START head of the backing TST.
        '''
        h = self.tst.heads[ord(START)]
        if h is None:
            return
        for n in self._accepting_nodes(h):
            # [1:-1] drops START and the node key's last character
            # (presumably an end marker added by TST -- confirm).
            yield n.key[1:-1], n.val

    def __len__(self):
        # iteritems() is a generator, so it has to be counted, not len()'d.
        return sum(1 for _ in self.iteritems())

    def __setitem__(self, key, value):
        fullkey = START + key
        self.tst[fullkey] = value
        # Register the full key under each of its suffixes.
        for i in range(len(key)):
            curkey = key[i:]
            keys = self.tst.get(curkey, set())
            keys.add(fullkey)
            self.tst[curkey] = keys

    def __getitem__(self, key):
        fullkey = START + key
        return self.tst[fullkey]

    def __delitem__(self, key):
        raise RuntimeError('Removing from SuffixTree is not allowed')

    def __iter__(self):
        for k, v in self.iteritems():
            yield k

    def __contains__(self, pattern):
        try:
            self[pattern]
        except KeyError:
            #try: return bool(tuple(self.find(pattern)))
            #except KeyError: return False
            return False
        return True

    def __str__(self):
        return str(dict(self))

    def __repr__(self):
        return str(self)
def test_insert_(self):
    """After ``self.insert``, iteration and find('*') agree on the contents."""
    tree = TST()
    self.insert(tree)
    expected = {'a': 1, 'aa': 2, 'aaa': 3}
    self.assertEquals(dict(tree), expected)
    self.assertEquals(dict(tree.find('*')), expected)
def test_prefix_match(self):
    """prefixMatch returns a queue of the stored keys starting with a prefix."""
    trie = TST()
    entries = (
        ("a", "A"),
        ("anterior", "ANTERIOR"),
        ("antidisassembly", "ANTIDISASSEMBLY"),
        ("ant", "ANT"),
        ("aunt", "AUNT"),
    )
    for key, value in entries:
        trie.put(key, value)

    # Three keys start with "ant"; they must come out in this order.
    matches = trie.prefixMatch("ant")
    self.assertEquals(matches.qsize(), 3)
    for expected in ("ant", "anterior", "antidisassembly"):
        self.assertEquals(matches.get(), expected)

    # For the remaining prefixes only the number of matches is checked.
    for prefix, count in (("bob", 0), ("aunt", 1), ("auntie", 0)):
        self.assertEquals(trie.prefixMatch(prefix).qsize(), count)
def test_put_get(self):
    """put/get/contains round trip, including overwriting an existing key."""
    trie = TST()

    # Empty tree: nothing stored, lookups give None.
    self.assertEquals(0, len(trie))
    self.assertIsNone(trie.get("a"))

    trie.put("a", "a")
    self.assertEquals(1, len(trie))
    self.assertEquals("a", trie.get("a"))

    # A second key grows the tree and leaves the first one intact.
    trie.put("b", "b")
    self.assertEquals(2, len(trie))
    self.assertEquals("a", trie.get("a"))
    self.assertEquals("b", trie.get("b"))

    # Re-putting an existing key replaces its value without growing.
    trie.put("a", "new_a")
    self.assertEquals(2, len(trie))
    self.assertEquals("new_a", trie.get("a"))

    self.assertTrue(trie.contains("b"))
    self.assertFalse(trie.contains("ab"))