Пример #1
0
 def test_find_nosep(self):
     # With sep=None, the plain mapping view, find('*') and find('**') are
     # all expected to produce the same full set of entries.
     trie = TST(sep=None)
     self.paths(trie)
     everything = {
         'binary/WEB-INF/tiles/footer/footer.jsp' : 1,
         'binary/WEB-INF/tiles/form/addAccountForm.jsp' : 2,
         'binary/WEB-INF/tiles/menu/menu_empty.jsp' : 3,
         'binary/addAccount.jsp' : 4,
         'source/dist/WEB-INF/tiles/menu/menu_empty.jsp' : 5,
         'source/dist/addClient.jsp' : 6,
     }
     self.assertEquals(dict(trie), everything)
     self.assertEquals(dict(trie.find('*')), everything)
     self.assertEquals(dict(trie.find('**')), everything)
Пример #2
0
 def test_find_r(self):
     # On a default-constructed TST, '*/*.jsp' is expected to yield exactly
     # one entry.
     trie = TST()
     self.paths(trie)
     matches = dict(trie.find('*/*.jsp'))
     self.assertEquals(matches, {'binary/addAccount.jsp' : 4})
Пример #3
0
 def __init__(self, root, host, port):
     '''
     Read the version string and initialise the service state.

     @params root = directory containing a VERSION file; its first line,
         with trailing whitespace removed, becomes the reported version.
     @params host, port = stored as given for later use.
     '''
     version_path = '{}/VERSION'.format(root)
     with open(version_path) as version_file:
         first_line = version_file.readline()
     self._version = first_line.rstrip()
     self._root, self._host, self._port = root, host, port
     self._tst = TST()
     # Start time and last-activity time begin as the same instant.
     started = time.time()
     self._start_time = started
     self._last_time = started
     self._last_time_lock = threading.Lock()
     self._kill_timer = None
Пример #4
0
 def test_iteritems(self):
     # Insert random, de-duplicated keys and check that iteritems() yields
     # the same (key, value) pairs as the sorted list of what was inserted.
     seen = set()
     expected = []
     trie = TST()
     for value in xrange(100):
         key = base64.b64encode(os.urandom(value % 10 + 1)).rstrip('=')
         if key in seen:
             continue
         seen.add(key)
         trie[key] = value
         expected.append((key, value))
     expected.sort()
     self.assertEquals(expected, list(trie.iteritems()))
Пример #5
0
 def test_find_simple(self):
     # Every random key that was inserted must produce at least one result
     # when passed to find() verbatim.
     seen = set()
     trie = TST()
     for value in xrange(100):
         key = base64.b64encode(os.urandom(value % 10 + 1)).rstrip('=')
         if key in seen:
             continue
         seen.add(key)
         trie[key] = value
     for key in sorted(seen):
         assert bool(tuple(trie.find(key)))
Пример #6
0
 def test_delete(self):
     trie = TST()
     trie.put("a", "A")
     self.assertEquals(1, len(trie))
     self.assertEquals("A", trie.get("a"))
     # Deleting a key that was never stored must leave the size alone.
     trie.delete("b")
     self.assertEquals(1, len(trie))
     # NOTE(review): the size is still expected to be 1 after the only key
     # has been deleted, while get() returns None -- confirm whether this is
     # the intended contract of delete()/len() or should be 0.
     trie.delete("a")
     self.assertEquals(1, len(trie))
     self.assertIsNone(trie.get("a"))
Пример #7
0
 def test_remove_(self):
     t = TST()

     def check(expected):
         # The mapping view and find('*') must both equal `expected`.
         self.assertEquals(dict(t), expected)
         self.assertEquals(dict(t.find('*')), expected)

     self.insert(t)
     del t['aaa']
     check({'a':1,'aa':2})
     del t['aa']
     check({'a':1,})
     del t['a']
     check({})
     # Re-inserting after emptying the trie must bring all entries back.
     self.insert(t)
     check({'a':1,'aa':2,'aaa':3})
Пример #8
0
    def AddDictionaries(self, dicts):
        '''
        Load word-list files into per-letter TSTs and merge them into
        self._tst.

        @params dicts = iterable of paths to text files holding one word per
            line. Consecutive words sharing a first letter (compared
            case-insensitively) are batched together and filled by a worker
            thread, so each file is assumed to be grouped by first letter.

        Exceptions raised while filling a batch are printed, not re-raised.
        '''
        tsts = dict()

        def submit(executor, letter, words):
            # Queue one batch for filling, reusing the TST already created
            # for this letter by an earlier batch or an earlier file.
            tst = tsts.get(letter)
            if tst is None:
                tst = tsts[letter] = TST()
            return executor.submit(self.FillTST, tst, words)

        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            for fdict in dicts:
                futures = list()
                words = list()
                letter = ""
                with open(fdict) as source:
                    for line in source:
                        word = line.rstrip('\n')
                        if not word:
                            # Blank lines have no first letter to group by.
                            continue
                        first = word[0].lower()
                        if first == letter:
                            words.append(word)
                            continue
                        # First letter changed: hand off the finished batch.
                        if words:
                            futures.append(submit(executor, letter, words))
                        letter = first
                        # Start the new batch with the whole word; list(word)
                        # would split it into single characters.
                        words = [word]

                # Nothing follows the last letter to trigger its submission,
                # so flush the final batch explicitly.
                if words:
                    futures.append(submit(executor, letter, words))

                for future in concurrent.futures.as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        print(e)

        for tst in tsts.values():
            # TODO Add possibility to merge two tsts if they are overlapping
            self._tst.Take(tst)

        print('Added {} words'.format(self._tst.Size()))
0
    def __init__(self, *args, **kwargs):
        '''
        Create an empty backing TST, then load any initial items.

        @params *args, **kwargs = passed to self.update see documentation for
            update for more info (basically a copy constructor) use like dict().

        NOTE(review): the examples below construct a TST and pass sep=None;
        this method forwards every keyword straight to self.update, so confirm
        they describe this class before relying on them.

        eg:
            t = TST({'ab':12, 'cd':34}, sep=None)
            t = TST(((k, v) for k,v in {'ab':12, 'cd':34}.iteritems()), sep=None)
        '''
        self.tst = TST()
        self.update(*args, **kwargs)
Пример #10
0
 def test_q(self):
     # Exercises the '?' wildcard on a default-constructed TST.
     t = TST()
     t['a'] =1
     t['b'] =1
     t['c'] =1
     # This used to be assertTrue(a, b), which only checks that `a` is truthy
     # and treats `b` as the failure message; compare the key sets instead.
     self.assertEquals(set(dict(t.find('?')).keys()), set(t.keys()))
     t = TST()
     t['/a'] = 1
     t['/b'] = 1
     t['/c'] = 1
     # '??' is expected to match nothing here, while '/?' matches every key.
     self.assertEquals(set(dict(t.find('??')).keys()), set())
     self.assertEquals(set(dict(t.find('/?')).keys()), set(t.keys()))
     t = TST()
     t['dog'] = 1
     t['dig'] = 1
     t['dug'] = 1
     self.assertEquals(set(dict(t.find('d?g')).keys()),set(t.keys()))
Пример #11
0
 def test_match(self):
     t = TST(sep=None)
     for word in ('what', 'where', 'when', 'widget', 'wizard', 'wow', 'wowo'):
         t[word] = 1

     def check(pattern, *words):
         # Every stored value is 1, so the expected result maps each
         # expected word to 1.
         self.assertEquals(dict(t.find(pattern)), dict.fromkeys(words, 1))

     check('w*e*', 'where', 'when', 'widget')
     check('*e', 'where')
     check('*et', 'widget')
     check('wo*', 'wow', 'wowo')
     check('*a*', 'what', 'wizard')
     check('*za*', 'wizard')
     # A pattern without wildcards must match exactly that key.
     for k in t.keys():
         check(k, k)
     check('*dg*', 'widget')
     check('*he*', 'when', 'where')
Пример #12
0
 def test_q_nosep(self):
     # Exercises the '?' wildcard on a TST built with sep=None.
     t = TST(sep=None)
     t['a'] =1
     t['b'] =1
     t['c'] =1
     # This used to be assertTrue(a, b), which only checks that `a` is truthy
     # and treats `b` as the failure message; compare the key sets instead.
     self.assertEquals(set(dict(t.find('?')).keys()), set(t.keys()))
     t = TST(sep=None)
     t['/a'] = 1
     t['/b'] = 1
     t['/c'] = 1
     # With sep=None both '??' and '/?' are expected to match every key.
     self.assertEquals(set(dict(t.find('??')).keys()), set(t.keys()))
     self.assertEquals(set(dict(t.find('/?')).keys()), set(t.keys()))
Пример #13
0
        node = stack[0]
        stack = stack[1:]
        seen[node] = True
        if node == end:
            return path(node, parents, [])
        else:
            siblings = filter(lambda s: not seen.has_key(s) and s not in stack, tst.near_search(node, 1))
        for n in siblings:
            parents[n] = node
        stack += siblings

    return None

if __name__ == "__main__":
    # Manual smoke test: load the system word list into a TST, read a start
    # and an end word from test/simple.in, then time and print the bfs result.
    import time

    tst = TST()
    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the handle is closed.
    with open('/usr/share/dict/american-english') as dictionary:
        for line in dictionary:
            w = line.strip()
            if w:
                tst.insert(w)
    # simple test
    with open('test/simple.in') as test_input:
        start = test_input.readline().strip()
        end = test_input.readline().strip()
    t1 = time.time()
    seq = bfs(tst, start, end)
    print("time: %.2f" % (time.time() - t1))
    # bfs returns None when it finds no path; print nothing in that case
    # instead of failing on iteration.
    for word in seq or []:
        print(word)
Пример #14
0
class CamelService():
    '''
    HTTP service wrapper that owns a word TST and stops itself when idle.

    Start() serves CamelRequestHandler requests from a background thread and
    arms a timer. When the timer fires and Touch() has not been called for
    SERVICE_KILL_TIME seconds, the service stops itself; otherwise the timer
    is re-armed for the remaining time.
    '''

    def __init__(self, root, host, port):
        '''
        Read the version string and initialise the service state.

        @params root = directory containing a VERSION file; its first line,
            with trailing whitespace removed, becomes the reported version.
        @params host, port = address the HTTP server binds to in Start().
        '''
        with open('{}/VERSION'.format(root)) as v:
            self._version = v.readline().rstrip()
        self._root = root
        self._host = host
        self._port = port
        self._tst = TST()
        self._start_time = time.time()
        self._last_time = self._start_time
        self._last_time_lock = threading.Lock()
        self._kill_timer = None

    def Status(self):
        '''Return a dict describing the running server.'''
        return {
            'server.version': self._version,
            'server.stdout': sys.stdout.name,
            'server.stderr': sys.stderr.name,
            'server.address': '{}:{}'.format(self._host, self._port),
            'server.pid': os.getpid(),
            'server.words': self._tst.Size(),
            'server.root': self._root,
        }

    def Touch(self):
        '''Record the current time as the moment of last activity.'''
        with self._last_time_lock:
            self._last_time = time.time()

    def TimerUpdate(self, delay=SERVICE_KILL_TIME):
        '''
        (Re)arm the idle timer so that _CheckStatus runs after `delay`
        seconds, cancelling any timer that is already pending.
        '''
        if self._kill_timer is not None:
            self._kill_timer.cancel()

        self._kill_timer = threading.Timer(delay, self._CheckStatus)
        self._kill_timer.start()

    def TimerKill(self):
        '''Cancel and forget the idle timer, if one exists.'''
        if self._kill_timer is not None:
            self._kill_timer.cancel()
            self._kill_timer = None

    def _CheckStatus(self):
        '''
        Timer callback: stop the service if it has been idle for at least
        SERVICE_KILL_TIME seconds, otherwise re-arm the timer for the
        remaining time.
        '''
        # Hold the lock only while reading the timestamp, so that Touch()
        # callers are not blocked for the duration of the shutdown below.
        with self._last_time_lock:
            diff = time.time() - self._last_time

        if diff < SERVICE_KILL_TIME:
            self.TimerUpdate(SERVICE_KILL_TIME - diff)
            return

        print('No activity for the last {} seconds'.format(
            SERVICE_KILL_TIME))
        self.Stop()

    def Start(self):
        '''Start the HTTP server in a background thread and arm the timer.'''
        print('CamelService Start')
        self._server = ThreadedHTTPServer((self._host, self._port),
                                          CamelRequestHandler)
        self._server_thread = threading.Thread(
            target=self._server.serve_forever)
        # self._server_thread.daemon = True
        self._server_thread.start()
        self.TimerUpdate()

    def Stop(self):
        '''Cancel the idle timer, shut the server down and join its thread.'''
        print('CamelService Stop')
        self.TimerKill()
        self._server.shutdown()
        self._server_thread.join()

    def ToCamelCase(self, string):
        '''
        Return every segmentation of `string` found by _BreakIntoWords,
        each rendered as its words title-cased and concatenated.

        `string` must not be empty (see _BreakIntoWords).
        '''
        groups = list()
        self._BreakIntoWords(string, groups, list(), "")
        return [''.join(word.title() for word in group) for group in groups]

    def AddDictionaries(self, dicts):
        '''
        Load word-list files into per-letter TSTs and merge them into
        self._tst.

        @params dicts = iterable of paths to text files holding one word per
            line. Consecutive words sharing a first letter (compared
            case-insensitively) are batched together and filled by a worker
            thread, so each file is assumed to be grouped by first letter.

        Exceptions raised while filling a batch are printed, not re-raised.
        '''
        tsts = dict()

        def submit(executor, letter, words):
            # Queue one batch for filling, reusing the TST already created
            # for this letter by an earlier batch or an earlier file.
            tst = tsts.get(letter)
            if tst is None:
                tst = tsts[letter] = TST()
            return executor.submit(self.FillTST, tst, words)

        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            for fdict in dicts:
                futures = list()
                words = list()
                letter = ""
                with open(fdict) as source:
                    for line in source:
                        word = line.rstrip('\n')
                        if not word:
                            # Blank lines have no first letter to group by.
                            continue
                        first = word[0].lower()
                        if first == letter:
                            words.append(word)
                            continue
                        # First letter changed: hand off the finished batch.
                        if words:
                            futures.append(submit(executor, letter, words))
                        letter = first
                        # Start the new batch with the whole word; list(word)
                        # would split it into single characters.
                        words = [word]

                # Nothing follows the last letter to trigger its submission,
                # so flush the final batch explicitly.
                if words:
                    futures.append(submit(executor, letter, words))

                for future in concurrent.futures.as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        print(e)

        for tst in tsts.values():
            # TODO Add possibility to merge two tsts if they are overlapping
            self._tst.Take(tst)

        print('Added {} words'.format(self._tst.Size()))

    def FillTST(self, tst, words):
        '''
        Insert every word longer than one character into `tst`, keyed and
        valued by itself, and return `tst`. `words` is shuffled in place.
        '''
        # This prevents worst case time for TST
        random.shuffle(words)

        for word in words:
            if len(word) > 1:
                tst.Put(word, word)
        return tst

    def _BreakIntoWords(self, string, groups, current, bad):
        '''
        Recursively split `string` into known words and append each complete
        segmentation (a list of words) to `groups`.

        @params string = non-empty remainder still to be split.
        @params groups = output list that collects finished segmentations.
        @params current = words chosen so far on this branch.
        @params bad = run of leading characters that matched no known word;
            it is emitted as a single unknown word once a match is found or
            the input ends.
        '''
        assert len(string) != 0, "Must not be empty"
        # Use longest matches first
        prefixes = self._tst.AllPrefixesOf(string)[::-1]

        if len(prefixes) == 0:
            # No known word starts here: move one character into `bad` and
            # keep going with the rest.
            bad += string[0:1]
            string = string[1:]
            if len(string) == 0:
                current.append(bad)
                groups.append(current)
            else:
                self._BreakIntoWords(string, groups, current, bad)
        else:
            for prefix in prefixes:
                # Add non matched part of string as unknown word
                if len(bad) != 0:
                    current.append(bad)
                    bad = ""

                # Each candidate prefix continues on its own copy so the
                # alternatives do not share state.
                clone = current[:]
                clone.append(prefix)

                rest = string[len(prefix):]
                if len(rest) == 0:
                    groups.append(clone)
                else:
                    self._BreakIntoWords(rest, groups, clone, bad)
Пример #15
0
from tst import TST

# Create an empty trie.
tree = TST()

# Store the value 100 under the key "apple".
tree.put("apple", 100)

# Look the key up again and print whatever get() returns.
print(tree.get("apple"))
Пример #16
0
#TST - A Ternary Search Trie
#Author: Tim Henderson
#Contact: [email protected] or [email protected]

#This File: Dotty Test

#Copyright (c) 2010, Tim Henderson
#All rights reserved.


from tst import TST

tst = TST()
tst['abc'] = 1
tst['abcde'] = 2
tst['abe'] = 3
tst['abefg'] = 4
tst['abce'] = 5
tst['aba'] = 6
tst['boy'] = 7
tst['bad'] = 8
tst['buster'] = 9
tst['cactus'] = 10

print tst.dotty()
Пример #17
0
        seen[node] = True
        if node == end:
            return path(node, parents, [])
        else:
            siblings = filter(lambda s: not seen.has_key(s) and s not in stack,
                              tst.near_search(node, 1))
        for n in siblings:
            parents[n] = node
        stack += siblings

    return None


if __name__ == "__main__":
    # Manual smoke test: load the system word list into a TST, read a start
    # and an end word from test/simple.in, then time and print the bfs result.
    import time

    tst = TST()
    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the handle is closed.
    with open('/usr/share/dict/american-english') as dictionary:
        for line in dictionary:
            w = line.strip()
            if w:
                tst.insert(w)
    # simple test
    with open('test/simple.in') as test_input:
        start = test_input.readline().strip()
        end = test_input.readline().strip()
    t1 = time.time()
    seq = bfs(tst, start, end)
    print("time: %.2f" % (time.time() - t1))
    # bfs returns None when it finds no path; print nothing in that case
    # instead of failing on iteration.
    for word in seq or []:
        print(word)
Пример #18
0
 def test_longest_prefix(self):
     trie = TST()
     # Each key is stored with its upper-cased form as the value.
     for key in ("a", "anterior", "ant", "aunt"):
         trie.put(key, key.upper())
     # (query, expected longest stored prefix); "" means no prefix is expected.
     cases = (
         ("auntie", "aunt"),
         ("ant", "ant"),
         ("", ""),
         ("b", ""),
     )
     for query, expected in cases:
         self.assertEquals(trie.longestPrefixOf(query), expected)
Пример #19
0
class SuffixTree(MutableMapping):
    '''
    Mapping that can also be searched by the start of any key suffix.

    Each key is stored in the backing TST twice over: once as START + key
    holding the value, and once per suffix of the key holding the set of
    full keys that the suffix belongs to.
    '''

    def __init__(self, *args, **kwargs):
        '''
        @params *args, **kwargs = passed to self.update see documentation for
            update for more info (basically a copy constructor) use like dict().

        eg:
            t = SuffixTree({'ab':12, 'cd':34})
            t = SuffixTree((k, v) for k,v in {'ab':12, 'cd':34}.iteritems())
        '''
        self.tst = TST()
        self.update(*args, **kwargs)

    def find(self, substr):
        '''
        Yield (key, value) for every stored key that has a suffix starting
        with substr. An empty substr yields every item.
        '''
        if not substr:
            for k, v in self.iteritems():
                yield k, v
            # Nothing left to search for; falling through would index
            # substr[0] on an empty string.
            return
        root = None
        cursor = (self.tst.heads[ord(substr[0])], 1)
        while cursor:
            n, d = cursor
            if n is None:
                return
            if n.internal():
                if d == len(substr):
                    # All of substr has been consumed; everything below
                    # this node is a match.
                    root = n
                    break
                ch = substr[d]
                if ch < n.ch:
                    cursor = (n.l, d)
                elif ch == n.ch:
                    cursor = (n.m, d + 1)
                else:
                    cursor = (n.r, d)
                continue
            if n.key[:len(substr)] == substr:
                root = n
                break
            return
        # now expand root
        q = deque()
        found = set()
        q.appendleft(root)
        while q:
            n = q.pop()
            if not n:
                continue
            if n.accepting:
                # Suffix nodes hold the set of full keys they belong to.
                found |= n.val
            q.append(n.r)
            q.append(n.m)
            q.append(n.l)
        for k in found:
            # Drop the leading START marker before handing the key back.
            yield k[1:], self.tst.get(k)

    def keys(self):
        '''Return a list of all stored keys.'''
        return [k for k, v in self.iteritems()]

    def iteritems(self):
        '''
        Yield (key, value) for every stored key by walking the part of the
        backing TST that hangs off the START head.
        '''
        q = deque()
        h = self.tst.heads[ord(START)]
        if h is None:
            return
        q.appendleft(h)
        while q:
            n = q.pop()
            if not n:
                continue
            if n.accepting:
                # Drop the first and last character of the node key.
                # NOTE(review): presumably START plus a terminator added by
                # the TST itself -- confirm against the TST node layout.
                yield n.key[1:-1], n.val
            q.append(n.r)
            q.append(n.m)
            q.append(n.l)

    def __len__(self):
        # iteritems() is a generator, which has no len(); count its items.
        return sum(1 for _ in self.iteritems())

    def __setitem__(self, key, value):
        '''Store value under key and index every suffix of key.'''
        fullkey = START + key
        self.tst[fullkey] = value
        for i in range(len(key)):
            curkey = key[i:]
            keys = self.tst.get(curkey, set())
            keys.add(fullkey)
            self.tst[curkey] = keys

    def __getitem__(self, key):
        fullkey = START + key
        return self.tst[fullkey]

    def __delitem__(self, key):
        '''Always raises RuntimeError: removal is not supported.'''
        raise RuntimeError('Removing from SuffixTree is not allowed')

    def __iter__(self):
        for k, v in self.iteritems():
            yield k

    def __contains__(self, pattern):
        '''Return True only when pattern is stored as an exact key.'''
        try:
            self[pattern]
        except KeyError:
            return False
        return True

    def __str__(self):
        return str(dict(self))

    def __repr__(self):
        return str(self)
Пример #20
0
 def test_insert_(self):
     # After self.insert, both the mapping view and find('*') are expected
     # to contain these three entries.
     expected = {'a':1,'aa':2,'aaa':3}
     trie = TST()
     self.insert(trie)
     self.assertEquals(dict(trie), expected)
     self.assertEquals(dict(trie.find('*')), expected)
Пример #21
0
 def test_prefix_match(self):
     trie = TST()
     # Each key is stored with its upper-cased form as the value.
     for key in ("a", "anterior", "antidisassembly", "ant", "aunt"):
         trie.put(key, key.upper())

     # The result is consumed like a queue: the size first, then the keys
     # in the order get() hands them out.
     matches = trie.prefixMatch("ant")
     self.assertEquals(matches.qsize(), 3)
     for expected in ("ant", "anterior", "antidisassembly"):
         self.assertEquals(matches.get(), expected)

     # For the remaining prefixes only the number of matches is checked.
     for prefix, count in (("bob", 0), ("aunt", 1), ("auntie", 0)):
         self.assertEquals(trie.prefixMatch(prefix).qsize(), count)
Пример #22
0
 def test_put_get(self):
     trie = TST()
     # A fresh trie is empty and get() on a missing key returns None.
     self.assertEquals(0, len(trie))
     self.assertIsNone(trie.get("a"))

     trie.put("a", "a")
     self.assertEquals(1, len(trie))
     self.assertEquals("a", trie.get("a"))

     trie.put("b", "b")
     self.assertEquals(2, len(trie))
     self.assertEquals("a", trie.get("a"))
     self.assertEquals("b", trie.get("b"))

     # Overwriting an existing key replaces the value without growing.
     trie.put("a", "new_a")
     self.assertEquals(2, len(trie))
     self.assertEquals("new_a", trie.get("a"))

     self.assertTrue(trie.contains("b"))
     self.assertFalse(trie.contains("ab"))