Example No. 1
    def wrapper(*args, **kwargs):
        # bound-method call: args[0] is the instance, followed by the
        # pickled model path and the string-matching mode
        self = args[0]
        model_file = args[1]
        string_match = args[2]

        # load the pickled model and rebuild the frequency counters
        model_dict = pickle.load(open(model_file, "rb"))
        self._title_freq_dict = collections.Counter(
            model_dict["title_freq_dict"])
        self._desc_freq_dict = collections.Counter(
            model_dict["desc_freq_dict"])
        for desc in model_dict["co_freq_dict"]:
            self._co_freq_dict[desc].update(model_dict["co_freq_dict"][desc])

        # choose the matching backend according to the requested mode
        self.string_match = string_match
        if self.string_match == 'max':
            self.desc_trie = trie.Trie(self._co_freq_dict)
        elif self.string_match == 'all':
            self.desc_actrie = actrie.load(self._co_freq_dict)

        # record the length of the longest description key
        self.max_length_desc = 0
        for desc in self._co_freq_dict:
            if len(desc) > self.max_length_desc:
                self.max_length_desc = len(desc)

        # load per-title popularity scores and index them in a trie
        self.word_popularity_dict = {}
        for line in open('data/clusters300k.txt'):
            l = line.strip().split('\t')
            title = "".join(l[0].split())
            prob = float(l[1])
            self.word_popularity_dict[title] = prob
        self.smallest_prob = -15.7357
        self.word_trie = trie.Trie(self.word_popularity_dict)

        # finally run the wrapped function with the original arguments
        func(*args, **kwargs)
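The snippet above is only the inner wrapper of a decorator: it hydrates the instance from a pickled model and then calls the wrapped function. A minimal sketch of the surrounding decorator shape is shown below; the name load_model and the single model_dict attribute are illustrative assumptions, not the original project's API.

import functools
import pickle

# Hypothetical decorator shape around a wrapper like the one above: load state
# onto the instance, then call the wrapped function.
def load_model(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        self, model_file = args[0], args[1]   # args[0] is the bound instance
        with open(model_file, "rb") as fh:    # close the pickle file explicitly
            self.model_dict = pickle.load(fh)
        return func(*args, **kwargs)
    return wrapper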
Example No. 2
    def __init__(self, init_types):
        # defined types
        self.type_equations = T.Trie(node_type=T.TypeDefTrieNode)
        # currently known types
        self.type_assignments = T.Trie(node_type=T.TypeAssignmentTrieNode)
        # variable types
        self.var_types = T.Trie(node_type=T.VarTypeTrieNode)

        for x in init_types:
            self.add(x)
Example No. 3
 def commit_state(self):
     if not len(self.journal):
         return
     for address in self.caches['all']:
         acct = rlp.decode(self.state.get(address.decode('hex'))) \
             or self.mk_blank_acct()
         for i, (key, typ, default) in enumerate(acct_structure):
             if key == 'storage':
                 t = trie.Trie(utils.get_db_path(), acct[i])
                 t.proof_mode = self.proof_mode
                 t.proof_nodes = self.proof_nodes
                 for k, v in self.caches.get('storage:' + address,
                                             {}).iteritems():
                     enckey = utils.zpad(utils.coerce_to_bytes(k), 32)
                     val = rlp.encode(utils.int_to_big_endian(v))
                     if v:
                         t.update(enckey, val)
                     else:
                         t.delete(enckey)
                 acct[i] = t.root_hash
                 if self.proof_mode == RECORDING:
                     self.proof_nodes.extend(t.proof_nodes)
             else:
                 if address in self.caches[key]:
                     v = self.caches[key].get(address, default)
                     acct[i] = utils.encoders[acct_structure[i][1]](v)
         self.state.update(address.decode('hex'), rlp.encode(acct))
     if self.proof_mode == RECORDING:
         self.proof_nodes.extend(self.state.proof_nodes)
         self.state.proof_nodes = []
     self.reset_cache()
Example No. 4
def tags5_analysis(dns_names):
    #setup dictionaries
    city_dict_io = open('dictionary.dat', 'r')
    city_trie = trie.Trie()
    for cities in city_dict_io:
        tmp = cities.rstrip().lower()
        if len(tmp) > 3:
            city_trie[tmp] = 0
    #print "Calculating and sorting feature vectors"
    for i in dns_names:
        if len(i) < 3:
            continue
        if i[0] == '*':
            print i
            continue
        to_proc = ArkDNSEntry()
        to_proc.add_dns_name(i.lower())
        fv = to_proc.create_feature_vector()
        to_proc.city_search(city_trie)
        to_proc.ip_search()
        #to_proc.remove_overlap()
        #if fv != '':
        #        if lev3.findall(i[::-1]) or rr.findall(i[::-1]) or cog.findall(i[::-1]) or tf.findall(i[::-1]):
        #print i
        #for j in fv:
        #    print str(j),
        #    print ",",
        #print "\n",
        to_proc.print_tags5_console()
Example No. 5
 def run():
     st = time.time()
     x = trie.Trie(db.EphemDB())
     for i in range(10000):
         x.update(to_string(i), to_string(i**3))
     print('elapsed', time.time() - st)
     return x.root_hash
Example No. 6
    def __init__(self, parent, dictionary_file):
        """ Create the necessary widgets and pack them into the frame. """

        tk.Frame.__init__(self, parent)
        self.dictionary = trie.Trie()
        self._read_dictionary(dictionary_file)
        parent.title("CS240 Spell Checker")

        self.text = tk.Text(self,
                            height=TEXT_HEIGHT,
                            width=TEXT_WIDTH,
                            wrap=tk.WORD)
        self.text.grid(row=0, column=0, columnspan=2)

        self.button = tk.Button(self, text="Check Spelling")
        self.button.grid(row=1, column=0)
        self.button.bind('<ButtonPress>', self._button_callback)

        self.info_text = tk.StringVar()
        self.info_label = tk.Label(self,
                                   textvariable=self.info_text,
                                   width=INFO_WIDTH)
        self.info_label.grid(row=1, column=1, sticky='E')

        self.pack()
Example No. 7
 def __init__(self):
     self.swDS = trie.Trie()
     # iterate line by line; read() would slurp the whole file into one "word"
     with open("swfile.txt", "r") as swfile:
         for word in swfile:
             word = word.strip()
             if word:
                 self.swDS.addWord(word)
Example No. 8
def test_single_trie():
    word = 'Hello'
    t = trie.Trie([word])
    r = t.query(word[:1], 5)
    assert r[0] == True
    assert r[1] == 1
    assert r[2][0][1] == word
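Examples No. 8 and No. 18 assume a trie built from a word list whose query(prefix, limit) call returns a (found, count, results) tuple, where results holds (score, word) pairs. A minimal sketch with that return shape, under those assumptions, could look like the following; it is not the library under test.

# A minimal prefix-trie sketch consistent with the (found, count, results)
# shape these tests assume.
class PrefixTrie(object):
    def __init__(self, words=None):
        self.root = {}                       # nested dicts, one level per character
        for w in (words or []):
            node = self.root
            for ch in w:
                node = node.setdefault(ch, {})
            node['$'] = w                    # sentinel marks a complete word

    def query(self, prefix, limit):
        node = self.root
        for ch in prefix:
            if ch not in node:
                return (False, 0, [])
            node = node[ch]
        results = []                         # collect (score, word) pairs under the prefix
        stack = [node]
        while stack and len(results) < limit:
            cur = stack.pop()
            if '$' in cur:
                results.append((1, cur['$']))
            stack.extend(v for k, v in cur.items() if k != '$')
        return (True, len(results), results)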
Example No. 9
 def account_to_dict(self, address, with_storage_root=False):
     if with_storage_root:
         assert len(self.journal) == 0
     med_dict = {}
     for i, val in enumerate(self.get_acct(address)):
         name, typ, default = acct_structure[i]
         key = acct_structure[i][0]
         if name == 'storage':
             strie = trie.Trie(utils.get_db_path(), val)
             if with_storage_root:
                 med_dict['storage_root'] = strie.get_root_hash().encode(
                     'hex')
         else:
             med_dict[key] = self.caches[key].get(address,
                                                  utils.printers[typ](val))
     med_dict['storage'] = {}
     d = strie.to_dict()
     for k in d.keys() + self.caches['all'].keys():
         v = d.get(k, None)
         subcache = self.caches.get('storage:' + address, {})
         v2 = subcache.get(utils.big_endian_to_int(k), None)
         hexkey = '0x' + k.encode('hex')
         if v2 is not None:
             if v2 != 0:
                 med_dict['storage'][
                     hexkey] = '0x' + utils.int_to_big_endian(v2).encode(
                         'hex')
         elif v is not None:
             med_dict['storage'][hexkey] = '0x' + rlp.decode(v).encode(
                 'hex')
     return med_dict
Example No. 10
def main(argv):
    tries = trie.Trie()

    for line in files.read_lines(argv[0]):
        tries.insert(line)

    # name the loop variable "edge" so it does not shadow the trie module
    print '\n'.join('%s->%s:%s' % edge for edge in tries.edges())
Example No. 11
 def commit_state(self):
     changes = []
     if not len(self.journal):
         # log_state.trace('delta', changes=[])
         return
     for address in self.caches['all']:
         acct = rlp.decode(self.state.get(address.decode('hex'))) \
             or self.mk_blank_acct()
         for i, (key, typ, default) in enumerate(acct_structure):
             if key == 'storage':
                 t = trie.Trie(self.db, acct[i])
                 for k, v in self.caches.get('storage:' + address, {}).iteritems():
                     enckey = utils.zpad(utils.coerce_to_bytes(k), 32)
                     val = rlp.encode(utils.int_to_big_endian(v))
                     changes.append(['storage', address, k, v])
                     if v:
                         t.update(enckey, val)
                     else:
                         t.delete(enckey)
                 acct[i] = t.root_hash
             else:
                 if address in self.caches[key]:
                     v = self.caches[key].get(address, default)
                     changes.append([key, address, v])
                     acct[i] = self.encoders[acct_structure[i][1]](v)
         self.state.update(address.decode('hex'), rlp.encode(acct))
     log_state.trace('delta', changes=changes)
     self.reset_cache()
Example No. 12
 def account_to_dict(self, address, with_storage_root=False,
                     with_storage=True, for_vmtest=False):
     if with_storage_root:
         assert len(self.journal) == 0
     med_dict = {}
     for i, val in enumerate(self.get_acct(address)):
         name, typ, default = acct_structure[i]
         key = acct_structure[i][0]
         if name == 'storage':
             strie = trie.Trie(self.db, val)
             if with_storage_root:
                 med_dict['storage_root'] = strie.get_root_hash().encode('hex')
         else:
             med_dict[key] = utils.printers[typ](self.caches[key].get(address, val))
     if with_storage:
         med_dict['storage'] = {}
         d = strie.to_dict()
         subcache = self.caches.get('storage:' + address, {})
         subkeys = [utils.zpad(utils.coerce_to_bytes(kk), 32) for kk in subcache.keys()]
         for k in d.keys() + subkeys:
             v = d.get(k, None)
             v2 = subcache.get(utils.big_endian_to_int(k), None)
             hexkey = '0x' + utils.zunpad(k).encode('hex')
             if v2 is not None:
                 if v2 != 0:
                     med_dict['storage'][hexkey] = \
                         '0x' + utils.int_to_big_endian(v2).encode('hex')
             elif v is not None:
                 med_dict['storage'][hexkey] = '0x' + rlp.decode(v).encode('hex')
     return med_dict
Example No. 13
 def __init__(self, text):
     self.text = text
     self.model = trie.Trie()
     self.model.load_from_pickle("train_data")
     # self.result = []
     self.result_all = []
     # self.leftover = []
     self.startIndex = 0
Example No. 14
def test_lookup_table_2_level_excludes():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(2)
    assert table['aa'][0] == 2
    assert len(table['aa'][1]) == 2
    assert table['bb'][0] == 1
    assert len(table['bb'][1]) == 1
    assert 'cc' not in table
Example No. 15
 def __init__(self, text):
     self.text = text  #.decode('utf-8')
     self.model = trie.Trie()
     self.model.load_from_pickle("train_data_set")
     self.result = []
     self.result_all = []
     self.leftover = []
     self.startIndex = 0
Example No. 16
def test_lookup_table_1_level_content():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(1)
    for t in ((1, 'a'), (1, 'aa'), (1, 'aaa')):
        assert t in table['a'][1]
    for t in ((1, 'b'), (1, 'bb')):
        assert t in table['b'][1]
    assert (1, 'c') in table['c'][1]
Example No. 17
def test_iteritems():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t.iteritems())
    assert itered == zip(map(list, 'bar baz f foo'.split()), 'barval bazval fval fooval'.split()), itered
Example No. 18
def test_double_similar_token():
    word1 = 'Hello'
    word2 = 'HellO'
    t = trie.Trie([word1, word2])
    r = t.query(word1[:1], 5)
    assert r[0] == True
    assert r[1] == 2
    assert r[2][0][1] in (word1, word2)
Example No. 19
def test_iter():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t)
    assert itered == map(list, 'bar baz f foo'.split()), itered
Example No. 20
def test_itervalues():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t.itervalues())
    assert itered == 'barval bazval fval fooval'.split(), itered
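Examples No. 17, 19 and 20 rely on dict-style assignment plus iteration that yields each key as a list of characters, in sorted order. A rough mapping-style sketch that reproduces that traversal order, again an assumption about the tested library rather than its actual code, is:

class SortedTrie(object):
    _EOW = object()                          # end-of-word marker key

    def __init__(self):
        self.root = {}

    def __setitem__(self, key, value):
        node = self.root
        for ch in key:
            node = node.setdefault(ch, {})
        node[SortedTrie._EOW] = value

    def _walk(self, node, path):
        # depth-first, children visited in sorted character order
        if SortedTrie._EOW in node:
            yield list(path), node[SortedTrie._EOW]
        for ch in sorted(k for k in node if k is not SortedTrie._EOW):
            for item in self._walk(node[ch], path + [ch]):
                yield item

    def __iter__(self):                      # keys come back as lists of characters
        return (k for k, _ in self._walk(self.root, []))

    def iteritems(self):
        return self._walk(self.root, [])

    def itervalues(self):
        return (v for _, v in self._walk(self.root, []))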
Example No. 21
    def __init__(self):
        # @TODO - Create a logger for this server and
        # log the creation of this resource

        self.t = trie.Trie()

        with open('input.txt', 'rt') as f:
            for line in f:
                self.t.insert(line.strip('\n '))
Example No. 22
def run_shrink_word():
    t = trie.Trie()
    t.load_english(maxlen=6)
    words = ['smart', 'father', 'lymph', 'rope']
    for word in words:
        res, lst = shrink_word(word, t)
        print(res)
        print(lst)
        print()
Example No. 23
def test_print():
    testcls = trie.Trie()
    testcls.print_structure()
    outfile = '../output/out.txt'
    testcls.print_structure(outfile)
Example No. 24
def test_serialise():
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    t2 = pickle.loads(pickle.dumps(t))
    itered = list(t2.itervalues())
    assert itered == 'barval bazval fval fooval'.split(), itered

    t = trie.Trie()
    t['obj'] = object()
    t2 = pickle.loads(pickle.dumps(t))
    assert type(t2['obj']) == object, t2['obj']
Example No. 25
 def __init__(self, option = distorm.Decode32Bits, debug = 0):
     self.__asmgadget = trie.Trie()
     self.__asmgadget.set_case_sensitive(False)
     self.__search_depth = 3 # default depth for instruction search
     self.__backward_depth = 3 # default number of insts for backward processing
     self.__max_duplicate = 3 # default number duplicate gadgets, keep somes for enough offset alternatives
     self.__gadget_info = {"hash": "", "name": "", "base_addr": 0, "data_addr": 0}
     self.__debug = debug
     self.__decode_option = option
Example No. 26
def test_lookup_table_2_level_priority():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    t.update('bb', 3)
    t.update('aaa', 2)
    t.update('aa', 1)
    table = t.lookup_table()
    assert table['b'][1][0] == (4, 'bb')
    assert table['a'][1][0] == (3, 'aaa')
    assert table['a'][1][1] == (2, 'aa')
Example No. 27
def test_lookup_table_1_level_length():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(1)
    assert table['a'][0] == 3
    assert len(table['a'][1]) == 3
    assert table['b'][0] == 2
    assert len(table['b'][1]) == 2
    assert table['c'][0] == 1
    assert len(table['c'][1]) == 1
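Examples No. 14, 16, 26 and 27 together suggest that lookup_table(depth) groups words by their depth-character prefix and returns, per prefix, a count plus a weight-sorted list of (weight, word) pairs, with update(word, n) raising a word's weight. The flat-dictionary stand-in below reproduces that observable behaviour; it is a sketch of the assumed semantics, not the real trie implementation.

class LookupTable(object):
    def __init__(self, words=None):
        self.weights = {w: 1 for w in (words or [])}   # every word starts with weight 1

    def update(self, word, n):
        self.weights[word] = self.weights.get(word, 1) + n

    def lookup_table(self, depth=1):
        groups = {}
        for word, weight in self.weights.items():
            if len(word) < depth:
                continue                               # e.g. 'c' never produces a 'cc' entry
            groups.setdefault(word[:depth], []).append((weight, word))
        # each entry: (number of words under the prefix, entries sorted by weight)
        return {prefix: (len(entries), sorted(entries, reverse=True))
                for prefix, entries in groups.items()}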
Example No. 28
 def __init__(self):
     self._online = trie.Trie()
     # this is redundant, but faster; it's very slow to iterate
     # over the trie
     self._online_names = {}
     self.guest_count = 0
     self.pin_ivar = set()
     self.pin_var = set()
     self.gin_var = set()
Example No. 29
 def account_to_dict(self, address):
     med_dict = {}
     for i, val in enumerate(self.get_acct(address)):
         med_dict[acct_structure[i][0]] = val
     strie = trie.Trie(utils.get_db_path(), med_dict['storage']).to_dict()
     med_dict['storage'] = {
         utils.decode_int(k): utils.decode_int(v)
         for k, v in strie.iteritems()
     }
     return med_dict
Example No. 30
def main(argv):
    lines = files.read_lines(argv[0])

    tries = trie.Trie()
    for line in lines[1:]:
        tries.insert(line)

    matches = tries.matching(lines[0])

    print ' '.join(str(match[0]) for match in matches)
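Example No. 30 inserts a set of patterns into a trie and then asks for the positions in a text where any of them occurs. A small self-contained sketch of that matching step, with hypothetical helper names and a nested-dict trie, is:

# Trie-based multi-pattern matching: walk the trie from every text position
# and record the positions where some pattern ends.
def build_trie(patterns):
    root = {}
    for p in patterns:
        node = root
        for ch in p:
            node = node.setdefault(ch, {})
        node['$'] = True                     # a pattern ends at this node
    return root

def matching_positions(text, root):
    positions = []
    for start in range(len(text)):
        node = root
        for ch in text[start:]:
            if '$' in node:                  # some pattern already matched here
                break
            if ch not in node:
                break
            node = node[ch]
        if '$' in node:
            positions.append(start)
    return positions

# e.g. matching_positions('AATCGGGTTCAATCGGGGT',
#                         build_trie(['ATCG', 'GGGT']))  ->  [1, 4, 11, 15]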