def wrapper(*args, **kwargs):
    # Inner function of a decorator: `func` is the wrapped callable
    # captured from the enclosing scope.
    self = args[0]
    model_file = args[1]
    string_match = args[2]
    with open(model_file, "rb") as f:
        model_dict = pickle.load(f)
    self._title_freq_dict = collections.Counter(model_dict["title_freq_dict"])
    self._desc_freq_dict = collections.Counter(model_dict["desc_freq_dict"])
    for desc in model_dict["co_freq_dict"]:
        self._co_freq_dict[desc].update(model_dict["co_freq_dict"][desc])
    self.string_match = string_match
    if self.string_match == 'max':
        self.desc_trie = trie.Trie(self._co_freq_dict)
    elif self.string_match == 'all':
        self.desc_actrie = actrie.load(self._co_freq_dict)
    self.max_length_desc = 0
    for desc in self._co_freq_dict:
        if len(desc) > self.max_length_desc:
            self.max_length_desc = len(desc)
    # Load per-title popularity scores (tab-separated title/probability).
    self.word_popularity_dict = {}
    with open('data/clusters300k.txt') as lines:
        for line in lines:
            l = line.strip().split('\t')
            title = "".join(l[0].split())
            prob = float(l[1])
            self.word_popularity_dict[title] = prob
    self.smallest_prob = -15.7357
    self.word_trie = trie.Trie(self.word_popularity_dict)
    func(*args, **kwargs)

def __init__(self, init_types):
    # defined types
    self.type_equations = T.Trie(node_type=T.TypeDefTrieNode)
    # currently known types
    self.type_assignments = T.Trie(node_type=T.TypeAssignmentTrieNode)
    # variable types
    self.var_types = T.Trie(node_type=T.VarTypeTrieNode)
    for x in init_types:
        self.add(x)

def commit_state(self):
    if not len(self.journal):
        return
    for address in self.caches['all']:
        acct = rlp.decode(self.state.get(address.decode('hex'))) \
            or self.mk_blank_acct()
        for i, (key, typ, default) in enumerate(acct_structure):
            if key == 'storage':
                t = trie.Trie(utils.get_db_path(), acct[i])
                t.proof_mode = self.proof_mode
                t.proof_nodes = self.proof_nodes
                for k, v in self.caches.get('storage:' + address,
                                            {}).iteritems():
                    enckey = utils.zpad(utils.coerce_to_bytes(k), 32)
                    val = rlp.encode(utils.int_to_big_endian(v))
                    if v:
                        t.update(enckey, val)
                    else:
                        t.delete(enckey)
                acct[i] = t.root_hash
                if self.proof_mode == RECORDING:
                    self.proof_nodes.extend(t.proof_nodes)
            else:
                if address in self.caches[key]:
                    v = self.caches[key].get(address, default)
                    acct[i] = utils.encoders[acct_structure[i][1]](v)
        self.state.update(address.decode('hex'), rlp.encode(acct))
    if self.proof_mode == RECORDING:
        self.proof_nodes.extend(self.state.proof_nodes)
        self.state.proof_nodes = []
    self.reset_cache()

def tags5_analysis(dns_names):
    # setup dictionaries
    city_trie = trie.Trie()
    with open('dictionary.dat', 'r') as city_dict_io:
        for cities in city_dict_io:
            tmp = cities.rstrip().lower()
            if len(tmp) > 3:
                city_trie[tmp] = 0
    # print "Calculating and sorting feature vectors"
    for i in dns_names:
        if len(i) < 3:
            continue
        if i[0] == '*':
            print i
            continue
        to_proc = ArkDNSEntry()
        to_proc.add_dns_name(i.lower())
        fv = to_proc.create_feature_vector()
        to_proc.city_search(city_trie)
        to_proc.ip_search()
        # to_proc.remove_overlap()
        # if fv != '':
        #     if lev3.findall(i[::-1]) or rr.findall(i[::-1]) or \
        #        cog.findall(i[::-1]) or tf.findall(i[::-1]):
        #         print i
        # for j in fv:
        #     print str(j),
        #     print ",",
        # print "\n",
        to_proc.print_tags5_console()

def run():
    # Benchmark: insert 10k key/value pairs into an in-memory trie.
    st = time.time()
    x = trie.Trie(db.EphemDB())
    for i in range(10000):
        x.update(to_string(i), to_string(i**3))
    print('elapsed', time.time() - st)
    return x.root_hash

def __init__(self, parent, dictionary_file):
    """ Create the necessary widgets and pack them into the frame. """
    tk.Frame.__init__(self, parent)
    self.dictionary = trie.Trie()
    self._read_dictionary(dictionary_file)
    parent.title("CS240 Spell Checker")
    self.text = tk.Text(self, height=TEXT_HEIGHT, width=TEXT_WIDTH,
                        wrap=tk.WORD)
    self.text.grid(row=0, column=0, columnspan=2)
    self.button = tk.Button(self, text="Check Spelling")
    self.button.grid(row=1, column=0)
    self.button.bind('<ButtonPress>', self._button_callback)
    self.info_text = tk.StringVar()
    self.info_label = tk.Label(self, textvariable=self.info_text,
                               width=INFO_WIDTH)
    self.info_label.grid(row=1, column=1, sticky='E')
    self.pack()

def __init__(self):
    self.swDS = trie.Trie()
    # read() returns the whole file at once, so the original while-loop
    # added the entire file as one "word" and then stopped; iterate
    # line by line instead, adding one stopword per line.
    with open("swfile.txt", "r") as swfile:
        for word in swfile:
            word = word.strip()
            if word:
                self.swDS.addWord(word)

def test_single_trie():
    word = 'Hello'
    t = trie.Trie([word])
    r = t.query(word[:1], 5)
    assert r[0] == True
    assert r[1] == 1
    assert r[2][0][1] == word

def account_to_dict(self, address, with_storage_root=False):
    if with_storage_root:
        assert len(self.journal) == 0
    med_dict = {}
    for i, val in enumerate(self.get_acct(address)):
        name, typ, default = acct_structure[i]
        key = acct_structure[i][0]
        if name == 'storage':
            strie = trie.Trie(utils.get_db_path(), val)
            if with_storage_root:
                med_dict['storage_root'] = strie.get_root_hash().encode('hex')
        else:
            med_dict[key] = self.caches[key].get(address,
                                                 utils.printers[typ](val))
    med_dict['storage'] = {}
    d = strie.to_dict()
    for k in d.keys() + self.caches['all'].keys():
        v = d.get(k, None)
        subcache = self.caches.get('storage:' + address, {})
        v2 = subcache.get(utils.big_endian_to_int(k), None)
        hexkey = '0x' + k.encode('hex')
        if v2 is not None:
            if v2 != 0:
                med_dict['storage'][hexkey] = \
                    '0x' + utils.int_to_big_endian(v2).encode('hex')
        elif v is not None:
            med_dict['storage'][hexkey] = '0x' + rlp.decode(v).encode('hex')
    return med_dict

def main(argv):
    tries = trie.Trie()
    for line in files.read_lines(argv[0]):
        tries.insert(line)
    # Use a distinct loop variable so the `trie` module isn't shadowed.
    print '\n'.join('%s->%s:%s' % edge for edge in tries.edges())

def commit_state(self):
    changes = []
    if not len(self.journal):
        # log_state.trace('delta', changes=[])
        return
    for address in self.caches['all']:
        acct = rlp.decode(self.state.get(address.decode('hex'))) \
            or self.mk_blank_acct()
        for i, (key, typ, default) in enumerate(acct_structure):
            if key == 'storage':
                t = trie.Trie(self.db, acct[i])
                for k, v in self.caches.get('storage:' + address,
                                            {}).iteritems():
                    enckey = utils.zpad(utils.coerce_to_bytes(k), 32)
                    val = rlp.encode(utils.int_to_big_endian(v))
                    changes.append(['storage', address, k, v])
                    if v:
                        t.update(enckey, val)
                    else:
                        t.delete(enckey)
                acct[i] = t.root_hash
            else:
                if address in self.caches[key]:
                    v = self.caches[key].get(address, default)
                    changes.append([key, address, v])
                    acct[i] = self.encoders[acct_structure[i][1]](v)
        self.state.update(address.decode('hex'), rlp.encode(acct))
    log_state.trace('delta', changes=changes)
    self.reset_cache()

def account_to_dict(self, address, with_storage_root=False,
                    with_storage=True, for_vmtest=False):
    if with_storage_root:
        assert len(self.journal) == 0
    med_dict = {}
    for i, val in enumerate(self.get_acct(address)):
        name, typ, default = acct_structure[i]
        key = acct_structure[i][0]
        if name == 'storage':
            strie = trie.Trie(self.db, val)
            if with_storage_root:
                med_dict['storage_root'] = strie.get_root_hash().encode('hex')
        else:
            med_dict[key] = utils.printers[typ](
                self.caches[key].get(address, val))
    if with_storage:
        med_dict['storage'] = {}
        d = strie.to_dict()
        subcache = self.caches.get('storage:' + address, {})
        subkeys = [utils.zpad(utils.coerce_to_bytes(kk), 32)
                   for kk in subcache.keys()]
        for k in d.keys() + subkeys:
            v = d.get(k, None)
            v2 = subcache.get(utils.big_endian_to_int(k), None)
            hexkey = '0x' + utils.zunpad(k).encode('hex')
            if v2 is not None:
                if v2 != 0:
                    med_dict['storage'][hexkey] = \
                        '0x' + utils.int_to_big_endian(v2).encode('hex')
            elif v is not None:
                med_dict['storage'][hexkey] = \
                    '0x' + rlp.decode(v).encode('hex')
    return med_dict

def __init__(self, text):
    self.text = text
    self.model = trie.Trie()
    self.model.load_from_pickle("train_data")
    # self.result = []
    self.result_all = []
    # self.leftover = []
    self.startIndex = 0

def test_lookup_table_2_level_excludes():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(2)
    assert table['aa'][0] == 2
    assert len(table['aa'][1]) == 2
    assert table['bb'][0] == 1
    assert len(table['bb'][1]) == 1
    assert 'cc' not in table

def __init__(self, text):
    self.text = text  # .decode('utf-8')
    self.model = trie.Trie()
    self.model.load_from_pickle("train_data_set")
    self.result = []
    self.result_all = []
    self.leftover = []
    self.startIndex = 0

def test_lookup_table_1_level_content():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(1)
    # Use a distinct loop variable so the trie `t` isn't shadowed.
    for expected in ((1, 'a'), (1, 'aa'), (1, 'aaa')):
        assert expected in table['a'][1]
    for expected in ((1, 'b'), (1, 'bb')):
        assert expected in table['b'][1]
    assert (1, 'c') in table['c'][1]

def test_iteritems():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t.iteritems())
    assert itered == zip(map(list, 'bar baz f foo'.split()),
                         'barval bazval fval fooval'.split()), itered

def test_double_similar_token():
    word1 = 'Hello'
    word2 = 'HellO'
    t = trie.Trie([word1, word2])
    r = t.query(word1[:1], 5)
    assert r[0] == True
    assert r[1] == 2
    assert r[2][0][1] in (word1, word2)

def test_iter():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t)
    assert itered == map(list, 'bar baz f foo'.split()), itered

def test_itervalues():
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    itered = list(t.itervalues())
    assert itered == 'barval bazval fval fooval'.split(), itered

def __init__(self):
    # @TODO - Create a logger for this server and
    # log the creation of this resource
    self.t = trie.Trie()
    with open('input.txt', 'rt') as f:
        for line in f:
            self.t.insert(line.strip('\n '))

def run_shrink_word():
    t = trie.Trie()
    t.load_english(maxlen=6)
    words = ['smart', 'father', 'lymph', 'rope']
    for word in words:
        res, lst = shrink_word(word, t)
        print(res)
        print(lst)
        print()

def test_print():
    testcls = trie.Trie()
    testcls.print_structure()
    outfile = '../output/out.txt'
    testcls.print_structure(outfile)

def test_serialise():
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    t = trie.Trie()
    t['f'] = 'fval'
    t['foo'] = 'fooval'
    t['bar'] = 'barval'
    t['baz'] = 'bazval'
    t2 = pickle.loads(pickle.dumps(t))
    itered = list(t2.itervalues())
    assert itered == 'barval bazval fval fooval'.split(), itered
    t = trie.Trie()
    t['obj'] = object()
    t2 = pickle.loads(pickle.dumps(t))
    assert type(t2['obj']) == object, t2['obj']

def __init__(self, option=distorm.Decode32Bits, debug=0):
    self.__asmgadget = trie.Trie()
    self.__asmgadget.set_case_sensitive(False)
    self.__search_depth = 3  # default depth for instruction search
    self.__backward_depth = 3  # default number of insts for backward processing
    # default number of duplicate gadgets; keep some for enough
    # offset alternatives
    self.__max_duplicate = 3
    self.__gadget_info = {"hash": "", "name": "",
                          "base_addr": 0, "data_addr": 0}
    self.__debug = debug
    self.__decode_option = option

def test_lookup_table_2_level_priority():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    t.update('bb', 3)
    t.update('aaa', 2)
    t.update('aa', 1)
    table = t.lookup_table()
    assert table['b'][1][0] == (4, 'bb')
    assert table['a'][1][0] == (3, 'aaa')
    assert table['a'][1][1] == (2, 'aa')

def test_lookup_table_1_level_length():
    t = trie.Trie(['a', 'aa', 'aaa', 'b', 'bb', 'c'])
    table = t.lookup_table(1)
    assert table['a'][0] == 3
    assert len(table['a'][1]) == 3
    assert table['b'][0] == 2
    assert len(table['b'][1]) == 2
    assert table['c'][0] == 1
    assert len(table['c'][1]) == 1

def __init__(self):
    self._online = trie.Trie()
    # this is redundant, but faster; it's very slow to iterate
    # over the trie
    self._online_names = {}
    self.guest_count = 0
    self.pin_ivar = set()
    self.pin_var = set()
    self.gin_var = set()

def account_to_dict(self, address):
    med_dict = {}
    for i, val in enumerate(self.get_acct(address)):
        med_dict[acct_structure[i][0]] = val
    strie = trie.Trie(utils.get_db_path(), med_dict['storage']).to_dict()
    med_dict['storage'] = {
        utils.decode_int(k): utils.decode_int(v)
        for k, v in strie.iteritems()
    }
    return med_dict

def main(argv):
    lines = files.read_lines(argv[0])
    tries = trie.Trie()
    for line in lines[1:]:
        tries.insert(line)
    matches = tries.matching(lines[0])
    print ' '.join(str(match[0]) for match in matches)