Пример #1
0
 def load_state(env, alloc):
     """Build the account state trie from a snapshot allocation.

     Args:
         env: Environment object; ``env.db`` is the backing database and
             ``env.config['ACCOUNT_INITIAL_NONCE']`` seeds each new account.
         alloc: Mapping of hex-encoded address to an account mapping with
             ``'storage'``, ``'nonce'`` and ``'balance'`` entries.

     Returns:
         The ``SecureTrie`` that holds every RLP-encoded account of ``alloc``.
     """
     db = env.db
     state = SecureTrie(Trie(db, BLANK_ROOT))
     databaseLog.debug("Loading state from snapshot")
     for count, addr in enumerate(alloc):
         databaseLog.debug("[%d] loading account %s", count, addr)
         account = alloc[addr]
         acct = Account.blank_account(db, env.config['ACCOUNT_INITIAL_NONCE'])
         storage = account['storage']
         if len(storage) > 0:
             t = SecureTrie(Trie(db, BLANK_ROOT))
             for c, k in enumerate(storage, start=1):
                 enckey = zpad(decode_hex(k), 32)
                 t.update(enckey, decode_hex(storage[k]))
                 # Only look at the backlog once per 1000 entries.  The
                 # previous test ``c % 1000`` was truthy on every entry
                 # *except* multiples of 1000, i.e. the throttle was inverted.
                 if c % 1000 == 0 and len(db.db_service.uncommitted) > 50000:
                     databaseLog.debug("%d uncommitted. committing...", len(db.db_service.uncommitted))
                     db.commit()
             acct.storage = t.root_hash
         if account['nonce']:
             acct.nonce = int(account['nonce'])
         if account['balance']:
             acct.balance = int(account['balance'])
         state.update(decode_hex(addr), rlp.encode(acct))
     # Flush whatever is still pending after the last account.
     db.commit()
     return state
    def classifier(self, test_data0, test_data1):
        """Classify every object of both data sets.

        Args:
            test_data0: Objects of the first class (stored with status 0).
            test_data1: Objects of the second class (stored with status 1).

        Returns:
            A list of ``(self._classify_object(data), status)`` tuples, one
            per object in the merged data set.
        """
        rules_tree1 = Trie(test_data0)
        rules_tree2 = Trie(test_data1)
        rules_tree1.trie_for_rules()
        rules_tree2.trie_for_rules()
        imp_rules1 = rules_tree1.important_rules_selection(0.0001)
        imp_rules2 = rules_tree2.important_rules_selection(0.0001)

        inf1 = RulesImportance(imp_rules1, rules_tree1, rules_tree2, 1.2)
        inf2 = RulesImportance(imp_rules2, rules_tree2, rules_tree1, 1.2)
        # NOTE(review): this object is not read again in this method; it is
        # still constructed in case the constructor has side effects - confirm.
        classifier = Classification(inf1, inf2)

        # Merge both sets into one dataset; ids of the second set continue
        # after the ids of the first one.
        all_data = {}
        temp_object = _from_list_to_dataset(all_data, test_data0, 0, 1)
        n_0 = len(test_data0) + 1
        all_data = _from_list_to_dataset(temp_object, test_data1, 1, n_0)
        results = []

        print("Length Length %f" % len(all_data))
        # Iterate the mapping directly: ``iterkeys()`` only exists on
        # Python 2, and the old loop variable shadowed the builtin ``id``.
        for object_id in all_data:
            record = all_data[object_id]
            data = record["data"]
            status = record["status"]
            results.append((self._classify_object(data), status))

        return results
Пример #3
0
    def __init__(self):
        """Create the ES helper, load word vectors and fill the keyword tries.

        The insurance, scope and type tries are filled from their vocab
        files (one stripped line per word).  ``clauses_kw`` is created but
        stays empty because its vocab loader is commented out.
        """
        # Entity linking and disambiguation are implemented with ES + w2v.
        self.es_helper = es_helper()
        self.w2v = self.loadw2v(cur_dir + "/data/w2v_qa_8w.bigram")

        # Loading the custom dictionary is currently disabled:
        # jieba.load_userdict(cur_dir + "/../config/vocab/word.vocab")
        self.insur_kw = Trie()
        self.scop_kw = Trie()
        self.clauses_kw = Trie()
        self.type_kw = Trie()

        # (vocab file, trie to fill), processed in this order.
        # "/../config/vocab/clauses.vocab" -> self.clauses_kw is disabled.
        vocab_targets = (
            ("/../config/vocab/insurance.vocab", self.insur_kw),
            ("/../config/vocab/scope.vocab", self.scop_kw),
            ("/../config/vocab/type.vocab", self.type_kw),
        )
        for relative_path, keyword_trie in vocab_targets:
            with open(cur_dir + relative_path, 'r') as fr:
                for line in fr:
                    keyword_trie.add(line.strip())
Пример #4
0
    def __init__(self, doc, n_gram=5, min_len=2, custom_dict_path=None):
        """Segment ``doc`` and collect the candidate words that score above average.

        Args:
            doc: Raw document passed to ``process``.
            n_gram: Upper bound on candidate length.
            min_len: Lower bound on candidate length.
            custom_dict_path: Forwarded to ``seg_with_custom_dict``.
        """
        self.corpus = seg_with_custom_dict(process(doc), custom_dict_path)
        VisualWord.corpus = self.corpus
        self.n_gram = n_gram
        self.min_len = min_len

        candidates = self.gen_candidates(self.corpus)
        self.total = WordInfo.total = len(candidates)

        self.prefix_trie = Trie()
        self.suffix_trie = Trie(reverse=True)
        for trie in (self.prefix_trie, self.suffix_trie):
            trie.build(candidates)

        def weight(info):
            # Ranking value shared by the filter and the final sort.
            return info.score * info.count

        computed = [
            self.calculate(candidate)
            for candidate in candidates
            if self.min_len <= len(candidate) <= self.n_gram
        ]
        self.words_info = list(set(computed))

        # Keep only the entries whose weight is above the mean weight.
        avg = np.array([weight(w) for w in self.words_info]).mean()
        self.words_info = [w for w in self.words_info if weight(w) > avg]
        self.filter_sub_words()
        self.words_info.sort(key=weight, reverse=True)
        self.real_words = [info.real_word for info in self.words_info]
Пример #5
0
    @classmethod
    def setUpClass(cls):
        """Build the trie shared by all tests of this class.

        Sets ``cls._trie`` to a trie holding the words below and
        ``cls._trie_length`` to the number of inserted words.
        """
        #            root
        #          /      \
        #       /           \
        #     a *             b
        #    / \             /
        #   d   n *         a
        #  /   / \       /  |  \
        # d * d * y *   g * t *  y *
        #              / \   \
        #             e   s * h *
        #            /
        #           l *
        # asterisk denotes a word

        words = (
            "a", "add", "an", "and", "any",
            "bagel", "bag", "bags", "bat", "bath", "bay",
        )
        cls._trie = Trie()
        for word in words:
            cls._trie.insert(word)

        # Derived from the word list (11) instead of a hand-kept number.
        cls._trie_length = len(words)
Пример #6
0
    def filter_sub_words(self):
        """Drop entries that are part of a longer word and tidy the survivors.

        An entry is kept only when both ``get_word_count`` lookups (suffix
        and prefix direction) come back empty.  For kept entries the first
        matching leading address marker is stripped from ``real_word`` and
        the text is cut right after the first organisation suffix found.
        ``self.words_info`` is replaced by the kept entries (set order).
        """
        leading_markers = ('住所地', '住所', '住')
        closing_markers = ('分行', '公司')

        kept = set()
        for wi in self.words_info:
            vword = wi.vword
            left = self.get_word_count(vword, prefix_search=True, suffix=True)
            right = self.get_word_count(vword, prefix_search=True, suffix=False)
            if len(left) > 0 or len(right) > 0:
                continue

            kept.add(wi)
            for marker in leading_markers:
                if wi.real_word.startswith(marker):
                    # Strip only the leading marker; ``replace`` used to
                    # delete every later occurrence of it as well.
                    wi.real_word = wi.real_word[len(marker):]
                    break
            for marker in closing_markers:
                idx = wi.real_word.find(marker)
                if idx != -1:
                    # Keep everything up to and including the marker.
                    wi.real_word = wi.real_word[:idx + len(marker)]
                    break

        self.words_info = list(kept)
Пример #7
0
 def func():
     """Build the command and tag tries and publish them in the global ``tries``."""
     global tries
     # Wait until both tags and aliases have finished loading.
     while not (tags and all_alias):
         time.sleep(0.1)
     cmd_trie = Trie()
     tag_trie = Trie()
     cmd_trie.add(cfg.cmds)
     cmd_trie.add(all_alias['cmd'].keys())
     tag_trie.add(tags.keys())
     tries = {'cmd': cmd_trie, 'tag': tag_trie}
Пример #8
0
    def test(self):
        """Exercise size, repr, words, membership, is_prefix and prefix_words."""
        trie = Trie()
        assert trie.size == 0
        assert repr(trie) == "Trie({})"
        assert trie.words == []

        trie.add('cat')
        assert trie.size == 1
        assert repr(trie) == "Trie({'c': {'a': {'t': {'end': True}}}})"
        assert trie.words == ['cat']

        # Adding the same word again must not change anything.
        trie.add('cat')
        assert trie.size == 1
        assert trie.words == ['cat']

        assert 'cat' in trie
        for missing in ('ca', 'catalog'):
            assert missing not in trie

        trie.add('dog')
        assert trie.size == 2
        assert trie.words == ['cat', 'dog']

        assert 'dog' in trie
        assert trie.is_prefix('ca') is True

        for word in ('deer', 'pan', 'panda'):
            trie.add(word)
        assert trie.size == 5
        assert trie.words == ['cat', 'dog', 'deer', 'pan', 'panda']
        assert 'd' not in trie
        assert 'pan' in trie
        assert 'pand' not in trie

        prefix_cases = (
            ('d', True),
            ('do', True),
            ('pa', True),
            ('pan', True),
            ('pand', True),
            ('panda', True),
            ('da', False),
            ('pana', False),
        )
        for prefix, expected in prefix_cases:
            assert trie.is_prefix(prefix) is expected

        trie.add('do')
        completion_cases = (
            ('pa', ['pan', 'panda']),
            ('d', ['do', 'dog', 'deer']),
            ('do', ['do', 'dog']),
            ('cat', ['cat']),
            ('pap', []),
        )
        for prefix, expected_words in completion_cases:
            assert trie.prefix_words(prefix) == expected_words

        # Construct from a word list and show the result.
        trie = Trie(['cat', 'dog', 'do', 'deer', 'pan', 'panda'])
        print('\n', trie)
Пример #9
0
    def test_init(self):
        """Check the base node of an empty trie and of one loaded from a file."""
        empty_base = Trie().get_base_node()
        self.assertFalse(empty_base.isend)

        # testfile1.txt contains python, javascript, java
        file_base = Trie('testfile1.txt').get_base_node()
        self.assertFalse(file_base.isend)
        self.assertEqual(file_base.children[3], None)
        for first_letter in ('j', 'p'):
            slot = ord(first_letter) - ord('a')
            self.assertTrue(
                isinstance(file_base.children[slot], TrieNode))
Пример #10
0
def createTrie(dictionary_file):
	"""Read one word per line from ``dictionary_file`` and return the filled trie.

	Each word is inserted letter by letter; a branch is added whenever the
	current node has no child for the letter, and the final node is marked
	as a word end.
	"""
	with open(dictionary_file, 'r') as fin:
		words = [raw_line.rstrip() for raw_line in fin]

	root = Trie()
	for word in words:
		node = root
		for letter in word:
			if not node[letter]:
				node.addBranch(Trie(letter))
			node = node[letter]
		node.setWordEnd()

	return root
Пример #11
0
 def __init__(self, db, chainid, height, apphash):
     """Store the chain metadata and open the state trie rooted at ``apphash``.

     Args:
         db: Database handed to the underlying ``Trie``.
         chainid: Stored as ``chain_id``.
         height: Stored as ``last_block_height``.
         apphash: Stored as ``last_block_hash`` and used as the trie root.
     """
     self.db = db
     self.chain_id, self.last_block_height, self.last_block_hash = (
         chainid, height, apphash)
     backing_trie = Trie(self.db, apphash)
     self.storage = StateTrie(backing_trie)
     """
Пример #12
0
    def to_dict(self):
        """Return the block header fields plus a decoded copy of the state.

        Every account of ``self.state`` becomes a list of length
        ``ACCT_RLP_LENGTH`` with decoded nonce and balance, the raw code and
        a storage dict of decoded integers, keyed by the hex-encoded address.
        """
        raw_state = self.state.to_dict(True)
        accounts = {}
        for address in raw_state:
            fields = raw_state[address]
            storage_trie = Trie('statedb', fields[STORAGE_INDEX])
            row = [0] * ACCT_RLP_LENGTH
            row[NONCE_INDEX] = decode_int(fields[NONCE_INDEX])
            row[BALANCE_INDEX] = decode_int(fields[BALANCE_INDEX])
            row[CODE_INDEX] = fields[CODE_INDEX]
            storage = storage_trie.to_dict(True)
            row[STORAGE_INDEX] = {slot: decode_int(storage[slot]) for slot in storage}
            accounts[address.encode('hex')] = row

        # Header attributes are exported under their own names.
        header_fields = (
            "number", "prevhash", "uncles_root", "coinbase",
            "transactions_root", "difficulty", "timestamp",
            "extradata", "nonce",
        )
        result = {name: getattr(self, name) for name in header_fields}
        result["state"] = accounts
        return result
Пример #13
0
def _save_trie(rsc_dir, entries):
    """
    Save the trie and its value file.
    Args:
        rsc_dir:  target resource directory
        entries:  list of entries
    """
    trie = Trie()
    tag_offset = 0    # number of tag values that precede the current entry
    for entry in entries:
        # The index starts at 1 rather than 0 and is then doubled: an even
        # value marks an exact word match, an odd value (+1) a
        # prefix-matching pattern.
        val = (tag_offset + 1) * 2 + (1 if entry.is_pfx else 0)
        trie.insert(entry.word, val)
        tag_offset += len(entry.tag_nums)
    trie.save(f'{rsc_dir}/preanal.tri')

    val_file = f'{rsc_dir}/preanal.val'
    with open(val_file, 'wb') as fout:
        # Indices start at 1, so one dummy value goes in front.
        fout.write(struct.pack('H', 0))
        for idx, entry in enumerate(entries, start=1):
            logging.debug('%d: %s: %s: %s', idx, entry.word, entry.tag_outs, entry.tag_nums)
            tag_nums = entry.tag_nums
            fout.write(struct.pack('H' * len(tag_nums), *tag_nums))
    logging.info('value saved: %s', val_file)
    logging.info('total entries: %d', len(entries))
    logging.info('expected size: %d',
                 (sum(len(e.tag_nums) for e in entries) + 1) * struct.Struct('H').size)
Пример #14
0
def test_should_autocomplete(client):
    """The autocomplete endpoint returns the stored words matching ``q=face``."""
    autocomplete_trie = Trie()
    autocomplete_trie.insert_words(['Facebook', 'Facebook Lite', 'Faca'])
    api.trie = autocomplete_trie

    endpoint = url_for('autocomplete')
    response = client.get(endpoint, query_string={'q': 'face'})

    assert response.status_code == 200
    expected_options = ['Facebook', 'Facebook Lite']
    assert response.json['options'] == expected_options
Пример #15
0
 def test_grid_bigger_grid(self):
     """Search a 6x6 grid built from the sentence's letters for dictionary words."""
     sentence = 'you shall know a word by the company it keeps'
     words = sentence.split()
     dictionary = Trie(words)
     nrow = ncol = 6
     grid = LetterGrid(''.join(words), nrow=nrow, ncol=ncol)
     found = grid.find_words_from_dict(dictionary)
     expected = {'you', 'know', 'a', 'word', 'the', 'keeps'}
     self.assertCountEqual(expected, found)
Пример #16
0
def find_words(board: List[List[str]], words: List[str]) -> List[str]:
    """
    Collect every word of ``words`` that can be found on the 2D ``board``.

    :see https://leetcode.com/problems/word-search-ii/

    :param board: grid of characters; an empty board yields ``[]``
    :param words: candidate words; an empty list yields ``[]``
    :return: the found words, without duplicates, in no particular order
    """
    if not (board and words):
        return []

    # Index the candidates so the search can follow shared prefixes.
    trie = Trie()
    for candidate in words:
        trie.insert(candidate)

    found = set()
    n_rows = len(board)
    n_cols = len(board[0])
    # Start a depth-first search from every cell.
    for r in range(n_rows):
        for c in range(n_cols):
            _dfs(board, r, c, '', trie, found)

    return list(found)
Пример #17
0
def build_receipt_proof(w3, txn_hash):
    """Rebuild a block's receipt trie and collect the nodes on one receipt's path.

    Args:
        w3: Web3-style client; used for ``eth.getTransactionReceipt``,
            ``eth.getBlock`` and ``sha3``.
        txn_hash: Hash of the transaction whose receipt is of interest.

    Returns:
        A tuple ``(rlp_txn_receipt, receipt.blockHash, txn_path,
        rlp_parent_nodes)``: the encoded receipt, the hash of its block, the
        RLP-encoded transaction index used as trie key, and the RLP encoding
        of the list of visited trie nodes (root first).
    """
    # Fresh trie backed by an in-memory dict.
    receipt_trie = Trie(db={})
    receipt = w3.eth.getTransactionReceipt(txn_hash)
    block = w3.eth.getBlock(receipt.blockHash)
    # Insert the receipt of every transaction in the block, keyed by the
    # RLP encoding of its position in the block.
    for i, tr in enumerate(block.transactions):
        path = rlp.encode(i)
        sibling_receipt = w3.eth.getTransactionReceipt(tr.hex())
        value = get_rlp_receipt(sibling_receipt)
        receipt_trie.set(path, value)
        if i == receipt.transactionIndex:
            # NOTE(review): rlp_txn_receipt stays unbound if no index matches.
            rlp_txn_receipt = value  # We are interested in this txn

    txn_path = rlp.encode(receipt.transactionIndex)
    parent_nodes = []
    t = receipt_trie
    parent_nodes.append(t.root_node)
    node = t.root_node
    # Walk from the root, one nibble of the key at a time.
    nibs = nibbles.bytes_to_nibbles(txn_path)
    for nib in nibs:
        # NOTE(review): every two-item node ends the walk here; presumably
        # extension nodes also have two items - confirm they cannot occur.
        if len(node) == 2:  # Leaf node. We are done.
            break
        # The child reference is looked up in the trie's db and decoded.
        # NOTE(review): assumes the reference is a key present in t.db.
        next_node = rlp.decode(t.db[node[nib]])
        parent_nodes.append(next_node)
        node = next_node

    rlp_parent_nodes = rlp.encode(parent_nodes)
    # Print the hash of the rebuilt root next to the block's receiptsRoot so
    # the two can be compared by eye.
    print('Calculated hash = %s' %
          HexBytes(w3.sha3(rlp.encode(t.root_node))).hex())
    print('Receipts root = %s' % HexBytes(block.receiptsRoot).hex())

    return rlp_txn_receipt, receipt.blockHash, txn_path, rlp_parent_nodes
Пример #18
0
    @classmethod
    def from_corpus(cls, corpus):
        """Alternate constructor: build the index from an iterable of documents.

        Every token of ``document.description`` gets one ``Term`` and every
        rotation of ``token + "$"`` is inserted into the trie pointing at
        that term.

        Args:
            corpus: Iterable of documents exposing ``description``.

        Returns:
            A new instance with ``_number_of_docs`` and ``_trie`` set.
        """
        trie = Trie()
        number_of_docs = 0

        for docID, document in enumerate(corpus):
            number_of_docs += 1
            tokens = process(document.description)
            for index, token in enumerate(tokens):
                # The term that each rotation of the word points at.
                term = Term(token, docID, index)
                rotation = token + "$"
                # Insert one rotation per character so wildcard lookups can
                # start at any position of the word.
                for _ in range(len(rotation)):
                    trie.insert(rotation, term)
                    rotation = rotation[1:] + rotation[0]

            # Progress marker every 1000 documents.
            if docID % 1000 == 0 and docID != 0:
                print(str(docID), end='...')

        idx = cls()
        idx._number_of_docs = number_of_docs
        idx._trie = trie
        return idx
Пример #19
0
    def test_largestCommonPrefix(self):
        """Check wordsWihtLargestCommonPrefix after two rounds of inserts."""
        trie = Trie()

        for word in ["hello", "hellow", "hel"]:
            trie.build(word)

        longest, words = trie.wordsWihtLargestCommonPrefix()

        print(longest)
        print(words)

        self.assertEqual(longest == 5, True)

        allowed = ["hello", "hellow"]
        self.assertEqual(all(word in allowed for word in words), True)

        # Second round: the new words go into the same trie.
        for word in ["ai", "aii", "aiii", "aiiii", "aiiiii", "aiiiiii"]:
            trie.build(word)

        longest, words = trie.wordsWihtLargestCommonPrefix()

        self.assertEqual(longest == 6, True)
        allowed = ["aiiiii", "aiiiiii"]

        print(words)
        self.assertEqual(all(word in allowed for word in words), True)
Пример #20
0
def test_deprecated_trie():
    """Constructing ``Trie`` warns about deprecation but item access still works."""
    with pytest.warns(DeprecationWarning):
        trie = Trie(db={})

    key, value = b'foo', b'bar'
    trie[key] = value
    assert key in trie
    assert trie[key] == value
Пример #21
0
def main2():
    """Fill a trie with a small dictionary and run ``find_words`` on a 3x3 boggle."""
    root = Trie()
    for word in ('geeks', 'uiz', 'quiz', 'gee', 'geek'):
        root.insert(word)
    boggle = ['giz', 'uek', 'qse']
    find_words(boggle, root)
Пример #22
0
 def friendTrie(self):
     """Insert every key of ``self.graph`` into a new trie, print it and return it."""
     tr = Trie()
     for user in list(self.graph.keys()):
         tr.insert(user)
     pprint.pprint(tr.trie)
     return tr
Пример #23
0
def test_trie(actions, args, expecteds):
    """Replay ``actions`` with ``args`` on a fresh trie and compare all results.

    Each action is a method name looked up on the trie; its argument tuple
    is unpacked into the call.
    """
    trie = Trie()
    actuals = [
        getattr(trie, action)(*arg)
        for action, arg, _ in zip(actions, args, expecteds)
    ]
    assert actuals == expecteds
Пример #24
0
def main():
    """Run rounds of Ghost against the computer until the player declines a replay."""
    dictionary = Trie('data/dictionary.txt')
    intro()
    ghost = Ghost(dictionary)

    while True:
        print('-' * 60)
        print("Let's start!\nYou go first.")
        ghost.human_play()
        # Alternate computer and human turns until check_result ends the round.
        while True:
            ghost.computer_play()
            if ghost.check_result(ghost.curr_word, COMP):
                break

            print("------------------------")
            print("Your turn\nCurrent string:", ghost.curr_word)
            ghost.human_play()
            if ghost.check_result(ghost.curr_word, HUMAN):
                break

        # An empty answer or anything starting with 'n' quits.
        ans = input('Do you want to play another game? (Y/N) ').lower().strip()
        quitting = not ans or ans[0] == 'n'
        if quitting:
            print('Thanks for playing!')
        # The game state is reset even when the player is leaving.
        ghost.reset()
        if quitting:
            break
Пример #25
0
 def __init__(self):
     """Create the keyword trie and build the inverted index and vector tables."""
     self.ins_kw = Trie()
     insurance_data = cur_dir + "/data/insurance_data.csv"
     self.invertIndex = self._build_invertIndex(insurance_data)
     self.w2v = self._build_w2vec(cur_dir + "/data/w2v.txt")
     self.ins2v = self._build_ins2vec(insurance_data)
Пример #26
0
 def set_trie(self, filename):
     """Replace ``self.trie`` with a trie of the words read from ``filename``.

     Nothing happens when ``read_file`` does not return a list.
     """
     data = self.read_file(filename)
     if not isinstance(data, list):
         return
     fresh_trie = Trie()
     self.trie = fresh_trie
     for word in data:
         fresh_trie.insert(word)
def build_cost_trie(data_file_path: str) -> Trie:
    """
    Build a trie that maps phone number prefixes to their cost.

    Params:
    data_file_path: path to a file whose lines hold a phone number
    prefix and a cost, separated by a comma

    Returns the filled trie.
    """
    trie = Trie()

    stopwatch.mark("Tokenizing input")
    with open(data_file_path, 'r') as f:
        raw_lines = f.readlines()

    stopwatch.mark("Building cost trie")
    for raw_line in raw_lines:
        num, cost = raw_line.split(',')
        # The first character of the prefix is dropped before inserting
        # (presumably a leading '+'; verify against the data file).
        trie.insert(num[1:], float(cost))

    return trie
class TestMediumSet(unittest.TestCase):
    """Prefix and membership checks against the word list in 25000_words.txt."""

    # Built once, while the class body executes, and shared by every test.
    trie = Trie()
    with open("25000_words.txt") as f_hdl:
        words = [entry.replace("\n", "") for entry in f_hdl]
    trie.add_words(words)

    def test_prefix(self):
        outcome = self.trie.from_prefix("zyg")
        self.assertEqual(outcome, {'matches': ['zygote'], 'next_chars': ['o']})

    def test_wrong_prefix(self):
        outcome = self.trie.from_prefix("zzzz")
        self.assertEqual(outcome, {'matches': [], 'next_chars': []})

    def test_nonstr_prefix(self):
        with self.assertRaises(TypeError):
            self.trie.from_prefix(["zyg", "z"])

    def test_none_prefix(self):
        with self.assertRaises(ValueError):
            self.trie.from_prefix(None)

    def test_word_exists(self):
        # Default lookup is expected to accept the upper-case spelling.
        found = self.trie.word_exists("ZYGOTE")
        self.assertTrue(found)

    def test_word_exists_force_case(self):
        found = self.trie.word_exists("ZYGOTE", ignore_case=False)
        self.assertFalse(found)

    def test_nonstr_word_exists(self):
        with self.assertRaises(TypeError):
            self.trie.word_exists(["zyg", "z"])

    def test_none_word_exists(self):
        with self.assertRaises(ValueError):
            self.trie.word_exists(None)
Пример #29
0
 def test_init(self):
     """A new trie is empty; after one insert only that word is found."""
     tree = Trie()
     assert tree.size == 0
     assert tree.is_empty() is True
     tree.insert('hello')
     # These two comparisons used to be bare expressions whose result was
     # thrown away, so they never checked anything.
     assert tree.search('hello') == 'hello'
     assert tree.search('state') is None
Пример #30
0
 def test_1(self):
     """search and startsWith succeed for an inserted word and fail otherwise."""
     newTree = Trie()
     newTree.insert("hello")

     word_found = newTree.search("hello")
     self.assertTrue(word_found)
     prefix_found = newTree.startsWith("hel")
     self.assertTrue(prefix_found)

     other_word_found = newTree.search("bye")
     self.assertFalse(other_word_found)
     other_prefix_found = newTree.startsWith("b")
     self.assertFalse(other_prefix_found)