Exemplo n.º 1
0
def gen_zhengzhou_tree(dirname=myconfig.ZZ_STD_ADD,
                       sav_file=myconfig.zhengzhou_std_word,
                       sav_file_2=myconfig.zhengzhou_std_tree):
    """Build two tries from the comma-separated files found under *dirname*.

    Every file reached by walking *dirname* is read, its lines are shuffled,
    and for each line containing a comma the second comma-separated field is
    taken as the raw address.  The raw address is passed through
    ``utils.pre_trans``; the transformed value is inserted into one trie and
    the raw value into another.  Each (transformed, raw) pair is also written
    as a tab-separated line to ``./addr_match.txt``.

    Args:
        dirname: Root directory that is walked recursively for input files.
        sav_file: Destination passed to ``utils.save_var`` for the trie of
            raw addresses.
        sav_file_2: Destination passed to ``utils.save_var`` for the trie of
            transformed addresses.

    Returns:
        None.
    """
    # The context manager guarantees the mapping file is closed even when
    # reading or inserting raises part-way through.
    with open("./addr_match.txt", 'w+') as addr_kv_rec:
        print('\n>gen_zhengzhou_tree start')
        my_tree = trie_tree.Trie()
        my_word = trie_tree.Trie()
        cnt = 0
        for root, _, fs in os.walk(dirname):
            for f in fs:
                # Join with the directory currently being visited; joining
                # with ``dirname`` produced wrong paths for nested files.
                pth = os.path.join(root, str(f))
                with open(pth, 'r') as src:
                    lines = src.readlines()
                np.random.shuffle(lines)
                for line in lines:
                    if ',' not in line:
                        continue
                    # NOTE(review): when the line has exactly two fields the
                    # raw value still carries the trailing newline - confirm
                    # that utils.pre_trans / Trie.insert expect that.
                    _line = line.split(',')[1]
                    line = utils.pre_trans(_line)
                    addr_kv_rec.write('%s\t%s\n' % (str(line), str(_line)))
                    cnt += 1
                    # Progress indicator: prints at 1, 10001, 20001, ...
                    if cnt % 10000 == 1:
                        print(cnt)
                    my_tree.insert(line)
                    my_word.insert(_line)
        utils.save_var(my_word, sav_file)
        utils.save_var(my_tree, sav_file_2)
        print('\n>my address tree save ok')
Exemplo n.º 2
0
def test_format_df():
    """Load the saved tree, run ``format_tree`` on its root, and save it back.

    The tree is read from ``myconfig.MY_TREE`` with ``utils.read_var``,
    handed to ``StandAddrTreeBuilder.format_tree`` starting at ``root``, and
    then written back to the same location with ``utils.save_var``.

    Returns:
        None.
    """
    logger.debug('\n> 树的合并')
    from function_ultra import utils
    mStandAddrTreeBuilder = StandAddrTreeBuilder()  # builder instance
    my_tree = utils.read_var(myconfig.MY_TREE)
    mStandAddrTreeBuilder.format_tree(my_tree.root)
    utils.save_var(my_tree, myconfig.MY_TREE)
    # The leftover pdb.set_trace() breakpoint was removed: it blocked the
    # function until a debugger session was dismissed and aborted the run
    # when no interactive terminal was attached.
    logger.debug('\n> 树的合并测试完成')
Exemplo n.º 3
0
def gen_address_tree(filename=myconfig.STDTXTPATH, sav_file=myconfig.MY_TREE):
    """Build a trie with one entry per line of *filename* and save it.

    Args:
        filename: Path of the text file to read.
        sav_file: Destination passed to ``utils.save_var``.

    Returns:
        The populated ``trie_tree.Trie``.
    """
    print('\n>gen_address_tree start')
    my_tree = trie_tree.Trie()
    # ``with`` closes the handle; the original left it open.
    with open(filename, 'r') as src:
        # Lines are inserted as read, i.e. including their line terminator.
        for sent in src:
            my_tree.insert(sent)
    utils.save_var(my_tree, sav_file)
    print('\n>my address tree save ok')
    return my_tree
Exemplo n.º 4
0
def gen_std_tree_from_dataframe(data_src, sav_file=myconfig.MY_TREE):
    """Build the standard address tree from the items of *data_src* and save it.

    Each item is cleaned with ``remove_nan``, printed, and inserted below the
    trie root via ``Trie.part_insert``.

    Args:
        data_src: Iterable of items to insert.
            NOTE(review): presumably the rows of a dataframe - confirm
            against the caller.
        sav_file: Destination passed to ``utils.save_var``.

    Returns:
        ``myconfig.SUCCESS``.
    """
    print('\n>gen_std_tree_from_dataframe start')
    my_tree = trie_tree.Trie()
    for item in data_src:
        clritem = remove_nan(item)
        print(clritem)
        # The pdb.set_trace() that used to sit here stopped the loop on every
        # single item, which made unattended runs impossible.
        my_tree.part_insert(my_tree.root, clritem)
    utils.save_var(my_tree, sav_file)
    print('\n>gen_std_tree_from_dataframe ready and save finish')
    return myconfig.SUCCESS
Exemplo n.º 5
0
def gen_word_tree(filename=myconfig.STDTXTPATH, sav_file=myconfig.MY_WORD):
    """Build a trie of the '/'-separated words in *filename* and save it.

    Every line of the file is split on '/' and each resulting piece is
    inserted into the trie individually.

    Args:
        filename: Path of the text file to read.
        sav_file: Destination passed to ``utils.save_var``.

    Returns:
        The populated ``trie_tree.Trie``.
    """
    # NOTE(review): the start/finish messages below mention the address tree,
    # not the word tree; they are kept unchanged in case logs are parsed.
    print('\n>gen_address_tree start')
    my_tree = trie_tree.Trie()
    # ``with`` closes the handle; the original left it open.
    with open(filename, 'r') as src:
        lines = src.readlines()
    print(len(lines))
    for sent in lines:
        # The last piece of each line keeps the line terminator.
        for word in sent.split('/'):
            my_tree.insert(word)
    utils.save_var(my_tree, sav_file)
    print('\n>my address tree save ok')
    return my_tree
Exemplo n.º 6
0
def gen_std_tree(filename=myconfig.STDTXTPATH,
                 sav_file=myconfig.MY_TREE,
                 delimeter='/'):
    """Build a trie whose entries are the split lines of *filename* and save it.

    Each line is split on *delimeter* and the resulting list is inserted into
    the trie as a single entry.

    Args:
        filename: Path of the text file to read.
        sav_file: Destination passed to ``utils.save_var``.
        delimeter: Separator used to split each line (spelling kept for
            backward compatibility with keyword callers).

    Returns:
        The populated ``trie_tree.Trie``.
    """
    print('\n>gen_std_tree start')
    my_tree = trie_tree.Trie()
    # ``with`` closes the handle; the original left it open.
    with open(filename, 'r') as src:
        for sent in src:
            # The last element of the list keeps the line terminator.
            words = sent.split(delimeter)
            my_tree.insert(words)
    utils.save_var(my_tree, sav_file)
    print('\n>my std tree save ok')
    return my_tree
Exemplo n.º 7
0
 def first_init_DG(self):
     """Create the directed graph from the saved tree and persist it.

     Reads the tree stored at ``myconfig.MY_TREE``, hands its root together
     with a fresh ``nx.DiGraph`` to ``trans_tree_2_graph``, and saves that
     graph to ``myconfig.DIGRAPH``.

     Returns:
         A ``(tree, graph)`` tuple, where ``graph`` is the object read back
         from ``myconfig.DIGRAPH`` rather than the in-memory instance.
     """
     tree = utils.read_var(myconfig.MY_TREE)
     graph = nx.DiGraph()
     tree.trans_tree_2_graph(tree.root, graph)
     utils.save_var(graph, myconfig.DIGRAPH)
     # Hand back the persisted copy, exactly as it will be loaded later.
     reloaded_graph = utils.read_var(myconfig.DIGRAPH)
     return tree, reloaded_graph