Exemplo n.º 1
0
    word_dict_root=WordTree()
    fp=codecs.open('dict/word3.txt','r','utf8')## 来自国家语言委员会
    print 'dict/word3.txt loaded'
    all_line=fp.readlines()
    fp.close()
    word_dict_root.BuildFindTree(all_line)
    word_dict_root.LoadWordType()
    word_dict_root.LoadWordFreqFile()
    word_dict_root.LoadHudongbaikeWords()
    word_dict_root.LoadXinHuaZhiDian()
    print 'dict loaded'
    return word_dict_root.LoadFinish2()
    #if worddict:
    #    worddict.buildDict('data/dictdb','data/outdata','data/outindex',db_env_flag)

def DumpGroupTree():
    """Export (word, parent_group) pairs from the group database to a text file.

    Reads every row of the ``groupword`` table in the SQLite database at
    ``../fetch_hudongbaike/data/group.db`` and writes one ``word parent_group``
    line per row to ``data/grouplist.txt`` (UTF-8). Rows whose ``parent_group``
    is NULL are skipped. Both paths are resolved against the current working
    directory.

    Errors raised by sqlite3 or by the file operations propagate to the
    caller; the database connection and the output file are closed either way.
    """
    sqlcon = sqlite3.connect('../fetch_hudongbaike/data/group.db')
    try:
        sqlc = sqlcon.cursor()
        sqlc.execute('select word,parent_group from groupword')

        # Opened only after the query succeeded, so a failing query does not
        # truncate an existing output file.
        f = codecs.open('data/grouplist.txt', 'w', 'utf8')
        try:
            for word, parent_group in sqlc:
                if parent_group is not None:
                    # Same bytes as the former ``print >>f`` statement
                    # (text followed by a newline), but valid on Python 3 too.
                    f.write(u'%s %s\n' % (word, parent_group))
        finally:
            f.close()
    finally:
        sqlcon.close()
if __name__ == '__main__':
    print worddict2.version()
    wordlist=BuildDefaultWordDic()
    buildres=worddict2.buildDict('/app_data/chinese_decode/dbindex',wordlist)
    print 'build res:',buildres
    DumpGroupTree()
Exemplo n.º 2
0
#-*-coding:utf-8-*-
import re
import string
import codecs
import pickle
import gzip

# Use ujson under the name `json` when it can be imported; otherwise fall
# back to the standard-library json module.
try:
    import ujson as json
except Exception:
    # Deliberately broad: any failure while importing ujson triggers the
    # fallback. The exception object itself is not needed, so it is not bound.
    import json

# Optional dependency: bind worddict2 as `worddict` when it imports and its
# version check passes; on any ordinary failure leave `worddict` as None.
try:
    import worddict2 as worddict
    ver = worddict.version()
    # `ver` is formatted with '%d.%d.%d' below, so it is expected to be a
    # 3-item sequence of integers; its second item must be at least 2.
    if ver[1] < 2:
        print('worddict version error %d.%d.%d' % ver)
        # Raise SystemExit directly instead of calling exit(): `exit` is only
        # injected by the `site` module, and where it is missing the resulting
        # NameError would be swallowed by the handler below, hiding the
        # version error. SystemExit is not an Exception subclass, so it
        # still terminates the program.
        # NOTE(review): exit status 0 is kept from the original even though
        # this is an error path -- confirm whether callers rely on it.
        raise SystemExit(0)
except Exception:
    worddict = None


class WordCell:
    """Plain attribute holder with class-level defaults for four fields.

    NOTE(review): the field meanings are not shown in this part of the file;
    judging by the names they are presumably a word's frequency, type,
    weight and word group -- confirm against the code that fills them in.
    """
    # Numeric fields default to zero.
    freq = weight = 0
    # These fields are unset (None) until assigned.
    type = wordgroup = None


class DbTree:
    dbenv = None
Exemplo n.º 3
0
#-*-coding:utf-8-*-
import re
import string
import codecs
import pickle
import gzip

# Use ujson under the name `json` when it can be imported; otherwise fall
# back to the standard-library json module.
try:
    import ujson as json
except Exception:
    # Deliberately broad: any failure while importing ujson triggers the
    # fallback. The exception object itself is not needed, so it is not bound.
    import json

# Optional dependency: bind worddict2 as `worddict` when it imports and its
# version check passes; on any ordinary failure leave `worddict` as None.
try:
    import worddict2 as worddict
    ver = worddict.version()
    # `ver` is formatted with '%d.%d.%d' below, so it is expected to be a
    # 3-item sequence of integers; its second item must be at least 2.
    if ver[1] < 2:
        print('worddict version error %d.%d.%d' % ver)
        # Raise SystemExit directly instead of calling exit(): `exit` is only
        # injected by the `site` module, and where it is missing the resulting
        # NameError would be swallowed by the handler below, hiding the
        # version error. SystemExit is not an Exception subclass, so it
        # still terminates the program.
        # NOTE(review): exit status 0 is kept from the original even though
        # this is an error path -- confirm whether callers rely on it.
        raise SystemExit(0)
except Exception:
    worddict = None

class WordCell:
    """Plain attribute holder with class-level defaults for four fields.

    NOTE(review): the field meanings are not shown in this part of the file;
    judging by the names they are presumably a word's frequency, type,
    weight and word group -- confirm against the code that fills them in.
    """
    # Numeric fields default to zero.
    freq = weight = 0
    # These fields are unset (None) until assigned.
    type = wordgroup = None

class DbTree:
    dbenv=None
    db=None
    cursor=None