word_dict_root=WordTree() fp=codecs.open('dict/word3.txt','r','utf8')## 来自国家语言委员会 print 'dict/word3.txt loaded' all_line=fp.readlines() fp.close() word_dict_root.BuildFindTree(all_line) word_dict_root.LoadWordType() word_dict_root.LoadWordFreqFile() word_dict_root.LoadHudongbaikeWords() word_dict_root.LoadXinHuaZhiDian() print 'dict loaded' return word_dict_root.LoadFinish2() #if worddict: # worddict.buildDict('data/dictdb','data/outdata','data/outindex',db_env_flag) def DumpGroupTree(): sqlcon=sqlite3.connect('../fetch_hudongbaike/data/group.db') sqlc=sqlcon.cursor() sqlc.execute('select word,parent_group from groupword') f=codecs.open('data/grouplist.txt','w','utf8') for word,parent_group in sqlc: if parent_group!=None: print >>f,'%s %s'%(word,parent_group) f.close() if __name__ == '__main__': print worddict2.version() wordlist=BuildDefaultWordDic() buildres=worddict2.buildDict('/app_data/chinese_decode/dbindex',wordlist) print 'build res:',buildres DumpGroupTree()
# -*- coding: utf-8 -*-
import re
import string
import codecs
import pickle
import gzip
import sys

# Use ujson as the JSON implementation when it imports; otherwise fall back to
# the standard json module under the same name.
try:
    import ujson as json
except Exception:
    import json

# Optional worddict2 extension, bound as `worddict`. Any failure while
# importing it or reading its version leaves `worddict` set to None.
try:
    import worddict2 as worddict
    ver = worddict.version()
    if ver[1] < 2:
        # Parenthesised so it reads the same as a Python 2 print statement.
        print('worddict version error %d.%d.%d' % ver)
        # sys.exit instead of the site-provided exit(): when that helper is
        # missing (e.g. `python -S`) the NameError would be swallowed by the
        # handler below and the version mismatch silently ignored.
        # NOTE(review): exit status is 0 although this is an error path --
        # kept as in the original; confirm whether callers rely on it.
        sys.exit(0)
except Exception:
    worddict = None


class WordCell:
    """Plain record holding per-word attributes.

    All fields are class-level defaults; instances override them by
    assignment. NOTE(review): field meanings are inferred from their names
    only -- confirm against the code that fills them in.
    """
    freq = 0
    type = None
    weight = 0
    wordgroup = None


class DbTree:
    dbenv = None
# -*- coding: utf-8 -*-
import re
import string
import codecs
import pickle
import gzip
import sys

# Use ujson as the JSON implementation when it imports; otherwise fall back to
# the standard json module under the same name.
try:
    import ujson as json
except Exception:
    import json

# Optional worddict2 extension, bound as `worddict`. Any failure while
# importing it or reading its version leaves `worddict` set to None.
try:
    import worddict2 as worddict
    ver = worddict.version()
    if ver[1] < 2:
        # Parenthesised so it reads the same as a Python 2 print statement.
        print('worddict version error %d.%d.%d' % ver)
        # sys.exit instead of the site-provided exit(): when that helper is
        # missing (e.g. `python -S`) the NameError would be swallowed by the
        # handler below and the version mismatch silently ignored.
        # NOTE(review): exit status is 0 although this is an error path --
        # kept as in the original; confirm whether callers rely on it.
        sys.exit(0)
except Exception:
    worddict = None


class WordCell:
    """Plain record holding per-word attributes.

    All fields are class-level defaults; instances override them by
    assignment. NOTE(review): field meanings are inferred from their names
    only -- confirm against the code that fills them in.
    """
    freq = 0
    type = None
    weight = 0
    wordgroup = None


class DbTree:
    dbenv = None
    db = None
    cursor = None