# -*- coding: utf-8 -*-
# Demo script: load the locally built cmmseg extension, initialise it with a
# dictionary directory, segment a short Chinese sample and print each token.
import sys

# Put the local build output first on the module search path so that the
# freshly compiled extension is the one picked up by the import below.
sys.path.insert(0, "build/lib.linux-i686-2.6/")

import cmmseg

# Windows alternative that was left disabled in the original script:
#   cmmseg.init('F:\\deps\\mmseg\\src\\win32')
cmmseg.init('/usr/local/coreseek/dict/')

# The sample is encoded to UTF-8 bytes before being passed to segment().
sample_text = u'中文分词'
tokens = cmmseg.segment(sample_text.encode('utf-8'))

# Each returned token is decoded from UTF-8 before printing.
for token in tokens:
    print(token.decode('utf-8'))
def init_cmmseg_dict(dict_path):
    """Ensure cmmseg is set up for the dictionary at ``dict_path``.

    Logs the path, then calls ``cmmseg.instance(dict_path)``. If that call
    raises an ordinary error, falls back to ``cmmseg.init(dict_path)``.

    Args:
        dict_path: Dictionary location handed unchanged to cmmseg.

    Raises:
        Whatever ``cmmseg.init`` raises if the fallback itself fails.
        ``KeyboardInterrupt`` and ``SystemExit`` raised during
        ``cmmseg.instance`` now propagate instead of being swallowed.
    """
    logger.info("init cmmseg %s" % dict_path)
    try:
        cmmseg.instance(dict_path)
    except Exception:
        # Was a bare ``except:``; narrowed so interpreter-exit signals are
        # not mistaken for "instance() failed, run init() instead".
        cmmseg.init(dict_path)
def __init__(self, dict_etc_path="/usr/local/coreseek/dict"):
    """Initialise cmmseg and set this object up via ``Tokenizer.__init__``.

    Args:
        dict_etc_path: Path passed to ``cmmseg.init``; defaults to
            ``/usr/local/coreseek/dict``.
    """
    # cmmseg is initialised before the Tokenizer initialiser runs.
    cmmseg.init(dict_etc_path)
    segment_func = cmmseg.segment
    Tokenizer.__init__(self, segment_func)
# -*- coding: utf-8 -*-
# Demo script: initialise cmmseg with a Windows dictionary directory,
# segment a short Chinese sample and print each resulting token.
import cmmseg

cmmseg.init('F:\\deps\\mmseg\\src\\win32')

# The sample is encoded to UTF-8 bytes before being passed to segment().
sample_text = u'中文分词'
tokens = cmmseg.segment(sample_text.encode('utf-8'))

# Each returned token is decoded from UTF-8 before printing.
for token in tokens:
    print(token.decode('utf-8'))