Пример #1
0
def cmmseg2_seg(w, encoding='utf8'):
    """Segment *w* with ``cmmseg`` and return the pieces as unicode strings.

    Args:
        w: Text to segment; either a ``unicode`` object or a byte string
            encoded with *encoding*.
        encoding: Codec used to decode *w* when it is not already unicode.

    Returns:
        A list of unicode segments, in the order ``cmmseg.segment`` yields
        them. Segments that cannot be decoded as UTF-8 are skipped.
    """
    # isinstance rather than an exact type comparison, so instances of
    # unicode subclasses are treated as text instead of being decoded again.
    if not isinstance(w, unicode):
        w = w.decode(encoding)

    segs = []
    # The segmenter is always handed UTF-8 bytes, whatever *encoding* was.
    for x in cmmseg.segment(w.encode('utf8')):
        try:
            segs.append(x.decode('utf8'))
        except UnicodeDecodeError:
            # Best effort: drop a segment that is not valid UTF-8, but let
            # unrelated errors (and KeyboardInterrupt) propagate.
            continue
    return segs
Пример #2
0
def cmmseg2_seg(w, encoding="utf8"):
    """Segment *w* with ``cmmseg`` and return the pieces as unicode strings.

    Args:
        w: Text to segment; either a ``unicode`` object or a byte string
            encoded with *encoding*.
        encoding: Codec used to decode *w* when it is not already unicode.

    Returns:
        A list of unicode segments, in the order ``cmmseg.segment`` yields
        them. Segments that cannot be decoded as UTF-8 are skipped.
    """
    # isinstance rather than an exact type comparison, so instances of
    # unicode subclasses are treated as text instead of being decoded again.
    if not isinstance(w, unicode):
        w = w.decode(encoding)

    segs = []
    # The segmenter is always handed UTF-8 bytes, whatever *encoding* was.
    for x in cmmseg.segment(w.encode("utf8")):
        try:
            segs.append(x.decode("utf8"))
        except UnicodeDecodeError:
            # Best effort: drop a segment that is not valid UTF-8, but let
            # unrelated errors (and KeyboardInterrupt) propagate.
            continue
    return segs
Пример #3
0
# -*- coding: utf-8 -*-

import cmmseg
#cmmseg.init('F:\\deps\\mmseg\\src\\win32')
# Construct the segmenter from a hard-coded path (presumably the mmseg
# data directory -- confirm for your installation).
seg = cmmseg.MMSeg('\\deps\\mmseg\\src\\win32')

# The sample text is encoded to UTF-8 before being handed to the segmenter.
sample = u'中文分词'
rs = cmmseg.segment(sample.encode('utf-8'))

# Decode each returned segment from UTF-8 and print it on its own line.
for token in rs:
    print(token.decode('utf-8'))