Python cut示例

编程语言: Python

命名空间/包名称: word_cut.jieba

方法/功能: cut

hotexamples.com的示例: 4

Python cut - 共找到4个示例。以下是从开源项目中提取的、评价最高的 word_cut.jieba.cut 真实 Python 示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： get_IDF.py 项目： haomingchan0811/iPIN

def IDF(inputFile='../src/Pos_frequency'):
    """Score every segmented word of ``inputFile`` and persist the scores.

    Each non-blank line is passed through ``clean`` and segmented with
    ``jieba.cut``; the occurrences of every token are counted. The score of
    a token is ``log(number_of_distinct_tokens / occurrences_of_token)``,
    so a token that occurs more often than there are distinct tokens gets a
    negative score.

    Side effects:
        * prints the raw line, the cleaned line and the space-joined tokens
          of every processed line;
        * writes ``"<token> <score>"`` lines to ``IDF.out``;
        * pickles the ``{token: score}`` dictionary to ``IDF.pk``.

    Args:
        inputFile: path of the text file to read, one record per line.
    """
    IDFdict = {}        # token -> occurrence count, later replaced by its score

    with open(inputFile, 'r') as infile:
        for raw in infile:
            # NOTE(review): under Python 2, str.encode() implicitly decodes
            # with the default codec first -- confirm the input is ASCII or
            # that the default encoding is configured elsewhere.
            line = raw.strip().encode('utf-8')
            if not line:
                # Skip blank lines instead of mistaking them for end-of-file,
                # which silently dropped the rest of the input.
                continue
            print(line)
            line = clean(line)
            print(line)
            words = []
            for word in jieba.cut(line):
                words.append(word)
                IDFdict[word] = IDFdict.get(word, 0) + 1
            # Same debug output as before: every token followed by one space.
            print(''.join(word + ' ' for word in words))

    Len = len(IDFdict)                  # number of distinct tokens
    with open('IDF.out', 'w') as outfile:
        # Iterate over a snapshot of the keys because values are replaced in place.
        for word in list(IDFdict):
            # float() forces true division; Python 2 integer division
            # truncated the ratio and could yield log(0) -> ValueError.
            IDFdict[word] = math.log(float(Len) / IDFdict[word])
            outfile.write('%s %s\n' % (word, IDFdict[word]))

    # Pickle streams are binary data, so the file is opened in binary mode.
    with open('IDF.pk', 'wb') as data:
        pickle.dump(IDFdict, data)

示例#2

显示文件

def IDF(inputFile='../src/Pos_frequency'):
    """Score every segmented word of ``inputFile`` and persist the scores.

    Each non-blank line is passed through ``clean`` and segmented with
    ``jieba.cut``; the occurrences of every token are counted. The score of
    a token is ``log(number_of_distinct_tokens / occurrences_of_token)``,
    so a token that occurs more often than there are distinct tokens gets a
    negative score.

    Side effects:
        * prints the raw line, the cleaned line and the space-joined tokens
          of every processed line;
        * writes ``"<token> <score>"`` lines to ``IDF.out``;
        * pickles the ``{token: score}`` dictionary to ``IDF.pk``.

    Args:
        inputFile: path of the text file to read, one record per line.
    """
    IDFdict = {}  # token -> occurrence count, later replaced by its score

    with open(inputFile, 'r') as infile:
        for raw in infile:
            # NOTE(review): under Python 2, str.encode() implicitly decodes
            # with the default codec first -- confirm the input is ASCII or
            # that the default encoding is configured elsewhere.
            line = raw.strip().encode('utf-8')
            if not line:
                # Skip blank lines instead of mistaking them for end-of-file,
                # which silently dropped the rest of the input.
                continue
            print(line)
            line = clean(line)
            print(line)
            words = []
            for word in jieba.cut(line):
                words.append(word)
                IDFdict[word] = IDFdict.get(word, 0) + 1
            # Same debug output as before: every token followed by one space.
            print(''.join(word + ' ' for word in words))

    Len = len(IDFdict)  # number of distinct tokens
    with open('IDF.out', 'w') as outfile:
        # Iterate over a snapshot of the keys because values are replaced in place.
        for word in list(IDFdict):
            # float() forces true division; Python 2 integer division
            # truncated the ratio and could yield log(0) -> ValueError.
            IDFdict[word] = math.log(float(Len) / IDFdict[word])
            outfile.write('%s %s\n' % (word, IDFdict[word]))

    # Pickle streams are binary data, so the file is opened in binary mode.
    with open('IDF.pk', 'wb') as data:
        pickle.dump(IDFdict, data)

示例#3

显示文件

def text2word(text):
    """Return the description part of a labelled record as UTF-8 encoded tokens.

    ``text`` is split once on the literal separator ``'/x01'``. The part
    before the separator is the label, which is not used here; the part after
    it is segmented with ``jieba.cut``. Tokens contained in the externally
    defined ``symbol`` container and single-space tokens are dropped.

    Args:
        text: record of the form ``<label>/x01<description>``.

    Returns:
        list of the remaining tokens, each encoded with ``encode('utf8')``.

    Raises:
        ValueError: if ``text`` does not contain the separator.
    """
    # NOTE(review): '/x01' is a four-character literal, not the control
    # character \x01 -- confirm which one the input data really uses.
    # maxsplit=1 keeps a separator that occurs inside the description from
    # breaking the two-value unpacking.
    _label, jd = text.split('/x01', 1)
    return [wd.encode('utf8')
            for wd in jieba.cut(jd)
            if wd not in symbol and wd != ' ']

示例#4

显示文件

def segmentation(text):
    """Return ``text`` segmented by jieba with tokens separated by one space.

    Surrounding whitespace is stripped from ``text`` before segmentation and
    from the joined result afterwards. Punctuation is left untouched (the
    translate-based removal was already disabled in this function).

    Args:
        text: the string to segment.

    Returns:
        the tokens produced by ``jieba.cut`` joined with single spaces.
    """
    tokens = jieba.cut(text.strip())
    # join() builds the result in one pass instead of repeated concatenation;
    # the final strip() yields the same string the += loop produced.
    return ' '.join(tokens).strip()