Python SEG.set示例

编程语言: Python

命名空间/包名称: smallseg

类/类型: SEG

方法/功能: set

hotexamples.com的示例: 4

Python SEG.set - 已找到4个示例。这些是从开源项目中提取的最受好评的smallseg.SEG.set现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

cut(6)

SEG(5)

set(2)

示例#1

显示文件

def _load_labeled_texts(seg, folder, fileNames, label):
    '''
    Segment the given files of one folder and tag the kept words with `label`.

    seg       -- object whose cut(text) method yields the words of a text
    folder    -- directory that contains the files
    fileNames -- names of the files inside `folder`, processed in order
    label     -- tag stored with every sample; also used as the prefix of
                 the per-file progress line "<label>Data<n>"

    Returns a list with one (words, label) tuple per file, where `words`
    holds the UTF-8 encoded words that are present in `pornDict` (a name
    defined outside this function).
    '''
    samples = []
    for count, fileName in enumerate(fileNames, 1):
        print(label + "Data" + str(count))
        # Close each file as soon as it is read instead of leaking the
        # handle until garbage collection.
        with open(os.path.join(folder, fileName)) as handle:
            data = seg.cut(handle.read())
        # Encode every word once, then filter against the dictionary.
        encoded = [word.encode('utf-8') for word in data]
        samples.append(([w for w in encoded if w in pornDict], label))
    return samples


def get_data():
    '''
    Load the positive and negative text datasets from local folders.

    Positive and negative samples are stored in different folders. While
    loading, every file is segmented with the smallseg tool (SEG configured
    via seg.set(dic); `dic` is defined outside this function) and reduced
    to the words found in `pornDict`.

    Returns a 2-tuple (trainingData, trainingData). Both elements are the
    SAME list object of (words, label) tuples, with all 'Positive' samples
    first and all 'Negative' samples after them.
    '''
    posPath = '/home/zhouxc/skindetector/AdultWebsiteText/'
    negPath = '/home/zhouxc/skindetector/NormalWebsiteText/'
    # List both folders up front so a missing folder fails before any work.
    posFiles = os.listdir(posPath)
    negFiles = os.listdir(negPath)

    seg = SEG()
    seg.set(dic)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('---------------------Read Positive DataSet-----------------')
    trainingData = _load_labeled_texts(seg, posPath, posFiles, 'Positive')
    print('---------------------Positive DataSet done-----------------')

    print('---------------------Read Negative DataSet-----------------')
    trainingData.extend(
        _load_labeled_texts(seg, negPath, negFiles, 'Negative'))
    print('--------Negative DataSet  done-----------------------------------')

    # NOTE(review): the same list is returned twice, so training and test
    # data are aliases of each other -- confirm this is intended.
    return trainingData, trainingData

示例#2

显示文件

文件： textClassifier.py 项目： GarfieldEr007/Adult-WebSite-Classifier

def get_data():
	'''
	Load the positive and negative text datasets from local folders.

	Positive and negative samples are stored in different folders. While
	loading, every file is segmented with the smallseg tool and only the
	UTF-8 encoded words found in `pornDict` are kept (`dic` and `pornDict`
	are defined outside this function).

	Returns a 2-tuple (trainingData, trainingData); both elements are the
	same list of (words, label) tuples, 'Positive' samples first.
	'''
	posPath = '/home/zhouxc/skindetector/AdultWebsiteText/'
	negPath = '/home/zhouxc/skindetector/NormalWebsiteText/'
	posFiles = os.listdir(posPath)
	negFiles = os.listdir(negPath)

	trainingData = []
	seg = SEG()
	seg.set(dic)

	# Single-argument print(...) emits the same text on Python 2 and 3.
	print('---------------------Read Positive DataSet-----------------')
	for c, fileName in enumerate(posFiles, 1):
		print("PositiveData" + str(c))
		# "with" closes the file right after reading; the original leaked
		# one open handle per input file.
		with open(posPath + fileName) as f:
			data = seg.cut(f.read())
		# Encode each word only once before the dictionary lookup.
		encoded = (word.encode('utf-8') for word in data)
		text = [w for w in encoded if w in pornDict]
		trainingData.append((text, 'Positive'))
	print('---------------------Positive DataSet done-----------------')

	print('---------------------Read Negative DataSet-----------------')
	for c, fileName in enumerate(negFiles, 1):
		print("NegativeData" + str(c))
		with open(negPath + fileName) as f:
			data = seg.cut(f.read())
		encoded = (word.encode('utf-8') for word in data)
		text = [w for w in encoded if w in pornDict]
		trainingData.append((text, 'Negative'))
	print('--------Negative DataSet  done-----------------------------------')

	# NOTE(review): the same list object is returned for both positions,
	# so callers get identical training and test data -- confirm intent.
	return trainingData, trainingData

示例#3

显示文件

文件： pirrerank.py 项目： swapnadesale/pmir

                    + str(i) \
                    + '''')" href="'''\
                    + url \
                    + '''" target="_blank"><font size="3">''' \
                    + arrowscript \
                    + title \
                    + '''</font></a><br /><font size="-1">''' \
                    + snippet \
                    + '''<br /><font color="#008000">''' \
                    + url \
                    + '''<br /></font></font></td></tr></table>\n'''
        pageStr += resultStr
        i += 1

    return pageStr


if __name__ == '__main__':
    # Script entry point: configure the segmenter, then rerank the results
    # of the requested engine for one user.

    # Dictionary source handed to the segmenter.
    words = "main.dic"
    seg = SEG()
    seg.set(words)

    # A manual smoke test used to sit here: it called psudorerank on a
    # short list of sample sentences with 2 as the second argument.

    username = "******"
    # NOTE(review): `request` is not defined in the visible part of this
    # file -- confirm where it comes from when run as a script.
    engine = request.GET.get("engine", "")
    resultsTable = ResultInfoTable[engine]
    query, pagecontent = userFeedbackRerank(username, resultsTable, seg)

示例#4

显示文件

文件： benchmark.py 项目： Nuos/lab

#encoding=utf-8
# Benchmark for smallseg: load the dictionary from main.dic, then time how
# long SEG.cut takes on the contents of text.txt when repeated 1..100 times.
try:
    import psyco
    psyco.full()
except ImportError:
    # psyco is an optional accelerator; run without it when it is missing.
    # Only ImportError is ignored so Ctrl-C and real errors still surface.
    pass

from time import time

from smallseg import SEG

# Read both inputs with "with" so the file handles are closed right away.
with open("text.txt") as text_file:
    s3 = text_file.read()
with open("main.dic") as dict_file:
    words = [x.rstrip() for x in dict_file]

seg = SEG()
print('Load dict...')
seg.set(words)
print("Dict is OK.")

for i in xrange(1, 101):
    start = time()
    # Cut the same text i times and measure the total elapsed wall time.
    for j in xrange(0, i):
        A = seg.cut(s3)
    cost = time() - start
    # %s formats both values with str(), matching "print i, ..., cost".
    print("%s times, cost: %s" % (i, cost))

print("********************************")