def featureSelect(filenum):
    """Build a counter of the retained entries of a segmented file.

    Args:
        filenum: Passed unchanged to ``segment.fileToDict``; presumably a
            file path or identifier -- confirm against that helper.

    Returns:
        A ``util.Counter`` mapping each retained key to the value that
        ``segment.fileToDict`` reported for it. A key is rejected when it
        is in ``CONSTDICT`` or its length is below 6.
    """
    counter = util.Counter()
    Dict = segment.fileToDict(filenum)
    # Loop over a snapshot of the keys: entries are deleted inside the loop,
    # and deleting from a dict while iterating its live key view raises
    # RuntimeError on Python 3 (Python 2's keys() happened to return a list).
    for item in list(Dict.keys()):
        # NOTE(review): the threshold 6 looks like "two 3-byte UTF-8
        # characters" for Python 2 byte strings -- confirm.
        if item in CONSTDICT or len(item) < 6:
            # Rejected keys are also removed from the dict returned by
            # segment.fileToDict, exactly as the previous code did.
            del Dict[item]
        else:
            counter[item] = Dict[item]
    return counter
def featureDictWord(labelNum, num):
    """Build a presence counter for one training document.

    Reads ``../data/trainData/Data/class<labelNum>/<num>.txt`` through
    ``segment.fileToDict`` and prints ``<num>.txt`` as a progress line.

    Args:
        labelNum: Integer class label, used in the directory name.
        num: Integer document number, used in the file name.

    Returns:
        A ``util.Counter`` in which every retained key maps to 1. A key is
        rejected when it is in ``CONSTDICT`` or its length is below 6.
    """
    # Named ``path`` rather than ``str`` so the builtin is not shadowed.
    path = '../data/trainData/Data/class%d/%d.txt' % (labelNum, num)
    # Format first, then print: this emits the same text under the Python 2
    # print statement and the Python 3 print function, whereas
    # ``print ("%d.txt") % num`` fails on Python 3 (None % int).
    print("%d.txt" % num)
    counter = util.Counter()
    Dict = segment.fileToDict(path)
    # Loop over a snapshot of the keys: entries are deleted inside the loop,
    # and deleting from a dict while iterating its live key view raises
    # RuntimeError on Python 3.
    for item in list(Dict.keys()):
        if item in CONSTDICT or len(item) < 6:
            # Rejected keys are also removed from the dict returned by
            # segment.fileToDict, exactly as the previous code did.
            del Dict[item]
        else:
            # Only presence is recorded here, not the reported value.
            counter[item] = 1
    return counter
def featureDict(labelNum, num):
    """Build a counter of the retained entries of one training document.

    Reads ``../data/trainData/Data/class<labelNum>/<num>.txt`` through
    ``segment.fileToDict``.

    Args:
        labelNum: Integer class label, used in the directory name.
        num: Integer document number, used in the file name.

    Returns:
        A ``util.Counter`` mapping each retained key to the value that
        ``segment.fileToDict`` reported for it. A key is rejected when it
        is in ``CONSTDICT`` or its length is below 6.
    """
    # Named ``path`` rather than ``str`` so the builtin is not shadowed.
    path = '../data/trainData/Data/class%d/%d.txt' % (labelNum, num)
    counter = util.Counter()
    Dict = segment.fileToDict(path)
    # Loop over a snapshot of the keys: entries are deleted inside the loop,
    # and deleting from a dict while iterating its live key view raises
    # RuntimeError on Python 3.
    for item in list(Dict.keys()):
        if item in CONSTDICT or len(item) < 6:
            # Rejected keys are also removed from the dict returned by
            # segment.fileToDict, exactly as the previous code did.
            del Dict[item]
        else:
            counter[item] = Dict[item]
    return counter
#coding=utf-8
import segment
import json

# Manual check of segment.fileToDict: load the sample file and dump the
# result as JSON so non-ASCII text is shown readably instead of as escapes.
d = segment.fileToDict('../data/test.txt')

# Python 2 json API: ``encoding`` names the codec used to decode byte
# strings found in ``d`` before they are serialised.
print(json.dumps(d, encoding='UTF-8', ensure_ascii=False))