def lidstoneProbDist(olddf):
    """Tokenize each document's 'body' text, build a Lidstone-smoothed
    frequency distribution per row, and dump the token table to CSV.

    Parameters
    ----------
    olddf : pandas.DataFrame
        Must contain a 'body' column of raw text; other columns are dropped.

    Side effects
    ------------
    Writes ../stumbled_upon/data/lidstone.csv and prints progress to stdout.

    References:
      http://www.inf.ed.ac.uk/teaching/courses/icl/nltk/probability.pdf
      https://github.com/tuzzeg/detect_insults/blob/master/README.md
    """
    print("Creating LidStone Probdist...", nltk.__version__)
    tutto = []
    # Keep only the text column; index is preserved.
    olddf = pd.DataFrame(olddf['body'])
    print(type(olddf))
    for ind in olddf.index:
        print(ind)
        # .ix was removed from pandas; .loc is the label-based equivalent.
        text = olddf.loc[ind, 'body']
        tokens = word_tokenize(text)
        t_fd = FreqDist(tokens)
        # Lidstone smoothing with gamma=0.1 (add-0.1 smoothing).
        pdist = LidstoneProbDist(t_fd, 0.1)
        print(pdist.samples())
        # BUGFIX: the original built row = [ind] and then clobbered it with
        # `row = tokens`, so set_index(0) below consumed the first token
        # instead of the dataframe index. Prepend the index explicitly.
        # (Also removed the per-row raw_input("HITKEY") debug pause, which
        # blocked batch processing.)
        tutto.append([ind] + tokens)
    newdf = pd.DataFrame(tutto).set_index(0)
    # NOTE(review): `taglist` is defined elsewhere in this file; its length
    # must equal the widest token row for this assignment to succeed -- confirm.
    newdf.columns = taglist
    print(newdf.head(20))
    print(newdf.describe())
    newdf.to_csv("../stumbled_upon/data/lidstone.csv")
def lidstoneProbDist(olddf):
    """Build a Lidstone-smoothed frequency distribution for each row's
    'body' text and write the resulting token table to CSV.

    Parameters
    ----------
    olddf : pandas.DataFrame
        Expected to hold a 'body' column of raw text; all other columns
        are discarded.

    Side effects
    ------------
    Writes ../stumbled_upon/data/lidstone.csv and prints progress to stdout.

    References:
      http://www.inf.ed.ac.uk/teaching/courses/icl/nltk/probability.pdf
      https://github.com/tuzzeg/detect_insults/blob/master/README.md
    """
    print("Creating LidStone Probdist...", nltk.__version__)
    tutto = []
    # Reduce to the text column only; the original index is kept.
    olddf = pd.DataFrame(olddf['body'])
    print(type(olddf))
    for ind in olddf.index:
        print(ind)
        # BUGFIX: df.ix has been removed from pandas; use label-based .loc.
        text = olddf.loc[ind, 'body']
        tokens = word_tokenize(text)
        t_fd = FreqDist(tokens)
        # gamma=0.1, i.e. add-0.1 (Lidstone) smoothing over the token counts.
        pdist = LidstoneProbDist(t_fd, 0.1)
        print(pdist.samples())
        # BUGFIX: the original assembled row = [ind] then overwrote it with
        # `row = tokens`, so set_index(0) used the first token as the index.
        # Prepend the real index instead. The interactive input("HITKEY")
        # pause -- a debugging leftover that stalled every iteration -- is
        # removed so the function can run unattended.
        tutto.append([ind] + tokens)
    newdf = pd.DataFrame(tutto).set_index(0)
    # NOTE(review): `taglist` comes from elsewhere in this file; it must be
    # as long as the widest token row or this raises -- confirm upstream.
    newdf.columns = taglist
    print(newdf.head(20))
    print(newdf.describe())
    newdf.to_csv("../stumbled_upon/data/lidstone.csv")