# Frequency-distribution demo over the token list `allwords` (defined earlier
# in the file — assumed to be an iterable of word strings; TODO confirm).
A = set(allwords)
longwords = [w for w in A if len(w) > 12]  # all distinct words longer than 12 chars
print(sorted(longwords))

from nltk.probability import FreqDist, ConditionalFreqDist
"""
FreqDist: build a frequency distribution from the given data.
B(): number of distinct samples (vocabulary size).
N(): total number of samples (token count).
tabulate(20): print the 20 most common samples as a table.
fd2.plot(20, cumulative=True): cumulative=True plots running totals.
"""
fd2 = FreqDist([sx.lower() for sx in allwords if sx.isalpha()])
print("Number of distinct words: %d" % fd2.B())
print("Total number of words: %d" % fd2.N())
fd2.tabulate(20)  # show the 20 most frequent words as a table
fd2.plot(20)
fd2.plot(20, cumulative=True)

"""
freq('the'): relative frequency of the word 'the'.
ConditionalFreqDist(): conditional frequency counts, used to study
systematic differences between categories.
"""
from nltk.corpus import inaugural
print(fd2.freq('the'))  # relative frequency of 'the'

# BUGFIX: the corpus reader method is `words(fileid)`, not `word(fileid)`
# (the original raised AttributeError).
# File ids look like '1981-Reagan.txt', so the string comparison below
# selects addresses between 1980 and 2010 lexicographically.
cfd = ConditionalFreqDist(
    (fileid, len(w))
    for fileid in inaugural.fileids()
    for w in inaugural.words(fileid)
    if '1980' < fileid < '2010'
)
print(cfd.items())
cfd.plot()
from nltk.probability import ConditionalFreqDist
import matplotlib
# NOTE: '%matplotlib inline' is an IPython magic, not Python syntax — it is a
# syntax error in a plain .py file, so it is kept only as a comment here.
# %matplotlib inline


def _dated_metadata(path):
    """Yield the parsed metadata dict for each file in *path* that carries
    an exact date (entries whose Date starts with 'c' — circa — are skipped).

    Relies on `parse_metadata`, defined earlier in this file, and on the
    corpus convention that each file's metadata section ends with the
    '<!--end metadata-->' marker.
    """
    for filename in os.listdir(path):
        # `with` guarantees the file handle is closed (the original leaked it).
        with open(os.path.join(path, filename)) as f:
            text = f.read()
        # Everything before the marker is the metadata header.
        header = text.split("<!--end metadata-->")[0]
        metadata = parse_metadata(header)
        # Skip speeches with only an approximate ("circa") date.
        if metadata['Date'][0] == 'c':
            continue
        yield metadata


# Frequency-distribution graph by year: the year is the final component of
# the 'Date' string after the last '/'.
cfdist = ConditionalFreqDist()
for metadata in _dated_metadata(corpus_path):
    cfdist['count'][metadata['Date'].split('/')[-1]] += 1
cfdist.plot()

# <markdowncell>
# Now let's build another graph, but this time by the 'Description' field:

# <codecell>
cfdist2 = ConditionalFreqDist()
for metadata in _dated_metadata(corpus_path):
    cfdist2['count'][metadata['Description']] += 1
cfdist2.plot()
# Horizontal bar chart of the top-20 tri-grams. `x` (labels) and the plotted
# counts are prepared earlier in the file — presumably a list of tri-gram
# strings; TODO confirm against the preceding cell.
y_pos = np.arange(len(x))
plt.yticks(y_pos, x)
plt.title('Frequency Count of Top 20 Tri-Grams')
plt.ylabel('Frequent Words')
plt.xlabel('Count')
# BUGFIX: savefig must come BEFORE show() — show() finalizes/clears the
# current figure, so the original order wrote out a blank image file.
plt.savefig('Most Frequent 20 Tri-Grams.jpeg')
plt.show()

# ## 4. Word Length Distribution Plot
# #### Word length on the x-axis vs number of words of that length on the
# y-axis; visualises the composition of different word lengths in the corpus.
from nltk.probability import ConditionalFreqDist

cfdist = ConditionalFreqDist((len(word), word) for word in allwords)
cfdist.plot()

# ## t-SNE Corpus Visualization
# Visualize document similarity via t-distributed stochastic neighbor
# embedding over TF-IDF vectors.
from yellowbrick.text import TSNEVisualizer
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer()
# NOTE(review): `allwords` looks like a flat token list, but TfidfVectorizer
# expects one string per *document* — verify this is the intended input.
docs = tfidf.fit_transform(allwords)

# Create the visualizer and draw the vectors
tsne = TSNEVisualizer()
tsne.fit(docs)