# Basic NLTK text exploration: context searches followed by simple counts.
# text3 is imported explicitly because the final count below reads it;
# importing only text1 made that line fail with a NameError.
from nltk.book import text1, text3

# Show a concordance view of a word: every occurrence with its context.
text1.concordance("monstrous")

# Show words that appear in similar contexts.
text1.similar("monstrous")

# Examine the contexts shared by two or more words.
text1.common_contexts(["monstrous", "very"])

# Count the number of tokens (words and punctuation marks).
print(len(text1))

# Vocabulary size of the text: the number of distinct tokens.
print(len(set(text1)))

# Number of occurrences of a single word.
print(text3.count("smote"))
# Tokenize a local text file, inspect its word frequencies, and run a few
# context searches on the NLTK sample texts.
from collections import Counter
import io

import nltk
from nltk.book import text1, text2

# io.open decodes while reading and works the same on Python 2 and 3. Its
# default text mode already translates newlines, which is what the old 'rU'
# flag asked for. The context manager guarantees the handle is closed.
with io.open('manifesto.txt', encoding='utf-8') as manifesto_file:
    raw_manifesto = manifesto_file.read()

tokens = nltk.word_tokenize(raw_manifesto)
text = nltk.Text(tokens)

# Raw frequency of every token.
print(Counter(tokens))

distribution = nltk.FreqDist(text)
# most_common() only returns the list; print it so a script run shows it.
print(distribution.most_common(50))
distribution.plot()

text1.concordance("galactic")  # No galactic whales :'(
print("\n\n")
text1.similar("monstrous")
print("\n\n")
text2.similar("monstrous")
# Context searches, a dispersion plot, and token counting/indexing on the
# NLTK sample texts.
from nltk.book import text1, text4, text6

# concordance(), similar() and collocations() write their results to stdout
# themselves and return None, so they are called directly: wrapping them in
# print() only appended a stray "None" line after each result.
text1.concordance("monstrous")
text1.similar("monstrous")
text1.collocations()

# Plot where each word occurs along the length of the text.
text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])

# Occurrence counts, and the share of tokens that are 'the' as a percentage.
print(text6.count("Very"))
print(text6.count('the') / float(len(text6)) * 100)
print(text4.count("bless"))

# Token lookup by position, and position lookup by token (first occurrence).
print(text4[100])
print(text4.index('the'))
print(text4[524])
print(text4.index('men'))

# Full token list of the text.
print(text4[0:len(text4)])
#!/usr/bin/env python
from nltk.book import text1

# Every occurrence with context.
# concordance() and similar() print their own output and return None, so they
# are called directly; printing the return value only added a "None" line.
text1.concordance("monstrous")
text1.similar("monstrous")  # S5
#coding:utf-8
# Walkthrough of basic NLTK text queries; each step prints a banner first.
import nltk as nk
from nltk.book import text1 as t1
from nltk.book import text4 as t4

print('=================================')
# Download the test data (only needed once).
# nltk.download()

# Keyword search: every occurrence of the word with its context.
print('===============查找关键词==================')
t1.concordance("america")

# Words that appear in similar contexts.
print('===============查找相似上下文===============')
t1.similar("america")

# Contexts shared by the given words.
print('=============共同的语法结构=================')
t1.common_contexts(['in', 'of'])

# Lexical dispersion plot ('democracy' was previously misspelled, so that
# word could never match anything in the text).
# NOTE(review): dispersion_plot appears to match case-sensitively by default,
# so 'america' may need to be 'America' -- confirm against the NLTK docs.
print('=================词汇分布图=================')
t4.dispersion_plot(['citizens', 'democracy', 'freedom', 'america'])

# Frequency plot of the 50 most common tokens.
print('=================统计最常出现的词================')
freList = nk.FreqDist(t1)
freList.plot(50, cumulative=False)

# Ten of the distinct words longer than 15 characters. Sorting first makes the
# selection reproducible (set order is arbitrary), and slicing a list instead
# of a filter() result also works on Python 3.
print('=================统计长度超过15的词===============')
v = set(t1)
long_words = sorted(w for w in v if len(w) > 15)[:10]
print(long_words)
#/Users/randou/Esther/Brandeis/2019 Fall/LING131A NLP/Exercises
# -*- coding: utf-8 -*-
import nltk

# Request the 'book' collection by name: calling nltk.download() without
# arguments opens the interactive downloader and blocks every run.
# The download must happen before nltk.book is imported below.
nltk.download('book')

from nltk.book import *
from nltk.book import text1
import pandas as pd


def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in ``text``.

    Args:
        text: Any sized iterable of hashable tokens (e.g. an nltk Text).

    Returns:
        A float between 0.0 and 1.0; 0.0 for an empty text, which would
        otherwise raise ZeroDivisionError.
    """
    total = len(text)
    if total == 0:
        return 0.0
    # float() keeps this a true division on Python 2 as well.
    return len(set(text)) / float(total)


# =============================================================================
# 1.3 Searching Text
# =============================================================================
text1.concordance('monstrous')  # appearances of a word, with context
text1.similar('monstrous')  # words used in similar contexts
text1.common_contexts(['monstrous', 'mystifying'])
text1.dispersion_plot(['love', 'peace', 'luck', 'fortune'])
text1.generate()

# =============================================================================
# 1.4 Counting Vocabulary
# =============================================================================
# The results are printed explicitly: as bare expressions they were computed
# and then discarded whenever the file ran as a script.
print(len(text1))
print(len(set(text1)))  # sorting first does not change the count
print(lexical_diversity(text1))  # lexical richness
print(text1.count('love'))
print("hello world")

import nltk

# The corpora must be present before nltk.book is imported.
nltk.download('book')

# Import every text that is used below directly, instead of re-binding
# text1/text2 through nltk.book after the import.
from nltk.book import text1, text2, text3, text4


def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in ``text``.

    Args:
        text: Any sized iterable of hashable tokens (e.g. an nltk Text).

    Returns:
        A float between 0.0 and 1.0; 0.0 for an empty text, which would
        otherwise raise ZeroDivisionError.
    """
    total = len(text)
    if total == 0:
        return 0.0
    # float() keeps this a true division on Python 2 as well.
    return len(set(text)) / float(total)


text1.concordance('monstrous')  # look up concordances in text1
text1.similar('monstrous')
text2.similar('monstrous')
text2.common_contexts(["monstrous", "very"])
text1.common_contexts(["monstrous", "whale"])
text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])
text3.generate()

# Counting Vocabulary
# The results are printed explicitly: as bare expressions they were computed
# and then discarded whenever the file ran as a script.
print(len(text1))
print(len(text2))

# Tokens (individual units of text) and vocabulary (distinct units)
print(len(text3))  # count tokens
print(len(set(text3)))  # vocabulary

# Lexical richness of a text
print(lexical_diversity(text2))
# List the words that occur in contexts similar to a chosen word.
from nltk.book import text1 as moby_dick

# nltk.download()

# Word whose distributional neighbours are looked up in the text.
target_word = "ship"
moby_dick.similar(target_word)
#!/usr/bin/env python
from nltk.book import text1

# Every occurrence with context.
# concordance() and similar() print their own output and return None, so they
# are called directly; printing the return value only added a "None" line.
text1.concordance("monstrous")
text1.similar("monstrous")  # S5