Example #1
from nltk.book import text1, text3

text1.concordance("monstrous")  # Show a concordance view of a word with its context

text1.similar("monstrous")  # Show words that appear in similar contexts

text1.common_contexts(["monstrous", "very"])  # Examine contexts shared by two or more words

print(len(text1))  # Count the number of word and punctuation tokens

print(len(set(text1)))  # Print the vocabulary size of the text

print(text3.count("smote"))  # Print the number of occurrences of a word
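# Follow-up sketch (not part of the original example): peek at the sorted vocabulary
# behind len(set(text1)) above; punctuation and capitalized words sort first.
print(sorted(set(text1))[:20])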
Example #2
from collections import Counter
import nltk
from nltk.book import text1, text2

with open('manifesto.txt', encoding='utf-8') as manifesto_file:
    raw_manifesto = manifesto_file.read()

tokens = nltk.word_tokenize(raw_manifesto)
text = nltk.Text(tokens)

print(Counter(tokens))
distribution = nltk.FreqDist(text)
print(distribution.most_common(50))  # 50 most frequent tokens
distribution.plot()

text1.concordance("galactic")  # No galactic whales :'(

print "\n\n"
text1.similar("monstrous")

print "\n\n"
text2.similar("monstrous")
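# Optional sketch (an assumption, not in the original snippet): the distribution above
# is dominated by punctuation and case variants, so filtering to lower-cased alphabetic
# tokens usually gives a cleaner frequency plot.
word_dist = nltk.FreqDist(w.lower() for w in tokens if w.isalpha())
print(word_dist.most_common(20))
word_dist.plot(20, cumulative=False)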
Example #3
from nltk.book import text1
from nltk.book import text4
from nltk.book import text6

text1.concordance("monstrous")  # concordance(), similar() and collocations() print directly and return None
text1.similar("monstrous")
text1.collocations()
text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])

print(text6.count("Very"))
print(text6.count('the') / float(len(text6)) * 100)
print(text4.count("bless"))
print(text4[100])
print(text4.index('the'))
print(text4[524])
print(text4.index('men'))
print(text4[0:len(text4)])
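# Helper sketch: the inline percentage computed above, wrapped in a function
# (this mirrors the percentage() helper shown in chapter 1 of the NLTK book).
def percentage(count, total):
    return 100 * count / total

print(percentage(text6.count('the'), len(text6)))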
Example #4
#!env python

from nltk.book import text1

# Every occurrence with context
text1.concordance("monstrous")

# Words used in similar contexts
text1.similar("monstrous")

# S5
Example #5
# coding: utf-8
import nltk as nk
from nltk.book import text1 as t1
from nltk.book import text4 as t4
print('=================================')
'''
Download the test data
'''
# nltk.download()

print('=============== Search for a keyword ==================')
t1.concordance("america")

print('=============== Find similar contexts ===============')
t1.similar("america")

print('============= Shared grammatical contexts =================')
t1.common_contexts(['in', 'of'])

print('================= Lexical dispersion plot =================')
t4.dispersion_plot(['citizens', 'democracy', 'freedom', 'america'])

print('================= Most frequent words ================')
freList = nk.FreqDist(t1)
freList.plot(50, cumulative=False)

print('================= Words longer than 15 characters ===============')
v = set(t1)
long_words = [w for w in v if len(w) > 15][:10]
print(long_words)
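# Follow-up sketch (an assumption, not in the original snippet; the thresholds are arbitrary):
# combine word length with the FreqDist built above to find long words that also occur often.
frequent_long_words = sorted(w for w in v if len(w) > 10 and freList[w] > 7)
print(frequent_long_words)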
Example #6
# /Users/randou/Esther/Brandeis/2019 Fall/LING131A NLP/Exercises
# -*- coding: utf-8 -*-

import nltk
nltk.download()
from nltk.book import *
from nltk.book import text1
import pandas as pd

# =============================================================================
# 1.3 Searching Text
# =============================================================================

text1.concordance('monstrous')  # appearances of a word, with context
text1.similar('monstrous')  # words used in similar contexts
text1.common_contexts(['monstrous', 'mystifying'])  # contexts shared by the two words
text1.dispersion_plot(['love', 'peace', 'luck', 'fortune'])  # positional dispersion plot
text1.generate()  # generate random text in the style of text1

# =============================================================================
# 1.4 Counting Vocabulary
# =============================================================================

len(text1)  # number of tokens
len(sorted(set(text1)))  # vocabulary size
len(set(text1)) / len(text1)  # lexical richness
text1.count('love')  # occurrences of a single word


def lexical_diversity(text):
    return len(set(text)) / len(text)
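
# Minimal usage sketch (not in the original) for the helper defined above:
print(lexical_diversity(text1))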
Example #7
print("hello world")
import nltk
nltk.download('book')
from nltk.book import text1
text1 = nltk.book.text1
text1.concordance('monstrous')  # look up concordances in text1
text1.similar('monstrous')
text2 = nltk.book.text2
text2.similar('monstrous')
text2.common_contexts(["monstrous", "very"])
text1.common_contexts(["monstrous", "whale"])
nltk.book.text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])
nltk.book.text3.generate()

# Counting Vocabulary
len(text1)
len(text2)

# Tokens (individual unit of text) and Vocabulary (distinct unit)
len(nltk.book.text3)  # count tokens
len(set(nltk.book.text3))  # vocabulary

#   lexical richness of a text
len(set(text2)) / len(text2)


# Function
def lexical_diversity(text):
    return len(set(text)) / len(text)
Example #8
from nltk.book import text1 as moby_dick

# nltk.download()

moby_dick.similar("ship")