Python similar示例，nltk.book.text1.similar Python示例

示例#1

0

显示文件

from nltk.book import text1, text3

# Show every occurrence of a word together with its surrounding context.
text1.concordance("monstrous")

# Show words that appear in contexts similar to those of the given word.
text1.similar("monstrous")

# Examine the contexts shared by two or more words.
text1.common_contexts(["monstrous", "very"])

# Count the number of tokens (words and punctuation marks).
print(len(text1))

# Vocabulary size: the number of distinct tokens.
print(len(set(text1)))

# Number of occurrences of a word in text3 (imported above with text1).
print(text3.count("smote"))

示例#2

0

显示文件

import io
from collections import Counter

import nltk
from nltk.book import text1, text2

# Read the whole manifesto as UTF-8 text. io.open decodes while reading on
# both Python 2 and 3, and the context manager closes the handle even when
# reading fails.
with io.open('manifesto.txt', encoding='utf-8') as manifesto_file:
    raw_manifesto = manifesto_file.read()

tokens = nltk.word_tokenize(raw_manifesto)
text = nltk.Text(tokens)

# Token frequencies, first with the stdlib Counter, then with NLTK's FreqDist.
print(Counter(tokens))
distribution = nltk.FreqDist(text)
# Print the 50 most frequent tokens; as a bare expression the result would be
# silently discarded when this runs as a script.
print(distribution.most_common(50))
distribution.plot()

text1.concordance("galactic")  # No galactic whales :'(

# Compare the words used in similar contexts to "monstrous" in two texts.
print("\n\n")
text1.similar("monstrous")

print("\n\n")
text2.similar("monstrous")

示例#3

0

显示文件

文件： 03_Searching_Counting.py 项目： amir-jafari/NLP

from nltk.book import text1
from nltk.book import text4
from nltk.book import text6

# concordance(), similar() and collocations() print their results themselves
# and return None, so they are called directly; wrapping them in print()
# would emit a stray "None" after each result.
text1.concordance("monstrous")
text1.similar("monstrous")
text1.collocations()
text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])

# Counting: occurrences of a word and the share of the text taken by 'the'.
print(text6.count("Very"))
print(text6.count('the') / float(len(text6)) * 100)  # percentage of tokens
print(text4.count("bless"))

# Indexing: token at a position, and position of a token's first occurrence.
print(text4[100])
print(text4.index('the'))
print(text4[524])
print(text4.index('men'))

# Slicing: the full token sequence.
print(text4[0:len(text4)])

示例#4

0

显示文件

#!/usr/bin/env python

from nltk.book import text1

# Every occurrence with context. concordance() prints its own output and
# returns None, so it is not wrapped in print (that would also print "None").
text1.concordance("monstrous")

# Words that appear in similar contexts; similar() also prints by itself.
text1.similar("monstrous")

# S5

示例#5

0

显示文件

文件： nktk-base-demo.py 项目： yinyayun/python-demo

#coding:utf-8
import nltk as nk
from nltk.book import text1 as t1
from nltk.book import text4 as t4
print('=================================')
# Download the test data (uncomment on the first run).
# nk.download()

# Keyword search: every occurrence of the word with its context.
print('===============查找关键词==================')
t1.concordance("america")

# Words that appear in similar contexts.
print('===============查找相似上下文===============')
t1.similar("america")

# Contexts shared by both words.
print('=============共同的语法结构=================')
t1.common_contexts(['in', 'of'])

# Lexical dispersion plot. Matching is case-sensitive, so the words must be
# spelled and capitalised the way they occur in the text.
print('=================词汇分布图=================')
t4.dispersion_plot(['citizens', 'democracy', 'freedom', 'America'])

# Frequency plot of the 50 most common tokens.
print('=================统计最常出现的词================')
freList = nk.FreqDist(t1)
freList.plot(50, cumulative=False)

# Up to ten distinct words longer than 15 characters. Sorting makes the
# selection deterministic, and slicing a list (rather than the result of
# filter()) also works on Python 3.
print('=================统计长度超过15的词===============')
v = set(t1)
long_words = sorted(w for w in v if len(w) > 15)[:10]
print(long_words)

示例#6

0

显示文件

文件： NLTKChapter1.py 项目： Ran-Dou/Natural-Language-Processing-with-Python

#/Users/randou/Esther/Brandeis/2019 Fall/LING131A NLP/Exercises
# -*- coding: utf-8 -*-

import nltk
# Fetch only the data collection used by nltk.book; calling download() with
# no arguments opens the interactive downloader on every run.
nltk.download('book')
from nltk.book import *
from nltk.book import text1
import pandas as pd

# =============================================================================
# 1.3 Searching Text
# =============================================================================

text1.concordance('monstrous')  # every occurrence of the word, with context
text1.similar('monstrous')  # words used in similar contexts
text1.common_contexts(['monstrous', 'mystifying'])  # contexts shared by both
text1.dispersion_plot(['love', 'peace', 'luck', 'fortune'])
text1.generate()

# =============================================================================
# 1.4 Counting Vocabulary
# =============================================================================

# The results are printed explicitly: as bare expressions they are only
# echoed in an interactive session and are lost when run as a script.
print(len(text1))  # number of tokens
print(len(set(text1)))  # vocabulary size (sorting is not needed for a count)
print(len(set(text1)) / len(text1))  # lexical richness
print(text1.count('love'))  # occurrences of a single word


def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in *text*.

    Args:
        text: A sized iterable of hashable tokens, e.g. a list of strings.

    Returns:
        ``len(set(text)) / len(text)``, or ``0.0`` when *text* is empty
        (an empty text would otherwise raise ``ZeroDivisionError``).
    """
    total = len(text)
    if total == 0:
        return 0.0
    return len(set(text)) / total

示例#7

0

显示文件

文件： 1.Basics.py 项目： castillosebastian/NLP_Intro

print("hello world")
import nltk
nltk.download('book')
from nltk.book import text1
text1.concordance('monstrous')  # find every occurrence of the word in text1
text1.similar('monstrous')  # words used in similar contexts
text2 = nltk.book.text2
text2.similar('monstrous')
text2.common_contexts(["monstrous", "very"])
text1.common_contexts(["monstrous", "whale"])
nltk.book.text4.dispersion_plot(
    ["citizens", "democracy", "freedom", "duties", "America"])
nltk.book.text3.generate()

# Counting Vocabulary
# The results are printed explicitly: as bare expressions they are only
# echoed in an interactive session and are lost when run as a script.
print(len(text1))
print(len(text2))

# Tokens (individual unit of text) and Vocabulary (distinct unit)
print(len(nltk.book.text3))  # count tokens
print(len(set(nltk.book.text3)))  # vocabulary

# Lexical richness of a text
print(len(set(text2)) / len(text2))


# Function
def lexical_diversity(text):
    """Compute the lexical diversity of *text*: distinct tokens / all tokens.

    Args:
        text: A sized iterable of hashable tokens, e.g. a list of strings.

    Returns:
        The ratio ``len(set(text)) / len(text)``. An empty *text* yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    token_count = len(text)
    if not token_count:
        return 0.0
    return len(set(text)) / token_count

示例#8

0

显示文件

文件： nltk_test.py 项目： iyuanfang0105/nlp_learning

# text1 from nltk.book is bound to the local alias moby_dick.
from nltk.book import text1 as moby_dick

# NOTE: this snippet does not import nltk itself, so the line below would
# need "import nltk" before it could be uncommented.
# nltk.download()

# Show words that appear in contexts similar to those of "ship".
moby_dick.similar("ship")

示例#9

0

显示文件

文件： e1.py 项目： blang/textmining

#!/usr/bin/env python

from nltk.book import text1


# Every occurrence with context. concordance() prints its own output and
# returns None, so it is not wrapped in print (that would also print "None").
text1.concordance("monstrous")

# Words that appear in similar contexts; similar() also prints by itself.
text1.similar("monstrous")

# S5