fdist1.plot(20, cumulative=False) #Print list of all of frequently used words top = [] print "\nMost Frequent Terms" for (key, value) in sorted(fdist1.items(), key=operator.itemgetter(0)): if value > 2: top.append(key) print key, ":", value #concordance print "\nConfiguring concordance of most frequently used words" for word in top: print " " print file.concordance(word, 150, lines=all) #similar print "\nWords similar to most used words throughout book:" for word in top: print word, ":", file2.similar(word) print " " #Dispersion plots of top10 and collocations# print "\nProcessing dispersion plot of ten most common words..." print file.dispersion_plot(top) sys.exit()
for file in files: if file.endswith(".txt"): print file file_name = raw_input("Choose the file:") print "The file that was chosen is {0}".format(file_name) from nltk.corpus import PlaintextCorpusReader corpus_root = "." # "." means the existing directory I am in search_text = PlaintextCorpusReader(corpus_root, file_name) search_text = nltk.Text(search_text.words()) #creates text object keyword = raw_input("Specify word to search:") search_text.concordance(keyword, 80, lines=30) ##NEW THING## from nltk.corpus import PlaintextCorpusReader corpus_root = '.' search_text = PlaintextCorpusReader(corpus_root, file_name) search_text = nltk.Text(search_text.words()) # from nltk.corpus import stopwords ## path is andreaantenan/Desktop/cs195/nltk_data/corpora/stopwords/english.txt stopwords = nltk.corpus.stopwords.words('bible.txt') search_text = [word for word in search_text if word.lower() not in stopwords] #frequency distribution vocabulary list; fd is a dictionary#
# Build an NLTK Text from a local HTML corpus file two ways, inspect it,
# then strip the markup with BeautifulSoup and re-tokenize the visible text.

# Way 1: words pulled through a corpus reader already bound to `text`
# (defined earlier in the session -- not visible in this chunk).
words = text.words('e961024.htm')
words = list(words)  # convert to a plain list of words

# Way 2: read the raw file ourselves and tokenize it.
import nltk

path = '/Users/27AGO2019/Desktop/AbiiSnn/GitHub/Natural-Language-Processing/corpus/e961024.htm'
# FIX: a context manager guarantees the handle is closed even on error
# (the original opened, read, and closed by hand).
with open(path, encoding='utf-8') as f:  # encodings seen here: utf-8, latin-1
    text_string = f.read()

tokens = nltk.word_tokenize(text_string)
text = nltk.Text(tokens)
print(text[:100])  # the tokenizer has separated the symbols

text.concordance('actividad')
text.similar('actividad')
print(type(text))
print(len(text))

# HTML: strip the markup and tokenize only the visible text.
from bs4 import BeautifulSoup

soup = BeautifulSoup(text_string, 'lxml')
text = soup.get_text()
print(type(text))    # FIX: the bare expression `type(text)` did nothing in a script
tokens = nltk.word_tokenize(text)
print(tokens[:50])   # FIX: the bare trailing `tokens` did nothing in a script
#file_name = sys.argv[1] #search_word = sys.argv[2] file_name = raw_input("\nChoose one of these files: ") print "\nThe file that will be examined is {0}".format(file_name) from nltk.corpus import PlaintextCorpusReader corpus_root = '.' search_text = PlaintextCorpusReader(corpus_root, file_name) search_text = nltk.Text(search_text.words()) # KWIC concordance search_word = raw_input( "Specify a search word for a keyword in context concordance list: ") search_text.concordance(search_word, 80, lines=1000) # Apply stopwords to search_text from nltk.corpus import stopwords stopwords = nltk.corpus.stopwords.words('bible') #/Users/barrybandstra/nltk_data/corpora/stopwords search_text = [word for word in search_text if word.lower() not in stopwords] # Write search to output.txt file" output_file = open("output.txt", "w") for line in search_text: output_file.write(line), "\n" output_file.close() # Frequency distribution vocabulary list; fd is a dictionary fd = nltk.FreqDist(search_text)
# Cleaned-up IPython session history: build a corpus reader over the
# ASOIAF .txt files and inspect it.  The raw history contained a hard
# syntax error (`*.txt'`), a reference to the undefined name `wordlist`,
# `curr_dir.str()` (str is not a method of str), an os.system() call given
# two arguments (TypeError -- it takes a single command string), and
# `curr_dir` being clobbered with os.system()'s integer exit status.
# Only the steps that actually work are kept.
import os
import nltk
from nltk.corpus import PlaintextCorpusReader

curr_dir = os.getcwd()
os.system("ls " + curr_dir + '/ASOIAF/')

# The fileid pattern is a regular expression, so the dot must be escaped
# ('.*\.txt', not the glob '*.txt').
wordlists = PlaintextCorpusReader(curr_dir + '/ASOIAF/', r'.*\.txt')
print(wordlists.fileids())
print(wordlists.words())

# BUG FIX: concordance() is a method of nltk.Text, not of the corpus
# reader -- calling it on the reader raised AttributeError.
nltk.Text(wordlists.words()).concordance("Arya")

# Can also import bracket parse corpora (penn tree bank)
get_ipython().magic('save january_26_2016.py 0-1000000')
#file_name = sys.argv[1] #search_word = sys.argv[2] file_name = raw_input("\nChoose one of these files: ") print "\nThe file that will be examined is {0}".format(file_name) from nltk.corpus import PlaintextCorpusReader corpus_root = '.' search_text = PlaintextCorpusReader(corpus_root,file_name) search_text = nltk.Text(search_text.words()) # KWIC concordance search_word = raw_input("Specify a search word for a keyword in context concordance list: ") search_text.concordance(search_word,80,lines=1000) # Apply stopwords to search_text from nltk.corpus import stopwords stopwords = nltk.corpus.stopwords.words('bible') #/Users/barrybandstra/nltk_data/corpora/stopwords search_text = [word for word in search_text if word.lower() not in stopwords] # Write search to output.txt file" output_file = open("output.txt", "w") for line in search_text: output_file.write(line),"\n" output_file.close() # Frequency distribution vocabulary list; fd is a dictionary fd = nltk.FreqDist(search_text)
import nltk import os # Retrieve a file list files = os.listdir('.') print "All the files in the directory:" for file in files: if file.endswith('.txt'): print file file_name = raw_input("Choose a file: ") print "The file that was chosen is {0}".format(file_name) from nltk.corpus import PlaintextCorpusReader corpus_root = "." search_text = PlaintextCorpusReader(corpus_root,file_name) search_text = nltk.Text(search_text.words()) keyword = raw_input("Specify a search term: ") search_text.concordance(keyword,80,lines=30)