Example #1
    def chunked_paras(self, fileids=None, categories=None):
        return ChunkedCorpusReader.chunked_paras(
            self, self._resolve(fileids, categories))
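This method fragment reads like part of a categorized chunked corpus reader, i.e. a class that mixes CategorizedCorpusReader into ChunkedCorpusReader so results can be filtered by category. A minimal sketch of such a class follows; the class name, the __init__ handling, and the _resolve body are assumptions for illustration, not taken from the example above.

# A minimal sketch (names and details assumed, not from the example above).
from nltk.corpus.reader import CategorizedCorpusReader, ChunkedCorpusReader

class CategorizedChunkedCorpusReader(CategorizedCorpusReader, ChunkedCorpusReader):
    def __init__(self, *args, **kwargs):
        # CategorizedCorpusReader pops its own keyword arguments
        # (cat_pattern / cat_map / cat_file) out of kwargs.
        CategorizedCorpusReader.__init__(self, kwargs)
        ChunkedCorpusReader.__init__(self, *args, **kwargs)

    def _resolve(self, fileids, categories):
        # Translate a category filter into the fileids it covers.
        if fileids is not None and categories is not None:
            raise ValueError('Specify fileids or categories, not both')
        if categories is not None:
            return self.fileids(categories)
        return fileids

    def chunked_paras(self, fileids=None, categories=None):
        return ChunkedCorpusReader.chunked_paras(
            self, self._resolve(fileids, categories))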
Example #2
########## CHUNKED CORPUS READER ###############

###Implementing CCR
from nltk.corpus.reader import ChunkedCorpusReader
root="C:\\Users\\Matrix\\AppData\\Roaming\\nltk_data\\corpora\\cookbook\\"

reader=ChunkedCorpusReader(root,r'.*\.chunk')
#Each chunk (marked with square brackets in the .chunk file) is returned as a single Tree;
#tokens outside any chunk come back as plain (word, tag) tuples
print(reader.chunked_words())
#Each sentence is returned as a Tree() whose children are the chunks and the remaining tagged words
print(reader.chunked_sents())
print(reader.chunked_paras())

#Getting the tagged tokens inside a chunk (every chunk is made of words, but not every word belongs to a chunk)
print(reader.chunked_words()[0].leaves())
print(reader.chunked_sents()[1].leaves())
#Can't apply leaves() directly to a para - but we can access a sentence of a given para
print(reader.chunked_paras()[0][0].leaves())
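
#Quick illustration of the file format: ChunkedCorpusReader's default parser is
#nltk.chunk.util.tagstr2tree, which reads bracketed, slash-tagged text. The
#sentence below is made up for illustration, not the cookbook corpus contents.
from nltk.chunk.util import tagstr2tree
print(tagstr2tree('[The/DT little/JJ dog/NN] barked/VBD at/IN [the/DT cat/NN] ./.'))
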
###Implementing CCCR
from nltk.corpus.reader import ConllChunkCorpusReader
root="C:\\Users\\Matrix\\AppData\\Roaming\\nltk_data\\corpora\\cookbook\\"

reader=ConllChunkCorpusReader(root,r'.*\.iob',('NP','VP','PP'))
print(reader.chunked_words())
print(reader.chunked_sents())
print(reader.iob_words())
print(reader.iob_sents())
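
#Self-contained sketch of the expected .iob format: CoNLL-style lines of
#"word POS-tag chunk-tag", one token per line. The temporary file and its
#contents below are made up for illustration, not the actual cookbook data.
import os, tempfile
tmp = tempfile.mkdtemp()
with open(os.path.join(tmp, 'sample.iob'), 'w') as f:
    f.write('Mr. NNP B-NP\n'
            'Meador NNP I-NP\n'
            'had VBD B-VP\n'
            'been VBN I-VP\n'
            'executive JJ B-NP\n'
            'vice NN I-NP\n'
            'president NN I-NP\n'
            '. . O\n')
demo = ConllChunkCorpusReader(tmp, r'.*\.iob', ('NP', 'VP', 'PP'))
print(demo.chunked_sents())
print(demo.iob_words())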
Example #4
from nltk.corpus.reader import ChunkedCorpusReader
from nltk.tokenize import SpaceTokenizer
import nltk

d = nltk.data.find('corpora/cookbook')
reader = ChunkedCorpusReader(d, r'.*\.chunk')
print(reader.chunked_words())
print(reader.chunked_sents())
print(reader.chunked_paras())

# reader.chunked_sents()[0].draw()
print(reader.chunked_sents()[0].leaves())
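
Since chunked_sents() returns Trees whose children are either chunk subtrees or plain (word, tag) tuples, pulling out just the chunks is a short loop. A minimal sketch follows; it assumes this corpus labels its chunks 'NP', which may differ in other .chunk files.

from nltk.tree import Tree

# Collect the noun-phrase chunks from every chunked sentence
# ('NP' is an assumption about this corpus's chunk label).
for sent in reader.chunked_sents():
    for subtree in sent:
        if isinstance(subtree, Tree) and subtree.label() == 'NP':
            print(subtree.leaves())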