# Paragraph-level view of the tagged corpus.
# NOTE(review): `reader`, `path` and `TaggedCorpusReader` are not defined in
# this part of the file; presumably they are set up earlier -- confirm.
reader.tagged_paras()

''' different Tokenizer - works? '''
# Rebuild the tagged reader over the same '.pos' files, this time passing a
# SpaceTokenizer instance as the word tokenizer, then list the words.
from nltk.tokenize import SpaceTokenizer

reader = TaggedCorpusReader(
    path + '/corpora/cookbook/',
    r'.*\.pos',
    word_tokenizer=SpaceTokenizer(),
)
reader.words()

''' different Sentence Tokenizer '''
# Same corpus again, now with a LineTokenizer instance supplied as the
# sentence tokenizer, then list the sentences.
from nltk.tokenize import LineTokenizer

reader = TaggedCorpusReader(
    path + '/corpora/cookbook/',
    r'.*\.pos',
    sent_tokenizer=LineTokenizer(),
)
reader.sents()

''' chunked Corpus Reader '''
# Switch to the '.chunk' files in the same directory and look at the chunked
# views at word, sentence and paragraph granularity.
from nltk.corpus.reader import ChunkedCorpusReader

reader = ChunkedCorpusReader(
    path + '/corpora/cookbook/',
    r'.*\.chunk',
)
reader.chunked_words()
reader.chunked_sents()
reader.chunked_paras()

''' draw tree '''
# Render the first chunked sentence graphically.
reader.chunked_sents()[0].draw()

''' get leaves '''
# Pull the leaves out of the first item of each chunked view.
# NOTE(review): .leaves() on chunked_words()[0] assumes that first item is a
# chunk tree rather than a bare token -- confirm against the corpus file.
reader.chunked_words()[0].leaves()
reader.chunked_sents()[0].leaves()
reader.chunked_paras()[0][0].leaves()

''' categorized corpus '''
# List the categories of the Brown corpus bundled with NLTK.
from nltk.corpus import brown

brown.categories()