def demo(**kwargs):
    """Dump the MUC-6 and MUC-7 corpora to standard output.

    Puts ``NLTK_COREF_DATA`` at the front of ``nltk.data.path`` and then,
    for each corpus in turn (MUC-6 first, then MUC-7), builds a
    ``LazyCorpusLoader`` around ``MUCCorpusReader`` and prints the corpus'
    IOB-tagged words followed by its mentions.

    Keyword arguments are accepted for interface compatibility but are
    not used.

    NOTE(review): ``LazyCorpusLoader`` is not imported in this function;
    it is assumed to be in scope at module level -- confirm.
    """
    import nltk
    from nltk_contrib.coref import NLTK_COREF_DATA
    from nltk_contrib.coref.muc import muc6_documents, muc7_documents
    from nltk_contrib.coref.muc import MUCCorpusReader

    # Search the coref data directory before any other NLTK data location.
    nltk.data.path.insert(0, NLTK_COREF_DATA)

    muc6 = LazyCorpusLoader('muc6/', MUCCorpusReader, muc6_documents)
    _demo_print_corpus(muc6)

    muc7 = LazyCorpusLoader('muc7/', MUCCorpusReader, muc7_documents)
    _demo_print_corpus(muc7)


def _demo_print_corpus(corpus):
    """Print each IOB word of ``corpus`` and then each of its mentions.

    A blank line follows every sentence (for mentions, only sentences that
    are non-empty) and one more blank line closes each of the two sections.
    """
    # print('') rather than a bare print: it emits a single newline under
    # both the Python 2 print statement and the Python 3 print function.
    for sent in corpus.iob_sents():
        for word in sent:
            print(word)
        print('')
    print('')

    for sent in corpus.mentions(depth=None):
        for mention in sent:
            print(mention)
        if sent:
            print('')
    print('')
def demo(**kwargs):
    """Print the IOB sentences and mentions of the MUC-6 and MUC-7 corpora.

    ``NLTK_COREF_DATA`` is inserted at the front of ``nltk.data.path``.
    Each corpus is then loaded through a ``LazyCorpusLoader`` wrapping
    ``MUCCorpusReader`` and written to standard output: first every word
    of every IOB sentence, then every mention. MUC-6 is processed before
    MUC-7.

    Keyword arguments are accepted but ignored.

    NOTE(review): ``LazyCorpusLoader`` is not imported in this function;
    presumably it is imported at module level -- verify.
    """
    import nltk
    from nltk_contrib.coref import NLTK_COREF_DATA
    from nltk_contrib.coref.muc import muc6_documents, muc7_documents
    from nltk_contrib.coref.muc import MUCCorpusReader

    # Make the coref data directory the first place NLTK looks for data.
    nltk.data.path.insert(0, NLTK_COREF_DATA)

    # (corpus root, document list) pairs, in the order they are printed.
    corpus_specs = (
        ('muc6/', muc6_documents),
        ('muc7/', muc7_documents),
    )

    for root, documents in corpus_specs:
        corpus = LazyCorpusLoader(root, MUCCorpusReader, documents)

        # IOB-tagged words: one per line, blank line after each sentence.
        # print('') yields exactly one newline on Python 2 and Python 3,
        # whereas print() would print "()" under the Python 2 statement.
        for sent in corpus.iob_sents():
            for word in sent:
                print(word)
            print('')
        print('')

        # Mentions: one per line, blank line after each non-empty sentence.
        for sent in corpus.mentions(depth=None):
            for mention in sent:
                print(mention)
            if sent:
                print('')
        print('')
treebank_test_sequence = treebank_test.tagged_sents() treebank_estimator = LidstoneProbDistFactory model = train_model(HiddenMarkovModelTagger, treebank_train_sequence, treebank_test_sequence, options.model_file, options.num_train_sents, options.num_test_sents, estimator=treebank_estimator, verbose=options.verbose) elif options.train_chunker: conll2k_train = LazyCorpusLoader( 'conll2000', ConllChunkCorpusReader, ['train.txt'], ('NP','VP','PP')) conll2k_train_sequence = conll2k_train.iob_sents() conll2k_test = LazyCorpusLoader( 'conll2000', ConllChunkCorpusReader, ['test.txt'], ('NP','VP','PP')) conll2k_test_sequence = conll2k_test.iob_sents() conll2k_estimator = LidstoneProbDistFactory conll2k_transform = ClosedCategoryChunkTransform(TREEBANK_CLOSED_CATS) model = train_model(HiddenMarkovModelChunkTagger, conll2k_train_sequence, conll2k_test_sequence, options.model_file, options.num_train_sents, options.num_test_sents, estimator=conll2k_estimator, transform=conll2k_transform, verbose=options.verbose)