示例#1
0
def demo(**kwargs):
    """Print the IOB sentences and the mentions of the MUC-6 and MUC-7 corpora.

    For each corpus, in the order MUC-6 then MUC-7:

    * every item of every sentence returned by ``iob_sents()`` is printed on
      its own line, with a blank line after each sentence and one more blank
      line after the last sentence;
    * every mention of every group returned by ``mentions(depth=None)`` is
      printed on its own line, with a blank line after each non-empty group
      and one more blank line after the last group.

    ``**kwargs`` is accepted for call compatibility and is not used.
    ``LazyCorpusLoader`` is expected to be provided by the enclosing module.
    """
    import nltk
    from nltk_contrib.coref import NLTK_COREF_DATA
    from nltk_contrib.coref.muc import muc6_documents, muc7_documents
    from nltk_contrib.coref.muc import MUCCorpusReader

    # Give NLTK_COREF_DATA the highest priority in NLTK's data search path.
    nltk.data.path.insert(0, NLTK_COREF_DATA)

    # Both corpora are dumped in exactly the same way, so drive a single loop
    # from a table instead of repeating the printing code per corpus.
    corpora = (
        ('muc6/', muc6_documents),
        ('muc7/', muc7_documents),
    )
    for root, documents in corpora:
        corpus = LazyCorpusLoader(root, MUCCorpusReader, documents)

        # print(x) with a single argument and print('') produce the same
        # output under Python 2's print statement and Python 3's print
        # function, so the demo runs unchanged on both.
        for sent in corpus.iob_sents()[:]:
            for word in sent:
                print(word)
            print('')
        print('')

        for sent in corpus.mentions(depth=None):
            for mention in sent:
                print(mention)
            if sent:
                print('')
        print('')
示例#2
0
def demo(**kwargs):
    """Print the IOB sentences and the mentions of the MUC-6 and MUC-7 corpora.

    For each corpus, in the order MUC-6 then MUC-7:

    * every item of every sentence returned by ``iob_sents()`` is printed on
      its own line, with a blank line after each sentence and one more blank
      line after the last sentence;
    * every mention of every group returned by ``mentions(depth=None)`` is
      printed on its own line, with a blank line after each non-empty group
      and one more blank line after the last group.

    ``**kwargs`` is accepted for call compatibility and is not used.
    ``LazyCorpusLoader`` is expected to be provided by the enclosing module.
    """
    import nltk
    from nltk_contrib.coref import NLTK_COREF_DATA
    from nltk_contrib.coref.muc import muc6_documents, muc7_documents
    from nltk_contrib.coref.muc import MUCCorpusReader

    # Give NLTK_COREF_DATA the highest priority in NLTK's data search path.
    nltk.data.path.insert(0, NLTK_COREF_DATA)

    # Both corpora are dumped in exactly the same way, so drive a single loop
    # from a table instead of repeating the printing code per corpus.
    corpora = (
        ('muc6/', muc6_documents),
        ('muc7/', muc7_documents),
    )
    for root, documents in corpora:
        corpus = LazyCorpusLoader(root, MUCCorpusReader, documents)

        # print(x) with a single argument and print('') produce the same
        # output under Python 2's print statement and Python 3's print
        # function, so the demo runs unchanged on both.
        for sent in corpus.iob_sents()[:]:
            for word in sent:
                print(word)
            print('')
        print('')

        for sent in corpus.mentions(depth=None):
            for mention in sent:
                print(mention)
            if sent:
                print('')
        print('')
示例#3
0
        treebank_test_sequence = treebank_test.tagged_sents()
        treebank_estimator = LidstoneProbDistFactory
        model = train_model(HiddenMarkovModelTagger, 
                            treebank_train_sequence, 
                            treebank_test_sequence,
                            options.model_file, 
                            options.num_train_sents, 
                            options.num_test_sents,
                            estimator=treebank_estimator,
                            verbose=options.verbose)

    elif options.train_chunker:
        conll2k_train = LazyCorpusLoader(
            'conll2000', ConllChunkCorpusReader, 
            ['train.txt'], ('NP','VP','PP'))
        conll2k_train_sequence = conll2k_train.iob_sents()
        conll2k_test = LazyCorpusLoader(
            'conll2000', ConllChunkCorpusReader,
            ['test.txt'], ('NP','VP','PP'))
        conll2k_test_sequence = conll2k_test.iob_sents()
        conll2k_estimator = LidstoneProbDistFactory
        conll2k_transform = ClosedCategoryChunkTransform(TREEBANK_CLOSED_CATS)
        model = train_model(HiddenMarkovModelChunkTagger, 
                            conll2k_train_sequence, 
                            conll2k_test_sequence,
                            options.model_file, 
                            options.num_train_sents, 
                            options.num_test_sents,
                            estimator=conll2k_estimator,
                            transform=conll2k_transform,
                            verbose=options.verbose)