# NOTE(review): this fragment starts mid-script. The other progress messages
# below are guarded by `if args.trace:`; confirm whether this one is guarded
# by a statement that precedes the visible text.
print 'loading chunker %s' % args.chunker

# The literal name 'pattern' selects the PatternChunker; any other value is
# handed to load_model.
if args.chunker == 'pattern':
    chunker = chunkers.PatternChunker()
else:
    chunker = load_model(args.chunker)

#######################
## coverage analysis ##
#######################

if args.score:
    if args.trace:
        print 'evaluating chunker score\n'

    chunked_sents = corpus.chunked_sents()

    # When a fraction other than 1.0 is requested, evaluate only the leading
    # slice of the chunked sentences; the cutoff index is rounded up.
    if args.fraction != 1.0:
        cutoff = int(math.ceil(len(chunked_sents) * args.fraction))
        chunked_sents = chunked_sents[:cutoff]

    print chunker.evaluate(chunked_sents), '\n'

if args.trace:
    print 'analyzing chunker coverage of %s with %s\n' % (
        args.corpus, chunker.__class__.__name__)

# Frequency distribution that the coverage pass fills in (the counting code
# is not visible in this fragment).
iobs_found = FreqDist()
sents = corpus.sents()

# NOTE(review): the body of this condition lies beyond the visible fragment.
if args.fraction != 1.0:
print 'loading chunker %s' % args.chunker if args.chunker == 'pattern': chunker = chunkers.PatternChunker() else: chunker = load_model(args.chunker) ####################### ## coverage analysis ## ####################### if args.score: if args.trace: print 'evaluating chunker score\n' chunked_sents = corpus.chunked_sents() if args.fraction != 1.0: cutoff = int(math.ceil(len(chunked_sents) * args.fraction)) chunked_sents = chunked_sents[:cutoff] print chunker.evaluate(chunked_sents), '\n' if args.trace: print 'analyzing chunker coverage of %s with %s\n' % (args.corpus, chunker.__class__.__name__) iobs_found = FreqDist() sents = corpus.sents() if args.fraction != 1.0: cutoff = int(math.ceil(len(sents) * args.fraction))
tagger = nltk.data.load(args.tagger) if args.trace: print "loading chunker %s" % args.chunker chunker = nltk.data.load(args.chunker) ####################### ## coverage analysis ## ####################### if args.score: if args.trace: print "evaluating chunker score\n" print chunker.evaluate(corpus.chunked_sents()), "\n" if args.trace: print "analyzing chunker coverage of %s with %s\n" % (args.corpus, chunker.__class__.__name__) iobs_found = FreqDist() for sent in corpus.sents(): tree = chunker.parse(tagger.tag(sent)) for child in tree.subtrees(lambda t: t.node != "S"): iobs_found.inc(child.node) iobs = iobs_found.samples() justify = max(7, *[len(iob) for iob in iobs])