# Report precision, recall and F-measure for every label. Each metric can be
# switched off independently through its args.no_* flag.
for label in labels:
	ref, test = refsets[label], testsets[label]

	# "or 0" substitutes 0 whenever a metric yields a falsy result (presumably
	# None when it is undefined), so the '%f' conversion always gets a number.
	if not args.no_precision:
		print('%s precision: %f' % (label, precision(ref, test) or 0))

	if not args.no_recall:
		print('%s recall: %f' % (label, recall(ref, test) or 0))

	if not args.no_fmeasure:
		print('%s f-measure: %f' % (label, f_measure(ref, test) or 0))

# List the top-N most informative features, but only when:
#   * a non-zero count was requested,
#   * the classifier actually offers show_most_informative_features,
#   * the run is not a combined multi + binary one, and
#   * cross-fold mode is off.
if (
	args.show_most_informative
	and hasattr(classifier, 'show_most_informative_features')
	and (not args.multi or not args.binary)
	and not args.cross_fold
):
	print('%d most informative features' % args.show_most_informative)
	classifier.show_most_informative_features(args.show_most_informative)

##############
## pickling ##
##############

# Persist the trained classifier unless pickling is disabled or the run was a
# cross-fold evaluation.
if not (args.no_pickle or args.cross_fold):
	if not args.filename:
		# No explicit target: derive "<corpus>_<classifier names>.pickle"
		# under ~/nltk_data/classifiers.
		name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier))
		fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
	else:
		# Explicit target: only expand a leading "~".
		fname = os.path.expanduser(args.filename)

	dump_object(classifier, fname, trace=args.trace)
################
## evaluation ##
################

# Evaluate the chunker on test_chunks unless evaluation was switched off.
if not args.no_eval:
    # The progress line naming the chunker class is only shown when tracing.
    if args.trace:
        print("evaluating %s" % chunker.__class__.__name__)

    # Print whatever evaluate() returns for the test chunks.
    print(chunker.evaluate(test_chunks))

##############
## pickling ##
##############

# Persist the trained chunker unless pickling was disabled.
if not args.no_pickle:
    if not args.filename:
        # No explicit target: build "<corpus basename>[_<algorithm>].pickle".
        # The trailing "/" is stripped first so a directory path still yields
        # its last component rather than an empty string.
        parts = [os.path.basename(args.corpus.rstrip("/"))]

        # The classifier option takes precedence over the sequential one.
        if args.classifier:
            parts.append(args.classifier)
        elif args.sequential:
            parts.append(args.sequential)

        name = "%s.pickle" % "_".join(parts)
        fname = os.path.join(os.path.expanduser("~/nltk_data/chunkers"), name)
    else:
        # Explicit target: only expand a leading "~".
        fname = os.path.expanduser(args.filename)

    dump_object(chunker, fname, trace=args.trace)
##########################
## Hierarchical combine ##
##########################

# Labels known to the root classifier; every hierarchy entry must name one.
labels = combined.labels()
# Maps a root label to the sub-classifier loaded for it.
label_classifiers = {}

for h in args.hierarchy:
	# Each entry has the form "label:path". Split on the FIRST colon only, so
	# that paths which themselves contain colons (e.g. "file:..." resource
	# URLs or Windows drive letters) are not rejected by the unpacking.
	# An entry without any colon still raises ValueError, as before.
	label, path = h.split(':', 1)
	
	if label not in labels:
		raise ValueError('%s is not in root labels: %s' % (label, labels))
	
	# NOTE(review): depending on the file format nltk.data.load may unpickle
	# the resource - only pass paths to trusted files.
	label_classifiers[label] = nltk.data.load(path)
	
	if args.trace:
		print('mapping %s to %s from %s' % (label, label_classifiers[label], path))

# Only wrap the root classifier when at least one sub-classifier was supplied.
if label_classifiers:
	if args.trace:
		# The message used to be a bare string expression and was never shown.
		print('combining %d label classifiers for root %s' % (len(label_classifiers), combined))
	
	combined = multi.HierarchicalClassifier(combined, label_classifiers)

##############################
## dump combined classifier ##
##############################

# Expand a leading "~" in the requested output path, then write the combined
# classifier there. The save is unconditional: no args.no_pickle check here.
fname = os.path.expanduser(args.filename)
dump_object(combined, fname, trace=args.trace)
# Example no. 4
# 0
################

# Evaluate the tagger on test_sents unless evaluation was switched off.
if not args.no_eval:
	# Name the tagger class first, then report the score from evaluate().
	print('evaluating %s' % tagger.__class__.__name__)
	print('accuracy: %f' % tagger.evaluate(test_sents))

##############
## pickling ##
##############

# Persist the trained tagger unless pickling was disabled.
if not args.no_pickle:
	if not args.filename:
		# No explicit target: start from the last component of the corpus
		# name/path (trailing "/" stripped so a directory still has one) ...
		parts = [os.path.basename(args.corpus.rstrip('/'))]

		# ... then append a tag for every training option that was used,
		# in the fixed order brill, classifier(s), sequential.
		if args.brill:
			parts.append('brill')

		if args.classifier:
			parts.append('_'.join(args.classifier))

		if args.sequential:
			parts.append(args.sequential)

		name = '%s.pickle' % '_'.join(parts)
		fname = os.path.join(os.path.expanduser('~/nltk_data/taggers'), name)
	else:
		# Explicit target: only expand a leading "~".
		fname = os.path.expanduser(args.filename)

	dump_object(tagger, fname, trace=args.trace)