Example #1
def demo(self, test_sents):
    # Load a previously trained CRF model and tag each test sentence,
    # then report overall accuracy against the gold tags.
    tagger = CRFTagger(feature_func=self.feature_detector)
    tagger.set_model_file(self.modelpath)
    for sent in test_sents:
        # strip the gold tags and re-tag the bare tokens
        tagged = tagger.tag(untag(sent))
        for s in self._to_sentence(tagged):
            print(s)
    print(tagger.evaluate(test_sents))
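Example #1 is a method that leans on pieces its class defines elsewhere (self.feature_detector, self.modelpath, self._to_sentence). A minimal self-contained sketch of the same round trip, with an invented feature function, toy training data, and a hypothetical model path (training needs the python-crfsuite package installed):

    from nltk.tag import CRFTagger
    from nltk.tag.util import untag

    def feature_detector(tokens, index):
        # CRFTagger calls feature_func(tokens, index) and expects a list
        # of string feature names for the token at position `index`
        word = tokens[index]
        return ['WORD_' + word, 'SUFFIX_' + word[-2:]]

    # toy gold-tagged data, invented for illustration
    train_sents = [[('the', 'DT'), ('cat', 'NN'), ('sleeps', 'VBZ')],
                   [('a', 'DT'), ('dog', 'NN'), ('barks', 'VBZ')]]

    tagger = CRFTagger(feature_func=feature_detector)
    tagger.train(train_sents, 'model.crf.tagger')  # writes the model file

    # reload the model the way demo() does, then tag bare tokens
    tagger2 = CRFTagger(feature_func=feature_detector)
    tagger2.set_model_file('model.crf.tagger')
    print(tagger2.tag(untag(train_sents[0])))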
Example #2
import argparse
import glob
import random
import time

from nltk.tag import CRFTagger

import formats  # daba's corpus reader, provides HtmlReader


def main():
	aparser = argparse.ArgumentParser(description='Daba disambiguator')
	# aparser.add_argument('-i', '--infile', help='Input file (.html)', default="sys.stdin")
	# aparser.add_argument('-o', '--outfile', help='Output file (.html)', default="sys.stdout")
	aparser.add_argument('-l', '--learn', help='Learn model from data (and save as F if provided)', default=None)
	aparser.add_argument('-p', '--pos', help='Prediction for POS', default=False, action='store_true')
	aparser.add_argument('-t', '--tone', help='Prediction for tones', default=False, action='store_true')
	aparser.add_argument('-g', '--gloss', help='Prediction for glosses', default=False, action='store_true')
	aparser.add_argument('-e', '--evalsize', help='Percent of randomized data to use for evaluation (default 10)', default=10, type=int)
	aparser.add_argument('-v', '--verbose', help='Verbose output', default=False, action='store_true')
	args = aparser.parse_args()

	if args.learn:

		if not (args.pos or args.tone or args.gloss):
			print 'Choose pos, tone, gloss or a combination of them'
			exit(0)

		print 'Make list of files'
		# chain the two globs instead of zipping them: zip stops at the
		# shorter listing and would silently drop files
		allfiles = list(glob.iglob("../corbama/*/*.dis.html")) + \
			list(glob.iglob("../corbama/*.dis.html"))
		allsents = []

		print 'Open files and find features / supervision tags'
		for infile in allfiles:
			print '-', infile
			sent = []
			in_handler = formats.HtmlReader(infile, compatibility_mode=False)
			for token in in_handler:
				if token.type in ('w', 'c'):
					# concatenate the selected supervision layers into one tag
					tags = ''
					if args.pos:
						for ps in token.gloss.ps:
							tags += ps
					if args.tone:
						tags += token.gloss.form.encode('utf-8')
					if args.gloss:
						tags += token.gloss.gloss.encode('utf-8')
					sent.append((token.token, tags))
				# a sentence ends at sentence-final punctuation
				if token.type == 'c' and token.token in ['.', '?', '!']:
					if len(sent) > 1:
						allsents.append(sent)
					sent = []

		datalength = len(allsents)
		p = 1 - args.evalsize / 100.0
		print 'Randomize and split the data: train (%d sentences) / test (%d sentences)' % (int(p * datalength), datalength - int(p * datalength))
		random.seed(123456)
		random.shuffle(allsents)
		train_set = allsents[:int(p * datalength)]
		test_set = allsents[int(p * datalength):]

		print 'Building classifier (CRF/NLTK)'
		# feature.minfreq prunes features seen fewer than 10 times in training
		tagger = CRFTagger(verbose=args.verbose, training_opt={'feature.minfreq': 10})
		t1 = time.time()
		tagger.train(train_set, args.learn)
		t2 = time.time()
		texec = t2 - t1
		# use gmtime, not localtime: texec is a duration, and localtime
		# would shift it by the local timezone offset
		print "... done in", time.strftime('%H:%M:%S', time.gmtime(texec))

		print 'Evaluating classifier'
		print tagger.evaluate(test_set)

		if args.verbose:
			print 'Compute detailed output'

	else:
		print 'USE...'
		aparser.print_help()

	exit(0)
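The script only implements the --learn branch; the else branch just prints usage. A hedged sketch of what applying a model saved via --learn might look like (the model path and tokens below are invented, not part of the original):

    from nltk.tag import CRFTagger

    tagger = CRFTagger()                    # default features, matching training above
    tagger.set_model_file('bambara.model')  # hypothetical file saved with --learn bambara.model
    tokens = ['mun', 'na', '?']             # hypothetical pre-tokenized input
    print(tagger.tag(tokens))               # [(token, predicted_tag), ...]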