def test(self, test_sequence, **kwargs):
    """
    Tests the C{HiddenMarkovModelTagger} instance.

    Tags the (untagged) words of each test sentence with this HMM and
    prints an overall token-level accuracy against the gold tags.

    @param test_sequence: a sequence of labeled test instances
    @type test_sequence: C{list} of C{list}
    @kwparam verbose: boolean flag indicating whether training should be
        verbose or include printed output
    @type verbose: C{bool}
    """
    # Local projections of a tagged sentence [(word, tag), ...] onto its
    # words and its tags respectively.
    def words(sent):
        return [word for (word, tag) in sent]

    def tags(sent):
        return [tag for (word, tag) in sent]

    # Both sequences are lazy: nothing is transformed or tagged until the
    # accuracy computation (or the verbose loop) iterates over them.
    test_sequence = LazyMap(self._transform.transform, test_sequence)
    predicted_sequence = LazyMap(self._tag, LazyMap(words, test_sequence))

    if kwargs.get('verbose', False):
        # This will be used again later for accuracy so there's no sense
        # in tagging it twice.
        test_sequence = list(test_sequence)
        predicted_sequence = list(predicted_sequence)

        # Dump each sentence: gold tagging, raw tokens, HMM tagging, and
        # the model's entropy over the untagged token sequence.
        for test_sent, predicted_sent in zip(test_sequence,
                                             predicted_sequence):
            print 'Test:', \
                ' '.join(['%s/%s' % (str(token), str(tag))
                          for (token, tag) in test_sent])
            print
            print 'Untagged:', \
                ' '.join([str(token) for (token, tag) in test_sent])
            print
            print 'HMM-tagged:', \
                ' '.join(['%s/%s' % (str(token), str(tag))
                          for (token, tag) in predicted_sent])
            print
            # Tags are stripped (set to None) so entropy is computed over
            # the token sequence alone.
            print 'Entropy:', \
                self.entropy([(token, None)
                              for (token, tag) in predicted_sent])
            print
            print '-' * 60

    # Flatten the per-sentence tag lists into single lazy token streams
    # and score them position-by-position.
    test_tags = LazyConcatenation(LazyMap(tags, test_sequence))
    predicted_tags = LazyConcatenation(LazyMap(tags, predicted_sequence))

    acc = _accuracy(test_tags, predicted_tags)
    count = sum([len(sent) for sent in test_sequence])

    print 'accuracy over %d tokens: %.2f' % (count, acc * 100)
def evaluate(self, gold):
    """
    Score the accuracy of the tagger against the gold standard.
    Strip the tags from the gold standard text, retag it using the
    tagger, then compute the accuracy score.

    @type gold: C{list} of C{list} of C{(token, tag)}
    @param gold: The list of tagged sentences to score the tagger on.
    @rtype: C{float}
    """
    # Retag the gold sentences after stripping their tags.
    tagged_sents = self.batch_tag([untag(sent) for sent in gold])
    # Flatten with comprehensions: the previous sum(sents, []) idiom
    # re-copies the growing accumulator list for every sentence and is
    # quadratic in the total number of tokens; this is linear.
    gold_tokens = [token for sent in gold for token in sent]
    test_tokens = [token for sent in tagged_sents for token in sent]
    # Token-by-token accuracy of (word, tag) pairs.
    return _accuracy(gold_tokens, test_tokens)
def accuracy(chunker, gold):
    """
    Score the accuracy of the chunker against the gold standard.
    Strip the chunk information from the gold standard and rechunk it using
    the chunker, then compute the accuracy score.

    :type chunker: ChunkParserI
    :param chunker: The chunker being evaluated.
    :type gold: tree
    :param gold: The chunk structures to score the chunker on.
    :rtype: float
    """
    gold_tags = []
    test_tags = []
    for reference_tree in gold:
        # Flatten the gold tree down to its leaves and let the chunker
        # rebuild the chunk structure from scratch.
        predicted_tree = chunker.parse(reference_tree.flatten())
        # Compare in CoNLL (token, tag, IOB-chunk) form.
        gold_tags.extend(tree2conlltags(reference_tree))
        test_tags.extend(tree2conlltags(predicted_tree))
    return _accuracy(gold_tags, test_tags)
def evaluate(self, gold):
    """
    Score the accuracy of the tagger against the gold standard:
    strip the tags from the gold sentences, retag them with this
    tagger, and compare the results token by token.

    @type gold: C{list} of C{list} of C{(token, tag)}
    @param gold: The list of tagged sentences to score the tagger on.
    @rtype: C{float}
    """
    tagged_sents = self.batch_tag([untag(sent) for sent in gold])
    # sum(list_of_lists, []) copies the accumulator once per sentence,
    # making flattening quadratic in the token count; a nested
    # comprehension does the same job in linear time.
    gold_tokens = [tok for sent in gold for tok in sent]
    test_tokens = [tok for sent in tagged_sents for tok in sent]
    return _accuracy(gold_tokens, test_tokens)