Example #1
    def test(self, test_sequence, **kwargs):
        """
        Tests the C{HiddenMarkovModelTagger} instance.

        @param test_sequence: a sequence of labeled test instances
        @type test_sequence: C{list} of C{list} of C{(token, tag)}
        @kwparam verbose: boolean flag indicating whether testing should be
            verbose, i.e. print per-sentence tagging output
        @type verbose: C{bool}
        """
        
        def words(sent):
            return [word for (word, tag) in sent]
        
        def tags(sent):
            return [tag for (word, tag) in sent]
        
        test_sequence = LazyMap(self._transform.transform, test_sequence)
        predicted_sequence = LazyMap(self._tag, LazyMap(words, test_sequence))
            
        if kwargs.get('verbose', False):
            # This will be used again later for accuracy so there's no sense
            # in tagging it twice.
            test_sequence = list(test_sequence)
            predicted_sequence = list(predicted_sequence)

            for test_sent, predicted_sent in zip(test_sequence,
                                                 predicted_sequence):
                print 'Test:', \
                    ' '.join(['%s/%s' % (str(token), str(tag)) 
                              for (token, tag) in test_sent])
                print
                print 'Untagged:', \
                    ' '.join([str(token) for (token, tag) in test_sent])
                print
                print 'HMM-tagged:', \
                    ' '.join(['%s/%s' % (str(token), str(tag)) 
                              for (token, tag) in predicted_sent])
                print
                print 'Entropy:', \
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent])
                print
                print '-' * 60
        
        test_tags = LazyConcatenation(LazyMap(tags, test_sequence))
        predicted_tags = LazyConcatenation(LazyMap(tags, predicted_sequence))
                
        acc = _accuracy(test_tags, predicted_tags)

        count = sum([len(sent) for sent in test_sequence])

        print 'accuracy over %d tokens: %.2f' % (count, acc * 100)
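A minimal usage sketch for the test() method above, assuming NLTK with the Penn Treebank sample corpus installed; the corpus choice and the train/test split are illustrative assumptions, not from the original project:

from nltk.corpus import treebank
from nltk.tag.hmm import HiddenMarkovModelTagger

# Illustrative split over the treebank sample (an assumption, not from the source).
tagged_sents = treebank.tagged_sents()
train_sents, test_sents = tagged_sents[:3000], tagged_sents[3000:3010]

# Train a supervised HMM tagger, then score it with the test() method shown above.
hmm_tagger = HiddenMarkovModelTagger.train(train_sents)
hmm_tagger.test(test_sents, verbose=True)  # per-sentence output plus "accuracy over N tokens: ..."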
Example #2
    def evaluate(self, gold):
        """
        Score the accuracy of the tagger against the gold standard.
        Strip the tags from the gold standard text, retag it using
        the tagger, then compute the accuracy score.

        @type gold: C{list} of C{list} of C{(token, tag)}
        @param gold: The list of tagged sentences to score the tagger on.
        @rtype: C{float}
        """

        tagged_sents = self.batch_tag([untag(sent) for sent in gold])
        gold_tokens = sum(gold, [])
        test_tokens = sum(tagged_sents, [])
        return _accuracy(gold_tokens, test_tokens)
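A hedged usage sketch for evaluate(): any TaggerI subclass exposes it in the NLTK version these examples come from, so a simple unigram tagger trained on treebank data (the corpus and split are illustrative assumptions) can be scored like this:

from nltk.corpus import treebank
from nltk.tag import UnigramTagger

# Illustrative corpus split (an assumption, not from the source project).
train_sents = treebank.tagged_sents()[:3000]
gold_sents = treebank.tagged_sents()[3000:3100]

tagger = UnigramTagger(train_sents)
# evaluate() strips the gold tags, retags the bare tokens, and returns
# token-level accuracy as a float in [0, 1].
print(tagger.evaluate(gold_sents))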
Example #3
File: api.py Project: ciju/yql_hash
    def evaluate(self, gold):
        """
        Score the accuracy of the tagger against the gold standard.
        Strip the tags from the gold standard text, retag it using
        the tagger, then compute the accuracy score.

        @type gold: C{list} of C{list} of C{(token, tag)}
        @param gold: The list of tagged sentences to score the tagger on.
        @rtype: C{float}
        """

        tagged_sents = self.batch_tag([untag(sent) for sent in gold])
        gold_tokens = sum(gold, [])
        test_tokens = sum(tagged_sents, [])
        return _accuracy(gold_tokens, test_tokens)
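The docstring's first step, stripping the tags from the gold standard, is what untag() does; a toy illustration (the sentence is made up):

from nltk.tag.util import untag

gold_sent = [('the', 'DT'), ('cat', 'NN'), ('sat', 'VBD')]
# untag() drops the tags, leaving the bare tokens to be retagged.
print(untag(gold_sent))  # ['the', 'cat', 'sat']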
Example #4
File: util.py Project: DrDub/nltk
def accuracy(chunker, gold):
    """
    Score the accuracy of the chunker against the gold standard.
    Strip the chunk information from the gold standard and rechunk it using
    the chunker, then compute the accuracy score.

    :type chunker: ChunkParserI
    :param chunker: The chunker being evaluated.
    :type gold: list(Tree)
    :param gold: The chunk structures to score the chunker on.
    :rtype: float
    """

    gold_tags = []
    test_tags = []
    for gold_tree in gold:
        test_tree = chunker.parse(gold_tree.flatten())
        gold_tags += tree2conlltags(gold_tree)
        test_tags += tree2conlltags(test_tree)

#    print 'GOLD:', gold_tags[:50]
#    print 'TEST:', test_tags[:50]
    return _accuracy(gold_tags, test_tags)
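A hedged usage sketch for this accuracy() helper, assuming it is imported from nltk.chunk.util as in the file above; the regexp grammar and the CoNLL-2000 slice are illustrative assumptions:

from nltk.chunk import RegexpParser
from nltk.chunk.util import accuracy
from nltk.corpus import conll2000

# A toy NP chunker (the grammar is an assumption, not from the source project).
chunker = RegexpParser(r"NP: {<DT>?<JJ>*<NN.*>+}")

# Gold chunk trees that accuracy() will strip, rechunk, and score against.
gold = conll2000.chunked_sents('test.txt', chunk_types=['NP'])[:50]
print(accuracy(chunker, gold))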
Example #5
File: util.py Project: Drafffffff/nltk-1
def accuracy(chunker, gold):
    """
    Score the accuracy of the chunker against the gold standard.
    Strip the chunk information from the gold standard and rechunk it using
    the chunker, then compute the accuracy score.

    :type chunker: ChunkParserI
    :param chunker: The chunker being evaluated.
    :type gold: list(Tree)
    :param gold: The chunk structures to score the chunker on.
    :rtype: float
    """

    gold_tags = []
    test_tags = []
    for gold_tree in gold:
        test_tree = chunker.parse(gold_tree.flatten())
        gold_tags += tree2conlltags(gold_tree)
        test_tags += tree2conlltags(test_tree)

    #    print 'GOLD:', gold_tags[:50]
    #    print 'TEST:', test_tags[:50]
    return _accuracy(gold_tags, test_tags)
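What gets compared under the hood: tree2conlltags() flattens each chunk tree into (word, pos, IOB) triples, and _accuracy() then compares the two triple streams position by position. A toy illustration (the sentence is made up):

from nltk import Tree
from nltk.chunk.util import tree2conlltags

tree = Tree('S', [Tree('NP', [('the', 'DT'), ('cat', 'NN')]), ('sat', 'VBD')])
# Each token becomes a (word, pos, IOB-chunk-tag) triple.
print(tree2conlltags(tree))
# [('the', 'DT', 'B-NP'), ('cat', 'NN', 'I-NP'), ('sat', 'VBD', 'O')]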
Example #6
    def evaluate(self, gold):
        # Same pattern as the examples above: strip the gold tags, retag the
        # bare tokens, then compare the flattened token streams for accuracy.
        tagged_sents = self.batch_tag([untag(sent) for sent in gold])
        gold_tokens = sum(gold, [])
        test_tokens = sum(tagged_sents, [])
        return _accuracy(gold_tokens, test_tokens)