def test(self, test_sequence, verbose=False, **kwargs):
        """
        Evaluate the tagger on labeled sentences and print its accuracy.

        The input is first passed through ``self._transform``; every
        resulting sentence is iterated as (token, tag) pairs.  The tokens
        alone are handed to ``self._tag`` and the tags it produces are scored
        against the original ones with ``accuracy``.  Nothing is returned;
        the token count and the accuracy percentage are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: if true, additionally print each sentence in its
            labeled, untagged and predicted form, followed by the value of
            ``self.entropy`` for its tokens
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """
        gold_sents = self._transform(test_sequence)

        # Strip the reference tags and let the model tag the bare tokens.
        tag_tokens = self._tag
        guessed_sents = [
            tag_tokens([word for (word, tag) in sent])
            for sent in gold_sents
        ]

        if verbose:
            for gold, guessed in izip(gold_sents, guessed_sents):
                gold_text = ' '.join(
                    '%s/%s' % (token, tag) for (token, tag) in gold)
                print('Test:', gold_text)
                print()

                bare_text = ' '.join(
                    "%s" % token for (token, tag) in gold)
                print('Untagged:', bare_text)
                print()

                guessed_text = ' '.join(
                    '%s/%s' % (token, tag) for (token, tag) in guessed)
                print('HMM-tagged:', guessed_text)
                print()

                # Entropy is requested for the tokens only: every tag slot
                # is replaced by None before the call.
                unlabeled = [(token, None) for (token, tag) in guessed]
                print('Entropy:', self.entropy(unlabeled))
                print()

                print('-' * 60)

        # Flatten both sides into parallel tag lists for scoring.
        gold_tags = [tag for sent in gold_sents for (word, tag) in sent]
        guessed_tags = [tag for sent in guessed_sents for (word, tag) in sent]

        score = accuracy(gold_tags, guessed_tags)
        token_total = sum(len(sent) for sent in gold_sents)
        print('accuracy over %d tokens: %.2f' % (token_total, score * 100))
Exemplo n.º 2
0
    def test(self, test_sequence, verbose=False, **kwargs):
        """
        Evaluate the tagger on labeled sentences and print its accuracy.

        The input is first passed through ``self._transform``; every
        resulting sentence is iterated as (token, tag) pairs.  The tokens
        alone are handed to ``self._tag`` and the tags it produces are scored
        against the original ones with ``accuracy``.  Nothing is returned;
        the token count and the accuracy percentage are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: if true, additionally print each sentence in its
            labeled, untagged and predicted form, followed by the value of
            ``self.entropy`` for its tokens
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """
        gold_sents = self._transform(test_sequence)

        # Strip the reference tags and let the model tag the bare tokens.
        tag_tokens = self._tag
        guessed_sents = [
            tag_tokens([word for (word, tag) in sent])
            for sent in gold_sents
        ]

        if verbose:
            for gold, guessed in izip(gold_sents, guessed_sents):
                gold_text = ' '.join(
                    '%s/%s' % (token, tag) for (token, tag) in gold)
                print('Test:', gold_text)
                print()

                bare_text = ' '.join(
                    "%s" % token for (token, tag) in gold)
                print('Untagged:', bare_text)
                print()

                guessed_text = ' '.join(
                    '%s/%s' % (token, tag) for (token, tag) in guessed)
                print('HMM-tagged:', guessed_text)
                print()

                # Entropy is requested for the tokens only: every tag slot
                # is replaced by None before the call.
                unlabeled = [(token, None) for (token, tag) in guessed]
                print('Entropy:', self.entropy(unlabeled))
                print()

                print('-' * 60)

        # Flatten both sides into parallel tag lists for scoring.
        gold_tags = [tag for sent in gold_sents for (word, tag) in sent]
        guessed_tags = [tag for sent in guessed_sents for (word, tag) in sent]

        score = accuracy(gold_tags, guessed_tags)
        token_total = sum(len(sent) for sent in gold_sents)
        print('accuracy over %d tokens: %.2f' % (token_total, score * 100))
Exemplo n.º 3
0
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]

    Each element is wrapped in a tuple together with a running counter
    before being handed to ``_nsmallest``; the wrapper is stripped again
    from the results.  With a ``key``, the tuple is (key(elem), counter,
    elem); without one it is (elem, counter).
    """
    if key is not None:
        # The input is needed twice (once for the keys, once for the
        # values), so split it into two independent iterators.
        key_source, value_source = tee(iterable)
        decorated = izip(imap(key, key_source), count(), value_source)
        value_slot = 2
    else:
        decorated = izip(iterable, count())
        value_slot = 0

    # Undecorate: keep only the original element from each result tuple.
    return map(itemgetter(value_slot), _nsmallest(n, decorated))