Пример #1
0
def test_transforms():
    """Test reversibility of Tiger transformations."""
    from discodop.treebanktransforms import transform, reversetransform, \
      bracketings
    from discodop.treebank import NegraCorpusReader, handlefunctions
    headrules = None  # 'alpino.headrules'
    n = NegraCorpusReader('alpinosample.export', headrules=headrules)
    nn = NegraCorpusReader('alpinosample.export', headrules=headrules)
    transformations = ('S-RC', 'VP-GF', 'NP')
    trees = [
        transform(tree, sent, transformations)
        for tree, sent in zip(nn.trees().values(),
                              nn.sents().values())
    ]
    print('\ntransformed')
    correct = exact = e = 0
    for a, b, c, d in islice(
            zip(n.trees().values(),
                n.sents().values(), trees, count()), 100):
        transformc = reversetransform(c.copy(True), b, transformations)
        c1 = bracketings(canonicalize(a))
        c2 = bracketings(canonicalize(transformc))
        z = -1  # 825
        if c1 != c2 or e == z:
            precision = len(set(c1) & set(c2)) / len(set(c1))
            recall = len(set(c1) & set(c2)) / len(set(c2))
            if precision != 1.0 or recall != 1.0 or d == z:
                print(
                    d, ' '.join(':'.join((str(n), a.encode('unicode-escape')))
                                for n, a in enumerate(b)))
                print('no match', precision, recall)
                print(len(c1), len(c2), 'gold-transformed',
                      set(c2) - set(c1), 'transformed-gold',
                      set(c1) - set(c2))
                print(a)
                print(transformc)
                handlefunctions('add', a)
                print(a, '\n', b, '\n\n')
            else:
                correct += 1
        else:
            exact += 1
            correct += 1
        e += 1
    print('matches', correct, '/', e, 100 * correct / e, '%')
    print('exact', exact)
Пример #2
0
def test_transforms():
	"""Test reversibility of Tiger transformations."""
	from discodop.treebanktransforms import transform, reversetransform, \
			bracketings
	from discodop.treebank import NegraCorpusReader, handlefunctions
	headrules = None  # 'alpino.headrules'
	n = NegraCorpusReader('alpinosample.export', headrules=headrules)
	nn = NegraCorpusReader('alpinosample.export', headrules=headrules)
	transformations = ('S-RC', 'VP-GF', 'NP')
	trees = [transform(tree, sent, transformations)
			for tree, sent in zip(nn.trees().values(),
				nn.sents().values())]
	print('\ntransformed')
	correct = exact = d = 0
	for a, b, c in islice(zip(n.trees().values(),
			trees, n.sents().values()), 100):
		transformb = reversetransform(b.copy(True), transformations)
		b1 = bracketings(canonicalize(a))
		b2 = bracketings(canonicalize(transformb))
		z = -1  # 825
		if b1 != b2 or d == z:
			precision = len(set(b1) & set(b2)) / len(set(b1))
			recall = len(set(b1) & set(b2)) / len(set(b2))
			if precision != 1.0 or recall != 1.0 or d == z:
				print(d, ' '.join(':'.join((str(n),
					a.encode('unicode-escape'))) for n, a in enumerate(c)))
				print('no match', precision, recall)
				print(len(b1), len(b2), 'gold-transformed', set(b2) - set(b1),
						'transformed-gold', set(b1) - set(b2))
				print(a)
				print(transformb)
				handlefunctions('add', a)
				print(a, '\n', b, '\n\n')
			else:
				correct += 1
		else:
			exact += 1
			correct += 1
		d += 1
	print('matches', correct, '/', d, 100 * correct / d, '%')
	print('exact', exact)
Пример #3
0
 def test_transform(self):
     from discodop.treebanktransforms import transform, reversetransform, \
       bracketings
     from discodop.treebank import NegraCorpusReader
     n = NegraCorpusReader('alpinosample.export')
     for transformations in (('FUNC-NODE', ), ('MORPH-NODE', ),
                             ('LEMMA-NODE', ), ('FUNC-NODE', 'MORPH-NODE',
                                                'LEMMA-NODE')):
         nn = NegraCorpusReader('alpinosample.export')
         trees = [
             transform(tree, sent, transformations)
             for tree, sent in zip(nn.trees().values(),
                                   nn.sents().values())
         ]
         for a, b in islice(zip(n.trees().values(), trees), 100):
             before = bracketings(canonicalize(a))
             transformb = reversetransform(b.copy(True), transformations)
             after = bracketings(canonicalize(transformb))
             assert before == after, (
                 'mismatch with %r\nbefore: %r\nafter: %r' %
                 (transformations, before, after))
Пример #4
0
	def test_transform(self):
		from discodop.treebanktransforms import transform, reversetransform, \
				bracketings
		from discodop.treebank import NegraCorpusReader
		n = NegraCorpusReader('alpinosample.export')
		for transformations in (
				('FUNC-NODE', ),
				('MORPH-NODE', ),
				('LEMMA-NODE', ),
				('FUNC-NODE', 'MORPH-NODE', 'LEMMA-NODE')):
			nn = NegraCorpusReader('alpinosample.export')
			trees = [transform(tree, sent, transformations)
					for tree, sent in zip(nn.trees().values(),
						nn.sents().values())]
			for a, b in islice(zip(n.trees().values(), trees), 100):
				before = bracketings(canonicalize(a))
				transformb = reversetransform(b.copy(True), transformations)
				after = bracketings(canonicalize(transformb))
				assert before == after, (
						'mismatch with %r\nbefore: %r\nafter: %r' % (
						transformations, before, after))