Пример #1
0
def post_process(x):
    """Convert a nested ``(body, label)`` tuple into an `ImmutableTree`.

    Anything that is not a tuple is handed back unchanged.  A tuple must
    unpack into exactly two items, ``body`` first and ``label`` second.  When
    ``body`` is itself a tuple, each of its elements is converted recursively
    and the results become the node's children; otherwise ``body`` is the
    node's single child.
    """
    if isinstance(x, tuple):
        body, label = x
        if isinstance(body, tuple):
            children = [post_process(child) for child in body]
        else:
            children = [body]
        return ImmutableTree(label, children)
    return x
Пример #2
0
def derivation(x):
    """Turn a `Viterbi` or `Point` derivation into an `ImmutableTree`.

    ``x`` is an object whose ``d`` attribute holds the derivation.  If ``x.d``
    is neither a list nor a tuple it is returned as-is.  Otherwise it must
    have exactly two items, each exposing its own ``d``: item 0 supplies the
    body and item 1 supplies the label.  A list/tuple body is converted
    element by element (recursively); any other body becomes the single
    child of the resulting tree.
    """
    node = x.d
    if not isinstance(node, (list, tuple)):
        return node
    assert len(node) == 2
    # Label is read before the body, as in the original access order.
    label = node[1].d
    body = node[0].d
    children = (
        [derivation(child) for child in body]
        if isinstance(body, (list, tuple))
        else [body]
    )
    return ImmutableTree(label, children)
Пример #3
0
def main():
    """Exercise `immutable` and `produce` on a tiny pair of trees.

    Builds the input tree ``(foo bar)`` and the output tree ``(bar foo)``,
    prints an immutable copy of a mutable ``(foo bar)`` tree, loads the rules
    from ``testrules.yaml``, prints them as a set, and finally prints whatever
    `produce` returns for a frozen copy of that rule set.
    """
    source_tree = ImmutableTree("(foo bar)")
    target_tree = ImmutableTree("(bar foo)")

    print(immutable(Tree("(foo bar)")))

    rule_set = set(loadrules("testrules.yaml"))
    print(rule_set)

    # XXX(alexr): do we need to make rules be sets?
    # For now the rules are simply passed along as a frozenset.
    frozen_rules = frozenset(rule_set)
    # NOTE(review): the last two arguments were spelled ``(0)`` and ``(1)``,
    # which are plain ints, not 1-tuples -- confirm that is what `produce`
    # expects.
    print(produce(source_tree, target_tree, frozen_rules, "q0", 0, 1))
Пример #4
0
def test_grammar():
    """Round-trip one lexicalized tree through `LexicalizedCFGEncoder`.

    A grammar with one binarized rule (S -> NP VP) and two unary rules
    (NP -> N, VP -> V) is built and finished.  The tree is turned into parts
    with ``transform_structure`` and rebuilt with ``from_parts``; the parts and
    both trees are printed, and the rebuilt tree must equal the original.
    """
    rules = RuleSet()
    rules.add(BinarizedRule("S", "NP", "VP", 0, 0, (0, 0)))
    for parent, child in (("NP", "N"), ("VP", "V")):
        rules.add_unary(UnaryRule(parent, child))
    rules.finish()

    original = Tree.fromstring("(S^2 (NP^1 (N^1 dog^1)) (VP^2 (V^2 flies^2)))")
    encoder = LexicalizedCFGEncoder(["dog", "flies"], ["N", "V"], rules)

    parts = encoder.transform_structure(original)
    # Single-argument print(...) prints the same under Python 2 and 3.
    print(parts)
    rebuilt = encoder.from_parts(parts)
    print(original)
    print(rebuilt)
    assert original == rebuilt
Пример #5
0
def main():
	"""Run a basic REPL for testing a `GoodmanDOP` parser (Python 2 code).

	Builds a small hard-coded treebank, constructs a `GoodmanDOP` model on
	top of `BitParChartParser`, prints the corpus, and then repeatedly
	prompts for a sentence.  For each sentence the parses are printed and
	accumulated in a `FreqDist`, and the highest-scoring entry is reported.
	The loop ends when an empty line is entered.
	"""
	# NOTE: this first toy corpus is overwritten by the assignment right
	# below, so it is never actually used.
	corpus = """(S (NP John) (VP (V likes) (NP Mary)))
(S (NP Peter) (VP (V hates) (NP Susan)))
(S (NP Harry) (VP (V eats) (NP pizza)))
(S (NP Hermione) (VP (V eats)))""".splitlines()
	corpus = """(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (JJ hungry) (NN dog))))
(S (NP (DT The) (JJ little) (NN mouse)) (VP (VBP ate) (NP (DT the) (NN cat))))""".splitlines()
	#corpus = """(S (NP mary) (VP walks) (AP quickly))""".splitlines()
	#(S (NP Harry) (VP (V likes) (NP Susan) (ADVP (RB very) (RB much))))
	# One Tree per bracketed line of the corpus string.
	corpus = [Tree(a) for a in corpus]
	#d = GoodmanDOP(corpus, rootsymbol='S')
	# Imported locally, so bitpar is only required when main() actually runs.
	from bitpar import BitParChartParser
	d = GoodmanDOP(corpus, rootsymbol='TOP', wrap='TOP',
						parser=BitParChartParser)
	#d = GoodmanDOP(corpus, rootsymbol='TOP', wrap='TOP')
	#print d.grammar
	print "corpus"
	for a in corpus: print a
	# Any truthy placeholder, so the loop body runs at least once.
	w = "foo!"
	# An empty (or whitespace-only) input line splits to [] and ends the loop.
	while w:
		# Trailing comma: Python 2 print without a terminating newline.
		print "sentence:",
		# NOTE: raw_input() sits outside the try block, so an EOFError at
		# end of input is not handled here.
		w = raw_input().split()
		try:
			p = FreqDist()
			for n, a in enumerate(d.parser.nbest_parse(w)):
				# n is 0-based, so at most 1001 parses are processed.
				if n > 1000: break
				print a
				# Adds a.prob() to the entry for the id-stripped, immutable
				# version of the parse; presumably this sums the
				# probabilities of parses that coincide once ids are
				# removed -- confirm against FreqDist.inc.
				p.inc(ImmutableTree.convert(removeids(a)), a.prob())
			#for b, a in sorted((b,a) for (a,b) in p.items()):
			#	print a, b
			print
			print 'best', p.max(), p[p.max()]
			#print d.parse(w)
		# Any failure in the block above is reduced to the word "error"
		# (details are discarded) and the prompt loop continues.
		except Exception: # as e:
			print "error", #e
Пример #6
0
def immutable(tr):
    """Return an `ImmutableTree` built from the printed form of ``tr``.

    ``tr`` is rendered with ``pprint(margin=10000)`` and the resulting string
    is passed to the `ImmutableTree` constructor.  The very wide margin is
    presumably there to keep the rendering on a single line -- confirm
    against the tree class's ``pprint``.
    """
    rendered = tr.pprint(margin=10000)
    return ImmutableTree(rendered)
Пример #7
0
 def test_PreterminalJapaneseTypedVarImmutableTree(self):
     """`TreeContains` must hold for ``(学生 hello)`` and ``?x0|学生``.

     The tree is an `ImmutableTree` with a non-ASCII label; the pattern
     comes from `tree_or_string` (going by the test name, a typed variable).
     """
     haystack = ImmutableTree.fromstring(u'(学生 hello)')
     pattern = tree_or_string(u'?x0|学生')
     self.assertTrue(TreeContains(haystack, pattern))