def fix_nongap_extraction(self, _, n, pred, k):
    """Repair a non-gap extraction construction rooted at *n*.

    Removes the null element under the node, then, for every trace NP
    matched by the tgrep pattern, shrinks the object gap; when no overt
    relativiser can be relabelled, inserts a null-relativiser unary
    node ("NN") with category ss.category/ss.category over the [ICV]P spine.

    pred -- presumably the predicate node used for relativiser relabelling
            (opaque here; verify against relabel_relativiser)
    k    -- node whose tag carries the trace index
    """
    node = n
    debug("Fixing nongap extraction: %s", pprint(node))
    debug("k %s", pprint(k))
    self.remove_null_element(node)

    index = get_trace_index_from_tag(k.tag)
    expr = (
        r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
        % {'tags': ModifierTagsRegex, 'index': index})
    # we use "<<" in the expression, because fix_*_topicalisation comes
    # before fix_nongap_extraction, and this can introduce an extra layer between
    # the phrasal tag and the trace
    for trace_NP, ctx in find_all(node, expr, with_context=True):
        pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

        # remove T from P
        # replace P with S
        self.fix_object_gap(pp, p, t, s)

        # NOTE(review): in the flattened source the scope of this `if` is
        # ambiguous; it is placed inside the loop to match the sibling
        # fix_* methods -- confirm against upstream.
        if not self.relabel_relativiser(pred):
            top, context = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
            ss = context.ss

            debug("Creating null relativiser unary category: %s", ss.category / ss.category)
            replace_kid(top.parent, top, Node("NN", [top], ss.category / ss.category, head_index=0))
def fix_nongap_extraction(self, _, n, pred, k):
    """Repair a non-gap extraction: shrink each matched trace NP and, if no
    relativiser can be relabelled, insert a null-relativiser unary node."""
    node = n
    debug("Fixing nongap extraction: %s", pprint(node))
    debug("k %s", pprint(k))
    self.remove_null_element(node)

    trace_index = get_trace_index_from_tag(k.tag)
    pattern = (
        r'*=PP < { *=P < { /[NPQ]P(?:-%(tags)s)?%(index)s/=T << ^/\*T\*/ $ *=S } }'
        % {'tags': ModifierTagsRegex, 'index': trace_index})
    # "<<" (descendant) rather than "<" (child): fix_*_topicalisation runs
    # earlier and may have introduced an extra layer between the phrasal
    # tag and the trace.
    for _trace_NP, match in find_all(node, pattern, with_context=True):
        # Remove T from P, then replace P with S (collapse the gap).
        self.fix_object_gap(match.pp, match.p, match.t, match.s)

        if not self.relabel_relativiser(pred):
            top, outer = get_first(node, r'/[ICV]P/=TOP $ *=SS', with_context=True)
            ss = outer.ss
            debug("Creating null relativiser unary category: %s", ss.category / ss.category)
            replace_kid(top.parent, top,
                        Node("NN", [top], ss.category / ss.category, head_index=0))
def accept_derivation(self, bundle): tree = bundle.derivation print bundle.label() print "-" * len(bundle.label()) print pprint(tree, sep=' ') print
def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
    """Repair a long-distance gap in a bei-construction.

    Locates the first trace NP (NP-TPC or NP-OBJ dominating the trace
    marker) under *top*, collapses the gap, re-derives categories up to
    *top*, relabels the bei category, and finally sets top's category to
    the left part of its first child's category.

    n       -- optional node whose tag supplies the trace index; when absent
               any trace (r'\*') is matched
    reduced -- when True, skip removing the null element under *top*
               (presumably the reduced/short form has none -- confirm)
    """
    debug("Fixing long bei gap: %s", lrp_repr(node))

    if not reduced:
        self.remove_null_element(top)

    if n:
        index = get_trace_index_from_tag(n.tag)
    else:
        index = r'\*'

    expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index
    trace_NP, ctx = get_first(top, expr, with_context=True)

    pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s
    # remove T from P
    # replace P with S
    self.fix_object_gap(pp, p, t, s)
    self.fix_categories_starting_from(s, until=top)
    self.relabel_bei_category(top, pred)

    # top takes the left (result) part of its first kid's category
    top.category = top[0].category.left

    debug("done %s", pprint(top))
def fix_topicalisation_with_gap(self, node, p, s, t): debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s", lrp_repr(node), pprint(s), pprint(t)) # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5)) t.tag = base_tag(t.tag, strip_cptb_tag=False) # create topicalised category based on the tag of T typeraise_t_category = ptb_to_cat(t) # insert a node with the topicalised category replace_kid(p, t, Node( base_tag(t.tag, strip_cptb_tag=False), [t], typeraise(typeraise_t_category, S, TR_TOPICALISATION), head_index=0)) index = get_trace_index_from_tag(t.tag) # attested gaps: # 575 IP-TPC:t # 134 NP-TPC:t # 10 IP-Q-TPC:t # 8 CP-TPC:t # 4 NP-PN-TPC:t # 2 QP-TPC:t # 2 NP-TTL-TPC:t # 1 PP-TPC:t # 1 IP-IJ-TPC:t # 1 INTJ-TPC:t # 1 CP-Q-TPC:t # 1 CP-CND-TPC:t expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index for top, ctx in find_all(s, expr, with_context=True): debug('top: %s', pprint(top)) self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s) self.fix_categories_starting_from(ctx.s, until=top)
def clusterfix(self, top, pp, p, s, t):
    """Repair argument cluster coordination.

    Shrinks the verb node T out of its gap, reattaches it as the left
    child of TOP (the remaining kids move under a new node), then wraps
    each NP/QP argument pair under the matched VPs in unary nodes with
    the cluster categories shown below.
    """
    debug("Fixing argument cluster coordination: %s", pprint(top))
    debug('T: %s', t)

    # 1. Shrink the verb (node T)
    self.fix_object_gap(pp, p, t, s)
    # 2. Reattach the verb above the TOP node
    new_node = Node('TAG', top.kids, top.category, head_index=0)
    top.kids = [t, new_node]
    # (Reattaching parent pointers)
    for kid in new_node:
        kid.parent = new_node

    # 3. Find and relabel argument clusters
    for node, ctx in find_all(top, r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP', with_context=True):
        vp, np, qp = ctx.vp, ctx.np, ctx.qp

        # Now, VP should have category ((S[dcl]\NP)/QP)/NP
        SbNP = t.category.left.left
        QP, NP = qp.category, np.category
        # NP should have category ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP)
        new_np_category = (SbNP / QP) | ((SbNP / QP) / NP)
        # QP should have category ((S[dcl]\NP)\((S[dcl]\NP)/QP))
        new_qp_category = (SbNP) | ((SbNP) / QP)

        # insert unary nodes
        new_np_node = Node(np.tag, [np], new_np_category, head_index=0)
        np.parent = new_np_node
        new_qp_node = Node(qp.tag, [qp], new_qp_category, head_index=0)
        qp.parent = new_qp_node

        replace_kid(vp, np, new_np_node)
        replace_kid(vp, qp, new_qp_node)

        # NOTE(review): in the flattened source this call's nesting is
        # ambiguous; per-cluster repair (inside the loop) is assumed.
        self.fix_categories_starting_from(new_np_node, top)
def clusterfix(self, top, pp, p, s, t):
    """Repair argument cluster coordination: hoist the verb T above TOP,
    then wrap each NP/QP argument pair in unary cluster-category nodes."""
    debug("Fixing argument cluster coordination: %s", pprint(top))
    debug('T: %s', t)

    # Shrink the verb (node T) out of its gap.
    self.fix_object_gap(pp, p, t, s)

    # Reattach the verb above TOP: the old kids move under a fresh node
    # and T becomes the left child.
    cluster = Node('TAG', top.kids, top.category, head_index=0)
    top.kids = [t, cluster]
    for kid in cluster:
        kid.parent = cluster

    # Locate and relabel each argument cluster.
    pattern = r'/VP/=VP <1 /NP/=NP <2 /(QP|V[PV])/=QP'
    for _node, match in find_all(top, pattern, with_context=True):
        vp = match.vp
        np = match.np
        qp = match.qp

        # VP should now carry ((S[dcl]\NP)/QP)/NP.
        SbNP = t.category.left.left
        np_cat = np.category
        qp_cat = qp.category
        # NP gets ((S[dcl]\NP)/QP)\(((S[dcl]\NP)/QP)/NP).
        raised_np = (SbNP / qp_cat) | ((SbNP / qp_cat) / np_cat)
        # QP gets (S[dcl]\NP)\((S[dcl]\NP)/QP).
        raised_qp = (SbNP) | ((SbNP) / qp_cat)

        # Insert the unary wrapper nodes.
        np_wrapper = Node(np.tag, [np], raised_np, head_index=0)
        np.parent = np_wrapper
        qp_wrapper = Node(qp.tag, [qp], raised_qp, head_index=0)
        qp.parent = qp_wrapper

        replace_kid(vp, np, np_wrapper)
        replace_kid(vp, qp, qp_wrapper)
        self.fix_categories_starting_from(np_wrapper, top)
def fix_topicalisation_without_gap(self, node, p, s, t):
    """Insert a unary node over a copy of T with category
    featureless(p)/featureless(s) for gap-less topicalisation."""
    debug("Fixing topicalisation without gap: %s", pprint(node))

    topicalised = copy(t)
    topicalised.tag = base_tag(topicalised.tag, strip_cptb_tag=False)

    wrapper_category = featureless(p.category) / featureless(s.category)
    replace_kid(p, t, Node(t.tag, [topicalised], wrapper_category, head_index=0))
def fix_topicalisation_without_gap(self, node, p, s, t):
    """Handle topicalisation with no gap: wrap a copy of T in a unary node
    whose category is the featureless P category over the featureless S one."""
    debug("Fixing topicalisation without gap: %s", pprint(node))

    new_kid = copy(t)
    new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

    parent_category = featureless(p.category)
    sibling_category = featureless(s.category)
    replace_kid(p, t,
                Node(t.tag, [new_kid], parent_category / sibling_category,
                     head_index=0))
def view_deriv(env, start_response):
    """WSGI-style handler: render one derivation of one corpus document as HTML.

    Reads doc/deriv ids from the selector vars, loads chtb_%04d.fid from
    CORPORA_PATH, and yields a full HTML page (or an error document when
    either the document or the derivation is missing).
    """
    # node_index is module-level state used by html_node_repr -- reset per
    # request (presumably to number tree nodes; confirm in html_node_repr).
    global node_index
    node_index = 0

    start_response('200 OK', [('Content-type', 'text/html')])

    variables = env['selector.vars']
    doc_id, deriv_id = int(variables['doc']), int(variables['deriv'])
    filename = 'chtb_%04d.fid' % doc_id
    doc = GuessReader(os.path.join(CORPORA_PATH, filename))
    if doc:
        bundle = doc[deriv_id]
        body = ''
        if bundle:
            # The pretty-printed derivation tree.
            body += '<div id="tree">'
            body += pprint(bundle.derivation, sep=' ', newline='<br/>', node_repr=html_node_repr)
            body += '</div>'

            # The sentence, one hoverable span per (non-ignored) leaf; hover
            # shows the POS tag and highlights the matching tree node.
            body += '<div id="main">'
            for leaf, n in izip(leaves(bundle.derivation, lambda e: not is_ignored(e)), count()):
                body += '''<span class="word"><span id="word%(index)d" onmouseover="$('pos').show();$('pos%(index)s').show();$('tree%(index)s').addClassName('highlighted');" onmouseout="$('tree%(index)s').removeClassName('highlighted');$('pos%(index)s').hide();$('pos').hide();">%(body)s</span></span>''' % {
                    'index': n, 'body': leaf.lex}
            body += prev_next_links(doc, doc_id, deriv_id)
            body += '</div>'

            # Hidden POS spans, revealed by the hover handlers above; the
            # indices mirror the word spans (same leaves, same count()).
            body += '<div id="pos">'
            body += '<span id="pos_display">'
            for leaf, n in izip(leaves(bundle.derivation, lambda e: not is_ignored(e)), count()):
                body += '<span id="pos%d" style="display:none">%s</span>' % (n, leaf.tag)
            body += '</span>'
            body += '</div>'

            yield layout(body)
        else:
            yield error_document()
    else:
        yield error_document()
def fix_topicalisation_with_gap(self, node, p, s, t):
    """Type-raise the topicalised node T, then collapse every co-indexed
    trace found inside S."""
    debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
          lrp_repr(node), pprint(s), pprint(t))

    # Strip the trace index so this method cannot match the same node again
    # (absorption on the top node is possible, cf 2:22(5)).
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # Build the topicalised (type-raised) category from T's tag and insert
    # a unary node carrying it.
    topicalised_category = ptb_to_cat(t)
    replace_kid(p, t,
                Node(base_tag(t.tag, strip_cptb_tag=False),
                     [t],
                     typeraise(topicalised_category, S, TR_TOPICALISATION),
                     head_index=0))

    trace_index = get_trace_index_from_tag(t.tag)

    # attested gaps:
    # 575 IP-TPC:t
    # 134 NP-TPC:t
    # 10 IP-Q-TPC:t
    # 8 CP-TPC:t
    # 4 NP-PN-TPC:t
    # 2 QP-TPC:t
    # 2 NP-TTL-TPC:t
    # 1 PP-TPC:t
    # 1 IP-IJ-TPC:t
    # 1 INTJ-TPC:t
    # 1 CP-Q-TPC:t
    # 1 CP-CND-TPC:t
    gap_expr = (r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }'
                % trace_index)

    for gap_top, match in find_all(s, gap_expr, with_context=True):
        debug('top: %s', pprint(gap_top))
        self.fix_object_gap(match.pp, match.p, match.t, match.s)
        self.fix_categories_starting_from(match.s, until=gap_top)
import psyco psyco.full() except ImportError: pass from munge.ccg.parse import * file = "final/%s" % sys.argv[1] t=naive_label_derivation(parse_tree(open(file).readlines()[2*int(sys.argv[2])+1])) print t print "sent:" print "-----" print ' '.join(t.text()) deps = mkdeps(t) print "deps:" print "-----" for l, r in deps: print "%s|%s" % (l, r) print "leaves:" print "-------" for leaf in leaves(t): print leaf.lex, leaf.cat print "unhandled combs:" print "----------------" for comb in unanalysed: print comb print "finished:" print pprint(t)
def show_pp_tree(match_node, bundle): print pprint(bundle.derivation, focus=match_node)
def show_pp_node(match_node, bundle): print pprint(match_node)