def fix_short_bei_io_gap(self, node, pp, bei, beis, t, p, s):
    debug("fixing short bei io gap: pp:%s\np:%s\ns:%s",
          lrp_repr(pp), lrp_repr(p), lrp_repr(s))

    replace_kid(pp, p, s)
    self.fix_categories_starting_from(s, until=pp)
    bei.category = bei.category.clone_with(right=beis.category)
def fix_short_bei_obj_gap(self, node, pp, bei, beis, t, p, s):
    debug("fixing short bei object gap: pp:%s\np:%s\ns:%s",
          lrp_repr(pp), lrp_repr(p), lrp_repr(s))

    # simple test case in 29:71(3) for bei with extracted NP
    replace_kid(pp, p, s)
    self.fix_categories_starting_from(s, until=bei.parent[1])
    bei.category = bei.category.clone_with(right=bei.parent[1].category)
def replace_kid(node, old, new):
    # make sure you go through Node#__setitem__, not by modifying Node.kids directly,
    # otherwise parent pointers won't get updated
    try:
        i = node.kids.index(old)
        node[i] = new
    except ValueError:
        raise MungeException(
            "Tried to replace:\n\t%s\nwith:\t%s\nactual kids:\n\t%s" %
            (lrp_repr(old), lrp_repr(new),
             '\n\t'.join(lrp_repr(kid) for kid in node.kids)))
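# Usage sketch for replace_kid (illustrative only; `parent`, `old_kid` and `new_kid`
# are hypothetical Node instances, not names defined in this module):
#
#   replace_kid(parent, old_kid, new_kid)
#   # new_kid now occupies old_kid's position; because the splice goes through
#   # Node#__setitem__, new_kid's parent pointer is updated as well
#
# If old_kid is not actually a child of parent, a MungeException is raised listing
# the parent's actual kids.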
def fix_long_bei_gap(self, node, bei, pred, top, n=None, reduced=False):
    debug("Fixing long bei gap: %s", lrp_repr(node))

    if not reduced:
        self.remove_null_element(top)

    if n:
        index = get_trace_index_from_tag(n.tag)
    else:
        index = r'\*'

    expr = r'*=PP < { *=P < { /NP-(?:TPC|OBJ)/=T < ^/%s/a $ *=S } }' % index

    trace_NP, ctx = get_first(top, expr, with_context=True)
    pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

    # remove T from P
    # replace P with S
    self.fix_object_gap(pp, p, t, s)
    self.fix_categories_starting_from(s, until=top)
    self.relabel_bei_category(top, pred)

    top.category = top[0].category.left

    debug("done %s", pprint(top))
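# Schematic of the subtree targeted by `expr` in fix_long_bei_gap, read directly off
# the tgrep query (labels are the query's capture names, not a specific CTB example):
#
#   PP                        PP
#   └── P                     └── S
#       ├── NP-TPC/NP-OBJ =T  -->
#       │   └── *trace*
#       └── S
#
# fix_object_gap removes T and replaces P with S, then categories are re-derived from
# S up to `top` and bei's category is relabelled against the predicate.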
def fix_topicalisation_with_gap(self, node, p, s, t):
    debug("Fixing topicalisation with gap:\nnode=%s\ns=%s\nt=%s",
          lrp_repr(node), pprint(s), pprint(t))

    # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # create topicalised category based on the tag of T
    typeraise_t_category = ptb_to_cat(t)
    # insert a node with the topicalised category
    replace_kid(p, t, Node(
        base_tag(t.tag, strip_cptb_tag=False), [t],
        typeraise(typeraise_t_category, S, TR_TOPICALISATION),
        head_index=0))

    index = get_trace_index_from_tag(t.tag)

    # attested gaps:
    # 575 IP-TPC:t
    # 134 NP-TPC:t
    #  10 IP-Q-TPC:t
    #   8 CP-TPC:t
    #   4 NP-PN-TPC:t
    #   2 QP-TPC:t
    #   2 NP-TTL-TPC:t
    #   1 PP-TPC:t
    #   1 IP-IJ-TPC:t
    #   1 INTJ-TPC:t
    #   1 CP-Q-TPC:t
    #   1 CP-CND-TPC:t
    expr = r'/IP/=TOP << { *=PP < { *=P < { /[NICQP]P-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } } }' % index

    for top, ctx in find_all(s, expr, with_context=True):
        debug('top: %s', pprint(top))
        self.fix_object_gap(ctx.pp, ctx.p, ctx.t, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)
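# Note on the topic node inserted above (assuming typeraise(X, S, TR_TOPICALISATION)
# builds the forward type-raised category S/(S/X); the exact shape depends on the
# typeraise helper): an NP topic would be relabelled S/(S/NP), so it combines with a
# clause whose co-indexed gap the loop above has unhooked.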
def fix_ip_app(self, p, a, s):
    debug("Fixing IP-APP NX: %s", lrp_repr(p))

    new_kid = copy(a)
    new_kid.tag = base_tag(new_kid.tag)  # relabel to stop infinite matching

    replace_kid(p, a, Node("NN", [new_kid], s.category / s.category, head_index=0))
def tgrep(deriv, expression, with_context=False, nonrecursive=False, left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True,
    each matched node yields a pair (node, context), and captured nodes are accessible
    by name using the dict-like context. If the user wants to keep context around, a
    copy must be made.'''
    if not expression:
        raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)
    if query is None:
        initialise()

        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)

        query = yacc.parse(expression)
        expression_cache[expression] = query

    # Default traversal method is right to left
    traversal_method = (single if nonrecursive
                        else nodes if left_to_right
                        else nodes_reversed)

    context = Context()
    for node in traversal_method(deriv):
        context.clear()

        if query.is_satisfied_by(node, context):
            if _tgrep_debug:
                debug("%s matched %s", lrp_repr(node), query)

            if with_context:
                yield node, context
            else:
                yield node
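# Usage sketch for tgrep (illustrative; `deriv` is any derivation tree and
# `do_something` is a hypothetical callback, not part of this module):
#
#   for node, ctx in tgrep(deriv, r'/NP/=N < { /CP/=C $ *=S }', with_context=True):
#       do_something(ctx.n, ctx.c, ctx.s)   # captures are reached via their =NAME, lowercased
#
# The same Context object is reused (and cleared) for every candidate node, so copy it
# if a match's captures need to outlive the loop iteration.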
def fix_short_bei_subj_gap(self, node, bei, pp, p, t, s):
    debug("fixing short bei subject gap: %s", lrp_repr(pp))

    # take the VP sibling of SB
    # replace T with S
    # this analysis isn't entirely correct
    replace_kid(pp, p, s)
    self.fix_categories_starting_from(s, pp)
    bei.category = bei.category.clone_with(right=bei.parent[1].category)
def fix_subject_extraction(self, _, n, pred, w=None, reduced=False):
    global use_bare_N
    debug("%s", reduced)

    node = n
    debug("Fixing subject extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                 NP
        #   /  \                |
        # WHNP  CP     -->      CP
        #      /  \            /  \
        #     IP  DEC         IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'*=PP < { *=P < { /NP-SBJ/=T << ^/\*T\*%s/ $ *=S } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            pp, p, t, s = ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=node)

    if not self.relabel_relativiser(pred):
        # TOP is the shrunk VP
        # after shrinking, we can get VV or VA here
        # left_to_right so that we find the right node (used to match against the CP 已建成的 in 4:45(7))
        result = get_first(
            node,
            r'{ /([ICV]P|V[VA]|VRD|VSB|VCD)/=TOP $ *=SS } ! > /([ICV]P|V[VA]|VRD|VSB|VCD)/',
            with_context=True, left_to_right=True)

        if not result:
            debug('Could not find verbal category; did not create null relativiser.')
            return

        top, context = result
        SS = context.ss.category

        debug("Creating null relativiser unary category: %s", SS / SS)
        replace_kid(top.parent, top, Node("NN", [top], SS / SS, head_index=0))
def fix_object_extraction(self, _, n, pred, w=None, reduced=False):
    global use_bare_N

    node = n
    debug("Fixing object extraction: %s", lrp_repr(node))

    # We only want this if we are using the N -> NP unary rule
    # This 'fix' lets us rewrite NP(WHNP CP) as NP(CP) with categories NP(N)
    if use_bare_N and pred.tag.startswith('NP'):
        # Fix for the NP(VP de) case:
        # ---------------------------
        #    NP                 NP
        #   /  \                |
        # WHNP  CP     -->      CP
        #      /  \            /  \
        #     IP  DEC         IP  DEC
        if not pred.is_leaf():
            pred.kids.pop(0)
            pred.head_index = 0
    else:
        if not reduced:
            self.remove_null_element(node)

        if w:
            index = get_trace_index_from_tag(w.tag)
        else:
            index = ''

        expr = r'/[IC]P/=TOP << { *=PP < { *=P < { /NP-(OBJ|EXT)/=T << ^/\*T\*%s/ $ *=S } } }' % index

        for trace_NP, ctx in find_all(node, expr, with_context=True):
            top, pp, p, t, s = ctx.top, ctx.pp, ctx.p, ctx.t, ctx.s

            self.fix_object_gap(pp, p, t, s)
            self.fix_categories_starting_from(s, until=top)

            # If we couldn't find the DEC node, this is the null relativiser case
            if not self.relabel_relativiser(pred):
                # TOP is the S node
                # null relativiser category comes from sibling of TOP
                # if TOP has no sibling, then we're likely inside a NP-PRD < CP reduced relative (cf 1:2(9))
                result = get_first(top, r'* $ *=SS', with_context=True, nonrecursive=True)

                if result:
                    _, ctx = result
                    ss = ctx.ss

                    debug("Creating null relativiser unary category: %s", ss.category / ss.category)
                    replace_kid(top.parent, top,
                                Node("NN", [top], ss.category / ss.category, head_index=0))
def fix_modification(self, node, p, s, t):
    debug("Fixing modification: %s", lrp_repr(node))
    S, P = s.category, p.category

    # If you don't strip the tag :m from the newly created child (new_kid),
    # the fix_modification pattern will match infinitely when tgrep visits new_kid
    new_kid = copy(t)
    new_kid.tag = base_tag(new_kid.tag, strip_cptb_tag=False)

    new_category = featureless(P) / featureless(S)
    debug("Creating category %s", new_category)

    replace_kid(p, t, Node(t.tag, [new_kid], new_category, head_index=0))
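# Illustrative note on the category fix_modification builds (assuming featureless()
# simply strips a category's features, as its name suggests): with a parent category
# P = S[dcl] and sibling category S = S[dcl], featureless(P) / featureless(S) is S/S,
# so the relabelled node acts as a plain modifier of its sibling.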
def fix_whword_topicalisation(self, node, p, s, t):
    debug('Fixing wh-word topicalisation: node: %s', lrp_repr(node))

    # stop this method from matching again (in case there's absorption on the top node, cf 2:22(5))
    t.tag = base_tag(t.tag, strip_cptb_tag=False)

    # create topicalised category based on the tag of T
    typeraise_t_category = ptb_to_cat(t)
    # insert a node with the topicalised category
    replace_kid(p, t, Node(
        base_tag(t.tag, strip_cptb_tag=False), [t],
        typeraise(typeraise_t_category, SbNP, TR_TOPICALISATION),
        head_index=0))

    index = get_trace_index_from_tag(t.tag)
    expr = r'*=PP < { /VP/=P < { /NP-(?:SBJ|OBJ)/=T < ^/\*T\*%s/ $ *=S } }' % index

    for top, ctx in find_all(p, expr, with_context=True):
        replace_kid(ctx.pp, ctx.p, ctx.s)
        self.fix_categories_starting_from(ctx.s, until=top)
def fix_reduced_long_bei_gap(self, node, *args, **kwargs):
    debug("Fixing reduced long bei gap: %s", lrp_repr(node))
    return self.fix_long_bei_gap(node, *args, **update(kwargs, reduced=True))