def make_AP(self, nodes, cmpd_str, beforehead=True):
    """Build the adjectival constituent for a run of adjective nodes.

    nodes      -- list of nodes; nodes[0] is the (first) adjective.  For the
                  'A C A' pattern, nodes[1] is the conjunction and nodes[2]
                  the second adjective.
    cmpd_str   -- label pattern of `nodes`: 'A' or 'A C A'.
    beforehead -- when true and the pattern is a single 'A', the adjective
                  node is handed back as-is instead of being wrapped.

    Returns either nodes[0] itself (bare adjective before the head) or a
    new 'AP' tree.  For 'A C A' the AP holds the first adjective plus a
    'COORD' subtree containing the conjunction and an AP for the second
    adjective.
    """
    first_adj = nodes[0]

    # A bare adjective placed before the head is not wrapped in an AP.
    if cmpd_str == 'A' and beforehead:
        return first_adj

    phrase = LabelledTree('AP')
    phrase.add_child(first_adj)
    if cmpd_str != 'A C A':
        return phrase

    # Coordinated adjectives: AP(A COORD(C AP(A)))
    coordination = LabelledTree('COORD')
    coordination.add_child(nodes[1])
    # beforehead=False forces the second adjective to get its own AP.
    second_ap = self.make_AP([nodes[2]], 'A', beforehead=False)
    coordination.add_child(second_ap)
    phrase.add_child(coordination)
    return phrase
def do_compounds(self, tree):
    """Recursively collapse subtrees that match an entry of new_compounds.

    self.new_compounds is iterated as (form, nt_cat, pos) triples.  A node
    with more than one child matches a triple when its label equals nt_cat
    and its lowercased yield string equals form.  On the first match the
    node's children are replaced by a single `pos` node dominating one leaf
    (the yield with spaces turned into underscores) and that subtree is not
    explored further.  Otherwise the search continues in every child.

    The tree is modified in place; nothing is returned.
    """
    if not tree.has_children():
        return

    if len(tree.get_children()) > 1:
        for form, nt_cat, pos in self.new_compounds:
            if tree.label != nt_cat:
                continue
            surface = tree.tree_yield_str()
            if surface.lower() != form:
                continue
            # Replace the children with one compound node of category `pos`.
            compound = LabelledTree(pos)
            compound.add_child(LabelledTree(surface.replace(' ', '_')))
            tree.children = [compound]
            return

    for subtree in tree.get_children():
        self.do_compounds(subtree)
def make_PP(self, nodes):
    """Build a 'PP' tree from a preposition followed by its complement.

    nodes -- list of nodes; nodes[0] is expected to be the preposition and
             the remaining nodes are handed to make_NP.

    Returns a new 'PP' tree whose children are the preposition, the NP
    built from the rest and, when make_NP reports one, the trailing
    coordination (`tail`).
    """
    pp = LabelledTree('PP')
    # The preposition is supposed to be the first node.
    pp.add_child(nodes[0])
    noun_phrase, tail = self.make_NP(nodes[1:])
    pp.add_child(noun_phrase)
    # Identity test: `<>` is obsolete syntax, and equality against None
    # could be intercepted by a custom comparison on the tree class.
    if tail is not None:
        pp.add_child(tail)
    return pp
def make_VP(self, nodes):
    """Build a 'VP' tree from a verb followed by its dependents.

    nodes -- list of nodes; nodes[0] is expected to be the verb.

    When the node right after the verb is a preposition (self.isP), the
    remaining nodes are grouped into a PP by make_PP.  In every other case,
    including a list holding only the verb, the VP is left flat with
    `nodes` as its children.

    Returns the new 'VP' tree.
    """
    vp = LabelledTree('VP')
    # The verb is supposed to be the first node.
    vp.add_child(nodes[0])
    # Length guard: a verb-only list used to raise IndexError on nodes[1].
    if len(nodes) > 1 and self.isP(nodes[1]):
        vp.add_child(self.make_PP(nodes[1:]))
    else:
        # Otherwise backtrack: keep every node as a direct child of the VP.
        vp.children = nodes
    return vp
def make_COORD(self, nodes):
    """Build a 'COORD' tree from a conjunction followed by one conjunct.

    nodes -- list of nodes; nodes[0] is expected to be the conjunction and
             at least one more node must follow it.

    If the node after the conjunction is a preposition (self.isP) the rest
    is grouped as a PP, otherwise as an NP (adjective coordination is
    handled in make_AP).

    Returns the two-element list [coord, kind] where kind is 'PP' or 'NP'.
    """
    coord = LabelledTree('COORD')
    # The conjunction is supposed to be the first node.
    coord.add_child(nodes[0])
    if self.isP(nodes[1]):
        # C P ... => coordination of PPs
        coord.add_child(self.make_PP(nodes[1:]))
        coord_kind = 'PP'
    else:
        # make_NP returns the pair [np, tail]; the original passed the whole
        # pair to add_child instead of the NP tree itself.
        noun_phrase, tail = self.make_NP(nodes[1:])
        coord.add_child(noun_phrase)
        # Same treatment as make_PP: keep a trailing coordination next to
        # the NP so that no node is dropped.
        # NOTE(review): confirm this is the desired attachment site.
        if tail is not None:
            coord.add_child(tail)
        coord_kind = 'NP'
    return [coord, coord_kind]
def do_partitive(self, node, xmlnode):
    """Expand the text of a partitive element into children of `node`.

    node    -- tree node being built; it is flagged as compound and receives
               the new children.
    xmlnode -- DOM node whose text children carry the surface form.  The
               last text child that is not whitespace-only is used.

    Two shapes are recognised:
      * "de la" / "de l'"   -> a compound 'P' child over "de" plus a
                               compound 'D' child (feature "def") over
                               "la" / "l'";
      * "de" / "du" / "d'"  -> a single leaf child with that form.
    Any other text is reported on stderr and `node` gets no child.
    """
    node.set_compound_true()

    text = ''
    if xmlnode.hasChildNodes():
        for xmlchild in xmlnode.childNodes:
            if (xmlchild.nodeType == xmlchild.TEXT_NODE
                    and not re.match(r'^\s*$', xmlchild.nodeValue)):
                text = xmlchild.nodeValue

    # Preposition + definite article written as two tokens.
    match = re.match(r"^\s*([Dd]e)\s+(l(a|'))\s*$", text)
    if match is not None:
        prep = LabelledTree("P")
        prep.set_compound_true()
        det = LabelledTree("D")
        det.set_feature("def")
        det.set_compound_true()
        prep_leaf = LabelledTree(match.group(1))
        det_leaf = LabelledTree(match.group(2))
        node.add_child(prep)
        node.add_child(det)
        prep.add_child(prep_leaf)
        det.add_child(det_leaf)
        return

    # Single-token form.
    match = re.match(r"^\s*([Dd]([eu]|'))\s*$", text)
    if match is not None:
        node.add_child(LabelledTree(match.group(1)))
        return

    sys.stderr.write("error while reading partitive(" + text + ")\n")
def doNode(self, xmlnode):
    """Create the tree node corresponding to one XML element.

    xmlnode -- DOM element.  A 'w' element is treated as a word (terminal);
               any other element becomes a non-terminal labelled with the
               element name.

    For a word, the label comes from the 'cat' attribute (flagged compound
    when a 'compound' attribute is present) or else from 'catint' (always
    flagged compound).  Non-empty 'subcat' and 'lemma' values are stored as
    feature and lemma, then the node goes through self.do_morphology.

    For every node, an 'fct' attribute is normalised ('-' -> '_',
    upper-cased, 'DEOBJ' -> 'DE_OBJ', 'AOBJ' -> 'A_OBJ') and stored as the
    function, self.treenum is recorded, and a 'type' attribute is copied to
    node.type.

    Returns the node built for `xmlnode`.

    NOTE(review): a 'w' element with neither 'cat' nor 'catint' leaves the
    node as None before do_morphology is called; a 'subcat' or 'lemma'
    attribute on such an element raises AttributeError.
    NOTE(review): the block structure was reconstructed from a collapsed
    source; confirm that the 'fct' handling is meant to apply to word nodes
    as well as to non-terminals.
    """
    def attr_value(name):
        # Standard DOM accessors, available on Python 2 and 3, in place of
        # the Python-2-only NamedNodeMap.has_key().
        if xmlnode.hasAttribute(name):
            return xmlnode.getAttribute(name)
        return None

    node = None
    if xmlnode.nodeName == 'w':
        # Extract the category of the word (terminal).
        cat = attr_value('cat')
        if cat is not None:
            node = LabelledTree(self.normalise_wsp(cat))
            if attr_value('compound') is not None:
                node.set_compound_true()
        else:
            catint = attr_value('catint')
            if catint is not None:
                node = LabelledTree(self.normalise_wsp(catint))
                node.set_compound_true()

        subcat = attr_value('subcat')
        if subcat is not None:
            subcat = self.normalise_wsp(subcat)
            if subcat != '':
                node.set_feature(subcat)

        # Addition (marie): the lemma is useful.
        lemma = attr_value('lemma')
        if lemma is not None:
            lemma = self.normalise_wsp(lemma)
            if lemma != '':
                node.set_lemma(lemma)

        node = self.do_morphology(xmlnode, node)
    else:
        # Default: non-terminal node.
        node = LabelledTree(xmlnode.nodeName)

    fct = attr_value('fct')
    if fct is not None:
        # marie: '-' is systematically turned into '_' in functional tags,
        # and a few erroneous tag forms are corrected (case errors and
        # missing '_').
        fun = fct.replace('-', '_').upper()
        fun = fun.replace('DEOBJ', 'DE_OBJ')
        fun = fun.replace('AOBJ', 'A_OBJ')
        node.set_function(fun)

    # mathieu: record the sentence number in the treenum attribute of
    # every node.
    node.set_Treenum(self.treenum)

    # Specific to MFT.
    node_type = attr_value('type')
    if node_type is not None:
        node.type = node_type
    return node
def make_NP(self, nodes, beforehead=True):
    """Build an 'NP' tree from a flat list of nodes.

    nodes      -- list of nodes, consumed left to right.
    beforehead -- whether the nodes seen so far precede the head noun; it
                  switches to False once an 'N' node has been consumed and
                  is forwarded to make_AP.

    Handling per leading node:
      * 'D', 'N', 'ET'  -> attached directly;
      * 'A'             -> passed to make_AP, as 'A C A' when followed by a
                           conjunction and another adjective;
      * preposition     -> all remaining nodes become one PP (closest
                           attachment preferred; the case
                           "N1 (P N2) others", where `others` attaches to
                           N1, is not handled);
      * 'C'             -> all remaining nodes become one COORD; an NP
                           coordination is attached to the NP, a PP
                           coordination is returned as the tail;
      * anything else   -> attached directly.

    Returns the two-element list [np, tail]; tail is None unless a PP
    coordination was found.
    """
    noun_phrase = LabelledTree('NP')
    tail = None
    while nodes:
        first = nodes[0]
        if first.label in ('D', 'N', 'ET'):
            noun_phrase.add_child(first)
            if first.label == 'N':
                beforehead = False
            nodes = nodes[1:]
        elif first.label == 'A':
            if (len(nodes) > 2 and nodes[1].label == 'C'
                    and nodes[2].label == 'A'):
                noun_phrase.add_child(
                    self.make_AP(nodes[0:3], 'A C A', beforehead))
                nodes = nodes[3:]
            else:
                noun_phrase.add_child(
                    self.make_AP([first], 'A', beforehead))
                nodes = nodes[1:]
        elif self.isP(first):
            # A preposition swallows everything that follows.
            noun_phrase.add_child(self.make_PP(nodes))
            nodes = []
        elif first.label == 'C':
            coord, coord_kind = self.make_COORD(nodes)
            nodes = []
            if coord_kind == 'PP':
                tail = coord
            else:
                noun_phrase.add_child(coord)
        else:
            # No rule for this label: keep the node as a direct child and
            # move on.  Without this branch the loop never advanced and
            # ran forever on such a node.
            noun_phrase.add_child(first)
            nodes = nodes[1:]
    return [noun_phrase, tail]
def undo_compound(self, tree):
    """Rebuild the internal structure of a compound node.

    tree -- candidate compound; self.children_labels(tree) gives the label
            pattern of its children as a space-separated string.

    Returns None when self.is_compound(tree) is false, an empty list when
    the pattern is not handled, and otherwise the list of nodes replacing
    the compound:

      N A            -> N, AP(A)
      N A A          -> N, AP(A), AP(A)
      A N            -> A, N              (the adjective is left unwrapped)
      N P [D] N      -> N, PP(P NP([D] N))
      N A P [D] N    -> N, AP(A), PP(P NP([D] N))

    'P+D' is accepted wherever 'P' appears in the last two patterns.
    self.inject_subcats_to_compound(tree) is called first for every handled
    pattern.
    """
    if not self.is_compound(tree):
        return None

    pattern = self.children_labels(tree)

    if pattern in ('N A', 'N A A'):
        self.inject_subcats_to_compound(tree)
        first_ap = LabelledTree("AP")
        first_ap.add_child(tree.children[1])
        if pattern == 'N A':
            return [tree.children[0], first_ap]
        second_ap = LabelledTree("AP")
        second_ap.add_child(tree.children[2])
        return [tree.children[0], first_ap, second_ap]

    if pattern == 'A N':
        self.inject_subcats_to_compound(tree)
        return [tree.children[0], tree.children[1]]

    prepositional = ('N P N', 'N P+D N', 'N P D N',
                     'N A P N', 'N A P+D N', 'N A P D N')
    if pattern not in prepositional:
        return []

    self.inject_subcats_to_compound(tree)
    has_adj = tree.children[1].label == 'A'
    # Index of the first node of the embedded NP: one further to the right
    # when an adjective follows the head noun.
    np_start = 3 if has_adj else 2

    inner_np = LabelledTree("NP")
    inner_np.add_child(tree.children[np_start])
    if len(tree.children) == np_start + 2:
        # Determiner + noun: the noun is the node after the determiner.
        inner_np.add_child(tree.children[np_start + 1])

    prep_phrase = LabelledTree("PP")
    prep_phrase.add_child(tree.children[np_start - 1])
    prep_phrase.add_child(inner_np)

    if not has_adj:
        return [tree.children[0], prep_phrase]

    adj_phrase = LabelledTree("AP")
    adj_phrase.add_child(tree.children[1])
    return [tree.children[0], adj_phrase, prep_phrase]