Пример #1
0
 def make_AP(self, nodes, cmpd_str, beforehead=True):
     """Build the adjectival constituent for ``nodes``.

     ``cmpd_str`` is the space-separated label pattern of ``nodes``;
     the patterns handled here are ``'A'`` and ``'A C A'``.

     When ``beforehead`` is true and the pattern is a single ``'A'``,
     the adjective node itself is returned, without any AP wrapper.
     Otherwise an ``AP`` tree is returned whose first child is
     ``nodes[0]``; for ``'A C A'`` it also gets a ``COORD`` child
     holding the conjunction and the second adjective wrapped in its
     own ``AP``.
     """
     # A lone adjective placed before the head stays bare.
     if cmpd_str == 'A' and beforehead:
         return nodes[0]

     phrase = LabelledTree('AP')
     phrase.add_child(nodes[0])
     if cmpd_str != 'A C A':
         return phrase

     # Coordinated adjectives: AP(A COORD(C AP(A))).
     conjunct = LabelledTree('COORD')
     conjunct.add_child(nodes[1])
     # beforehead=False forces the second adjective into an AP.
     conjunct.add_child(self.make_AP([nodes[2]], 'A', beforehead=False))
     phrase.add_child(conjunct)
     return phrase
Пример #2
0
 def do_compounds(self, tree):
     """Recursively collapse subtrees listed in ``self.new_compounds``.

     Each entry of ``self.new_compounds`` is a ``(form, nt_cat, pos)``
     triple. A node with more than one child, whose label equals
     ``nt_cat`` and whose lower-cased yield equals ``form``, has its
     children replaced by a single ``pos`` node dominating one leaf:
     the yield with spaces turned into underscores. A node rewritten
     this way is not explored further; any other node is traversed
     child by child. The tree is modified in place; nothing is returned.
     """
     if not tree.has_children():
         return

     if len(tree.get_children()) > 1:
         for (form, nt_cat, pos) in self.new_compounds:
             if tree.label != nt_cat:
                 continue
             surface = tree.tree_yield_str()
             if surface.lower() != form:
                 continue
             # Match: swap the children for one compound of category pos.
             compound = LabelledTree(pos)
             compound.add_child(LabelledTree(surface.replace(' ','_')))
             tree.children = [compound]
             return

     for subtree in tree.get_children():
         self.do_compounds(subtree)
Пример #3
0
 def make_PP(self, nodes):
     """Build a ``PP`` tree from a preposition followed by NP material.

     ``nodes[0]`` is taken to be the preposition; the remaining nodes
     are handed to ``make_NP``, which returns an ``[np, tail]`` pair.
     The NP becomes the second child of the PP and, when ``make_NP``
     produced a tail, that tail is attached as a further child.

     Returns the new ``PP`` tree.
     """
     pp = LabelledTree('PP')
     # prep is supposed to be the first node
     pp.add_child(nodes[0])
     np, tail = self.make_NP(nodes[1:])
     pp.add_child(np)
     # Identity test: `<>` is Python-2 only and would go through any
     # comparison operators the tree class might define.
     if tail is not None:
         pp.add_child(tail)
     return pp
Пример #4
0
 def make_VP(self, nodes):
     """Build a ``VP`` tree from a verb followed by its dependents.

     ``nodes[0]`` is taken to be the verb. If the node right after it
     is a preposition (per ``self.isP``), everything after the verb is
     grouped into one PP via ``make_PP``. Otherwise the VP is left flat,
     with ``nodes`` as its children.

     Returns the new ``VP`` tree. Raises ``IndexError`` if ``nodes`` is
     empty.
     """
     vp = LabelledTree('VP')
     # V is supposed to be the first node
     vp.add_child(nodes[0])
     # Guard the look-ahead: a verb with nothing after it used to raise
     # IndexError on nodes[1]; it now yields the flat VP.
     if len(nodes) > 1 and self.isP(nodes[1]):
         vp.add_child(self.make_PP(nodes[1:]))
     else:
         # otherwise we backtrack: keep the original flat node list
         # (the list object itself is stored, not a copy)
         vp.children = nodes
     return vp
Пример #5
0
 def make_COORD(self, nodes):
     """Build a ``COORD`` tree from a conjunction and what follows it.

     ``nodes[0]`` is taken to be the conjunction. If the next node is a
     preposition (per ``self.isP``) the rest is built as a PP, otherwise
     as an NP (APs are handled elsewhere).

     Returns ``[coord, kind]`` where ``kind`` is ``'PP'`` or ``'NP'``.
     Raises ``IndexError`` if ``nodes`` has fewer than two elements.
     """
     coord = LabelledTree('COORD')
     # conjunction is supposed to be the first node
     coord.add_child(nodes[0])
     # if C P ... => coordination of PPs
     if self.isP(nodes[1]):
         coord.add_child(self.make_PP(nodes[1:]))
         coord_type = 'PP'
     # otherwise = coordination of NPs (APs handled differently)
     else:
         # make_NP returns an [np, tail] pair; the pair itself used to
         # be passed to add_child. Unpack it as make_PP does.
         np, tail = self.make_NP(nodes[1:])
         coord.add_child(np)
         if tail is not None:
             # Keep the trailing PP coordination inside this COORD so
             # that no node is dropped (same treatment as make_PP).
             coord.add_child(tail)
         coord_type = 'NP'
     return [coord, coord_type]
Пример #6
0
 def do_partitive(self,node,xmlnode):
     """Attach the sub-structure of a partitive determiner to ``node``.

     ``node`` is marked as a compound. The text of ``xmlnode`` is taken
     from its last non-blank text child:

     * ``de la`` / ``de l'`` gives two compound children, ``P`` over the
       preposition and ``D`` (feature ``def``) over the article;
     * ``de`` / ``du`` / ``d'`` gives a single child, the form itself;
     * anything else is reported on stderr and ``node`` gets no child.

     Nothing is done beyond the compound marking when ``xmlnode`` has
     no child nodes. Returns ``None``.
     """
     node.set_compound_true()
     val = ''
     if not xmlnode.hasChildNodes():
         return
     for xmlchild in xmlnode.childNodes:
         # keep the last text child that is not pure whitespace
         if xmlchild.nodeType == xmlchild.TEXT_NODE and not re.match(r'^\s*$',xmlchild.nodeValue):
             val = xmlchild.nodeValue
     match = re.match(r"^\s*([Dd]e)\s+(l(a|'))\s*$",val)
     if match is not None:
         childone = LabelledTree("P")
         childone.set_compound_true()
         childtwo = LabelledTree("D")
         childtwo.set_feature("def")
         childtwo.set_compound_true()
         grandchildone = LabelledTree(match.group(1))
         grandchildtwo = LabelledTree(match.group(2))
         node.add_child(childone)
         node.add_child(childtwo)
         childone.add_child(grandchildone)
         childtwo.add_child(grandchildtwo)
         return
     match = re.match(r"^\s*([Dd]([eu]|'))\s*$",val)
     if match is not None:
         node.add_child(LabelledTree(match.group(1)))
     else:
         sys.stderr.write("error while reading partitive(" +val+")\n")
Пример #7
0
    def doNode(self,xmlnode):
        """Convert one XML element into a ``LabelledTree`` node.

        ``<w>`` elements (terminals) are labelled with their ``cat``
        attribute, or with ``catint`` when ``cat`` is absent; ``subcat``
        and ``lemma`` are copied onto the node when non-empty, and the
        node is then passed through ``self.do_morphology``. Every other
        element becomes a node labelled with the element name, with its
        ``fct`` attribute normalised into the function tag.

        In both cases the node receives ``self.treenum`` and, when the
        element has a ``type`` attribute, a ``type`` field.

        Attribute access uses the DOM ``hasAttribute``/``getAttribute``
        element methods, so ``xmlnode`` is assumed to be a DOM element.

        Returns the node built.
        """
        node = None
        if xmlnode.nodeName == 'w':                          # extracting the categories of words (terminals)
            if xmlnode.hasAttribute('cat'):
                node = LabelledTree(self.normalise_wsp(xmlnode.getAttribute('cat')))
                if xmlnode.hasAttribute('compound'):
                    node.set_compound_true()
            elif xmlnode.hasAttribute('catint'):
                # a word carrying only an internal category is flagged
                # as a compound as well
                node = LabelledTree(self.normalise_wsp(xmlnode.getAttribute('catint')))
                node.set_compound_true()
            # NOTE(review): when a <w> has neither 'cat' nor 'catint',
            # node is still None here and the calls below fail unless
            # do_morphology supplies a node -- confirm intended handling.
            if xmlnode.hasAttribute('subcat'):
                val = self.normalise_wsp(xmlnode.getAttribute('subcat'))
                if val != '':
                    node.set_feature(val)
            # marie (addition): the lemma is useful!
            if xmlnode.hasAttribute('lemma'):
                val = self.normalise_wsp(xmlnode.getAttribute('lemma'))
                if val != '':
                    node.set_lemma(val)
            node = self.do_morphology(xmlnode,node)
        else:                                                 #default non terminal nodes
            node = LabelledTree(xmlnode.nodeName)
            if xmlnode.hasAttribute('fct'):
                # marie : '-' systematically turned into '_' in functional tags
                # marie : correction of a few erroneous functional tag forms:
                # case errors, and missing '_'
                fun = xmlnode.getAttribute('fct').replace('-', '_').upper()
                fun = fun.replace('DEOBJ', 'DE_OBJ')
                fun = fun.replace('AOBJ', 'A_OBJ')
                node.set_function(fun)
        # mathieu : mark the SENTnb in the treenum attribute for each node
        node.set_Treenum(self.treenum)
        # specific to MFT
        if xmlnode.hasAttribute('type'):
            node.type = xmlnode.getAttribute('type')

        return node
Пример #8
0
 def make_NP(self, nodes, beforehead=True):
     """Build an ``NP`` tree from a flat sequence of nodes.

     Nodes are consumed left to right:

     * ``D``, ``N`` and ``ET`` nodes become direct children; once an
       ``N`` has been seen, ``beforehead`` is switched off;
     * an ``A`` (or an ``A C A`` run) is built with ``make_AP``, which
       receives the current ``beforehead`` value;
     * a preposition (per ``self.isP``) swallows all remaining nodes
       into one PP (closest attachment preferred; the case
       ``N1 (P N2) others`` where ``others`` attaches to N1 is not
       handled);
     * a ``C`` swallows all remaining nodes into a COORD built by
       ``make_COORD``: a coordination of NPs becomes a child, a
       coordination of PPs is handed back as the tail;
     * a node with any other label is attached as a direct child.

     Returns ``[np, tail]``; ``tail`` is ``None`` unless a PP
     coordination was found.
     """
     np = LabelledTree('NP')
     tail = None
     total = len(nodes)
     pos = 0
     while pos < total:
         current = nodes[pos]
         if current.label in ('D', 'N', 'ET'):
             np.add_child(current)
             if current.label == 'N':
                 beforehead = False
             pos += 1
         elif current.label == 'A':
             if (pos + 2 < total and nodes[pos + 1].label == 'C'
                     and nodes[pos + 2].label == 'A'):
                 np.add_child(self.make_AP(nodes[pos:pos + 3], 'A C A', beforehead))
                 pos += 3
             else:
                 np.add_child(self.make_AP([current], 'A', beforehead))
                 pos += 1
         # if a prep is encountered
         # => treat all remaining nodes as a whole PP
         elif self.isP(current):
             np.add_child(self.make_PP(nodes[pos:]))
             break
         elif current.label == 'C':
             coord, coord_type = self.make_COORD(nodes[pos:])
             if coord_type == 'PP':
                 tail = coord
             else:
                 np.add_child(coord)
             break
         else:
             # Unhandled label: attach the node as it is. No branch
             # used to consume such a node, so the loop never ended.
             np.add_child(current)
             pos += 1
     return [np, tail]
Пример #9
0
    def undo_compound(self, tree):
        """Expand a compound node into a list of regular constituents.

        Returns ``None`` when ``tree`` is not a compound (per
        ``self.is_compound``), and ``[]`` when its child label pattern
        is not one of those handled below. Otherwise
        ``self.inject_subcats_to_compound`` is applied to ``tree`` and
        a list of nodes is returned:

        * ``N A`` / ``N A A``: the noun, then each adjective in an AP;
        * ``A N``: the adjective (left bare) and the noun;
        * ``N [A] P N``, ``N [A] P+D N``, ``N [A] P D N``: the noun, an
          AP for the adjective if present, and a PP made of the
          preposition plus an NP over the remaining nodes.
        """
        if not self.is_compound(tree):
            return
        pattern = self.children_labels(tree)

        # N (N0 A1)    -> N0 (AP A1)
        # N (N0 A1 A2) -> N0 (AP A1) (AP A2)
        if pattern in ('N A', 'N A A'):
            self.inject_subcats_to_compound(tree)
            adj_phrases = []
            first_ap = LabelledTree("AP")
            first_ap.add_child(tree.children[1])
            adj_phrases.append(first_ap)
            if pattern == 'N A A':
                second_ap = LabelledTree("AP")
                second_ap.add_child(tree.children[2])
                adj_phrases.append(second_ap)
            return [tree.children[0]] + adj_phrases

        # N (A0 N1) -> A0 N1 : the adjective is not wrapped in an AP
        if pattern == 'A N':
            self.inject_subcats_to_compound(tree)
            return [tree.children[0], tree.children[1]]

        prep_patterns = ['N P N', 'N P+D N', 'N P D N',
                         'N A P N', 'N A P+D N', 'N A P D N']
        if pattern not in prep_patterns:
            return []

        # N (N0 P1 N2)    -> N0 (PP P1 (NP N2))
        # N (N0 A1 P2 N3) -> N0 (AP A1) (PP P2 (NP N3))
        self.inject_subcats_to_compound(tree)
        with_adjective = tree.children[1].label == 'A'
        # index of the first node after the preposition
        np_start = 3 if with_adjective else 2
        noun_phrase = LabelledTree("NP")
        noun_phrase.add_child(tree.children[np_start])
        # 'P D N' patterns leave one more node for the NP
        if len(tree.children) == np_start + 2:
            noun_phrase.add_child(tree.children[np_start + 1])
        prep_phrase = LabelledTree("PP")
        prep_phrase.add_child(tree.children[np_start - 1])
        prep_phrase.add_child(noun_phrase)
        if not with_adjective:
            return [tree.children[0], prep_phrase]
        adj_phrase = LabelledTree("AP")
        adj_phrase.add_child(tree.children[1])
        return [tree.children[0], adj_phrase, prep_phrase]