示例#1
0
 def padTrigramFeatures(self, maxNumTrigrams, posPaddingVector,
                        depPaddingVector):
     """Pad ``self.trigramFeatures`` with placeholder entries.

     Appends ``Features`` objects to ``self.trigramFeatures`` until it
     holds at least ``maxNumTrigrams`` items. Nothing is appended when the
     list is already that long.

     Args:
         maxNumTrigrams: Target length of ``self.trigramFeatures``.
         posPaddingVector: Value stored under each ``posw*`` key.
         depPaddingVector: Value stored under each ``labelw*`` key.
     """
     # (key, value) pairs in insertion order; lemma slots are filled with -1.
     placeholder = (
         ("lemmaw1", -1),
         ("lemmaw2", -1),
         ("lemmaw3", -1),
         ("posw1", posPaddingVector),
         ("posw2", posPaddingVector),
         ("posw3", posPaddingVector),
         ("labelw1", depPaddingVector),
         ("labelw2", depPaddingVector),
         ("labelw3", depPaddingVector),
     )

     missing = maxNumTrigrams - len(self.trigramFeatures)
     while missing > 0:
         padded = Features()
         for key, value in placeholder:
             padded.addFeature(key, value)
         self.trigramFeatures.append(padded)
         missing -= 1
示例#2
0
 def padBigramFeatures(self, maxNumBigrams, posPaddingVector,
                       depPaddingVector):
     """Pad ``self.bigramFeatures`` with placeholder entries.

     Appends ``Features`` objects to ``self.bigramFeatures`` until it holds
     at least ``maxNumBigrams`` items. Nothing is appended when the list is
     already that long.

     Args:
         maxNumBigrams: Target length of ``self.bigramFeatures``.
         posPaddingVector: Value stored under each POS key.
         depPaddingVector: Value stored under each label key.
     """
     # (key, value) pairs in insertion order; lemma and child-count slots
     # are filled with -1.
     placeholder = (
         ("labelw1", depPaddingVector),
         ("labelhead", depPaddingVector),
         ("labelw2", depPaddingVector),
         ("lemmaw1", -1),
         ("lemmaw2", -1),
         ("posw1", posPaddingVector),
         ("posw2", posPaddingVector),
         ("num-childrenw1", -1),
         ("num-childrenw2", -1),
         ("poshead", posPaddingVector),
         ("poschild", posPaddingVector),
     )

     missing = maxNumBigrams - len(self.bigramFeatures)
     while missing > 0:
         padded = Features()
         for key, value in placeholder:
             padded.addFeature(key, value)
         self.bigramFeatures.append(padded)
         missing -= 1
示例#3
0
    def computeAtomicFeatures(self):
        """Build the per-node feature set and store it on each node.

        For every node in ``self.node_list`` a fresh ``Features`` object is
        filled with the node's lemma, the vector representations of its
        dependency label and POS tag, its child and grandchild counts, and
        two strings of per-label / per-tag counts over its children. The
        result is assigned to ``node.atomicFeatures``.
        """
        for node in self.node_list:
            feats = Features()
            line = node.conllLine

            feats.addFeature("lemma", line.lemma)
            feats.addFeature(
                "label", self.getVectorRepresentation(line.deprel, "dep"))
            feats.addFeature(
                "pos", self.getVectorRepresentation(line.pos, "pos"))

            kids = self.getChildren(node)
            grandkids = self.getGrandChildren(kids)
            feats.addFeature("num-children", len(kids))
            feats.addFeature("num-grandchildren", len(grandkids))

            kid_lines = [kid.conllLine for kid in kids]
            kid_labels = [kid_line.deprel for kid_line in kid_lines]
            kid_tags = [kid_line.pos for kid_line in kid_lines]

            # One counter per entry of self.dictPos / self.dictDep; the
            # dictionaries map a tag or label to its slot in the counter list.
            tag_counts = [0] * len(self.dictPos)
            label_counts = [0] * len(self.dictDep)

            for kid_label in kid_labels:
                label_counts[self.dictDep[kid_label]] += 1
            for kid_tag in kid_tags:
                tag_counts[self.dictPos[kid_tag]] += 1

            # The counts are concatenated without any separator.
            feats.addFeature("labels-children",
                             "".join(str(count) for count in label_counts))
            feats.addFeature("pos-children",
                             "".join(str(count) for count in tag_counts))
            node.atomicFeatures = feats
示例#4
0
    def computeGlobalFeatures(self):
        """Build sentence-level features into ``self.globalFeatures``.

        Reads label, lemma and POS information from the first three entries
        of ``self.node_list`` and from its second-, third- and fourth-from-last
        entries, adds the root lemma, and sets a 0/1 flag telling whether any
        token form contains "?". Slots the list is too short to provide are
        filled with the "<PADDING>" vector, or "NOLEMMA" for the lemma slot.

        Raises:
            IndexError: If ``self.node_list`` is empty.
        """
        def pos_vector(tag):
            return self.getVectorRepresentation(tag, "pos")

        def dep_vector(label):
            return self.getVectorRepresentation(label, "dep")

        nodes = self.node_list
        feats = Features()

        first = nodes[0].conllLine
        feats.addFeature("labelw1", dep_vector(first.deprel))
        # Lemmas are stored as-is; the original code marks these spots with
        # "GET EMBEDDING", presumably a reminder to look up an embedding.
        feats.addFeature("lemmaRoot", self.root.lemma)
        feats.addFeature("lemmaw1", first.lemma)
        feats.addFeature("posw1", pos_vector(first.pos))

        # Second token and second-from-last token.
        if len(nodes) > 1:
            feats.addFeature("posw2", pos_vector(nodes[1].conllLine.pos))
            before_last = nodes[-2].conllLine
            feats.addFeature("poswn_1", pos_vector(before_last.pos))
            feats.addFeature("labelwn_1", dep_vector(before_last.deprel))
            feats.addFeature("lemmawn_1", before_last.lemma)
        else:
            feats.addFeature("posw2", pos_vector("<PADDING>"))
            feats.addFeature("poswn_1", pos_vector("<PADDING>"))
            feats.addFeature("labelwn_1", dep_vector("<PADDING>"))
            # Placeholder lemma used when there is no such token.
            feats.addFeature("lemmawn_1", "NOLEMMA")

        # Third token and third-from-last token.
        if len(nodes) > 2:
            feats.addFeature("posw3", pos_vector(nodes[2].conllLine.pos))
            feats.addFeature("poswn_2", pos_vector(nodes[-3].conllLine.pos))
        else:
            feats.addFeature("posw3", pos_vector("<PADDING>"))
            feats.addFeature("poswn_2", pos_vector("<PADDING>"))

        # Fourth-from-last token.
        if len(nodes) > 3:
            feats.addFeature("poswn_3", pos_vector(nodes[-4].conllLine.pos))
        else:
            feats.addFeature("poswn_3", pos_vector("<PADDING>"))

        # 1 as soon as any token form contains a question mark, else 0.
        has_question = any("?" in node.conllLine.form for node in nodes)
        feats.addFeature("question", 1 if has_question else 0)

        self.globalFeatures = feats
示例#5
0
    def computeTrigramFeatures(self):
        """Collect parent/child/grandchild features for every node.

        For each node in ``self.node_list`` and each chain
        parent -> child -> grandchild obtained through ``self.getChildren``,
        a ``Features`` object with the lemma, POS vector and dependency-label
        vector of the three nodes is appended to ``parent.trigramFeatures``.
        """
        lemma_keys = ("lemmaw1", "lemmaw2", "lemmaw3")
        pos_keys = ("posw1", "posw2", "posw3")
        label_keys = ("labelw1", "labelw2", "labelw3")

        for parent in self.node_list:
            for child in self.getChildren(parent):
                for grandChild in self.getChildren(child):
                    feats = Features()
                    # w1 = parent, w2 = child, w3 = grandchild.
                    chain = (parent, child, grandChild)

                    for key, member in zip(lemma_keys, chain):
                        feats.addFeature(key, member.conllLine.lemma)

                    for key, member in zip(pos_keys, chain):
                        feats.addFeature(
                            key,
                            self.getVectorRepresentation(
                                member.conllLine.pos, "pos"))

                    for key, member in zip(label_keys, chain):
                        feats.addFeature(
                            key,
                            self.getVectorRepresentation(
                                member.conllLine.deprel, "dep"))

                    parent.trigramFeatures.append(feats)
示例#6
0
    def computeBigramFeatures(self):
        """Collect parent/child features for every node.

        For each node in ``self.node_list`` and each of its children (as
        returned by ``self.getChildren``), a ``Features`` object describing
        the pair is appended to ``parent.bigramFeatures``. It holds the
        label and POS vectors and the lemmas of both nodes, plus the number
        of children each of them has.
        """
        def dep_vector(label):
            return self.getVectorRepresentation(label, "dep")

        def pos_vector(tag):
            return self.getVectorRepresentation(tag, "pos")

        for parent in self.node_list:
            siblings = self.getChildren(parent)
            for child in siblings:
                feats = Features()
                head_line = parent.conllLine
                child_line = child.conllLine

                # "labelw1" and "labelhead" both carry the parent's label.
                feats.addFeature("labelw1", dep_vector(head_line.deprel))
                feats.addFeature("labelhead", dep_vector(head_line.deprel))
                feats.addFeature("labelw2", dep_vector(child_line.deprel))

                feats.addFeature("lemmaw1", head_line.lemma)
                feats.addFeature("lemmaw2", child_line.lemma)

                feats.addFeature("posw1", pos_vector(head_line.pos))
                feats.addFeature("posw2", pos_vector(child_line.pos))

                feats.addFeature("num-childrenw1", len(siblings))
                feats.addFeature("num-childrenw2",
                                 len(self.getChildren(child)))

                # "poshead"/"poschild" repeat the POS of parent and child.
                feats.addFeature("poshead", pos_vector(head_line.pos))
                feats.addFeature("poschild", pos_vector(child_line.pos))

                parent.bigramFeatures.append(feats)