示例#1
0
    def _CheckEdge(self, node1id, relation, parentid):
        """Tell whether the graph holds a matching edge between two nodes.

        Edges in ``self.graph`` are indexed here as
        ``(from_id, <unused>, to_id, relation_id)``.  An edge matches when
        its relation id equals the id of ``relation``, or when the id of
        ``relation`` is listed among the ontology ancestors of the edge's
        relation.

        :param node1id: id of the node the edge starts from.
        :param relation: relation name; a leading ``"~"`` means the edge is
            looked up in the opposite direction (from ``parentid`` to
            ``node1id``).
        :param parentid: id of the node the edge points to.
        :return: ``True`` if a matching edge exists, ``False`` otherwise.
        """
        source_id, target_id = node1id, parentid
        if relation[0] == "~":
            logging.debug("_CheckEdge: Reverse! {}".format(relation))
            # Reversed relation: swap the endpoints and drop the marker.
            source_id, target_id = parentid, node1id
            relation = relation[1:]

        wanted_id = FeatureOntology.GetFeatureID(relation)

        matching = [
            candidate for candidate in self.graph
            if candidate[0] == source_id and candidate[2] == target_id
        ]
        # Deterministic evaluation order, independent of set iteration order.
        matching.sort(key=operator.itemgetter(2, 1, 0))

        for candidate in matching:
            if wanted_id == candidate[3]:
                return True

            ontology_node = FeatureOntology.SearchFeatureOntology(candidate[3])
            if not ontology_node or wanted_id not in ontology_node.ancestors:
                continue

            if logging.root.isEnabledFor(logging.DEBUG):
                logging.debug("   Found ontology ancesstor relation!")
            return True

        return False
示例#2
0
    def _AddEdge(self, node1id, relation, parentid):
        """Add the edge ``node1id --relation--> parentid`` to ``self.graph``.

        The edge is stored as the tuple
        ``(node1id, relation, parentid, relationid)``.  After adding it, any
        edge between the same two nodes whose relation is an ontology
        ancestor of ``relation`` is removed, so only the more specific
        relation remains.  Finally the ``"has" + relation`` feature is applied
        to the parent node; if that feature is unknown an error is logged.

        :param node1id: id of the child node (edge source).
        :param relation: name of the relation to add.
        :param parentid: id of the parent node (edge target); must be a key
            of ``self.nodes`` when the ``has<relation>`` feature exists.
        """
        # Look the relation id up once; it is needed for the edge tuple and
        # for the ontology search below.
        relationid = FeatureOntology.GetFeatureID(relation)
        self.graph.add((node1id, relation, parentid, relationid))

        # Find the right relation to keep: if an ancestor relation already
        # links the same nodes, the newly added child relation replaces it.
        ontologynode = FeatureOntology.SearchFeatureOntology(relationid)
        if ontologynode:
            for ancestor in ontologynode.ancestors:
                ancestorname = FeatureOntology.GetFeatureName(ancestor)
                # discard() is a no-op when the ancestor edge is absent.
                self.graph.discard(
                    (node1id, ancestorname, parentid, ancestor))

        # Set the parent to have the relation.
        hasFeatureID = FeatureOntology.GetFeatureID("has" + relation)
        if hasFeatureID >= 0:
            self.nodes[parentid].ApplyFeature(hasFeatureID)
        else:
            logging.error(
                "There is no has{} feature in the feature.txt!".format(
                    relation))
示例#3
0
    def _RemoveEdge(self, node1id, relation, parentid):
        """Remove matching edges from ``node1id`` to ``parentid``.

        An edge ``(from_id, name, to_id, relation_id)`` is removed when its
        relation id equals the id of ``relation``, or when the id of
        ``relation`` is one of the ontology ancestors of the edge's relation.

        :param node1id: id of the node the edge starts from.
        :param relation: relation name; a leading ``"~"`` reverses the
            direction, i.e. edges from ``parentid`` to ``node1id`` are
            removed instead.
        :param parentid: id of the node the edge points to.
        """
        # startswith() also copes with an empty relation string, where
        # indexing relation[0] would raise IndexError.
        if relation.startswith("~"):  # reversed relation
            self._RemoveEdge(parentid, relation[1:], node1id)
            return

        relationid = FeatureOntology.GetFeatureID(relation)

        # Snapshot the candidates first: the graph is mutated in the loop.
        candidates = [
            e for e in self.graph if e[0] == node1id and e[2] == parentid
        ]
        for edge in candidates:
            if relationid == edge[3]:
                self.graph.remove(edge)
                continue

            # The ontology search may yield nothing for this relation id
            # (other callers in this file guard for that), so check before
            # reading .ancestors instead of raising AttributeError.
            edgerelationnode = FeatureOntology.SearchFeatureOntology(edge[3])
            if edgerelationnode and relationid in edgerelationnode.ancestors:
                self.graph.remove(edge)
示例#4
0
    def newnode(self, start, count, compound=False):
        """Build a new node that covers ``count`` consecutive nodes.

        The text, norm and atom of the covered nodes are concatenated.
        Between two pieces the separator is ``"_"`` when ``compound`` is
        true, otherwise as many spaces as the gap between the previous
        node's ``EndOffset`` and the current node's ``StartOffset``.  The
        covered nodes become the ``sons`` of the new node, and for every
        covered node with an ``UpperRelationship`` other than ``'H'`` the
        ``"has" + UpperRelationship`` feature is applied to the new node.

        :param start: index of the first node to cover.
        :param count: number of nodes to cover.
        :param compound: join pieces with ``"_"`` instead of spaces.
        :return: tuple ``(new node, first covered node, last covered node)``.
        :raises RuntimeError: if the list has no head, or if
            ``start + count`` exceeds ``self.size``.
        """
        if not self.head:
            raise RuntimeError(
                "This SentenceLinkedList is null! Can't combine.")
        if start + count > self.size:
            logging.error(self.__str__())
            raise RuntimeError("Can't get " + str(count) +
                               " items start from " + str(start) +
                               " from the sentence!")

        startnode = self.get(start)
        endnode = self.get(start + count - 1)

        text_parts = []
        norm_parts = []
        atom_parts = []
        children = []
        parent_feature_ids = []

        previous_end = startnode.StartOffset
        current = startnode
        for position in range(count):
            if position == 0:
                separator = ""
            elif compound:
                separator = "_"
            else:
                # Reproduce the original spacing between the two nodes.
                separator = " " * (current.StartOffset - previous_end)
            previous_end = current.EndOffset

            text_parts.append(separator + current.text)
            norm_parts.append(separator + current.norm)
            atom_parts.append(separator + current.atom)

            upper = current.UpperRelationship
            if upper and upper != 'H':
                parent_feature_ids.append(
                    FeatureOntology.GetFeatureID("has" + upper))

            children.append(current)
            current = current.next

        NewNode = SentenceNode("".join(text_parts))
        NewNode.norm = "".join(norm_parts)
        NewNode.atom = "".join(atom_parts)
        NewNode.sons = children
        NewNode.StartOffset = startnode.StartOffset
        NewNode.EndOffset = endnode.EndOffset
        Lexicon.ApplyWordLengthFeature(NewNode)
        for feature_id in parent_feature_ids:
            NewNode.ApplyFeature(feature_id)

        return NewNode, startnode, endnode
示例#5
0
def InitGlobalFeatureID():
    """Fill the module-level ``FeatureID_*`` variables from the ontology.

    Does nothing when ``FeatureID_JS2`` is already truthy.  Otherwise every
    ``FeatureID_*`` global listed below is set to the id returned by
    ``FeatureOntology.GetFeatureID`` for the paired feature name, and the
    derived tables ``FeatureOntology.BarTagIDs``,
    ``FeatureOntology.BarTagIDSet`` and ``FeatureOntology.SentimentTagIDSet``
    are (re)built.
    """
    if FeatureID_JS2:
        return

    import FeatureOntology

    # (module-level variable, feature name), in the order they are looked up.
    feature_globals = (
        ("FeatureID_JS", "JS"),
        ("FeatureID_JS2", "JS2"),
        ("FeatureID_JM2", "JM2"),
        ("FeatureID_JM", "JM"),
        ("FeatureID_0", "0"),
        ("FeatureID_CD", "CD"),
        ("FeatureID_punc", "punc"),
        ("FeatureID_SYM", "SYM"),
        ("FeatureID_NNP", "NNP"),
        ("FeatureID_External", "External"),
        ("FeatureID_OOV", "OOV"),
        ("FeatureID_CM", "CM"),
        ("FeatureID_NEW", "NEW"),
        ("FeatureID_SpaceQ", "spaceQ"),
        ("FeatureID_SpaceH", "spaceH"),
        ("FeatureID_FULLSTRING", "FULLSTRING"),
        ("FeatureID_VB", "VB"),
        ("FeatureID_Ved", "Ved"),
        ("FeatureID_Ving", "Ving"),
        ("FeatureID_H", "H"),
        ("FeatureID_Subj", "Subj"),
        ("FeatureID_Obj", "Obj"),
        ("FeatureID_Pred", "Pred"),
        ("FeatureID_AC", "AC"),
        ("FeatureID_NC", "NC"),
        ("FeatureID_VC", "VC"),
        ("FeatureID_HIT", "HIT"),
        ("FeatureID_HIT2", "HIT2"),
        ("FeatureID_HIT3", "HIT3"),
        ("FeatureID_comPair", "comPair"),
    )
    module_namespace = globals()
    for variable_name, feature_name in feature_globals:
        module_namespace[variable_name] = FeatureOntology.GetFeatureID(
            feature_name)

    # Translate every row of bar tag names into a row of feature ids.
    bar_tag_ids = []
    for tag_row in FeatureOntology.BarTags:
        bar_tag_ids.append(
            [FeatureOntology.GetFeatureID(tag) for tag in tag_row])
    FeatureOntology.BarTagIDs = bar_tag_ids
    for id_row in bar_tag_ids:
        FeatureOntology.BarTagIDSet.update(id_row)

    FeatureOntology.SentimentTagIDSet = {
        FeatureOntology.GetFeatureID(tag)
        for tag in FeatureOntology.SentimentTags
    }
示例#6
0
def OutputStringTokens_onelinerSA_ben(dag):
    """Serialize the sentiment key/value pairs of ``dag`` as a JSON string.

    For every edge of ``dag.graph`` (visited in the order given by sorting
    on tuple fields 2, 0, 1) the two end nodes are inspected: when one
    carries the ``Key`` feature and the other the ``Value`` feature, an
    entry with ``keyid``/``key``/``keyfeatures`` and
    ``valueid``/``value``/``valuefeatures`` is emitted.  If the pair
    qualifies in both directions, two independent entries are emitted.

    Afterwards every node that has at least one sentiment feature but is not
    referenced by any emitted entry gets a stand-alone entry whose key is the
    placeholder ``"_Emo"`` with ``keyid`` -1.

    :param dag: object exposing ``nodes`` (mapping id -> node with ``ID``,
        ``text`` and ``features``) and ``graph`` (iterable of edge tuples).
    :return: JSON text of the list of entries.
    """
    sentimentfeatureidset = {
        FeatureOntology.GetFeatureID(f)
        for f in ("Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral",
                  "Needed", "Key", "Value")
    }
    FeatureID_Key = FeatureOntology.GetFeatureID("Key")
    FeatureID_Value = FeatureOntology.GetFeatureID("Value")
    nodes = dag.nodes

    def sentiment_names(node):
        # Names of the node's features that are sentiment features.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f in sentimentfeatureidset
        ]

    def make_entry(keynode, valuenode):
        # Always build a fresh dict so entries never alias each other.
        return {
            "keyid": keynode.ID,
            "key": keynode.text,
            "keyfeatures": sentiment_names(keynode),
            # This key was previously misspelled "valuyeid" in one branch,
            # which made the "valueid" lookup below raise KeyError.
            "valueid": valuenode.ID,
            "value": valuenode.text,
            "valuefeatures": sentiment_names(valuenode),
        }

    outputdict = []
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node1 = nodes.get(edge[2])
        node2 = nodes.get(edge[0])

        if FeatureID_Key in node1.features and FeatureID_Value in node2.features:
            outputdict.append(make_entry(node1, node2))

        if FeatureID_Key in node2.features and FeatureID_Value in node1.features:
            outputdict.append(make_entry(node2, node1))

    for node in nodes.values():
        if not sentimentfeatureidset.intersection(node.features):
            continue
        already_listed = any(
            entry["keyid"] == node.ID or entry["valueid"] == node.ID
            for entry in outputdict)
        if already_listed:
            continue
        outputdict.append({
            "keyid": -1,
            "key": "_Emo",
            "keyfeatures": [],
            "valueid": node.ID,
            "value": node.text,
            "valuefeatures": sentiment_names(node),
        })

    return json.dumps(outputdict,
                      default=lambda o: o.__dict__,
                      sort_keys=True,
                      ensure_ascii=False)
示例#7
0
    def ApplyActions(self, actinstring):
        """Apply a whitespace-separated list of actions to this node.

        Recognized action shapes, checked in this order for each action:

        * ``X-``      remove feature ``X`` from ``self.features``.
        * ``^...-``   intended to remove ``UpperRelationship`` (see the
          review note below).
        * ``+++``     apply ``utils.FeatureID_0`` and clear ``sons`` and
          ``Head0Text``.
        * ``X++``     apply feature ``X`` and request bar-tag processing at
          the end of the call.
        * ``X+``      remove every id of ``FeatureOntology.BarTagIDs[0]`` and
          the AC/NC/VC features, then apply feature ``X``.
        * ``^p.R``    set ``UpperRelationship`` to ``R`` (text after the last
          dot) and apply feature ``R`` if the ontology knows it.
        * ``'x'``, ``%x%``, ``/x/``  set ``norm`` / ``pnorm`` / ``atom`` to
          the action text without its first and last character.
        * anything else is looked up as a feature name and applied; unknown
          names are logged as a warning.

        ``NEW`` (clear all features) and ``NEUTRAL`` (process sentiment tags)
        are handled once before the loop.  They are not skipped inside the
        loop, so they also reach the generic feature lookup at the bottom.

        :param actinstring: the action string, e.g. several actions separated
            by whitespace.
        """
        #self.FailedRuleTokens.clear()
        Actions = actinstring.split()
        #logging.debug("Word:" + self.text)

        if "NEW" in Actions:
            self.features = set()
        if "NEUTRAL" in Actions:
            FeatureOntology.ProcessSentimentTags(self.features)

        HasBartagAction = False
        for Action in Actions:
            # if Action == "NEW":
            #     continue  # already process before.
            # if Action == "NEUTRAL":
            #     continue  # already process before.

            if Action[-1] == "-":
                if Action[0] == "^":  #Remove UpperRelationship
                    if "." in Action:
                        # NOTE(review): Action ends with "-" in this branch,
                        # so the text after the first dot also ends with "-"
                        # and [1][-1] is always the single character "-".
                        # This compares UpperRelationship with "-" rather
                        # than with the relation name; [1][:-1] was probably
                        # intended. Confirm before changing: fixing it would
                        # activate the delattr below.
                        if self.UpperRelationship == Action.split(".",
                                                                  1)[1][-1]:
                            # TODO:  actually break the token. not just delattr
                            delattr(self, "UpperRelationship")
                            logging.warning(
                                " TODO:  actually break the token. not just delattr Remove Relationship:"
                                + Action)
                    else:
                        logging.warning("This Action is not right:" + Action)
                    continue

                # str.strip removes "-" from both ends of the action.
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
                if FeatureID in self.features:
                    self.features.remove(FeatureID)
                continue

            if Action[-1] == "+":
                # NOTE(review): Action[-2] / Action[-3] raise IndexError for
                # an action shorter than two / three characters (e.g. "+").
                if Action[-2] == "+":
                    if Action[-3] == "+":  #"+++"
                        self.ApplyFeature(utils.FeatureID_0)
                        self.sons = []  #remove the sons of this
                        self.Head0Text = ''  #remove Head0Text.

                    else:  #"X++":
                        #this should be in a chunk, only apply to the new node
                        HasBartagAction = True
                        # strip() takes a set of characters, so strip("++")
                        # behaves exactly like strip("+").
                        FeatureID = FeatureOntology.GetFeatureID(
                            Action.strip("++"))
                        self.ApplyFeature(FeatureID)
                else:  #"X+"
                    #MajorPOSFeatures = ["A", "N", "P", "R", "RB", "X", "V"]
                    #Feb 20, 2018: use the BarTagIDs[0] as the MajorPOSFeatures.
                    for bar0id in FeatureOntology.BarTagIDs[0]:
                        if bar0id in self.features:
                            self.features.remove(bar0id)

                    for bar0id in [
                            utils.FeatureID_AC, utils.FeatureID_NC,
                            utils.FeatureID_VC
                    ]:
                        if bar0id in self.features:
                            self.features.remove(bar0id)

                    FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                    self.ApplyFeature(FeatureID)
                continue

            if Action[0] == "^":
                if "." in Action:
                    # The relation name is whatever follows the last dot.
                    self.UpperRelationship = Action.split(".")[-1]
                    RelationActionID = FeatureOntology.GetFeatureID(
                        self.UpperRelationship)
                    if RelationActionID != -1:
                        self.ApplyFeature(RelationActionID)
                    else:
                        logging.warning("Wrong Relation Action to apply:" +
                                        self.UpperRelationship +
                                        " in action string: " + actinstring)
                    # apply this "has" to the parent (new) node (chunk)
                    # RelationActionID = FeatureOntology.GetFeatureID("has" + self.UpperRelationship)
                    # if RelationActionID != -1:
                    #     self.ApplyFeature(RelationActionID)
                    # else:
                    #     logging.warning("Wrong Relation Action to apply:" + self.UpperRelationship + " in action string: " + actinstring)

                else:
                    logging.error(
                        "The Action is wrong: It does not have dot to link to proper pointer"
                    )
                    logging.error("  actinstring:" + actinstring)
                    # Fall back to everything after the "^" as the relation.
                    self.UpperRelationship = Action[1:]
                continue

            if Action[0] == '\'':
                #Make the norm of the token to this key
                self.norm = Action[1:-1]
                continue
            if Action[0] == '%':
                #Make the pnorm of the token to this key
                self.pnorm = Action[1:-1]
                continue
            if Action[0] == '/':
                #Make the atom of the token to this key
                self.atom = Action[1:-1]
                continue
            # Plain feature name: apply it if the ontology knows it.
            ActionID = FeatureOntology.GetFeatureID(Action)
            if ActionID != -1:
                self.ApplyFeature(ActionID)
            else:
                logging.warning("Wrong Action to apply:" + Action +
                                " in action string: " + actinstring)

                # strtokens[StartPosition + i + GoneInStrTokens].features.add(ActionID)
        if HasBartagAction:  #only process bartags if there is new bar tag, or trunking (in the combine() function)
            FeatureOntology.ProcessBarTags(self.features)
示例#8
0
    def ApplyDagActions(self, OpenNode, node, actinstring, rule):
        """Apply a whitespace-separated list of DAG actions to ``node``.

        Processing happens in three stages:

        1. A ``#...#`` section, if present, is cut out of ``actinstring`` and
           handed to ``ApplyDagActions_IEPair``.
        2. Every action containing ``---`` removes all edges between ``node``
           and the node its pointer resolves to (``~---`` selects the
           opposite direction); these actions are then dropped from the list.
        3. The remaining actions are applied ordered by their last character:
           ``^pointer.R`` adds edge ``R`` (``^pointer.R-`` removes it),
           ``X-`` removes a feature, ``X+`` / ``X++`` apply a feature,
           ``'x'`` / ``%x%`` / ``/x/`` set norm / pnorm / atom, and anything
           else is applied as a plain feature name.

        If a pointer cannot be resolved the method returns immediately and
        the remaining actions are not applied.

        :param OpenNode: node whose ``ID`` is passed to ``FindPointerNode`` as
            the open node.
        :param node: the node the actions are applied to.
        :param actinstring: the action string.
        :param rule: the rule being applied; forwarded to ``FindPointerNode``
            and ``ApplyDagActions_IEPair``.
        """
        # The pattern is greedy: it spans from the first "#" to the last "#".
        iepairmatch = re.search("(#.*#)", actinstring)
        if iepairmatch:
            ieaction = iepairmatch.group(1)
            actinstring = actinstring.replace(iepairmatch.group(1), '')
            self.ApplyDagActions_IEPair(OpenNode, node, rule, ieaction)

        Actions = actinstring.split()

        # Iterate over a copy because handled actions are popped from Actions.
        for Action in copy.copy(Actions):
            if "---" in Action:
                ParentPointer = Action[:Action.rfind(
                    '.')]  #find pointer up to the last dot "."
                parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                    rule, node.ID)
                # NOTE(review): a node id of 0 would also be treated as
                # "not found" here — confirm ids are never 0.
                if not parentnodeid:
                    return

                if "~---" in Action:
                    # Keep every edge except those from parent to node.
                    self.graph = set([
                        edge for edge in self.graph
                        if edge[0] != parentnodeid or edge[2] != node.ID
                    ])
                    logging.debug(
                        "Dag Action {}: Removed all edge from {} to {}".format(
                            Action, parentnodeid, node.ID))
                else:
                    # Keep every edge except those from node to parent.
                    self.graph = set([
                        edge for edge in self.graph
                        if edge[0] != node.ID or edge[2] != parentnodeid
                    ])
                    # NOTE(review): this message prints parent -> node, but
                    # the edges removed above run from node to parent.
                    logging.debug(
                        "Dag Action {}: Removed all edge from {} to {}".format(
                            Action, parentnodeid, node.ID))
                # Removes the first occurrence of this action text.
                Actions.pop(Actions.index(Action))

        # Order by final character: in ASCII "+" sorts before "-", and both
        # sort before digits and letters.
        for Action in sorted(Actions, key=lambda d: (d[-1])):
            if Action[0] == '^':
                ParentPointer = Action[:Action.rfind(
                    '.')]  #find pointer up to the last dot "."
                parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                    rule, node.ID)
                if not parentnodeid:
                    return

                #logging.warning("DAG Action: This action {} to apply, parent id={}".format(Action, parentnodeid))
                if Action[-1] == "-":  # remove
                    # Relation name: between the last dot and the final "-".
                    relation = Action[Action.rfind('.') + 1:-1]
                    self._RemoveEdge(node.ID, relation, parentnodeid)
                else:
                    relation = Action[Action.rfind('.') + 1:]
                    newedge = [node.ID, relation, parentnodeid]
                    if logging.root.isEnabledFor(logging.DEBUG):
                        logging.debug(
                            "DAG Action:Adding new edge: {}".format(newedge))

                    self._AddEdge(node.ID, relation, parentnodeid)

                    # The child node also gets the relation as a feature.
                    RelationActionID = FeatureOntology.GetFeatureID(relation)
                    if RelationActionID != -1:
                        node.ApplyFeature(RelationActionID)
                    else:
                        logging.warning(
                            "Wrong Relation Action to apply: {} in action string: {}"
                            .format(relation, actinstring))
                continue

            if Action[-1] == "-":
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
                if FeatureID in node.features:
                    node.features.remove(FeatureID)
                continue

            if Action[-1] == "+":
                # NOTE(review): Action[-2] / Action[-3] raise IndexError for
                # an action shorter than two / three characters (e.g. "+").
                if Action[-2] == "+":
                    if Action[-3] == "+":  #"+++"
                        logging.error(
                            "There should be no +++ operation in DAG.")
                    else:  #"X++":
                        # strip() takes a set of characters, so strip("++")
                        # behaves exactly like strip("+").
                        FeatureID = FeatureOntology.GetFeatureID(
                            Action.strip("++"))
                        node.ApplyFeature(FeatureID)
                else:  #"X+"
                    # Drop the first row of bar tags and the AC/NC/VC
                    # features before applying the new feature.
                    for bar0id in FeatureOntology.BarTagIDs[0]:
                        if bar0id in node.features:
                            node.features.remove(bar0id)

                    for bar0id in [
                            utils.FeatureID_AC, utils.FeatureID_NC,
                            utils.FeatureID_VC
                    ]:
                        if bar0id in node.features:
                            node.features.remove(bar0id)

                    FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                    node.ApplyFeature(FeatureID)
                continue

            if Action[0] == '\'':
                #Make the norm of the token to this key
                node.norm = Action[1:-1]
                continue
            if Action[0] == '%':
                # Make the pnorm of the token to this key
                node.pnorm = Action[1:-1]
                continue
            if Action[0] == '/':
                #Make the atom of the token to this key
                node.atom = Action[1:-1]
                continue
            # Plain feature name: apply it if the ontology knows it.
            ActionID = FeatureOntology.GetFeatureID(Action)
            if ActionID != -1:
                node.ApplyFeature(ActionID)
            else:
                logging.warning("Wrong Action to apply:" + Action +
                                " in action string: " + actinstring)

            # Only reached by actions that fell through all branches above.
            if Action == "NEUTRAL":
                FeatureOntology.ProcessSentimentTags(node.features)
示例#9
0
    def FindPointerNode(self, openID, SubtreePointer, rule, CurrentNodeID):
        """Resolve a pointer expression to a node id.

        ``SubtreePointer`` has the form ``[^]pointer[.relation[.relation...]]``;
        several such conditions may be joined with ``+`` (a warning is logged
        in that case) and a condition may start with ``!`` for negation.

        The starting node of a condition is chosen from ``pointer``:

        * empty        -> ``openID``
        * ``~``        -> ``CurrentNodeID``
        * digits       -> ``rule.Tokens[n].MatchedNodeID``
        * anything else -> the first node (by sorted id) whose ``TempPointer``
          equals ``"^" + pointer``

        Each relation then moves from the current node to the source
        (``edge[0]``) of the first edge, in sorted order, that points at the
        current node (``edge[2]``) and whose relation id matches or has the
        requested relation among its ontology ancestors.

        :param openID: id used when the pointer part is empty.
        :param SubtreePointer: the pointer expression.
        :param rule: rule object; ``rule.ID`` is part of the cache key and
            ``rule.Tokens`` is used for numeric pointers.
        :param CurrentNodeID: id used for the ``~`` pointer.
        :return: the resolved node id, or ``None`` when it cannot be resolved.
        """
        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug("Dag.FindPointerNode for {}".format(SubtreePointer))
        # NOTE(review): the lookup key uses SubtreePointer as passed in, but
        # the result is stored at the end of this method under the pointer
        # with its leading "^" stripped. A pointer passed with "^" therefore
        # never hits the entry it stored. CurrentNodeID is not part of the
        # key either, although "~" pointers depend on it. Confirm the
        # intended caching behavior before changing either.
        if (openID, SubtreePointer, rule.ID) in self.FindPointerNode_Cache:
            #logging.debug("FindPointerNode_Cache: hit!")
            return self.FindPointerNode_Cache[(openID, SubtreePointer,
                                               rule.ID)]

        if len(SubtreePointer) >= 1 and SubtreePointer[0] == '^':
            SubtreePointer = SubtreePointer[1:]

        nodeID = None
        if "+" in SubtreePointer:
            logging.warning(
                "There is + sign in SubtreePointer of FindPointerNode(): {} ".
                format(rule))
        # nodeID is reset for every condition, so the value returned at the
        # end comes from the last "+"-separated condition.
        for AndCondition in SubtreePointer.split("+"):
            Negation = False
            if len(AndCondition) > 1 and AndCondition[0] == "!":
                #logging.warning("FindPointerNode: Negation! {}".format(SubtreePointer))
                Negation = True
                AndCondition = AndCondition[1:]

            # Split into the starting pointer and the dotted relation chain.
            if "." in AndCondition:
                pointer, relations = AndCondition.split(".", 1)
            else:
                pointer, relations = [AndCondition, ""]
            #pointers = SubtreePointer.split(".")  # Note: here Pointer (subtreepointer) does not have "^"
            #logging.debug("tree:{}".format(pointers))
            # if len(pointers) <=1:
            #     #logging.error("Should have more than 1 pointers! Can't find {} in graph {}".format(SubtreePointer, self.graph))
            #     return openID
            nodeID = None
            if pointer == '':
                nodeID = openID
            elif pointer == '~':
                #logging.warning("THIS POINTER in {}".format(rule))
                nodeID = CurrentNodeID
            else:
                if pointer.isdigit():
                    # Numeric pointer: index into the rule's tokens.
                    pointer_num = int(pointer)
                    try:
                        nodeID = rule.Tokens[pointer_num].MatchedNodeID
                    except AttributeError as e:  #AttributeError: 'RuleToken' object has no attribute 'MatchedNodeID'
                        logging.error(e)
                        logging.error(
                            "FindPointerNode: The rule is written error, because the reference token is not yet matched. Please rewrite!"
                        )
                        logging.info(rule)
                        return None
                    except IndexError as e:
                        logging.error(e)
                        logging.error(
                            "FindPointerNode: The rule is written error, failed to find pointer {} IndexError!"
                            .format(pointer_num))
                        logging.info(rule)
                        return None

                else:
                    # Named pointer: TempPointer values carry the "^" prefix.
                    pointer = "^" + pointer
                    #logging.info("Finding pointer node {} from TempPointer".format(pointer))
                    for nodeid in sorted(self.nodes):
                        #logging.debug("DAG.FindPointerNode: evaluating temppointer {} in {} with pointer {}".format(self.nodes[nodeid].TempPointer, self.nodes[nodeid].text, pointer))
                        if self.nodes[nodeid].TempPointer == pointer:
                            #logging.debug("Matched nodeid {}".format(nodeid))
                            nodeID = nodeid
                            break
                #logging.warning("after looping over the nodes, nodeID={}".format(nodeID))
            # NOTE(review): a node id of 0 is falsy and would skip the
            # relation walk — confirm ids are never 0.
            if nodeID and relations:
                for relation in relations.split("."):
                    Found = False
                    # if relation == "LEFT":
                    #     nodeID = self.LinearNodeOffset(nodeID, -1)
                    #     if nodeID:
                    #         Found = True
                    # elif relation == "RIGHT":
                    #     nodeID = self.LinearNodeOffset(nodeID, 1)
                    #     if nodeID:
                    #         Found = True
                    # else:
                    relationid = FeatureOntology.GetFeatureID(relation)
                    # Sorted so the first matching edge is deterministic.
                    for edge in sorted(self.graph,
                                       key=operator.itemgetter(2, 1, 0)):
                        #logging.debug("Evaluating edge{} with relation {}, node {}".format(edge, relation, nodeID))
                        if edge[2] == nodeID:
                            if relationid == edge[
                                    3]:  # or relationid in FeatureOntology.SearchFeatureOntology(edge[3]):
                                nodeID = edge[0]
                                Found = True
                                if logging.root.isEnabledFor(logging.DEBUG):
                                    logging.debug("   Found!")
                                break
                            else:
                                # Also accept an edge whose relation has the
                                # requested relation as an ontology ancestor.
                                edgerelationnode = FeatureOntology.SearchFeatureOntology(
                                    edge[3])
                                if edgerelationnode and relationid in edgerelationnode.ancestors:
                                    nodeID = edge[0]
                                    Found = True
                                    if logging.root.isEnabledFor(
                                            logging.DEBUG):
                                        logging.debug(
                                            "   Found ontology ancesstor relation!"
                                        )
                                    break

                    if not Found:
                        # With negation a missing relation is tolerated and
                        # the walk continues from the unchanged nodeID.
                        if not Negation:
                            return None
                        #logging.warning("Failed to find pointer {} in graph {}".format(SubtreePointer, self))
                        #return None     #Can't find the pointers.

                    #logging.info("Found this node {} for these pointers:{}".format(nodeID, pointers))

        if nodeID:
            # Stored under the pointer without its leading "^".
            self.FindPointerNode_Cache[(openID, SubtreePointer,
                                        rule.ID)] = nodeID
            return nodeID
        else:
            logging.warning("Can't find {} pointer in this rule{}".format(
                SubtreePointer, rule))
            return None
示例#10
0
def GetFeatureID(word):
    """Return the ontology feature id of ``word``, encoded with jsonpickle."""
    feature_id = FeatureOntology.GetFeatureID(word)
    return jsonpickle.encode(feature_id)