Пример #1
0
def findiobjextract(patt, node, root):
    """Locate the indirect-object phrase that goes with the trigger at ``node``.

    The siblings after the trigger's parent phrase are examined first and,
    when that gives no head word, the siblings before it.  In both
    directions a sibling is stepped over when its label contains "advp",
    "qual", "timex", "pp" or "vp", or when the label is itself a substring
    of "obj", "np", "nps", "ap" or "aps".  The walk stops at the first label
    containing "iobj" (that phrase is extracted) or at any other label.

    Returns ``(whole, head)`` as produced by ``treeops.getextractphrase``
    if the case of the extracted phrase and the case of ``patt`` contain
    one another, otherwise ``("", "")``.
    """
    anchor = treeops.getparentphrase(node, root)
    containing = ("advp", "qual", "timex", "pp", "vp")
    within = ("obj", "np", "nps", "ap", "aps")
    phrase, headword, phrasecase = "", "", None

    # Forward first, backward only if the forward walk found no head.
    for advance in ("getnext", "getprevious"):
        if headword:
            break
        cursor = anchor
        while getattr(cursor, advance)() is not None:
            cursor = getattr(cursor, advance)()
            label = treeops.text(cursor)
            if (any(mark in label for mark in containing)
                    or any(label in name for name in within)):
                continue
            if "iobj" in label:
                phrase, headword, phrasecase = treeops.getextractphrase(
                    cursor, root)
            # Either the indirect object or a blocking phrase: stop here.
            break

    if not headword:
        return "", ""
    wanted = helpers.getpatterncase(patt)
    if wanted in phrasecase or phrasecase in wanted:
        return phrase, headword
    return "", ""
Пример #2
0
def getobjtrigger(node, root):
    """Find the verb that acts as trigger for the object phrase ``node``.

    The label of ``node`` decides the search direction: a label containing
    "<" makes the search walk over the preceding siblings, a label
    containing ">" over the following ones.  ``root`` is not used here.

    Returns ``(verb, verbtype)`` where ``verbtype`` is "active" or
    "passive" ("passive" is only ever set by the backward search, when a
    "vpp-comp" phrase is met), or ``("", "")`` when no verb was found.
    """
    verbtype, myverb = "active", ""
    current = node
    if "<" in treeops.text(node):
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current)
            if ("iobj" in text or "advp" in text or "np-qual" in text
                    or "timex" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                if "vpp-comp" in text:
                    # Participle: mark as passive and keep looking further
                    # back; an earlier verb phrase replaces ``myverb``.
                    verbtype = "passive"
                    continue
                break
            elif "pp" in text:
                continue
            else:
                break
    # NOTE(review): this branch reads the raw ``.text`` attribute while the
    # branch above goes through treeops.text() -- confirm that is intended.
    elif ">" in node.text:
        while current.getnext() is not None:
            current = current.getnext()
            text = current.text
            if ("iobj" in text or "advp" in text or "np" in text
                    or text in "ap" or text in "aps"):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                loopcurrent = current
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    # Fix: the label has to be re-read for every sibling.
                    # It used to be read once before the loop, so all later
                    # siblings were judged by the label of ``current``.
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or "np" in ltext or ltext in "ap"
                            or ltext in "aps"):
                        continue
                    elif ("vpi" in ltext or "vps" in ltext
                          or "vpp" in ltext or "vpg" in ltext):
                        myverb = treeops.gettriggerverb(loopcurrent)
                        continue  # might be many in a row, only want the last one
                    elif "pp" in ltext:
                        continue
                    else:
                        break
                # NOTE(review): unlike the backward branch there is no
                # ``break`` here, so the outer loop walks on over the same
                # siblings and a later verb phrase overrides ``myverb``.
                # Left unchanged -- confirm whether a break was intended.
            elif "pp" in text:
                continue
            else:
                break
    if not myverb:
        return "", ""
    return myverb, verbtype
Пример #3
0
 def patternfinder(self, sentence, lemmas):
     """Register every extraction pattern found in ``sentence``.

     Each candidate phrase of the parse tree is turned into a key of the
     form ``extract|case|trigger|triggertype``.  Keys that are not yet in
     ``self.allpatterns`` are added with the placeholder value ``[0, []]``.

     sentence -- parsed sentence, handed to ``treeops.getroot``
     lemmas   -- lemma information, handed to ``treeops.addlemmas``

     Returns None; the only effect is on ``self.allpatterns``.
     """
     root = treeops.getroot(sentence)
     trigger, triggertype = "", ""
     root = treeops.addlemmas(root, lemmas)
     for node in root.iter():  # Looking for NPs
         text = treeops.text(node)
         tag = helpers.cleantag(node.tag)
         if "done" in tag or "sentence" in tag:
             continue
         yesterfound = bool(treeops.yester(node))
         # ap-obj and ap-comp phrases are treated like noun phrases.
         if not (yesterfound or "np" in text or "ap-obj" in text
                 or "ap-comp" in text):
             continue
         if yesterfound:
             trigger, triggertype = triggerops.getyestertrigger(node, root)
         # Only the case is needed here; phrase and head are ignored.
         _phrase, _head, case = treeops.getnounphrase(node, root)
         if not case:
             if not yesterfound:
                 continue  # Don't want a pattern with no case, unless timex
         elif "qual" in text or "timex" in text:
             continue
         # NOTE(review): when ``yesterfound`` is set and ``case`` is not
         # empty, the branches below replace the trigger obtained from
         # getyestertrigger() -- confirm that this is intended.
         elif "subj" in text:
             trigger, triggertype = triggerops.getsubjtrigger(node, root)
             extract = "subj"
         elif "comp" in text:
             trigger, triggertype = triggerops.getcomptrigger(node, root)
             extract = "comp"
         elif "iobj" in text:  # must be tested before "obj"
             trigger, triggertype = triggerops.getiobjtrigger(node, root)
             extract = "iobj"
         elif "obj" in text:  # objnom is included in "obj". AP-OBJ is covered here
             trigger, triggertype = triggerops.getobjtrigger(node, root)
             extract = "obj"
         else:  # NPs inside PPs
             trigger, triggertype = triggerops.getpreptrigger(node, root)
             extract = "noun"
         if yesterfound:
             extract = "noun"
             case = "nom"
         if not trigger or not triggertype or not case:
             continue
         idkey = extract + "|" + case + "|" + trigger + "|" + triggertype
         if idkey not in self.allpatterns:
             # NOTE that the freq and extract list will be filled out in
             # extractfinder()
             self.allpatterns[idkey] = [0, []]
Пример #4
0
def findsubjextract(patt, node, root):
    """Locate the subject phrase that goes with the trigger at ``node``.

    Three passes are made over the siblings of the trigger's parent phrase;
    each later pass only runs while no head word has been found:

    1. backwards, looking for a label containing "subj";
    2. forwards, looking for a label containing "subj";
    3. backwards, looking for a label that is a substring of "ap-comp" or
       "aps-comp" (AP-COMP acting as subject).

    Siblings whose label contains "advp", "pp", "vp", "qual" or "timex", or
    whose label is a substring of "np"/"nps" (passes 1 and 2 also
    "ap"/"aps"), are stepped over.  The backward passes also step over
    "scp" phrases.  Any other label ends the pass.

    Returns ``(whole, head)`` as produced by ``treeops.getextractphrase``
    if the case of the extracted phrase and the case of ``patt`` contain
    one another, otherwise ``("", "")``.
    """
    whole, head = "", ""
    parent = treeops.getparentphrase(node, root)
    fillers = ("advp", "pp", "vp", "qual", "timex")

    # Pass 1: subject in front of the trigger.
    current = parent
    while current.getprevious() is not None:
        current = current.getprevious()
        text = treeops.text(current)
        if (any(mark in text for mark in fillers)
                or text in "np" or text in "nps"
                or text in "ap" or text in "aps"):
            continue
        if "subj" in text:
            whole, head, excase = treeops.getextractphrase(current, root)
            break
        if "scp" not in text:
            break
        # "scp", e.g. =Maðurinn= sem *beit* hestinn: keep walking so the
        # phrase in front of the conjunction is examined next.  The old
        # code computed ``newcurrent`` here but tested ``current`` again,
        # which could never match; the net effect was this same step.

    # Pass 2: subject after the trigger.
    if not head:
        current = parent
        while current.getnext() is not None:
            current = current.getnext()
            text = treeops.text(current)
            if (any(mark in text for mark in fillers)
                    or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"):
                continue
            if "subj" in text:
                whole, head, excase = treeops.getextractphrase(current, root)
            break

    # Pass 3: looking for AP-COMP acting as subj.
    if not head:
        current = parent
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current)
            if (any(mark in text for mark in fillers)
                    or text in "np" or text in "nps"):
                continue
            if text in "ap-comp" or text in "aps-comp":
                whole, head, excase = treeops.getextractphrase(current, root)
                break
            if "scp" not in text:
                break
            # "scp": stepped over, same reasoning as in pass 1.
        if head:
            # Diagnostic output kept from the original; the call form is
            # valid on both Python 2 and Python 3.
            print("yay, ap-comp acting as np-subj! {}".format(whole))

    if not head:
        return "", ""
    pattcase = helpers.getpatterncase(patt)
    if pattcase in excase or excase in pattcase:
        return whole, head
    return "", ""
Пример #5
0
def findppextract(patt, node, root):
    """Locate the prepositional phrase that goes with the trigger at ``node``.

    The siblings after the trigger's parent phrase are searched first, then
    (while no head word has been found) the siblings before it.  A sibling
    can be a candidate in three ways:

    * ``treeops.yester`` is true for it: it counts as preposition "í",
      phrase/head "gær", case "nom" ("í gær" is tagged ADVP, not PP);
    * its label contains "advp": its preposition words are combined with
      the words of the unmarked NP/timex phrases that follow it;
    * its label contains "pp": its own words are split into prepositions
      and the rest.

    A candidate is accepted when it has both prepositions and other words,
    its case and the case of ``patt`` contain one another, and its
    preposition string and the preposition of ``patt`` contain one another.
    A rejected candidate is dropped and the search goes on.

    Returns ``(whole, head)`` -- the space-joined lemmas and the lemma of
    the head noun -- or ``("", "")`` when nothing suitable was found.

    Changes against the previous version: every candidate now starts from
    empty word lists.  Lists left over from the forward search used to leak
    into the backward search, and one backward rejection path reset the
    prepositions but not the other words.  The relative-clause fallback no
    longer re-reads the PP it has just read.
    """
    whole, head = "", ""
    parent = treeops.getparentphrase(node, root)

    def accepts(prep, case):
        """Check case, then preposition, of a candidate against ``patt``."""
        pattcase = helpers.getpatterncase(patt)
        if pattcase not in case and case not in pattcase:
            return False  # Wrong pp phrase
        pp = " ".join(prep)
        pattprep = helpers.getpatternprep(patt)
        return pp in pattprep and pattprep in pp

    def readpp(phrase):
        """Split the words of ``phrase`` into prepositions and the rest."""
        prep, parts, nounhead, case = [], [], "", ""
        for word in phrase.findall(".//WORD"):
            if "a" in treeops.tag(word)[0]:
                prep.append(treeops.text(word))
                continue
            parts.append(treeops.lemma(word))
            if not nounhead and treeops.isnoun(word):
                nounhead = treeops.lemma(word)
                case = treeops.headcase(treeops.tag(word))
        return prep, parts, nounhead, case

    def readadvp(phrase):
        """Prepositions of ``phrase`` plus the words of the NPs after it."""
        prep = [treeops.text(word) for word in phrase.findall(".//WORD")
                if "a" in treeops.tag(word)[0]]
        parts, nounhead, case = [], "", ""
        follower = phrase.getnext()
        while follower is not None:
            label = treeops.text(follower).replace("[", "")
            # Want to treat as a pp phrase
            if not (label in "np" or label in "nps" or "timex" in label):
                break
            for word in follower.findall(".//WORD"):
                parts.append(treeops.lemma(word))
                if not nounhead and treeops.isnoun(word):
                    nounhead = treeops.lemma(word)
                    case = treeops.headcase(treeops.tag(word))
            follower = follower.getnext()
        return prep, parts, nounhead, case

    # ---- Search after the trigger -------------------------------------
    # NOTE(review): the backward search strips "[" from the label before
    # testing it, this one does not -- confirm which is intended.
    current = parent
    while current.getnext() is not None:
        current = current.getnext()
        text = treeops.text(current)
        if treeops.yester(current):
            # Deals with "í gær" being categorized as ADVP, not PP
            if not accepts(["í"], "nom"):
                continue
            whole = head = "gær"
            break
        elif "advp" in text:
            prep, parts, head, case = readadvp(current)
            if not parts or not prep or not accepts(prep, case):
                head = ""  # Shouldn't be treated as pp phrase
                continue
            whole = " ".join(parts)
            break
        elif ("qual" in text or text in "np" or text in "nps"
              or text in "ap" or text in "aps" or "timex" in text):
            continue
        elif "pp" in text:
            prep, parts, head, case = readpp(current)
            if not parts or not prep or not accepts(prep, case):
                head = ""  # Weird or wrong pp phrase, continue search
                continue
            if not head:
                # The PP itself has no noun: the head may sit in an NP
                # directly after it.
                follower = current.getnext()
                if follower is None:
                    return "", ""
                if "np" in treeops.text(follower):
                    for word in follower.findall(".//WORD"):
                        parts.append(treeops.lemma(word))
                        if treeops.isnoun(word):
                            head = treeops.lemma(word)
            whole = " ".join(parts)
            break
        else:
            # Nothing usable follows.  If an "scp" phrase stands directly
            # in front of the trigger phrase, try the PP in front of it.
            scp = parent.getprevious()
            if scp is not None and "scp" in treeops.text(scp):
                antecedent = scp.getprevious()
                if (antecedent is not None
                        and "pp" in treeops.text(antecedent)):
                    prep, parts, head, case = readpp(antecedent)
                    if not parts or not prep or not accepts(prep, case):
                        return "", ""
                    # The sibling after this PP is the "scp" phrase itself,
                    # so there is no following NP that could supply a
                    # missing head; with an empty head the backward search
                    # below takes over.
                    whole = " ".join(parts)
            break

    # ---- Search in front of the trigger --------------------------------
    if not head:
        current = parent
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current).replace("[", "")
            if treeops.yester(current):
                if not accepts(["í"], "nom"):
                    continue
                whole = head = "gær"
                break
            elif "advp" in text:
                prep, parts, head, case = readadvp(current)
                if not parts or not prep or not accepts(prep, case):
                    head = ""  # Shouldn't be treated as pp phrase
                    continue
                whole = " ".join(parts)
                break
            elif ("qual" in text or text in "np" or text in "nps"
                  or text in "ap" or text in "aps" or "timex" in text):
                continue
            elif "pp" in text:
                prep, parts, head, case = readpp(current)
                if not parts or not prep or not accepts(prep, case):
                    head = ""  # Weird or wrong pp phrase, continue search
                    continue
                whole = " ".join(parts)
                break
            # Any other phrase is simply stepped over in this direction.

    if not head:
        return "", ""
    return whole, head
Пример #6
0
def getyestertrigger(node, root):
    """Find the trigger word for a phrase recognised by ``treeops.yester``.

    The phrase is treated as a prepositional phrase with the fixed
    preposition "í".  The siblings in front of the parent phrase of
    ``node`` are searched first, then (if that gave no trigger) the
    siblings after it.  In both directions:

    * labels containing "np-qual" or "timex", and labels that are a
      substring of "np", "nps", "ap" or "aps", are stepped over;
    * a label containing "np" gives a noun trigger ("noun pp|í");
    * a label containing "vp" gives a verb trigger ("passive pp|í" for
      "vpp-comp", otherwise "active pp|í").  For a verb phrase that is
      none of vpp-comp/vpi/vps/vpg the search continues in the same
      direction for such a verb phrase, which then replaces the trigger;
    * a label containing "pp" is stepped over; anything else ends the
      search in that direction.

    Returns ``(trigger, triggertype)`` or ``("", "")``.
    """
    prep = "í"
    parent = treeops.getparentphrase(node, root)
    unmarked = ("np", "nps", "ap", "aps")

    def verbtrigger(vp, text, step):
        """Trigger for the verb phrase ``vp`` whose label is ``text``."""
        if "vpp-comp" in text:
            return treeops.gettriggerverb(vp), "passive pp|" + prep
        if "vpi" in text or "vps" in text or "vpg" in text:
            return treeops.gettriggerverb(vp), "active pp|" + prep
        # This verb might be the only one, maybe there's something else
        # to check further along.
        trigger, triggertype = treeops.gettriggerverb(vp), "active pp|" + prep
        other = step(vp)
        while other is not None:
            ltext = treeops.text(other)
            if ("advp" in ltext or "np-qual" in ltext or "timex" in ltext
                    or any(ltext in label for label in unmarked)):
                pass
            elif "vpp-comp" in ltext:
                return treeops.gettriggerverb(other), "passive pp|" + prep
            elif "vpi" in ltext or "vps" in ltext or "vpg" in ltext:
                return treeops.gettriggerverb(other), "active pp|" + prep
            elif "pp" not in ltext:
                break  # this verb is the only one we have.
            other = step(other)
        return trigger, triggertype

    def scan(step):
        """Walk away from ``parent`` with ``step`` until a trigger is met."""
        current = step(parent)
        while current is not None:
            text = treeops.text(current)
            if ("np-qual" in text or "timex" in text
                    or any(text in label for label in unmarked)):
                pass
            elif "np" in text:
                return treeops.gettriggernoun(current), "noun pp|" + prep
            elif "vp" in text:
                return verbtrigger(current, text, step)
            elif "pp" not in text:
                break  # nothing found here
            current = step(current)
        return "", ""

    trigger, triggertype = scan(lambda phrase: phrase.getprevious())
    if not trigger:
        trigger, triggertype = scan(lambda phrase: phrase.getnext())
    if not trigger:
        return "", ""
    return trigger, triggertype
Пример #7
0
def getpreptrigger(node, root):
    """Find the trigger for a noun phrase that sits inside a PP.

    First the preposition is determined: all words of the parent phrase of
    ``node`` whose tag starts with "a" are joined with spaces (multi-word
    prepositions).  If there are none, the phrase in front of the parent
    decides what happens:

    * "pp"/"advp": its "a"-tagged words are used as the preposition,
      unless it contains an "n"-tagged word, in which case nothing is
      returned;
    * "iobj"/"obj"/"subj": the result of the matching trigger finder for
      that phrase is returned;
    * "ap": if the phrase in front of it is an NP without an "n"-tagged
      word and is marked subj/iobj/obj, the result of the matching trigger
      finder for that NP is returned;
    * anything else, or no such phrase: nothing is returned.

    Then the siblings in front of the parent phrase are searched for a noun
    or verb trigger and, if that gave nothing, the siblings after it.

    Returns ``(trigger, triggertype)`` -- ``triggertype`` being
    "noun pp|<prep>", "active pp|<prep>" or "passive pp|<prep>" unless
    another trigger finder produced the result -- or ``("", "")``.

    Fix against the previous version: after an "scp" phrase the verb is
    now taken from the verb phrase that was inspected.  It used to be
    taken from the "scp" phrase itself, the follow-up search for a
    non-finite verb started from there too, a missing sibling was not
    guarded against, and the outer search carried on afterwards.
    """
    parent = treeops.getparentphrase(node, root)
    parts = []  # To account for multi word prepositions
    for word in parent.findall(".//WORD"):
        if "a" in treeops.tag(word)[0]:  # Found preposition
            parts.append(treeops.text(word))

    if not parts:
        # No preposition found, have unmarked np to find correct trigger for.
        previousnode = parent.getprevious()
        if previousnode is None:
            return "", ""
        ptext = treeops.text(previousnode)
        if "pp" in ptext or "advp" in ptext:
            found = False  # Checking if I find a noun in the phrase
            for aword in previousnode.findall(".//WORD"):
                mytag = treeops.tag(aword)
                if "a" in mytag[0]:  # Found preposition
                    parts.append(treeops.text(aword))
                elif "n" in mytag[0]:  # Only applies to PP phrases
                    found = True
            if found:
                return "", ""
        elif "iobj" in ptext:  # must be tested before "obj"
            trigger, triggertype = getiobjtrigger(previousnode, root)
            return trigger, triggertype
        elif "obj" in ptext:
            trigger, triggertype = getobjtrigger(previousnode, root)
            return trigger, triggertype
        elif "subj" in ptext:
            trigger, triggertype = getsubjtrigger(previousnode, root)
            return trigger, triggertype
        elif "ap" in ptext:
            moreprevious = previousnode.getprevious()
            if moreprevious is None:
                return "", ""
            mtext = treeops.text(moreprevious)
            if "np" not in mtext:
                return "", ""
            found = False
            for every in moreprevious.findall(".//WORD"):
                etag = treeops.tag(every)
                # NOTE(review): for an "a"-tagged word the old code called
                # getpreptrigger(moreprevious, root) and threw the result
                # away.  That no-op call was dropped; confirm whether
                # returning its result was the intention.
                if "a" not in etag[0] and "n" in etag[0]:
                    found = True
            if found:
                return "", ""
            # No noun in NP, can add ap and unmarked np to it
            if "subj" in mtext:
                trigger, triggertype = getsubjtrigger(moreprevious, root)
                return trigger, triggertype
            elif "iobj" in mtext:
                trigger, triggertype = getiobjtrigger(moreprevious, root)
                return trigger, triggertype
            elif "obj" in mtext:
                trigger, triggertype = getobjtrigger(moreprevious, root)
                return trigger, triggertype
            # Unmarked NP: carry on below with an empty preposition.
        else:
            return "", ""

    prep = " ".join(parts)
    unmarked = ("np", "nps", "ap", "aps")

    def verbtrigger(vp, text, step):
        """Trigger for the verb phrase ``vp`` whose label is ``text``."""
        if "vpp-comp" in text:
            return treeops.gettriggerverb(vp), "passive pp|" + prep
        if "vpi" in text or "vps" in text or "vpg" in text:
            return treeops.gettriggerverb(vp), "active pp|" + prep
        # This verb might be the only one, maybe there's something else
        # to check further along.
        trigger, triggertype = treeops.gettriggerverb(vp), "active pp|" + prep
        other = step(vp)
        while other is not None:
            ltext = treeops.text(other)
            if ("advp" in ltext or "np-qual" in ltext or "timex" in ltext
                    or any(ltext in label for label in unmarked)):
                pass
            elif "vpp-comp" in ltext:
                return treeops.gettriggerverb(other), "passive pp|" + prep
            elif "vpi" in ltext or "vps" in ltext or "vpg" in ltext:
                return treeops.gettriggerverb(other), "active pp|" + prep
            elif "pp" not in ltext:
                break  # this verb is the only one we have.
            other = step(other)
        return trigger, triggertype

    def scan(step, fillers, into_clause):
        """Walk away from ``parent`` with ``step`` until a trigger is met.

        ``fillers`` are the label fragments that are stepped over;
        ``into_clause`` enables looking behind an "scp" phrase.
        """
        current = step(parent)
        while current is not None:
            text = treeops.text(current)
            if (any(mark in text for mark in fillers)
                    or any(text in label for label in unmarked)):
                pass
            elif "np" in text:
                return treeops.gettriggernoun(current), "noun pp|" + prep
            elif "vp" in text:
                return verbtrigger(current, text, step)
            elif into_clause and "scp" in text:
                # Look at what follows the conjunction, skipping one
                # subject phrase, and use the verb phrase found there.
                clause = step(current)
                if clause is not None and "subj" in treeops.text(clause):
                    clause = step(clause)
                if clause is not None:
                    ctext = treeops.text(clause)
                    if "vp" in ctext:
                        return verbtrigger(clause, ctext, step)
            elif "pp" not in text:
                break  # nothing found here
            current = step(current)
        return "", ""

    # Getting the trigger: in front of the PP first, then after it.
    trigger, triggertype = scan(lambda phrase: phrase.getprevious(),
                                ("np-qual", "timex", "advp"), False)
    if not trigger:  # Looking after the PP
        trigger, triggertype = scan(
            lambda phrase: phrase.getnext(),
            ("np-qual", "nps-qual", "timex", "advp"), True)
    if not trigger:
        return "", ""
    return trigger, triggertype
Пример #8
0
def getcomptrigger(node, root):
    """Find the trigger word for a complement phrase.

    The siblings before ``node`` are walked first.  The nearest one whose
    label contains ``"np-subj"`` supplies a noun trigger
    (``treeops.gettriggernoun``); the nearest one containing ``"ap-comp"``
    supplies an adjective trigger (``treeops.gettriggeradj``).  Both are
    reported with the type ``"compnoun"``.

    When neither yields a trigger, the siblings are scanned forwards for a
    verb phrase instead.  A ``vpb`` phrase followed by a ``vpp-comp`` phrase
    is reported as ``"passive"``, any other verb phrase as ``"active"``; a
    verb phrase reached later in the scan overrides an earlier one.

    Args:
        node: Phrase node exposing ``getprevious()`` and ``getnext()``.
        root: Not used here; the parameter matches the other trigger
            functions' signature.

    Returns:
        A ``(trigger, triggertype)`` tuple.  When nothing is found the
        result is ``("", "active")``.
    """
    # The flag points to the verb, so no use. The -COMP flag comes on the
    # second NP, so the NP-SUBJ should always be in front.
    current = node
    trigger = ""
    triggertype = "compnoun"
    while current.getprevious() is not None:
        current = current.getprevious()
        text = treeops.text(current)
        if "np-subj" in text:
            trigger = treeops.gettriggernoun(current)
            break
        elif "ap-comp" in text:
            trigger = treeops.gettriggeradj(current)
            break
    if trigger:
        return trigger, triggertype

    # Dealing with AP-COMP acting as subj: look for a governing verb instead.
    # NOTE(review): the scan resumes from wherever the backward walk stopped
    # (the first sibling whenever that walk ran to the end), not from
    # ``node`` -- confirm that this is intended.
    triggertype = "active"
    myverb, verbtype = "", "active"
    while current.getnext() is not None:
        current = current.getnext()
        text = treeops.text(current)
        # ``text in "np"`` is a substring test: it holds when the label is a
        # substring of "np" (which includes an empty label).
        if ("advp" in text or "np-qual" in text or "timex" in text
                or text in "np" or text in "nps"
                or text in "ap" or text in "aps"):
            continue
        elif "scp" in text:
            # Peek at what follows the conjunction to decide whether the
            # search may go on.
            newcurrent = current.getnext()
            if newcurrent is None:
                break  # nothing follows the conjunction
            newtext = treeops.text(newcurrent)
            if ("advp" in newtext or newtext in "np" or newtext in "nps"
                    or newtext in "ap" or newtext in "aps"
                    or "np-qual" in newtext or "timex" in newtext):
                continue  # the outer loop visits and skips this sibling next
            elif "vp" in newtext:
                myverb = treeops.gettriggerverb(newcurrent)
                if "vpb" in newtext:
                    # Start after the verb itself; starting at the
                    # conjunction would make the verb the first node visited
                    # and end the scan at once.
                    loopcurrentnow = newcurrent
                    while loopcurrentnow.getnext() is not None:
                        loopcurrentnow = loopcurrentnow.getnext()
                        ltext = treeops.text(loopcurrentnow)
                        if "advp" in ltext:
                            continue
                        elif "vpp-comp" in ltext:
                            verbtype = "passive"
                            myverb = treeops.gettriggerverb(loopcurrentnow)
                            break
                        elif "pp" in ltext:  # after vpp-comp: "pp" is in "vpp-comp"
                            continue
                        else:
                            break
                else:
                    loopcurrentnow = newcurrent
                    while loopcurrentnow.getnext() is not None:
                        loopcurrentnow = loopcurrentnow.getnext()
                        ltext = treeops.text(loopcurrentnow)
                        if "advp" in ltext:
                            continue
                        elif "vpi" in ltext:
                            verbtype = "active"
                            myverb = treeops.gettriggerverb(loopcurrentnow)
                            break
                        else:
                            break
            elif "pp" in newtext:
                continue  # the outer loop visits and skips this sibling next
            else:
                break
        elif "vp" in text:
            myverb = treeops.gettriggerverb(current)  # the lemma of the main verb
            # I assume vpp-comp always comes with a vpb in the order vpb, vpp-comp.
            if "vpb" in text:
                loopcurrent = current
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or ltext in "np" or ltext in "nps"
                            or ltext in "ap" or ltext in "aps"
                            or "np-qual" in ltext or "timex" in ltext):
                        continue
                    elif "vpp-comp" in ltext:
                        verbtype = "passive"
                        myverb = treeops.gettriggerverb(loopcurrent)
                        return myverb, verbtype
                    elif "pp" in ltext:  # Need to check last because "pp" is in "vpp-comp" !
                        continue
                    else:
                        break
            continue
        elif "pp" in text:  # Need to check last because "pp" is in "vpp-comp" !
            continue
        else:
            break
    if myverb:
        # Report the verb found by the fallback scan; without this only the
        # vpb -> vpp-comp path above would ever return one.
        return myverb, verbtype
    return trigger, triggertype
Пример #9
0
def getsubjtrigger(node, root):
    """Find the verb that triggers a subject phrase and classify it.

    The label of ``node`` (``treeops.text(node)``) selects the strategy:

    * label contains ``">"``: the following siblings are scanned for a verb
      phrase.  A ``vpb`` phrase followed by a ``vpp-comp`` phrase is
      reported as passive; a verb phrase reached later in the scan
      overrides an earlier one.
    * label contains ``"<"``: the preceding siblings are scanned for a verb
      phrase, and each time one is found the siblings after ``node`` are
      checked for further verb phrases.
    * otherwise only the immediate neighbours are inspected, recursing into
      a neighbour that carries a ``subj``/``comp`` flag.

    Args:
        node: Phrase node exposing ``getprevious()`` and ``getnext()``.
        root: Only passed on to the recursive calls.

    Returns:
        A ``(verb, verbtype)`` tuple, where ``verbtype`` is ``"active"`` or
        ``"passive"``; ``("", "")`` when no verb was found.
    """
    verbtype, myverb = "active", ""
    current = node
    nodetext = treeops.text(node)
    if ">" in nodetext:
        while current.getnext() is not None:
            current = current.getnext()
            text = treeops.text(current)
            # ``text in "np"`` is a substring test: it holds when the label
            # is a substring of "np" (which includes an empty label).
            if ("advp" in text or "np-qual" in text or "timex" in text
                    or "subj" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"):
                continue
            elif "scp" in text:
                # Peek at what follows the conjunction to decide whether the
                # search may go on.
                newcurrent = current.getnext()
                if newcurrent is None:
                    break  # nothing follows the conjunction
                newtext = treeops.text(newcurrent)
                if ("advp" in newtext or newtext in "np" or newtext in "nps"
                        or newtext in "ap" or newtext in "aps"
                        or "np-qual" in newtext or "timex" in newtext):
                    continue  # the outer loop visits and skips this sibling next
                elif "vp" in newtext:
                    myverb = treeops.gettriggerverb(newcurrent)
                    if "vpb" in newtext:
                        # Start after the verb itself; starting at the
                        # conjunction would make the verb the first node
                        # visited and end the scan at once.
                        loopcurrentnow = newcurrent
                        while loopcurrentnow.getnext() is not None:
                            loopcurrentnow = loopcurrentnow.getnext()
                            ltext = treeops.text(loopcurrentnow)
                            if "advp" in ltext:
                                continue
                            elif "vpp-comp" in ltext:
                                verbtype = "passive"
                                myverb = treeops.gettriggerverb(loopcurrentnow)
                                break
                            elif "pp" in ltext:  # after vpp-comp: "pp" is in "vpp-comp"
                                continue
                            else:
                                break
                    else:
                        loopcurrentnow = newcurrent
                        while loopcurrentnow.getnext() is not None:
                            loopcurrentnow = loopcurrentnow.getnext()
                            ltext = treeops.text(loopcurrentnow)
                            if "advp" in ltext:
                                continue
                            elif "vpi" in ltext:
                                verbtype = "active"
                                myverb = treeops.gettriggerverb(loopcurrentnow)
                                break
                            else:
                                break
                elif "pp" in newtext:
                    continue  # the outer loop visits and skips this sibling next
                else:
                    break
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)  # the lemma of the main verb
                # I assume vpp-comp always comes with a vpb in the order vpb, vpp-comp.
                if "vpb" in text:
                    loopcurrent = current
                    while loopcurrent.getnext() is not None:
                        loopcurrent = loopcurrent.getnext()
                        ltext = treeops.text(loopcurrent)
                        if ("advp" in ltext or ltext in "np" or ltext in "nps"
                                or ltext in "ap" or ltext in "aps"
                                or "np-qual" in ltext or "timex" in ltext):
                            continue
                        elif "vpp-comp" in ltext:
                            verbtype = "passive"
                            myverb = treeops.gettriggerverb(loopcurrent)
                            return myverb, verbtype
                        elif "pp" in ltext:  # Need to check last because "pp" is in "vpp-comp" !
                            continue
                        else:
                            break
                continue
            elif "pp" in text:  # Need to check last because "pp" is in "vpp-comp" !
                continue
            else:
                break
    elif "<" in nodetext:
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current)
            if ("advp" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"
                    or "np-qual" in text or "timex" in text):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                if "vpp-comp" in text:
                    verbtype = "passive"  # This is the only verb phrase I need.
                loopcurrent = node  # Checking if more to verb phrase
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or "np-qual" in ltext
                            or ltext in "np" or ltext in "nps"
                            or ltext in "ap" or ltext in "aps"
                            or "timex" in ltext):
                        continue
                    elif "vpp" in ltext:
                        myverb = treeops.gettriggerverb(loopcurrent)
                        verbtype = "passive"
                    elif "vpi" in ltext or "vps" in ltext or "vpg" in ltext:
                        myverb = treeops.gettriggerverb(loopcurrent)
                        continue  # Because might be many in a row, want the last one
                    elif "pp" in ltext:
                        continue
                    else:
                        break  # Nothing to be found, verb has non-auxiliary meaning
                # No break here: the backward walk goes on, so a verb phrase
                # found further back overrides this one.
            elif "pp" in text:
                continue
            else:
                break
    else:  # If no flag to point to trigger
        # First case: The phrase isn't really subj, it's the obj from active when the iobj is made subject of the passive
        # But I will call it a subj, but make sure I can use two subjs when merging and making active...
        # I can't see the difference between obj and iobj in the passive when acting as subj. Will have to deal with it anyway.
        # Second case: The object in an expletive sentence (leppsetning). Should look directly before.
        # Third case: Two or more NP-SUBJ in a row, only flag on one of them.
        before = node.getprevious()
        if before is not None:
            text = treeops.text(before)
            if "vpp-comp" in text:
                verbtype = "passive"
                myverb = treeops.gettriggerverb(before)
            elif "vpi" in text:
                verbtype = "active"
                myverb = treeops.gettriggerverb(before)
            elif "subj<" in text or "subj>" in text or "comp<" in text or "comp>" in text:
                myverb, verbtype = getsubjtrigger(before, root)
        after = node.getnext()
        if after is not None and not myverb:
            text = treeops.text(after)
            if "subj<" in text or "subj>" in text or "comp<" in text or "comp>" in text:
                myverb, verbtype = getsubjtrigger(after, root)
    if not myverb:
        return "", ""
    return myverb, verbtype