Пример #1
0
def findNP(tree, sentNum, ngdata, mention_list, mentionID):
    """Register short ``cat='np'`` nodes of ``tree`` as mentions.

    A node qualifies when its ``rel`` is one of the accepted relations and
    its span (``end - begin``) is shorter than 7. For each qualifying node a
    mention named ``'np_' + rel`` is built with ``make_mention``; note that
    one value is drawn from ``mentionID`` per candidate, even if the
    candidate is later rejected.

    The candidate is dropped when the first word-bearing descendant of the
    first node sharing its span has ``pron='true'``, ``buiging='zonder'``,
    ``pos='num'``, or carries an ``index`` or ``sc`` attribute. Otherwise a
    candidate containing a ``','`` token is delegated to ``handleComma``,
    and any other candidate is added if ``allWordsHaveAlpha`` accepts its
    tokens.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    accepted_rels = ('su', 'app', 'body', 'sat', 'predc', 'obj1', 'cnj')
    for np_node in tree.findall(".//node[@cat='np']"):
        attrs = np_node.attrib
        span_len = int(attrs['end']) - int(attrs['begin'])
        if attrs['rel'] not in accepted_rels or span_len >= 7:
            continue

        candidate = make_mention(
            attrs['begin'],
            attrs['end'],
            tree,
            'np_' + attrs['rel'],
            sentNum,
            next(mentionID),
            ngdata,
        )

        # Look up the first node covering exactly the candidate's span and
        # inspect the attributes of its first descendant that has a word.
        same_span = tree.findall('.//node[@begin="%s"][@end="%s"]'
                                 % (candidate.begin, candidate.end))
        word_attrs = same_span[0].find('.//node[@word]').attrib

        rejected = (word_attrs.get('pron') == 'true'
                    or word_attrs.get('buiging') == 'zonder'
                    or word_attrs.get('pos') == 'num'
                    or 'index' in word_attrs
                    or 'sc' in word_attrs)
        if rejected:
            continue

        if ',' in candidate.tokenList:
            handleComma(candidate, tree, sentNum, ngdata, mention_list,
                        mentionID)
        elif allWordsHaveAlpha(candidate.tokenList):
            add_mention(mention_list, candidate)
Пример #2
0
def findPron(tree, sentNum, ngdata, mention_list, mentionID):
    """Add a 'Pronoun' mention for every pronoun-like node in ``tree``.

    Nodes are selected by ``pdtype='pron'`` first, followed by nodes with
    ``frame='determiner(pron)'``; a node matching both selectors is
    processed twice. Each node yields one mention spanning its
    ``begin``/``end`` attributes, with an identifier drawn from
    ``mentionID``.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    # Both queries run before any mention is created.
    by_pdtype = tree.findall(".//node[@pdtype='pron']")
    by_frame = tree.findall(".//node[@frame='determiner(pron)']")
    for pron_node in by_pdtype + by_frame:
        span = pron_node.attrib
        pronoun = make_mention(span['begin'], span['end'], tree, 'Pronoun',
                               sentNum, next(mentionID), ngdata)
        add_mention(mention_list, pronoun)
Пример #3
0
def findObj(tree, sentNum, ngdata, mention_list, mentionID):
    """Add a 'noun' mention for every single-word ``obj1`` common noun.

    Selects nodes that have a ``word`` attribute, ``ntype='soort'`` and
    ``rel='obj1'``, and registers one mention per node covering the node's
    ``begin``/``end`` span.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    object_nouns = tree.findall(
        ".//node[@word][@ntype='soort'][@rel='obj1']")
    for noun_node in object_nouns:
        span = noun_node.attrib
        noun = make_mention(span['begin'], span['end'], tree, 'noun',
                            sentNum, next(mentionID), ngdata)
        add_mention(mention_list, noun)
Пример #4
0
def findSubj(tree, sentNum, ngdata, mention_list, mentionID):
    """Add a 'su' mention for every subject node without a ``cat``.

    Nodes with ``rel='su'`` that carry a ``cat`` attribute are skipped;
    each remaining node yields one mention covering its ``begin``/``end``
    span.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    for subj_node in tree.findall(".//node[@rel='su']"):
        attrs = subj_node.attrib
        if 'cat' in attrs:
            continue
        subject = make_mention(attrs['begin'], attrs['end'], tree, 'su',
                               sentNum, next(mentionID), ngdata)
        add_mention(mention_list, subject)
Пример #5
0
def handleComma(new_mention, tree, sentNum, ngdata, mention_list, mentionID):
    """Register a comma-containing mention, splitting it where appropriate.

    Three cases, decided on ``new_mention.tokenList``:

    1. The second token is ``','`` and there are more than three tokens:
       the mention is added as a whole.
    2. Exactly three tokens with ``','`` in the middle, and the first
       token's attributes have ``neclass == 'LOC'``: the whole mention is
       added, plus an 'np_comma' mention for the part after the comma.
    3. Anything else: the mention is split at its first comma into two
       'np_comma' mentions (before and after the comma); the whole mention
       is not added.

    Args:
        new_mention: mention object exposing ``tokenList``, ``tokenAttribs``,
            ``begin`` and ``end``; expected to contain at least one ``','``
            token and at least two tokens.
        tree: parsed sentence forwarded to ``make_mention``.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    tokens = new_mention.tokenList
    if tokens[1] == ',' and len(tokens) > 3:
        add_mention(mention_list, new_mention)
    elif (len(tokens) == 3 and tokens[1] == ','
          and 'neclass' in new_mention.tokenAttribs[0]
          and new_mention.tokenAttribs[0]['neclass'] == 'LOC'):
        add_mention(mention_list, new_mention)
        after_comma = make_mention(
            tokens.index(',') + new_mention.begin + 1,
            new_mention.end,
            tree,
            'np_comma',
            sentNum,
            next(mentionID),
            ngdata,
        )
        # Register the freshly built sub-mention. Previously the full
        # mention was added a second time here and the sub-mention was
        # silently discarded after consuming an ID.
        add_mention(mention_list, after_comma)
    else:
        comma_offset = tokens.index(',')
        before_comma = make_mention(
            new_mention.begin,
            new_mention.begin + comma_offset,
            tree,
            'np_comma',
            sentNum,
            next(mentionID),
            ngdata,
        )
        after_comma = make_mention(
            comma_offset + new_mention.begin + 1,
            new_mention.end,
            tree,
            'np_comma',
            sentNum,
            next(mentionID),
            ngdata,
        )
        add_mention(mention_list, before_comma)
        add_mention(mention_list, after_comma)
Пример #6
0
def findMWU(tree, sentNum, ngdata, mention_list, mentionID):
    """Add a mention for every multi-word unit with an accepted relation.

    Nodes with ``cat='mwu'`` whose ``rel`` is ``obj1``, ``su``, ``cnj`` or
    ``hd`` each yield one mention named ``'mwu_' + rel`` covering the
    node's ``begin``/``end`` span.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    accepted_rels = ('obj1', 'su', 'cnj', 'hd')  # hd 14/65
    for mwu_node in tree.findall(".//node[@cat='mwu']"):
        attrs = mwu_node.attrib
        relation = attrs['rel']
        if relation not in accepted_rels:
            continue
        unit = make_mention(attrs['begin'], attrs['end'], tree,
                            'mwu_' + relation, sentNum, next(mentionID),
                            ngdata)
        add_mention(mention_list, unit)
Пример #7
0
def findMWU2(tree, sentNum, ngdata, mention_list, mentionID):
    """Add multi-word-unit mentions extended by a preceding determiner.

    For each ``cat='mwu'`` node whose ``rel`` is ``obj1``, ``su``, ``cnj``
    or ``hd``, the tree is searched for a ``pos='det'`` node ending exactly
    where the unit begins. When one exists, a mention named
    ``'mwu_' + rel`` is added that starts one position before the unit and
    ends at the unit's ``end``. Units without such a determiner are
    ignored.

    Args:
        tree: parsed sentence supporting ``find``/``findall`` with
            XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    accepted_rels = ('obj1', 'su', 'cnj', 'hd')  # hd 14/65
    for mwu_node in tree.findall(".//node[@cat='mwu']"):
        attrs = mwu_node.attrib
        if attrs['rel'] not in accepted_rels:
            continue

        determiner = tree.find(
            ".//node[@pos='det'][@end='%s']" % attrs['begin'])
        if determiner is None:
            continue

        # Widen the span by one position to the left to include the
        # determiner.
        widened_begin = int(attrs['begin']) - 1
        unit = make_mention(widened_begin, attrs['end'], tree,
                            'mwu_' + attrs['rel'], sentNum, next(mentionID),
                            ngdata)
        add_mention(mention_list, unit)
Пример #8
0
def findNP2(tree, sentNum, ngdata, mention_list, mentionID):
    """Add the part of a long noun phrase that precedes an inner 'die'.

    Considers ``cat='np'`` nodes with an accepted ``rel`` and a span
    (``end - begin``) greater than 4. For every node in the tree with
    ``word='die'`` that lies strictly inside such a noun phrase (begins
    after the phrase begins and ends before the phrase ends), a 'die_np'
    mention is built from the phrase's ``begin`` up to the ``begin`` of
    that 'die' node. The mention is added only if ``allWordsHaveAlpha``
    accepts its tokens. One value is drawn from ``mentionID`` per built
    mention, whether or not it is added.

    Args:
        tree: parsed sentence supporting ``findall`` with XPath-like paths.
        sentNum: sentence identifier forwarded to ``make_mention``.
        ngdata: forwarded unchanged to ``make_mention``.
        mention_list: collection that ``add_mention`` appends to.
        mentionID: iterator yielding mention identifiers.
    """
    accepted_rels = ('obj1', 'su', 'app', 'cnj', 'body', 'sat', 'predc')
    for np_node in tree.findall(".//node[@cat='np']"):
        attrs = np_node.attrib
        np_end = int(attrs['end'])
        np_begin = int(attrs['begin'])
        # Only long phrases qualify; an upper bound (< 10) is noted in the
        # original comments but not enforced.
        if attrs['rel'] not in accepted_rels or np_end - np_begin <= 4:
            continue

        # Matching on @word='die' works better (per the original note).
        for die_node in tree.findall(".//node[@word='die']"):
            die_attrs = die_node.attrib
            inside_phrase = (np_begin < int(die_attrs['begin'])
                             and int(die_attrs['end']) < np_end)
            if not inside_phrase:
                continue
            head_part = make_mention(attrs['begin'], die_attrs['begin'],
                                     tree, 'die_np', sentNum,
                                     next(mentionID), ngdata)
            if allWordsHaveAlpha(head_part.tokenList):
                add_mention(mention_list, head_part)