示例#1
0
def genVerbnetFeatures(word, pos, features):
    """Append VerbNet selectional-restriction features for *word* to *features*.

    Only verbs (pos == 'V') are processed.  For every VerbNet class the word
    belongs to, one "THEMETYPE_<type>" string is appended per SELRESTR entry
    found under the class's thematic roles.

    Args:
        word: lemma to look up in VerbNet (via the ``vn`` corpus reader).
        pos: coarse POS tag; anything other than 'V' is ignored.
        features: list mutated in place; nothing is returned.
    """
    if pos != 'V':
        return
    for class_id in vn.classids(word):
        vnclass = vn.vnclass(class_id)
        # The original used `type` as the loop variable, shadowing the
        # builtin; renamed and collapsed the append loop into extend().
        features.extend(
            "THEMETYPE_" + restr.attrib['type']
            for restr in vnclass.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR')
        )
    def getThematicRoles(self, verb):
        """Collect thematic-role labels (with selectional restrictions) for *verb*.

        For each VerbNet class of the verb, every THEMROLE contributes one
        string of the form ``Role[+restr][-restr]...``.
        """
        roles = []
        for class_id in self.getClasses(verb):
            themroles = vn.vnclass(class_id).findall('THEMROLES/THEMROLE')
            for entry in themroles:
                restriction_tags = [
                    '[%(Value)s%(type)s]' % restr.attrib
                    for restr in entry.findall('SELRESTRS/SELRESTR')
                ]
                roles.append(entry.attrib['type'] + ''.join(restriction_tags))
        return roles
示例#3
0
def is_transitive(lemma):
    """Return True if the first frame of *lemma*'s first VerbNet class is
    marked "Transitive" in its primary description, else False.

    Any lookup failure (no class ids, missing frames/keys, corpus errors)
    yields False rather than raising.
    """
    try:
        cids = verbnet.classids(lemma)
        frames = verbnet.frames(verbnet.vnclass(cids[0]))
        # Only the first frame is inspected; the commented-out variant that
        # OR-ed over all frames was deliberately disabled upstream.
        return "Transitive" in frames[0]['description']['primary']
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
        # `Exception` keeps the best-effort semantics without that hazard.
        return False
    def test_remove_before_verb(self):
        """
        Whenever we detect that the sentence starts with a verb, we'll remove it from
        the VerbNet syntax
        """
        from nltk.corpus import verbnet

        first_classid = verbnet.classids('buy')[0]
        original_syntax = verbnet.vnclass(first_classid).find('FRAMES/FRAME/SYNTAX')

        expected = ET.fromstring("""<SYNTAX><VERB /><NP value="Theme" /></SYNTAX>""")
        actual = remove_before_v(original_syntax)

        self.assertEqual(syntax_to_str(actual), syntax_to_str(expected))
示例#5
0
def getFrames(verb, frames):
    """Accumulate normalized VerbNet frame strings for *verb* into *frames*.

    *frames* maps a normalized frame string to the list of member lemmas of
    every class exhibiting that frame.  The (mutated) dict is also returned.
    Relies on module-level patterns: membersPattern, framePattern, scomp,
    german, shifted, finalPPs, and helpers mergeintrans / mergeNPs.
    """
    # Hoisted out of the loop and made a raw string: "\s" is an invalid
    # escape sequence in a plain string literal (SyntaxWarning/SyntaxError
    # on newer Pythons), and re.compile inside the loop repeated work.
    whitespace = re.compile(r"\s+")
    for classid in verbnet.classids(verb):
        vnclass = verbnet.pprint(verbnet.vnclass(classid))
        members = whitespace.split(membersPattern.search(vnclass).group("members"))
        for match in framePattern.finditer(vnclass):
            # Normalize the raw frame text step by step.
            frame = mergeintrans(mergeNPs("%s" % (match.group("frame"))))
            frame = scomp.sub("SCOMP", frame)
            frame = german.sub("VERB", frame)
            frame = shifted.sub("NP VERB NP", frame)
            frame = finalPPs.sub("", frame)
            if frame in frames:
                frames[frame] += members
            else:
                frames[frame] = members
    return frames
示例#6
0
def GetVerbnetRestrictions(vnclass):
    """Gather selectional restrictions per thematic role for *vnclass*.

    Walks up the VerbNet class hierarchy (subclass -> parent, by trimming the
    last '-' segment of the class ID) until a top-level VNCLASS element is
    reached.  For each THEMROLE with at least one SELRESTR child, records
    (logic, [(Value, type), ...]) where 'logic' defaults to 'and'.  Because
    parents are visited last, a parent's entry overwrites a subclass entry
    for the same role name.

    Returns:
        dict mapping role name -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}

    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # FIX: `if restrictions:` tested Element truthiness, which is
            # False for a childless element and deprecated for presence
            # checks; test presence and children explicitly instead.
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add((predicate['Value'], predicate['type']))

                total = (restrictions.get('logic', 'and'), list(restriction_set))
                role_restrictions[role.attrib['type']] = total

        if vnclass.tag == 'VNCLASS':
            # Top-level class reached; nothing further to climb.
            break
        else:
            # Subclass ids look like "parent-39.1-2"; strip the last
            # segment to get the parent class id and continue.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)

    return role_restrictions
示例#7
0
文件: nlputils.py 项目: ping543f/KGen
    def get_verbnet_args(verb, verbose=False):
        """Return the VerbNet thematic-role type names for *verb*.

        Lemmatizes the verb (WordNet, POS 'v'), looks up its VerbNet class
        ids and, failing that, tries the first lemma of each WordNet synset;
        if that also fails, recurses on the first hypernym's first lemma.
        For the first class id whose class (or an ancestor, found by
        trimming the trailing numeric segment) defines THEMROLE entries,
        the list of role type strings is returned.

        Returns [] when every lookup route is exhausted, or None when class
        ids exist but none of them (nor their ancestors) yields roles.
        """
        lemmatizer = WordNetLemmatizer()
        lemmatized_verb = lemmatizer.lemmatize(verb.lower(), 'v')

        classids = verbnet.classids(lemma=lemmatized_verb)
        if verbose:
            print('Class IDs for "{}": {}'.format(lemmatized_verb, classids))

        if len(classids) < 1:
            if verbose:
                print(
                    'No entry found on verbnet for "{}". Attempting WordNet synsets!'
                    .format(lemmatized_verb))

            # Fallback 1: try the first lemma of each WordNet synset until
            # one of them has VerbNet classes.
            wn_synsets = wordnet.synsets(lemmatized_verb)
            for synset in wn_synsets:
                if len(synset.lemmas()) < 1:
                    continue

                candidate = str(synset.lemmas()[0].name())
                classids = verbnet.classids(lemma=candidate)
                if verbose:
                    print('Class IDs for "{}": {}'.format(candidate, classids))

                if len(classids) > 0:
                    break

            if len(classids) < 1:
                if verbose:
                    print(
                        'Unable to find entries on verbnet for neither of the synsets... Will go recursive now (which is not a good thing!)'
                    )

                # Fallback 2: recurse on the first hypernym's first lemma.
                # NOTE(review): this returns unconditionally on the FIRST
                # synset with lemmas, so later synsets are never tried, and
                # hypernyms() may be empty (IndexError) -- confirm intended.
                for synset in wn_synsets:
                    if len(synset.lemmas()) < 1:
                        continue

                    candidate = str(synset.hypernyms()[0].lemmas()[0].name())
                    return NLPUtils.get_verbnet_args(candidate,
                                                     verbose=verbose)

                if verbose:
                    print('Exhausted attempts... returning an empty list.')
                return []

        # NOTE(review): `id` shadows the builtin of the same name.
        for id in classids:
            # Class ids look like "admire-31.2"; keep only the numeric part.
            class_number = id[id.find('-') + 1:]
            try:
                v = verbnet.vnclass(class_number)
                roles = [
                    t.attrib['type'] for t in v.findall('THEMROLES/THEMROLE')
                ]
                pass
            except ValueError:
                print('VN class number not found: {}'.format(class_number))

                # Will handle these both below
                # (len([None]) > 0 keeps the fallback loop below running).
                v = [None]
                roles = []
                pass

            # Climb to ancestor classes (strip the last '-' segment) until
            # roles are found or the class has no children/entries left.
            # NOTE(review): if class_number contains no '-', rfind returns
            # -1 and this shaves one character per iteration -- potential
            # near-infinite loop; confirm inputs always contain a dash.
            while len(roles) < 1 and len(v) > 0:
                fallback_class_number = class_number[:class_number.rfind('-')]
                if verbose:
                    print('No roles found for class {}, falling back to {}.'.
                          format(class_number, fallback_class_number))
                class_number = fallback_class_number

                try:
                    v = verbnet.vnclass(class_number)
                    roles = [
                        t.attrib['type']
                        for t in v.findall('THEMROLES/THEMROLE')
                    ]
                    pass
                except ValueError:
                    # Go on with the loop
                    v = [None]
                    roles = []
                    pass

            if len(roles) > 0:
                if verbose:
                    print('Roles found: {}'.format(roles))

                return roles

        return None
# #     print(i)
# random.shuffle(featuresset)
# classifier = nltk.NaiveBayesClassifier.train(featuresset)
# save_classifier_NBC(classifier)

#-----------------------------------------testing---------------------------------------------------
# Smoke test: derive VerbNet-based feature keys for each verb detected
# in a sample sentence.
# NOTE(review): `input` shadows the builtin; kept as-is because code
# below this chunk may reference it by this name.
input = "He need a ride from his home."
verb_list, frames_list = prim_fram(input)
print(frames_list)
print(nltk.pos_tag(nltk.word_tokenize(input)))
print(verb_list)
for r in range(len(verb_list)):
    keys = []
    for class_id in vb.classids(verb_list[r]):
        vnclass = vb.vnclass(class_id)
        # Selectional restrictions, thematic roles and semantic predicates
        # all contribute feature keys for this verb.
        keys.extend(node.attrib['type'] for node in
                    vnclass.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR'))
        keys.extend(node.attrib['type'] for node in
                    vnclass.findall('THEMROLES/THEMROLE'))
        keys.extend(node.attrib['value'] for node in
                    vnclass.findall('FRAMES/FRAME/SEMANTICS/PRED'))
    # FIX: the original opened the file without a context manager, leaking
    # the handle if anything raised before close(); `with` guarantees it.
    with open("tmp/features_verbs.txt", "r") as feature_file:
        word_features = feature_file.readlines()
    def find_features(document, input):
        words = set(document)
示例#9
0
def GetVerbnetRestrictions(vnclass):
    """Gather selectional restrictions per thematic role for *vnclass*.

    Walks up the VerbNet class hierarchy (subclass -> parent, by trimming the
    last '-' segment of the class ID) until a top-level VNCLASS element is
    reached.  For each THEMROLE with at least one SELRESTR child, records
    (logic, [(Value, type), ...]) where 'logic' defaults to 'and'.  Because
    parents are visited last, a parent's entry overwrites a subclass entry
    for the same role name.

    Returns:
        dict mapping role name -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}

    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # FIX: `if restrictions:` tested Element truthiness, which is
            # False for a childless element and deprecated for presence
            # checks; test presence and children explicitly instead.
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add(
                        (predicate['Value'], predicate['type']))

                total = (restrictions.get('logic', 'and'),
                         list(restriction_set))
                role_restrictions[role.attrib['type']] = total

        if vnclass.tag == 'VNCLASS':
            # Top-level class reached; nothing further to climb.
            break
        else:
            # Subclass ids look like "parent-39.1-2"; strip the last
            # segment to get the parent class id and continue.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)

    return role_restrictions


# Demo: list the VerbNet classes for 'drink', then collect the selectional
# restrictions of class '39.1-2' (result discarded here).
vnclasses = verbnet.classids('drink')
v = verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)
示例#10
0
from nltk.corpus import verbnet

# Demo of the nltk VerbNet corpus-reader API for the lemma 'take'.
# (Removed the dead commented-out calls and a no-op triple-quoted string
# that the original carried along.)
my_classids = verbnet.classids(lemma='take')
print(my_classids)
for classid in my_classids:
    my_vnclass = verbnet.vnclass(classid)
    # NOTE(review): verbnet.pprint() RETURNS a string; this bare call
    # discards it.  Kept as-is to preserve behavior, but it is probably
    # missing a surrounding print().
    verbnet.pprint(my_vnclass)
    print(verbnet.pprint_members(my_vnclass))
    print(verbnet.pprint_subclasses(my_vnclass))
    print(verbnet.pprint_themroles(my_vnclass))
示例#11
0
def process_srl(srl_output, actual_data, just_phrases):
    # Python 2 code (print statements, str.decode on byte strings).
    #
    # Scores each corpus line by how well its verbs' VerbNet selectional
    # restrictions match the semantic-role-labeler output in *srl_output*.
    #
    # srl_output:  path to SRL output; records separated by a line of '='.
    # actual_data: path to tab-separated corpus lines (sense, metaphor, ...).
    # just_phrases: path to the bare phrases, one per corpus line.
    #
    # Side effect: prints one average score per corpus line.
    porter_stemmer = PorterStemmer()
    wn_lem = WordNetLemmatizer()
    file_open = open (srl_output, "r")
    output    = file_open.read()
    srl_output = output.split("\n================\n")
    srl_list = []
    # NOTE(review): list comprehension used purely for its side effect.
    [srl_list.append(line.strip()) for line in srl_output]

    phrase_sentence = create_vector(just_phrases)

    corpus_data = create_vector(actual_data)
    number = 0
    for line in corpus_data:
        sline       = line.split("\t")
        sense       = sline[2] # figurative or literal
        metaphor    = sline[1] # along the line <- the metaphor itself
        try:
            current_srl = srl_list[number].split("\n") # semantic role labeling of give sentece
        except:
            # Debug hook left in place: drops into pdb when the SRL list is
            # shorter than the corpus.
            import pdb; pdb.set_trace()

        #mtokens = metaphor.split(" ")
        # Tokenize the phrase, drop stopwords and stray punctuation, then
        # keep only the verb tokens (POS tags starting with 'VB').
        mtokens_t = word_tokenize(phrase_sentence[number])
        mtokens_t = [w for w in mtokens_t if not w.decode('utf8') in nlcor.stopwords.words('english')]
        mtokens   = filter(lambda word: word not in ",-'", mtokens_t)
        sane_mt = [mt.decode('utf8') for mt in mtokens]
        pos_mtokens = nltk.pos_tag(sane_mt)
        only_verbs = [tkn[0] for tkn in pos_mtokens if 'VB' in tkn[1]]
        #print "==============================================="
        line_score = 0
        token_count = 1
        number += 1
        #print "phrase tokens: %s" % mtokens_t
        #print "only verbs: %s" % only_verbs

        for mtoken in only_verbs:
            # Resolve VerbNet classes for the verb, retrying with the
            # lemmatized form; skip verbs unknown to VerbNet.
            vnclasses = verbnet.classids(mtoken)
            if not vnclasses:
                vnclasses = verbnet.classids(wn_lem.lemmatize(mtoken))
                if not vnclasses:
                    continue
            #print "vnclasses: %s" % vnclasses

            # Find the SRL record line mentioning this verb's stem.
            mindex = [index for index, sl in enumerate(current_srl) if porter_stemmer.stem(mtoken) in sl.decode('utf8')]
            if not mindex:
         #       print 0
                continue
            # NOTE(review): token_count is incremented both here and at the
            # end of this loop body -- looks like a double count; confirm.
            token_count += 1

            class_score = 0
            class_count = 1
            #print '----- %s -----' % mtoken
            for vn in vnclasses:
                v=verbnet.vnclass(vn)
                try:
                    restrictions = GetVerbnetRestrictions(v)
                except:
                    continue

             #   print restrictions
                if restrictions:
                    class_score = check_validity(current_srl, mindex[0], restrictions)
                    class_count += 1
                    #print class_score
                else:
                    #print "No restrictions for %s" % vn
                    pass
            # Average over classes; counters start at 1, hence the -1
            # correction once at least one class contributed.
            if class_count < 2:
                avg_class_score = class_score / class_count
            else:
                avg_class_score = class_score / (class_count - 1)
            #print '---------------'

            line_score += avg_class_score
            token_count += 1
        # Same averaging scheme per line as per class above.
        if token_count < 2:
            avg_line_score = line_score / token_count
        else:
            avg_line_score = line_score / (token_count - 1)

#        print "%s - %s - %s" % (sline[1], sline[2], line_score)
        print avg_line_score
示例#12
0
from nltk.corpus import wordnet as wn
from itertools import product
from nltk.stem.wordnet import WordNetLemmatizer
from pathlib import Path
import pandas as pd
import os
import nltk
import re
from nltk.corpus import verbnet as vn
from xml.etree import ElementTree

from stanfordcorenlp import StanfordCoreNLP

# Repository root: two directory levels above this file.
datapath = Path(__file__).resolve().parents[2]
# Side effect: attaches to / launches the local Stanford CoreNLP server.
nlp = StanfordCoreNLP(
    '/home/ruta/master-thesis/tools/stanford-corenlp-full-2018-10-05')

# Serialize one VerbNet class element to raw XML bytes.
# NOTE(review): variable name says 31_2 but the class fetched is
# 'escape-51.1' -- confirm which one is intended.
vn_31_2 = ElementTree.tostring(vn.vnclass('escape-51.1'))
示例#13
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from nltk.corpus import verbnet

# Exercise the basic VerbNet corpus-reader API.  The bare expression
# statements below discard their results; they only echo in a REPL.
verbnet.lemmas()[20:25]
verbnet.classids()[:5]
verbnet.classids('accept')
verbnet.vnclass('remove-10.1')  # doctest: +ELLIPSIS
verbnet.vnclass('10.1')  # doctest: +ELLIPSIS

# Print every thematic role of admire-31.2 together with its selectional
# restrictions, one role per paragraph.
vn_31_2 = verbnet.vnclass('admire-31.2')
for role_node in vn_31_2.findall('THEMROLES/THEMROLE'):
    print(role_node.attrib['type'])
    restriction_nodes = role_node.findall('SELRESTRS/SELRESTR')
    for restr in restriction_nodes:
        print('[%(Value)s%(type)s]' % restr.attrib)
    print()

print(verbnet.pprint('57'))
示例#14
0
# Demo of the RTE (recognizing textual entailment) corpus reader.
# NOTE(review): `rte` is not defined in this chunk; presumably
# nltk.corpus.rte imported elsewhere -- confirm.
print(rte.fileids())  # doctest: +ELLIPSIS
rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
print(rtepairs)  # doctest: +ELLIPSIS
print(rtepairs[5])
print(rtepairs[5].text)  # doctest: +NORMALIZE_WHITESPACE
print(rtepairs[5].hyp)
print(rtepairs[5].value)
xmltree = rte.xml('rte3_dev.xml')
print(xmltree)  # doctest: +SKIP
print(xmltree[7].findtext('t'))  # doctest: +NORMALIZE_WHITESPACE
# verbnet
# nltk.download('verbnet')
# Basic VerbNet corpus-reader calls; see nltk docs for the output shapes.
print(verbnet.lemmas()[20:25])
print(verbnet.classids()[:5])
print(verbnet.classids('accept'))
print(verbnet.vnclass('remove-10.1'))  # doctest: +ELLIPSIS
print(verbnet.vnclass('10.1'))  # doctest: +ELLIPSIS
# Print each thematic role of admire-31.2 with its selectional restrictions.
vn_31_2 = verbnet.vnclass('admire-31.2')
for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
    print(themrole.attrib['type'])
    for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
        print('[%(Value)s%(type)s]' % selrestr.attrib)
    print()
print(verbnet.pprint('57'))
# nps_chat
# nltk.download('nps_chat')
# Demo of the NPS chat corpus reader (words, tagged words/posts, raw XML).
print(nltk.corpus.nps_chat.words())
print(nltk.corpus.nps_chat.tagged_words())
print(nltk.corpus.nps_chat.tagged_posts())  # doctest: +NORMALIZE_WHITESPACE
print(nltk.corpus.nps_chat.xml_posts())  # doctest: +ELLIPSIS
posts = nltk.corpus.nps_chat.xml_posts()
示例#15
0
from nltk.corpus import verbnet

def GetVerbnetRestrictions(vnclass):
    """Gather selectional restrictions per thematic role for *vnclass*.

    Walks up the VerbNet class hierarchy (subclass -> parent, by trimming the
    last '-' segment of the class ID) until a top-level VNCLASS element is
    reached.  For each THEMROLE with at least one SELRESTR child, records
    (logic, [(Value, type), ...]) where 'logic' defaults to 'and'.  Because
    parents are visited last, a parent's entry overwrites a subclass entry
    for the same role name.

    Returns:
        dict mapping role name -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}

    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # FIX: `if restrictions:` tested Element truthiness, which is
            # False for a childless element and deprecated for presence
            # checks; test presence and children explicitly instead.
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add((predicate['Value'], predicate['type']))

                total = (restrictions.get('logic', 'and'), list(restriction_set))
                role_restrictions[role.attrib['type']] = total

        if vnclass.tag == 'VNCLASS':
            # Top-level class reached; nothing further to climb.
            break
        else:
            # Subclass ids look like "parent-39.1-2"; strip the last
            # segment to get the parent class id and continue.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)

    return role_restrictions

# Demo: list the VerbNet classes for 'drink', then collect the selectional
# restrictions of class '39.1-2' (result discarded here).
vnclasses = verbnet.classids('drink')
v=verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)