def genVerbnetFeatures(word, pos, features):
    """Append VerbNet selectional-restriction features for *word* to *features*.

    Only verbs are processed: for any other POS tag the function returns
    immediately and *features* is left untouched.  For each VerbNet class of
    the verb, every thematic-role selectional restriction type is appended as
    a "THEMETYPE_<type>" feature string.

    Args:
        word: lemma to look up in VerbNet (via the module-level ``vn`` corpus).
        pos: POS tag; must be exactly 'V' for anything to happen.
        features: list of feature strings, mutated in place.
    """
    if pos != 'V':
        return
    for vid in vn.classids(word):
        vnclass = vn.vnclass(vid)
        # Selectional-restriction types attached to the class's thematic roles.
        restr_types = [t.attrib['type']
                       for t in vnclass.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR')]
        for restr_type in restr_types:  # renamed from 'type': shadowed the builtin
            features.append("THEMETYPE_" + restr_type)
def getThematicRoles(self, verb):
    """Collect thematic-role labels (with selectional restrictions) for *verb*.

    Each role is rendered as its type followed by one "[<Value><type>]" tag
    per selectional restriction, e.g. "Agent[+animate]".  Roles are gathered
    across every VerbNet class returned by ``self.getClasses(verb)``.
    """
    roles = []
    for class_id in self.getClasses(verb):
        for themrole in vn.vnclass(class_id).findall('THEMROLES/THEMROLE'):
            restr_tags = [
                '[%(Value)s%(type)s]' % restr.attrib
                for restr in themrole.findall('SELRESTRS/SELRESTR')
            ]
            roles.append(themrole.attrib['type'] + ''.join(restr_tags))
    return roles
def is_transitive(lemma):
    """Best-effort check whether *lemma* looks transitive in VerbNet.

    Only the first class id and the first frame of that class are inspected;
    the test is a substring match of "Transitive" against the frame's primary
    description.  Returns False for unknown lemmas or on any lookup failure
    rather than raising.
    """
    try:
        cids = verbnet.classids(lemma)
        if not cids:
            # Explicit empty check: the original relied on cids[0] raising
            # IndexError into a bare except.
            return False
        frames = verbnet.frames(verbnet.vnclass(cids[0]))
        return "Transitive" in frames[0]['description']['primary']
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; any corpus/lookup error still means
        # "assume not transitive".
        return False
def test_remove_before_verb(self):
    """remove_before_v drops everything preceding the verb from a VerbNet SYNTAX."""
    from nltk.corpus import verbnet

    # Fixture: the first frame of 'buy''s first VerbNet class.
    first_classid = verbnet.classids('buy')[0]
    syntax = verbnet.vnclass(first_classid).find('FRAMES/FRAME/SYNTAX')

    expected = ET.fromstring("""<SYNTAX><VERB /><NP value="Theme" /></SYNTAX>""")
    self.assertEqual(
        syntax_to_str(remove_before_v(syntax)),
        syntax_to_str(expected))
def getFrames(verb, frames):
    """Accumulate member lemmas per normalized VerbNet frame for *verb*.

    For every VerbNet class of *verb*, the class's member lemmas are appended
    to ``frames[<normalized frame>]`` for each frame the class defines.  The
    frame string is normalized through the module-level helpers/patterns
    (mergeintrans, mergeNPs, scomp, german, shifted, finalPPs).

    Args:
        verb: lemma to look up in VerbNet.
        frames: dict mapping frame string -> list of member lemmas; mutated
            in place and also returned.
    """
    for classid in verbnet.classids(verb):
        vnclass = verbnet.pprint(verbnet.vnclass(classid))
        # r"\s+": raw string fixes the invalid "\s" escape; re.split avoids
        # recompiling a pattern on every iteration.
        members = re.split(r"\s+", membersPattern.search(vnclass).group("members"))
        for match in framePattern.finditer(vnclass):
            frame = mergeintrans(mergeNPs(match.group("frame")))
            frame = scomp.sub("SCOMP", frame)
            frame = german.sub("VERB", frame)
            frame = shifted.sub("NP VERB NP", frame)
            frame = finalPPs.sub("", frame)
            if frame in frames:
                frames[frame] += members
            else:
                # Store a copy: assigning `members` itself would alias one
                # list object under several frame keys, so a later `+=`
                # would silently corrupt the other entries.
                frames[frame] = list(members)
    return frames
def GetVerbnetRestrictions(vnclass):
    """Collect selectional restrictions per thematic role for *vnclass*.

    Walks from the given (sub)class up through its ancestors — a subclass id
    like "drink-39.1-2" has parent "drink-39.1" — gathering, for each role
    that carries SELRESTR entries, a tuple::

        (logic, [(Value, type), ...])

    where ``logic`` is the SELRESTRS element's 'logic' attribute
    (defaulting to 'and').  A role defined in a subclass is not overridden
    by its ancestors, since the subclass is visited first.

    Args:
        vnclass: an ElementTree element whose tag is VNCLASS or VNSUBCLASS.

    Returns:
        dict mapping role type -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}
    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # Explicit test: the original `if restrictions:` relied on
            # Element truthiness (child count), which is deprecated; this
            # keeps the same behavior (skip missing or empty SELRESTRS).
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add((predicate['Value'], predicate['type']))
                total = (restrictions.get('logic', 'and'), list(restriction_set))
                role_restrictions[role.attrib['type']] = total
        if vnclass.tag == 'VNCLASS':
            break  # reached the top-level class; stop climbing
        else:
            # VNSUBCLASS: strip the final "-N" segment to get the parent id.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)
    return role_restrictions
def get_verbnet_args(verb, verbose=False):
    """Return the VerbNet thematic-role types for *verb*, with fallbacks.

    Lookup order:
      1. Lemmatize *verb* and query VerbNet directly.
      2. If no class ids are found, try the first lemma of each WordNet
         synset of the verb.
      3. If that also fails, recurse (via NLPUtils.get_verbnet_args) on the
         first hypernym lemma of each synset.
    For each class id found, roles are read from the class; if a class has
    no roles, progressively shorter class numbers (stripping trailing "-N"
    segments) are tried as a fallback.

    Returns:
        list of role-type strings, [] when all synset lookups are exhausted,
        or None when class ids existed but no roles could be resolved.

    NOTE(review): appears to be a static method of NLPUtils (the recursion
    goes through NLPUtils.get_verbnet_args) — confirm against the class.
    """
    lemmatizer = WordNetLemmatizer()
    lemmatized_verb = lemmatizer.lemmatize(verb.lower(), 'v')
    classids = verbnet.classids(lemma=lemmatized_verb)
    if verbose:
        print('Class IDs for "{}": {}'.format(lemmatized_verb, classids))
    if len(classids) < 1:
        if verbose:
            print('No entry found on verbnet for "{}". Attempting WordNet synsets!'.format(lemmatized_verb))
        wn_synsets = wordnet.synsets(lemmatized_verb)
        # Fallback 1: first lemma of each WordNet synset.
        for synset in wn_synsets:
            if len(synset.lemmas()) < 1:
                continue
            candidate = str(synset.lemmas()[0].name())
            classids = verbnet.classids(lemma=candidate)
            if verbose:
                print('Class IDs for "{}": {}'.format(candidate, classids))
            if len(classids) > 0:
                break
        if len(classids) < 1:
            if verbose:
                print('Unable to find entries on verbnet for neither of the synsets... Will go recursive now (which is not a good thing!)')
            # Fallback 2: recurse on the first hypernym lemma of each synset.
            # Only the first synset with lemmas is ever used (unconditional
            # return inside the loop).
            for synset in wn_synsets:
                if len(synset.lemmas()) < 1:
                    continue
                candidate = str(synset.hypernyms()[0].lemmas()[0].name())
                return NLPUtils.get_verbnet_args(candidate, verbose=verbose)
            if verbose:
                print('Exhausted attempts... returning an empty list.')
            return []
    # `id` shadows the builtin here (kept as-is; documentation-only change).
    for id in classids:
        # Strip the lemma prefix: "admire-31.2" -> "31.2".
        class_number = id[id.find('-') + 1:]
        try:
            v = verbnet.vnclass(class_number)
            roles = [
                t.attrib['type'] for t in v.findall('THEMROLES/THEMROLE')
            ]
            pass
        except ValueError:
            print('VN class number not found: {}'.format(class_number))
            # Will handle these both below
            v = [None]
            roles = []
            pass
        # len(v) on an Element counts children; on the [None] sentinel it is
        # 1, so the fallback loop is entered either way until roles appear
        # or the class lookup yields a childless element.
        while len(roles) < 1 and len(v) > 0:
            # Drop the last "-N" segment, e.g. "39.1-2" -> "39.1".
            # NOTE(review): if no '-' remains, rfind returns -1 and this
            # silently truncates the last character instead — confirm
            # termination for single-segment class numbers.
            fallback_class_number = class_number[:class_number.rfind('-')]
            if verbose:
                print('No roles found for class {}, falling back to {}.'.format(class_number, fallback_class_number))
            class_number = fallback_class_number
            try:
                v = verbnet.vnclass(class_number)
                roles = [
                    t.attrib['type'] for t in v.findall('THEMROLES/THEMROLE')
                ]
                pass
            except ValueError:
                # Go on with the loop
                v = [None]
                roles = []
                pass
        if len(roles) > 0:
            if verbose:
                print('Roles found: {}'.format(roles))
            return roles
    return None
# # print(i) # random.shuffle(featuresset) # classifier = nltk.NaiveBayesClassifier.train(featuresset) # save_classifier_NBC(classifier) #-----------------------------------------testing--------------------------------------------------- input = "He need a ride from his home." verb_list, frames_list = prim_fram(input) print(frames_list) print(nltk.pos_tag(nltk.word_tokenize(input))) print(verb_list) for r in range(len(verb_list)): keys = [] ids = vb.classids(verb_list[r]) for i in ids: u = vb.vnclass(i) for j in [l.attrib['type'] for l in u.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR')]: keys.append(j) for j in [l.attrib['type'] for l in u.findall('THEMROLES/THEMROLE')]: keys.append(j) for j in [l.attrib['value'] for l in u.findall('FRAMES/FRAME/SEMANTICS/PRED')]: keys.append(j) f = open("tmp/features_verbs.txt","r") word_features = [] for l,i in enumerate(f): word_features.append(i) f.close() def find_features(document, input): words = set(document)
def GetVerbnetRestrictions(vnclass):
    """Collect selectional restrictions per thematic role for *vnclass*.

    Walks from the given (sub)class up through its ancestors — a subclass id
    like "drink-39.1-2" has parent "drink-39.1" — gathering, for each role
    that carries SELRESTR entries, a tuple::

        (logic, [(Value, type), ...])

    where ``logic`` is the SELRESTRS element's 'logic' attribute
    (defaulting to 'and').

    Args:
        vnclass: an ElementTree element whose tag is VNCLASS or VNSUBCLASS.

    Returns:
        dict mapping role type -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}
    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # Explicit test: `if restrictions:` relied on deprecated Element
            # truthiness; this keeps the same skip-empty behavior.
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add(
                        (predicate['Value'], predicate['type']))
                total = (restrictions.get('logic', 'and'),
                         list(restriction_set))
                role_restrictions[role.attrib['type']] = total
        if vnclass.tag == 'VNCLASS':
            break  # top-level class reached; stop climbing
        else:
            # VNSUBCLASS: strip the final "-N" segment to get the parent id.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)
    return role_restrictions

# Demo usage: restrictions for the 'drink' class (39.1-2).
vnclasses = verbnet.classids('drink')
v = verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)
# Demo: explore the VerbNet corpus reader for the lemma 'take'.
from nltk.corpus import verbnet

my_classids = verbnet.classids(lemma='take')
print(my_classids)
# Other reader methods, kept for reference:
# my_lemmas = verbnet.lemmas(my_classids)
# my_longid = longid(my_shortid)
# my_shortid = shortid(my_longid)
for i in my_classids:
    my_vnclass = verbnet.vnclass(i)
    # my_wordnetids = verbnet.wordnetids(mi)
    # Human-friendly methods
    # NOTE(review): return value discarded — presumably pprint returns the
    # string rather than printing; wrap in print() if output is wanted.
    verbnet.pprint(my_vnclass)
    # vnframe = my_vnclass.findall('FRAMES/FRAME')
    # print(verbnet.pprint_description(vnframe))
    # print(verbnet.pprint_frames(vnframe))
    print(verbnet.pprint_members(my_vnclass))
    # print(verbnet.pprint_semantics(vnframe))
    print(verbnet.pprint_subclasses(my_vnclass))
    # print(verbnet.pprint_syntax(vnframe))
    # x = verbnet.pprint_themroles(my_vnclass)
    print(verbnet.pprint_themroles(my_vnclass))
'''for j in x.split("]"):
    print(j)'''
def process_srl(srl_output, actual_data, just_phrases):
    """Score each corpus line by how well its verbs' VerbNet selectional
    restrictions match the semantic-role-labeled text, printing one average
    score per line.

    NOTE(review): Python 2 code (print statement, str.decode on tokens).

    Args:
        srl_output: path to an SRL output file; records are separated by a
            "\\n================\\n" delimiter line.
        actual_data: path/handle consumed by create_vector; each line is
            tab-separated with the metaphor in field 1 and its sense
            (figurative/literal) in field 2.
        just_phrases: path/handle consumed by create_vector; one phrase
            sentence per corpus line, aligned by index with actual_data.
    """
    porter_stemmer = PorterStemmer()
    wn_lem = WordNetLemmatizer()
    file_open = open (srl_output, "r")
    output = file_open.read()
    # One SRL record per corpus line, stripped of surrounding whitespace.
    srl_output = output.split("\n================\n")
    srl_list = []
    [srl_list.append(line.strip()) for line in srl_output]
    phrase_sentence = create_vector(just_phrases)
    corpus_data = create_vector(actual_data)
    number = 0
    for line in corpus_data:
        sline = line.split("\t")
        sense = sline[2] # figurative or literal
        metaphor = sline[1] # along the line <- the metaphor itself
        try:
            current_srl = srl_list[number].split("\n") # semantic role labeling of give sentece
        except:
            # Debug hook: drop into pdb if SRL records and corpus lines
            # are misaligned.
            import pdb; pdb.set_trace()
        #mtokens = metaphor.split(" ")
        # Tokenize the phrase, drop English stopwords and stray punctuation,
        # then POS-tag and keep only the verbs.
        mtokens_t = word_tokenize(phrase_sentence[number])
        mtokens_t = [w for w in mtokens_t if not w.decode('utf8') in nlcor.stopwords.words('english')]
        mtokens = filter(lambda word: word not in ",-'", mtokens_t)
        sane_mt = [mt.decode('utf8') for mt in mtokens]
        pos_mtokens = nltk.pos_tag(sane_mt)
        only_verbs = [tkn[0] for tkn in pos_mtokens if 'VB' in tkn[1]]
        #print "==============================================="
        line_score = 0
        token_count = 1
        number += 1
        #print "phrase tokens: %s" % mtokens_t
        #print "only verbs: %s" % only_verbs
        for mtoken in only_verbs:
            # Look up VerbNet classes for the token, retrying with the
            # lemmatized form if the surface form has none.
            vnclasses = verbnet.classids(mtoken)
            if not vnclasses:
                vnclasses = verbnet.classids(wn_lem.lemmatize(mtoken))
            if not vnclasses:
                continue
            #print "vnclasses: %s" % vnclasses
            # Index of the first SRL line mentioning the stemmed token.
            mindex = [index for index, sl in enumerate(current_srl) if porter_stemmer.stem(mtoken) in sl.decode('utf8')]
            if not mindex:
                # print 0
                continue
            token_count += 1
            class_score = 0
            class_count = 1
            #print '----- %s -----' % mtoken
            # Average the restriction-validity score over the token's
            # VerbNet classes that actually carry restrictions.
            # NOTE(review): class_score is overwritten, not summed, per
            # class — only the last scored class contributes; confirm
            # whether accumulation was intended.
            for vn in vnclasses:
                v=verbnet.vnclass(vn)
                try:
                    restrictions = GetVerbnetRestrictions(v)
                except:
                    continue
                # print restrictions
                if restrictions:
                    class_score = check_validity(current_srl, mindex[0], restrictions)
                    class_count += 1
                    #print class_score
                else:
                    #print "No restrictions for %s" % vn
                    pass
            # class_count starts at 1, so subtract 1 when any class scored.
            if class_count < 2:
                avg_class_score = class_score / class_count
            else:
                avg_class_score = class_score / (class_count - 1)
            #print '---------------'
            line_score += avg_class_score
            token_count += 1
        # Same off-by-one convention for the per-line average.
        if token_count < 2:
            avg_line_score = line_score / token_count
        else:
            avg_line_score = line_score / (token_count - 1)
        # print "%s - %s - %s" % (sline[1], sline[2], line_score)
        print avg_line_score
# Imports and environment setup: WordNet/VerbNet corpus readers plus a
# local Stanford CoreNLP server wrapper.
from nltk.corpus import wordnet as wn
from itertools import product
from nltk.stem.wordnet import WordNetLemmatizer
from pathlib import Path
import pandas as pd
import os
import nltk
import re
from nltk.corpus import verbnet as vn
from xml.etree import ElementTree
from stanfordcorenlp import StanfordCoreNLP

# Project root: two directories above this file.
datapath = Path(__file__).resolve().parents[2]
# Launches/attaches to a local CoreNLP installation (hard-coded path).
nlp = StanfordCoreNLP(
    '/home/ruta/master-thesis/tools/stanford-corenlp-full-2018-10-05')
# Serialized XML of the VerbNet class 'escape-51.1'.
# NOTE(review): the variable name says 31-2 but the class id is 51.1 —
# likely a stale name from an earlier example; confirm before relying on it.
vn_31_2 = ElementTree.tostring(vn.vnclass('escape-51.1'))
#!/usr/bin/python # -*- coding: utf-8 -*- from nltk.corpus import verbnet verbnet.lemmas()[20:25] verbnet.classids()[:5] verbnet.classids('accept') verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS verbnet.vnclass('10.1') # doctest: +ELLIPSIS vn_31_2 = verbnet.vnclass('admire-31.2') for themrole in vn_31_2.findall('THEMROLES/THEMROLE'): print(themrole.attrib['type']) for selrestr in themrole.findall('SELRESTRS/SELRESTR'): print('[%(Value)s%(type)s]' % selrestr.attrib) print() print(verbnet.pprint('57'))
print(rte.fileids()) # doctest: +ELLIPSIS rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml']) print(rtepairs) # doctest: +ELLIPSIS print(rtepairs[5]) print(rtepairs[5].text) # doctest: +NORMALIZE_WHITESPACE print(rtepairs[5].hyp) print(rtepairs[5].value) xmltree = rte.xml('rte3_dev.xml') print(xmltree) # doctest: +SKIP print(xmltree[7].findtext('t')) # doctest: +NORMALIZE_WHITESPACE # verbnet # nltk.download('verbnet') print(verbnet.lemmas()[20:25]) print(verbnet.classids()[:5]) print(verbnet.classids('accept')) print(verbnet.vnclass('remove-10.1')) # doctest: +ELLIPSIS print(verbnet.vnclass('10.1')) # doctest: +ELLIPSIS vn_31_2 = verbnet.vnclass('admire-31.2') for themrole in vn_31_2.findall('THEMROLES/THEMROLE'): print(themrole.attrib['type']) for selrestr in themrole.findall('SELRESTRS/SELRESTR'): print('[%(Value)s%(type)s]' % selrestr.attrib) print() print(verbnet.pprint('57')) # nps_chat # nltk.download('nps_chat') print(nltk.corpus.nps_chat.words()) print(nltk.corpus.nps_chat.tagged_words()) print(nltk.corpus.nps_chat.tagged_posts()) # doctest: +NORMALIZE_WHITESPACE print(nltk.corpus.nps_chat.xml_posts()) # doctest: +ELLIPSIS posts = nltk.corpus.nps_chat.xml_posts()
from nltk.corpus import verbnet


def GetVerbnetRestrictions(vnclass):
    """Collect selectional restrictions per thematic role for *vnclass*.

    Walks from the given (sub)class up through its ancestors — a subclass id
    like "drink-39.1-2" has parent "drink-39.1" — gathering, for each role
    that carries SELRESTR entries, a tuple::

        (logic, [(Value, type), ...])

    where ``logic`` is the SELRESTRS element's 'logic' attribute
    (defaulting to 'and').

    Args:
        vnclass: an ElementTree element whose tag is VNCLASS or VNSUBCLASS.

    Returns:
        dict mapping role type -> (logic, list of (Value, type) pairs).
    """
    role_restrictions = {}
    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # Explicit test: `if restrictions:` relied on deprecated Element
            # truthiness; this keeps the same skip-empty behavior.
            if restrictions is not None and len(restrictions):
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add((predicate['Value'], predicate['type']))
                total = (restrictions.get('logic', 'and'), list(restriction_set))
                role_restrictions[role.attrib['type']] = total
        if vnclass.tag == 'VNCLASS':
            break  # top-level class reached; stop climbing
        else:
            # VNSUBCLASS: strip the final "-N" segment to get the parent id.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)
    return role_restrictions


# Demo usage: restrictions for the 'drink' class (39.1-2).
vnclasses = verbnet.classids('drink')
v = verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)