def addRoleSet(name): global doneCount roleSetMainName = name.split('.')[0] roleSetMainKatum = roleset.get(roleSetMainName) roleSetNewKatum = roleSetMainKatum.get(roleSetMainKatum.countI) rolesetsensenum = senseNumber.get(name.split('.')[1]) roleSetNewKatum._is(rolesetsensenum, False) if (pb.roleset(name) != None): verbnetCls = pb.roleset(name).get('vncls') if (verbnetCls != None): verbnetID = verbClassID.find(verbnetCls) if (verbnetID != None): for vnClass in verbnetID.I: roleSetNewKatum._is(vnClass, False) print doneCount, vnClass.a0.O, vnClass.O, roleSetMainKatum.O, roleSetNewKatum.O doneCount += 1 roleSetMeaning = pb.roleset(name).get('name') if (roleSetMeaning != None): roleSetMeaningKatum = meaning.get(roleSetMeaning) roleSetNewKatum._is(roleSetMeaningKatum, False) for role in pb.roleset(name).findall("roles/role"): argCount = role.attrib['n'] argName = role.attrib['descr'] if argCount != None and argName != None: argumentKatum = argument.get(argName) argumentNameNumKatum = argumentKatum.get(argumentKatum.countI) argNumKatum = argumentNumber.get(argCount) argumentNameNumKatum._is(argNumKatum, False) roleSetNewKatum._is(argumentNameNumKatum, False)
def fileGenerator(rolesetString):
    """Append the first <example> element of the given PropBank roleset to
    propbank-examples.xml (one example per line).

    Fixes: the original tested `ElementTree.tostring(find(...)) != None`,
    but tostring() never returns None and raises when find() yields None;
    it also leaked the handle on error and shadowed the builtin `file`.
    """
    with open("propbank-examples.xml", "a") as outfile:
        example = pb.roleset(rolesetString).find('example')
        # Guard on the element itself before serializing it.
        if example is not None:
            outfile.write(
                ElementTree.tostring(example).decode('utf8').strip())
            outfile.write("\n")
def __init__(self, verb_string, sent_string, pb_inst, label, mismatch):
    """Wrap a PropBank instance, extracting the head nouns of ARG0-ARG2
    from the parse tree and the first word of the roleset's 0/1 role
    descriptions.

    NOTE(review): reconstructed from whitespace-mangled source — the
    argument/role extraction is assumed to run only when the roleset id
    ends in a digit (otherwise `self.rs` would be unbound); confirm.
    """
    self.label = label
    self.mismatch = mismatch
    self.sent_string = sent_string
    self.verb_string = verb_string
    self.tree = pb_inst.tree
    self.arg0 = None
    self.arg1 = None
    self.arg2 = None
    self.rs_arg0 = None
    self.rs_arg1 = None
    if pb_inst.roleset[-1:].isnumeric():
        self.rs = propbank.roleset(pb_inst.roleset)
        for arg in pb_inst.arguments:
            # Unwrap split/chain tree pointers (a chain may hide a split).
            if isinstance(arg[0], PropbankSplitTreePointer):
                arg = arg[0].pieces
            if isinstance(arg[0], PropbankChainTreePointer):
                arg = arg[0].pieces
            if isinstance(arg[0], PropbankSplitTreePointer):
                arg = arg[0].pieces
            string_rep = str(
                self.tree[arg[0].treepos(self.tree)].productions())
            tokens = string_rep.strip('[]').split()
            noun_list = []
            # Collect right-hand sides of NN*/PRP* productions.
            for idx, tok in enumerate(tokens):
                if tok[:2] == 'NN' and tokens[idx + 1] == '->':
                    noun_list.append(tokens[idx + 2])
                elif tok[:3] == 'PRP' and tokens[idx + 1] == '->':
                    noun_list.append(tokens[idx + 2])
            if len(noun_list) > 1:
                head = noun_list[0].strip(',\"')
                if arg[1] == 'ARG0':
                    self.arg0 = head
                elif arg[1] == 'ARG1':
                    self.arg1 = head
                elif arg[1] == 'ARG2':
                    self.arg2 = head
        # First word of the descriptions for numbered roles 0 and 1.
        for role in self.rs.findall('roles/role'):
            if role.attrib['n'][0] == '0':
                self.rs_arg0 = role.attrib['descr'].split()[0]
            if role.attrib['n'][0] == '1':
                self.rs_arg1 = role.attrib['descr'].split()[0]
def rs_args(id, cache={}): if id in cache: return cache[id] print 'roleset %s (cache miss)' % id args = {} try: rs = propbank.roleset(id) roles = rs[0] for i, role in enumerate(roles.findall('role')): args['ARG'+role.attrib['n']] = role.attrib['descr'] except ValueError as e: print e cache[id] = args return args
def rs_args(id, cache={}): if id in cache: return cache[id] print 'roleset %s (cache miss)' % id args = {} try: rs = propbank.roleset(id) roles = rs[0] for i, role in enumerate(roles.findall('role')): args['ARG' + role.attrib['n']] = role.attrib['descr'] except ValueError as e: print e cache[id] = args return args
def findexamples(self): self.allexamples=[] processed=0 for instance in pb.instances(): if self.testing>5: print instance.roleset print instance.arguments try: roleset=pb.roleset(instance.roleset) examples=roleset.findall('example') for e in examples: #print ElementTree.tostring(e).decode('utf8').strip() self.allexamples.append(e) except: pass processed+=1 if self.testing > 2 and processed>10: break print "Number of examples: ",len(self.allexamples)
def roleFinder(verb): #--converting verb into its present tense print "verb**",verb if verb=="find" or verb=="found" : tverb="find" else: tverb=en.verb.present(verb) print "targetverb **** ",tverb if tverb=="emerge": propVerb=tverb+".02" else: propVerb=tverb+".01" print propVerb if propVerb=="re-cover.01": propVerb="recover.01" allroles={} if propVerb=="vaccinate.01": allroles={'A0':'Vaccinator','A1':'Vaccinated','A2':'Against_what/disease'} else: roles=propbank.roleset(propVerb) for role in roles.findall('roles/role'): role.attrib['descr']=role.attrib['descr'].replace(" ","-") allroles["A"+str(role.attrib['n'])]=role.attrib['descr'] return allroles
def get_role_descriptions(self: Proposition) -> Dict[str, str]:
    """Retrieves the role descriptions of a specific roleset from
    PropBank/NomBank.

    The roleset id's trailing "-v"/"-n" selects the corpus; any other
    suffix yields only the modifier + core-role defaults.
    """
    pred_roleset = (self.generate_roleset()
                    if self.pred_roleset is None else self.pred_roleset)
    roleset_id, pos = pred_roleset.rsplit("-", 1)
    dict_semroles = DICT_MODIFIERS.copy()
    general_dict = DICT_CORE_ROLES.copy()
    # Dispatch on part of speech: verbs -> PropBank, nouns -> NomBank.
    reader = {"v": pb, "n": nb}.get(pos)
    if reader is None:
        dict_semroles.update(general_dict)
        return dict_semroles
    roleset = reader.roleset(roleset_id)
    for role in roleset.findall("roles/role"):
        dict_semroles[f"A{role.attrib['n']}"] = role.attrib["descr"]
    dict_semroles.update(general_dict)
    return dict_semroles
def readFile(): input_file = open( "C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoices.txt", "r") #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiagnosis.txt", "r") #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiabetesWhole.txt", "r") lines = input_file.readlines() input_file.close() annotationsX = [] annotationsSLR = [] annotationsNER = [] for x in lines: annotationX = x annotationSLR = annotator.getAnnotations(x, dep_parse=True)['srl'] #annotationNER = annotator.getAnnotations(x,dep_parse=True)['ner'] annotationsX.append(annotationX) annotationsSLR.append(annotationSLR) #annotationsNER.append(annotationNER) size = len(annotationsSLR) print size A0 = 0 A1 = 0 pbroles = [] annotationsA0 = [] annotationsA1 = [] for an in range(5): print annotationsX[an] print annotationsSLR[an] sizeIn = len(annotationsSLR[an]) #print sizeIn for an2 in range(sizeIn): print "--------------------------------------------------------------------------------------------------------" print annotationsSLR[an][an2]["V"] w = Word(annotationsSLR[an][an2]["V"]).lemmatize("v") #print w #print wn.synset(w+'.v.01') try: for role in propbank.roleset(w + '.01').findall("roles/role"): print(role.attrib['f'], role.attrib['n'], role.attrib['descr']) pbroles.append(role.attrib['descr']) #for role in propbank.roleset(w+'.01').findall("aliases/alias"): #print(role.attrib['framenet'], role.attrib['pos'], role.attrib['verbnet']) except: pass try: print( wn.lemma(w + '.v.01.' 
+ w).derivationally_related_forms()) except: pass if "A0" in annotationsSLR[an][an2]: print annotationsSLR[an][an2]["A0"] A0 = annotationsSLR[an][an2]["A0"] #try: #A0 = TextBlob(A0, np_extractor=extractor) #A0 = A0.noun_phrases[0] #print A0 #except: #pass try: annotationsA0 = WordNet.spotlightSearch(A0) annotationsA0 = annotationsA0[0].get('URI') except: annotationsA0 = "unknown" pass if "A1" in annotationsSLR[an][an2]: print annotationsSLR[an][an2]["A1"] A1 = annotationsSLR[an][an2]["A1"] #try: #A1 = TextBlob(A1, np_extractor=extractor) #A1 = A1.noun_phrases[0] #print A1 #except: #pass try: annotationsA1 = WordNet.spotlightSearch(A1) annotationsA1 = annotationsA1[0].get('URI') except: annotationsA1 = "unknown" pass print pbroles print "--------------------------------------------------------------------------------------------------------" CreateGraphNeo4J.createGraph(w, A0, A1, pbroles, annotationsA0, annotationsA1) del pbroles[:] annotationsA0 = [] annotationsA1 = [] A0 = 0 A1 = 0
def get_srl_dict(sense, srl_dict, objects, coref_chain_list):
    """Map raw SRL argument labels in `srl_dict` onto PropBank role
    descriptions and align their values with annotated image objects.

    Returns (mapped_srl_dict, region_tag_tuples) or (None, None) when the
    sense has no PropBank frame, an SRL label is not in the frame, PoS
    tagging fails, or nothing beyond the predicate survives mapping.

    NOTE(review): reconstructed from whitespace-mangled source — confirm
    nesting of the object/coref matching against the original.
    """
    region_tag_tuples = []
    # Only continue if PropBank has a frame for this sense.
    try:
        roleset = propbank.roleset(sense)
    except ValueError:
        print('no matching frames for predicate ', sense)
        return None, None
    role_dict = {}
    for role in roleset.findall("roles/role"):
        arg = 'A' + role.attrib['n']
        name = role.attrib['descr'].split(',')[0]
        role_dict[arg] = name
    print ('role dict from srl tool :' , srl_dict)
    # add place, time and manner (and other AM-* modifiers)
    role_dict[u'AM-LOC'] = 'place'
    role_dict[u'AM-TMP'] = 'time'
    role_dict[u'AM-MNR'] = 'manner'
    role_dict[u'AM-DIR'] = 'direction'
    role_dict[u'AM-ADV'] = 'adverbial modification'
    role_dict[u'AM-DIS'] = 'doscourse marker'
    role_dict[u'AM-EXT'] = 'extent'
    role_dict[u'AM-MOD'] = 'general modification'
    role_dict[u'AM-NEG'] = 'negation'
    role_dict[u'AM-PNC'] = 'proper noun component'
    role_dict[u'AM-PRD'] = 'secondary predicate'
    role_dict[u'AM-PRP'] = 'purpose'
    role_dict[u'AM-REC'] = 'reciprocal'
    mapped_srl_dict = OrderedDict()
    mapped_srl_dict['predicate'] = sense.split('.')[0]
    # keep only NN in value fields of roles
    for key, comp_value in srl_dict.iteritems():
        if key != 'predicate' and key != 'V':
            # Drop captions whose SRL labels do not exist in the frame.
            if key not in role_dict:
                return None, None
            key = role_dict[key]
            print('tags needs for ', comp_value, region_tag_tuples)
            tag_tuples, region_tag_tuples = get_tag_tuples(
                comp_value, region_tag_tuples)
            if tag_tuples is None:
                return None, None
            print(tag_tuples)
            # If the value contains a verb, keep only the words before it;
            # if nothing remains, skip this role.
            verb_removed_tag_tuples = []
            verb_removed_comp = ''
            verb_found = False
            for (word, tag) in tag_tuples:
                if tag in verb_tags:
                    verb_found = True
                if not verb_found:
                    verb_removed_comp = verb_removed_comp + word + ' '
                    verb_removed_tag_tuples.append((word, tag))
            if len(verb_removed_tag_tuples) == 0:
                continue
            if len(verb_removed_tag_tuples) > 1:
                value = [word for (word, tag) in verb_removed_tag_tuples
                         if tag in noun_tags]
                if len(value) > 0:
                    value = value[0]
                    # Try to map the head noun to an annotated image object.
                    object_names = [obj['names'][0] for obj in objects]
                    if value in object_names:
                        mapped_srl_dict[key] = value
                    else:
                        # no exact match found - use coref details from flickr30k
                        similar_entities = []
                        for chain in coref_chain_list:
                            if value in chain and len(chain) > 1:
                                for ent in chain:
                                    if ent is not value:
                                        similar_entities.append(ent)
                        if len(similar_entities) > 0:
                            ob_set = set(object_names)
                            coref_set = set(similar_entities)
                            matching_ent = ob_set.intersection(coref_set)
                            mapped_srl_dict[key] = ",".join(matching_ent)
                            # Keep the PoS tag entries in sync with the
                            # renamed value.
                            region_tag_tuples_updated = []
                            for tuple1 in region_tag_tuples:
                                if tuple1[0] == value:
                                    region_tag_tuples_updated.append(
                                        (",".join(matching_ent), tuple1[1]))
                                else:
                                    region_tag_tuples_updated.append(
                                        (tuple1[0], tuple1[1]))
                            region_tag_tuples = region_tag_tuples_updated
                else:
                    mapped_srl_dict[key] = verb_removed_comp.strip()
            else:
                mapped_srl_dict[key] = verb_removed_comp.strip()
    # Nothing besides the predicate itself was mapped.
    if len(mapped_srl_dict.keys()) == 1:
        return None, None
    print('each srl dict ::::' , mapped_srl_dict)
    return mapped_srl_dict, region_tag_tuples
def getPropbankInfo(wordWithSense):
    """Return the <role> elements of PropBank roleset `wordWithSense`
    (e.g. "send.01"), or None when the roleset is unknown."""
    try:
        rs = propbank.roleset(wordWithSense)
    except ValueError:
        # Unknown roleset id.
        return None
    return rs.findall("roles/role")
__author__ = 'juliewe' from nltk.corpus import propbank from xml.etree import ElementTree n=103 if __name__=='__main__': pb_instances=propbank.instances() #print(pb_instances) instance=pb_instances[n] print instance.roleset,instance.predicate,instance.arguments send_01=propbank.roleset('send.01') for role in send_01.findall("roles/role"): print role.attrib['n'],role.attrib['descr'] #print (ElementTree.tostring(send_01.find('example')).decode('utf8').strip()) examples=send_01.findall('example') print len(examples) for e in examples: print ElementTree.tostring(e).decode('utf8').strip()
from nltk.corpus import propbank

# Print each numbered role of PropBank frame turn.01 with its description.
turn_01 = propbank.roleset('turn.01')
for r in turn_01.findall("roles/role"):
    print(r.attrib['n'], r.attrib['descr'])
from nltk.corpus import treebank
from nltk.corpus import propbank

# Detailing the Propbank
print("Creating Propbank Objects...")
pb_instances = propbank.instances()
pb_verbs = propbank.verbs()
print()
print("Number of items defined in the propbank: ", len(pb_instances))
print("Number of verbs defined in the propbank: ", len(pb_verbs))
print()

# Argument Structure of 'agree' and 'fall'
# 'agree'
agreeCount = 0
print("Argument Structure of 'agree': ")
# BUG FIX: this queried 'join.01' while every surrounding print and the
# sentence filter below refer to 'agree.01'.
for role in propbank.roleset('agree.01').findall('roles/role'):
    print(role.attrib['n'], role.attrib['descr'])
print()
print("Treebank Sentences with 'agree': ")
for (n, i) in enumerate(pb_instances[:9353]):
    if i.roleset == 'agree.01' and len(i.tree.leaves()) <= 30:
        print("Sentence ", n, ":", ' '.join(i.tree.leaves()))  # This prints ALL of the sentences
        print("Arguments of Sentence #", (agreeCount + 1))
        for (argloc, argid) in i.arguments:
            print(argid)
            print(argloc.select(i.tree))
        #print(pb_instances[n].tree.draw())  # draws a LOT of trees
        agreeCount = agreeCount + 1
print("There were ", agreeCount, " normally-sized sentences with 'agree'.")

# 'fall'
fallCount = 0
# print "P :",p # print "X train : ",x_train[0] # print "x_test : ",x_test[0] # print "x_test : ",x_test[1] # print "x_test : ",x_test[2] a=[] main_role=""; max_count=0; for x in xrange(0,len(y_test)): pred=svc.predict(x_test[x]) # print x,pred a.append(pred[0]) print pred[0],y_test[x] # p = svc.predict_proba( x_test[x] ) # print max(p[0]) if(a.count(pred[0])>max_count): main_role=pred[0] max_count+=1 print "Role of verb : ",main_role role_01=propbank.roleset(main_role) # auc = AUC( y_test, p[:,1] ) # print "SVM AUC", auc turn_01 = propbank.roleset('turn.01') for role in role_01.findall("roles/role"): print(role.attrib['n'], role.attrib['descr'])