def frame_chose():
    # Collect every frame's ID and the names of its lexical units.
    fs = fn.frames()
    fs_dic = {}
    fs_ID = []
    for f in fs:
        fs_ID.append(f.ID)
        fs_dic[f.name] = []
        lexes = f.lexUnit
        for lex in lexes:
            fs_dic[f.name].append(lexes[lex].name)
    # Use a real copy: removing from the list being iterated would skip frames.
    fs_ID_copy = list(fs_ID)
    result = []
    for f1 in fs_ID:
        fs_ID_copy.remove(f1)
        f1_name = fn.frame(f1).name
        set1 = set(fs_dic[f1_name])
        for f2 in fs_ID_copy:
            f2_name = fn.frame(f2).name
            set2 = set(fs_dic[f2_name])
            r = list(set1 & set2)
            result.append((f1_name, f2_name, r, len(r)))
    # Keep only frame pairs that share at least 10 lexical units.
    result = sorted(result, key=lambda x: x[3], reverse=True)
    frame_chose = []
    for r in result:
        if r[3] >= 10:
            frame_chose.append(r)
    return frame_chose
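
# A minimal sanity-check sketch for the pairwise overlap computed by
# frame_chose() above: count the lexical-unit names shared by one
# illustrative pair of frames ('Apply_heat' / 'Absorb_heat' are assumptions,
# not output of the function).
from nltk.corpus import framenet as fn

lus1 = set(fn.frame('Apply_heat').lexUnit.keys())
lus2 = set(fn.frame('Absorb_heat').lexUnit.keys())
shared = sorted(lus1 & lus2)
print(len(shared), shared[:5])
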
def map_terms_to_senses(frames):
    # for each frame
    for frame in frames:
        f = fn.frame(list(frame[0].keys())[0])
        # for each section in a single frame
        for index in range(len(frame)):
            # for each term in a single section
            for value in frame[index].keys():
                # if the term contains '.' or a POS tag, normalize it
                w = value
                if not is_normalidez(value):
                    w = normalizes_string(value)
                # FRAME NAME: Ctx(w) = f.definition
                if index == FRAME_NAME:
                    best_sense = wsd.lesk_algorithm(w, f.definition)
                    frame[index][value] = str(best_sense).strip()
                # FRAME ELEMENT: Ctx(w) = f.FE[value].definition
                if index == FRAME_ELEMENT:
                    best_sense = wsd.lesk_algorithm(w, f.FE[value].definition)
                    frame[index][value] = str(best_sense).strip()
                # LEXICAL UNIT: Ctx(w) = f.lexUnit[value].definition
                if index == LEXICAL_UNIT:
                    best_sense = wsd.lesk_algorithm(w, f.lexUnit[value].definition)
                    frame[index][value] = str(best_sense).strip()
    return frames
def getFrameSynset(self, synset):
    try:
        if synset in self.frame_dict:
            # print synset, self.frame_dict[synset]
            frame = fn.frame(self.frame_dict[synset].capitalize())
            return frame
    except:
        pass
    return None
def main():
    frame1 = []
    frame2 = []
    accuracy = []
    fs = frame_chose()
    for r in fs:
        f1 = fn.frame(r[0])
        f2 = fn.frame(r[1])
        lex_list = r[2]
        acc = sent_chosen(f1, f2, lex_list)
        frame1.append(r[0])
        frame2.append(r[1])
        accuracy.append(acc)
    ja_dict = {"Frame1": frame1, "Frame2": frame2, "Accuracy": accuracy}
    df_ja = pd.DataFrame(data=ja_dict)
    df_ja.to_csv('lesk_output/frame_random.csv')
def findCoreType(self, wordList):
    dictim = []
    for word in wordList:
        word_ = '^{}$'.format(word)
        lus = fn.lus(word_)
        if len(lus) > 0:
            ID = lus[0].frame.ID
            dicti = [fename for fename, fe in fn.frame(ID).FE.items()
                     if fe.coreType == 'Core']
            if len(dicti) > 0:
                dictim.append(dicti[0])
    return dictim
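
# A standalone sketch of the per-word lookup done in findCoreType() above,
# without the enclosing class: anchor the LU name with ^...$ so fn.lus()
# matches it exactly, then list the frame's Core FEs ('avenge.v' is only an
# illustrative lexical unit).
from nltk.corpus import framenet as fn

lus = fn.lus(r'^avenge\.v$')
if lus:
    frame_id = lus[0].frame.ID
    core_fes = [name for name, fe in fn.frame(frame_id).FE.items()
                if fe.coreType == 'Core']
    print(core_fes)
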
def get_frame_definition(self, frame):
    if nltk == True:
        frame_idx = self.fn17_idx[frame]
        f = fn.frame(frame_idx)
        definition = f.definition
        return definition
    else:
        print('please install nltk FrameNet first. '
              'refer: http://www.nltk.org/howto/framenet.html')
        return False
def compare_frames(frame1, frame2):
    # Return 2 for the same frame, 1 for directly related frames, 0 otherwise.
    if frame1 == frame2:
        return 2
    frame_relations = []
    for relation in fn.frame(frame1).frameRelations:
        frame_relations.extend(
            [relation.superFrameName.lower(), relation.subFrameName.lower()])
    print(frame_relations)
    if frame2 in frame_relations:
        return 1
    else:
        return 0
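
# A hedged usage sketch for compare_frames(): frame1 must be a valid FrameNet
# frame name; frame2 is checked against lowercased relation names, so it is
# passed in lowercase here. The frame names are illustrative.
print(compare_frames('Apply_heat', 'Apply_heat'))   # 2: identical frames
print(compare_frames('Apply_heat', 'absorb_heat'))  # 1 if directly related, otherwise 0
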
def map(self, frame_name, slot_value, slot_type):
    """Map a FrameNet frame slot to a WordNet sense.

    Args:
        frame_name (str): exact name of the frame (no regex expression)
        slot_value (str): value of the slot to map.
        slot_type (FrameNetSlotType): type of the slot to map.

    Returns:
        nltk.corpus.wordnet.Synset: best WordNet sense for the given
        frame slot value.
    """
    synset_lemma = slot_value.split('.')[0]  # strip the POS tag (e.g. existence.n)
    frame = fn.frame(frame_name)
    return self._best_sense(frame, slot_value, slot_type, synset_lemma)
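
# A standalone sketch of the two preparation steps performed in map() above,
# before _best_sense() ranks candidate synsets (not shown here): strip the POS
# suffix from the slot value and load the frame object. 'Existence' and
# 'existence.n' are illustrative values echoing the docstring.
from nltk.corpus import framenet as fn

slot_value = 'existence.n'
synset_lemma = slot_value.split('.')[0]   # -> 'existence'
frame = fn.frame('Existence')
print(synset_lemma, frame.name)
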
def getFrameSetForStudent(surname, list_len=5):
    frameList = []
    nof_frames = len(fn.frames())
    # Hash the surname to a deterministic starting index into the frame list.
    base_idx = (abs(int(hashlib.sha512(surname.encode('utf-8')).hexdigest(), 16))
                % nof_frames)
    print('\nstudent: ' + surname)
    framenet_IDs = get_frams_IDs()
    i = 0
    offset = 0
    seed(1)
    while i < list_len:
        fID = framenet_IDs[(base_idx + offset) % nof_frames]
        f = fn.frame(fID)
        fNAME = f.name
        print('\tID: {a:4d}\tframe: {framename}'.format(a=fID, framename=fNAME))
        offset = randint(0, nof_frames)
        frameList.append(fID)
        i += 1
    return frameList
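
# A hedged invocation sketch for getFrameSetForStudent(): the surname is
# illustrative, and the surrounding module is assumed to provide the hashlib
# and random imports plus the get_frams_IDs() helper used above.
if __name__ == '__main__':
    assigned_ids = getFrameSetForStudent('Rossi', list_len=5)
    print(assigned_ids)
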
doccollections = ['NYT_19980407', 'NYT_19980403', 'NYT_19980315',
                  'APW_19980429', 'APW_19980424', 'APW_19980314']
IN = re.compile(r'.*\bin\b(?!\b.+ing)')
for doccol in doccollections:
    for doc in nltk.corpus.ieer.parsed_docs(doccol):
        relations = nltk.sem.extract_rels('PER', 'LOC', doc,
                                          corpus='ieer', pattern=IN)
        for relation in relations:
            print(nltk.sem.relextract.rtuple(relation))

f = fn.frames(r'(?i)perception')
len(fn.frames())
f = fn.frame(66)
f.ID
f.definition
set(f.lexUnit.keys())
[x.name for x in f.FE]
f.frameRelations
fn.frames_by_lemma(r'(?i)a little')
def demo():
    from pprint import pprint
    from nltk.corpus import framenet as fn

    #
    # It is not necessary to explicitly build the indexes by calling
    # buildindexes(). We do this here just for demo purposes. If the
    # indexes are not built explicitly, they will be built as needed.
    #
    print('Building the indexes...')
    fn.buildindexes()

    #
    # Get some statistics about the corpus
    #
    print('Number of Frames:', len(fn.frames()))
    print('Number of Lexical Units:', len(fn.lexical_units()))
    print('Number of annotated documents:', len(fn.documents()))
    print()

    #
    # Frames
    #
    print('getting frames whose name matches the (case insensitive) regex: "(?i)medical"')
    medframes = fn.frames(r'(?i)medical')
    print('Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes)))
    print([(f.name, f.ID) for f in medframes])

    #
    # store the first frame in the list of frames
    #
    tmp_id = medframes[0].ID
    m_frame = fn.frame(tmp_id)  # reads all info for the frame

    #
    # get the frame relations
    #
    print('\nNumber of frame relations for the "{0}" ({1}) frame:'.format(
        m_frame.name, m_frame.ID), len(m_frame.frameRelation))
    for fr in m_frame.frameRelation:
        print(' ', fr.type + ":", fr.relatedFrame)

    #
    # get the names of the Frame Elements
    #
    print('\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.FE))
    print(' ', [x.name for x in m_frame.FE])

    #
    # get the names of the "Core" Frame Elements
    #
    print('\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name))
    print(' ', [x.name for x in m_frame.FE if x.coreType == "Core"])

    #
    # get all of the Lexical Units that are incorporated in the
    # 'Ailment' FE of the 'Medical_conditions' frame (id=239)
    #
    print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
    m_frame = fn.frame(239)
    ailment_lus = [x for x in m_frame.lexUnit if x.incorporatedFE == 'Ailment']
    print([x.name for x in ailment_lus])

    #
    # get all of the Lexical Units for the frame
    #
    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.lexUnit))
    print(' ', [x.name for x in m_frame.lexUnit[:5]], '...')

    #
    # get basic info on the second LU in the frame
    #
    tmp_id = m_frame.lexUnit[1].ID  # grab the id of the second LU
    luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
    print('\nInformation on the LU: {0}'.format(luinfo.name))
    pprint(luinfo)

    #
    # Get a list of all of the corpora used for fulltext annotation
    #
    print('\nNames of all of the corpora used for fulltext annotation:')
    allcorpora = set([x.corpname for x in fn.documents()])
    pprint(list(allcorpora))

    #
    # Get the names of the annotated documents in the first corpus
    #
    firstcorp = list(allcorpora)[0]
    firstcorp_docs = fn.documents(firstcorp)
    print('\nNames of the annotated documents in the "{0}" corpus:'.format(firstcorp))
    pprint([x.filename for x in firstcorp_docs])

    #
    # Search for frames containing LUs whose name attribute matches a
    # regexp pattern.
    #
    # Note: if you were going to be doing a lot of this type of
    #       searching, you'd want to build an index that maps from
    #       lemmas to frames because each time frames_by_lemma() is
    #       called, it has to search through ALL of the frame XML files
    #       in the db.
    print('\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":')
    pprint(fn.frames_by_lemma(r'^run.v$'))
        list(range(len(lu_nameID_dict.keys()))), key=i)
    for LU in LU_choice_number:
        selectedLU = lu_list[LU]
        if selectedLU.split()[3][0:2] == "ID":  # to account for multi-word LUs
            lu_ID = int(selectedLU.split()[3][:-1].replace("ID=", ""))
        else:
            lu_ID = int(selectedLU.split()[4][:-1].replace("ID=", ""))
        lu_name = lu_nameID_dict[lu_ID]
        st.write("You selected LU: ", LU, ".", lu_name)
        associatedFrame = fn.lu(lu_ID).frame.name
        lu_frame = fn.frame(associatedFrame)
        st.write("Frame: ", associatedFrame)
        st.write("Reference: ", lu_frame.URL)
        FE_list = []
        for element in lu_frame.FE:
            FE_list.append(element)
        st.write("Frame Element(s): ")
        st.write(FE_list)
        st.write("Annotator Summary:")
        st.write("File Annotated:", filename.name)
        st.write("Selected Key:", key_list[q])
        st.write("JSON PATH: ", jpath[q - 1][0])
        st.write("Selected LU:", lu_name)
        st.write("LU's Frame:", associatedFrame, "(", lu_frame.URL, ")")
import json
import pprint

import kfn
from nltk.corpus import framenet as fn

# get all LUs
lus = kfn.lus()
print(len(lus))

# get LUs by lemma ('나누다', Korean for "to divide/share")
lus = kfn.lus_by_lemma('나누다')
print(lus)

# get an LU by lu_id
lu = kfn.lu(lus[0]['lu_id'])
pprint.pprint(lu)

frame_id = lu['fid']
f = fn.frame(frame_id)
print(f.name)
print(f.definition)

# get annotations by lu_id
annotations = kfn.annotation(lus[0]['lu_id'])
print(annotations)
            result.append((f1_name, f2_name, r, len(r)))
    result = sorted(result, key=lambda x: x[3], reverse=True)
    frame_chose = []
    for r in result:
        if r[3] >= 10:
            frame_chose.append(r)
    return frame_chose


if __name__ == '__main__':
    fs = frame_chose()
    for r in fs:
        f1 = fn.frame(r[0])
        f2 = fn.frame(r[1])
        lex_list = r[2]
        ls1 = f1.lexUnit
        ls2 = f2.lexUnit
        sents = []
        frames = []
        for l in lex_list:
            l_list = l.split('.')
            ss1 = ls1[l].exemplars
            ss2 = ls2[l].exemplars
            for s in ss1:
                sents.append(s.text)
            for s in ss2:
    # Excerpt: the try/except below runs inside a loop over input records
    # (hence the `continue` statements).
    try:
        # Query FrameNet -- frame names
        frames = fn.frames_by_lemma(lemma)
        framenames = ""
        for frame in frames:
            # print(frame.name)
            # print(len(frames))
            # continue
            # Cutoff point
            if len(frames) > numframes:
                continue
            framenames = "".join([framenames, "|", frame.name])
        if framenames != "":
            print("".join([field[0], "|", field[1], "|FRM_01|", lemma, framenames]))
        # Core Frame Elements
        for frame in frames:
            if len(frames) > numframes:
                continue
            ID = frame.ID
            framecores = ""
            cores = [(fename, fe.ID) for fename, fe in fn.frame(ID).FE.items()
                     if fe.coreType == 'Core']
            for core in cores:
                framecores = "".join([framecores, "|", core[0]])
            if framecores != "":
                print("".join([field[0], "|", field[1], "|FRM_02|", frame.name, framecores]))
    except (AttributeError, nltk.corpus.reader.framenet.FramenetError):
        continue
    except (UnicodeDecodeError, UnicodeEncodeError, IndexError, AssertionError):
        # Tag failed UTF-8 lines NA to enable repair
        print("".join([field[0], "|", field[1], "|FRM_01", "|NA"]))
        continue

# Clean up
fp.close()

# EOF
def addframeRelations(frame):
    for frameRelation in frame.frameRelations:
        if 'Parent' in frameRelation:
            parentKatum = frame_.get(frameRelation.Parent.name)
            childKatum = frame_.get(frameRelation.Child.name)
            childKatum._is(frameRelations, False)
            childKatum._is(parentKatum, False)
            parentKatum._is(frameRelations, False)


katum.load('wordnet-verbnet-framenet-noframerelations.datum', atum())
generalThing = datum.thing
framenetRoot = generalThing.find("framenet")
frame_ = framenetRoot.find("frame")
for frame in frame_.I:
    framenetFrame = fn.frame(frame.O)
    for frameRelation in framenetFrame.frameRelations:
        if 'Parent' in frameRelation:
            parentKatum = frame_.find(frameRelation.Parent.name)
        if 'Child' in frameRelation:
            childKatum = frame_.find(frameRelation.Child.name)
        if parentKatum != None and childKatum != None:
            childKatum._is(parentKatum, False)
generalThing.save('wordnet-verbnet-framenet-fr.datum')
framenames = "".join( [framenames, "|", frame.name]) if framenames != "": print "".join([ field[0], "|", field[1], "|FRM_01|", lemma, framenames ]) # Core Frame Elements for frame in frames: if len(frames) > numframes: continue ID = frame.ID framecores = "" cores = [ (fename, fe.ID) for fename, fe in fn.frame(ID).FE.items() if fe.coreType == 'Core' ] for core in cores: framecores = "".join( [framecores, "|", core[0]]) if framecores != "": print "".join([ field[0], "|", field[1], "|FRM_02|", frame.name, framecores ]) except (AttributeError, nltk.corpus.reader.framenet.FramenetError): continue except (UnicodeDecodeError, UnicodeEncodeError, IndexError, AssertionError):
fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality')
fn.lus('look.n')[0].frame
fn.lus('look.n')[1].frame
for f in fn.lus('look.n'):
    print(f.frame.name)

result = fn.frames(r'(?i)erception')
print(result)
f = fn.frame(1301)
f.ID
f.definition
for u in f.lexUnit:
    print(u)

fn.lexical_units(r'(?i)look')
from pattern.en import wordnet
[x for x in f.FE]
f.frameRelations
def get_coretype(frame_name, fe):
    """Return the core type of the given FE from the set of string types
    {'Core', 'Peripheral', 'Extra-Thematic'}.
    """
    return framenet.frame(frame_name).FE[fe].coreType
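
# A small usage sketch for get_coretype(); the frame/FE pair is borrowed from
# the 'Medical_conditions' demo above and is expected (not guaranteed here)
# to be a Core element.
from nltk.corpus import framenet

print(get_coretype('Medical_conditions', 'Ailment'))  # likely 'Core'
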