def sense_profile(raw_text):
    """Build a list of senseProfile objects, one per (verb, clause) pair in *raw_text*.

    Pipeline: ClausIE extracts per-sentence {verb: (synsets, args)} candidates,
    the NLP service supplies a CoNLL-U parse plus coreference chains, and
    Semafor assigns FrameNet frames; the verb's synset candidates are then
    narrowed to those compatible with the frame Semafor chose.

    Bug fix: the body previously referenced an undefined name ``text`` instead
    of the ``raw_text`` parameter, raising NameError on every call.
    """
    # Per-sentence {verb: (synset_list, arg_list)} candidates from ClausIE.
    verb_to_synsets = clausIE(raw_text)

    # CoNLL-U dependency parse and coreference chains from the NLP service.
    var_dict = nlp(text=raw_text, property=['conllu', 'json'])
    conllu = var_dict['conllu']
    corefs = list(var_dict['json']['corefs'].values())
    # For each coref chain, the sentence numbers its mentions occur in.
    # (The original comprehension shadowed ``corefs`` with its own loop
    # variable; renamed to ``chain``.)
    # NOTE(review): currently unused below — kept for the disabled coref step.
    sentnums_with_coref = [[item['sentNum'] for item in chain] for chain in corefs]

    # FrameNet frames for verbs and noun chunks, one dict per sentence.
    sem_output = semafor(sock=None, text=conllu, reconnect=1)
    frame_list_dict = semafor_util(sem_output)

    action_senses = []
    # frame_dict is {frame text: targetFrame(target_frame=name,
    #                descendants=[framedText(text, name), ...])}
    for sent_num, (synset_dict, frame_dict) in enumerate(
            zip(verb_to_synsets, frame_list_dict)):
        for verb, (synset_list, arg_list) in synset_dict.items():
            verb_lemma = lemmatize(verb)
            # Skip verbs Semafor did not assign a frame to.
            if verb_lemma not in frame_dict:
                continue

            # Narrow the ClausIE synset candidates to those whose lexical
            # units belong to the frame Semafor chose for this verb.
            frame = fn.frame_by_name(frame_dict[verb_lemma].target_frame)
            synsets = narrow_synsets(synset_list, list(frame.lexUnit.keys()))

            # Frames whose annotated text overlaps one of the verb's arguments.
            arg_frames = [
                arg_frame
                for _, arg, arg_phrase in arg_list
                for frame_text, arg_frame in frame_dict.items()
                if arg in frame_text or frame_text in arg_phrase
            ]

            # First frame whose text overlaps the verb itself, if any.
            verb_frame = None
            for frame_text, candidate in frame_dict.items():
                if frame_text in verb or verb in frame_text:
                    verb_frame = candidate
                    break

            action_senses.append(
                senseProfile(verb_lemma, synsets, arg_list, verb_frame, arg_frames))
    return action_senses
def fe_coreTypes(frame_name):
    """Map each frame-element name of the frame *frame_name* to its coreType.

    Prints every FE name and record along the way (debug output preserved).
    """
    frame = fn.frame_by_name(frame_name)
    core_types = {}
    for fe_name, element in frame.FE.items():
        core_types[fe_name] = element.coreType
        print(fe_name)
        print(element)
    return core_types
def isFirstBeginner(framename):
    """Return True when *framename* is a "first beginner" frame.

    A first beginner is a frame X such that X is the superFrameName of all
    its Inheritance/Subframe/Using relations (i.e. it is always the parent,
    never the child, in those relations).
    """
    frame = fn.frame_by_name(framename)
    hierarchical = ('Inheritance', 'Subframe', 'Using')
    return all(
        rel['superFrameName'] == framename
        for rel in frame['frameRelations']
        if rel['type']['name'] in hierarchical
    )
def sense_profile(raw_text):
    """Build sense profiles for every framed verb in *raw_text*.

    Each profile is ``[verb, [frame name, frame ID], [synset strings], arg_frames]``.

    Pipeline: the NLP service produces a CoNLL-U parse, Semafor assigns
    FrameNet frames per sentence, and the verb's synsets are narrowed to
    those whose associated frames include the one Semafor chose.

    Fixes: the bare ``except:`` (which would swallow SystemExit and
    KeyboardInterrupt) is narrowed to ``Exception``.
    """
    # no longer reducing with clausie
    # verb_to_synsets_dict = clausIE(raw_text)

    # CoNLL-U dependency parse of the input.
    var_dict = nlp(text=raw_text, property=['conllu', 'json'])
    conllu = var_dict['conllu']

    # Frames for verbs and noun chunks, one frame_dict per sentence.
    sem_output = semafor(sock=None, text=conllu, reconnect=1)
    frame_list_dict = semafor_util(sem_output)

    # The set of verbs found in each sentence.
    sent_verb_dict = conll_to_verb_map(conllu)

    action_senses = []
    # frame_dict is {frame text: targetFrame(target_frame=name,
    #                descendants=[framedText(text, name), ...])}
    for sent_num, frame_dict in enumerate(frame_list_dict):
        for verb in sent_verb_dict[sent_num]:
            # Ignore verbs for which no frame is identified.
            if verb not in frame_dict:
                continue

            # Dictionary of form {synset: frames} for each synset of the verb.
            synset_frameid_dict = verb_to_frames(verb)

            # Look up the frame Semafor assigned; skip the verb when the
            # reported frame name is unknown to the FrameNet data.
            frame_name = frame_dict[verb].target_frame
            try:
                frame = fn.frame_by_name(frame_name)
            except Exception:
                print('did not have this frame:')
                print(verb)
                print(frame_name)
                continue

            # Keep only synsets whose associated frames include Semafor's choice.
            synsets = narrow_synsets(synset_frameid_dict, frame)

            # The verb's arguments and the frames assigned to them.
            arg_frames = [[ft.text, ft.frame] for ft in frame_dict[verb].descendants]

            action_senses.append([
                verb,
                [frame.name, frame.ID],
                [str(synset) for synset in synsets],
                arg_frames,
            ])
    return action_senses
def get_frame_from_name(frame_name):
    """Look up a FrameNet frame by name, falling back to the FDD alias table.

    Returns the frame object, or False when the name cannot be resolved.

    Fixes: the bare ``except:`` is narrowed to ``Exception``; a missing
    ``frame_name`` key in ``FDD`` no longer raises KeyError (it now falls
    through to the "did not have frame" branch); typo in the multi-extension
    message corrected.
    """
    try:
        return fn.frame_by_name(frame_name)
    except Exception:
        pass  # not a canonical frame name — try the FDD extension table

    candidates = FDD.get(frame_name, [])
    if len(candidates) == 1:
        return fn.frame_by_name(candidates[0])
    if len(candidates) > 1:
        print('this frame has more than one extensions: {}'.format(frame_name))
        # Prefer the candidate that is not the '..._activity' variant.
        for fname in candidates:
            if fname.split('_')[-1] != 'activity':
                print('resolved')
                return fn.frame_by_name(fname)
        print('just chose first item: {}'.format(candidates[0]))
        return fn.frame_by_name(candidates[0])
    print('did not have frame {}'.format(frame_name))
    return False
def read_annotations(path):
    """Read (frame, word, synset) annotation triples from the CSV file at *path*.

    Each row holds three columns: a FrameNet frame name, the annotated word,
    and a WordNet synset name (or the literal string 'None' for no synset).
    """
    triples = []
    with open(path) as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            frame = fn.frame_by_name(row[0].strip())
            word = row[1].strip()
            synset_name = row[2].strip()
            synset = None if synset_name == 'None' else wn.synset(synset_name)
            triples.append((frame, word, synset))
    return triples
# NOTE(review): fragment of a Python 2 demo/tutorial script — the expression
# closed by the leading "))" begins outside this chunk, so the snippet is
# incomplete from here; tokens are preserved, only reflowed and commented.
))

# Query DBPedia and print each result's person/place names.
for r in DBPedia().search(sparql, start=1, count=1000):
    print '%s (%s)' % (r.person.name, r.place.name)

##______________________________Framenet______________________________
from nltk.corpus import framenet as fn

# Expression statements demonstrating that FrameNet lookups share objects:
# the same lexical unit / frame is returned by different access paths.
fn.lu(3238).frame.lexUnit['glint.v'] is fn.lu(3238)
fn.frame_by_name('Replacing') is fn.lus('replace.v')[0].frame
fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality')

# Multiple lexical units can share a name; each has its own frame.
fn.lus('look.n')[0].frame
fn.lus('look.n')[1].frame
for f in fn.lus('look.n'):
    print f.frame.name

# Regex search over frame names (case-insensitive).
result = fn.frames(r'(?i)erception')
print result
def NumLexU(framename):
    """Return the lexical-unit names of *framename* joined by spaces, or '_' if none."""
    frame = fn.frame_by_name(framename)
    lex_units = frame['lexUnit']
    # Iterating the lexUnit mapping yields its keys (the LU names).
    return " ".join(lex_units) if lex_units else "_"
def items(frame_name):
    """Return the (name, frame-element) pairs of the frame named *frame_name*.

    Fixes: the original took no parameters and referenced an undefined global
    ``frame_name`` (NameError on every call), then discarded the computed
    pairs (the body ended in ``pass``). The name is now a parameter, matching
    the sibling helpers (fekeys, getFE, getFrame_by_Name), and the pairs are
    returned.
    """
    frame = fn.frame_by_name(frame_name)
    return frame.FE.items()
def fekeys(frame_name):
    """Return the frame-element names of the frame named *frame_name*.

    Fixes: the original computed the keys and then discarded them (the body
    ended in ``pass``); the computed view is now returned.
    """
    frame = fn.frame_by_name(frame_name)
    return frame.FE.keys()
def getFE(frame_name):
    """Return the frame-element mapping of the frame named *frame_name*.

    Fixes: the original computed the FE mapping and then discarded it (the
    body ended in ``pass``); it is now returned.
    """
    frame = fn.frame_by_name(frame_name)
    return frame.FE
def getFrame_by_Name(frame_name):
    """Return the FrameNet frame named *frame_name*.

    Fixes: the original looked the frame up and then discarded it (the body
    ended in ``pass``); the frame is now returned.
    """
    return fn.frame_by_name(frame_name)
def lu_ids(frame_name):
    """Return the IDs of all lexical units attached to the frame *frame_name*."""
    frame = fn.frame_by_name(frame_name)
    return [lex_unit.ID for lex_unit in frame.lexUnit.values()]