def __init__(self, frames_text=None, tokens_text=None, chunks_text=None,
             frames_hypo=None, tokens_hypo=None, chunks_hypo=None,
             e_type='simple', verbose=1, entailment=-1):
    """Create the helper tools and store the text/hypothesis inputs.

    Container arguments default to ``None`` and are replaced by a fresh
    empty ``dict``/``list`` here, so two instances never share (and cannot
    accidentally mutate) the same default object.

    frames_text, frames_hypo -- frames of the text / hypothesis
        (a dict is expected by default).
    tokens_text, tokens_hypo -- tokens of the text / hypothesis.
    chunks_text, chunks_hypo -- chunks of the text / hypothesis.
    e_type     -- stored as ``self.e_type``.
    verbose    -- stored as ``self.verbose``.
    entailment -- stored as ``self.entailment``; -1 is the default marker.
    """
    self.e_type = e_type

    # Project helpers, one SRL tool per side of the pair.
    self.srl_t = SRLTools()
    self.srl_h = SRLTools()
    self.verb_net = VerbMetrics()
    self.arg_sim = SetMetrics()
    self.lin = Lin()
    self.wn = WNTools()

    # Each argument goes to the attribute of the same name. The previous
    # code stored frames_text in frames_hypo and vice versa.
    self.frames_text = {} if frames_text is None else frames_text
    self.frames_hypo = {} if frames_hypo is None else frames_hypo
    self.tokens_text = [] if tokens_text is None else tokens_text
    self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo
    self.chunks_text = [] if chunks_text is None else chunks_text
    self.chunks_hypo = [] if chunks_hypo is None else chunks_hypo

    # Working state, initialised empty.
    self.args_text = {}
    self.args_hypo = {}
    self.verbs_text = []
    self.verbs_hypo = []
    self.edit_score = 0
    self.verb_score = 0
    self.chunk_score = 0
    self.oper_type = {'del': 0, 'in': 0, 'sub': 0}

    self.verbose = verbose
    self.entailment = entailment
def __init__(self, frames_text=None, tokens_text=None, chunks_text=None,
             frames_hypo=None, tokens_hypo=None, chunks_hypo=None,
             e_type='simple', verbose=1, entailment=-1):
    """Create the helper tools and store the text/hypothesis inputs.

    Container arguments default to ``None`` and are replaced by a fresh
    empty ``dict``/``list`` here, so two instances never share (and cannot
    accidentally mutate) the same default object.

    frames_text, frames_hypo -- frames of the text / hypothesis
        (a dict is expected by default).
    tokens_text, tokens_hypo -- tokens of the text / hypothesis.
    chunks_text, chunks_hypo -- chunks of the text / hypothesis.
    e_type     -- stored as ``self.e_type``.
    verbose    -- stored as ``self.verbose``.
    entailment -- stored as ``self.entailment``; -1 is the default marker.
    """
    self.e_type = e_type

    # Project helpers, one SRL tool per side of the pair.
    self.srl_t = SRLTools()
    self.srl_h = SRLTools()
    self.verb_net = VerbMetrics()
    self.arg_sim = SetMetrics()
    self.lin = Lin()
    self.wn = WNTools()

    # Each argument goes to the attribute of the same name. The previous
    # code stored frames_text in frames_hypo and vice versa.
    self.frames_text = {} if frames_text is None else frames_text
    self.frames_hypo = {} if frames_hypo is None else frames_hypo
    self.tokens_text = [] if tokens_text is None else tokens_text
    self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo
    self.chunks_text = [] if chunks_text is None else chunks_text
    self.chunks_hypo = [] if chunks_hypo is None else chunks_hypo

    # Working state, initialised empty.
    self.args_text = {}
    self.args_hypo = {}
    self.verbs_text = []
    self.verbs_hypo = []
    self.edit_score = 0
    self.verb_score = 0
    self.chunk_score = 0
    self.oper_type = {'del': 0, 'in': 0, 'sub': 0}

    self.verbose = verbose
    self.entailment = entailment
def verb_proc(self, id, point, sep):
    """Return the verb records built from one data point.

    id    -- value written into every ``>verb`` record.
    point -- mapping; only its optional ``'verbs'`` entry is read.  Each
             value of that entry must provide ``'lex'``, ``'srl'``,
             ``'combo'`` and a ``'tokens'`` pair (text verb, hypothesis verb).
    sep   -- separator placed between the two cleaned verbs and between
             the class names.

    The returned list is the same object that is stored on
    ``self.v_predicates``; it is empty when ``point`` has no ``'verbs'``.
    """
    collected = []
    self.v_predicates = collected
    if 'verbs' not in point:
        return collected

    for _unused_key, entry in point['verbs'].iteritems():
        lexical = entry['lex']
        semantic = entry['srl']
        _combo = entry['combo']  # looked up but not used any further
        text_verb, hypo_verb = entry['tokens']

        # TODO VN isec, expand VO
        text_tool = VerbTools(text_verb)
        # Every record of this verb pair is keyed by "<text><sep><hypo>".
        pair_key = '%s%s%s' % (self.clean_str(text_verb), sep,
                               self.clean_str(hypo_verb))
        hypo_tool = VerbTools(hypo_verb)

        # Classes of both verbs, merged in place and de-duplicated.
        classes = text_tool.vn_classes()
        classes.extend(hypo_tool.vn_classes())
        unique_classes = set(classes)

        text_tool.verb_relations(hypo_verb)  # result unused; call kept as is

        metrics = VerbMetrics(text_v=text_verb, hypo_v=hypo_verb)
        vn_value = metrics.vn_isec()
        direct_value = metrics.direct()
        vo_value = metrics.vo()

        vn_record = '>sim_vn\n"%s" %s' % (pair_key, vn_value)
        vo_record = '>sim_vo\n"%s" %s' % (pair_key, vo_value)
        direct_record = '>sim_d\n"%s" %s' % (pair_key, direct_value)
        joined_classes = sep.join(unique_classes)
        bow_record = '>bow_vn\n"%s" "%s"' % (pair_key,
                                             self.clean_str(joined_classes))

        # Flag is 1 when the 'srl' score is strictly above the 'lex' score.
        strong = 1 if semantic > lexical else 0
        strong_record = '>strong_con\n"%s" %s' % (pair_key, strong)
        id_record = '>verb\n"%s" %s' % (pair_key, id)

        collected.extend([
            bow_record,
            strong_record,
            vn_record,
            vo_record,
            direct_record,
            id_record,
        ])

    return collected
def verb_proc(self, id, point, sep):
    """Return the verb records built from one data point.

    id    -- value written into every ``>verb`` record.
    point -- mapping; only its optional ``'verbs'`` entry is read.  Each
             value of that entry must provide ``'lex'``, ``'srl'``,
             ``'combo'`` and a ``'tokens'`` pair (text verb, hypothesis verb).
    sep   -- separator placed between the two cleaned verbs and between
             the class names.

    The returned list is the same object that is stored on
    ``self.v_predicates``; it is empty when ``point`` has no ``'verbs'``.
    """
    collected = []
    self.v_predicates = collected
    if 'verbs' not in point:
        return collected

    for _unused_key, entry in point['verbs'].iteritems():
        lexical = entry['lex']
        semantic = entry['srl']
        _combo = entry['combo']  # looked up but not used any further
        text_verb, hypo_verb = entry['tokens']

        # TODO VN isec, expand VO
        text_tool = VerbTools(text_verb)
        # Every record of this verb pair is keyed by "<text><sep><hypo>".
        pair_key = '%s%s%s' % (self.clean_str(text_verb), sep,
                               self.clean_str(hypo_verb))
        hypo_tool = VerbTools(hypo_verb)

        # Classes of both verbs, merged in place and de-duplicated.
        classes = text_tool.vn_classes()
        classes.extend(hypo_tool.vn_classes())
        unique_classes = set(classes)

        text_tool.verb_relations(hypo_verb)  # result unused; call kept as is

        metrics = VerbMetrics(text_v=text_verb, hypo_v=hypo_verb)
        vn_value = metrics.vn_isec()
        direct_value = metrics.direct()
        vo_value = metrics.vo()

        vn_record = '>sim_vn\n"%s" %s' % (pair_key, vn_value)
        vo_record = '>sim_vo\n"%s" %s' % (pair_key, vo_value)
        direct_record = '>sim_d\n"%s" %s' % (pair_key, direct_value)
        joined_classes = sep.join(unique_classes)
        bow_record = '>bow_vn\n"%s" "%s"' % (pair_key,
                                             self.clean_str(joined_classes))

        # Flag is 1 when the 'srl' score is strictly above the 'lex' score.
        strong = 1 if semantic > lexical else 0
        strong_record = '>strong_con\n"%s" %s' % (pair_key, strong)
        id_record = '>verb\n"%s" %s' % (pair_key, id)

        collected.extend([
            bow_record,
            strong_record,
            vn_record,
            vo_record,
            direct_record,
            id_record,
        ])

    return collected
def verb_proc(self, id, point, sep):
    """Return the verb predicates built from one data point.

    id    -- identifier of the data point; it prefixes every verb key and is
             the second argument of every predicate.
    point -- mapping; only its optional ``'verbs'`` entry is read.  Each
             value of that entry must provide ``'lex'``, ``'srl'``,
             ``'combo'`` and a ``'tokens'`` pair (text verb, hypothesis verb).
    sep   -- separator placed between the two cleaned verbs and between
             the class names.

    The returned list is the same object that is stored on
    ``self.v_predicates``; it is empty when ``point`` has no ``'verbs'``.
    """
    collected = []
    self.v_predicates = collected
    if 'verbs' not in point:
        return collected

    for entry_key, entry in point['verbs'].iteritems():
        lexical = entry['lex']
        semantic = entry['srl']
        _combo = entry['combo']  # looked up but not used any further
        text_verb, hypo_verb = entry['tokens']

        text_tool = VerbTools(text_verb)
        # Verb key is "<id>.<key of the entry inside point['verbs']>".
        verb_key = '%s.%s' % (id, entry_key)
        hypo_tool = VerbTools(hypo_verb)

        # Classes of both verbs, merged in place and de-duplicated.
        classes = text_tool.vn_classes()
        classes.extend(hypo_tool.vn_classes())
        unique_classes = set(classes)

        text_tool.verb_relations(hypo_verb)  # result unused; call kept as is

        metrics = VerbMetrics(text_v=text_verb, hypo_v=hypo_verb)
        vn_value = metrics.vn_isec()
        direct_value = metrics.direct()
        vo_value = metrics.vo()

        token_pred = 'TokenVerb(%s, %s, "%s%s%s")' % (
            verb_key, id, self.clean_str(text_verb), sep,
            self.clean_str(hypo_verb))
        vn_pred = 'SimVN(%s, %s, %s)' % (verb_key, id, vn_value)
        vo_pred = 'SimVO(%s, %s, %s)' % (verb_key, id, vo_value)
        direct_pred = 'SimD(%s, %s, %s)' % (verb_key, id, direct_value)
        joined_classes = sep.join(unique_classes)
        bow_pred = 'BowVN(%s, %s, "%s")' % (verb_key, id,
                                            self.clean_str(joined_classes))

        # Flag is 1 when the 'srl' score is strictly above the 'lex' score.
        strong = 1 if semantic > lexical else 0
        strong_pred = 'StrongCon(%s, %s, %s)' % (verb_key, id, strong)

        collected.extend([
            bow_pred,
            strong_pred,
            vn_pred,
            vo_pred,
            direct_pred,
            token_pred,
        ])

    return collected
def __init__(self, frames_text=None, tokens_text=None, frames_hypo=None,
             tokens_hypo=None, sim_type='Lin', verbose=1):
    """Create the helper tools and store the text/hypothesis inputs.

    Container arguments default to ``None`` and are replaced by a fresh
    empty ``dict``/``list`` here, so two instances never share (and cannot
    accidentally mutate) the same default object.

    frames_text, frames_hypo -- frames of the text / hypothesis
        (a dict is expected by default).
    tokens_text, tokens_hypo -- tokens of the text / hypothesis.
    sim_type -- stored as ``self.sim_type`` (default ``'Lin'``).
    verbose  -- stored as ``self.verbose``.
    """
    # Project helpers, one SRL tool per side of the pair.
    self.srl_t = SRLTools()
    self.srl_h = SRLTools()
    self.verb_net = VerbMetrics()
    self.arg_sim = SetMetrics()
    self.lin = Lin()
    self.wn = WNTools()

    # Each argument goes to the attribute of the same name. The previous
    # code stored frames_text in frames_hypo and vice versa.
    self.frames_text = {} if frames_text is None else frames_text
    self.frames_hypo = {} if frames_hypo is None else frames_hypo
    self.tokens_text = [] if tokens_text is None else tokens_text
    self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo

    # Working state, initialised empty.
    self.args_text = {}
    self.args_hypo = {}
    self.verbs_text = []
    self.verbs_hypo = []
    self.tine_score = 0
    self.verb_score = 0
    self.arg_score = 0

    self.sim_type = sim_type
    self.verbose = verbose
def main(args):
    """Exercise the metric tools on the first ten pairs of a pickled corpus.

    args -- sequence whose first item is the path of the pickle file.

    For every pair the id, both sentences and the feature types are printed,
    followed by the frames, the word-to-frame mapping and the verbs reported
    by ``SRLTools``.  The verb, Lin, vector and WordNet helpers are then
    called on the verb/noun positions of the pair; their results are not
    printed.  Returns ``None``.
    """
    pickle_file = args[0]
    print('loading file: %s' % (pickle_file,))

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only run this on trusted pickles.
    # Binary mode: pickles are byte streams, text mode can corrupt them.
    with open(pickle_file, 'rb') as pf:
        pairs = pickle.load(pf)

    for count, pair in enumerate(pairs, 1):
        print('id: %s' % (pair.get_id(),))
        print('s1: %s' % (pair.get_text(),))
        print('s2: %s' % (pair.get_hypo(),))
        print('features: %s' % (pair.get_features_text_type(),))

        print('set-metrics, cos test')
        lemmas_text = pair.get_feature_text('lemmas')
        lemmas_hypo = pair.get_feature_hypo('lemmas')
        SetMetrics(lemmas_text, lemmas_hypo).cosine()  # result not shown

        print('SRL tools')
        frames_text = pair.get_feature_text('frames')
        print(frames_text)
        print('################')
        srl = SRLTools(lemmas_text, frames_text)
        print(srl.get_words_frame())
        print('################')
        print(srl.get_verbs())
        print('################')

        pos_text = pair.get_feature_text('pos')
        pos_hypo = pair.get_feature_hypo('pos')
        verbs = VerbMetrics()
        lin = Lin()
        vectors = VectorMetrics()
        hyper = WNTools()

        # Positions in the pos lists are used to index the lemma lists.
        for i, (_token_t, pos_t) in enumerate(pos_text):
            if not pos_t.startswith('V'):
                continue
            for j, (_token_h, pos_h) in enumerate(pos_hypo):
                if pos_h.startswith('V'):
                    verbs.set_text_verb(lemmas_text[i])
                    verbs.set_hypo_verb(lemmas_hypo[j])
                    verbs.vn_isec()
                    t_sim = lin.n_similar_words(lemmas_text[i])
                    h_sim = lin.n_similar_words(lemmas_hypo[j])
                    t_score = [float(score) for _word, score in t_sim]
                    h_score = [float(score) for _word, score in h_sim]
                    vectors.set_vectors(t_score, h_score)
                elif pos_h.startswith('N'):
                    hyper.get_mfs_hypernyms((lemmas_hypo[j], pos_h))

        # Only the first ten pairs are processed.
        if count >= 10:
            break
class Edistance:
    """Argument edit-distance score between a text and a hypothesis.

    For every pair of (text verb, hypothesis verb) that ``__simVerbs``
    scores as 1, the labelled arguments of the two verbs are compared and
    the number of substitutions, insertions and deletions is turned into a
    score.  When the resulting score is 0 the chunk-based back-off is used.
    """

    def __init__(self, frames_text=None, tokens_text=None, chunks_text=None,
                 frames_hypo=None, tokens_hypo=None, chunks_hypo=None,
                 e_type='simple', verbose=1, entailment=-1):
        """Create the helper tools and store the text/hypothesis inputs.

        Container arguments default to ``None`` and are replaced by a fresh
        empty ``dict``/``list``, so instances never share a default object.

        e_type     -- only ``'simple'`` is acted upon by this class.
        verbose    -- 1 enables the trace written to ``sys.stderr``.
        entailment -- label echoed in the trace; -1 is the default marker.
        """
        self.e_type = e_type
        self.srl_t = SRLTools()
        self.srl_h = SRLTools()
        self.verb_net = VerbMetrics()
        self.arg_sim = SetMetrics()
        self.lin = Lin()
        self.wn = WNTools()
        # Each argument goes to the attribute of the same name. The previous
        # code stored frames_text in frames_hypo and vice versa.
        self.frames_text = {} if frames_text is None else frames_text
        self.frames_hypo = {} if frames_hypo is None else frames_hypo
        self.tokens_text = [] if tokens_text is None else tokens_text
        self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo
        self.chunks_text = [] if chunks_text is None else chunks_text
        self.chunks_hypo = [] if chunks_hypo is None else chunks_hypo
        self.args_text = {}
        self.args_hypo = {}
        self.verbs_text = []
        self.verbs_hypo = []
        self.edit_score = 0
        self.verb_score = 0
        self.chunk_score = 0
        self.oper_type = {'del': 0, 'in': 0, 'sub': 0}
        self.verbose = verbose
        self.entailment = entailment

    def get_edistance_micai(self, frames_text=None, tokens_text=None,
                            chunks_text=None, frames_hypo=None,
                            tokens_hypo=None, chunks_hypo=None,
                            entailment=-1):
        """Compute, store in ``self.edit_score`` and return the score.

        Any non-empty argument replaces the value stored on the instance;
        empty or omitted arguments keep the stored value.  ``entailment``
        replaces the stored label unless it is the -1 default marker.

        Returns 0 when the text has no frames and no chunks are available.
        """
        if frames_text:
            self.frames_text = frames_text
        if frames_hypo:
            self.frames_hypo = frames_hypo
        if tokens_text:
            self.tokens_text = tokens_text
        if tokens_hypo:
            self.tokens_hypo = tokens_hypo
        if chunks_text:
            self.chunks_text = chunks_text
        if chunks_hypo:
            self.chunks_hypo = chunks_hypo
        # -1 means "not given". A plain truth test would treat -1 as given
        # and silently ignore a label of 0.
        if entailment != -1:
            self.entailment = entailment

        self.srl_t.set_frames(self.frames_text)
        self.srl_h.set_frames(self.frames_hypo)
        self.srl_t.set_tokens(self.tokens_text)
        self.srl_h.set_tokens(self.tokens_hypo)
        self.args_text = self.srl_t.get_words_frame()
        self.args_hypo = self.srl_h.get_words_frame()

        # NOTE(review): the normaliser is the number of verbs found in the
        # TEXT (the old local was called num_verbs_h) -- confirm intent.
        num_verbs = len(self.args_text)

        # Trace the tokens actually used, not just the ones passed in.
        self.__p_stderr('###V(%s)###\nT:%s\nH:%s\n' % (
            self.entailment,
            ' '.join(self.tokens_text),
            ' '.join(self.tokens_hypo)))

        total = 0
        for verb_t, args_t in self.args_text.items():
            for verb_h, args_h in self.args_hypo.items():
                if self.__simVerbs(verb_t, verb_h) == 1:
                    self.__p_stderr('\tverbs(%s, %s)\n' % (verb_t, verb_h))
                    if self.e_type == 'simple':
                        total += self.__simpleARG(args_t, args_h)

        # No verb in the text: score 0 instead of dividing by zero.
        self.edit_score = float(total) / num_verbs if num_verbs else 0.0
        self.__p_stderr('\ted_score(%s)\n' % (self.edit_score))

        # A zero score falls back to the chunk comparison.
        if self.edit_score == 0.0:
            self.edit_score = self.__back_off_order(self.chunks_text,
                                                    self.chunks_hypo)
        return self.edit_score

    def __simpleARG(self, args_t=(), args_h=()):
        """Return 1 / (number of edit operations) between two argument lists.

        args_t, args_h -- iterables of ``(tag, tokens)`` pairs.

        A tag present on both sides with different tokens is a substitution,
        a tag only in ``args_t`` an insertion, a tag only in ``args_h`` a
        deletion.  The per-type counts are left in ``self.oper_type``.
        Returns 0 when there is no operation at all.
        """
        tags_t = self.__extract_tags(args_t)
        tags_h = self.__extract_tags(args_h)
        self.oper_type = {'del': 0, 'in': 0, 'sub': 0}
        oper_sum = 0

        for tag_t, tokens_t in tags_t.items():
            if tag_t in tags_h:
                # Same tag on both sides: substitution if the tokens differ.
                tokens_h = tags_h[tag_t]
                if ' '.join(tokens_t) != ' '.join(tokens_h):
                    self.oper_type['sub'] += 1
                    self.__p_stderr('\t\toper(sub): [%s] %s -> %s\n'
                                    % (tag_t, tokens_t, tokens_h))
                    oper_sum += 1

        for tag_t, tokens_t in tags_t.items():
            if tag_t not in tags_h:
                self.oper_type['in'] += 1
                self.__p_stderr('\t\toper(in): [%s] %s\n' % (tag_t, tokens_t))
                oper_sum += 1

        for tag_h, tokens_h in tags_h.items():
            if tag_h not in tags_t:
                self.oper_type['del'] += 1
                self.__p_stderr('\t\toper(del): [%s] %s\n' % (tag_h, tokens_h))
                oper_sum += 1

        self.__p_stderr('num oper: %s\n' % self.oper_type)
        self.__p_stderr('sum oper: %s\n' % oper_sum)
        if oper_sum == 0:
            return 0
        score = 1.0 / float(oper_sum)
        self.__p_stderr('simp_score: %s\n' % score)
        return score

    def __extract_tags(self, args):
        """Return a ``{tag: tokens}`` dict; a repeated tag keeps its last tokens."""
        tags = {}
        for tag, tokens in args:
            tags[tag] = tokens
        return tags

    def __simVerbs(self, verb_t='', verb_h=''):
        """Return 1 for identical verbs, else the ``verb_net`` similarity.

        The ``vn_isec()`` value is returned when it is non-zero, otherwise
        the ``vo()`` value.
        """
        if verb_t == verb_h:
            return 1
        self.verb_net.set_text_verb(verb_t)
        self.verb_net.set_hypo_verb(verb_h)
        isec = self.verb_net.vn_isec()
        if isec == 0:
            return self.verb_net.vo()
        return isec

    def __back_off_order(self, chunks_t=(), chunks_h=()):
        """Return the position-wise chunk agreement, normalised by H's length.

        chunks_t, chunks_h -- objects whose ``pos()`` returns a sequence of
            ``((word, pos), tag)`` entries (presumably NLTK chunk trees --
            confirm against the caller).

        Entries at the same position with the same tag add 1.0 when the
        words are equal and 0.5 otherwise.  Returns 0.0 when either side is
        empty or missing.
        """
        self.__p_stderr('chunk back-off\n')
        # The default empty list has no pos(), so there is nothing to compare.
        if not chunks_t or not chunks_h:
            return 0.0
        entries_t = chunks_t.pos()
        entries_h = chunks_h.pos()
        if not entries_h:
            return 0.0

        sum_ch = 0
        for i, chunk_h in enumerate(entries_h):
            try:
                (node_t, tag_t) = entries_t[i]
                (node_h, tag_h) = chunk_h
                if tag_t != tag_h:
                    continue
                (word_t, _pos_t) = node_t
                (word_h, _pos_h) = node_h
            except (IndexError, TypeError, ValueError):
                # Text is shorter than the hypothesis, or the entry does not
                # have the expected shape: the position contributes nothing.
                continue
            if word_t == word_h:
                sum_ch += 1.0
                self.__p_stderr('\t[%s]: %s\n' % (tag_t, word_t))
            else:
                sum_ch += 0.5
                self.__p_stderr('\t[%s|%s]: %s %s \n'
                                % (tag_t, sum_ch, word_t, word_h))
        return float(sum_ch) / len(entries_h)

    def __p_stderr(self, text=''):
        """Write ``text`` to ``sys.stderr`` when ``self.verbose`` is 1."""
        if self.verbose == 1:
            sys.stderr.write(text)
class TineVN:
    """Verb/argument similarity score between a text and a hypothesis.

    For every pair of (text verb, hypothesis verb) that ``__simVerbs``
    scores as 1, arguments carrying the same tag are expanded (with the
    ``Lin`` or ``WN`` helper, depending on ``sim_type``) and compared with
    the cosine of ``SetMetrics``.
    """

    def __init__(self, frames_text=None, tokens_text=None, frames_hypo=None,
                 tokens_hypo=None, sim_type='Lin', verbose=1):
        """Create the helper tools and store the text/hypothesis inputs.

        Container arguments default to ``None`` and are replaced by a fresh
        empty ``dict``/``list``, so instances never share a default object.

        sim_type -- ``'Lin'`` or ``'WN'``; any other value compares empty
                    expansions.
        verbose  -- 1 enables the trace written to ``sys.stderr``.
        """
        self.srl_t = SRLTools()
        self.srl_h = SRLTools()
        self.verb_net = VerbMetrics()
        self.arg_sim = SetMetrics()
        self.lin = Lin()
        self.wn = WNTools()
        # Each argument goes to the attribute of the same name. The previous
        # code stored frames_text in frames_hypo and vice versa.
        self.frames_text = {} if frames_text is None else frames_text
        self.frames_hypo = {} if frames_hypo is None else frames_hypo
        self.tokens_text = [] if tokens_text is None else tokens_text
        self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo
        self.args_text = {}
        self.args_hypo = {}
        self.verbs_text = []
        self.verbs_hypo = []
        self.tine_score = 0
        self.verb_score = 0
        self.arg_score = 0
        self.sim_type = sim_type
        self.verbose = verbose

    def get_tine_score(self, frames_text=None, tokens_text=None,
                       frames_hypo=None, tokens_hypo=None):
        """Compute, store in ``self.tine_score`` and return the score.

        Any non-empty argument replaces the value stored on the instance;
        empty or omitted arguments keep the stored value.  Returns 0.0 when
        the text has no frames.
        """
        if frames_text:
            self.frames_text = frames_text
        if frames_hypo:
            self.frames_hypo = frames_hypo
        if tokens_text:
            self.tokens_text = tokens_text
        if tokens_hypo:
            self.tokens_hypo = tokens_hypo

        self.srl_t.set_frames(self.frames_text)
        self.srl_h.set_frames(self.frames_hypo)
        self.srl_t.set_tokens(self.tokens_text)
        self.srl_h.set_tokens(self.tokens_hypo)
        self.args_text = self.srl_t.get_words_frame()
        self.args_hypo = self.srl_h.get_words_frame()

        # NOTE(review): the normaliser is the number of verbs found in the
        # TEXT (the old local was called num_verbs_h) -- confirm intent.
        num_verbs = len(self.args_text)

        self.__p_stderr('TINE VerbNet\n')
        self.__p_stderr('T: %s \n H: %s\n' % (self.args_text, self.args_hypo))
        self.__p_stderr('T: %s \n H: %s\n' % (self.args_text.keys(),
                                              self.args_hypo.keys()))

        sum_verb = 0
        for verb_t, args_t in self.args_text.items():
            for verb_h, args_h in self.args_hypo.items():
                if self.__simVerbs(verb_t, verb_h) == 1:
                    self.__p_stderr('verbs(%s, %s)\n' % (verb_t, verb_h))
                    sum_verb += self.__simArgs(args_t, args_h)

        # No verb in the text: score 0 instead of dividing by zero.
        self.tine_score = float(sum_verb) / num_verbs if num_verbs else 0.0
        self.__p_stderr('score:%s\n' % (self.tine_score))
        return self.tine_score

    def __simVerbs(self, verb_t='', verb_h=''):
        """Return 1 for identical verbs, else the ``verb_net`` similarity.

        The ``vn_isec()`` value is returned when it is non-zero, otherwise
        the ``vo()`` value.
        """
        if verb_t == verb_h:
            return 1
        self.verb_net.set_text_verb(verb_t)
        self.verb_net.set_hypo_verb(verb_h)
        isec = self.verb_net.vn_isec()
        if isec == 0:
            return self.verb_net.vo()
        return isec

    def __simArgs(self, args_t=(), args_h=()):
        """Return the summed cosine of same-tag arguments over ``len(args_h)``.

        args_t, args_h -- sequences of ``(tag, tokens)`` pairs.

        The result is also stored in ``self.verb_score``; ``self.arg_score``
        keeps the cosine of the last compared pair.  Returns 0 (leaving
        ``self.verb_score`` untouched) when ``args_h`` is empty.
        """
        sum_args = 0
        num_args_h = len(args_h)
        for tag_t, tokens_t in args_t:
            for tag_h, tokens_h in args_h:
                if tag_t != tag_h:
                    continue
                expand_t = []
                expand_h = []
                if self.sim_type == 'Lin':
                    expand_t = self.lin.expand_bow(tokens_t)
                    expand_h = self.lin.expand_bow(tokens_h)
                elif self.sim_type == 'WN':
                    expand_t = self.wn.expand_bow_tree(tokens_t)
                    expand_h = self.wn.expand_bow_tree(tokens_h)
                self.arg_sim.set_text(expand_t)
                self.arg_sim.set_hypo(expand_h)
                self.arg_score = self.arg_sim.cosine()
                self.__p_stderr('\t[%s|%s] %s %s\n'
                                % (tag_t, self.arg_score, expand_t, expand_h))
                sum_args += self.arg_score
        if num_args_h == 0:
            return 0
        self.verb_score = float(sum_args) / num_args_h
        return self.verb_score

    def __p_stderr(self, text=''):
        """Write ``text`` to ``sys.stderr`` when ``self.verbose`` is 1."""
        if self.verbose == 1:
            sys.stderr.write(text)

    def get_verb_score(self):
        """Return the value stored by the last ``__simArgs`` call."""
        return self.verb_score

    def get_arg_score(self):
        """Return the cosine of the last argument pair that was compared."""
        return self.arg_score
class Edistance:
    """Argument edit-distance score between a text and a hypothesis.

    For every pair of (text verb, hypothesis verb) that ``__simVerbs``
    scores as 1, the labelled arguments of the two verbs are compared and
    the number of substitutions, insertions and deletions is turned into a
    score.  When the resulting score is 0 the chunk-based back-off is used.
    """

    def __init__(self, frames_text=None, tokens_text=None, chunks_text=None,
                 frames_hypo=None, tokens_hypo=None, chunks_hypo=None,
                 e_type='simple', verbose=1, entailment=-1):
        """Create the helper tools and store the text/hypothesis inputs.

        Container arguments default to ``None`` and are replaced by a fresh
        empty ``dict``/``list``, so instances never share a default object.

        e_type     -- only ``'simple'`` is acted upon by this class.
        verbose    -- 1 enables the trace written to ``sys.stderr``.
        entailment -- label echoed in the trace; -1 is the default marker.
        """
        self.e_type = e_type
        self.srl_t = SRLTools()
        self.srl_h = SRLTools()
        self.verb_net = VerbMetrics()
        self.arg_sim = SetMetrics()
        self.lin = Lin()
        self.wn = WNTools()
        # Each argument goes to the attribute of the same name. The previous
        # code stored frames_text in frames_hypo and vice versa.
        self.frames_text = {} if frames_text is None else frames_text
        self.frames_hypo = {} if frames_hypo is None else frames_hypo
        self.tokens_text = [] if tokens_text is None else tokens_text
        self.tokens_hypo = [] if tokens_hypo is None else tokens_hypo
        self.chunks_text = [] if chunks_text is None else chunks_text
        self.chunks_hypo = [] if chunks_hypo is None else chunks_hypo
        self.args_text = {}
        self.args_hypo = {}
        self.verbs_text = []
        self.verbs_hypo = []
        self.edit_score = 0
        self.verb_score = 0
        self.chunk_score = 0
        self.oper_type = {'del': 0, 'in': 0, 'sub': 0}
        self.verbose = verbose
        self.entailment = entailment

    def get_edistance_micai(self, frames_text=None, tokens_text=None,
                            chunks_text=None, frames_hypo=None,
                            tokens_hypo=None, chunks_hypo=None,
                            entailment=-1):
        """Compute, store in ``self.edit_score`` and return the score.

        Any non-empty argument replaces the value stored on the instance;
        empty or omitted arguments keep the stored value.  ``entailment``
        replaces the stored label unless it is the -1 default marker.

        Returns 0 when the text has no frames and no chunks are available.
        """
        if frames_text:
            self.frames_text = frames_text
        if frames_hypo:
            self.frames_hypo = frames_hypo
        if tokens_text:
            self.tokens_text = tokens_text
        if tokens_hypo:
            self.tokens_hypo = tokens_hypo
        if chunks_text:
            self.chunks_text = chunks_text
        if chunks_hypo:
            self.chunks_hypo = chunks_hypo
        # -1 means "not given". A plain truth test would treat -1 as given
        # and silently ignore a label of 0.
        if entailment != -1:
            self.entailment = entailment

        self.srl_t.set_frames(self.frames_text)
        self.srl_h.set_frames(self.frames_hypo)
        self.srl_t.set_tokens(self.tokens_text)
        self.srl_h.set_tokens(self.tokens_hypo)
        self.args_text = self.srl_t.get_words_frame()
        self.args_hypo = self.srl_h.get_words_frame()

        # NOTE(review): the normaliser is the number of verbs found in the
        # TEXT (the old local was called num_verbs_h) -- confirm intent.
        num_verbs = len(self.args_text)

        # Trace the tokens actually used, not just the ones passed in.
        self.__p_stderr('###V(%s)###\nT:%s\nH:%s\n' % (
            self.entailment,
            ' '.join(self.tokens_text),
            ' '.join(self.tokens_hypo)))

        total = 0
        for verb_t, args_t in self.args_text.items():
            for verb_h, args_h in self.args_hypo.items():
                if self.__simVerbs(verb_t, verb_h) == 1:
                    self.__p_stderr('\tverbs(%s, %s)\n' % (verb_t, verb_h))
                    if self.e_type == 'simple':
                        total += self.__simpleARG(args_t, args_h)

        # No verb in the text: score 0 instead of dividing by zero.
        self.edit_score = float(total) / num_verbs if num_verbs else 0.0
        self.__p_stderr('\ted_score(%s)\n' % (self.edit_score))

        # A zero score falls back to the chunk comparison.
        if self.edit_score == 0.0:
            self.edit_score = self.__back_off_order(self.chunks_text,
                                                    self.chunks_hypo)
        return self.edit_score

    def __simpleARG(self, args_t=(), args_h=()):
        """Return 1 / (number of edit operations) between two argument lists.

        args_t, args_h -- iterables of ``(tag, tokens)`` pairs.

        A tag present on both sides with different tokens is a substitution,
        a tag only in ``args_t`` an insertion, a tag only in ``args_h`` a
        deletion.  The per-type counts are left in ``self.oper_type``.
        Returns 0 when there is no operation at all.
        """
        tags_t = self.__extract_tags(args_t)
        tags_h = self.__extract_tags(args_h)
        self.oper_type = {'del': 0, 'in': 0, 'sub': 0}
        oper_sum = 0

        for tag_t, tokens_t in tags_t.items():
            if tag_t in tags_h:
                # Same tag on both sides: substitution if the tokens differ.
                tokens_h = tags_h[tag_t]
                if ' '.join(tokens_t) != ' '.join(tokens_h):
                    self.oper_type['sub'] += 1
                    self.__p_stderr('\t\toper(sub): [%s] %s -> %s\n'
                                    % (tag_t, tokens_t, tokens_h))
                    oper_sum += 1

        for tag_t, tokens_t in tags_t.items():
            if tag_t not in tags_h:
                self.oper_type['in'] += 1
                self.__p_stderr('\t\toper(in): [%s] %s\n' % (tag_t, tokens_t))
                oper_sum += 1

        for tag_h, tokens_h in tags_h.items():
            if tag_h not in tags_t:
                self.oper_type['del'] += 1
                self.__p_stderr('\t\toper(del): [%s] %s\n' % (tag_h, tokens_h))
                oper_sum += 1

        self.__p_stderr('num oper: %s\n' % self.oper_type)
        self.__p_stderr('sum oper: %s\n' % oper_sum)
        if oper_sum == 0:
            return 0
        score = 1.0 / float(oper_sum)
        self.__p_stderr('simp_score: %s\n' % score)
        return score

    def __extract_tags(self, args):
        """Return a ``{tag: tokens}`` dict; a repeated tag keeps its last tokens."""
        tags = {}
        for tag, tokens in args:
            tags[tag] = tokens
        return tags

    def __simVerbs(self, verb_t='', verb_h=''):
        """Return 1 for identical verbs, else the ``verb_net`` similarity.

        The ``vn_isec()`` value is returned when it is non-zero, otherwise
        the ``vo()`` value.
        """
        if verb_t == verb_h:
            return 1
        self.verb_net.set_text_verb(verb_t)
        self.verb_net.set_hypo_verb(verb_h)
        isec = self.verb_net.vn_isec()
        if isec == 0:
            return self.verb_net.vo()
        return isec

    def __back_off_order(self, chunks_t=(), chunks_h=()):
        """Return the position-wise chunk agreement, normalised by H's length.

        chunks_t, chunks_h -- objects whose ``pos()`` returns a sequence of
            ``((word, pos), tag)`` entries (presumably NLTK chunk trees --
            confirm against the caller).

        Entries at the same position with the same tag add 1.0 when the
        words are equal and 0.5 otherwise.  Returns 0.0 when either side is
        empty or missing.
        """
        self.__p_stderr('chunk back-off\n')
        # The default empty list has no pos(), so there is nothing to compare.
        if not chunks_t or not chunks_h:
            return 0.0
        entries_t = chunks_t.pos()
        entries_h = chunks_h.pos()
        if not entries_h:
            return 0.0

        sum_ch = 0
        for i, chunk_h in enumerate(entries_h):
            try:
                (node_t, tag_t) = entries_t[i]
                (node_h, tag_h) = chunk_h
                if tag_t != tag_h:
                    continue
                (word_t, _pos_t) = node_t
                (word_h, _pos_h) = node_h
            except (IndexError, TypeError, ValueError):
                # Text is shorter than the hypothesis, or the entry does not
                # have the expected shape: the position contributes nothing.
                continue
            if word_t == word_h:
                sum_ch += 1.0
                self.__p_stderr('\t[%s]: %s\n' % (tag_t, word_t))
            else:
                sum_ch += 0.5
                self.__p_stderr('\t[%s|%s]: %s %s \n'
                                % (tag_t, sum_ch, word_t, word_h))
        return float(sum_ch) / len(entries_h)

    def __p_stderr(self, text=''):
        """Write ``text`` to ``sys.stderr`` when ``self.verbose`` is 1."""
        if self.verbose == 1:
            sys.stderr.write(text)