def __load_similar_sents(self):
    """Load the similar-sentences mapping from disk.

    The file path is taken from ``PathUtil().similiar_sentences_filepath``
    and parsed with ``readfile_line2dict``, which returns a pair of mappings.

    Returns:
        The first mapping of that pair (named ``line_o2m_dict`` at the call
        site, presumably one-to-many). The second mapping is discarded.
    """
    sentences_path = PathUtil().similiar_sentences_filepath
    one_to_many, _one_to_one = readfile_line2dict(sentences_path)
    return one_to_many
def _preload(self):
    """Build the synonym lookup tables and their regexes on this instance.

    Reads ``self._similiar_filepath`` through ``readfile_line2dict`` and
    combines the result with the dictionaries exposed by
    ``self._entity_class``. Attributes written:

    * ``_entity_o2o_dict`` -- the entity class's ``entity_o2o_dict``.
    * ``_o2o_similar_dict`` -- file one-to-one entries merged with
      ``_entity_o2o_dict`` (entity entries win on key clashes), passed
      through ``dict2sorted_dict``.
    * ``re_o2o_similar_keys`` -- a single capturing group alternating over
      every key of ``_o2o_similar_dict``.
    * ``_o2m_similar_dict`` -- ordered mapping from each key to its
      de-duplicated candidates, longest candidate first.
    * ``_re_o2m_similar_dict`` -- per key, ``list2re`` applied to that
      candidate list.
    """
    file_o2m, file_o2o = readfile_line2dict(self._similiar_filepath)

    # Synonym -> standard word (one-to-one).
    self._entity_o2o_dict = self._entity_class.entity_o2o_dict
    combined_o2o = {**file_o2o, **self._entity_o2o_dict}
    self._o2o_similar_dict = dict2sorted_dict(combined_o2o)
    # NOTE(review): keys are joined without re.escape, so regex
    # metacharacters inside a key act as pattern syntax -- confirm intended.
    alternation = '|'.join(self._o2o_similar_dict.keys())
    self.re_o2o_similar_keys = re.compile('(' + alternation + ')')

    # One-to-many: union the candidates per key, entity entries first so
    # their keys come first in the ordered result.
    merged_sets = collections.OrderedDict()
    entity_o2m = self._entity_class.entity_o2m_order_dict
    for source in (entity_o2m, file_o2m):
        for std_word, candidates in source.items():
            merged_sets.setdefault(std_word, set()).update(candidates)

    # Order each candidate set longest-first and derive its regex.
    ordered_candidates = collections.OrderedDict()
    candidate_regexes = {}
    for std_word, candidates in merged_sets.items():
        longest_first = sorted(candidates, key=len, reverse=True)
        ordered_candidates[std_word] = longest_first
        candidate_regexes[std_word] = list2re(longest_first)

    self._o2m_similar_dict = ordered_candidates
    self._re_o2m_similar_dict = candidate_regexes
def get_o2o_map_word_and_sentence(self):
    """Return word-level and sentence-level one-to-one substitutions merged.

    Sentence substitutions are read from
    ``PathUtil().similiar_sentences_filepath``; word substitutions come from
    ``self._o2o_similar_dict``. Both are handled in one table, and on a key
    clash the sentence-file entry overrides the word entry.

    Returns:
        The merged mapping after being passed through ``dict2sorted_dict``.
    """
    _sentence_o2m, sentence_o2o = readfile_line2dict(
        PathUtil().similiar_sentences_filepath)
    combined = {**self._o2o_similar_dict, **sentence_o2o}
    return dict2sorted_dict(combined)
def __load_similar_sents(self):
    """Collect the similar words of every entity file into one mapping.

    Each path in ``self._entity_filepath_set`` is parsed with
    ``readfile_line2dict``; the values of its first returned mapping are
    appended, key by key, to a ``defaultdict(list)`` accumulator.

    NOTE(review): as far as the visible code goes, the accumulator is
    neither returned nor stored on ``self`` -- confirm whether a trailing
    ``return`` is missing.
    """
    similar_by_std = collections.defaultdict(list)
    for entity_path in self._entity_filepath_set:
        one_to_many, _one_to_one = readfile_line2dict(entity_path)
        for std_word, similar_words in one_to_many.items():
            similar_by_std[std_word].extend(similar_words)
def _load_abbreviation2std_map(self):
    """Build, per entity label, a sorted mapping with self-references removed.

    For every label in ``self._abbreviations_entity`` the file at
    ``self._filepath_general.format(label)`` is parsed with
    ``readfile_line2dict``. In its first returned mapping each key is
    removed from its own value list, and the mapping is then passed
    through ``dict2sorted_dict``.

    Returns:
        A ``defaultdict(dict)`` keyed by label, holding the sorted mappings.

    Raises:
        ValueError: propagated from ``list.remove`` if a key is missing
            from its own value list (assuming the values are lists).
    """
    results = collections.defaultdict(dict)
    for label in self._abbreviations_entity:
        label_path = self._filepath_general.format(label)
        one_to_many, _one_to_one = readfile_line2dict(label_path)
        # Drop each key from its own value list, in place.
        for std_word, variants in one_to_many.items():
            variants.remove(std_word)
        results[label] = dict2sorted_dict(one_to_many)
    return results
def _preload_reback(self):
    """Load the roll-back table from ``self._reback_filepath``.

    Per the original comment, the table is used when a substitution was
    wrong, to revert it to a standard entity word.

    Returns:
        The second mapping returned by ``readfile_line2dict`` (named
        ``line_o2o_dict_order`` at the call site); the first is discarded.
    """
    _one_to_many, rollback_o2o = readfile_line2dict(self._reback_filepath)
    return rollback_o2o
def _load_represent_chars_map(self):
    """Load the mapping from ``PathUtil().char_represent_map_file``.

    Stores the second mapping returned by ``readfile_line2dict`` on
    ``self._represent_char_o2o_dict``; the first is discarded. Nothing is
    returned.
    """
    map_path = PathUtil().char_represent_map_file
    _one_to_many, char_o2o = readfile_line2dict(map_path)
    self._represent_char_o2o_dict = char_o2o
def __read_represent_words(self):
    """Get the mapping for a class of words that are represented by letters.

    The path is read from the ``get_represent_words_map`` attribute of a
    fresh ``PathUtil`` instance (accessed, not called) and parsed with
    ``readfile_line2dict``.

    Returns:
        The first mapping returned by ``readfile_line2dict``; the second
        is discarded.
    """
    words_map_path = PathUtil().get_represent_words_map
    one_to_many, _one_to_one = readfile_line2dict(words_map_path)
    return one_to_many