Пример #1
0
 def __load_similar_sents(self):
     """Return the one-to-many dict read from the similar-sentences file."""
     # readfile_line2dict yields a (one-to-many, one-to-one) pair; only the
     # one-to-many half is used here.
     one_to_many, _ = readfile_line2dict(
         PathUtil().similiar_sentences_filepath)
     return one_to_many
Пример #2
0
 def _preload(self):
     """Build the synonym lookup tables from the file and the entity class.

     Sets ``_entity_o2o_dict``, ``_o2o_similar_dict``,
     ``re_o2o_similar_keys``, ``_o2m_similar_dict`` and
     ``_re_o2m_similar_dict`` on the instance.
     """
     # Synonym -> standard word (one-to-one) correspondence.
     file_o2m, file_o2o = readfile_line2dict(self._similiar_filepath)
     self._entity_o2o_dict = self._entity_class.entity_o2o_dict
     # Entity-class entries win over file entries when keys collide.
     self._o2o_similar_dict = dict2sorted_dict(
         {**file_o2o, **self._entity_o2o_dict})
     alternation = '|'.join(self._o2o_similar_dict.keys())
     self.re_o2o_similar_keys = re.compile('(' + alternation + ')')

     # One-to-many correspondence: entity-class entries are merged first,
     # then the entries read from the file.
     merged = collections.OrderedDict()
     entity_o2m = self._entity_class.entity_o2m_order_dict
     for source in (entity_o2m, file_o2m):
         for std_word, variants in source.items():
             merged.setdefault(std_word, set()).update(variants)

     patterns = {}
     for std_word, variants in merged.items():
         # Longest variants first.
         longest_first = sorted(variants, key=len, reverse=True)
         merged[std_word] = longest_first
         patterns[std_word] = list2re(longest_first)

     self._o2m_similar_dict = merged
     self._re_o2m_similar_dict = patterns
Пример #3
0
 def get_o2o_map_word_and_sentence(self):
     """Return the word and sentence one-to-one maps, merged and sorted.

     The sentence map is read from the similar-sentences file and merged
     over ``self._o2o_similar_dict``; the result is passed through
     ``dict2sorted_dict``.
     """
     _, sentence_o2o = readfile_line2dict(
         PathUtil().similiar_sentences_filepath)
     # Sentence-level and word-level synonym substitutions are handled
     # together; sentence entries win when a key appears in both.
     combined = {**self._o2o_similar_dict, **sentence_o2o}
     return dict2sorted_dict(combined)
Пример #4
0
 def __load_similar_sents(self):
     """Merge the one-to-many entries of every entity file into one dict.

     Each path in ``self._entity_filepath_set`` is read with
     ``readfile_line2dict``; the values of its one-to-many dict are
     appended to the list kept under the same key.

     NOTE(review): within the visible lines the merged dict is neither
     returned nor stored on ``self`` -- this listing may be truncated;
     confirm against the full source.
     """
     # Collect the dict of all similar words from the files.
     std2similar_words_dict = collections.defaultdict(list)
     for filepath_iter in self._entity_filepath_set:
         # Only the one-to-many half of the pair is used below.
         line_o2m_dict, line_o2o_dict_order = readfile_line2dict(
             filepath_iter)
         for key, value in line_o2m_dict.items():
             # extend() keeps duplicates when several files share a key.
             std2similar_words_dict[key].extend(value)
Пример #5
0
 def _load_abbreviation2std_map(self):
     """Build, for each abbreviation entity label, its sorted one-to-many map.

     For every label in ``self._abbreviations_entity`` the file at
     ``self._filepath_general.format(label)`` is read with
     ``readfile_line2dict``. Each key is removed from its own value
     collection, then the dict is passed through ``dict2sorted_dict``.

     Returns:
         A ``collections.defaultdict(dict)`` mapping each label to the
         result of ``dict2sorted_dict`` for that label's file.

     Raises:
         Whatever ``.remove`` raises when a key is absent from its own
         values (``ValueError`` for a list) -- unchanged from before.
     """
     results = collections.defaultdict(dict)
     for label_iter in self._abbreviations_entity:
         filepath = self._filepath_general.format(label_iter)
         line_o2m_dict, _ = readfile_line2dict(filepath)
         for k, v in line_o2m_dict.items():
             # Drop the key itself from its own values; mutating the value
             # in place does not disturb iteration over the dict.
             v.remove(k)
         # Sort once per label, after all removals. The previous version
         # re-sorted on every key and, for a file with no entries, either
         # hit an unbound local or reused the previous label's dict.
         # Assumes dict2sorted_dict accepts an empty dict -- TODO confirm.
         results[label_iter] = dict2sorted_dict(line_o2m_dict)
     return results
Пример #6
0
 def _preload_reback(self):
     """Return the one-to-one dict read from ``self._reback_filepath``."""
     # Used when a substitution was wrong and has to be rolled back to a
     # standard entity word.
     _, reback_o2o = readfile_line2dict(self._reback_filepath)
     return reback_o2o
Пример #7
0
 def _load_represent_chars_map(self):
     """Store the one-to-one dict of the char-represent map file on self."""
     # Only the one-to-one half of the parsed pair is kept.
     _, char_o2o = readfile_line2dict(PathUtil().char_represent_map_file)
     self._represent_char_o2o_dict = char_o2o
 def __read_represent_words(self):
     """Get the mapping for the class of words that letters stand for."""
     # Only the one-to-many half of the parsed pair is returned.
     one_to_many, _ = readfile_line2dict(
         PathUtil().get_represent_words_map)
     return one_to_many