Exemplo n.º 1
0
 def _expand_similar_sentences(self, o2o_dict_sorted_in):
     """Expand a sentence map by substituting synonym variants into its keys.

     Every key of ``o2o_dict_sorted_in`` is searched with each compiled
     pattern in ``self._similiar_class.re_o2m_similar_dict``. When a
     pattern matches, the matched text is replaced (all occurrences, via
     ``str.replace``) by each word listed under the same standard word in
     ``self._similiar_class.o2m_similar_dict``; every string produced this
     way is mapped to the value of the key it was derived from.

     Args:
         o2o_dict_sorted_in: mapping whose keys are the strings to expand
             and whose values are carried over to the generated keys.

     Returns:
         The result of ``dict2sorted_dict`` applied to the input mapping
         merged with the generated entries (generated entries take
         precedence when a key occurs in both).
     """
     variants_by_std_word = self._similiar_class.o2m_similar_dict
     generated = {}
     for sentence, std_sentence in o2o_dict_sorted_in.items():
         patterns = self._similiar_class.re_o2m_similar_dict
         for std_word, pattern in patterns.items():
             hit = pattern.search(sentence)
             if not hit:
                 continue
             matched_text = hit.group()
             for variant in variants_by_std_word[std_word]:
                 rewritten = sentence.replace(matched_text, variant)
                 generated[rewritten] = std_sentence
     combined = {**o2o_dict_sorted_in, **generated}
     return dict2sorted_dict(combined)
Exemplo n.º 2
0
 def _preload(self):
     """Build the one-to-one and one-to-many synonym lookup tables.

     Sets the following attributes on ``self``:

     * ``_entity_o2o_dict`` - taken from ``self._entity_class``.
     * ``_o2o_similar_dict`` - the file's one-to-one table merged with the
       entity one-to-one table (entity entries win on key collisions),
       passed through ``dict2sorted_dict``.
     * ``re_o2o_similar_keys`` - a compiled regex: one capturing group
       that alternates over the keys of ``_o2o_similar_dict``.
     * ``_o2m_similar_dict`` - ordered mapping key -> list of its values,
       longest first.
     * ``_re_o2m_similar_dict`` - mapping key -> ``list2re`` of that list.
     """
     # Correspondence between synonyms and standard words.
     file_o2m, file_o2o = readfile_line2dict(self._similiar_filepath)
     self._entity_o2o_dict = self._entity_class.entity_o2o_dict
     merged_o2o = {**file_o2o, **self._entity_o2o_dict}
     self._o2o_similar_dict = dict2sorted_dict(merged_o2o)
     alternation = '|'.join(self._o2o_similar_dict.keys())
     self.re_o2o_similar_keys = re.compile('(' + alternation + ')')

     # One-to-many correspondence: entity table first, then the file
     # table, unioning the values of keys present in both.
     grouped = collections.OrderedDict()
     sources = (self._entity_class.entity_o2m_order_dict, file_o2m)
     for source in sources:
         for key, values in source.items():
             bucket = grouped.get(key)
             if bucket is None:
                 grouped[key] = set(values)
             else:
                 bucket.update(values)

     # Replace each set by a longest-first list and build its pattern.
     compiled = {}
     for key in grouped:
         longest_first = sorted(grouped[key], key=len, reverse=True)
         grouped[key] = longest_first
         compiled[key] = list2re(longest_first)

     self._o2m_similar_dict = grouped
     self._re_o2m_similar_dict = compiled
Exemplo n.º 3
0
 def get_o2o_map_word_and_sentence(self):
     """Return the combined word-level and sentence-level one-to-one map.

     The sentence table is read with ``readfile_line2dict`` from
     ``PathUtil().similiar_sentences_filepath`` and merged over
     ``self._o2o_similar_dict`` (sentence entries win on key collisions).

     Returns:
         The merged mapping passed through ``dict2sorted_dict``.
     """
     # Only the one-to-one part of the file is needed here.
     _, sentence_o2o = readfile_line2dict(
         PathUtil().similiar_sentences_filepath)
     # Sentence-level and word-level synonym substitutions are handled
     # together in a single table.
     combined = {**self._o2o_similar_dict, **sentence_o2o}
     return dict2sorted_dict(combined)
Exemplo n.º 4
0
 def _preload(self):
     """Populate ``self._intent2sim2std_o2o_dict``.

     For every entry of ``self._field_o2o_needed`` the mapping returned by
     ``self._get_o2o_by_intent`` is collected (entries for a repeated
     field are merged); afterwards each collected mapping is replaced by
     its ``dict2sorted_dict`` form. The result is a
     ``collections.defaultdict(dict)`` keyed by field.
     """
     per_field = collections.defaultdict(dict)

     # Phase 1: gather the raw mappings.
     for field in self._field_o2o_needed:
         mapping = self._get_o2o_by_intent(field)
         per_field[field].update(mapping)

     # Phase 2: sort every gathered mapping (keys stay the same, only the
     # values are rebound, so iterating while assigning is safe).
     for field in per_field:
         per_field[field] = dict2sorted_dict(per_field[field])

     self._intent2sim2std_o2o_dict = per_field
Exemplo n.º 5
0
 def get_intent2entities(self):
     """Collect, across all domains, the words listed under each intent.

     Walks ``self._domain2intent2words_dict`` (domain -> intent -> words)
     and concatenates the word lists of identical intents.

     Returns:
         The intent -> words mapping passed through ``dict2sorted_dict``,
         with every word list passed through ``list2sorted_list``.
     """
     words_by_intent = collections.defaultdict(list)
     for intent2words in self._domain2intent2words_dict.values():
         for intent, words in intent2words.items():
             words_by_intent[intent].extend(words)

     ordered = dict2sorted_dict(words_by_intent)
     # Only values are rebound, so the mapping keeps its size while
     # being iterated.
     for intent, words in ordered.items():
         ordered[intent] = list2sorted_list(words)
     return ordered
Exemplo n.º 6
0
 def _load_abbreviation2std_map(self):
     """Load one key -> values table per abbreviation entity label.

     For every label in ``self._abbreviations_entity`` the file at
     ``self._filepath_general.format(label)`` is read with
     ``readfile_line2dict``. Each key is removed from its own value
     collection and the table is then passed through
     ``dict2sorted_dict``.

     Returns:
         A ``collections.defaultdict(dict)`` mapping each label to its
         sorted table.

     Raises:
         Whatever ``.remove`` raises on the value collection when a key
         is not contained in its own values (``ValueError`` for a list).
     """
     results = collections.defaultdict(dict)
     for label_iter in self._abbreviations_entity:
         filepath = self._filepath_general.format(label_iter)
         # Only the one-to-many part of the file is used here.
         line_o2m_dict, _ = readfile_line2dict(filepath)
         for k, values in line_o2m_dict.items():
             # Drop the key from its own value collection.
             values.remove(k)
         # Sort once per file, after all removals. Doing this inside the
         # loop above re-sorted the whole table for every key and, for an
         # empty table, left the result unbound (first label) or stale
         # from the previous label.
         results[label_iter] = dict2sorted_dict(line_o2m_dict)
     return results
Exemplo n.º 7
0
    def _get_entities(self, domain2entity2paths_set_in):
        """Read entity data for every domain and build the lookup tables.

        For each ``domain -> paths`` entry, ``read_data_from_paths_set``
        supplies six tables. Three of them (word -> classification,
        one-to-many, one-to-one) are passed through ``dict2sorted_dict``
        before being stored per domain; the sim -> std tables are merged
        per intent across all domains. Finally an aggregate entry keyed by
        ``dcname.alldomain.value`` is added to the first two results.

        Args:
            domain2entity2paths_set_in: ``dict`` mapping each domain to the
                argument handed to ``read_data_from_paths_set``.

        Returns:
            A 6-tuple:
            ``(domain -> intent -> words, domain -> word -> intent,
            domain -> one-to-one table, domain -> one-to-many table,
            domain -> intent -> std -> sim, intent -> sim -> std)``.

        Raises:
            AssertionError: if the argument is not a ``dict`` (only when
                assertions are enabled).
        """
        intent2words_by_domain = {}
        word2intent_by_domain = {}
        o2o_by_domain = {}
        o2m_by_domain = {}
        std2sim_by_domain = {}
        sim2std_by_intent = collections.defaultdict(dict)
        assert isinstance(domain2entity2paths_set_in, dict)

        for domain, paths_set in domain2entity2paths_set_in.items():
            (label2words, word2label, entity_o2o, entity_o2m,
             label2std2sim, label2sim2std) = read_data_from_paths_set(
                 paths_set)

            word2label_sorted = dict2sorted_dict(word2label)
            entity_o2m_sorted = dict2sorted_dict(entity_o2m)
            entity_o2o_sorted = dict2sorted_dict(entity_o2o)

            intent2words_by_domain[domain] = label2words
            word2intent_by_domain[domain] = word2label_sorted
            o2o_by_domain[domain] = entity_o2o_sorted
            o2m_by_domain[domain] = entity_o2m_sorted
            std2sim_by_domain[domain] = label2std2sim

            for intent, sim2std in label2sim2std.items():
                sim2std_by_intent[intent].update(sim2std)

        # Aggregate over all domains: concatenate the words of each intent.
        all_intent2words = collections.defaultdict(list)
        for intent2words in intent2words_by_domain.values():
            for intent, words in intent2words.items():
                all_intent2words[intent].extend(words)
        intent2words_by_domain[dcname.alldomain.value] = all_intent2words

        # Aggregate over all domains: a later domain overrides an earlier
        # one when both contain the same word.
        all_word2intent = collections.defaultdict(list)
        for word2intent in word2intent_by_domain.values():
            for word, intent in word2intent.items():
                all_word2intent[word] = intent
        word2intent_by_domain[dcname.alldomain.value] = all_word2intent

        return (intent2words_by_domain, word2intent_by_domain,
                o2o_by_domain, o2m_by_domain, std2sim_by_domain,
                sim2std_by_intent)
Exemplo n.º 8
0
 def entity_o2m_order_dict(self):
     """Return the one-to-many tables of all domains merged into one.

     The per-domain tables in ``self._domain2entity_o2m_dict`` are merged
     in iteration order (a later domain overrides an earlier one on key
     collisions) and the result is passed through ``dict2sorted_dict``.
     """
     merged = {}
     for per_domain_table in self._domain2entity_o2m_dict.values():
         merged.update(per_domain_table)
     return dict2sorted_dict(merged)