Пример #1
0
class RecallModel(SimilarModelBase):
    """Recall model that looks up candidate terms for an input sentence.

    Only the similar-word loading step is implemented; the other steps
    are placeholders that currently return ``None``.
    """

    # Entity dictionary files whose one-to-many lines are merged together.
    _entity_filepath_set = [
        PathUtil().domain_filepath,
        PathUtil().intent_filepath,
        PathUtil().property_filepath,
        PathUtil().company_domain_filepath,
        PathUtil().company_property_filepath
    ]

    def __init__(self):
        super(RecallModel, self).__init__()

    def __load_similar_sents(self):
        """Merge the one-to-many dicts read from every entity file.

        Returns:
            A ``defaultdict(list)`` mapping each key of the one-to-many
            dict produced by ``readfile_line2dict`` to the concatenation
            of its values across all files in ``_entity_filepath_set``.
        """
        std2similar_words_dict = collections.defaultdict(list)
        for filepath_iter in self._entity_filepath_set:
            line_o2m_dict, _ = readfile_line2dict(filepath_iter)
            for key, value in line_o2m_dict.items():
                std2similar_words_dict[key].extend(value)
        # Previously the merged mapping was built and then dropped, so the
        # method always returned None.
        return std2similar_words_dict

    def _get_words2term(self):
        """Placeholder: map each fine-grained word unit to its terms and weights."""
        pass

    def _get_weight_word_in_term(self):
        """Placeholder: compute the tf-idf of each word unit within a term."""
        pass

    def recall(self, str_in):
        """Placeholder: segment the input sentence and recall related terms."""
        pass
Пример #2
0
 def __load_similar_sents(self):
     """Read the similar-sentences file and return its one-to-many dict."""
     one_to_many, _one_to_one = readfile_line2dict(
         PathUtil().similiar_sentences_filepath)
     return one_to_many
class EntitySet(object):
    """Gathers label -> words mappings from the entity store and map files."""

    _entity_class = Entity()
    _words_map_filepaths = {'pronoun': [PathUtil().prounoun_filepath]}

    def __init__(self):
        pass

    def get_all_represent_words(self):
        """Return one dict merging all three sources.

        Sources are applied in order (entity intents, represent-word map,
        pronoun map), so a later source overrides an earlier one on a
        key clash.
        """
        sources = (
            self._entity_class.get_intent2entities,
            self.__read_represent_words(),
            self.__read_words_map_filepaths(),
        )
        merged = {}
        for mapping in sources:
            merged.update(mapping)
        return merged

    def __read_words_map_filepaths(self):
        """Return the label -> words dict read from ``_words_map_filepaths``."""
        # read_data_from_paths_set yields six mappings; only the first is used.
        label2words, _, _, _, _, _ = read_data_from_paths_set(
            self._words_map_filepaths)
        return label2words

    def __read_represent_words(self):
        """Return the one-to-many dict read from the represent-words map file."""
        one_to_many, _one_to_one = readfile_line2dict(
            PathUtil().get_represent_words_map)
        return one_to_many
Пример #4
0
def jieba_diy():
    """Register the project's custom vocabulary with jieba.

    Loads the user dictionary file, adds every word returned by
    ``jieba_add_words`` and tunes the frequency of one fixed segment pair.
    """
    jieba.load_userdict(PathUtil().diy_words_filepath)
    for extra_word in jieba_add_words():
        jieba.add_word(extra_word)
    jieba.suggest_freq(['那', '不买'], tune=True)
Пример #5
0
class PronounBase(object):
    """Finds pronouns and comparison pronouns using patterns built from word files."""

    _path_class = PathUtil()
    entity_class = Entity()

    def __init__(self):
        self._preload()

    def _preload(self):
        """Read both word lists, then build one pattern from each via ``list2re``."""
        plain_words = self._get_pronouns()
        comparison_words = self._get_compare_pronouns()
        self._re_prounouns = list2re(plain_words)
        self._re_compare_pronouns = list2re(comparison_words)

    def _get_pronouns(self):
        """Return the lines of the pronoun file."""
        return readfile_line2list(self._path_class.prounoun_filepath)

    def _get_compare_pronouns(self):
        """Return the lines of the comparison-pronoun file."""
        return readfile_line2list(self._path_class.compare_pronoun_filepath)

    def bool_pronouns_exist(self, str_in):
        """Return ``(found, matches)`` for pronouns in ``str_in``."""
        hits = self._re_prounouns.findall(str_in)
        return bool(hits), hits

    def bool_compare_pronoun_exist(self, str_in):
        """Return ``(found, matches)`` for comparison pronouns in ``str_in``."""
        hits = self._re_compare_pronouns.findall(str_in)
        return bool(hits), hits

    def get_pronoun_word(self, word_list_in):
        """Return the words that contain a pronoun but no known entity."""
        kept = []
        for candidate in word_list_in:
            if not self._re_prounouns.search(candidate):
                continue
            if self.entity_class.re_entity_list.search(candidate):
                continue
            kept.append(candidate)
        return kept
Пример #6
0
 def get_o2o_map_word_and_sentence(self):
     """Return word-level and sentence-level one-to-one substitutions together.

     Sentence entries override word entries on a key clash; the merged
     dict is passed through ``dict2sorted_dict`` before being returned.
     """
     _one_to_many, sentence_o2o = readfile_line2dict(
         PathUtil().similiar_sentences_filepath)
     combined = dict(self._o2o_similar_dict)
     combined.update(sentence_o2o)
     return dict2sorted_dict(combined)
def fill_words2patterns():
    """Substitute word alternatives for labels in each pattern and compile it.

    Returns:
        A ``defaultdict(list)`` mapping each YAML key to its compiled
        regular expressions, in the order the patterns were listed.
    """
    label2words_str = _trans_label2words_format()
    raw_patterns_by_key = read_yaml_dict_onelayer(
        PathUtil().get_pattern_with_represent_yamlpath)
    compiled_by_key = collections.defaultdict(list)
    for key, raw_patterns in raw_patterns_by_key.items():
        for raw_pattern in raw_patterns:
            expanded = raw_pattern
            for label, words_str in label2words_str.items():
                if label not in expanded:
                    continue
                expanded = expanded.replace(label, words_str)
            compiled_by_key[key].append(re.compile(expanded))
    return compiled_by_key
Пример #8
0
class InferAbbrev2StdEntity(object):
    """Expands an entity abbreviation found in a sentence into its standard names.

    The abbreviation maps are read from ``abbreviations2entity_<label>.txt``
    files, one per label in ``_abbreviations_entity``.
    """

    _abbreviations_entity = [cname.domain.value]
    _filename_general = 'abbreviations2entity_{}.txt'
    _filepath_general = os.path.join(PathUtil().files_dirpath, _filename_general)
    _entity_class = Entity()

    def __init__(self):
        self._load_words()

    def _load_words(self):
        """Load and cache the label -> abbreviation -> standard-names map."""
        self._label2abbreviation2std_map = self._load_abbreviation2std_map()

    def _load_abbreviation2std_map(self):
        """Read every label's abbreviation file.

        Returns:
            A ``defaultdict(dict)`` mapping each label to the result of
            ``dict2sorted_dict`` applied to that file's one-to-many dict,
            with each key removed from its own value list.
        """
        results = collections.defaultdict(dict)
        for label_iter in self._abbreviations_entity:
            filepath = self._filepath_general.format(label_iter)
            line_o2m_dict, _ = readfile_line2dict(filepath)
            for abbrev, stds in line_o2m_dict.items():
                # An abbreviation must not expand to itself. The membership
                # guard keeps loading from aborting when the key is absent.
                if abbrev in stds:
                    stds.remove(abbrev)
            # Sort once per file. Doing it inside the loop repeated the work
            # for every key and left the result unbound for an empty file.
            results[label_iter] = dict2sorted_dict(line_o2m_dict)
        return results

    def get_interaction_strs_map(self, str_in):
        """Expand the first abbreviation found in ``str_in``.

        Returns:
            ``(abbreviation, candidates)`` where ``candidates`` is a list of
            single-entry dicts ``{display_name: rewritten_sentence}``, one
            per standard name. ``('', [])`` when no abbreviation matches.
        """
        for label_iter in self._abbreviations_entity:
            abbrev2stds = self._label2abbreviation2std_map[label_iter]
            for abbrev_iter, stds_iter in abbrev2stds.items():
                if abbrev_iter not in str_in:
                    continue
                # TODO: revert to keying by std_iter later; the display-name
                # lookup exists for the front end.
                candidates = [
                    {_tmp_for_show_str.get(std_iter, std_iter):
                     str_in.replace(abbrev_iter, std_iter)}
                    for std_iter in stds_iter
                ]
                # Only the first matching abbreviation is expanded.
                return abbrev_iter, candidates
        return '', []

    def bool_abbrevation(self, str_in):
        """Return True when ``str_in`` yields at least one expansion candidate."""
        _abbrev_str, abbrev2stds_list = self.get_interaction_strs_map(str_in)
        return bool(abbrev2stds_list)
Пример #9
0
def jieba_add_words():
    """Collect the multi-character words listed in the entity and pronoun files.

    Each line is stripped of surrounding CR/LF/space characters and split
    with ``ReSet().split_pattern[0]``; pieces of length one or less are
    dropped.

    Returns:
        A list of the remaining pieces, in file and line order.
    """
    splitter_owner = ReSet()
    paths = PathUtil()
    source_files = [
        paths.domain_filepath,
        paths.intent_filepath,
        paths.property_filepath,
        paths.company_domain_filepath,
        paths.company_property_filepath,
        paths.prounoun_filepath,
    ]
    collected = []
    for source_file in source_files:
        with open(source_file, 'r', encoding='utf-8') as handle:
            stripped_lines = [raw.strip('\r\n ') for raw in handle]
            for stripped in stripped_lines:
                pieces = splitter_owner.split_pattern[0].split(stripped)
                long_pieces = [piece for piece in pieces if len(piece) > 1]
                collected.extend(long_pieces)
    return collected
Пример #10
0
 def __init__(self):
     """Attach an INFO-level UTF-8 file handler to the root logger.

     The log file path comes from ``PathUtil().logfilepath``. The root
     logger is stored on ``self.logger``.
     """
     logfile = PathUtil().logfilepath
     # Step 1: get the root logger; INFO is the overall threshold.
     logger = logging.getLogger()
     self.logger = logger
     logger.setLevel(logging.INFO)
     # Step 2: create the file handler. mode='w' truncates the log file
     # each time this constructor runs.
     fh = logging.FileHandler(logfile, mode='w', encoding='utf-8')
     fh.setLevel(logging.INFO)
     # Step 3: define the handler's output format.
     formatter = logging.Formatter(
         "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
     )
     fh.setFormatter(formatter)
     # Step 4: register the handler on the logger.
     # NOTE(review): every construction adds another handler to the root
     # logger, so repeated instantiation duplicates log lines. Confirm this
     # class is only instantiated once.
     logger.addHandler(fh)
Пример #11
0
 def _load_represent_chars_map(self):
     """Cache the one-to-one dict read from the char-represent map file."""
     _one_to_many, ordered_one_to_one = readfile_line2dict(
         PathUtil().char_represent_map_file)
     self._represent_char_o2o_dict = ordered_one_to_one
Пример #12
0
class SimiliarReplace(object):
    """Replaces synonyms in strings or token lists with their standard words.

    The substitution tables are built once in ``__init__`` from the
    similar-words file and the dictionaries exposed by ``Entity``.
    """
    _similiar_filepath = PathUtil().similiar_words_filepath
    _pathutil_class = PathUtil()
    _entity_class = Entity()
    _reback_filepath = PathUtil().reback_file

    def __init__(self):
        self._preload()
        self._reback_o2o_dict = self._preload_reback()

    def _preload_reback(self):
        """Return the one-to-one dict read from the roll-back file."""
        # Used when a substitution went wrong and must be rolled back to a
        # standard entity word.
        line_o2m_dict, line_o2o_dict_order = readfile_line2dict(
            self._reback_filepath)
        return line_o2o_dict_order

    def _preload(self):
        """Build the one-to-one and one-to-many synonym tables and their regexes."""
        # Synonym -> standard-word mapping; entity entries override file entries.
        line_o2m_dict, line_o2o_dict = readfile_line2dict(
            self._similiar_filepath)
        self._entity_o2o_dict = self._entity_class.entity_o2o_dict
        entity_o2o_dict_tmp = {**line_o2o_dict, **self._entity_o2o_dict}
        self._o2o_similar_dict = dict2sorted_dict(entity_o2o_dict_tmp)
        # NOTE(review): keys are joined into the pattern without re.escape, so
        # a key containing regex metacharacters is interpreted as regex syntax.
        self.re_o2o_similar_keys = re.compile(
            '(' + '|'.join(self._o2o_similar_dict.keys()) + ')')
        # One-to-many mapping: union of the entity and file value collections.
        o2m_similar_dict = collections.OrderedDict()
        entity_o2m_dict = self._entity_class.entity_o2m_order_dict
        o2m_tmp_dict = [entity_o2m_dict, line_o2m_dict]
        for o2m_dict_iter in o2m_tmp_dict:
            for o_iter, m_iter in o2m_dict_iter.items():
                if o_iter not in o2m_similar_dict:
                    o2m_similar_dict[o_iter] = set(m_iter)
                else:
                    o2m_similar_dict[o_iter].update(m_iter)
        re_o2m_similiar_dict = dict()
        for o_iter, m_iter in o2m_similar_dict.items():
            # Longest first, so the pattern built from the list sees longer
            # alternatives before shorter ones.
            m_iter_sorted = sorted(m_iter, key=lambda x: len(x), reverse=True)
            o2m_similar_dict[o_iter] = m_iter_sorted
            re_o2m_similiar_dict[o_iter] = list2re(m_iter_sorted)
        self._o2m_similar_dict = o2m_similar_dict
        self._re_o2m_similar_dict = re_o2m_similiar_dict
        # self._entity_class.get_label2entities()
    @property
    def get_o2o_map_word_and_sentence(self):
        """Word-level and sentence-level one-to-one substitutions merged and sorted.

        The similar-sentences file is re-read on every access.
        """
        line_o2m_dict, line_o2o_dict = readfile_line2dict(
            PathUtil().similiar_sentences_filepath)
        # Sentence-level and word-level synonym substitutions are handled together.
        word_o2o_dict = self._o2o_similar_dict
        o2o_dict = {**word_o2o_dict, **line_o2o_dict}
        o2o_dict_sorted = dict2sorted_dict(o2o_dict)
        return o2o_dict_sorted

    @property
    def o2m_similar_dict(self):
        """Ordered mapping of key -> value list sorted longest first."""
        return self._o2m_similar_dict

    @property
    def re_o2m_similar_dict(self):
        """Mapping of key -> pattern built by ``list2re`` from its value list."""
        return self._re_o2m_similar_dict

    def replace_str(self, str_in):  # string-level processing
        """Replace every synonym occurring in ``str_in`` with its standard word.

        Steps, as visible in the code:
          1. For each ``k, v`` in the module-level ``_fufilled_part_str_missied``,
             append ``v`` after ``k`` when ``k`` is present but ``k + v`` is not.
          2. Swap each matching synonym for a temporary placeholder so that an
             already-substituted standard word cannot be matched again.
          3. Replace the placeholders with the standard words.
          4. If step 1 changed anything, remove the first occurrence of each
             ``v`` still left in the working copy of that mapping.

        Returns:
            The rewritten string.
        """
        str_tmp = str_in

        # Working copy: entries are popped from it while the original mapping
        # is still iterated below.
        _fufilled_part_str_missied_copy = copy.deepcopy(
            _fufilled_part_str_missied)
        flag_change = 0
        for k, v in _fufilled_part_str_missied.items():
            if k in str_tmp and k + v not in str_tmp:
                flag_change = 1
                str_tmp = str_tmp.replace(k, k + v)

        # NOTE(review): only seven placeholders exist, so an eighth matching
        # synonym raises IndexError at the tmp_key_store lookup below.
        tmp_key_store = ['A#$', 'B#$', 'C#$', 'D#$', 'E#$', 'F#$', 'G#$']
        tmp_key_dict = {}
        index_incres = 0
        tmp_kept_strs_part_dict = dict()
        for sent_similar_iter, sent_std_iter in self._o2o_similar_dict.items():

            if sent_similar_iter in str_tmp:
                # A completed pair consumed by this synonym must not be
                # stripped again in the final clean-up loop.
                for k in list(_fufilled_part_str_missied_copy.keys()):
                    v = _fufilled_part_str_missied_copy[k]
                    if k + v in sent_similar_iter:
                        _fufilled_part_str_missied_copy.pop(k)
                elems = []
                str_split = str_tmp.split(sent_similar_iter)
                char_represent = tmp_key_store[index_incres]
                tmp_key_dict[tmp_key_store[index_incres]] = sent_std_iter
                for elem in str_split[:-1]:
                    elem += char_represent
                    if self.re_o2o_similar_keys.search(str_split[-1]):
                        pass
                    elif sent_similar_iter[-2:] in _shared_strs_by_two_entity:
                        # Keep the synonym's last two characters after the
                        # placeholder for now; the recorded pair strips them
                        # again in the loop over tmp_kept_strs_part_dict.
                        elem += sent_similar_iter[-2:]
                        tmp_kept_strs_part_dict[
                            char_represent +
                            sent_similar_iter[-2:]] = char_represent

                    else:
                        pass
                    elems.append(elem)
                index_incres += 1
                elems.append(str_split[-1])
                str_tmp = ''.join(elems)
            else:
                pass
        # Drop the temporarily kept suffixes, then expand the placeholders.
        for key, value in tmp_kept_strs_part_dict.items():
            str_tmp = str_tmp.replace(key, value)
        for key, value in tmp_key_dict.items():
            str_tmp = str_tmp.replace(key, value)

        for k, v in _fufilled_part_str_missied_copy.items():
            if flag_change == 1:
                str_tmp = str_tmp.replace(v, '', 1)

        return str_tmp

    def replace_list(self, str_list_in):  # comparison on segmentation results
        """Replace synonym tokens in ``str_list_in`` with their standard words.

        A token is also replaced when the previous token, minus its last two
        characters, concatenated with the token equals a synonym.

        The input list is modified in place, and the same list is returned.
        """
        str_list_tmp = str_list_in
        for raw_word_iter, standard_word_iter in self._o2o_similar_dict.items(
        ):
            for index, word_iter in enumerate(str_list_tmp):
                if word_iter == raw_word_iter:
                    str_list_tmp[index] = standard_word_iter
                elif index >= 1 and str_list_tmp[
                        index - 1][:-2] + word_iter == raw_word_iter:
                    str_list_tmp[index] = standard_word_iter
                else:
                    pass
        return str_list_tmp

    def bool_entity_words_exists_and_number(self, sentence_in):
        """Return ``(found, count)`` of distinct entity words in ``sentence_in``.

        Each matched word is removed from the working string before the next
        word is tested, so a later word only counts if it still occurs.
        """
        str_tmp = sentence_in
        counter_exist = 0
        for raw_word_iter, standard_word_iter in self._entity_o2o_dict.items():
            if raw_word_iter in str_tmp:
                str_tmp = str_tmp.replace(raw_word_iter, '')
                counter_exist += 1
        return bool(counter_exist), counter_exist

    def reback_replace(self, str_synonym_in, str_raw_in):
        """Roll back substitutions listed in the roll-back table.

        For each ``old, new`` pair: when ``old`` is in the substituted string
        and ``new`` (but not ``old``) is in the raw string, ``old`` is replaced
        by ``new`` in the result.

        Returns:
            The adjusted copy of ``str_synonym_in``.
        """
        str_tmp = str_synonym_in
        for word_old_iter, word_new_iter in self._reback_o2o_dict.items():
            if word_old_iter in str_synonym_in and word_old_iter in str_raw_in:
                pass
            elif word_old_iter in str_synonym_in and word_new_iter in str_raw_in:
                str_tmp = str_tmp.replace(word_old_iter, word_new_iter)
            else:
                pass
        return str_tmp
Пример #13
0
class Entity(object):
    # Loads dictionaries per domain, builds patterns from them and exposes
    # merged views across domains.
    _pathutil_class = PathUtil()
    _domain2entity2paths_set = domain2entity2paths_set

    def __init__(self):
        self._preload()

    def _preload(self):
        """Read every domain's files once and cache the six resulting mappings."""
        (self._domain2intent2words_dict,
         self._domain2word2intent_dict,
         self._domain2entity_o2o_dict,
         self._domain2entity_o2m_dict,
         self._domain2intent2std2sim_dict,
         self._intent2sim2std_o2o_dict) = self._get_entities(
             self._domain2entity2paths_set)

    @property
    def re_entity_list(self):
        """Pattern built by ``list2re`` from all one-to-one keys, longest first."""
        all_keys = [
            key
            for per_domain in self._domain2entity_o2o_dict.values()
            for key in per_domain
        ]
        all_keys.sort(key=len, reverse=True)
        return list2re(all_keys)

    @property
    def entity_o2o_dict(self):
        """One-to-one dicts of all domains merged, then sorted."""
        merged = {}
        for per_domain in self._domain2entity_o2o_dict.values():
            merged.update(per_domain)
        return dict2sorted_dict(merged)

    @property
    def entity_o2m_order_dict(self):
        """One-to-many dicts of all domains merged, then sorted."""
        merged = {}
        for per_domain in self._domain2entity_o2m_dict.values():
            merged.update(per_domain)
        return dict2sorted_dict(merged)

    @property
    def get_intent2entities(self):
        """Intent -> words across every stored domain, keys and word lists sorted."""
        grouped = collections.defaultdict(list)
        for intent2words in self._domain2intent2words_dict.values():
            for intent, words in intent2words.items():
                grouped[intent].extend(words)
        grouped_sorted = dict2sorted_dict(grouped)
        for intent, words in grouped_sorted.items():
            grouped_sorted[intent] = list2sorted_list(words)
        return grouped_sorted

    @property
    def get_intent2sim2std_o2o_dict(self):
        """Cached intent -> (similar word -> standard word) mapping."""
        return self._intent2sim2std_o2o_dict

    @property
    def get_domain2intent2std2sim_dict(self):
        """Cached domain -> intent -> (standard word -> similar words) mapping."""
        return self._domain2intent2std2sim_dict

    @property
    def get_domain2pattern_dict(self):
        """Domain -> pattern built by ``list2re`` from that domain's one-to-one dict."""
        return {
            domain: list2re(words)
            for domain, words in self._domain2entity_o2o_dict.items()
        }

    @property
    def get_domain2intent2words_dict(self):
        """Cached domain -> intent -> words mapping."""
        return self._domain2intent2words_dict

    @property
    def get_domain2entity_o2m_dict(self):
        """Cached domain -> one-to-many dict mapping."""
        return self._domain2entity_o2m_dict

    def _get_entities(self, domain2entity2paths_set_in):
        """Read the files of every domain and assemble the per-domain mappings.

        Args:
            domain2entity2paths_set_in: dict of domain -> path set accepted by
                ``read_data_from_paths_set``.

        Returns:
            A 6-tuple: domain -> intent -> words, domain -> word -> intent,
            domain -> one-to-one dict, domain -> one-to-many dict,
            domain -> intent -> std -> sim, and intent -> sim -> std.
            The first two also carry an aggregate entry under
            ``dcname.alldomain.value``.
        """
        intent2words_by_domain = {}
        word2intent_by_domain = {}
        o2o_by_domain = {}
        o2m_by_domain = {}
        std2sim_by_domain = {}
        sim2std_by_intent = collections.defaultdict(dict)
        assert isinstance(domain2entity2paths_set_in, dict)

        for domain, paths_set in domain2entity2paths_set_in.items():
            (label2words, word2label, entity_o2o, entity_o2m,
             label2std2sim, label2sim2std) = read_data_from_paths_set(paths_set)

            word2label_sorted = dict2sorted_dict(word2label)
            entity_o2m_sorted = dict2sorted_dict(entity_o2m)
            entity_o2o_sorted = dict2sorted_dict(entity_o2o)

            intent2words_by_domain[domain] = label2words
            word2intent_by_domain[domain] = word2label_sorted
            o2o_by_domain[domain] = entity_o2o_sorted
            o2m_by_domain[domain] = entity_o2m_sorted
            std2sim_by_domain[domain] = label2std2sim

            for intent, sim2std in label2sim2std.items():
                sim2std_by_intent[intent].update(sim2std)

        # Aggregate view over all domains: word lists are concatenated.
        all_intent2words = collections.defaultdict(list)
        for intent2words in intent2words_by_domain.values():
            for intent, words in intent2words.items():
                all_intent2words[intent].extend(words)
        intent2words_by_domain[dcname.alldomain.value] = all_intent2words

        # Aggregate view over all domains: a later domain wins on a word clash.
        all_word2intent = collections.defaultdict(list)
        for word2intent in word2intent_by_domain.values():
            for word, intent in word2intent.items():
                all_word2intent[word] = intent
        word2intent_by_domain[dcname.alldomain.value] = all_word2intent

        return (intent2words_by_domain, word2intent_by_domain, o2o_by_domain,
                o2m_by_domain, std2sim_by_domain, sim2std_by_intent)

    def find_entity(self, str_list_in, domain_classification):
        """Collect the known words of one domain that occur in the given strings.

        After a hit, the matched word minus its last two characters is removed
        from the working copy of the string before later words are tested.

        Returns:
            ``(label -> matched words, matched words in discovery order)``.
        """
        found_by_label = collections.defaultdict(list)
        found_in_order = []
        for sentence in str_list_in:
            remaining = sentence
            word2intent = self._domain2word2intent_dict[domain_classification]
            for word, intent in word2intent.items():
                if word not in remaining:
                    continue
                found_by_label[intent].append(word)
                remaining = remaining.replace(word[:-2], '')
                found_in_order.append(word)
        return found_by_label, found_in_order
Пример #14
0
class TrainAbbrev2StdEntity(object):
    """Derives abbreviation -> standard-entity maps and writes them to files.

    Covers complete words, abbreviated words and sentences containing
    abbreviations. Constructing an instance runs the whole derivation and
    writes the output files.
    """
    # Candidates containing any of these characters are discarded.
    _del_chars = ['保', '险', '附', '加', '合', '同', '公', '司', '疾', '病','两','全','意','外','重','疾','旅','行','健','医','疗']
    _entity_class = Entity()
    _filename_general = 'abbreviations2entity_{}.txt'
    _filepath_general = os.path.join(PathUtil().files_dirpath, _filename_general)
    _abbreviations_entity = [cname.domain.value]

    def __init__(self):
        self._preload()

    def __load_entity_words(self):
        """Return the intent -> std -> sim mapping of the product domain."""
        return self._entity_class.get_domain2intent2std2sim_dict[dcname.product.value]

    def _preload(self):
        """Load the entity words, derive the abbreviation maps and write them."""
        self._label2std2sim_dict = self.__load_entity_words()
        domain2abbrev2std_dict = self._get_intersection_abbreviations()
        self._write(domain2abbrev2std_dict)

    def create_abbreviations_map(self):
        """Re-run the derivation and rewrite the output files."""
        self._preload()

    def _get_abbreviations_oneterm(self, elems_in):
        """Return every 2-, 3- and 4-element combination of each item.

        Each combination is a tuple, as produced by ``itertools.combinations``.
        """
        r_range = (2, 3, 4)
        results = set()
        for elem_iter in elems_in:
            for r_iter in r_range:
                results.update(itertools.combinations(elem_iter, r=r_iter))
        return results

    def _combinate_elems(self, nest_list_in):
        """Return all unordered pairs of the given items as a list."""
        return list(itertools.combinations(nest_list_in, r=2))

    def _get_duplicate_abbreviations(self, list_one_in, list_two_in):
        """Return the items present in both collections."""
        return set(list_one_in).intersection(set(list_two_in))

    def _get_different_abbreviations(self, list_one_in, list_two_in):
        """Return the items of the first collection absent from the second."""
        return set(list_one_in).difference(set(list_two_in))

    def _del_abbreviations(self, list_in):
        """Return the items that contain none of ``_del_chars``."""
        return [
            elem_iter for elem_iter in list_in
            if not any(char_iter in elem_iter for char_iter in self._del_chars)
        ]

    def _get_abbreviations(self, dict_in):
        """Map domain -> std -> candidate combinations of its similar words."""
        results = collections.defaultdict(dict)
        for domain_iter, values in dict_in.items():
            for std_iter, sim_iter in values.items():
                results[domain_iter][std_iter] = self._get_abbreviations_oneterm(sim_iter)
        return results

    def _abbreviations2complete(self, nest_dict_in):
        """Invert domain -> std -> candidates into domain -> candidate -> stds."""
        domain2abbrev2std_dict = collections.defaultdict(dict)
        for domain_iter, values_iter in nest_dict_in.items():
            # Group directly instead of re-scanning every standard word for
            # every candidate; the resulting mapping is the same.
            for std_iter, abbrevations_iter in values_iter.items():
                for word_iter in abbrevations_iter:
                    domain2abbrev2std_dict[domain_iter].setdefault(
                        word_iter, set()).add(std_iter)
        return domain2abbrev2std_dict

    def _get_intersection_abbreviations(self):
        """Find the candidates shared by several standard words of one domain.

        Returns:
            ``defaultdict(dict)`` mapping domain -> candidate -> set of the
            standard words it may stand for.
        """
        label2std2abbreviations = collections.defaultdict(dict)
        domain2terms = self._get_abbreviations(self._label2std2sim_dict)
        counter = 0
        for domain_iter, std2terms in domain2terms.items():
            # Keys of the other domains. This replaces deep-copying the whole
            # nested mapping on every iteration just to drop one key.
            other_domains = set(domain2terms) - {domain_iter}
            # How many standard words of this domain produce each candidate.
            owner_counts = collections.Counter(
                itertools.chain.from_iterable(
                    set(terms) for terms in std2terms.values()))
            for std_iter, term_iter in std2terms.items():
                # A candidate is a duplicate when another standard word of the
                # same domain produces it as well.
                duplicate_items = {
                    term for term in term_iter if owner_counts[term] > 1
                }
                duplicate_del_items = self._del_abbreviations(duplicate_items)
                # NOTE(review): the difference is taken against the other
                # domains' keys, not their candidates. Confirm this is the
                # intended comparison.
                duplicate_different_items = self._get_different_abbreviations(
                    duplicate_del_items, other_domains)
                if duplicate_different_items:
                    label2std2abbreviations[domain_iter][std_iter] = duplicate_different_items
                counter += 1
                # Progress output, one line per standard word processed.
                print(counter)
        return self._abbreviations2complete(label2std2abbreviations)

    def _write(self, domain2abbrev2std_dict_in):
        """Write each domain's map to its ``abbreviations2entity_<domain>.txt`` file."""
        for domain_iter, abbrev2std_dict_iter in domain2abbrev2std_dict_in.items():
            filepath_cur = self._filepath_general.format(domain_iter)
            write2txt(abbrev2std_dict_iter, filepath_cur)

    def judge(self, str_preprocessed_in):
        """Placeholder; not implemented."""
        pass
 def __read_represent_words(self):
     """Return the one-to-many dict read from the represent-words map file."""
     one_to_many, _one_to_one = readfile_line2dict(
         PathUtil().get_represent_words_map)
     return one_to_many
 def _get_patterns(self):
     """Read the pattern YAML file via ``read_yaml_dict_onelayer``.

     NOTE(review): as shown here the loaded mapping is bound to a local and
     neither returned nor stored, so the method returns None. The excerpt
     may be truncated; confirm against the full source.
     """
     ymlpath = PathUtil().get_pattern_with_represent_yamlpath
     key2patterns_dict = read_yaml_dict_onelayer(ymlpath)
Пример #17
0
 def _preload_stopwords(self):
     """Load the stop-word file and flatten its entries by one level.

     Returns:
         A list holding the elements of every entry returned by
         ``readfile_line2list``, in order.
     """
     entries = readfile_line2list(PathUtil().stopwords_filepath)
     flattened = []
     for entry in entries:
         flattened.extend(entry)
     return flattened
from online.field_check import PreprocessCheck, RewriteCheck
from online.nlu.preprocess import Preprocess
from online.nlu.query_rewrite import NerDeal
from online.utils.funcs import Singleton
import abc
import copy
from online.utils.nameutil import ResponseCategory as rcategory
import logging.config
from common.pathutil import PathUtil
from online.nlu.user_manage import usermanager
# Configure logging from the project's config file. The context manager
# closes the handle once fileConfig has read it, instead of leaking it.
with open(PathUtil().get_logging_config,
          encoding='utf-8') as _logging_config_file:
    logging.config.fileConfig(_logging_config_file)
logger = logging.getLogger("response_manage")


class BaseResponse(object):
    """Base class for response handlers.

    Subclasses are expected to provide ``_check`` and ``_response``.
    NOTE(review): the class derives from ``object`` without an ABC
    metaclass, so ``abc.abstractmethod`` does not block instantiation here.
    """
    # Response envelope template. It is a class attribute, so the same dict
    # object is shared by every instance and subclass.
    _data_return = {'status': 0, 'data': {}, 'msg': None}

    def __init__(self):
        pass

    @abc.abstractmethod
    def _check(self, input):
        """Hook for validating ``input``; the base version does nothing."""
        pass

    @abc.abstractmethod
    def _response(self, input):
        """Hook for building the response; the base version does nothing."""
        pass

    def response(self, input_in):
        """Entry point for handling ``input_in``.

        NOTE(review): only the start of the log-message prefix is visible in
        this excerpt; the rest of the method is cut off.
        """
        input_str = '此次响应,输入的input是\t '
Пример #19
0
from online.field_check import PreprocessCheck,RewriteCheck
from online.nlu.preprocess import Preprocess
from online.nlu.query_rewrite import NerDeal
from online.utils.funcs import Singleton
import abc
import copy
from online.utils.nameutil import ResponseCategory as rcategory
import logging.config
from common.pathutil import PathUtil
from online.nlu.user_manage import usermanager
import json
# Configure logging from the project's config file. The context manager
# closes the handle once fileConfig has read it, instead of leaking it.
with open(PathUtil().get_logging_config, encoding='utf-8') as _logging_config_file:
    logging.config.fileConfig(_logging_config_file)
logger = logging.getLogger("response_manage")

class BaseResponse(object):
    """Base class for response handlers; subclasses supply ``_check`` and ``_response``."""

    # Response envelope template, shared at class level.
    _data_return = {'status': 0, 'data': {}, 'msg': None}

    def __init__(self):
        pass

    @abc.abstractmethod
    def _check(self, input):
        """Hook for validating ``input``; the base version does nothing."""
        pass

    @abc.abstractmethod
    def _response(self, input):
        """Hook for building the response; the base version does nothing."""
        pass

    def response(self, input_in):
        """Log every key/value of ``input_in`` and call ``_response`` with it."""
        fragments = ['此次响应,输入的input是\t ']
        for key, value in input_in.items():
            fragments.append('\t' + key + '\t' + str(value) + '\t' * 2)
        logger.info(''.join(fragments))
        data_return = self._response(input_in)
Пример #20
0

class MyLogger(object):
    """Placeholder logger class; construction takes no arguments and sets no state."""

    def __init__(self):
        """Create the instance without configuring anything."""


if __name__ == '__main__':
    # Manual smoke checks; change ``flag`` to choose which helper to exercise.
    from common.pathutil import PathUtil

    flag = 3
    if flag == 1:
        # Print the words that would be registered with jieba.
        print(jieba_add_words())
    elif flag == 2:
        # Split one sample line with the shared split pattern.
        sample = '退保/取消合同/终止合同/终止保险/取消保险'
        print(ReSet().split_pattern.split(sample))
    elif flag == 3:
        # Load the pattern YAML; the result is kept but not printed.
        dict_ret = read_yaml_dict_onelayer(
            PathUtil().get_pattern_with_represent_yamlpath)
Пример #21
0
from online.utils.nameutil import FieldClassifyNames as dcname,ContextNames as cname
from common.pathutil import PathUtil
_pathutil_class = PathUtil()

# Dictionary files grouped by field classification, then by context name.
domain2entity2paths_set = {
    dcname.product.value: {
        cname.domain.value: [_pathutil_class.domain_filepath],
        cname.property.value: [_pathutil_class.property_filepath],
    },
    dcname.company.value: {
        cname.domain.value: [
            _pathutil_class.company_domain_filepath,
            _pathutil_class.fufilled_company_domain_filepath,
        ],
        cname.property.value: [_pathutil_class.company_property_filepath],
    },
}

# Active environment name; the alternative value is kept below for switching.
_environ = 'local'
# _environ='online'