def _generate_char_dict(self):
        """
        Generate the char dict and ord map dict json files from the lexicon list.

        Every distinct element obtained by iterating the entries of
        ``self._lexicon_list`` (single characters when the entries are strings)
        is collected, then handed to ``establish_char_dict.CharDictBuilder``,
        which is asked to write the char dict to ``self._char_dict_path`` and
        the ord-to-index map to ``self._ord_map_dict_path``.

        The characters are passed on in sorted order. Iteration order of a set
        of strings may differ from one interpreter run to the next (string hash
        randomization), so an unsorted list would make the generated files
        non-reproducible.
        NOTE(review): presumably the builder assigns indices by list position;
        confirm against ``CharDictBuilder.map_ord_to_index``.

        :return: None
        """
        char_lexicon_set = set()
        for lexicon in self._lexicon_list:
            # update() adds each element produced by iterating the entry
            char_lexicon_set.update(lexicon)

        log.info('Char set length: {:d}'.format(len(char_lexicon_set)))

        # sorted() gives a stable, run-independent ordering of the characters
        char_lexicon_list = sorted(char_lexicon_set)
        char_dict_builder = establish_char_dict.CharDictBuilder()
        char_dict_builder.write_char_dict(char_lexicon_list, save_path=self._char_dict_path)
        char_dict_builder.map_ord_to_index(char_lexicon_list, save_path=self._ord_map_dict_path)

        log.info('Write char dict map complete')
示例#2
0
def generate_char_dict(char_dict_file, save_dir):
    """
    Produce the char dict and both ord/index map files under ``save_dir``.

    An example char dict file is stored in data/char_dict/chinese_dict.txt

    :param char_dict_file: char dict source, forwarded unchanged as the first
        argument of every ``CharDictBuilder`` call made here
    :param save_dir: directory onto which the three output file names are joined
    :return: None
    """
    builder = establish_char_dict.CharDictBuilder()

    # Output locations, in the order they are written and reported.
    output_names = ('char_dict.json', 'ord_2_index_map.json', 'index_2_ord_map.json')
    output_paths = [ops.join(save_dir, name) for name in output_names]
    char_dict_json, ord_to_index_json, index_to_ord_json = output_paths

    builder.write_char_dict(char_dict_file, char_dict_json)
    builder.map_ord_to_index(char_dict_file, ord_to_index_json)
    builder.map_index_to_ord(char_dict_file, index_to_ord_json)

    print('Generate {:s}, {:s} and {:s} complete'.format(*output_paths))
    def _generate_char_dict(self):
        """
        Generate the char dict and ord map dict files from the lexicon list.

        Every distinct element obtained by iterating the entries of
        ``self._lexicon_list`` (single characters when the entries are strings)
        is collected, then handed to ``establish_char_dict.CharDictBuilder``,
        which is asked to write the char dict to ``self._char_dict_path`` and
        the ord-to-index map to ``self._ord_map_dict_path``.

        The characters are passed on in sorted order. Iteration order of a set
        of strings may differ from one interpreter run to the next (string hash
        randomization), so an unsorted list would make the generated files
        non-reproducible.
        NOTE(review): presumably the builder assigns indices by list position;
        confirm against ``CharDictBuilder.map_ord_to_index``.

        :return: None
        """
        char_lexicon_set = set()
        for lexicon in self._lexicon_list:
            # Grow the set in place; rebuilding it with union() on every entry
            # would copy the whole accumulated set each time.
            char_lexicon_set.update(lexicon)

        log.info('Char set length: {:d}'.format(len(char_lexicon_set)))

        # sorted() gives a stable, run-independent ordering of the characters
        char_lexicon_list = sorted(char_lexicon_set)
        char_dict_builder = establish_char_dict.CharDictBuilder()
        char_dict_builder.write_char_dict(char_lexicon_list,
                                          save_path=self._char_dict_path)
        char_dict_builder.map_ord_to_index(char_lexicon_list,
                                           save_path=self._ord_map_dict_path)

        log.info('Write char dict map complete')