Exemplo n.º 1
0
    def process_google_translation_to_omegat_dict(en_file, cn_file, result_file=None):
        """
        Pair two dictionary files by key and save the pairs as an OmegaT dictionary.

        Both files are read with ``filex.get_dict_from_file``. For every key of
        the Chinese file (stripped of surrounding whitespace) that also exists
        in the English file, the English value becomes the output key and the
        stripped Chinese value becomes its translation.

        :param en_file: path of the file holding the English values
        :param cn_file: path of the file holding the Chinese values
        :param result_file: output path; when None it is derived from
            ``en_file`` through ``filex.get_result_file_name(en_file, '', 'tmx')``
        """
        if result_file is None:
            result_file = filex.get_result_file_name(en_file, '', 'tmx')

        source_entries = filex.get_dict_from_file(en_file)
        target_entries = filex.get_dict_from_file(cn_file)
        print('英文词典%d条' % (len(source_entries)))
        print('中文词典%d条' % (len(target_entries)))

        paired = dict()
        for raw_key, raw_value in target_entries.items():
            lookup_key = raw_key.strip()
            translation = raw_value.strip()
            # A key missing from the English file has no source text to pair with.
            if lookup_key not in source_entries:
                continue
            paired[source_entries[lookup_key]] = translation

        print('结果%d条' % (len(paired)))
        Tools.save_omegat_dict(paired, result_file)
Exemplo n.º 2
0
 def export_to_omegat(file_path, result_file=None):
     """
     Export a dictionary file as an OmegaT memory file.

     Entries with an empty value are skipped. When a value contains the
     marker '【】', only the text before the first marker is exported.

     :param file_path: path of the dictionary file to read
     :param result_file: output path; when None it is derived from
         ``file_path`` through ``filex.get_result_file_name``
     """
     if result_file is None:
         result_file = filex.get_result_file_name(file_path, '', '.tmx.xml')
     marker = '【】'
     entries = filex.get_dict_from_file(file_path)
     exported = {
         source: (text.split(marker)[0] if marker in text else text)
         for source, text in entries.items()
         if text
     }
     Tools.save_omegat_dict(exported, result_file)
Exemplo n.º 3
0
    def get_dict_from_file(file_path,
                           separator='=',
                           delete_value_ellipsis=True,
                           delete_value_underline=True,
                           delete_value_and_symbol=False,
                           delete_cn_shortcut=False,
                           trans_unicode=False):
        """
        Read a dictionary from a file and optionally clean up its values.

        The raw dictionary comes from ``filex.get_dict_from_file``. The
        enabled clean-ups are applied to every value in this order: trailing
        ellipsis, underscores, ``&`` markers, trailing shortcut, unicode
        conversion.

        :param file_path: path of the file to read
        :param separator: separator passed on to ``filex.get_dict_from_file``
        :param delete_value_ellipsis: remove trailing dots (with one optional
            leading whitespace character) from the value
        :param delete_value_underline: remove every ``_`` from the value
        :param delete_value_and_symbol: remove an ``&`` that is followed by a
            word character, keeping that character
        :param delete_cn_shortcut: remove a trailing shortcut such as
            ``(&A)`` or ``(A)`` from the value
        :param trans_unicode: pass the value through
            ``encodex.unicode_str_to_chinese``
        :return: the cleaned dictionary, or None when the underlying reader
            returns None
        """
        result = filex.get_dict_from_file(file_path, separator)
        if result is None:
            return None

        # Every flag must take part in this check; otherwise a caller that
        # enables only delete_value_and_symbol or delete_cn_shortcut would
        # get the values back untouched.
        clean_up_requested = (delete_value_ellipsis or delete_value_underline
                              or delete_value_and_symbol or delete_cn_shortcut
                              or trans_unicode)
        if not clean_up_requested:
            return result

        # optional whitespace, then one or more dots, at the end of the value
        ellipsis_pattern = re.compile(r'(\s?\.+)$')
        # optional whitespace, '(', optional '&', one word character, ')', at the end
        shortcut_pattern = re.compile(r'\s?\(&?\w\)$')
        # '&' followed by one word character
        and_symbol_pattern = re.compile(r'&(\w)')

        # Only existing keys are reassigned, so the dictionary size does not
        # change while it is being iterated.
        for key, value in result.items():
            if delete_value_ellipsis:
                value = ellipsis_pattern.sub('', value)
            if delete_value_underline:
                value = value.replace('_', '')
            if delete_value_and_symbol:
                value = and_symbol_pattern.sub(r'\1', value)
            if delete_cn_shortcut:
                value = shortcut_pattern.sub('', value)
            if trans_unicode:
                value = encodex.unicode_str_to_chinese(value)
            result[key] = value
        return result
Exemplo n.º 4
0
    def compare_translation(en_dir,
                            compare_dir_list,
                            omegat_dict_file=None,
                            dict_file=None,
                            dict_diff_file=None,
                            by_index=False,
                            trans_unicode=True):
        """
        Merge several translation dictionaries and record conflicting entries.

        One dictionary is built per entry of ``compare_dir_list`` (through
        ``Translator.check_same_en_difference_cn``); an optional OmegaT
        dictionary is placed in front of them.

        * ``by_index=True``: the dictionaries are applied from last to first,
          so an earlier dictionary overrides a later one. No differences are
          recorded in this mode.
        * ``by_index=False``: every key is compared against all later
          dictionaries. Keys whose values all agree (and are not empty) go
          to the merged result; keys with differing values are recorded in
          the diff result as the distinct values joined by ``'[xx|]'``, with
          newlines removed. Compared keys are removed from the later
          dictionaries, so the dictionaries in the list are mutated.

        :param en_dir: directory of the English files, passed on to
            ``Translator.check_same_en_difference_cn``
        :param compare_dir_list: directories of the translations to compare
        :param omegat_dict_file: optional OmegaT dictionary, compared first
        :param dict_file: output file of the merged translations, defaults
            to ``'data/dict.txt'``; written as an OmegaT dictionary when it
            ends with ``.tmx`` or ``.tmx.xml``, otherwise as ``key=value`` lines
        :param dict_diff_file: output file of the differing translations;
            derived from ``dict_file`` with a ``_diff`` suffix when None
        :param by_index: select the merge mode described above
        :param trans_unicode: passed on to
            ``Translator.check_same_en_difference_cn``
        """
        if dict_file is None:
            dict_file = 'data/dict.txt'
        if dict_diff_file is None:
            dict_diff_file = filex.get_result_file_name(dict_file, '_diff')
        separator = '[xx|]'
        dict_list = list()
        for i in compare_dir_list:
            # NOTE(review): machine-specific special case - this one directory
            # is read directly from its .properties files instead of being
            # compared against en_dir.
            if i == r'C:\Users\Admin\Desktop\AndroidStudio汉化\汉化包\整理':
                t_dict = dict()
                for i_file in filex.list_file(i, '.properties'):
                    t_dict.update(filex.get_dict_from_file(i_file))
                dict_list.append(t_dict)
                continue
            # Only the first element of the returned pair is used here.
            i_all_translation, _ = Translator.check_same_en_difference_cn(
                en_dir, i, False, '', trans_unicode=trans_unicode)
            dict_list.append(i_all_translation)
        if omegat_dict_file is not None:
            dict_list.insert(0, Translator.get_omegat_dict(omegat_dict_file))

        for i, i_dict in enumerate(dict_list):
            print('%d中共包含翻译%d条' % (i + 1, len(i_dict)))

        all_translation = dict()
        diff_translation = dict()
        # Verbose per-key tracing; always enabled.
        print_i = True
        if by_index:
            # Update in reverse order so that earlier dictionaries win.
            for i in range(len(dict_list) - 1, -1, -1):
                all_translation.update(dict_list[i])
                print('更新%d后,size是%d' % (i + 1, len(all_translation)))
        else:
            for i, i_dict in enumerate(dict_list):
                length = len(i_dict)
                for index, (key, i_value) in enumerate(i_dict.items(), 1):
                    if print_i:
                        print('\n检查%d/%d,%s' % (index, length, key))
                        print('词典%d中是%s' % (i, i_value))
                    has_diff = False
                    for j in range(i + 1, len(dict_list)):
                        j_dict = dict_list[j]
                        if key not in j_dict:
                            if print_i:
                                print('词典%d中缺少' % j)
                            continue
                        j_value = j_dict[key]
                        if i_value == j_value:
                            if print_i:
                                print('词典%d中相同' % j)
                        else:
                            has_diff = True
                            # Stored values have their newlines removed, so the
                            # duplicate check must use the same normalised form.
                            flat_j_value = j_value.replace('\n', '')
                            if key in diff_translation:
                                pre_translation = diff_translation[key]
                                if flat_j_value not in pre_translation.split(separator):
                                    diff_translation[key] = (
                                        pre_translation + separator + flat_j_value)
                            else:
                                diff_translation[key] = (
                                    i_value.replace('\n', '') + separator + flat_j_value)
                            if print_i:
                                print('词典%d中是%s' % (j, j_value))
                        # Remove the key once handled so that later
                        # dictionaries do not process it again.
                        j_dict.pop(key)
                    if not has_diff:
                        if print_i:
                            print('统一翻译')
                        if i_value:
                            # Only keep translations that are not empty.
                            all_translation[key] = i_value
                print('%d中处理%d条,其中%d条翻译相同,%d条不同' %
                      (i, len(i_dict), len(all_translation),
                       len(diff_translation)))

        print('size is %d' % len(all_translation))
        if all_translation:
            if dict_file.endswith(('.tmx', '.tmx.xml')):
                Tools.save_omegat_dict(all_translation, dict_file)
            else:
                result = ['%s=%s\n' % (key, value)
                          for key, value in all_translation.items()]
                filex.write_lines(dict_file, result)

        print('diff size is %d' % len(diff_translation))
        if diff_translation:
            result = ['%s=%s\n' % (key, value)
                      for key, value in diff_translation.items()]
            filex.write_lines(dict_diff_file, result)