def add_ellipsis_and_shortcut(en_file, cn_file, result_file=None):
    """
    Align trailing punctuation and shortcut markers of the Chinese values with the English ones.

    For every key of the English file that also exists in the Chinese file:

    * English value ends with ``...``: Chinese ellipses (``……`` anywhere, ``…`` at the end) are
      replaced by ``...`` and ``...`` is appended if the value still does not end with it.
    * English value ends with a single ``.``: every ``.`` of the Chinese value becomes ``。`` and
      ``。`` is appended if the value still does not end with it.
    * English value contains ``_``: the character after its first ``_`` is the shortcut. An
      existing ``(_x)`` marker in the Chinese value is rewritten with that shortcut; otherwise all
      underscores are removed from the Chinese value and ``(_x)`` is appended.

    The English file is then re-translated with the updated dictionary and written out.

    :param en_file: English file
    :param cn_file: Chinese file
    :param result_file: output file; derived from ``cn_file`` when None
    :return: None
    """
    if result_file is None:
        result_file = filex.get_result_file_name(cn_file, '_add_ellipsis_and_shortcut')
    en_dict = Tools.get_dict_from_file(en_file, delete_value_ellipsis=False, delete_value_underline=False)
    cn_dict = Tools.get_dict_from_file(cn_file, delete_value_ellipsis=False, delete_value_underline=False)

    count = 0
    # All patterns are loop-invariant, so they are compiled once up front.
    p_ellipsis = re.compile('……|…$')
    p_period = re.compile(r'\.')
    p_shortcut = re.compile(r'(.*)(\(_\w\))')
    for k, v in en_dict.items():
        if k not in cn_dict:
            # Nothing to adjust without a Chinese counterpart.
            continue

        if v.endswith('.'):
            cn_value = cn_dict[k]
            old_value = cn_value
            if v.endswith('...'):
                # English ends with an ellipsis
                cn_value = p_ellipsis.sub('...', cn_value)
                if not cn_value.endswith('...'):
                    cn_value += '...'
            else:
                # English ends with a full stop
                cn_value = p_period.sub('。', cn_value)
                if not cn_value.endswith('。'):
                    cn_value += '。'
            if cn_value != old_value:
                print('修改【%s】为【%s】' % (old_value, cn_value))
                cn_dict[k] = cn_value

        if '_' in v:
            # The English value carries a shortcut: the character right after the first '_'.
            index = v.find('_')
            shortcut = v[index + 1:index + 2]
            marker = '(_%s)' % shortcut
            cn_value = cn_dict[k]
            count += 1
            if p_shortcut.match(cn_value) is not None:
                # Already contains a "(_x)" marker: rewrite it. A callable replacement keeps the
                # shortcut character literal (a backslash would break a replacement template).
                replace_result = p_shortcut.sub(lambda m: m.group(1) + marker, cn_value)
                print('替换%d,key=%s,v=%s,cn=%s,r=%s' % (count, shortcut, v, cn_value, replace_result))
            else:
                replace_result = cn_value.replace('_', '') + marker
                print('添加%d,key=%s,v=%s,cn=%s,r=%s' % (count, shortcut, v, cn_value, replace_result))
            cn_dict[k] = replace_result

    # Translate the English file again with the adjusted dictionary.
    result = Tools.translate_file_by_dict(en_file, cn_dict, '')
    result.insert(0, '# from:[AndroidStudio翻译(3)-ActionsBundle中文翻译](http://blog.pingfangx.com/2355.html)\n')
    filex.write_lines(result_file, result)
def update_omegat_dict_by_pseudo_dict(omegat_dict_file, pseudo_dict_file, result_file=None):
    """
    Update an OmegaT translation memory using a pseudo-translation memory.

    Both memories are loaded, merged through ``ActionsBundle.update_dict_add_extra_info`` and the
    merged dictionary is saved as a new OmegaT memory.

    :param omegat_dict_file: OmegaT memory file to update
    :param pseudo_dict_file: pseudo-translation memory file
    :param result_file: output file; derived from ``pseudo_dict_file`` when None
    :return: None
    """
    target = result_file
    if target is None:
        target = filex.get_result_file_name(pseudo_dict_file, '_update_by_pseudo')

    memory = Tools.get_dict_from_omegat(omegat_dict_file)
    pseudo = Tools.get_dict_from_omegat(pseudo_dict_file)
    print('记忆库共%d条记录,伪翻译共%d条记录' % (len(memory), len(pseudo)))

    merged = ActionsBundle.update_dict_add_extra_info(memory, pseudo)
    print('处理结果共%s条记录' % len(merged))
    Tools.save_omegat_dict(merged, target)
def change_unicode_to_chinese(self):
    """
    Convert the tips properties file of every project via ``Tools.change_unicode_to_chinese``.

    Project names are taken from the entries of ``self.source_dir``; the files themselves are
    looked up below ``self.target_dir``. For each project directory that contains
    ``IdeTipsAndTricks/IdeTipsAndTricks_en_zh_CN.properties`` the conversion writes
    ``IdeTipsAndTricks/IdeTipsAndTricks_cn.properties`` next to it.
    """
    for project_name in os.listdir(self.source_dir):
        project_path = os.sep.join((self.target_dir, project_name))
        if not os.path.isdir(project_path):
            continue

        tips_dir = os.sep.join((project_path, 'IdeTipsAndTricks'))
        tips_file_path = os.sep.join((tips_dir, 'IdeTipsAndTricks_en_zh_CN.properties'))
        tips_cn_file_path = os.sep.join((tips_dir, 'IdeTipsAndTricks_cn.properties'))
        if not os.path.exists(tips_file_path):
            continue

        print('处理 %s' % tips_file_path)
        Tools.change_unicode_to_chinese(tips_file_path, tips_cn_file_path)
def check_same_en_difference_cn(en_dir, cn_dir, print_msg=False, suffix='', trans_unicode=True):
    """
    Check whether identical English texts are translated inconsistently.

    Every English file below ``en_dir`` (file names are filtered with the pattern
    ``\\.(?!png|gif)``) is paired with its Chinese file; entries whose Chinese value differs from
    the English value count as translations.

    :param en_dir: directory of the English files
    :param cn_dir: directory of the Chinese files
    :param print_msg: print details about conflicts and the running dictionary size
    :param suffix: passed to ``Translator.get_cn_file_name`` to locate the Chinese file
    :param trans_unicode: passed to ``Tools.get_dict_from_file`` when reading the Chinese file
    :return: tuple ``(all_translation, diff_translation)``. ``all_translation`` maps an English
        text to its single translation (conflicting texts are removed); ``diff_translation`` maps
        a conflicting English text to its distinct translations joined by ``'\\n'``.
    """
    all_translation = dict()
    diff_translation = dict()
    en_file_list = filex.list_file(en_dir, r'\.(?!png|gif)')
    for en_file in en_file_list:
        print('\ncheck ' + en_file)
        cn_file = Translator.get_cn_file_name(en_dir, cn_dir, en_file, suffix)
        if not os.path.exists(cn_file):
            print('中文文件不存在' + cn_file)
            continue
        en_dict = Tools.get_dict_from_file(en_file)
        cn_dict = Tools.get_dict_from_file(cn_file, delete_cn_shortcut=True, trans_unicode=trans_unicode)
        for key, en_value in en_dict.items():
            if key not in cn_dict:
                continue
            cn_value = cn_dict[key]
            if cn_value == en_value:
                # Identical to the English text: not translated yet.
                continue
            if en_value not in all_translation:
                all_translation[en_value] = cn_value
                continue
            pre_translation = all_translation[en_value]
            if pre_translation == cn_value:
                continue
            if en_value not in diff_translation:
                diff_translation[en_value] = pre_translation + '\n' + cn_value
            else:
                pre_diff_translation = diff_translation[en_value]
                # Record a translation only if it has not been recorded before.
                if cn_value not in pre_diff_translation.split('\n'):
                    diff_translation[en_value] = pre_diff_translation + '\n' + cn_value
            if print_msg:
                print('\n词典中已经存在%s,但翻译不相同\n%s\n%s' % (en_value, pre_translation, cn_value))
        if print_msg:
            print('the size is %d' % len(all_translation))
    # Reading finished: conflicting texts are reported only through diff_translation.
    for key in diff_translation:
        all_translation.pop(key)
    return all_translation, diff_translation
def inspect_file(pseudo_file, translation_file, inspection_list, result_file=None, print_msg=False,
                 print_change=False):
    """
    Run a list of inspections over the translations and save the inspected memory.

    For every English text of the pseudo memory, the translation is looked up by the text itself
    or, failing that, by ``DeleteAction.delete_all_symbol_of_string(en, False)``. Each inspection
    is called as ``inspection(en, value, print_msg)`` and its return value replaces the
    translation. Empty translations are skipped.

    :param pseudo_file: pseudo-translation file; may be the translation file itself for a self-check
    :param translation_file: translation file
    :param inspection_list: inspections to apply (e.g. tips do not need the shortcut inspection)
    :param result_file: output file; derived from ``translation_file`` when None
    :param print_msg: forwarded to every inspection (print messages while operating)
    :param print_change: print every modified translation; with both flags False only the
        entries that still need handling are printed
    :return: None
    """
    if result_file is None:
        result_file = filex.get_result_file_name(translation_file, '_inspection')

    pseudo_dict = Tools.get_dict_from_omegat(pseudo_file)
    translation_dict = Tools.get_dict_from_omegat(translation_file)
    print(u'pseudo size is %d' % (len(pseudo_dict)))
    print(u'translation size is %d' % (len(translation_dict)))

    inspected = dict()
    change_no = 0
    for en in pseudo_dict:
        # Resolve the key under which the translation is stored.
        if en in translation_dict:
            cn_key = en
        else:
            abbreviated_en = DeleteAction.delete_all_symbol_of_string(en, False)
            cn_key = abbreviated_en if abbreviated_en in translation_dict else None
        if not cn_key:
            print(u'不包含key:%s' % en)
            continue

        before = translation_dict[cn_key]
        if not before:
            continue
        # Consumed entries are removed so the remainder can be reported at the end.
        del translation_dict[cn_key]

        after = before
        for inspection in inspection_list:
            after = inspection(en, after, print_msg)
        if before != after and print_change:
            change_no += 1
            print(u'\n%d.\n【%s】根据【%s】被修改为\n【%s】\n' % (change_no, before, en, after))
        inspected[en] = after

    print(u'remain translation size %d' % (len(translation_dict)))
    Tools.save_omegat_dict(inspected, result_file)
def export_to_omegat(file_path, result_file=None):
    """
    Export a translation file as an OmegaT memory file.

    Entries with an empty value are dropped; for the others only the part of the value before the
    first ``【】`` marker is exported.

    :param file_path: translation file to export
    :param result_file: output file; derived from ``file_path`` with ``.tmx.xml`` when None
    :return: None
    """
    if result_file is None:
        result_file = filex.get_result_file_name(file_path, '', '.tmx.xml')

    source = filex.get_dict_from_file(file_path)
    # split() returns the whole text when the marker is absent, so one expression covers both cases.
    exported = {
        key: text.split('【】')[0]
        for key, text in source.items()
        if text
    }
    Tools.save_omegat_dict(exported, result_file)
def update_omegat_dict(dict_file, omegat_dict_file, result_dict_file):
    """
    Update a dictionary with the entries of an OmegaT memory and save it as an OmegaT memory.

    On conflicting keys the OmegaT entry wins.

    :param dict_file: dictionary file providing the initial entries
    :param omegat_dict_file: OmegaT memory file whose entries take precedence
    :param result_dict_file: output file
    :return: None
    """
    pre_dict = Tools.get_dict_from_file(dict_file)
    print('pre size is %d' % len(pre_dict))
    omegat_dict = Translator.get_omegat_dict(omegat_dict_file)
    print('omegat size is %d' % len(omegat_dict))
    # Update, treating the OmegaT memory as authoritative.
    pre_dict.update(omegat_dict)
    print('result size is %d' % len(pre_dict))
    Tools.save_omegat_dict(pre_dict, result_dict_file)
def get_action_desc_dict(en_file, print_msg=False):
    """
    Read the description that belongs to each action text.

    For every key ``X.text`` that has a sibling key ``X.description``, the text is mapped to the
    description. Texts that occur with different descriptions are awkward to handle and are
    therefore removed from the result.

    :param en_file: English file
    :param print_msg: print the removed texts and the resulting dictionary
    :return: dict mapping action text to description
    """
    en_dict = Tools.get_dict_from_file(en_file)
    desc_dict = dict()
    # Texts seen with conflicting descriptions; they are dropped at the end.
    duplicate_value = list()
    # Invert: text -> description
    for key, value in en_dict.items():
        if not key.endswith('.text'):
            continue
        desc_key = key[:-len('.text')] + '.description'
        if desc_key not in en_dict:
            continue
        desc = en_dict[desc_key]
        if value in desc_dict:
            pre_desc = desc_dict[value]
            if desc == pre_desc:
                print('%s重复【%s】' % (value, desc))
            else:
                duplicate_value.append(value)
                desc = '%s【或】%s' % (pre_desc, desc)
                print('%s有不同的描述【%s】【%s】' % (value, desc, pre_desc))
        desc_dict[value] = desc

    for value in duplicate_value:
        if value in desc_dict:
            desc_dict.pop(value)
            if print_msg:
                print('删除' + value)
    if print_msg:
        print(desc_dict)
    return desc_dict
def process_google_translation_to_omegat_dict(en_file, cn_file, result_file=None):
    """
    Join an English and a Chinese dictionary file by key and save the pairs as an OmegaT memory.

    Keys and values of the Chinese dictionary are stripped of surrounding whitespace; for every
    stripped key that exists in the English dictionary, the English value is mapped to the
    Chinese value.

    :param en_file: English dictionary file
    :param cn_file: Chinese dictionary file
    :param result_file: output file; derived from ``en_file`` when None
    :return: None
    """
    if result_file is None:
        result_file = filex.get_result_file_name(en_file, '', 'tmx')

    en_dict = filex.get_dict_from_file(en_file)
    cn_dict = filex.get_dict_from_file(cn_file)
    print('英文词典%d条' % (len(en_dict)))
    print('中文词典%d条' % (len(cn_dict)))

    pairs = dict()
    for raw_key, raw_value in cn_dict.items():
        key, translation = raw_key.strip(), raw_value.strip()
        if key not in en_dict:
            continue
        pairs[en_dict[key]] = translation

    print('结果%d条' % (len(pairs)))
    Tools.save_omegat_dict(pairs, result_file)
def generate_need_translation_file2(en_dir, result_file):
    """
    Generate the file that needs translation.

    The dictionaries of all files listed for ``en_dir`` are merged (later files override earlier
    ones on equal keys) and written to ``result_file`` as ``key=value`` lines.

    :param en_dir: directory of the English files
    :param result_file: output file
    :return: None
    """
    merged = dict()
    for en_file in filex.list_file(en_dir):
        merged.update(Tools.get_dict_from_file(en_file))

    lines = ['%s=%s\n' % (key, value) for key, value in merged.items()]
    filex.write_lines(result_file, lines)
def check_same_key_difference_value(dir_path):
    """
    Report keys that occur in several files with different values.

    Such inconsistencies do exist, which is why files have to be told apart.

    :param dir_path: directory whose files are checked
    :return: None
    """
    seen = dict()
    for path in filex.list_file(dir_path):
        print('\ncheck ' + path)
        current = Tools.get_dict_from_file(path, delete_value_and_symbol=True, trans_unicode=True)
        # Compare against everything read so far, then remember this file's entries.
        for key, value in current.items():
            if key in seen and seen[key] != value:
                print('key相同%s,但value不一致\n%s\n%s' % (key, seen[key], value))
        seen.update(current)
def generate_need_translation_file(en_dir, result_dir):
    """
    Generate the files that need translation, split into chunks.

    The dictionaries of all files listed for ``en_dir`` are merged (later files override earlier
    ones on equal keys) and written as ``key=value`` lines into numbered ``.properties`` files
    below ``result_dir``, 3000 entries per file, numbered from 0.

    :param en_dir: directory of the English files
    :param result_dir: directory receiving the chunk files
    :return: None
    """
    all_translation = dict()
    for en_file in filex.list_file(en_dir):
        all_translation.update(Tools.get_dict_from_file(en_file))

    size = 3000
    lines = ['%s=%s\n' % (key, value) for key, value in all_translation.items()]
    for index, start in enumerate(range(0, len(lines), size)):
        # os.path.join instead of a hard-coded backslash keeps the path valid on every platform.
        # NOTE(review): '%2d' pads with a space (' 0.properties'); '%02d' may have been intended.
        result_file = os.path.join(result_dir, '%2d.properties' % index)
        filex.write_lines(result_file, lines[start:start + size])
def compare_translation(en_dir, compare_dir_list, omegat_dict_file=None, dict_file=None, dict_diff_file=None,
                        by_index=False, trans_unicode=True):
    """
    Compare the translations found in several directories and write the agreed and the differing ones.

    One dictionary is built per entry of ``compare_dir_list`` (the first result of
    ``Translator.check_same_en_difference_cn``; one hard-coded directory is instead loaded
    directly from its files). When ``omegat_dict_file`` is given, its dictionary is put first.

    With ``by_index`` the dictionaries are merged from last to first, so earlier dictionaries
    override later ones. Otherwise every key is compared with the later dictionaries: keys whose
    values differ go to the diff dictionary (values joined by ``[xx|]``, newlines removed), keys
    without any difference and with a non-empty value go to the result dictionary. Compared keys
    are removed from the later dictionaries.

    :param en_dir: directory of the English files
    :param compare_dir_list: directories of the translations to compare
    :param omegat_dict_file: optional OmegaT memory, compared as the first dictionary
    :param dict_file: output file of the agreed translations; ``data/dict.txt`` when None.
        Saved as an OmegaT memory when it ends with ``.tmx`` or ``.tmx.xml``, otherwise as
        ``key=value`` lines.
    :param dict_diff_file: output file of the differing translations; derived from ``dict_file``
        when None
    :param by_index: merge by position instead of comparing values
    :param trans_unicode: forwarded to ``Translator.check_same_en_difference_cn``
    :return: None
    """
    if dict_file is None:
        dict_file = 'data/dict.txt'
    if dict_diff_file is None:
        dict_diff_file = filex.get_result_file_name(dict_file, '_diff')
    separator = '[xx|]'
    dict_list = list()
    for i in compare_dir_list:
        # This specific directory is read file by file instead of being paired with en_dir.
        if i == r'C:\Users\Admin\Desktop\AndroidStudio汉化\汉化包\整理':
            t_dict = dict()
            for i_file in filex.list_file(i, '.properties'):
                t_dict.update(filex.get_dict_from_file(i_file))
            dict_list.append(t_dict)
            continue
        i_all_translation, i_diff_translation = Translator.check_same_en_difference_cn(
            en_dir, i, False, '', trans_unicode=trans_unicode)
        dict_list.append(i_all_translation)
    if omegat_dict_file is not None:
        dict_list.insert(0, Translator.get_omegat_dict(omegat_dict_file))
    for i in range(len(dict_list)):
        print('%d中共包含翻译%d条' % (i + 1, len(sorted(dict_list[i].keys()))))

    all_translation = dict()
    diff_translation = dict()
    # Verbose per-key output switch.
    print_i = True
    if by_index:
        # Update in reverse order to get the result (earlier dictionaries win).
        for i in range(len(dict_list) - 1, -1, -1):
            all_translation.update(dict_list[i])
            print('更新%d后,size是%d' % (i + 1, len(sorted(all_translation.keys()))))
    else:
        for i in range(len(dict_list)):
            i_dict = dict_list[i]
            index = 0
            length = len(sorted(i_dict.keys()))
            for key, i_value in i_dict.items():
                index += 1
                if print_i:
                    print('\n检查%d/%d,%s' % (index, length, key))
                    print('词典%d中是%s' % (i, i_value))
                has_diff = False
                # Compare with every later dictionary only; earlier ones were already handled.
                for j in range(i + 1, len(dict_list)):
                    j_dict = dict_list[j]
                    if key in j_dict:
                        j_value = j_dict[key]
                        if i_value == j_value:
                            if print_i:
                                print('词典%d中相同' % j)
                        else:
                            has_diff = True
                            if key in diff_translation.keys():
                                pre_translation = diff_translation[key]
                                # Append only values that are not recorded yet.
                                if j_value not in pre_translation.split(
                                        separator):
                                    diff_translation[
                                        key] = pre_translation + separator + j_value.replace(
                                        '\n', '')
                            else:
                                diff_translation[key] = (i_value + separator +
                                                         j_value).replace(
                                    '\n', '')
                            if print_i:
                                print('词典%d中是%s' % (j, j_value))
                        # Remove after processing, so the key is not handled again later.
                        j_dict.pop(key)
                    else:
                        if print_i:
                            print('词典%d中缺少' %
                                  j)
                if not has_diff:
                    if print_i:
                        print('统一翻译')
                    if i_value:
                        # Only add non-empty translations.
                        all_translation[key] = i_value
            print('%d中处理%d条,其中%d条翻译相同,%d条不同' % (i, len(sorted(
                i_dict.keys())), len(sorted(all_translation.keys())), len(sorted(diff_translation.keys()))))

    print('size is %d' % len(sorted(all_translation.keys())))
    if all_translation:
        if dict_file.endswith('.tmx') or dict_file.endswith('.tmx.xml'):
            Tools.save_omegat_dict(all_translation, dict_file)
        else:
            result = list()
            for key, value in all_translation.items():
                result.append('%s=%s\n' % (key, value))
            filex.write_lines(dict_file, result)

    print('diff size is %d' % len(sorted(diff_translation.keys())))
    if diff_translation:
        result = list()
        for key, value in diff_translation.items():
            result.append('%s=%s\n' % (key, value))
        filex.write_lines(dict_diff_file, result)
def check_translation_complete(en_dir, cn_dir, out_put=None, suffix=''):
    """
    Check whether the translation is complete.

    Every English file below ``en_dir`` (file names are filtered with the pattern
    ``\\.(?!png|gif)``) is compared with its Chinese file and classified as missing, identical,
    incomplete or complete. An entry counts as untranslated when its key is absent from the
    Chinese file or its value equals the English value. A summary is printed.

    :param en_dir: directory of the English files
    :param cn_dir: directory of the Chinese files
    :param out_put: optional file receiving all untranslated entries as ``key=value`` lines
    :param suffix: passed to ``Translator.get_cn_file_name`` to locate the Chinese file
    :return: None
    """
    incomplete_dict = dict()
    en_file_list = filex.list_file(en_dir, r'\.(?!png|gif)')
    incomplete_file = []
    miss_file = []
    complete_count = 0
    same_file = []
    complete_file = []
    for en_file in en_file_list:
        cn_file = Translator.get_cn_file_name(en_dir, cn_dir, en_file, suffix)
        if not os.path.exists(cn_file):
            miss_file.append(cn_file)
            continue
        if filecmp.cmp(en_file, cn_file):
            same_file.append(en_file)
            continue

        en_dict = Tools.get_dict_from_file(en_file)
        cn_dict = Tools.get_dict_from_file(cn_file, trans_unicode=True)
        is_complete = True
        translation_count_in_file = 0
        for key, en_value in en_dict.items():
            if key in cn_dict and cn_dict[key] != en_value:
                translation_count_in_file += 1
                complete_count += 1
            else:
                # Key missing, or the value is still the English text.
                is_complete = False
                incomplete_dict[key] = en_value

        if not is_complete:
            print('文件未完全翻译' + en_file)
            incomplete_file.append(en_file)
        elif translation_count_in_file == 0:
            # Not a single entry translated: treat the file as identical.
            same_file.append(en_file)
        else:
            complete_file.append(en_file)
            print('文件翻译完整' + en_file)

    print('缺少%d个文件' % len(miss_file))
    print(miss_file)
    print('有%d个文件完全相同' % len(same_file))
    print('有%d个文件未翻译完整' % len(incomplete_file))
    print(incomplete_file)
    print('有%d个文件完整翻译,共%d条翻译' % (len(complete_file), complete_count))
    if out_put is not None:
        result = ['%s=%s\n\n' % (key, value) for key, value in incomplete_dict.items()]
        print('incomplete size is %d' % len(incomplete_dict))
        filex.write_lines(out_put, result)
def translate_file(cls, file_path, result_file=None, ignore_reg_list=None, ignore_file_path=None,
                   delete_empty_translation=False):
    """
    Machine-translate every entry of an OmegaT memory that has no translation yet.

    Entries are processed in sorted key order: find a text to translate, translate it, update
    the dictionary, save, continue. A text matching one of the ignore patterns is skipped and,
    when ``ignore_file_path`` is given, saved there translated as itself.

    :param cls: translator class; must be a subclass of ``MachineTranslator``
    :param file_path: file to translate
    :param result_file: output file; ``file_path`` itself when None
    :param ignore_reg_list: regular expressions; a matching text is ignored. Patterns starting
        with ``^`` are matched at the start of the text, all others are searched anywhere.
    :param ignore_file_path: file receiving the ignored texts, used to build the auto tmx so they
        need not be set to "same translation" by hand every time
    :param delete_empty_translation: clean the result file through
        ``MachineTranslation.delete_empty_or_same_translation`` afterwards
    :return: None
    """
    if result_file is None:
        result_file = file_path
    if not issubclass(cls, MachineTranslator):
        print('%s 不是 MachineTranslator 的子类' % cls)
        return

    en_dict = Tools.get_dict_from_omegat(file_path)
    if not en_dict:
        print('翻译文件字典为空')
        return

    keys = sorted(en_dict.keys())
    total = len(keys)
    for position, en in enumerate(keys, start=1):
        if en_dict[en] is not None:
            # Already translated.
            continue

        ignored = False
        for ignore_reg in ignore_reg_list or ():
            # Only patterns without a leading '^' are searched anywhere in the text.
            finder = re.match if ignore_reg.startswith('^') else re.search
            if not finder(ignore_reg, en):
                continue
            ignored = True
            print('\n跳过 %d/%d 个:【%s】' % (position, total, en))
            if ignore_file_path:
                MachineTranslation.save_translation(ignore_file_path, en, en)
            break
        if ignored:
            continue

        print('\n翻译 %d/%d 个:【%s】' % (position, total, en))
        cn = cls.translate(en)
        print('翻译结果 %d/%d 个:【%s】' % (position, total, cn))
        # Update the dictionary, then persist the new entry right away.
        en_dict[en] = cn
        MachineTranslation.save_translation(result_file, en, cn)

    if delete_empty_translation:
        MachineTranslation.delete_empty_or_same_translation(result_file, True, True)