def process_google_translation_to_omegat_dict(en_file, cn_file, result_file=None):
    """Pair up two dictionary files by key and save the pairs as an OmegaT dict.

    Both files are loaded with ``filex.get_dict_from_file``. For every entry of
    the Chinese dictionary whose (stripped) key also exists in the English
    dictionary, the English value becomes the key of the output and the
    (stripped) Chinese value becomes its translation.

    :param en_file: path of the English dictionary file
    :param cn_file: path of the Chinese dictionary file
    :param result_file: output path; when ``None`` it is derived from
        ``en_file`` through ``filex.get_result_file_name(en_file, '', 'tmx')``
    """
    if result_file is None:
        result_file = filex.get_result_file_name(en_file, '', 'tmx')

    source_texts = filex.get_dict_from_file(en_file)
    translated_texts = filex.get_dict_from_file(cn_file)
    print('英文词典%d条' % (len(source_texts)))
    print('中文词典%d条' % (len(translated_texts)))

    pairs = {}
    for raw_key, raw_translation in translated_texts.items():
        lookup_key = raw_key.strip()
        translation = raw_translation.strip()
        # Only the Chinese side is stripped; English keys are used as loaded.
        if lookup_key not in source_texts:
            continue
        pairs[source_texts[lookup_key]] = translation

    print('结果%d条' % (len(pairs)))
    Tools.save_omegat_dict(pairs, result_file)
def export_to_omegat(file_path, result_file=None):
    """Export a dictionary file as an OmegaT memory file.

    Entries with an empty value are dropped. When a value contains the
    ``【】`` marker, only the text before the first marker is exported.

    :param file_path: path of the dictionary file to export
    :param result_file: output path; when ``None`` it is derived from
        ``file_path`` through
        ``filex.get_result_file_name(file_path, '', '.tmx.xml')``
    """
    if result_file is None:
        result_file = filex.get_result_file_name(file_path, '', '.tmx.xml')

    marker = '【】'
    entries = filex.get_dict_from_file(file_path)
    exported = {
        source: (target.split(marker)[0] if marker in target else target)
        for source, target in entries.items()
        if target
    }
    Tools.save_omegat_dict(exported, result_file)
def get_dict_from_file(file_path, separator='=', delete_value_ellipsis=True, delete_value_underline=True,
                       delete_value_and_symbol=False, delete_cn_shortcut=False, trans_unicode=False):
    """Read a dictionary from a file and optionally clean up its values.

    The raw dictionary comes from ``filex.get_dict_from_file(file_path,
    separator)``. Each enabled option is then applied to every value, in the
    order the parameters are listed below. Keys are never modified.

    :param file_path: path of the file to read
    :param separator: separator passed through to ``filex.get_dict_from_file``
    :param delete_value_ellipsis: strip a trailing run of dots (with one
        optional leading whitespace character) from the value
    :param delete_value_underline: remove every ``_`` from the value
    :param delete_value_and_symbol: remove an ``&`` that is directly followed
        by a word character, keeping that character
    :param delete_cn_shortcut: strip a trailing shortcut of the form ``(X)`` or
        ``(&X)`` (with one optional leading whitespace character)
    :param trans_unicode: convert the value with
        ``encodex.unicode_str_to_chinese``
    :return: the cleaned dictionary, or ``None`` when the underlying reader
        returned ``None``
    """
    result = filex.get_dict_from_file(file_path, separator)
    if result is None:
        return None

    # Every option must take part in this check. The original test left out
    # delete_value_and_symbol and delete_cn_shortcut, so they were silently
    # ignored whenever the two default-on options were switched off.
    if not any((delete_value_ellipsis, delete_value_underline, delete_value_and_symbol,
                delete_cn_shortcut, trans_unicode)):
        return result

    # Optional whitespace followed by one or more dots at the end of the value.
    ellipsis_pattern = re.compile(r'(\s?\.+)$')
    # Optional whitespace, "(", optional "&", one word character, ")" at the end.
    shortcut_pattern = re.compile(r'\s?\(&?\w\)$')
    # "&" followed by one word character.
    and_symbol_pattern = re.compile(r'&(\w)')

    # Only existing keys are reassigned, so the dict size does not change
    # while it is being iterated.
    for key, value in result.items():
        if delete_value_ellipsis:
            value = ellipsis_pattern.sub('', value)
        if delete_value_underline:
            value = value.replace('_', '')
        if delete_value_and_symbol:
            value = and_symbol_pattern.sub(r'\1', value)
        if delete_cn_shortcut:
            value = shortcut_pattern.sub('', value)
        if trans_unicode:
            value = encodex.unicode_str_to_chinese(value)
        result[key] = value
    return result
def compare_translation(en_dir, compare_dir_list, omegat_dict_file=None, dict_file=None, dict_diff_file=None,
                        by_index=False, trans_unicode=True):
    """Merge several translation dictionaries and record where they disagree.

    One dictionary is built per entry of ``compare_dir_list`` (plus an optional
    OmegaT dictionary placed first). Depending on ``by_index`` the dictionaries
    are either merged by priority or compared key by key; agreed translations
    are written to ``dict_file`` and conflicting ones to ``dict_diff_file``.

    Note: in comparison mode, keys are popped from the later dictionaries as
    they are processed, so the dictionaries are modified in place.

    :param en_dir: English directory, forwarded to
        ``Translator.check_same_en_difference_cn``
    :param compare_dir_list: directories to compare; each one is forwarded to
        ``Translator.check_same_en_difference_cn`` and the first element of the
        returned pair is used as its dictionary
    :param omegat_dict_file: optional file loaded with
        ``Translator.get_omegat_dict`` and inserted as the first dictionary
    :param dict_file: output for agreed translations, ``'data/dict.txt'`` by
        default; saved with ``Tools.save_omegat_dict`` when the name ends with
        ``.tmx`` or ``.tmx.xml``, otherwise written as ``key=value`` lines
    :param dict_diff_file: output for conflicting translations; derived from
        ``dict_file`` with a ``_diff`` suffix by default
    :param by_index: when true, skip the comparison and merge the dictionaries
        so that earlier ones override later ones
    :param trans_unicode: forwarded to
        ``Translator.check_same_en_difference_cn``
    """
    if dict_file is None:
        dict_file = 'data/dict.txt'
    if dict_diff_file is None:
        dict_diff_file = filex.get_result_file_name(dict_file, '_diff')

    # Joins the alternative translations of one key in the diff output.
    separator = '[xx|]'
    # This one directory is read directly from its .properties files instead
    # of going through Translator.check_same_en_difference_cn.
    raw_properties_dir = r'C:\Users\Admin\Desktop\AndroidStudio汉化\汉化包\整理'

    dict_list = []
    for compare_dir in compare_dir_list:
        if compare_dir == raw_properties_dir:
            merged = {}
            for properties_file in filex.list_file(compare_dir, '.properties'):
                file_dict = filex.get_dict_from_file(properties_file)
                # The reader appears to return None on failure; dict.update(None)
                # would raise TypeError, so report the file and skip it.
                if file_dict is None:
                    print('无法读取%s,已跳过' % properties_file)
                    continue
                merged.update(file_dict)
            dict_list.append(merged)
            continue
        dir_translation, _ = Translator.check_same_en_difference_cn(
            en_dir, compare_dir, False, '', trans_unicode=trans_unicode)
        dict_list.append(dir_translation)

    if omegat_dict_file is not None:
        dict_list.insert(0, Translator.get_omegat_dict(omegat_dict_file))

    for number, current in enumerate(dict_list, start=1):
        print('%d中共包含翻译%d条' % (number, len(current)))

    all_translation = {}
    diff_translation = {}
    print_i = True
    if by_index:
        # Update in reverse order so that earlier dictionaries win.
        for i in range(len(dict_list) - 1, -1, -1):
            all_translation.update(dict_list[i])
            print('更新%d后,size是%d' % (i + 1, len(all_translation)))
    else:
        for i, i_dict in enumerate(dict_list):
            length = len(i_dict)
            for index, (key, i_value) in enumerate(i_dict.items(), start=1):
                if print_i:
                    print('\n检查%d/%d,%s' % (index, length, key))
                    print('词典%d中是%s' % (i, i_value))
                has_diff = False
                for j in range(i + 1, len(dict_list)):
                    j_dict = dict_list[j]
                    if key not in j_dict:
                        if print_i:
                            print('词典%d中缺少' % j)
                        continue
                    # Remove the key once handled so it is not processed again
                    # when the outer loop reaches dictionary j.
                    j_value = j_dict.pop(key)
                    if i_value == j_value:
                        if print_i:
                            print('词典%d中相同' % j)
                        continue
                    has_diff = True
                    # Stored alternatives have their newlines removed, so the
                    # candidate must be normalised the same way before the
                    # duplicate check; otherwise a multi-line value would be
                    # appended again each time it shows up.
                    j_flat = j_value.replace('\n', '')
                    if key in diff_translation:
                        previous = diff_translation[key]
                        if j_flat not in previous.split(separator):
                            diff_translation[key] = previous + separator + j_flat
                    else:
                        diff_translation[key] = (i_value + separator + j_value).replace('\n', '')
                    if print_i:
                        print('词典%d中是%s' % (j, j_value))
                if not has_diff:
                    if print_i:
                        print('统一翻译')
                    if i_value:
                        # Only non-empty translations are kept.
                        all_translation[key] = i_value
            print('%d中处理%d条,其中%d条翻译相同,%d条不同' % (
                i, len(i_dict), len(all_translation), len(diff_translation)))

    print('size is %d' % len(all_translation))
    if all_translation:
        if dict_file.endswith(('.tmx', '.tmx.xml')):
            Tools.save_omegat_dict(all_translation, dict_file)
        else:
            lines = ['%s=%s\n' % (key, value) for key, value in all_translation.items()]
            filex.write_lines(dict_file, lines)

    print('diff size is %d' % len(diff_translation))
    if diff_translation:
        lines = ['%s=%s\n' % (key, value) for key, value in diff_translation.items()]
        filex.write_lines(dict_diff_file, lines)