Пример #1
0
def command_gen_subset_fonts():
    """
    Generate custom subset fonts covering the glyphs and other font features
    used in user-facing text for each language's translation.

    A separate subset font is built for common strings; it generally overlaps
    somewhat with the individual language subsets. This slightly increases the
    first-request download for the client, but shaves a couple of megabytes off
    Kolibri's distribution size.
    """
    logging.info("generating subset fonts...")

    for scope in (SCOPE_COMMON, SCOPE_SUBSET):
        _clean_up(scope)

    # Subset shared by all languages: built from the common strings.
    _subset_and_merge_fonts(
        text=" ".join(_get_common_strings()),
        default_font=NOTO_SANS_LATIN,
        subset_reg_path=_woff_font_path(SCOPE_COMMON, is_bold=False),
        subset_bold_path=_woff_font_path(SCOPE_COMMON, is_bold=True),
    )

    # One subset per language, built from that language's translated strings
    # (both Kolibri's own strings and the Perseus plugin's).
    for lang_info in utils.supported_languages(include_in_context=True,
                                               include_english=True):
        logging.info("gen subset for {}".format(lang_info[utils.KEY_ENG_NAME]))
        strings = []
        strings.extend(_get_lang_strings(utils.local_locale_path(lang_info)))
        strings.extend(
            _get_lang_strings(utils.local_perseus_locale_path(lang_info)))

        scoped_name = _scoped(SCOPE_SUBSET, lang_info[utils.KEY_INTL_CODE])
        _subset_and_merge_fonts(
            text=" ".join(strings),
            default_font=lang_info[utils.KEY_DEFAULT_FONT],
            subset_reg_path=_woff_font_path(scoped_name, is_bold=False),
            subset_bold_path=_woff_font_path(scoped_name, is_bold=True),
        )

    # generate common subset file
    _generate_inline_font_css(name=SCOPE_COMMON, font_family=SCOPE_COMMON)

    # generate language-specific subset font files
    for lang in utils.supported_languages(include_in_context=True,
                                          include_english=True):
        _generate_inline_font_css(
            name=_scoped(SCOPE_SUBSET, lang[utils.KEY_INTL_CODE]),
            font_family=SCOPE_SUBSET,
        )

    logging.info("subsets created")
Пример #2
0
def pretranslate(branch, approve_all=False):
    """
    Apply pre-translation to the given branch
    """
    checkApiKey()

    # Request parameters: every file on the branch, for every supported language.
    params = [("files[]", "{}/{}".format(branch, f))
              for f in crowdin_files(branch, get_crowdin_details())]
    params += [("languages[]", lang[utils.KEY_CROWDIN_CODE])
               for lang in utils.supported_languages()]

    if approve_all:
        msg = "Crowdin: pre-translating and pre-approving untranslated matches in '{}'..."
    else:
        msg = "Crowdin: pre-translating untranslated matches in '{}'..."
    msg += "\n\tNote that this operation can take a long time and may time out."
    msg += "\n\tYou should see the results on Crowdin eventually..."
    logging.info(msg.format(branch))

    # NOTE(review): approve_option is 0 when approve_all is True and 1 otherwise,
    # which reads inverted — confirm against the PRETRANSLATE_URL template and
    # the Crowdin pre-translate API before changing.
    response = requests.post(
        PRETRANSLATE_URL.format(approve_option=0 if approve_all else 1),
        params=params)
    response.raise_for_status()
    logging.info("Crowdin: succeeded!")
Пример #3
0
def main(title, message, link_text):
    """
    Generate JSON suitable for sending in nutrition facts notifications
    """

    output = {}
    for lang_object in utils.supported_languages(include_in_context=False,
                                                 include_english=True):
        file_path = os.path.join(utils.local_locale_path(lang_object),
                                 FILE_NAME)
        with open(file_path) as f:
            input_data = json.load(f)

        # Keep only the message IDs that exist in this language's file.
        i18n = {}
        for output_key, message_id in ((I18N_TITLE, title),
                                       (I18N_MESSAGE, message),
                                       (I18N_LINK_TEXT, link_text)):
            if message_id in input_data:
                i18n[output_key] = input_data[message_id]
        output[lang_object[utils.KEY_INTL_CODE]] = i18n

    # output JSON
    print(json.dumps(output,
                     sort_keys=True,
                     indent=2,
                     separators=(",", ": "),
                     ensure_ascii=False))
Пример #4
0
def command_download(branch):
    """
    Downloads and updates the local translation files from the given branch on Crowdin
    """
    logging.info("Crowdin: downloading '{}'...".format(branch))

    # remove stale translations before extracting the fresh set
    for stale_path in (utils.LOCALE_PATH, utils.PERSEUS_LOCALE_PATH):
        _wipe_translations(stale_path)

    for lang_object in utils.supported_languages(include_in_context=True):
        code = lang_object[utils.KEY_CROWDIN_CODE]
        response = requests.get(DOWNLOAD_URL.format(language=code, branch=branch))
        response.raise_for_status()
        archive = zipfile.ZipFile(io.BytesIO(response.content))
        target = utils.local_locale_path(lang_object)
        logging.info("\tExtracting {} to {}".format(code, target))
        archive.extractall(target)

        # hack for perseus: its file lands in the main locale dir, so relocate
        # it into the perseus locale tree
        perseus_target = utils.local_perseus_locale_path(lang_object)
        if not os.path.exists(perseus_target):
            os.makedirs(perseus_target)
        shutil.move(os.path.join(target, PERSEUS_FILE),
                    os.path.join(perseus_target, PERSEUS_FILE))

    _format_json_files()  # clean them up to make git diffs more meaningful
    logging.info("Crowdin: download succeeded!")
Пример #5
0
def _font_priorities(default_font):
    """
    Given a default font, return a list of all possible font names roughly in the order
    that we ought to look for glyphs in. Many fonts contain overlapping sets of glyphs.

    Without doing this: we risk loading a bunch of random font files just because they
    happen to contain one of the glyphs, and we also risk loading the 'wrong' version
    of the glyphs if they happen to differ.
    """

    # start with the default
    font_names = [default_font]

    # look in the latin set next
    # BUG FIX: compare by value, not identity — `is not` only worked when
    # default_font happened to be the very same string object (an interning
    # accident), and would wrongly re-append the latin font otherwise.
    if default_font != NOTO_SANS_LATIN:
        font_names.append(NOTO_SANS_LATIN)

    # then look at the rest of the supported languages' default fonts
    for lang_info in utils.supported_languages():
        name = lang_info[utils.KEY_DEFAULT_FONT]
        if name not in font_names:
            font_names.append(name)

    # finally look at the remaining languages
    font_names.extend(
        [fn for fn in noto_source.FONT_MANIFEST if fn not in font_names])
    return font_names
Пример #6
0
def _csv_to_json():
    """
    Convert all CSV json files to JSON and ensure consistent diffs with ordered keys
    """

    for lang_object in utils.supported_languages(include_in_context=True):
        locale_path = utils.local_locale_path(lang_object)
        perseus_path = utils.local_perseus_locale_path(lang_object)

        csv_locale_dir_path = os.path.join(
            utils.local_locale_csv_path(), lang_object["crowdin_code"]
        )
        perseus_locale_dir_path = os.path.join(
            utils.local_perseus_locale_csv_path(), lang_object["crowdin_code"]
        )

        # Make sure that the Perseus directory for CSV_FILES/{lang_code} exists
        if not os.path.exists(perseus_locale_dir_path):
            os.makedirs(perseus_locale_dir_path)

        csv_dirs = os.listdir(csv_locale_dir_path) + os.listdir(perseus_locale_dir_path)

        for file_name in csv_dirs:
            if "csv" not in file_name:
                continue

            # BUG FIX: compare file names with `==`, not `is`. Identity
            # comparison against PERSEUS_CSV only matched when CPython happened
            # to intern both strings, so the Perseus file could silently be
            # read from (and written to) the wrong directory.
            is_perseus_file = file_name == PERSEUS_CSV
            if is_perseus_file:
                csv_path = os.path.join(perseus_locale_dir_path, file_name)
            else:
                csv_path = os.path.join(csv_locale_dir_path, file_name)

            # Account for csv reading differences in Pythons 2 and 3
            try:
                newline = None if sys.version_info[0] < 3 else ""
                mode = "r+b" if sys.version_info[0] < 3 else "r"
                encoding = None if sys.version_info[0] < 3 else "utf-8"
                csv_file = io.open(
                    csv_path, mode=mode, encoding=encoding, newline=newline
                )
            except EnvironmentError:
                # Best-effort: skip languages that are missing this CSV.
                logging.info("Failed to find CSV file in: {}".format(csv_path))
                continue

            with csv_file as f:
                csv_data = list(csv.DictReader(f))

            data = _locale_data_from_csv(csv_data)

            # Perseus JSON goes into the perseus locale path; everything else
            # into the main locale path.
            target_path = perseus_path if is_perseus_file else locale_path
            utils.json_dump_formatted(
                data, target_path, file_name.replace("csv", "json")
            )
Пример #7
0
def upload_translations(branch):
    """
    Upload translations to the given branch
    """
    checkPerseus()
    checkApiKey()

    # Upload real translations only: skip in-context and English pseudo-langs.
    for lang_object in utils.supported_languages(include_in_context=False,
                                                 include_english=False):
        _upload_translation(branch, lang_object)
Пример #8
0
def loadLang():
    """
    Load and return (app_translator, qt_translator) for the system locale.
    """
    # "en_US" -> "en": drop the trailing "_XX" country suffix
    system_lang = QtCore.QLocale.system().name()[:-3]

    kwipe_translator = QtCore.QTranslator()
    qt_translator = QtCore.QTranslator()

    # Load the app translation whose language code matches the system's.
    # NOTE(review): assumes entries look like "prefix_<code>.qm" so that
    # split('_')[1][:-3] yields the bare language code — confirm against
    # what utils.supported_languages() actually returns here.
    for lang in utils.supported_languages():
        if lang.split('_')[1][:-3] == system_lang:
            kwipe_translator.load(lang, path_to_files + '/language/')

    # Qt's own translations for the full system locale name.
    qt_translator.load(
        'qt_' + QtCore.QLocale.system().name(),
        QtCore.QLibraryInfo.location(QtCore.QLibraryInfo.TranslationsPath))
    return kwipe_translator, qt_translator
Пример #9
0
def command_gen_full_fonts():
    """
    Regenerate the full (non-subset) font files, plus per-language CSS.
    """
    logging.info("generating full fonts...")

    _clean_up(SCOPE_FULL)

    # Write regular and bold variants of every font in the manifest.
    for font_name in noto_source.FONT_MANIFEST:
        for bold in (False, True):
            _write_full_font(font_name, is_bold=bold)

    # Emit both modern and basic CSS for every supported language.
    for lang_info in utils.supported_languages(include_in_context=True,
                                               include_english=True):
        _gen_full_css_modern(lang_info)
        _gen_full_css_basic(lang_info)

    logging.info("finished generating full fonts")
Пример #10
0
def _format_json_files():
    """
    re-print all json files to ensure consistent diffs with ordered keys
    """
    for lang_object in utils.supported_languages(include_in_context=True):
        # Both the main and the Perseus locale directories hold JSON files.
        for locale_path in (utils.local_locale_path(lang_object),
                            utils.local_perseus_locale_path(lang_object)):
            for file_name in os.listdir(locale_path):
                if not file_name.endswith(".json"):
                    continue
                file_path = os.path.join(locale_path, file_name)
                with io.open(file_path, mode="r", encoding="utf-8") as f:
                    data = json.load(f)
                # Rewrite in place with the project's canonical formatting.
                utils.json_dump_formatted(data, file_path)
Пример #11
0
def main(title, message, link_text):
    """
    Generate JSON suitable for sending in nutrition facts notifications
    """

    output = {}
    for lang_object in utils.supported_languages(include_in_context=False,
                                                 include_english=True):
        file_path = os.path.join(utils.local_locale_path(lang_object),
                                 FILE_NAME)
        i18n = {}

        if lang_object[utils.KEY_INTL_CODE] == "en":
            # English has no translation file: read the extracted source CSV.
            # Note that `make i18n-extract-frontend` should have been run to
            # generate it.
            file_path = file_path.replace("json", "csv")
            with open(file_path) as f:
                for row in csv.DictReader(f):
                    identifier = row["Identifier"]
                    if identifier == title:
                        i18n[I18N_TITLE] = row["Source String"]
                    if identifier == message:
                        i18n[I18N_MESSAGE] = row["Source String"]
                    if identifier == link_text:
                        i18n[I18N_LINK_TEXT] = row["Source String"]
        else:
            with open(file_path) as f:
                input_data = json.load(f)
            # Keep only the message IDs present in this language's file.
            for output_key, message_id in ((I18N_TITLE, title),
                                           (I18N_MESSAGE, message),
                                           (I18N_LINK_TEXT, link_text)):
                if message_id in input_data:
                    i18n[output_key] = input_data[message_id]

        output[lang_object[utils.KEY_INTL_CODE]] = i18n

    # output JSON
    print(json.dumps(output,
                     sort_keys=True,
                     indent=2,
                     separators=(",", ": "),
                     ensure_ascii=False))
Пример #12
0
def download_translations(branch):
    """
    Download translations from the given branch
    """
    checkPerseus()
    checkApiKey()

    logging.info("Crowdin: downloading '{}'...".format(branch))

    # delete previous files
    for stale_path in (utils.LOCALE_PATH, utils.PERSEUS_LOCALE_PATH):
        _wipe_translations(stale_path)

    for lang_object in utils.supported_languages(include_in_context=True):
        code = lang_object[utils.KEY_CROWDIN_CODE]
        response = requests.get(DOWNLOAD_URL.format(language=code, branch=branch))
        response.raise_for_status()
        archive = zipfile.ZipFile(io.BytesIO(response.content))
        target = utils.local_locale_csv_path()
        logging.info("\tExtracting {} to {}".format(code, target))
        archive.extractall(target)

        # hack for perseus
        perseus_target = os.path.join(utils.local_perseus_locale_csv_path(),
                                      lang_object["crowdin_code"])
        # TODO - Update this to work with perseus properly - likely to need to update
        # the kolibri-exercise-perseus-plugin repo directly to produce a CSV for its
        # translations.
        if not os.path.exists(perseus_target):
            os.makedirs(perseus_target)
        try:
            shutil.move(
                os.path.join(target, lang_object["crowdin_code"], PERSEUS_CSV),
                os.path.join(perseus_target, PERSEUS_CSV),
            )
        except Exception as e:
            # Best-effort: the Perseus CSV may not exist for this language yet
            # (see TODO above), so just log and move on.
            logging.error("Ignoring an exception")
            logging.error(e)

    # TODO Don't need to format here... going to do this in the new command.
    _csv_to_json()  # clean them up to make git diffs more meaningful
    logging.info("Crowdin: download succeeded!")
Пример #13
0
def _format_json_files():
    """
    re-print all json files to ensure consistent diffs with ordered keys
    """

    for lang_object in utils.supported_languages(include_in_context=True):
        locale_path = utils.local_locale_path(lang_object)
        perseus_path = utils.local_perseus_locale_path(lang_object)

        csv_locale_dir_path = os.path.join(utils.local_locale_csv_path(),
                                           lang_object["crowdin_code"])
        for file_name in os.listdir(csv_locale_dir_path):
            if file_name.endswith("json"):
                # Then it is a Perseus JSON file - just copy it.
                source = os.path.join(csv_locale_dir_path, file_name)
                target = os.path.join(perseus_path, file_name)
                # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt
                # and SystemExit. Only the "directory already exists" error
                # should be ignored here (exist_ok is unavailable under the
                # Python 2 support this file maintains).
                try:
                    os.makedirs(perseus_path)
                except OSError:
                    pass
                shutil.copyfile(source, target)
                continue
            elif not file_name.endswith("csv"):
                continue

            csv_path = os.path.join(csv_locale_dir_path, file_name)

            # Account for csv reading differences in Pythons 2 and 3
            if sys.version_info[0] < 3:
                csv_file = open(csv_path, "rb")
            else:
                csv_file = open(csv_path, "r", newline="")

            with csv_file as f:
                csv_data = list(csv.DictReader(f))

            data = _locale_data_from_csv(csv_data)

            utils.json_dump_formatted(data, locale_path,
                                      file_name.replace("csv", "json"))
Пример #14
0
def _get_common_strings():
    """
    Text useful for all languages: displaying the language switcher, Kolibri version
    numbers, symbols, and other un-translated text
    """

    # Special characters that are used directly in untranslated template strings.
    # Search the codebase with this regex to find new ones: [^\x00-\x7F©–—…‘’“”•→›]
    strings = [
        chr(0x0),  # null
        "©",
        "–",  # en dash
        "—",  # em dash
        "…",
        "‘",
        "’",
        "“",
        "”",
        "•",
        "●",
        "→",
        "›",
    ]

    # all the basic printable ascii characters
    strings += [chr(code_point) for code_point in range(32, 127)]

    # text from language names, both lower- and upper-case
    for lang in utils.supported_languages(include_in_context=True,
                                          include_english=True):
        for name_key in (utils.KEY_LANG_NAME, utils.KEY_ENG_NAME):
            strings.append(lang[name_key])
            strings.append(lang[name_key].upper())

    return strings
Пример #15
0
def translation_stats(branch):
    """
    Print stats for the given branch

    Queries Crowdin's language-status endpoint once per supported language,
    extracts the node for `branch` from each response, and logs three tables:
    a summary (averages + totals), per-language untranslated counts, and
    per-language needs-approval counts. Exits the process with status 1 if
    the branch is not found for some language.
    """
    checkApiKey()

    logging.info("Crowdin: getting details for '{}'...".format(branch))

    # True for the file-tree node that represents the requested branch.
    def _is_branch_node(node):
        return node["node_type"] == "branch" and node["name"] == branch

    # Rows of (language, word count, string count) accumulated per language.
    needs_approval_table = []
    strings_total = 0
    untranslated_table = []
    words_total = 0

    sorted_languages = sorted(utils.supported_languages(),
                              key=lambda x: x[utils.KEY_ENG_NAME])
    for lang in sorted_languages:

        logging.info("Retrieving stats for {}...".format(
            lang[utils.KEY_ENG_NAME]))
        r = requests.post(
            LANG_STATUS_URL.format(language=lang[utils.KEY_CROWDIN_CODE]))
        r.raise_for_status()
        try:
            # Find this branch's node in the response's file tree.
            branch_node = next(node for node in r.json()["files"]
                               if _is_branch_node(node))
        except StopIteration:
            logging.error("Branch '{}' not found on Crowdin".format(branch))
            sys.exit(1)

        # Translated-but-not-approved counts for this language.
        needs_approval_table.append((
            lang[utils.KEY_ENG_NAME],
            branch_node["words_translated"] - branch_node["words_approved"],
            branch_node["translated"] - branch_node["approved"],
        ))
        # Not-yet-translated counts for this language.
        untranslated_table.append((
            lang[utils.KEY_ENG_NAME],
            branch_node["words"] - branch_node["words_translated"],
            branch_node["phrases"] - branch_node["translated"],
        ))

        # Overwritten on every iteration; the last language's values win.
        strings_total = branch_node[
            "phrases"]  # should be the same across languages
        words_total = branch_node[
            "words"]  # should be the same across languages

    # Column 2 of each row holds the string count.
    total_untranslated_strings = sum([row[2] for row in untranslated_table])
    total_unapproved_strings = sum([row[2] for row in needs_approval_table])

    # NOTE(review): under Python 2, int / int is floor division, so these
    # averages would be truncated before round() — confirm this module only
    # runs under Python 3 (or that truncation is acceptable).
    avg_untranslated_strings = round(total_untranslated_strings /
                                     len(untranslated_table))
    avg_unapproved_strings = round(total_unapproved_strings /
                                   len(needs_approval_table))

    # Column 1 of each row holds the word count.
    total_untranslated_words = sum([row[1] for row in untranslated_table])
    total_unapproved_words = sum([row[1] for row in needs_approval_table])

    avg_untranslated_words = round(total_untranslated_words /
                                   len(untranslated_table))
    avg_unapproved_words = round(total_unapproved_words /
                                 len(needs_approval_table))

    summary_table_headers = ["", "Words", "Strings"]
    summary_table = [
        ("Avg. Untranslated", avg_untranslated_words,
         avg_untranslated_strings),
        ("Avg. Needs Approval", avg_unapproved_words, avg_unapproved_strings),
        ("Total (for a new language)", words_total, strings_total),
    ]
    needs_approval_table_headers = ["Language", "Words", "Strings"]
    untranslated_table_headers = ["Language", "Words", "Strings"]

    # Render all three tables into the shared stats template.
    logging.info(
        STATS_TEMPLATE.format(
            branch=branch,
            summary_table=tabulate(summary_table,
                                   headers=summary_table_headers),
            untranslated_table=tabulate(untranslated_table,
                                        headers=untranslated_table_headers),
            needs_approval_table=tabulate(
                needs_approval_table, headers=needs_approval_table_headers),
        ))
Пример #16
0
def command_upload_translations(branch):
    """
    Upload each supported language's translations to the given branch.
    """
    # Skip in-context and English pseudo-languages: only real translations
    # are uploaded.
    languages = utils.supported_languages(include_in_context=False,
                                          include_english=False)
    for lang_object in languages:
        _upload_translation(branch, lang_object)