示例#1
0
def coverage(font, threshold=10):
    """Report which scripts and languages the font's character map covers.

    Compares the font's best cmap against the exemplar character set of
    every locale known to ICU.

    :param font: font object exposing ``getBestCmap()`` (fontTools-style
        — assumption, verify against callers).
    :param threshold: maximum number of missing characters for a locale
        to still count as partially covered.
    :returns: tuple ``(scripts, languages, partial)`` — sets of display
        script/language names with full coverage, plus a dict mapping a
        display language to its set of missing characters.
    """
    supported = {chr(code) for code in font.getBestCmap()}

    covered_scripts = set()
    covered_languages = set()
    nearly_covered = {}

    for locale_id in Locale.getAvailableLocales():
        exemplar = set("".join(LocaleData(locale_id).getExemplarSet()))
        # Skip locales whose exemplar characters are entirely absent.
        if supported.isdisjoint(exemplar):
            continue
        loc = Locale(locale_id)
        loc.addLikelySubtags()
        missing = exemplar - supported
        if not missing:
            covered_scripts.add(loc.getDisplayScript())
            covered_languages.add(loc.getDisplayLanguage())
        elif len(missing) <= threshold:
            nearly_covered[loc.getDisplayLanguage()] = missing

    return covered_scripts, covered_languages, nearly_covered
示例#2
0
# ICU support is optional: try the modern `icu` binding first, then the
# legacy PyICU module name, and degrade gracefully when neither loads.
# Initialize up front so the names exist even when both imports fail
# (otherwise `if HAVE_ICU:` below raises NameError).
HAVE_ICU = False
_icu_err = None
try:
    from icu import Locale, Collator
    HAVE_ICU = True
except ImportError:
    try:
        from PyICU import Locale, Collator
        HAVE_ICU = True
    except ImportError as err:
        # No logger, save the warning message for later.
        _icu_err = (
            "ICU not loaded because %s. Localization will be impaired. "
            "Use your package manager to install PyICU" % str(err))

# All locales ICU knows about, or None when ICU is unavailable.
ICU_LOCALES = None
if HAVE_ICU:
    ICU_LOCALES = Locale.getAvailableLocales()

# Map of languages for converting to Microsoft locales and naming
# locales for display to the user.  It's important to add to this list
# when a new translation is added.  Note the dummy _(): That's just to
# get xgettext to include the string in gramps.pot; actual translation
# is done in _get_language_string() below.
# (The gramps officially-supported language list is ALL_LINGUAS in setup.py)
def _(msgid):
    """Identity marker so xgettext collects *msgid*; no translation here."""
    return msgid
_LOCALE_NAMES = {
    'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")),
    'bg': ('Bulgrian_Bulgaria', '1251', _("Bulgarian")),
    'br': (None, None, _("Breton")),  #Windows has no translation for Breton
    'ca': ('Catalan_Spain', '1252', _("Catalan")),
    'cs': ('Czech_Czech Republic', '1250', _("Czech")),
    'da': ('Danish_Denmark', '1252', _("Danish")),
示例#3
0
# -*- coding: utf-8 -*-
"""

"""
import os
import json

from icu import Locale

BASE_PATH = os.path.dirname(os.path.abspath(__file__))

# One record per ICU locale: its canonical name plus its display name
# rendered in that locale's own language, for consumption elsewhere.
locales = [
    {'locale': loc.getName(), 'name': loc.getDisplayName(loc)}
    for loc in Locale.getAvailableLocales().values()
]

# Context manager ensures the file is flushed and closed even on error
# (the original passed a bare open() to json.dump and leaked the handle).
with open(os.path.join(BASE_PATH, 'locales.json'), 'w') as fp:
    json.dump(locales, fp)
示例#4
0
# LOG.setLevel(logging.DEBUG)
# ICU support is optional: try the modern `icu` binding first, then the
# legacy PyICU module name, and degrade gracefully when neither loads.
# Initialize up front so the names exist even when both imports fail
# (otherwise `if HAVE_ICU:` below raises NameError).
HAVE_ICU = False
_icu_err = None
try:
    from icu import Locale, Collator
    HAVE_ICU = True
except ImportError:
    try:
        from PyICU import Locale, Collator
        HAVE_ICU = True
    except ImportError as err:
        # No logger, save the warning message for later.
        _icu_err = ("ICU not loaded because %s. Localization will be impaired. "
                    "Use your package manager to install PyICU" % str(err))

# All locales ICU knows about, or None when ICU is unavailable.
ICU_LOCALES = None
if HAVE_ICU:
    ICU_LOCALES = Locale.getAvailableLocales()

# Map of languages for converting to Microsoft locales and naming
# locales for display to the user.  It's important to add to this list
# when a new translation is added.  Note the dummy _(): That's just to
# get xgettext to include the string in wearnow.pot; actual translation
# is done in _get_language_string() below.
# (The wearnow officially-supported language list is ALL_LINGUAS in setup.py)
_ = lambda x: x
_LOCALE_NAMES = {
    'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")),
    'bg': ('Bulgrian_Bulgaria', '1251', _("Bulgarian")),
    'br': (None, None, _("Breton")), #Windows has no translation for Breton
    'ca': ('Catalan_Spain', '1252', _("Catalan")),
    'cs': ('Czech_Czech Republic', '1250', _("Czech")),
    'da': ('Danish_Denmark', '1252', _("Danish")),
示例#5
0
    # NOTE(review): this span is the interior of a larger function whose
    # definition is outside this view; comments only, code unchanged.
    fonts = get_sys_fonts()
    # Font names as a NumPy array so they can be indexed alongside the
    # matrix columns built below.
    font_names = np.array(list(fonts.keys()))
    print("Found %d system fonts" % len(fonts), file=sys.stderr)

    # Make a huge sparse binary matrix that gives the availability of glyphs for each char in each font
    # Rows span code points up to the largest block 'stop' value; columns
    # are fonts.  lil_matrix supports cheap incremental assignment.
    glyph_avail = lil_matrix(
        (max(b['stop'] for b in unicode_blocks.values()), len(font_names)),
        dtype=np.uint8)
    for i, (name, font) in tqdm(enumerate(fonts.items()),
                                total=len(fonts),
                                desc="Checking glyph availability"):
        # Presumably a sequence of code points the font supports — TODO
        # confirm get_unicode_tables_by_font's return type.
        chars = get_unicode_tables_by_font(font)
        glyph_avail[chars, i] = 1

    # Package all locales
    # Restricted to a hand-picked set of language codes covering a range
    # of scripts (Latin, Indic, CJK, RTL, ...).
    locales = [(k, v) for k, v in Locale.getAvailableLocales().items()
               if k in [
                   'en', 'te', 'th', 'vi', 'ar', 'he', 'km', 'ta', 'gu', 'bn',
                   'ml', 'el', 'ru', 'ko', 'zh', 'ja'
               ]]
    for code, locale in tqdm(locales, desc="Packaging locales"):
        chars = get_locale_chars(code, unicode=True)
        # ASCII-fold the display name; any non-ASCII characters are dropped.
        name = locale.getDisplayName().encode('ascii',
                                              'ignore').decode('ascii')

        char_codes = np.sort(list(
            chain(*chars.values())))  # Unicode code of each char
        row_by_code = dict(zip(char_codes, range(len(
            char_codes))))  # Where each code appears in the resulting matrix

        # Convert the char dict to use matrix indices