def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
    """Read every locale of doc's "localeList" group into a dict.

    Each "locale" element is turned into a Locale (via Locale.fromXmlData,
    which is also given calendars.keys()) and stored under the key
    (language_id, script_id, country_id).  An id of -1 from one of the
    name-to-id look-ups is reported on stderr but the locale is kept.

    For every locale whose language id is not 1 (the C locale) and whose
    script id is 0, likely_subtags_map is searched for a default script:
    first for the locale's own country, then (if the country id is
    non-zero) for "AnyCountry".  A match overwrites locale.script.
    """
    def find_likely(language, country):
        # First likely-subtag entry going from (language, AnyScript, country),
        # or None when the map holds no such entry.
        for entry in likely_subtags_map.values():
            source = entry["from"]
            if source[0] == language and source[1] == "AnyScript" and source[2] == country:
                return entry
        return None

    warn = sys.stderr.write
    locales = {}

    for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
        locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k), calendars.keys())

        language_id = languageNameToId(locale.language, language_map)
        if language_id == -1:
            warn("Cannot find a language id for '%s'\n" % locale.language)

        script_id = scriptNameToId(locale.script, script_map)
        if script_id == -1:
            warn("Cannot find a script id for '%s'\n" % locale.script)

        country_id = countryNameToId(locale.country, country_map)
        if country_id == -1:
            warn("Cannot find a country id for '%s'\n" % locale.country)

        if language_id != 1:  # C
            if country_id == 0:
                warn("loadLocaleMap: No country id for '%s'\n" % locale.language)

            if script_id == 0:
                # find default script for a given language and country
                # (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
                match = find_likely(locale.language, locale.country)
                if match is not None:
                    locale.script = match["to"][1]
                    script_id = scriptNameToId(locale.script, script_map)

            if script_id == 0 and country_id != 0:
                # try with no country
                match = find_likely(locale.language, "AnyCountry")
                if match is not None:
                    locale.script = match["to"][1]
                    script_id = scriptNameToId(locale.script, script_map)

        locales[(language_id, script_id, country_id)] = locale

    return locales
def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
    """Read every locale of doc's "localeList" group into a dict.

    Each "locale" element is turned into a Locale (via Locale.fromXmlData)
    and stored under the key (language_id, script_id, country_id).  An id
    of -1 from one of the name-to-id look-ups is reported on stderr but the
    locale is kept.

    For every locale whose language id is not 1 (the C locale) and whose
    script id is 0, likely_subtags_map is searched for a default script:
    first for the locale's own country, then (if the country id is
    non-zero) for "AnyCountry".  A match overwrites locale.script.
    """
    def find_likely(language, country):
        # First likely-subtag entry going from (language, AnyScript, country),
        # or None when the map holds no such entry.
        for entry in likely_subtags_map.values():
            source = entry["from"]
            if source[0] == language and source[1] == "AnyScript" and source[2] == country:
                return entry
        return None

    warn = sys.stderr.write
    locales = {}

    for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
        locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k))

        language_id = languageNameToId(locale.language, language_map)
        if language_id == -1:
            warn("Cannot find a language id for '%s'\n" % locale.language)

        script_id = scriptNameToId(locale.script, script_map)
        if script_id == -1:
            warn("Cannot find a script id for '%s'\n" % locale.script)

        country_id = countryNameToId(locale.country, country_map)
        if country_id == -1:
            warn("Cannot find a country id for '%s'\n" % locale.country)

        if language_id != 1:  # C
            if country_id == 0:
                warn("loadLocaleMap: No country id for '%s'\n" % locale.language)

            if script_id == 0:
                # find default script for a given language and country
                # (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
                match = find_likely(locale.language, locale.country)
                if match is not None:
                    locale.script = match["to"][1]
                    script_id = scriptNameToId(locale.script, script_map)

            if script_id == 0 and country_id != 0:
                # try with no country
                match = find_likely(locale.language, "AnyCountry")
                if match is not None:
                    locale.script = match["to"][1]
                    script_id = scriptNameToId(locale.script, script_map)

        locales[(language_id, script_id, country_id)] = locale

    return locales
def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
    """Collect the data for one locale from the CLDR XML file at path.

    Returns an empty dict (meaning "nothing to generate") when path does
    not end in ".xml", when language_code is 'root', or when country_code
    is empty.  Otherwise returns Locale(result), where result is a dict
    holding the ids/names/codes of the language, script and country plus
    the currency, number-symbol, quotation, list-pattern, date/time,
    endonym, byte-unit, month and day data looked up for this locale.

    Raises xpathlite.Error when a variant_code is given or when the
    language, script or country code is unknown to enumdata.  Look-ups
    made with plain findEntry() are not guarded here, so whatever they
    raise for a missing entry propagates to the caller.
    """
    if not path.endswith(".xml"):
        return {}
    if language_code == 'root':
        # just skip it
        return {}

    # we do not support variants
    # ### actually there is only one locale with variant: en_US_POSIX
    #     does anybody care about it at all?
    if variant_code:
        raise xpathlite.Error('we do not support variants ("%s")' % variant_code)

    language_id = enumdata.languageCodeToId(language_code)
    if language_id <= 0:
        raise xpathlite.Error('unknown language code "%s"' % language_code)

    script_id = enumdata.scriptCodeToId(script_code)
    if script_id == -1:
        raise xpathlite.Error('unknown script code "%s"' % script_code)

    # we should handle fully qualified names with the territory
    if not country_code:
        return {}
    country_id = enumdata.countryCodeToId(country_code)
    if country_id <= 0:
        raise xpathlite.Error('unknown country code "%s"' % country_code)

    # So we say we accept only those values that have "contributed" or
    # "approved" resolution. see http://www.unicode.org/cldr/process.html
    # But we only respect the resolution for new data for backward
    # compatibility.
    draft = DraftResolution.contributed

    result = dict(
        language=enumdata.language_list[language_id][0],
        language_code=language_code,
        language_id=language_id,
        script=enumdata.script_list[script_id][0],
        script_code=script_code,
        script_id=script_id,
        country=enumdata.country_list[country_id][0],
        country_code=country_code,
        country_id=country_id,
        variant_code=variant_code)

    dir_name = os.path.dirname(path)

    def from_supplement(tag,
                        path=os.path.join(dir_name, '..', 'supplemental',
                                          'supplementalData.xml')):
        # Look tag up in the supplemental data file that sits beside the
        # directory containing this locale's file.
        return findTagsInFile(path, tag)

    # Currency: defaults first, then whatever the supplemental data says.
    currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
    result['currencyIsoCode'] = ''
    result['currencyDigits'] = 2
    result['currencyRounding'] = 1
    if currencies:
        for entry in currencies:
            if entry[0] == 'currency':
                tender = [x[1] == 'false' for x in entry[1] if x[0] == 'tender']
                if tender and tender[0]:
                    # Explicitly marked tender="false": not a candidate.
                    pass
                elif not any(x[0] == 'to' for x in entry[1]):
                    # No 'to' attribute: take the first such currency.
                    result['currencyIsoCode'] = next(
                        x[1] for x in entry[1] if x[0] == 'iso4217')
                    break
        if result['currencyIsoCode']:
            t = from_supplement("currencyData/fractions/info[iso4217=%s]"
                                % result['currencyIsoCode'])
            if t and t[0][0] == 'info':
                result['currencyDigits'] = next(
                    int(x[1]) for x in t[0][1] if x[0] == 'digits')
                result['currencyRounding'] = next(
                    int(x[1]) for x in t[0][1] if x[0] == 'rounding')

    numbering_system = None
    try:
        numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
    except:
        # Deliberately best-effort: on any failure numbering_system stays
        # None and the look-ups below use the unqualified xpath.
        pass

    def findEntryDef(path, xpath, value=''):
        # findEntry(), but returning value instead of raising xpathlite.Error.
        try:
            return findEntry(path, xpath)
        except xpathlite.Error:
            return value

    def get_number_in_system(path, xpath, numbering_system):
        # Prefer the entry specific to numbering_system, falling back to
        # the plain xpath when there is none.
        if numbering_system:
            try:
                return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
            except xpathlite.Error:
                # in CLDR 1.9 number system was refactored for numbers (but not for currency)
                # so if previous findEntry doesn't work we should try this:
                try:
                    return findEntry(path, xpath.replace(
                        "/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
                except xpathlite.Error:
                    # fallback to default
                    pass
        return findEntry(path, xpath)

    result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system)
    result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system)
    result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system)
    result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)

    try:
        numbering_systems = {}
        for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
                                              'numberingSystems.xml'),
                                 'numberingSystems'):
            tmp = {}
            ns_id = ""
            # ns looks like this:
            # [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
            for data in ns[1]:
                tmp[data[0]] = data[1]
                if data[0] == u"id":
                    ns_id = data[1]
            numbering_systems[ns_id] = tmp
        result['zero'] = numbering_systems[numbering_system][u"digits"][0]
    except Exception as e:
        # This used to read "except e:", which tries to use the *value* of
        # the name e as the exception class to match and so never reached
        # this fallback.
        sys.stderr.write("Native zero detection problem:\n" + str(e) + "\n")
        result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system)

    result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
    result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)
    result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower()
    result['quotationStart'] = findEntry(path, "delimiters/quotationStart")
    result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd")
    result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart")
    result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd")
    result['listPatternPartStart'] = parse_list_pattern_part_format(
        findEntry(path, "listPatterns/listPattern/listPatternPart[start]"))
    result['listPatternPartMiddle'] = parse_list_pattern_part_format(
        findEntry(path, "listPatterns/listPattern/listPatternPart[middle]"))
    result['listPatternPartEnd'] = parse_list_pattern_part_format(
        findEntry(path, "listPatterns/listPattern/listPatternPart[end]"))
    result['listPatternPartTwo'] = parse_list_pattern_part_format(
        findEntry(path, "listPatterns/listPattern/listPatternPart[2]"))
    result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft)
    result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft)
    result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern"))
    result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern"))
    result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern"))
    result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern"))

    # Language endonym: try the most specific language tag first.
    endonym = None
    if country_code and script_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code))
    if not endonym and script_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code))
    if not endonym and country_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code))
    if not endonym:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code))
    result['language_endonym'] = endonym
    result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code))

    currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system)
    currency_format = parse_number_format(currency_format, result)
    result['currencyFormat'] = currency_format[0]
    result['currencyNegativeFormat'] = ''
    if len(currency_format) > 1:
        result['currencyNegativeFormat'] = currency_format[1]

    result['currencySymbol'] = ''
    result['currencyDisplayName'] = ''
    if result['currencyIsoCode']:
        result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
        # Unquantified display name first, then one per plural category.
        result['currencyDisplayName'] = ';'.join(
            findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
                         + ']/displayName' + tail)
            for tail in ['',] + [
                '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
                ]) + ';'

    def findUnitDef(path, stem, fallback=''):
        # The displayName for a quantified unit in en.xml is kByte
        # instead of kB (etc.), so prefer any unitPattern provided:
        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
            try:
                ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
            except xpathlite.Error:
                continue

            # TODO: exploit count-handling, instead of discarding placeholders
            if ans.startswith('{0}'):
                ans = ans[3:].lstrip()
            if ans:
                return ans

        return findEntryDef(path, stem + 'displayName', fallback)

    # First without quantifier, then quantified each way:
    result['byte_unit'] = findEntryDef(
        path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
        'bytes')
    stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
    known = [] # cases where we *do* have a given version:
    result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
    # IEC 60027-2
    # http://physics.nist.gov/cuu/Units/binary.html
    result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))

    # Used for month and day data:
    namings = (
        ('standaloneLong', 'stand-alone', 'wide'),
        ('standaloneShort', 'stand-alone', 'abbreviated'),
        ('standaloneNarrow', 'stand-alone', 'narrow'),
        ('long', 'format', 'wide'),
        ('short', 'format', 'abbreviated'),
        ('narrow', 'format', 'narrow'),
        )

    # Month data:
    for cal in ('gregorian',): # We shall want to add to this
        stem = 'dates/calendars/calendar[' + cal + ']/months/'
        for (key, mode, size) in namings:
            prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
            result[key + 'Months'] = ';'.join(
                findEntry(path, stem + prop + "month[%d]" % i)
                for i in range(1, 13)) + ';'

    # Day data (for Gregorian, at least):
    stem = 'dates/calendars/calendar[gregorian]/days/'
    days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
    for (key, mode, size) in namings:
        prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
        result[key + 'Days'] = ';'.join(
            findEntry(path, stem + prop + '[' + day + ']')
            for day in days) + ';'

    return Locale(result)
sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) continue # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags if to_country == "AnyCountry" and from_country != to_country: to_country = from_country if to_script == "AnyScript" and from_script != to_script: to_script = from_script print " <likelySubtag>" print " <from>" print " <language>" + from_language + "</language>" print " <script>" + from_script + "</script>" print " <country>" + from_country + "</country>" print " </from>" print " <to>" print " <language>" + to_language + "</language>" print " <script>" + to_script + "</script>" print " <country>" + to_country + "</country>" print " </to>" print " </likelySubtag>" print " </likelySubtags>" print " <localeList>" Locale.C().toXml() for key in locale_keys: locale_database[key].toXml() print " </localeList>" print "</localeDatabase>"
continue # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags if to_country == "AnyCountry" and from_country != to_country: to_country = from_country if to_script == "AnyScript" and from_script != to_script: to_script = from_script print " <likelySubtag>" print " <from>" print " <language>" + from_language + "</language>" print " <script>" + from_script + "</script>" print " <country>" + from_country + "</country>" print " </from>" print " <to>" print " <language>" + to_language + "</language>" print " <script>" + to_script + "</script>" print " <country>" + to_country + "</country>" print " </to>" print " </likelySubtag>" print " </likelySubtags>" if skips: wrappedwarn('skipping likelySubtags (for unknown language codes): ', skips) print " <localeList>" Locale.C(calendars).toXml(calendars) for key in locale_keys: locale_database[key].toXml(calendars) print " </localeList>" print "</localeDatabase>"