class AlphabeticIndex(icuAlphabeticIndex):
    """
    ICU AlphabeticIndex built from the ICU Locale of a report locale.
    """

    def __init__(self, rlocale):
        """
        Build the index for ``rlocale.collation`` and add bucket labels
        for the scripts of the other known languages.
        """
        self.iculocale = Locale(rlocale.collation)
        super().__init__(self.iculocale)

        # The undocumented default maximum is 99 buckets, while
        # Latin + Greek + Cyrillic + Hebrew + Arabic + Tamil + Hiragana +
        # CJK Unified need about 206, so raise the limit.
        self.maxLabelCount = 500  # pylint: disable=invalid-name

        # Besides the script of the output being generated, add labels
        # for every other script, once per script.
        self.iculocale.addLikelySubtags()
        seen_scripts = [self.iculocale.getDisplayScript()]
        for code in glocale.get_language_dict().values():
            candidate = Locale(code)
            candidate.addLikelySubtags()
            script_name = candidate.getDisplayScript()
            if script_name in seen_scripts:
                continue
            seen_scripts.append(script_name)
            super().addLabels(candidate)
def gen_khm_words(text: str) -> str:
    """Yield successive slices of ``text`` between ICU word boundaries.

    The boundaries come from an ICU word ``BreakIterator`` created for the
    "km" locale. Despite the ``-> str`` annotation this is a generator
    function: each yielded item is one ``str`` slice.
    """
    breaker = BreakIterator.createWordInstance(Locale("km"))
    breaker.setText(text)
    left = breaker.first()
    for right in breaker:
        segment = text[left:right]
        left = right
        yield segment
def index(request):
    """Render a paginated, label-sorted listing of one thesaurus aspect.

    The aspect is read from the ``aspect`` query parameter and defaults to
    ``'Collection'``; names missing from ``ROUTABLES`` also fall back to
    ``'Collection'``. Subjects typed as ``UNBIST.PlaceName`` are skipped.
    Results are shown 20 per page; a ``page`` parameter that the paginator
    rejects as not an integer falls back to the first page.
    """
    preferred_language = translation.get_language()

    aspect = request.GET.get('aspect') or 'Collection'
    try:
        aspect_uri = ROUTABLES[aspect]
    except KeyError:
        aspect_uri = ROUTABLES['Collection']

    # Loop-invariant: the resource that marks place names.
    place_name = Resource(graph, UNBIST.PlaceName)

    this_results = []
    for res in graph.subjects(RDF.type, aspect_uri):
        r = Resource(graph, res)
        if place_name in list(r[RDF.type]):
            continue
        res_label = get_preferred_label(res, preferred_language)
        this_results.append({'uri': res, 'pref_label': res_label})

    # NOTE: plain code point ordering, not locale-aware collation.
    sorted_results = sorted(this_results, key=lambda tup: tup['pref_label'])

    p = Paginator(sorted_results, 20, request=request)
    # PageNotAnInteger is raised by Paginator.page(), not by reading the
    # query parameter, so the fallback has to wrap the page() call.
    try:
        paginated_results = p.page(request.GET.get('page', 1))
    except PageNotAnInteger:
        paginated_results = p.page(1)

    return render(request, 'thesaurus/index.html', {
        'results': paginated_results,
        'target': 'instances',
        'aspect': aspect,
    })
def widget(cls, field, value, collation=None, **attributes):
    """
    Generates a SELECT tag, including OPTIONs (only 1 option allowed)

    see also: `FormWidget.widget`

    The options are taken from ``options()`` of the first validator in
    ``field.requires``. When ``collation`` is given, it is used as an ICU
    locale name and the options are sorted by their label (second item of
    each option pair) with that locale's collator.

    Raises SyntaxError when the field has no validator exposing
    ``options``.
    """
    default = dict(value=value)
    attr = cls._attributes(field, default, **attributes)

    requires = field.requires
    if not isinstance(requires, (list, tuple)):
        requires = [requires]
    # An empty ``requires`` used to fall through with ``options`` unbound.
    if not requires or not hasattr(requires[0], 'options'):
        raise SyntaxError('widget cannot determine options of %s' % field)
    options = requires[0].options()

    if collation:
        coll = Collator.createInstance(Locale(collation))
        # Sort keys order exactly like coll.compare and, unlike the
        # ``cmp`` argument of sorted(), also work on Python 3.
        options = sorted(options,
                         key=lambda option: coll.getSortKey(option[1]))

    opts = [OPTION(v, _value=k) for (k, v) in options]
    return SELECT(*opts, **attr)
def _gen_words(text: str) -> str:
    """Yield successive slices of ``text`` between ICU word boundaries.

    The boundaries come from an ICU word ``BreakIterator`` created for the
    "th" locale. Despite the ``-> str`` annotation this is a generator
    function: each yielded item is one ``str`` slice.
    """
    breaker = BreakIterator.createWordInstance(Locale("th"))
    breaker.setText(text)
    left = breaker.first()
    for right in breaker:
        segment = text[left:right]
        left = right
        yield segment
def __init__(self, *args, **kwargs):
    '''Initialize a unicode dictionary.

    The signature is changed because the kwargs are used to set the
    comparison details: ``locale``, ``comparison_level`` and
    ``case_sensitive``. When the single positional argument is an instance
    of this class, its settings are the defaults for those keywords.
    '''
    if len(args) > 1:
        raise TypeError('expected at most 1 arguments, got %d' % len(args))

    source = args[0] if args else None
    if args and isinstance(source, self.__class__):
        # Copy construction: explicit keywords win over the source's
        # settings, and are used as given.
        locale = kwargs.pop('locale') if 'locale' in kwargs \
            else source.locale
        comparison_level = kwargs.pop('comparison_level') \
            if 'comparison_level' in kwargs else source.comparison_level
        case_sensitive = kwargs.pop('case_sensitive') \
            if 'case_sensitive' in kwargs else source.case_sensitive
    else:
        locale = kwargs.pop('locale', 'en_US')
        # Only this branch clamps the level to 0..3.
        comparison_level = max(0, min(3, kwargs.pop('comparison_level', 0)))
        case_sensitive = kwargs.pop('case_sensitive', False)

    self.__locale = Locale(locale)
    self.__collator = Collator.createInstance(self.__locale)
    self.__collator.setStrength(comparison_level)
    if case_sensitive:
        case_level = UCollAttributeValue.ON
    else:
        case_level = UCollAttributeValue.OFF
    self.__collator.setAttribute(UCollAttribute.CASE_LEVEL, case_level)

    if args:
        if isinstance(source, Mapping):
            pairs = list(source.items())
        else:
            pairs = source
        for key, val in pairs:
            self.__setitem__(key, val)
def sorted_(data, l, series=False, i=False):
    """Return a copy of dict ``data`` ordered by ICU collation.

    ``l`` is a language tag passed to ``Locale.forLanguageTag``. Each
    ``(key, value)`` item is ordered by the sort key of its element at
    index ``i`` (the default ``False`` indexes like 0, i.e. the key).
    ``series`` is not used. Any non-dict ``data`` yields ``None``.
    """
    collator = Collator.createInstance(Locale.forLanguageTag(l))
    if not isinstance(data, dict):
        return None

    def collation_key(item):
        return collator.getSortKey(item[i])

    ordered_items = sorted(data.items(), key=collation_key)
    return dict(ordered_items)
def _localize_timezones(locale: babel.Locale) -> LocalizedTimezone:
    """Localize every known timezone for ``locale``.

    Returns a list with one ``_localize_timezone`` result per zone read by
    ``_read_timezone_ids_and_aliases``, all sharing one ICU collator built
    from ``locale.language``.
    """
    zones_and_aliases = _read_timezone_ids_and_aliases()
    # locale.language: 'en' or 'en_US'
    icu_locale = Locale.createFromName(locale.language)
    collator = Collator.createInstance(icu_locale)

    localized = []
    for zone, aliases in zones_and_aliases.items():
        localized.append(_localize_timezone(zone, aliases, locale, collator))
    return localized
def default_locale(self):
    """Return the configured default locale as an `icu.Locale` object.

    Reads ``ICU_DEFAULT_LOCALE`` from the application configuration and
    uses ``'en'`` when that value is `None`.
    """
    configured = self.app.config['ICU_DEFAULT_LOCALE']
    return Locale('en' if configured is None else configured)
def sortkey_length(strength, word):
    """Return the length of the ICU sort key of ``word``.

    The key is produced by a collator for ``Locale('')`` at the given
    ``strength`` with alternate handling set to SHIFTED; the key's final
    byte is not counted.
    """
    collator = Collator.createInstance(Locale(''))
    collator.setStrength(strength)
    collator.setAttribute(UCollAttribute.ALTERNATE_HANDLING,
                          UCollAttributeValue.SHIFTED)
    # subtract 1 for ending \x00 byte
    return len(collator.getSortKey(word)) - 1
def __init__(self, locale="en"):
    """Create an ICU sentence break iterator for ``locale``.

    Stores the ICU locale in ``self.locale`` and the iterator in
    ``self.breaker``.
    """
    from icu import Locale, BreakIterator

    # ICU includes lists of common abbreviations that can be used to filter,
    # to ignore, these false sentence boundaries for some languages.
    # (http://userguide.icu-project.org/boundaryanalysis)
    suppressed = {"en", "de", "es", "it", "pt"}
    locale_id = locale + "@ss=standard" if locale in suppressed else locale

    self.locale = Locale(locale_id)
    self.breaker = BreakIterator.createSentenceInstance(self.locale)
def primary_difference(prev_key, new_key, rlocale=glocale):
    """
    Tell whether two keys differ at primary collation strength.

    The PyICU collator is built from ``rlocale.collation`` so that a report
    generated for another language uses that language's collation sequence.
    Returns True when the keys do not compare equal.
    """
    collator = Collator.createInstance(Locale(rlocale.collation))
    collator.setStrength(Collator.PRIMARY)
    order = collator.compare(prev_key, new_key)
    return order != 0
def _compute_char_brkpoints(self):
    """
    Store the ICU character break points of ``self.unsegmented``.

    ``self.char_brkpoints`` becomes ``[0]`` followed by every boundary the
    root-locale character BreakIterator reports (used here to delimit
    extended grapheme clusters).
    """
    breaker = BreakIterator.createCharacterInstance(Locale.getRoot())
    breaker.setText(self.unsegmented)
    self.char_brkpoints = [0]
    self.char_brkpoints.extend(breaker)
def sortkey(strength, maxlength=None):
    """Return a function mapping a string to its ICU sort key.

    The collator is created for ``Locale('')`` at the given ``strength``
    with alternate handling set to SHIFTED. When ``maxlength`` is not None
    the returned function truncates each key to that many bytes.
    """
    collator = Collator.createInstance(Locale(''))
    collator.setStrength(strength)
    collator.setAttribute(UCollAttribute.ALTERNATE_HANDLING,
                          UCollAttributeValue.SHIFTED)

    if maxlength is not None:
        def truncated_key(value):
            return collator.getSortKey(value)[:maxlength]
        return truncated_key
    return collator.getSortKey
class UnicodeStrFactory(object):
    """Factory of ``unicode_str_base`` subclasses bound to one ICU collator.

    Calling an instance creates a string object whose class carries this
    factory in its ``_factory`` attribute.
    """

    def __init__(self,locale="EN_US",comparison_level=0,case_sensitive=False):
        """Create the collators for ``locale``.

        ``comparison_level`` is clamped to the range 0..3 and used as the
        collator strength; ``case_sensitive`` switches the collator's
        CASE_LEVEL attribute on or off.
        """
        comparison_level = max(0,min(3,comparison_level))
        self.__locale = Locale(locale)
        self.__collator = Collator.createInstance(self.__locale)
        self.__collator.setStrength(comparison_level)
        self.__collator.setAttribute(UCollAttribute.CASE_LEVEL,
                                     UCollAttributeValue.ON if case_sensitive else UCollAttributeValue.OFF)
        # The base collator always works at strength 0 with case level off;
        # the main collator is reused when it already has those settings.
        if comparison_level == 0 and case_sensitive == False:
            self.__base_coll = self.__collator
        else:
            self.__base_coll = Collator.createInstance(self.__locale)
            self.__base_coll.setStrength(0)
            self.__base_coll.setAttribute(UCollAttribute.CASE_LEVEL,
                                          UCollAttributeValue.OFF)

    @property
    def locale(self):
        """Name of the ICU locale, as reported by ``Locale.getName()``."""
        return self.__locale.getName()

    @property
    def comparison_level(self):
        """Strength of the main collator."""
        return self.__collator.getStrength()

    @property
    def case_sensitive(self):
        """True when the main collator has CASE_LEVEL switched on."""
        return self.__collator.getAttribute(UCollAttribute.CASE_LEVEL) == UCollAttributeValue.ON

    @property
    def collator(self):
        """The main ICU collator."""
        return self.__collator

    def coll_len(self,string):
        """Return the length of the base collator's sort key of ``string``, minus one."""
        return len(self.__base_coll.getSortKey(string))-1

    # Private sentinel telling "encoding not passed" apart from any real value.
    __marker = object()

    def __call__(self,obj,encoding=__marker, errors='strict'):
        """Build a string object from ``obj`` tied to this factory.

        A new ``unicode_str_base`` subclass is created on every call.
        ``encoding`` and ``errors`` are forwarded only when ``encoding`` is
        given.
        """
        class unicode_str(unicode_str_base):
            _factory = self
        if encoding == self.__marker:
            return unicode_str(obj)
        else:
            return unicode_str(obj,encoding=encoding,errors=errors)

    def __reduce__(self):
        """Pickle support: rebuild from locale, level and case sensitivity.

        The state dict holds only the attributes that a freshly
        constructed default instance does not have.
        """
        inst_dict = vars(self).copy()
        for k in vars(self.__class__()):
            inst_dict.pop(k, None)
        return (self.__class__, (self.locale,self.comparison_level,self.case_sensitive), inst_dict)

    def key_for_caching(self,word):
        """Return ``key_for_caching()`` of the string object built from ``word``."""
        return self(word).key_for_caching()
def get_installed_translations():
    """Return the installed "fpdb" translations with display names.

    Returns a pair of dicts:

    * language code ("de", "es", ...) -> display name
    * language/country code ("de_DE", "de_AT", ...) -> display name

    A locale from ``locale.windows_locale`` counts as installed when
    ``gettext.find`` locates an "fpdb" catalog for it under the relative
    ``locale`` directory. Display names come from PyICU; if PyICU cannot
    be imported or used, every code maps to itself.
    """
    import locale
    import gettext

    la_set = set()  # a set, so duplicate language codes collapse
    la_co_list = []
    # .values() works on Python 2 and 3 (iteritems() is Python 2 only).
    for la_co in locale.windows_locale.values():
        if gettext.find("fpdb", localedir="locale", languages=[la_co]):
            # Part before the first "_", or the whole code without one.
            la_set.add(la_co.split("_", 1)[0])
            la_co_list.append(la_co)

    try:
        from icu import Locale
        la_dict = dict(
            (code, Locale.getDisplayName(Locale(code))) for code in la_set)
        la_co_dict = dict(
            (code, Locale.getDisplayName(Locale(code))) for code in la_co_list)
    except Exception:
        # Best effort: without a usable PyICU fall back to the raw codes.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        la_dict = dict((code, code) for code in la_set)
        la_co_dict = dict((code, code) for code in la_co_list)

    return la_dict, la_co_dict
def sort(iterable, loc, key=None, reverse=False):
    """
    Creates new sorted list from passed list (or any iterable data) according
    to the passed locale.

    arguments:
    iterable -- iterable object (typically a list or a tuple)
    loc -- locale identifier (e.g. cs_CZ.UTF-8, en_US,...)
    key -- access to sorted value
    reverse -- whether the result should be in reversed order (default is False)
    """
    collator = Collator.createInstance(Locale(loc))

    # sorted() lost its ``cmp`` argument in Python 3; ICU sort keys give the
    # same ordering as collator.compare and work on Python 2 and 3.
    if key is None:
        sort_key = collator.getSortKey
    else:
        def sort_key(item):
            return collator.getSortKey(key(item))

    return sorted(iterable, key=sort_key, reverse=reverse)
def icu_format_message(locale_id: str, message: str,
                       arguments: _MessageArguments = {}) -> str:
    """Substitute arguments into ICU-style message.

    You can have variable substitution, plurals, selects and nested
    messages. ``arguments`` must be a dict; its keys are passed as the
    argument names and its values, wrapped in ``Formattable``, as the
    argument values.

    Raises `ICUError` in case of incorrectly formatted message.
    """
    formatter = MessageFormat(message, Locale.createFromName(locale_id))
    names = list(arguments.keys())
    values = [Formattable(value) for value in arguments.values()]
    return formatter.format(names, values)
def divideIntoWords(txt, locale):
    """Return the list of ICU word boundary offsets found in ``txt``.

    ``locale`` is a locale name passed to ``Locale.createFromName``. The
    offsets are the ones the word BreakIterator yields when iterated after
    ``setText``.
    """
    loc = Locale.createFromName(locale)
    bi = BreakIterator.createWordInstance(loc)
    bi.setText(txt)
    # Use the iterator protocol rather than calling bi.next() until
    # StopIteration: ``next`` as a method only exists on Python 2.
    return list(bi)
def get_installed_translations():
    """Return the installed "fpdb" translations with display names.

    Returns a pair of dicts:

    * language code ("de", "es", ...) -> display name
    * language/country code ("de_DE", "de_AT", ...) -> display name

    A locale from ``locale.windows_locale`` counts as installed when
    ``gettext.find`` locates an "fpdb" catalog for it under the relative
    ``locale`` directory. Display names come from PyICU; if PyICU cannot
    be imported or used, every code maps to itself.
    """
    import locale
    import gettext

    la_set = set()  # a set, so duplicate language codes collapse
    la_co_list = []
    # .values() works on Python 2 and 3 (iteritems() is Python 2 only).
    for la_co in locale.windows_locale.values():
        if gettext.find("fpdb", localedir="locale", languages=[la_co]):
            # Part before the first "_", or the whole code without one.
            la_set.add(la_co.split("_", 1)[0])
            la_co_list.append(la_co)

    try:
        from icu import Locale
        la_dict = dict(
            (code, Locale.getDisplayName(Locale(code))) for code in la_set)
        la_co_dict = dict(
            (code, Locale.getDisplayName(Locale(code))) for code in la_co_list)
    except Exception:
        # Best effort: without a usable PyICU fall back to the raw codes.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        la_dict = dict((code, code) for code in la_set)
        la_co_dict = dict((code, code) for code in la_co_list)

    return la_dict, la_co_dict
def sort_for_script(cp_list, script):
    """Return ``cp_list`` sorted with the collation for ``script``.

    The language is looked up with ``lang_for_script``; when there is none,
    a message is printed and ``cp_list`` is returned unchanged. With ICU
    available the items are compared by the collator of the
    ``<lang>_<script>`` locale, otherwise by ``locale.strcoll``.
    """
    # cmp_to_key keeps the original comparison functions while avoiding the
    # ``cmp`` argument of sorted(), which only exists on Python 2.
    from functools import cmp_to_key

    lang = lang_for_script(script)
    if not lang:
        # Single parenthesized argument: prints the same on Python 2 and 3.
        print('cannot sort for script, no lang for %s' % script)
        return cp_list

    if _HAVE_ICU:
        from icu import Locale, Collator
        loc = Locale(lang + '_' + script)
        col = Collator.createInstance(loc)
        return sorted(cp_list, key=cmp_to_key(col.compare))

    import locale
    return sorted(cp_list, key=cmp_to_key(locale.strcoll))
def main():
    """Run the ICU break iterator sample on a fixed two-sentence string.

    Prints the sentence boundaries forward and backward, then the word
    boundaries forward, the first and last word, and the word at
    position 10, using the ``printEach*``/``printFirst``/``printLast``/
    ``printAt`` helpers.
    """
    # Every print below takes exactly one parenthesized argument, so the
    # output is the same whether print is a statement (Python 2) or a
    # function (Python 3).
    print("ICU Break Iterator Sample Program")
    print("C++ Break Iteration in Python")

    stringToExamine = u"Aaa bbb ccc. Ddd eee fff."
    # Two spaces on purpose: matches ``print "Examining: ", value``.
    print("Examining:  %s" % stringToExamine)

    # print each sentence in forward and reverse order
    boundary = BreakIterator.createSentenceInstance(Locale.getUS())
    boundary.setText(stringToExamine)

    print("")
    print("Sentence Boundaries... ")
    print("----- forward: -----------")
    printEachForward(boundary)
    print("----- backward: ----------")
    printEachBackward(boundary)

    # print each word in order
    print("")
    print("Word Boundaries...")
    boundary = BreakIterator.createWordInstance(Locale.getUS())
    boundary.setText(stringToExamine)
    print("----- forward: -----------")
    printEachForward(boundary)
    # print first element
    print("----- first: -------------")
    printFirst(boundary)
    # print last element
    print("----- last: --------------")
    printLast(boundary)
    # print word at charpos 10
    print("----- at pos 10: ---------")
    printAt(boundary, 10)

    print("")
    print("End C++ Break Iteration in Python")
class Language(object):
    """A detected language: ICU locale, confidence and bytes read."""

    def __init__(self, choice):
        """Unpack ``choice`` = (basic_name, code, confidence, bytesize).

        ``basic_name`` is not stored; ``code`` becomes an ICU ``Locale``.
        """
        _basic_name, code, confidence, bytesize = choice
        self.locale = Locale(code)
        self.confidence = float(confidence)
        self.read_bytes = int(bytesize)

    @property
    def name(self):
        """Display language of the locale."""
        return self.locale.getDisplayLanguage()

    @property
    def code(self):
        """Name of the locale, as reported by ``Locale.getName()``."""
        return self.locale.getName()

    def __str__(self):
        template = ("name: {:<12}code: {:<9}confidence: {:>5.1f} "
                    "read bytes:{:>6}")
        return template.format(self.name, self.code,
                               self.confidence, self.read_bytes)

    @staticmethod
    def from_code(code):
        """Build a Language for ``code`` with confidence 100 and 0 bytes read."""
        choice = ("", code, 100, 0)
        return Language(choice)
def __init__(self, locale="EN_US", comparison_level=0, case_sensitive=False):
    """Create the main and base ICU collators for ``locale``.

    ``comparison_level`` is clamped to 0..3 and used as the strength of the
    main collator; ``case_sensitive`` switches its CASE_LEVEL attribute.
    The base collator always has strength 0 and case level off.
    """
    level = max(0, min(3, comparison_level))
    if case_sensitive:
        case_level = UCollAttributeValue.ON
    else:
        case_level = UCollAttributeValue.OFF

    self.__locale = Locale(locale)
    self.__collator = Collator.createInstance(self.__locale)
    self.__collator.setStrength(level)
    self.__collator.setAttribute(UCollAttribute.CASE_LEVEL, case_level)

    if not (level == 0 and case_sensitive == False):
        # The main collator differs from the base settings: build a
        # separate base collator.
        base = Collator.createInstance(self.__locale)
        base.setStrength(0)
        base.setAttribute(UCollAttribute.CASE_LEVEL,
                          UCollAttributeValue.OFF)
        self.__base_coll = base
    else:
        self.__base_coll = self.__collator
def worker(path, outdir, with_sorting=True):
    """Extract the distinct words of the text file that accompanies ``path``.

    ``path`` names a ``.yml`` metadata file; the text is read from the same
    path with ``.yml`` replaced by ``.txt``. The lower-cased text is split
    on runs of non-word characters and digits, and the distinct pieces are
    written one per line to
    ``<outdir>/extracted-words-for-<label>.txt``, where ``label`` comes
    from the metadata. With ``with_sorting`` the words are ordered by the
    ICU collator for "pl_PL.UTF-8"; otherwise in set iteration order.

    Returns ``path``.
    """
    collator = Collator.createInstance(Locale("pl_PL.UTF-8"))
    # Raw string: "\W" and "\d" in a plain literal are invalid escape
    # sequences, which newer Python versions warn about.
    separator = re.compile(r"[\W\d]+")

    filepath = path.replace(".yml", ".txt")
    with open(filepath) as text_file:
        text = text_file.read().lower().rstrip()
    words = set(separator.split(text))

    with open(path) as meta_file:
        meta = yaml.safe_load(meta_file)

    with open(f"{outdir}/extracted-words-for-{meta['label']}.txt", "w") as out_file:
        if with_sorting:
            words = sorted(words, key=collator.getSortKey)
        out_file.write("\n".join(words))
    return path
def _compute_icu_segmented(self):
    """
    Compute the ICU-segmented version of the line from ``self.unsegmented``,
    which therefore must have been computed already.

    ``self.icu_word_brkpoints`` becomes ``[0]`` followed by the boundaries
    reported by the root-locale word BreakIterator, and
    ``self.icu_segmented`` is the text with a "|" before the first segment
    and after every segment.
    """
    breaker = BreakIterator.createWordInstance(Locale.getRoot())
    breaker.setText(self.unsegmented)

    self.icu_word_brkpoints = [0]
    self.icu_word_brkpoints.extend(breaker)

    bounds = self.icu_word_brkpoints
    pieces = [self.unsegmented[start:end] + "|"
              for start, end in zip(bounds[:-1], bounds[1:])]
    self.icu_segmented = "|" + "".join(pieces)
def sort_choices(choices):
    '''
    Sorts choices alphabetically by their second item.

    Uses the ICU collator of the current language when ICU is available,
    plain comparison of the values otherwise.
    '''
    # Both the ``cmp`` builtin and the ``cmp`` argument of sorted() are
    # Python 2 only; key functions give the same ordering on 2 and 3.
    if HAS_ICU:
        collator = Collator.createInstance(Locale(get_language()))
        return sorted(choices, key=lambda tup: collator.getSortKey(tup[1]))
    return sorted(choices, key=lambda tup: tup[1])
def coverage(font, threshold=10):
    """Report which ICU locales the characters of ``font`` cover.

    Returns ``(scripts, languages, partial)``: the display script and
    display language names of every locale whose exemplar characters are
    all in the font's best cmap, and a dict mapping the display language
    of locales missing at most ``threshold`` exemplar characters to the
    set of missing characters. Locales sharing no character with the font
    are ignored.
    """
    cmap = {chr(codepoint) for codepoint in font.getBestCmap()}
    scripts = set()
    languages = set()
    partial = {}

    for name in Locale.getAvailableLocales():
        exemplars = set("".join(LocaleData(name).getExemplarSet()))
        if cmap.isdisjoint(exemplars):
            continue

        icu_locale = Locale(name)
        icu_locale.addLikelySubtags()
        missing = exemplars - cmap
        if not missing:
            scripts.add(icu_locale.getDisplayScript())
            languages.add(icu_locale.getDisplayLanguage())
        elif len(missing) <= threshold:
            partial[icu_locale.getDisplayLanguage()] = missing

    return scripts, languages, partial
def character_tokenize(self, word):
    """
    Returns the tokenization in character level.

    The pieces are the slices of ``word`` between consecutive boundaries
    of an ICU character BreakIterator for the default ``Locale()``.

    Arguments:
        word {string} -- word to be tokenized in character level.

    Returns:
        [list] -- list of characters.
    """
    breaker = BreakIterator.createCharacterInstance(Locale())
    breaker.setText(word)

    pieces = []
    start = 0
    for end in breaker:
        pieces.append(word[start:end])
        start = end
    return pieces
def icu_format_html_message(
    locale_id: str,
    message: str,
    arguments: _MessageArguments = {},
    tags: _TagMapping = {},
) -> str:
    """Substitute arguments into ICU-style HTML message.

    You can have variable substitution, plurals, selects and nested
    messages. HTML tag placeholders in ``message`` are replaced through
    ``restore_tags`` before formatting, and every ``str`` argument value is
    passed through ``escape``; other values are used unchanged.

    Raises `ICUError` in case of incorrectly formatted message.
    """
    pattern = restore_tags(message, tags)
    formatter = MessageFormat(pattern, Locale.createFromName(locale_id))

    names = list(arguments.keys())
    values = []
    for value in arguments.values():
        if isinstance(value, str):
            value = escape(value)
        values.append(Formattable(value))
    return formatter.format(names, values)
def endElement(self, name):
    """Handle the end of an element and forward it downstream.

    At the end of a "Unicode" element the collected text is split at ICU
    word boundaries, every piece is passed through ``processToken`` and the
    concatenated result is sent downstream as character data.
    """
    if name == u"Unicode":
        self.__isUni = False
        # NOTE(review): "utf-8" is an encoding name, not a locale
        # identifier; kept as in the original - confirm the intended locale.
        loc = Locale.createFromName("utf-8")
        bi = BreakIterator.createWordInstance(loc)
        bi.setText(self.__uniText)

        tokens = []
        prev = 0
        # Iterate the BreakIterator instead of calling bi.next() until
        # StopIteration: ``next`` as a method only exists on Python 2.
        for ind in bi:
            tokens.append(self.__uniText[prev:ind])
            prev = ind

        text = u"".join(processToken(t) for t in tokens)
        self.__downstream.characters(text)
    self.__downstream.endElement(name)
def get_locale():
    """Return the `icu.Locale` to use for the current request.

    Returns `None` if used outside of a request. The locale is computed
    once per request context and cached on it as ``icu_locale``: the value
    returned by the extension's locale selector function when one is set
    and returns something other than `None`, else the extension's default
    locale.
    """
    ctx = _request_ctx_stack.top
    if ctx is None:
        return None

    cached = getattr(ctx, 'icu_locale', None)
    if cached is not None:
        return cached

    icu = ctx.app.extensions['icu']
    selected = None
    if icu.locale_selector_func is not None:
        selected = icu.locale_selector_func()

    locale = icu.default_locale if selected is None else Locale(selected)
    ctx.icu_locale = locale
    return locale
def sort(iterable, loc, key=None, reverse=False):
    """
    Creates new sorted list from passed list (or any iterable data) according
    to the passed locale.

    arguments:
    iterable -- iterable object (typically a list or a tuple)
    loc -- locale identifier (e.g. cs_CZ.UTF-8, en_US,...)
    key -- access to sorted value
    reverse -- whether the result should be in reversed order (default is False)

    Raises LocalizationError when ``loc`` is empty.
    """
    if not loc:
        raise LocalizationError(
            'cannot sort string due to missing locale information (probably a configuration issue)')

    collator = Collator.createInstance(Locale(loc))
    if key is not None:
        def compare(left, right):
            return collator.compare(key(left), key(right))
    else:
        compare = collator.compare
    return sorted(iterable, key=cmp_to_key(compare), reverse=reverse)
def cross_validate(english_value, other_language_value, other_language, key_name=None):
    """Check a translated value against its English counterpart.

    ``other_language`` is a path-like string whose last component, without
    ``.js`` and with "-" replaced by "_", is the locale code; it is only
    used to name the language in messages. A missing translation
    (``None``) prints a warning. Values of different types raise
    ``TypeError``. Dict values are validated recursively, key by key.
    """
    this_lang = other_language.split("/")[-1].split(".js")[0].replace("-", "_")
    this_lang = Locale(this_lang).getDisplayName(english_lang)

    if other_language_value is None:
        # key_name is interpolated, not concatenated, so the default
        # key_name=None no longer raises TypeError here.
        print(
            "🟡 In" + Style.BRIGHT + Fore.YELLOW,
            f"{this_lang}" + Style.RESET_ALL,
            f"there is no value for {Fore.YELLOW}{key_name}{Fore.WHITE}.",
        )
    elif type(english_value) != type(other_language_value):
        # TypeError is a subclass of the Exception raised before, so
        # existing ``except Exception`` handlers still catch it. The
        # message previously lacked the space after "the type of".
        raise TypeError(
            f"The type of the English value ({english_value}) and the type of "
            + f"{this_lang}'s value ({other_language_value}) are different for key {key_name}."
        )
    elif isinstance(english_value, dict):
        for name, item in english_value.items():
            cross_validate(item, other_language_value.get(name), other_language, name)
def make_collator(request):
    """Return an ICU collator for the locale named by ``request.locale_name``."""
    return Collator.createInstance(
        Locale.createFromName(request.locale_name))
# -*- coding: utf-8 -*-
"""
Dump the name and display name of every locale known to ICU into
``locales.json`` next to this file.
"""

import os
import json

from icu import Locale

BASE_PATH = os.path.dirname(os.path.abspath(__file__))

locales = []
for locale in Locale.getAvailableLocales().values():
    # The display name is rendered in the locale's own language.
    locales.append({'locale': locale.getName(),
                    'name': locale.getDisplayName(locale)})

# Context manager so the output file is flushed and closed deterministically.
with open(os.path.join(BASE_PATH, 'locales.json'), 'w') as json_file:
    json.dump(locales, json_file)
def __init__(self, choice):
    """Unpack ``choice`` = (basic_name, code, confidence, bytesize).

    ``basic_name`` is not stored; ``code`` becomes an ICU ``Locale``,
    ``confidence`` a float and ``bytesize`` an int.
    """
    _basic_name, code, confidence, bytesize = choice
    self.read_bytes = int(bytesize)
    self.confidence = float(confidence)
    self.locale = Locale(code)
class unicode_set(set):
    '''Set that support unicode comparison as defined by icu (UCA)

    It uses a dict as the underlying storage instead of the built-in set
    despite the performance difference since it needs to keep anyway a
    mapping dict. The dict maps the collation sort key of each string
    element (or the element itself for non-strings) to the element.
    '''

    def __init__(self, *args, **kwargs):
        '''Initialize a unicode set.

        The signature is changed because the kwargs are used to set the
        comparison details: ``locale``, ``comparison_level`` and
        ``case_sensitive``. When the single positional argument is a
        unicode_set, its settings are the defaults for those keywords.
        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        if len(args) == 1 and isinstance(args[0], self.__class__):
            locale = args[0].locale if 'locale' not in kwargs \
                else kwargs.pop('locale')
            comparison_level = args[0].comparison_level \
                if 'comparison_level' not in kwargs \
                else kwargs.pop('comparison_level')
            case_sensitive = args[0].case_sensitive \
                if 'case_sensitive' not in kwargs \
                else kwargs.pop('case_sensitive')
        else:
            locale = kwargs.pop('locale', 'en_US')
            comparison_level = max(0, min(3, kwargs.pop('comparison_level', 0)))
            case_sensitive = kwargs.pop('case_sensitive', False)
        self.__locale = Locale(locale)
        self.__collator = Collator.createInstance(self.__locale)
        self.__collator.setStrength(comparison_level)
        self.__collator.setAttribute(
            UCollAttribute.CASE_LEVEL,
            UCollAttributeValue.ON if case_sensitive else UCollAttributeValue.OFF)
        self.__values = {}
        # set implementation
        if len(args) == 1:
            for val in args[0]:
                self.add(val)

    @property
    def locale(self):
        '''Name of the ICU locale used for comparison.'''
        return self.__locale.getName()

    @property
    def comparison_level(self):
        '''Strength of the collator.'''
        return self.__collator.getStrength()

    @property
    def case_sensitive(self):
        '''True when the collator has CASE_LEVEL switched on.'''
        return self.__collator.getAttribute(UCollAttribute.CASE_LEVEL) == UCollAttributeValue.ON

    def __in_key(self, key):
        '''Return the storage key: the sort key for strings, else the object.'''
        return self.__collator.getSortKey(key) if isinstance(key, basestring) else key

    def __in_equality(self, other):
        '''Tell whether ``other`` uses the same comparison settings.'''
        return self.locale == other.locale and \
            self.comparison_level == other.comparison_level and \
            self.case_sensitive == other.case_sensitive

    def __check_peer(self, other):
        '''Raise TypeError unless ``other`` is a unicode_set with the same settings.'''
        if not isinstance(other, self.__class__):
            raise TypeError("can only compare to a unicode_set")
        if not self.__in_equality(other):
            raise TypeError("can only compare to a unicode_set with the same caracteristic")

    def add(self, val):
        '''Add an element to a set.

        This has no effect if the element is already present.
        '''
        self.__values[self.__in_key(val)] = val

    def clear(self):
        '''Remove all elements from this set.
        '''
        self.__values.clear()

    def copy(self):
        '''Return a shallow copy of a set.
        '''
        return self.__class__(self)

    def difference(self, *args):
        '''Return the difference of two or more sets as a new set.

        (i.e. all elements that are in this set but not the others.)
        '''
        ret = self.__class__(self)
        ret.difference_update(*args)
        return ret

    def difference_update(self, *args):
        '''Remove all elements of another set from this set.

        Called without arguments this is a no-op.
        '''
        for arg in args:
            if isinstance(arg, self.__class__) and self.__in_equality(arg):
                # Snapshot the keys: ``arg`` may be this very set, and a
                # dict must not change size while being iterated.
                keys = list(arg.__values)
            else:
                keys = [self.__in_key(i) for i in arg]
            for key in keys:
                self.__values.pop(key, None)

    def discard(self, val):
        '''Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        '''
        try:
            self.remove(val)
        except KeyError:
            pass

    def intersection(self, *args):
        '''Return the intersection of two or more sets as a new set.

        (i.e. elements that are common to all of the sets.)
        '''
        ret = self.__class__(self)
        ret.intersection_update(*args)
        return ret

    def intersection_update(self, *args):
        '''Update a set with the intersection of itself and another.

        Called without arguments this is a no-op.
        '''
        for other in args:
            if isinstance(other, self.__class__) and self.__in_equality(other):
                arg = other
            else:
                arg = self.__class__(other, locale=self.locale,
                                     case_sensitive=self.case_sensitive,
                                     comparison_level=self.comparison_level)
            # Iterate over a snapshot because entries are deleted on the way.
            for k, v in list(self.__values.items()):
                if v not in arg:
                    del self.__values[k]

    def isdisjoint(self, other):
        '''Return True if two sets have a null intersection.
        '''
        return len(self.intersection(other)) == 0

    def issubset(self, other):
        '''Report whether another set contains this set.
        '''
        return self.__class__(other, locale=self.locale,
                              case_sensitive=self.case_sensitive,
                              comparison_level=self.comparison_level).issuperset(self)

    def issuperset(self, other):
        '''Report whether this set contains another set.
        '''
        return len(self.__class__(other, locale=self.locale,
                                  case_sensitive=self.case_sensitive,
                                  comparison_level=self.comparison_level)) == len(self.intersection(other))

    def pop(self):
        '''Remove and return an arbitrary set element.

        Raises KeyError if the set is empty.
        '''
        return self.__values.popitem()[1]

    def remove(self, val):
        '''Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        '''
        del self.__values[self.__in_key(val)]

    def symmetric_difference(self, other):
        '''Return the symmetric difference of two sets as a new set.

        (i.e. all elements that are in exactly one of the sets.)
        '''
        ret = self.__class__(self)
        ret.update(other)
        ret.difference_update(self.intersection(other))
        return ret

    def symmetric_difference_update(self, other):
        '''Update a set with the symmetric difference of itself and another.
        '''
        bck = self.__class__(self)
        self.update(other)
        self.difference_update(bck.intersection(other))

    def union(self, *others):
        '''Return the union of sets as a new set.

        (i.e. all elements that are in either set.)
        '''
        ret = self.__class__(self)
        ret.update(*others)
        return ret

    def update(self, *others):
        '''Update a set with the union of itself and others.
        '''
        for other in others:
            if isinstance(other, self.__class__) and self.__in_equality(other):
                self.__values.update(other.__values)
            else:
                self.__values.update({self.__in_key(i): i for i in other})

    def __and__(self, other):
        '''x.__and__(y) <==> x&y
        '''
        self.__check_peer(other)
        return self.intersection(other)

    def __contains__(self, obj):
        '''x.__contains__(y) <==> y in x.
        '''
        return self.__in_key(obj) in self.__values

    def __eq__(self, other):
        '''x.__eq__(y) <==> x==y
        '''
        return isinstance(other, self.__class__) and self.__in_equality(other) \
            and set(self.__values.keys()) == set(other.__values.keys())

    def __ge__(self, other):
        '''x.__ge__(y) <==> x>=y
        '''
        self.__check_peer(other)
        return self.issuperset(other)

    def __gt__(self, other):
        '''x.__gt__(y) <==> x>y
        '''
        self.__check_peer(other)
        return self.issuperset(other) and self != other

    def __iand__(self, other):
        '''x.__iand__(y) <==> x&=y
        '''
        self.__check_peer(other)
        self.intersection_update(other)
        return self

    def __ior__(self, other):
        '''x.__ior__(y) <==> x|=y
        '''
        self.__check_peer(other)
        self.update(other)
        return self

    def __isub__(self, other):
        '''x.__isub__(y) <==> x-=y
        '''
        self.__check_peer(other)
        self.difference_update(other)
        return self

    def __iter__(self):
        '''x.__iter__() <==> iter(x)
        '''
        # values() exists on Python 2 and 3, unlike itervalues().
        return iter(self.__values.values())

    def __ixor__(self, other):
        '''x.__ixor__(y) <==> x^=y
        '''
        self.__check_peer(other)
        self.symmetric_difference_update(other)
        return self

    def __le__(self, other):
        '''x.__le__(y) <==> x<=y
        '''
        self.__check_peer(other)
        return self.issubset(other)

    def __len__(self):
        '''x.__len__() <==> len(x)
        '''
        return len(self.__values)

    def __lt__(self, other):
        '''x.__lt__(y) <==> x<y
        '''
        self.__check_peer(other)
        return self.issubset(other) and self != other

    def __ne__(self, other):
        '''x.__ne__(y) <==> x!=y
        '''
        return not self == other

    def __or__(self, other):
        '''x.__or__(y) <==> x|y
        '''
        self.__check_peer(other)
        return self.union(other)

    def __rand__(self, other):
        '''x.__rand__(y) <==> y&x
        '''
        self.__check_peer(other)
        return other & self

    def __repr__(self, _repr_running={}):
        '''x.__repr__() <==> repr(x)
        '''
        # _repr_running is deliberately a shared mutable default: it records
        # the (object, thread) pairs being rendered to stop recursion.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            # list(): a plain list repr on Python 3 as well as Python 2.
            return '%s(%r)' % (self.__class__.__name__,
                               list(self.__values.values()))
        finally:
            del _repr_running[call_key]

    def __ror__(self, other):
        '''x.__ror__(y) <==> y|x
        '''
        self.__check_peer(other)
        return other | self

    def __rsub__(self, other):
        '''x.__rsub__(y) <==> y-x
        '''
        self.__check_peer(other)
        return other - self

    def __rxor__(self, other):
        '''x.__rxor__(y) <==> y^x
        '''
        self.__check_peer(other)
        return other ^ self

    def __sizeof__(self):
        '''S.__sizeof__() -> size of S in memory, in bytes
        '''
        # Was ``self.__value`` (no trailing "s"), an attribute that is never
        # set, so every call raised AttributeError.
        return self.__values.__sizeof__()

    def __sub__(self, other):
        '''x.__sub__(y) <==> x-y
        '''
        self.__check_peer(other)
        return self.difference(other)

    def __xor__(self, other):
        '''x.__xor__(y) <==> x^y
        '''
        self.__check_peer(other)
        return self.symmetric_difference(other)

    def __reduce__(self):
        '''Pickle support: rebuild through ``unicode_set_from_data``.

        The state dict holds the comparison settings plus any attribute
        that a freshly constructed default unicode_set does not have.
        '''
        inst_dict = vars(self).copy()
        for k in vars(unicode_set()):
            inst_dict.pop(k, None)
        inst_dict.update({
            'locale': self.locale,
            'comparison_level': self.comparison_level,
            'case_sensitive': self.case_sensitive
        })
        # list(): a dict view cannot be pickled on Python 3.
        return (unicode_set_from_data,
                ([list(self.__values.values())], inst_dict))
def __init__(self, localedir=None, lang=None, domain=None, languages=None):
    """
    Init a WearNowLocale. Run __init_first_instance() to set up the
    environment if this is the first run. Return __first_instance
    otherwise if called without arguments.

    :param localedir: directory holding the translations. It is used only
                      when it exists on disk; otherwise the first
                      instance's localedir is tried, and failing that a
                      warning is logged and ``self.localedir`` stays None.
    :param lang: stored as ``self.lang``.
    :param domain: translation domain, ``'wearnow'`` when empty.
    :param languages: colon-separated language list. Every entry goes
                      through ``check_available_translations`` and falsy
                      results are dropped.
    """
    global _hdlr
    # initialized is special, used only for the "first instance",
    # and created by __new__(). It's used to prevent re-__init__ing
    # __first_instance when __new__() returns its pointer.
    if hasattr(self, 'initialized') and self.initialized:
        return

    _first = self._WearNowLocale__first_instance
    self.localedir = None
    # Everything breaks without localedir, so get that set up
    # first.  Warnings are logged in _init_first_instance or
    # _init_secondary_locale if this comes up empty.
    if localedir and os.path.exists(os.path.abspath(localedir)):
        self.localedir = localedir
    elif (_first and hasattr(_first, 'localedir') and _first.localedir and
          os.path.exists(os.path.abspath(_first.localedir))):
        self.localedir = _first.localedir
    else:
        # Logger.warn() is a deprecated alias that newer Python versions no
        # longer provide; warning() is the supported spelling.
        LOG.warning('Missing or invalid localedir %s; no translations will be available.', repr(localedir))

    self.lang = lang
    self.localedomain = domain or 'wearnow'
    if languages:
        self.language = [x for x in [self.check_available_translations(l)
                                     for l in languages.split(":")]
                         if x]
    else:
        self.language = None

    if self == _first:
        self._WearNowLocale__init_first_instance()
    else:
        self._init_secondary_locale()

    self.icu_locales = {}
    self.collator = None
    if HAVE_ICU:
        self.icu_locales["default"] = Locale.createFromName(self.lang)
        # Use a dedicated collation locale only when it differs from the
        # display language; otherwise share the default one.
        if self.collation and self.collation != self.lang:
            self.icu_locales["collation"] = Locale.createFromName(self.collation)
        else:
            self.icu_locales["collation"] = self.icu_locales["default"]
        try:
            self.collator = Collator.createInstance(self.icu_locales["collation"])
        except ICUError as err:
            LOG.warning("Unable to create collator: %s", str(err))
            self.collator = None

    try:
        self.translation = self._get_translation(self.localedomain,
                                                 self.localedir,
                                                 self.language)
    except ValueError:
        # self.language may still be None here; joining None would raise a
        # TypeError from inside the handler and hide the fallback below.
        LOG.warning("Unable to find translation for languages in %s, using US English",
                    ':'.join(self.language or ()))
        self.translation = WearNowNullTranslations()
        self.translation._language = "en"

    if _hdlr:
        LOG.removeHandler(_hdlr)
        _hdlr = None
    self._dd = self._dp = None
    # Guards against running twice on the first instance.
    self.initialized = True
# LOG.setLevel(logging.DEBUG) try: from icu import Locale, Collator HAVE_ICU = True except ImportError: try: from PyICU import Locale, Collator HAVE_ICU = True except ImportError as err: # No logger, save the warning message for later. _icu_err = ("ICU not loaded because %s. Localization will be impaired. " "Use your package manager to install PyICU" % str(err)) ICU_LOCALES = None if HAVE_ICU: ICU_LOCALES = Locale.getAvailableLocales() # Map of languages for converting to Microsoft locales and naming # locales for display to the user. It's important to add to this list # when a new translation is added. Note the dummy _(): That's just to # get xgettext to include the string in wearnow.pot; actual translation # is done in _get_language_string() below. # (The wearnow officially-supported language list is ALL_LINGUAS in setup.py) _ = lambda x: x _LOCALE_NAMES = { 'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")), 'bg': ('Bulgrian_Bulgaria', '1251', _("Bulgarian")), 'br': (None, None, _("Breton")), #Windows has no translation for Breton 'ca': ('Catalan_Spain', '1252', _("Catalan")), 'cs': ('Czech_Czech Republic', '1250', _("Czech")), 'da': ('Danish_Denmark', '1252', _("Danish")),
def __init__(self):
    """Build the word-boundary break iterator for the 'ar' locale.

    The iterator is kept on the instance as ``self.BreakIterator``.
    """
    arabic_locale = Locale.createFromName('ar')
    word_iterator = BreakIterator.createWordInstance(arabic_locale)
    self.BreakIterator = word_iterator
class unicode_dict(dict):
    '''Dictionary that support unicode comparison as defined by icu (UCA)

    ``str`` keys are stored under the sort key produced by an ICU collator;
    keys of any other type are stored as they are.  Every entry of the
    underlying ``dict`` holds an ``(original_key, value)`` pair so that
    iteration can hand back the keys as they were supplied.
    '''

    def __init__(self, *args, **kwargs):
        '''Initialize a unicode dictionary.

        The signature is changed because the kwargs are used to set the
        comparison details:

        * ``locale`` -- name given to ``Locale`` (default ``'en_US'``)
        * ``comparison_level`` -- collator strength (default 0, clamped to
          the range 0..3 unless copied from another instance)
        * ``case_sensitive`` -- switches the collator CASE_LEVEL attribute
          on (default False)

        At most one positional argument is accepted: a mapping or an
        iterable of ``(key, value)`` pairs used as initial content.  When it
        is an instance of this class, its comparison settings are reused for
        every setting that is not passed explicitly.
        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        if len(args) == 1 and isinstance(args[0], self.__class__):
            locale = args[0].locale if 'locale' not in kwargs else kwargs.pop('locale')
            comparison_level = args[0].comparison_level if 'comparison_level' \
                not in kwargs else kwargs.pop('comparison_level')
            case_sensitive = args[0].case_sensitive if 'case_sensitive' \
                not in kwargs else kwargs.pop('case_sensitive')
        else:
            locale = kwargs.pop('locale', 'en_US')
            comparison_level = max(0, min(3, kwargs.pop('comparison_level', 0)))
            case_sensitive = kwargs.pop('case_sensitive', False)
        self.__locale = Locale(locale)
        self.__collator = Collator.createInstance(self.__locale)
        self.__collator.setStrength(comparison_level)
        self.__collator.setAttribute(
            UCollAttribute.CASE_LEVEL,
            UCollAttributeValue.ON if case_sensitive else UCollAttributeValue.OFF)
        if len(args) == 1:
            if isinstance(args[0], Mapping):
                vals = list(args[0].items())
            else:
                vals = args[0]
            for key, val in vals:
                self.__setitem__(key, val)

    @property
    def locale(self):
        '''Name of the locale the collator was created for.'''
        return self.__locale.getName()

    @property
    def comparison_level(self):
        '''Strength currently set on the collator.'''
        return self.__collator.getStrength()

    @property
    def case_sensitive(self):
        '''True when the collator CASE_LEVEL attribute is ON.'''
        return self.__collator.getAttribute(UCollAttribute.CASE_LEVEL) == UCollAttributeValue.ON

    def __in_key(self, key):
        '''Return the internal storage key: the sort key for ``str``, else ``key``.'''
        return self.__collator.getSortKey(key) if isinstance(key, str) else key

    def __setitem__(self, key, value):
        # Keep the caller's key next to the value so it can be returned later.
        super(unicode_dict, self).__setitem__(self.__in_key(key), (key, value))

    def __getitem__(self, key):
        try:
            return super(unicode_dict, self).__getitem__(self.__in_key(key))[1]
        except KeyError:
            # Report the key the caller used, not the internal sort key.
            raise KeyError(key)

    def get(self, key, default=None):
        '''Return the value stored for ``key``, or ``default`` when absent.'''
        try:
            return super(unicode_dict, self).__getitem__(self.__in_key(key))[1]
        except KeyError:
            return default

    def __delitem__(self, key):
        try:
            super(unicode_dict, self).__delitem__(self.__in_key(key))
        except KeyError:
            # Report the key the caller used, not the internal sort key.
            raise KeyError(key)

    def __iter__(self):
        for i, _ in super(unicode_dict, self).values():
            yield i

    def __contains__(self, key):
        return super(unicode_dict, self).__contains__(self.__in_key(key))

    def clear(self):
        '''Remove every entry.'''
        super(unicode_dict, self).clear()

    def keys(self):
        '''Return a list of the keys as they were originally supplied.'''
        return list(self)

    def values(self):
        '''Return a list of the stored values.'''
        return [i for _, i in super(unicode_dict, self).values()]

    def items(self):
        '''Return a list of ``(original_key, value)`` pairs.'''
        return [i for i in super(unicode_dict, self).values()]

    def iterkeys(self):
        '''Iterate over the original keys.'''
        return iter(self)

    def itervalues(self):
        '''Iterate over the stored values.'''
        for _, i in super(unicode_dict, self).values():
            yield i

    def iteritems(self):
        '''Iterate over ``(original_key, value)`` pairs.'''
        for i in super(unicode_dict, self).values():
            yield i

    def update(self, *args, **kwargs):
        '''Add the entries of a mapping / iterable of pairs, then the keywords.

        Accepts at most one positional argument; raises TypeError otherwise.
        Calling it with keywords only, or with nothing at all, is allowed.
        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        if args:
            source = args[0]
            if isinstance(source, Mapping):
                vals = list(source.items())
            else:
                vals = source
            for key, val in vals:
                self.__setitem__(key, val)
        # Iterating kwargs directly yields only the names; items() is needed
        # to get the (name, value) pairs.
        for key, val in kwargs.items():
            self.__setitem__(key, val)

    # Sentinel that lets pop() tell "no default given" apart from None.
    __marker = object()

    def pop(self, key, default=__marker):
        '''Remove ``key`` and return its value.

        Returns ``default`` when the key is absent and a default was given,
        raises KeyError otherwise.
        '''
        if key in self:
            r = self[key]
            del self[key]
            return r
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        '''Return the value for ``key``, storing ``default`` first if absent.'''
        if key in self:
            return self[key]
        self[key] = default
        return default

    def popitem(self):
        '''Remove and return one ``(original_key, value)`` pair.'''
        _, v = super(unicode_dict, self).popitem()
        return v

    def __repr__(self, _repr_running={}):
        # The shared default dict is deliberate: it records which
        # (object id, thread ident) pairs are currently being rendered, so a
        # dictionary that contains itself prints '...' instead of recursing.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, dict(list(self.items())))
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        '''Return the ``(callable, args)`` pair used to rebuild this dictionary.'''
        items = list(self.items())
        inst_dict = vars(self).copy()
        # Drop the attributes every fresh instance already owns; only the
        # extras added to this particular object need to travel along.
        for k in vars(unicode_dict()):
            inst_dict.pop(k, None)
        inst_dict.update({
            'locale': self.locale,
            'comparison_level': self.comparison_level,
            'case_sensitive': self.case_sensitive,
        })
        return (unicode_dict_from_data, ([items], inst_dict))

    def copy(self):
        '''Return a new instance with the same settings and content.'''
        return self.__class__(self)

    def __eq__(self, other):
        '''
        Two unicode_dict are equal only if have all keys equal and the matching val is equal
        unicode_dict are equal only with themselves
        '''
        if not isinstance(other, self.__class__):
            return False
        return self.locale == other.locale and self.comparison_level == other.comparison_level and \
            self.case_sensitive == other.case_sensitive and \
            unicode_set(iter(self.keys())) == unicode_set(iter(other.keys())) and \
            all(self[k] == other[k] for k in self)

    def __ne__(self, other):
        return not self == other