def _init_lang_for_script_map():
  """Fill _lang_for_script_map with one language per script.

  Locale tags are visited in the order produced by
  cldr_data.get_lang_scrs_by_decreasing_global_lit_pop(). Only the first
  language encountered for a given script is recorded, so an earlier tag
  always wins over a later one that shares its script.
  """
  ranked_tags = [
      tag for _, tag in cldr_data.get_lang_scrs_by_decreasing_global_lit_pop()]
  for tag in ranked_tags:
    parts = cldr_data.loc_tag_to_lsrv(tag)
    script = parts[1]
    if script in _lang_for_script_map:
      # An earlier tag already claimed this script; keep that choice.
      continue
    _lang_for_script_map[script] = parts[0]
Exemplo n.º 2
0
def _init_lang_for_script_map():
  """Record, for every script, the first language that uses it.

  The candidate locale tags come from
  cldr_data.get_lang_scrs_by_decreasing_global_lit_pop() and are processed
  in that order. Each tag is split with cldr_data.loc_tag_to_lsrv(); item 0
  of the result is stored as the language and item 1 is the script key.
  Scripts already present in _lang_for_script_map are left untouched.
  """
  tags_in_order = [
      loc_tag
      for _, loc_tag in cldr_data.get_lang_scrs_by_decreasing_global_lit_pop()
  ]
  for loc_tag in tags_in_order:
    fields = cldr_data.loc_tag_to_lsrv(loc_tag)
    script_code = fields[1]
    if script_code in _lang_for_script_map:
      continue
    # First tag seen for this script: remember its language.
    _lang_for_script_map[script_code] = fields[0]
def select_rare_chars_for_loc(script, locs_with_rare_chars,
                              shared_lang_threshold, char_to_lang_map):
    """Pick the most locale-specific rare characters for each locale.

    Args:
        script: text that must occur in a locale tag for that tag to be
            considered (tested with ``in`` against the raw tag).
        locs_with_rare_chars: mapping from hyphenated locale tag to an
            iterable of that locale's rare characters.
        shared_lang_threshold: starting upper bound on how many locales may
            share a character for it to still be selectable.
        char_to_lang_map: mapping from character to the collection of
            locale tags that use it.

    Returns:
        A list of ``(loc_tag, chars)`` 2-tuples, where ``chars`` is a set,
        in the order the locales are returned by
        cldr_data.get_lang_scrs_by_decreasing_global_lit_pop().
    """
    # Every locale starts with the same sharing limit; limits only shrink.
    limit_by_tag = {tag: shared_lang_threshold for tag in locs_with_rare_chars}

    ordered_tags = [
        tag
        for _, tag in cldr_data.get_lang_scrs_by_decreasing_global_lit_pop()
    ]

    picked = []
    for raw_tag in ordered_tags:
        if script not in raw_tag:
            continue
        # Keys of locs_with_rare_chars use '-' where these tags use '_'.
        loc_tag = raw_tag.replace('_', '-')
        if loc_tag not in locs_with_rare_chars:
            continue

        best_chars = set()
        best_share = limit_by_tag[loc_tag]
        # Keep only the characters shared by the fewest locales, provided
        # that count does not exceed this locale's current limit.
        for cp in locs_with_rare_chars[loc_tag]:
            share = len(char_to_lang_map[cp])
            if share > best_share:
                continue
            if share < best_share:
                # Strictly rarer than anything so far: start over.
                best_chars = {cp}
                best_share = share
            else:
                best_chars.add(cp)

        if not best_chars:
            continue

        picked.append((loc_tag, best_chars))
        # Tighten the limit of every locale sharing a chosen character so
        # later locales must find something at least as specific.
        for cp in best_chars:
            for tag in char_to_lang_map[cp]:
                if best_share < limit_by_tag[tag]:
                    limit_by_tag[tag] = best_share
    return picked
def select_rare_chars_for_loc(script, locs_with_rare_chars, shared_lang_threshold,
                              char_to_lang_map):
  """Return (locale, rare chars) pairs, choosing each locale's rarest chars.

  Locales are walked in the order given by
  cldr_data.get_lang_scrs_by_decreasing_global_lit_pop(). A locale is
  considered only if `script` occurs in its tag and its hyphenated form is
  a key of `locs_with_rare_chars`. For such a locale, the characters used
  by the fewest locales (per `char_to_lang_map`) are selected, as long as
  that count is within the locale's current threshold. Thresholds start at
  `shared_lang_threshold` and are lowered for every locale that shares a
  selected character.

  The result is a list of 2-tuples of locale tag and a set of characters;
  locales with nothing selectable are omitted.
  """
  threshold_for = {}
  for known_tag in locs_with_rare_chars:
    threshold_for[known_tag] = shared_lang_threshold

  results = []
  population_ranked = [
      t for _, t in cldr_data.get_lang_scrs_by_decreasing_global_lit_pop()]
  for candidate in population_ranked:
    if script not in candidate:
      continue
    # Lookups below are keyed by the hyphenated form of the tag.
    hyphenated = candidate.replace('_', '-')
    if hyphenated not in locs_with_rare_chars:
      continue

    rarest = set()
    rarest_count = threshold_for[hyphenated]
    for cp in locs_with_rare_chars[hyphenated]:
      sharers = len(char_to_lang_map[cp])
      if sharers > rarest_count:
        # Shared too widely to be worth keeping.
        continue
      if sharers < rarest_count:
        # Found a more specific character; drop the less specific ones.
        rarest = set()
        rarest_count = sharers
      rarest.add(cp)

    if not rarest:
      continue

    results.append((hyphenated, rarest))
    # Lower the bar for every locale that shares one of the chosen chars.
    for cp in rarest:
      for other_tag in char_to_lang_map[cp]:
        if rarest_count < threshold_for[other_tag]:
          threshold_for[other_tag] = rarest_count
  return results