Python get_emoji_zwj_sequences示例，nototools.unicode_data.get_emoji_zwj_sequences Python示例

示例#1

0

显示文件

文件： check_emoji_sequences.py 项目： dougfelt/noto-emoji

def seq_name(seq):
  """Return the name for an emoji sequence, or None if no name is found.

  A one-element sequence is named with unicode_data.name(), using None as
  the default.  Longer sequences are looked up in the combining, flag,
  modifier and zwj sequence tables, in that order.  The table keys are
  passed through unicode_data.strip_emoji_vs() when the tables are built,
  so if the sequence as given is not found and it contains EMOJI_VS, the
  lookup is retried with unicode_data.strip_emoji_vs(seq).

  The tables are built on first use and cached in the module-level
  _namedata.
  """
  global _namedata

  if not _namedata:
    def strip_vs_map(seq_map):
      # Re-key the map by the stripped form of each sequence.
      # items() (not iteritems()) so this runs on both Python 2 and 3.
      return {
          unicode_data.strip_emoji_vs(k): v
          for k, v in seq_map.items()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
        strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
        strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

  if len(seq) == 1:
    return unicode_data.name(seq[0], None)

  # First try the sequence exactly as given.
  for data in _namedata:
    if seq in data:
      return data[seq]

  # Then retry without the emoji variation selector, matching the
  # stripped keys of the cached tables.
  if EMOJI_VS in seq:
    non_vs_seq = unicode_data.strip_emoji_vs(seq)
    for data in _namedata:
      if non_vs_seq in data:
        return data[non_vs_seq]

  return None

示例#2

0

显示文件

文件： check_emoji_sequences.py 项目： danielfspencer/oxfordhack-2019

def seq_name(seq):
    """Return the name for an emoji sequence, or None if no name is found.

    A one-element sequence is named with unicode_data.name(), using None
    as the default.  Longer sequences are looked up in the combining,
    flag, modifier and zwj sequence tables, in that order.  The table keys
    are passed through unicode_data.strip_emoji_vs() when the tables are
    built, so if the sequence as given is not found and it contains
    EMOJI_VS, the lookup is retried with unicode_data.strip_emoji_vs(seq).

    The tables are built on first use and cached in the module-level
    _namedata.
    """
    global _namedata

    if not _namedata:

        def strip_vs_map(seq_map):
            # Re-key the map by the stripped form of each sequence.
            # items() (not iteritems()) so this runs on Python 2 and 3.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()
            }

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    # First try the sequence exactly as given.
    for data in _namedata:
        if seq in data:
            return data[seq]

    # Then retry without the emoji variation selector, matching the
    # stripped keys of the cached tables.
    if EMOJI_VS in seq:
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None

示例#3

0

显示文件

def _check_zwj_sequences(seq_to_filepath):
    """Verify that zwj sequences are valid.

    Each key of seq_to_filepath that contains ZWJ must be either a key of
    unicode_data.get_emoji_zwj_sequences(), or the strip_vs() form of such
    a key that contains EMOJI_VS.  Any other zwj sequence is reported on
    stderr as 'zwj sequence not defined: <filepath>'.

    Args:
      seq_to_filepath: mapping from sequence to the file path reported in
        the error message.
    """
    zwj_sequence_to_name = unicode_data.get_emoji_zwj_sequences()

    # Known sequences that carry the emoji variation selector, keyed by
    # their stripped form.  Only membership is needed, so a set suffices.
    zwj_sequences_without_vs = set(
        strip_vs(seq) for seq in zwj_sequence_to_name if EMOJI_VS in seq)

    # items() (not iteritems()) so this runs on both Python 2 and 3.
    zwj_seq_to_filepath = {
        seq: fp
        for seq, fp in seq_to_filepath.items() if ZWJ in seq
    }

    for seq, fp in zwj_seq_to_filepath.items():
        if seq in zwj_sequence_to_name:
            continue
        if seq in zwj_sequences_without_vs:
            # Matches a canonical sequence once the variation selector is
            # ignored; nothing is reported for this case.
            continue
        # sys.stderr.write() replaces the Python 2-only 'print >>' form
        # and emits the same text, including the trailing newline.
        sys.stderr.write('zwj sequence not defined: %s\n' % fp)

示例#4

0

显示文件

文件： check_emoji_sequences.py 项目： dougfelt/noto-emoji

def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for the
  font as of this version.

  Problems are reported by printing lines that start with 'coverage: '.

  Side effect: for every alias whose target is present and whose source is
  not, an entry 'alias:' + <target filename> is added to seq_to_filepath
  under the alias sequence.

  Args:
    seq_to_filepath: dict from sequence (tuple) to file path; modified in
      place as described above.
    unicode_version: passed as the 'age' argument to the unicode_data
      queries.
  """

  age = unicode_version

  # Map the variation-selector-free form of each of our sequences back to
  # the sequence we actually have.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if (cp,) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  # list(range(...)) because a list cannot be concatenated with a range
  # object on Python 3.
  special_cps = [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a))
  for cp in special_cps:
    if cp not in emoji and (cp,) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  # items() (not iteritems()) here and below so this runs on Python 2 and 3.
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).items())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).items())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).items()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if (0xfe82b,) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')

示例#5

0

显示文件

文件： check_emoji_sequences.py 项目： danielfspencer/oxfordhack-2019

def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for
    the font as of this version.

    Problems are reported by printing lines that start with 'coverage: '.

    Side effect: for every alias whose target is present and whose source
    is not, an entry 'alias:' + <target filename> is added to
    seq_to_filepath under the alias sequence.

    Args:
      seq_to_filepath: dict from sequence (tuple) to file path; modified
        in place as described above.
      unicode_version: passed as the 'age' argument to the unicode_data
        queries.
    """

    age = unicode_version

    # Map the variation-selector-free form of each of our sequences back
    # to the sequence we actually have.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if (cp,) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) because a list cannot be concatenated with a range
    # object on Python 3.
    special_cps = (
        [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)))
    for cp in special_cps:
        if cp not in emoji and (cp,) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    # items() (not iteritems()) here and below so this runs on Python 2
    # and 3.
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if (0xfe82b,) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')