def seq_name(seq):
    """Return the name for an emoji code point or sequence, or None.

    A one-element ``seq`` is resolved with ``unicode_data.name(seq[0], None)``.
    Longer sequences are looked up, in order, in the combining, flag,
    modifier and zwj sequence tables obtained from ``unicode_data``.  The
    keys of those tables are passed through ``unicode_data.strip_emoji_vs``
    when the tables are built, so a ``seq`` that contains ``EMOJI_VS`` is
    retried in its stripped form when the direct lookup fails.

    The tables are built on the first call and cached in the module-level
    ``_namedata``.
    """
    global _namedata

    if not _namedata:
        def strip_vs_map(seq_map):
            # items() works on both Python 2 and Python 3; iteritems() only
            # exists on Python 2 dicts.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()
            }

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    # First try the sequence exactly as given.
    for data in _namedata:
        if seq in data:
            return data[seq]

    # Then retry without the emoji variation selector.
    if EMOJI_VS in seq:
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None
def seq_name(seq):
    """Return the name for an emoji code point or sequence, or None.

    A one-element ``seq`` is resolved with ``unicode_data.name(seq[0], None)``.
    Longer sequences are looked up, in order, in the combining, flag,
    modifier and zwj sequence tables obtained from ``unicode_data``.  The
    keys of those tables are passed through ``unicode_data.strip_emoji_vs``
    when the tables are built, so a ``seq`` that contains ``EMOJI_VS`` is
    retried in its stripped form when the direct lookup fails.

    The tables are built on the first call and cached in the module-level
    ``_namedata``.
    """
    global _namedata

    if not _namedata:
        def strip_vs_map(seq_map):
            # items() works on both Python 2 and Python 3; iteritems() only
            # exists on Python 2 dicts.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()
            }

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    # First try the sequence exactly as given.
    for data in _namedata:
        if seq in data:
            return data[seq]

    # Then retry without the emoji variation selector.
    if EMOJI_VS in seq:
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None
def _check_zwj_sequences(seq_to_filepath):
    """Verify that zwj sequences are valid.

    Every key of ``seq_to_filepath`` that contains ``ZWJ`` is expected to be
    either a key of ``unicode_data.get_emoji_zwj_sequences()`` or the
    ``strip_vs`` form of such a key that contains ``EMOJI_VS``.  For each key
    that is neither, a 'zwj sequence not defined' line naming its file path
    is written to stderr.

    Args:
      seq_to_filepath: dict mapping sequences to file paths.
    """
    zwj_sequence_to_name = unicode_data.get_emoji_zwj_sequences()

    # Known sequences that carry the emoji variation selector, in stripped
    # form.  Only membership is needed, so a set is enough.
    stripped_known_sequences = set(
        strip_vs(seq) for seq in zwj_sequence_to_name if EMOJI_VS in seq)

    # items() works on both Python 2 and Python 3.
    for seq, fp in seq_to_filepath.items():
        if ZWJ not in seq:
            continue
        if seq in zwj_sequence_to_name or seq in stripped_known_sequences:
            continue
        # sys.stderr.write replaces the Python-2-only 'print >>' statement.
        sys.stderr.write('zwj sequence not defined: %s\n' % fp)
def _check_coverage(seq_to_filepath, unicode_version):
    """Report emoji code points and sequences missing from seq_to_filepath.

    Ensures we have all and only the cps and sequences that we need for the
    font as of this version.  Each problem found is printed as a line
    starting with 'coverage:'.  The checks, in order, are:

      * default aliases (from ``add_aliases.read_default_emoji_aliases()``):
        the target must be present and the alias itself must not be
      * single emoji from ``unicode_data.get_emoji``
      * the special characters '*', '#', U+20E3 and the digits 0-9
      * combining, flag, modifier and zwj sequences from ``unicode_data``
      * the 'unknown flag' PUA code point U+FE82B

    NOTE: ``seq_to_filepath`` is modified in place.  Every usable alias is
    added to it with the value 'alias:' + the target's file path, so the
    later checks treat aliased sequences as covered.

    Args:
      seq_to_filepath: dict mapping sequences (the code uses tuples of code
        points as keys) to file paths.
      unicode_version: passed as ``age`` to the ``unicode_data`` queries.
    """
    age = unicode_version

    # Map the variation-selector-free form of each key back to the key.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' % (
                alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' % (
                alias_str, target_str, seq_name(v)))
            continue
        filename = (
            seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]])
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if (cp,) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' % (
                cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) is needed because Python 3 range objects cannot be
    # concatenated to a list.
    special_cps = [ord('*'), ord('#'), ord(u'\u20e3')]
    special_cps += list(range(0x30, 0x3a))
    for cp in special_cps:
        if cp not in emoji and (cp,) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' % (
                cp, unicode_data.name(cp)))

    # combining sequences
    # items() (here and below) works on both Python 2 and Python 3.
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' % (
                    unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences. normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    zwj_seq_to_name = sorted(
        unicode_data.get_emoji_zwj_sequences(age=age).items())
    for seq, name in zwj_seq_to_name:
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if (0xfe82b,) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def _check_coverage(seq_to_filepath, unicode_version):
    """Report emoji code points and sequences missing from seq_to_filepath.

    Ensures we have all and only the cps and sequences that we need for the
    font as of this version.  Each problem found is printed as a line
    starting with 'coverage:'.  The checks, in order, are:

      * default aliases (from ``add_aliases.read_default_emoji_aliases()``):
        the target must be present and the alias itself must not be
      * single emoji from ``unicode_data.get_emoji``
      * the special characters '*', '#', U+20E3 and the digits 0-9
      * combining, flag, modifier and zwj sequences from ``unicode_data``
      * the 'unknown flag' PUA code point U+FE82B

    NOTE: ``seq_to_filepath`` is modified in place.  Every usable alias is
    added to it with the value 'alias:' + the target's file path, so the
    later checks treat aliased sequences as covered.

    Args:
      seq_to_filepath: dict mapping sequences (the code uses tuples of code
        points as keys) to file paths.
      unicode_version: passed as ``age`` to the ``unicode_data`` queries.
    """
    age = unicode_version

    # Map the variation-selector-free form of each key back to the key.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' % (
                alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' % (
                alias_str, target_str, seq_name(v)))
            continue
        filename = (
            seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]])
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if (cp,) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' % (
                cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) is needed because Python 3 range objects cannot be
    # concatenated to a list.
    special_cps = [ord('*'), ord('#'), ord(u'\u20e3')]
    special_cps += list(range(0x30, 0x3a))
    for cp in special_cps:
        if cp not in emoji and (cp,) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' % (
                cp, unicode_data.name(cp)))

    # combining sequences
    # items() (here and below) works on both Python 2 and Python 3.
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' % (
                    unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences. normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    zwj_seq_to_name = sorted(
        unicode_data.get_emoji_zwj_sequences(age=age).items())
    for seq, name in zwj_seq_to_name:
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if (0xfe82b,) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')