def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Ensure all cps in these sequences are valid emoji cps or specific cps
    used in forming emoji sequences.

    This is a 'pre-check' that reports this specific problem: it only writes
    a report to stderr and returns None.

    Args:
      sorted_seq_to_filepath: mapping whose keys are iterables of code points
        and whose values are recorded against each offending code point
        (presumably the image file path for the sequence).
      unicode_version: None, or a value comparable with
        unicode_data.PROPOSED_EMOJI_AGE and unicode_data.age(cp). When it is
        None or at least PROPOSED_EMOJI_AGE the proposed emoji cps are also
        accepted; otherwise only emoji whose age is <= unicode_version are.
    """
    valid_cps = set(unicode_data.get_emoji())
    if (unicode_version is None
            or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE):
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        valid_cps = set(
            cp for cp in valid_cps if unicode_data.age(cp) <= unicode_version)
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    # Map each offending cp to the values of the sequences it occurs in.
    not_emoji = {}
    # items() instead of the Python 2-only iteritems(): this function already
    # uses print() as a function, and iteritems() does not exist on Python 3
    # dicts.
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not_emoji:
        print(
            'check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
            file=sys.stderr)
        for cp in sorted(not_emoji):
            fps = not_emoji[cp]
            print(
                'check valid emoji cps: %04x (in %d sequences)' % (
                    cp, len(fps)),
                file=sys.stderr)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Ensure all cps in these sequences are valid emoji cps or specific cps
    used in forming emoji sequences.

    This is a 'pre-check' that reports this specific problem. Nothing is
    returned; findings are printed to stderr.

    Args:
      sorted_seq_to_filepath: mapping from a sequence of code points to the
        value reported for it (presumably the source file path).
      unicode_version: None to accept current and proposed emoji, or a
        version compared against unicode_data.PROPOSED_EMOJI_AGE. Below that
        threshold, emoji are limited to those with
        unicode_data.age(cp) <= unicode_version.
    """
    valid_cps = set(unicode_data.get_emoji())
    accept_proposed = (
        unicode_version is None
        or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE)
    if accept_proposed:
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        valid_cps = set(
            cp for cp in valid_cps if unicode_data.age(cp) <= unicode_version)

    # Code points that are not emoji themselves but occur in emoji sequences.
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    not_emoji = {}
    # dict.iteritems() is Python 2-only and fails with AttributeError on
    # Python 3; items() behaves the same here on both.
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not not_emoji:
        return

    print(
        'check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
        file=sys.stderr)
    for cp in sorted(not_emoji):
        fps = not_emoji[cp]
        print(
            'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)),
            file=sys.stderr)
def _get_name(key_tuple): CELL_PREFIX = '<td class="name">' if len(key_tuple) != 1: name = '' else: cp = key_tuple[0] if cp in unicode_data.proposed_emoji_cps(): name = '(proposed) ' + unicode_data.proposed_emoji_name(cp) else: name = unicode_data.name(cp, '(error)') return CELL_PREFIX + name
def _get_name(key_tuple, annotated_tuples): CELL_PREFIX = '<td%s>' % ('' if annotated_tuples is None or key_tuple not in annotated_tuples else ' class="aname"') if len(key_tuple) != 1: name = '(' + ' '.join('U+%04X' % cp for cp in key_tuple) + ')' else: cp = key_tuple[0] if cp in unicode_data.proposed_emoji_cps(): name = '(proposed) ' + unicode_data.proposed_emoji_name(cp) else: name = unicode_data.name(cp, '(error)') return CELL_PREFIX + name
def _check_valid_emoji(sorted_seqs):
    """Ensure all emoji are either valid emoji or specific chars.

    Collects every code point in sorted_seqs that is neither in
    unicode_data.get_emoji() / unicode_data.proposed_emoji_cps() nor one of
    the special code points added below, and reports them on stderr.
    Returns None.

    Args:
      sorted_seqs: iterable of code point sequences.
    """
    valid_cps = set(
        unicode_data.get_emoji() | unicode_data.proposed_emoji_cps())
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag

    not_emoji = set()
    for seq in sorted_seqs:
        not_emoji.update(cp for cp in seq if cp not in valid_cps)

    if not_emoji:
        # sys.stderr.write instead of the Python 2-only `print >> stream`
        # statement, which raises TypeError at runtime on Python 3; the
        # emitted text is the same on both versions.
        sys.stderr.write('%d non-emoji found:\n' % len(not_emoji))
        for cp in sorted(not_emoji):
            sys.stderr.write('%04X\n' % cp)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Ensure all cps in these sequences are valid emoji cps or specific cps
    used in forming emoji sequences.

    This is a 'pre-check' that reports this specific problem. Offending code
    points are listed on stderr and the process is then terminated.

    Args:
      sorted_seq_to_filepath: mapping from a sequence of code points to the
        value reported for it (presumably the source file path).
      unicode_version: None, or a version compared against
        unicode_data.PROPOSED_EMOJI_AGE. When None or at least that value,
        proposed emoji cps are accepted too; otherwise only emoji with
        unicode_data.age(cp) <= unicode_version are accepted.

    Raises:
      SystemExit: if any code point is not valid; the exit message tells the
        user how to bypass the check.
    """
    valid_cps = set(unicode_data.get_emoji())
    if (unicode_version is None
            or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE):
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        valid_cps = set(
            cp for cp in valid_cps if unicode_data.age(cp) <= unicode_version)
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    not_emoji = {}
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not not_emoji:
        return

    print(
        f'check valid emoji cps: {len(not_emoji)} non-emoji cp found',
        file=sys.stderr)
    for cp in sorted(not_emoji):
        fps = not_emoji[cp]
        print(
            f'check the following cp: {cp} - {fps[0]} (in {len(fps)} sequences)',
            file=sys.stderr)

    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module for interactive use and is missing under `python -S` or in
    # frozen builds. Both raise SystemExit with the same message.
    sys.exit(
        "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
    )
def _check_valid_emoji(sorted_seq_to_filepath):
    """Ensure all emoji are either valid emoji or specific chars.

    Every code point that is neither in unicode_data.get_emoji() /
    unicode_data.proposed_emoji_cps() nor one of the special code points
    added below is reported on stderr together with the values of the
    sequences it occurs in. Returns None.

    Args:
      sorted_seq_to_filepath: mapping from a sequence of code points to a
        string (joined with ', ' in the report; presumably the file path).
    """
    valid_cps = set(
        unicode_data.get_emoji() | unicode_data.proposed_emoji_cps())
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag

    not_emoji = {}
    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not_emoji:
        # sys.stderr.write instead of the Python 2-only `print >> stream`
        # statement, which raises TypeError at runtime on Python 3; the
        # emitted text is unchanged.
        sys.stderr.write('%d non-emoji found:\n' % len(not_emoji))
        for cp in sorted(not_emoji):
            sys.stderr.write(
                '%04x (in %s)\n' % (cp, ', '.join(not_emoji[cp])))