示例#1
0
def _add_aliases(keys, aliases):
  for k, v in sorted(aliases.iteritems()):
    k_str = unicode_data.seq_to_string(k)
    v_str = unicode_data.seq_to_string(v)
    if k in keys:
      msg = '' if v in keys else ' but it\'s not present'
      print('have alias image %s, should use %s%s' % (k_str, v_str, msg))
    elif v not in keys:
      print('can\'t use alias %s, no image matching %s' % (k_str, v_str))
  to_add = {k for k, v in aliases.iteritems() if k not in keys and v in keys}
  return keys | to_add
    def _get_filepath(cp):
        """Return the image file path for codepoint `cp`, or None.

        Canonicalizes the single-codepoint sequence, searches the
        directories in `dir_infos` in order, and falls back to the alias
        table.  Misses are reported to stdout.  Relies on dir_infos,
        basepaths, aliases and key_tuple from the enclosing scope.
        """
        def get_key_filepath(key):
            # Search directories in order; the first one containing the
            # key wins.
            for i in range(len(dir_infos)):
                info = dir_infos[i]
                if key in info.filemap:
                    basepath = basepaths[i]
                    return path.join(basepath, info.filemap[key])
            return None

        cp_key = tuple([cp])
        # Prefer the canonical form of the sequence when one exists.
        cp_key = unicode_data.get_canonical_emoji_sequence(cp_key) or cp_key
        fp = get_key_filepath(cp_key)
        if not fp:
            if cp_key in aliases:
                fp = get_key_filepath(aliases[cp_key])
            else:
                # print() call replaces the Python 2 print statement,
                # which is a syntax error on Python 3.
                print('no alias for %s' % unicode_data.seq_to_string(cp_key))
        if not fp:
            print('no part for %s in %s' % (unicode_data.seq_to_string(
                cp_key), unicode_data.seq_to_string(key_tuple)))
        return fp
示例#3
0
def _get_name(key_tuple, annotated_tuples):
    """Return an HTML '<td>' cell prefix plus the name of the sequence.

    The cell gets class "aname" when key_tuple is in annotated_tuples.
    Falls back to placeholder names for two sequences known to lack
    names, and to '(oops)' (with a diagnostic) for anything else.
    """
    CELL_PREFIX = '<td%s>' % ('' if annotated_tuples is None or key_tuple
                              not in annotated_tuples else ' class="aname"')

    seq_name = unicode_data.get_emoji_sequence_name(key_tuple)
    if seq_name is None:  # 'is None' instead of '== None' (PEP 8)
        if key_tuple == (0x20e3, ):
            seq_name = '(combining enlosing keycap)'
        elif key_tuple == (0xfe82b, ):
            seq_name = '(unknown flag PUA codepoint)'
        else:
            # print() call replaces the Python 2 print statement, which
            # is a syntax error on Python 3.
            print('no name for %s' % unicode_data.seq_to_string(key_tuple))
            seq_name = '(oops)'
    return CELL_PREFIX + seq_name
示例#4
0
def _get_name(key_tuple, annotations):
  """Return an HTML '<td>' cell prefix plus the name of the sequence.

  When `annotations` has an entry for key_tuple, its value is used as
  the cell's CSS class.  Falls back to placeholder names for two
  sequences known to lack names, and to '(oops)' (with a diagnostic)
  for anything else.
  """
  annotation = None if annotations is None else annotations.get(key_tuple)
  CELL_PREFIX = '<td%s>' % (
      '' if annotation is None else ' class="%s"' % annotation)

  seq_name = unicode_data.get_emoji_sequence_name(key_tuple)
  if seq_name is None:  # 'is None' instead of '== None' (PEP 8)
    if key_tuple == (0x20e3,):
      seq_name = '(combining enlosing keycap)'
    elif key_tuple == (0xfe82b,):
      seq_name = '(unknown flag PUA codepoint)'
    else:
      print('no name for %s' % unicode_data.seq_to_string(key_tuple))
      seq_name = '(oops)'
  return CELL_PREFIX + seq_name
示例#5
0
def _parse_annotation_file(afile):
  """Parse file and return a map from sequences to one of 'ok', 'warning',
  or 'error'.

  The file format consists of two kinds of lines.  One defines the annotation
  to apply, it consists of the text 'annotation:' followed by one of 'ok',
  'warning', or 'error'.  The other defines a sequence that should get the most
  recently defined annotation, this is a series of codepoints expressed in hex
  separated by spaces.  The initial default annotation is 'error'.  '#' starts
  a comment to end of line, blank lines are ignored.
  """

  annotations = {}
  line_re = re.compile(r'annotation:\s*(ok|warning|error)|([0-9a-f ]+)')
  annotation = 'error'
  with open(afile, 'r') as f:
    for line in f:
      line = line.strip()
      if not line or line[0] == '#':
        continue
      m = line_re.match(line)
      if not m:
        raise Exception('could not parse annotation "%s"' % line)
      new_annotation = m.group(1)
      if new_annotation:
        annotation = new_annotation
      else:
        seq = tuple([int(s, 16) for s in m.group(2).split()])
        canonical_seq = unicode_data.get_canonical_emoji_sequence(seq)
        if canonical_seq:
          seq = canonical_seq
        if seq in annotations:
          raise Exception(
              'duplicate sequence %s in annotations' %
              unicode_data.seq_to_string(seq))
        annotations[seq] = annotation
  return annotations
def sequence_to_filename(seq, prefix, suffix):
  """Build an image filename: prefix + string form of seq + suffix."""
  return prefix + unicode_data.seq_to_string(seq) + suffix
def generate_names(src_dir,
                   dst_dir,
                   skip_limit=20,
                   omit_groups=None,
                   pretty_print=False,
                   verbose=False):
    """Write a data.json file describing the emoji images in src_dir.

    The output maps each emoji group (category) to a list of name/image
    entries produced by _name_data.

    Args:
      src_dir: directory containing emoji_u*.png image files.
      dst_dir: destination directory for data.json (created if needed).
      skip_limit: max number of sequences allowed to lack an image before
        raising; a negative value disables the limit.
      omit_groups: optional collection of emoji group names to skip.
      pretty_print: when True, write indented JSON.
      verbose: when True, report each sequence skipped for lack of an image.
    """
    srcdir = tool_utils.resolve_path(src_dir)
    if not path.isdir(srcdir):
        print('%s is not a directory' % src_dir, file=sys.stderr)
        return

    if omit_groups:
        # Fail fast on unrecognized group names so a typo doesn't silently
        # omit nothing.
        unknown_groups = set(omit_groups) - set(
            unicode_data.get_emoji_groups())
        if unknown_groups:
            print(
                'did not recognize %d group%s: %s' %
                (len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
                 ', '.join('"%s"' % g
                           for g in omit_groups if g in unknown_groups)),
                file=sys.stderr)
            print('valid groups are:\n  %s' %
                  ('\n  '.join(g for g in unicode_data.get_emoji_groups())),
                  file=sys.stderr)
            return
        print('omitting %d group%s: %s' %
              (len(omit_groups), '' if len(omit_groups) == 1 else 's',
               ', '.join('"%s"' % g for g in omit_groups)))
    else:
        # might be None
        print('keeping all groups')
        omit_groups = []

    # make sure the destination exists
    dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    # _get_image_data returns canonical cp sequences
    print('src dir:', srcdir)
    seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
    print('seq to file has %d sequences' % len(seq_to_file))

    # Aliases add non-gendered versions using gendered images for the most part.
    # But when we display the images, we don't distinguish genders in the
    # naming, we rely on the images-- so these look redundant. So we
    # intentionally don't generate images for these.
    # However, the alias file also includes the flag aliases, which we do want,
    # and it also fails to exclude the unknown flag pua (since it doesn't
    # map to anything), so we need to adjust for this.
    canonical_aliases = generate_emoji_html._get_canonical_aliases()

    aliases = set([
        cps for cps in canonical_aliases.keys()
        if not unicode_data.is_regional_indicator_seq(cps)
    ])
    aliases.add((0xfe82b, ))  # unknown flag PUA
    excluded = aliases | generate_emoji_html._get_canonical_excluded()

    # The flag aliases have distinct names, so we _do_ want to show them
    # multiple times.
    to_add = {}
    for seq in canonical_aliases:
        if unicode_data.is_regional_indicator_seq(seq):
            replace_seq = canonical_aliases[seq]
            if seq in seq_to_file:
                print('warning, alias %s has file %s' %
                      (unicode_data.regional_indicator_seq_to_string(seq),
                       seq_to_file[seq]))
                continue
            # Reuse the target flag's image file for the alias sequence.
            replace_file = seq_to_file.get(replace_seq)
            if replace_file:
                to_add[seq] = replace_file
    seq_to_file.update(to_add)

    data = []
    last_skipped_group = None
    skipcount = 0
    for group in unicode_data.get_emoji_groups():
        if group in omit_groups:
            continue
        name_data = []
        for seq in unicode_data.get_emoji_in_group(group):
            if seq in excluded:
                continue
            seq_file = seq_to_file.get(seq, None)
            if seq_file is None:
                # No image for this sequence: count it and possibly bail.
                skipcount += 1
                if verbose:
                    if group != last_skipped_group:
                        print('group %s' % group)
                        last_skipped_group = group
                    print('  %s (%s)' %
                          (unicode_data.seq_to_string(seq), ', '.join(
                              unicode_data.name(cp, 'x') for cp in seq)))
                if skip_limit >= 0 and skipcount > skip_limit:
                    raise Exception('skipped too many items')
            else:
                name_data.append(_name_data(seq, seq_file))
        data.append({'category': group, 'emojis': name_data})

    outfile = path.join(dstdir, 'data.json')
    with open(outfile, 'w') as f:
        # Compact separators when not pretty-printing keep the file small.
        indent = 2 if pretty_print else None
        separators = None if pretty_print else (',', ':')
        json.dump(data, f, indent=indent, separators=separators)
    print('wrote %s' % outfile)
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for the
  font as of this version.

  Reports missing or conflicting coverage to stdout.  Also mutates
  seq_to_filepath, adding alias keys (with 'alias:'-prefixed filenames)
  for aliases whose targets have files.
  """

  age = unicode_version

  # Map sequences with the emoji variation selector stripped back to the
  # canonical (VS-bearing) key, so lookups can match either form.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  # list(range(...)) is required on Python 3, where range() is not a list.
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  # .items() replaces Python 2-only .iteritems() throughout this function.
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).items())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).items())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).items()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
示例#9
0
def sequence_to_filename(seq, prefix, suffix):
    """Build an image filename: prefix + string form of seq + suffix."""
    return prefix + unicode_data.seq_to_string(seq) + suffix
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Reports missing or conflicting coverage to stdout.  Also mutates
    seq_to_filepath, adding alias keys (with 'alias:'-prefixed filenames)
    for aliases whose targets have files.
    """

    age = unicode_version

    # Map sequences with the emoji variation selector stripped back to the
    # canonical (VS-bearing) key, so lookups can match either form.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) is required on Python 3, where range() is not a list.
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    # .items() replaces Python 2-only .iteritems() throughout this function.
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Reports problems to stdout, mutates seq_to_filepath to add alias
    entries, and exits the process with an error message if any coverage
    problem is found.
    """

    coverage_pass = True
    # NOTE(review): age is assigned but never used below (get_emoji() is
    # called without it, unlike the other coverage checkers) — confirm
    # whether dropping version filtering was intentional.
    age = unicode_version

    # Map sequences with the emoji variation selector stripped back to the
    # canonical (VS-bearing) key, so lookups can match either form.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(f'coverage: alias {alias_str} missing target {target_str}')
            coverage_pass = False
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(
                f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})'
            )
            coverage_pass = False
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji())
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            # :04x keeps codepoints in hex, consistent with the other
            # coverage reports (which use %04x).
            print(f'coverage: missing single {cp:04x} ({unicode_data.name(cp)})')
            coverage_pass = False

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing special {cp:04x} ({unicode_data.name(cp)})')
            coverage_pass = False

    # combining sequences
    # NOTE(review): reads unicode_data's private _emoji_sequence_data —
    # prefer a public accessor if one exists.
    comb_seq_to_name = sorted(unicode_data._emoji_sequence_data.items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print(
                    f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})'
                )
                coverage_pass = False

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
        coverage_pass = False

    if not coverage_pass:
        # sys.exit() instead of the site-module exit() builtin, which is
        # not guaranteed to exist; also fixes the 'metioned' typo.
        sys.exit(
            "Please fix the problems mentioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
        )
def generate_names(
    src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False,
    verbose=False):
  """Write a data.json file describing the emoji images in src_dir.

  The output maps each emoji group (category) to a list of name/image
  entries produced by _name_data.

  Args:
    src_dir: directory containing emoji_u*.png image files.
    dst_dir: destination directory for data.json (created if needed).
    skip_limit: max number of sequences allowed to lack an image before
      raising; a negative value disables the limit.
    omit_groups: optional collection of emoji group names to skip.
    pretty_print: when True, write indented JSON.
    verbose: when True, report each sequence skipped for lack of an image.
  """
  srcdir = tool_utils.resolve_path(src_dir)
  if not path.isdir(srcdir):
    print('%s is not a directory' % src_dir, file=sys.stderr)
    return

  if omit_groups:
    # Fail fast on unrecognized group names so a typo doesn't silently
    # omit nothing.
    unknown_groups = set(omit_groups) - set(unicode_data.get_emoji_groups())
    if unknown_groups:
      print('did not recognize %d group%s: %s' % (
          len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
          ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)), file=sys.stderr)
      print('valid groups are:\n  %s' % (
          '\n  '.join(g for g in unicode_data.get_emoji_groups())), file=sys.stderr)
      return
    print('omitting %d group%s: %s' % (
        len(omit_groups), '' if len(omit_groups) == 1 else 's',
        ', '.join('"%s"' % g for g in omit_groups)))
  else:
    # might be None
    print('keeping all groups')
    omit_groups = []

  # make sure the destination exists
  dstdir = tool_utils.ensure_dir_exists(
      tool_utils.resolve_path(dst_dir))

  # _get_image_data returns canonical cp sequences
  print('src dir:', srcdir)
  seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
  print('seq to file has %d sequences' % len(seq_to_file))

  # Aliases add non-gendered versions using gendered images for the most part.
  # But when we display the images, we don't distinguish genders in the
  # naming, we rely on the images-- so these look redundant. So we
  # intentionally don't generate images for these.
  # However, the alias file also includes the flag aliases, which we do want,
  # and it also fails to exclude the unknown flag pua (since it doesn't
  # map to anything), so we need to adjust for this.
  canonical_aliases = generate_emoji_html._get_canonical_aliases()

  aliases = set([
      cps for cps in canonical_aliases.keys()
      if not unicode_data.is_regional_indicator_seq(cps)])
  aliases.add((0xfe82b,))  # unknown flag PUA
  excluded = aliases | generate_emoji_html._get_canonical_excluded()

  # The flag aliases have distinct names, so we _do_ want to show them
  # multiple times.
  to_add = {}
  for seq in canonical_aliases:
    if unicode_data.is_regional_indicator_seq(seq):
      replace_seq = canonical_aliases[seq]
      if seq in seq_to_file:
        print('warning, alias %s has file %s' % (
            unicode_data.regional_indicator_seq_to_string(seq),
            seq_to_file[seq]))
        continue
      # Reuse the target flag's image file for the alias sequence.
      replace_file = seq_to_file.get(replace_seq)
      if replace_file:
        to_add[seq] = replace_file
  seq_to_file.update(to_add)

  data = []
  last_skipped_group = None
  skipcount = 0
  for group in unicode_data.get_emoji_groups():
    if group in omit_groups:
      continue
    name_data = []
    for seq in unicode_data.get_emoji_in_group(group):
      if seq in excluded:
        continue
      seq_file = seq_to_file.get(seq, None)
      if seq_file is None:
        # No image for this sequence: count it and possibly bail.
        skipcount += 1
        if verbose:
          if group != last_skipped_group:
            print('group %s' % group)
            last_skipped_group = group
          print('  %s (%s)' % (
              unicode_data.seq_to_string(seq),
              ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
        if skip_limit >= 0 and skipcount > skip_limit:
          raise Exception('skipped too many items')
      else:
        name_data.append(_name_data(seq, seq_file))
    data.append({'category': group, 'emojis': name_data})

  outfile = path.join(dstdir, 'data.json')
  with open(outfile, 'w') as f:
    # Compact separators when not pretty-printing keep the file small.
    indent = 2 if pretty_print else None
    separators = None if pretty_print else (',', ':')
    json.dump(data, f, indent=indent, separators=separators)
  print('wrote %s' % outfile)