示例#1
0
def ReadData(stream):
  """Groups the emoji rows read from `stream` by category.

  The input is piped through code_generator_util.SkipLineComment,
  ParseColumnStream (bytes tab delimiter) and SelectColumn, keeping columns
  0, 2, 8, 9, 10, 11 and 12.  Each resulting row is unpacked, in order, as the
  Unicode code point(s), the PUA code point, the Japanese name, the docomo,
  SoftBank and KDDI names, and a b'<category>-<index>' field.

  Args:
    stream: Input handed to code_generator_util.SkipLineComment.

  Returns:
    A defaultdict(list) keyed by _CATEGORY_MAP[<category>]['category'].  Each
    value is a list of tuples
    (index, code_values, pua_code_value, japanese_name, docomo_name,
     softbank_name, kddi_name), where index is the row's own index plus the
    category's 'offset' and code_values is a list of ints parsed as hex.

  Logs a fatal message and calls sys.exit(-1) when exactly one of the Unicode
  code point and the Japanese name is present in a row.
  """
  rows = code_generator_util.SkipLineComment(stream)
  rows = code_generator_util.ParseColumnStream(rows, delimiter=b'\t')
  rows = code_generator_util.SelectColumn(rows, [0, 2, 8, 9, 10, 11, 12])

  grouped = defaultdict(list)
  for row in rows:
    (code, pua_code, japanese_name, docomo_name, softbank_name, kddi_name,
     category_index) = row

    # The code point and the Japanese name must be both set or both empty.
    if code and not japanese_name:
      logging.fatal('No Japanese name for %s found.' % code)
      sys.exit(-1)
    if japanese_name and not code:
      logging.fatal('No Unicode code point for %s found.' % japanese_name)
      sys.exit(-1)

    # Missing code points fall back to a dummy value of zero.
    code = code or b'0'
    pua_code = pua_code or b'0'

    # A leading '>' marks a non-primary PUA code point.  Such entries are kept
    # (only the marker is dropped) because they still have a unique Unicode
    # code point, e.g. "BLACK SQUARE BUTTON" and "LARGE BLUE CIRCLE".
    if pua_code[:1] == b'>':
      pua_code = pua_code[1:]

    # `code` may hold several space-separated hex values.
    code_points = []
    for hex_token in re.split(br' +', code.strip()):
      code_points.append(int(hex_token, 16))
    pua_code_point = int(pua_code, 16)

    category_key, raw_index = category_index.split(b'-')
    absolute_index = int(raw_index) + _CATEGORY_MAP[category_key]['offset']
    category_name = _CATEGORY_MAP[category_key]['category']

    entry = (absolute_index, code_points, pua_code_point,
             japanese_name, docomo_name, softbank_name, kddi_name)
    grouped[category_name].append(entry)
  return grouped
示例#2
0
def ReadData(stream):
    """Groups carrier emoji rows read from `stream` by category.

    The input is piped through code_generator_util.SkipLineComment,
    ParseColumnStream (tab delimiter) and SelectColumn, keeping columns
    2, 9, 10, 11 and 12.  Each row is unpacked, in order, as a hex code,
    the docomo, SoftBank and KDDI names, and a '<category>-<index>' field.

    Rows whose code is empty or starts with '>' are ignored.

    Args:
        stream: Input handed to code_generator_util.SkipLineComment.

    Returns:
        A defaultdict(list) mapping the category string to a list of
        (index, code, docomo_name, softbank_name, kddi_name) tuples, where
        index is the unparsed string after the '-' and code is an int
        parsed as hex.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter='\t')
    rows = code_generator_util.SelectColumn(rows, [2, 9, 10, 11, 12])

    grouped = defaultdict(list)
    for row in rows:
        code, docomo_name, softbank_name, kddi_name, category_index = row

        # Only rows with a code that is not '>'-prefixed are recorded.
        if code and code[0] != '>':
            category, index = category_index.split('-')
            code_point = int(code, 16)
            entry = (index, code_point,
                     docomo_name, softbank_name, kddi_name)
            grouped[category].append(entry)
    return grouped
示例#3
0
def ReadData(stream):
    """Groups emoji rows read from `stream` by category.

    The input is piped through code_generator_util.SkipLineComment,
    ParseColumnStream (tab delimiter) and SelectColumn, keeping columns
    0, 2, 8, 9, 10, 11 and 12.  Each row is unpacked, in order, as the
    Unicode code, the PUA code, the Japanese name, the docomo, SoftBank and
    KDDI names, and a '<category>-<index>' field.

    Rows whose PUA code is empty or starts with '>' are ignored.

    Args:
        stream: Input handed to code_generator_util.SkipLineComment.

    Returns:
        A defaultdict(list) mapping the category string to a list of
        (index, code, pua_code, japanese_name, docomo_name, softbank_name,
         kddi_name) tuples, where index is the unparsed string after the
        '-' and both codes are ints parsed as hex.

    Logs a fatal message and calls sys.exit(-1) when a kept row has a
    Japanese name but no Unicode code.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter='\t')
    rows = code_generator_util.SelectColumn(rows, [0, 2, 8, 9, 10, 11, 12])

    grouped = defaultdict(list)
    for row in rows:
        (code, pua_code, japanese_name, docomo_name, softbank_name,
         kddi_name, category_index) = row

        has_usable_pua = bool(pua_code) and pua_code[0] != '>'
        if not has_usable_pua:
            continue

        # A named entry must come with a Unicode code.
        if not code and japanese_name:
            logging.fatal('No Unicode emoji code point found.')
            sys.exit(-1)
        # Unnamed entries without a code get a dummy value of zero.
        code = code or '0'

        category, index = category_index.split('-')
        code_point = int(code, 16)
        pua_code_point = int(pua_code, 16)
        entry = (index, code_point, pua_code_point, japanese_name,
                 docomo_name, softbank_name, kddi_name)
        grouped[category].append(entry)
    return grouped