def ReadData(stream):
    """Group rows of a tab-separated emoji table by category.

    The stream is passed through the code_generator_util helpers
    SkipLineComment, ParseColumnStream (with a b'\\t' delimiter) and
    SelectColumn, keeping columns 0, 2, 8, 9, 10, 11 and 12.  Those
    columns are read as: Unicode code point(s), PUA code, Japanese name,
    docomo name, SoftBank name, KDDI name and a "<category>-<index>"
    field.  Fields are handled as bytes.

    Args:
        stream: Input handed to code_generator_util.SkipLineComment
            (presumably an iterable of byte lines -- confirm against
            the caller).

    Returns:
        A defaultdict(list) keyed by _CATEGORY_MAP[...]['category'].
        Each value is a list of tuples
        (index, code_values, pua_code_value, japanese_name, docomo_name,
        softbank_name, kddi_name), where index is the parsed index plus
        the category's 'offset', code_values is a list of ints parsed
        from space-separated hex tokens, and pua_code_value is an int
        parsed from hex.

    Exits the process with status -1 (after logging.fatal) when exactly
    one of the code point and the Japanese name is present for a row.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter=b'\t')
    rows = code_generator_util.SelectColumn(rows, [0, 2, 8, 9, 10, 11, 12])

    entries_by_category = defaultdict(list)
    for row in rows:
        (code, pua_code, japanese_name, docomo_name, softbank_name,
         kddi_name, category_index) = row

        # A code point and a Japanese name must either both be present
        # or both be absent.
        if code and not japanese_name:
            logging.fatal('No Japanese name for %s found.' % code)
            sys.exit(-1)
        if japanese_name and not code:
            logging.fatal(
                'No Unicode code point for %s found.' % japanese_name)
            sys.exit(-1)

        # Missing code points are replaced by a dummy value of zero.
        code = code or b'0'
        pua_code = pua_code or b'0'

        # A leading '>' marks a non-primary PUA code point.  Such rows are
        # kept (they still carry a unique Unicode code point, e.g.
        # "BLACK SQUARE BUTTON" and "LARGE BLUE CIRCLE"); only the marker
        # is stripped.
        if pua_code[0:1] == b'>':
            pua_code = pua_code[1:]

        hex_tokens = re.split(br' +', code.strip())
        code_values = [int(token, 16) for token in hex_tokens]
        pua_code_value = int(pua_code, 16)

        category_key, raw_index = category_index.split(b'-')
        base_index = int(raw_index)
        category_info = _CATEGORY_MAP[category_key]
        entry = (
            base_index + category_info['offset'],
            code_values,
            pua_code_value,
            japanese_name,
            docomo_name,
            softbank_name,
            kddi_name,
        )
        entries_by_category[category_info['category']].append(entry)

    return entries_by_category
def ReadData(stream):
    """Group rows of a tab-separated emoji table by category.

    The stream is passed through the code_generator_util helpers
    SkipLineComment, ParseColumnStream (with a tab delimiter) and
    SelectColumn, keeping columns 2, 9, 10, 11 and 12.  Those columns
    are read as: code, docomo name, SoftBank name, KDDI name and a
    "<category>-<index>" field.

    Rows whose code is empty or starts with '>' are ignored.

    Args:
        stream: Input handed to code_generator_util.SkipLineComment
            (presumably an iterable of text lines -- confirm against
            the caller).

    Returns:
        A defaultdict(list) keyed by the category string.  Each value is
        a list of tuples
        (index, code_value, docomo_name, softbank_name, kddi_name), where
        index is the unparsed string after the '-' and code_value is the
        code parsed as a hexadecimal int.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter='\t')
    rows = code_generator_util.SelectColumn(rows, [2, 9, 10, 11, 12])

    entries_by_category = defaultdict(list)
    for row in rows:
        code, docomo_name, softbank_name, kddi_name, category_index = row
        # Only rows with a non-empty code that is not '>'-prefixed are used.
        if code and code[0] != '>':
            category, index = category_index.split('-')
            entry = (
                index,
                int(code, 16),
                docomo_name,
                softbank_name,
                kddi_name,
            )
            entries_by_category[category].append(entry)

    return entries_by_category
def ReadData(stream):
    """Group rows of a tab-separated emoji table by category.

    The stream is passed through the code_generator_util helpers
    SkipLineComment, ParseColumnStream (with a tab delimiter) and
    SelectColumn, keeping columns 0, 2, 8, 9, 10, 11 and 12.  Those
    columns are read as: Unicode code point, PUA code, Japanese name,
    docomo name, SoftBank name, KDDI name and a "<category>-<index>"
    field.

    Rows whose PUA code is empty or starts with '>' are ignored.

    Args:
        stream: Input handed to code_generator_util.SkipLineComment
            (presumably an iterable of text lines -- confirm against
            the caller).

    Returns:
        A defaultdict(list) keyed by the category string.  Each value is
        a list of tuples
        (index, code_value, pua_code_value, japanese_name, docomo_name,
        softbank_name, kddi_name), where index is the unparsed string
        after the '-' and both code values are ints parsed from hex.

    Exits the process with status -1 (after logging.fatal) when a row has
    a Japanese name but no Unicode code point.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter='\t')
    rows = code_generator_util.SelectColumn(rows, [0, 2, 8, 9, 10, 11, 12])

    entries_by_category = defaultdict(list)
    for row in rows:
        (code, pua_code, japanese_name, docomo_name, softbank_name,
         kddi_name, category_index) = row

        usable_pua = bool(pua_code) and pua_code[0] != '>'
        if not usable_pua:
            continue

        # A named entry must come with a Unicode code point.
        if not code and japanese_name:
            logging.fatal('No Unicode emoji code point found.')
            sys.exit(-1)

        # Unnamed entries without a code point get a dummy value of zero.
        code = code or '0'

        category, index = category_index.split('-')
        entry = (
            index,
            int(code, 16),
            int(pua_code, 16),
            japanese_name,
            docomo_name,
            softbank_name,
            kddi_name,
        )
        entries_by_category[category].append(entry)

    return entries_by_category