# Example 1
def parse_iamcal_emoji_data_json(path):
	"""Build short-name lookup tables from an emoji data JSON file.

	The file's modification time (as UTC) is formatted with
	'%A %d %B %Y', printed, and stored in ``source_info['emoji_version']``.

	Every record loaded from *path* is registered under each entry of its
	``short_names`` list:

	* if ``unified`` is a single code point (contains no '-'), the entry
	  goes into the first returned map;
	* otherwise, if ``non_qualified`` is a non-empty single code point, the
	  entry goes into the second returned map;
	* otherwise the record is skipped, because it cannot be represented as
	  one character.

	Args:
		path: path of the JSON file; passed to ``os.path.getmtime`` and to
			the file-level ``json_load`` helper (assumed to return a list of
			dicts with the keys 'unified', 'non_qualified', 'name',
			'short_name' and 'short_names' -- TODO confirm against the data).

	Returns:
		A tuple ``(emoji_map, non_qualified)``. Both are dicts mapping a
		short name to a dict with the keys 'code' ('U+' followed by the hex
		code as it appears in the data), 'character', 'sequence' (the short
		name itself) and 'name'.

	Raises:
		AssertionError: if a code point is not above 0x80, if ``short_name``
			is missing from ``short_names``, or if one short name is used
			for two different characters.
	"""
	modification = time.gmtime(os.path.getmtime(path))
	emoji_version = time.strftime('%A %d %B %Y', modification)
	print(emoji_version, modification)
	source_info['emoji_version'] = emoji_version

	input_list = json_load(path)
	emoji_map = {}
	non_qualified = {}
	# short name -> character, used to detect one short name being claimed
	# by two different characters.
	input_map = {}
	for info in input_list:
		code = info['unified']
		unified = True
		if '-' in code:
			# Multi code point sequence: fall back to the non-qualified
			# form, which is only usable when it is a single code point.
			code = info['non_qualified']
			if not code or '-' in code:
				continue
			unified = False

		ch = int(code, 16)
		assert ch > 0x80, info
		character = chr(ch)
		code = 'U+' + code
		name = info['name'].title().strip()
		if not name:
			# No name in the data: ask the file-level UnicodeData helper.
			name = UnicodeData.getCharacterName(character)
		short_name = info['short_name']
		short_names = info['short_names']
		assert short_name in short_names

		target = emoji_map if unified else non_qualified
		for sequence in short_names:
			if sequence in input_map:
				assert character == input_map[sequence], (sequence, info)
			else:
				input_map[sequence] = character
			target[sequence] = {
				'code': code,
				'character': character,
				'sequence': sequence,
				'name': name,
			}

	return emoji_map, non_qualified
# Example 2
def GenerateUnicodeControlCharacters():
    # for kUnicodeControlCharacterTable in Edit.c
    ucc_table = [
        "\u200E",  # U+200E	LRM		Left-to-right mark
        "\u200F",  # U+200F	RLM		Right-to-left mark
        "\u200D",  # U+200D	ZWJ		Zero width joiner
        "\u200C",  # U+200C	ZWNJ	Zero width non-joiner
        "\u202A",  # U+202A	LRE		Start of left-to-right embedding
        "\u202B",  # U+202B	RLE		Start of right-to-left embedding
        "\u202D",  # U+202D	LRO		Start of left-to-right override
        "\u202E",  # U+202E	RLO		Start of right-to-left override
        "\u202C",  # U+202C	PDF		Pop directional formatting
        "\u206E",  # U+206E	NADS	National digit shapes substitution
        "\u206F",  # U+206F	NODS	Nominal (European) digit shapes
        "\u206B",  # U+206B	ASS		Activate symmetric swapping
        "\u206A",  # U+206A	ISS		Inhibit symmetric swapping
        "\u206D",  # U+206D	AAFS	Activate Arabic form shaping
        "\u206C",  # U+206C	IAFS	Inhibit Arabic form shaping
        "\u001E",  # U+001E	RS		Record Separator (Block separator)
        "\u001F",  # U+001F	US		Unit Separator (Segment separator)
        "\u2028",  # U+2028	LS		Line Separator
        "\u2029",  # U+2029	PS		Paragraph Separator
        "\u200B",  # U+200B	ZWSP	Zero width space
        "\u2060",  # U+2060	WJ		Word joiner
        "\u2066",  # U+2066	LRI		Left-to-right isolate
        "\u2067",  # U+2067	RLI		Right-to-left isolate
        "\u2068",  # U+2068	FSI		First strong isolate
        "\u2069",  # U+2069	PDI		Pop directional isolate
        "\u061C",  # U+061C	ALM		Arabic letter mark

    for ucc in ucc_table:
        utf8bytes = ucc.encode('utf-8')
        utf8str = ''.join(f'\\x{b:02x}' for b in utf8bytes)
        print(utf8str, f'U+{ord(ucc):04X}', unicodedata.category(ucc),