def detectScript(txt):
    """Return a list holding one script value per character of *txt*.

    Each character starts with whatever ``script()`` reports for it.
    Characters whose script is listed in ``UNKNOWN_SCRIPT`` are then
    resolved from their neighbours in three passes, so that the returned
    list never contains ``None``.
    """
    resolved = [script(c) for c in txt]

    # Pass 1 (left to right): a character with an unknown script borrows the
    # already-resolved script of its predecessor, or None when it is the
    # first character.  A mirrored character of category "Pe" is reset to
    # None so that the later passes decide its script instead.
    for idx, char in enumerate(txt):
        current = resolved[idx]
        if current in UNKNOWN_SCRIPT:
            current = resolved[idx - 1] if idx else None
            cat = category(char)
            if char in MIRRORED and cat == "Pe":
                current = None
        resolved[idx] = current

    # Pass 2 (right to left): every remaining None takes the script of the
    # nearest resolved character that follows it.
    following = None
    for idx in reversed(range(len(txt))):
        if resolved[idx] is None:
            resolved[idx] = following
        else:
            following = resolved[idx]

    # Pass 3 (left to right): entries still None have no resolved character
    # after them, so they take the preceding script instead; "Zxxx" is the
    # last resort when nothing in the text was resolved at all.
    preceding = "Zxxx"
    for idx, value in enumerate(resolved):
        if value is None:
            resolved[idx] = preceding
        else:
            preceding = value

    assert all(entry is not None for entry in resolved)

    return resolved
def unicodeScriptDirection(uv):
    """Return the horizontal direction of the script of code point *uv*.

    *uv* is an integer code point.  Its script is looked up with
    ``unicodedata.script``; when that script is one of ``DFLT_SCRIPTS``
    the result is ``None``, otherwise it is whatever
    ``unicodedata.script_horizontal_direction`` returns for the script.
    """
    script_tag = unicodedata.script(chr(uv))
    return (
        None
        if script_tag in DFLT_SCRIPTS
        else unicodedata.script_horizontal_direction(script_tag)
    )
def test_script():
    """Check ``unicodedata.script`` against known code point/script pairs.

    The table starts with boundary and unassigned code points and continues
    with randomly sampled characters, one per script.  Code points are
    converted with the builtin ``chr`` (``unichr`` only exists on Python 2),
    and a failing assertion reports the offending code point.
    """
    assert unicodedata.script("a") == "Latn"

    expected_scripts = [
        # boundary / unassigned code points
        (0x0000, "Zyyy"),
        (0x0378, "Zzzz"),
        (0x10FFFF, "Zzzz"),
        # these were randomly sampled, one character per script
        (0x1E918, "Adlm"),
        (0x1170D, "Ahom"),
        (0x145A0, "Hluw"),
        (0x0607, "Arab"),
        (0x056C, "Armn"),
        (0x10B27, "Avst"),
        (0x1B41, "Bali"),
        (0x168AD, "Bamu"),
        (0x16ADD, "Bass"),
        (0x1BE5, "Batk"),
        (0x09F3, "Beng"),
        (0x11C5B, "Bhks"),
        (0x3126, "Bopo"),
        (0x1103B, "Brah"),
        (0x2849, "Brai"),
        (0x1A0A, "Bugi"),
        (0x174E, "Buhd"),
        (0x18EE, "Cans"),
        (0x102B7, "Cari"),
        (0x1053D, "Aghb"),
        (0x11123, "Cakm"),
        (0xAA1F, "Cham"),
        (0xAB95, "Cher"),
        (0x1F0C7, "Zyyy"),
        (0x2C85, "Copt"),
        (0x12014, "Xsux"),
        (0x1082E, "Cprt"),
        (0xA686, "Cyrl"),
        (0x10417, "Dsrt"),
        (0x093E, "Deva"),
        (0x1BC4B, "Dupl"),
        (0x1310C, "Egyp"),
        (0x1051C, "Elba"),
        (0x2DA6, "Ethi"),
        (0x10AD, "Geor"),
        (0x2C52, "Glag"),
        (0x10343, "Goth"),
        (0x11371, "Gran"),
        (0x03D0, "Grek"),
        (0x0AAA, "Gujr"),
        (0x0A4C, "Guru"),
        (0x23C9F, "Hani"),
        (0xC259, "Hang"),
        (0x1722, "Hano"),
        (0x108F5, "Hatr"),
        (0x05C2, "Hebr"),
        (0x1B072, "Hira"),
        (0x10847, "Armi"),
        (0x033A, "Zinh"),
        (0x10B66, "Phli"),
        (0x10B4B, "Prti"),
        (0xA98A, "Java"),
        (0x110B2, "Kthi"),
        (0x0CC6, "Knda"),
        (0x3337, "Kana"),
        (0xA915, "Kali"),
        (0x10A2E, "Khar"),
        (0x17AA, "Khmr"),
        (0x11225, "Khoj"),
        (0x112B6, "Sind"),
        (0x0ED7, "Laoo"),
        (0xAB3C, "Latn"),
        (0x1C48, "Lepc"),
        (0x1923, "Limb"),
        (0x1071D, "Lina"),
        (0x100EC, "Linb"),
        (0xA4E9, "Lisu"),
        (0x10284, "Lyci"),
        (0x10926, "Lydi"),
        (0x11161, "Mahj"),
        (0x0D56, "Mlym"),
        (0x0856, "Mand"),
        (0x10AF0, "Mani"),
        (0x11CB0, "Marc"),
        (0x11D28, "Gonm"),
        (0xABDD, "Mtei"),
        (0x1E897, "Mend"),
        (0x109B0, "Merc"),
        (0x10993, "Mero"),
        (0x16F5D, "Plrd"),
        (0x1160B, "Modi"),
        (0x18A8, "Mong"),
        (0x16A48, "Mroo"),
        (0x1128C, "Mult"),
        (0x105B, "Mymr"),
        (0x108AF, "Nbat"),
        (0x19B3, "Talu"),
        (0x1143D, "Newa"),
        (0x07F4, "Nkoo"),
        (0x1B192, "Nshu"),
        (0x169C, "Ogam"),
        (0x1C56, "Olck"),
        (0x10CE9, "Hung"),
        (0x10316, "Ital"),
        (0x10A93, "Narb"),
        (0x1035A, "Perm"),
        (0x103D5, "Xpeo"),
        (0x10A65, "Sarb"),
        (0x10C09, "Orkh"),
        (0x0B60, "Orya"),
        (0x104CF, "Osge"),
        (0x104A8, "Osma"),
        (0x16B12, "Hmng"),
        (0x10879, "Palm"),
        (0x11AF1, "Pauc"),
        (0xA869, "Phag"),
        (0x10909, "Phnx"),
        (0x10B81, "Phlp"),
        (0xA941, "Rjng"),
        (0x16C3, "Runr"),
        (0x0814, "Samr"),
        (0xA88C, "Saur"),
        (0x111C8, "Shrd"),
        (0x1045F, "Shaw"),
        (0x115AD, "Sidd"),
        (0x1D8C0, "Sgnw"),
        (0x0DB9, "Sinh"),
        (0x110F9, "Sora"),
        (0x11A60, "Soyo"),
        (0x1B94, "Sund"),
        (0xA81F, "Sylo"),
        (0x0740, "Syrc"),
        (0x1714, "Tglg"),
        (0x1761, "Tagb"),
        (0x1965, "Tale"),
        (0x1A32, "Lana"),
        (0xAA86, "Tavt"),
        (0x116A5, "Takr"),
        (0x0B8E, "Taml"),
        (0x1754D, "Tang"),
        (0x0C40, "Telu"),
        (0x07A4, "Thaa"),
        (0x0E42, "Thai"),
        (0x0F09, "Tibt"),
        (0x2D3A, "Tfng"),
        (0x114B0, "Tirh"),
        (0x1038B, "Ugar"),
        (0xA585, "Vaii"),
        (0x118CF, "Wara"),
        (0xA066, "Yiii"),
        (0x11A31, "Zanb"),
    ]

    for codepoint, expected in expected_scripts:
        actual = unicodedata.script(chr(codepoint))
        assert actual == expected, (
            f"U+{codepoint:04X}: expected {expected!r}, got {actual!r}"
        )
def script(value):
    """Return the script name for the integer code point *value*.

    The code point is turned into a character, its script is looked up
    with ``unicodedata.script``, and that result is converted by
    ``unicodedata.script_name`` with ``"Unknown"`` supplied as the default.
    """
    script_code = unicodedata.script(chr(value))
    return unicodedata.script_name(script_code, default="Unknown")
cps_to_value = dict[range, str]() for line in path.read_text().splitlines(): if not (content := line.partition("#")[0].strip()): continue field_0, value = [i.strip() for i in content.split(";")] start, _, stop = field_0.partition("..") cps_to_value[range(int(start, 16), int(stop or start, 16) + 1)] = value script_to_value_to_cps = dict[str, dict[str, list[int]]]() for cp, value in sorted( (i, v) for k, v in cps_to_value.items() for i in k): value_to_cps = script_to_value_to_cps.setdefault( unicodedata.script_name(unicodedata.script(chr(cp))), {}) value_to_cps.setdefault(value, list()).append(cp) # yaml.add_representer( # int, # lambda dumper, data: dumper.represent_scalar("tag:yaml.org,2002:int", f"0x{data:04X}"), # ) path = directory / "tests" / (property_name + ".yaml") with path.open("w") as f: yaml.dump( { k: { value: [{ i: unicodedata.name(chr(i)) } for i in cps] for value, cps in v.items()
], stdin=subprocess.PIPE) process.communicate( json.dumps(font, ensure_ascii=False, separators=(',', ':')).encode()) baseName = sys.argv[1] scaleZh = int(sys.argv[2]) scaleEn = int(sys.argv[3]) font = ReadFont("src/font/{}.otf".format(baseName)) del font['GSUB'] del font['GPOS'] del font['GDEF'] Gc(font) scriptMap = {} for cp, name in font['cmap'].items(): cp = int(cp) isZh = script(chr(cp)) == "Hani" scriptMap[name] = scriptMap.get(name) or isZh for name, glyph in font['glyf'].items(): if not scriptMap.get(name): Transform(glyph, scaleEn / scaleZh, 0, 0, scaleEn / scaleZh, 0, 0, True) WriteFont(font, "build/{}.otf".format(ResolveFileName(baseName, scaleZh, scaleEn)))