def render(self, uids, ftml, keyUID=0, descUIDs=None):
    """ Add one test item for a sequence of codepoints to an FTML document.

    uids     -- sequence of codepoints (ints) to render; never modified.
    ftml     -- object whose addToTest() receives the generated string.
    keyUID   -- key passed to addToTest(); 0 means "use the first uid".
    descUIDs -- codepoints used to build the label and comment; when
                empty or None the (original) uids are used instead.

    Returns None.  Nothing is emitted for an empty sequence.
    """
    if not len(uids):
        return

    # Private copy: the insert/append below must not leak to the caller.
    uids = list(uids)
    startUID, uidLen = uids[0], len(uids)

    # keyUID == 0 is the "not supplied" marker.
    keyUID = startUID if keyUID == 0 else keyUID

    # Label and comment are computed BEFORE any helper characters are
    # added, so they only ever describe the requested codepoints.
    descUIDs = descUIDs or uids
    label = '\n'.join('U+{0:04X}'.format(u) for u in descUIDs)
    comment = ' '.join(self._charFromUID[u].basename for u in descUIDs)

    if get_ucd(startUID, 'gc') == 'Mn':
        # Sequence starts with a non-spacing mark: give it a base to sit on.
        uids[:0] = [self.diacBase]
    elif get_ucd(startUID, 'WSpace'):
        # Sequence starts with whitespace: open with the baseline bracket.
        uids[:0] = [0xF130]

    nonMarks = [u for u in uids if get_ucd(u, 'gc') != 'Mn']
    if get_ucd(nonMarks[-1], 'WSpace'):
        # Last non-mark is whitespace: close with the baseline bracket.
        uids.append(0xF131)

    s = ''.join(map(chr, uids))

    if uidLen > 1:
        ftml.addToTest(keyUID, s, label=label, comment=comment)
    else:
        # Single character: no label is passed, leaving it to addToTest
        # to derive one from keyUID.
        ftml.addToTest(keyUID, s, comment=comment)
def __init__(self, uids, basename, logger):
    """ Record a glyph that is keyed by a sequence of codepoints.

    uids     -- sequence of codepoints; the first one supplies `general`.
    basename -- glyph name stored as-is.
    logger   -- object with a log(message, level) method.
    """
    self.logger, self.uids, self.basename = logger, uids, basename

    # The general category is taken from the first uid only.
    # NOTE: when get_ucd raises KeyError a warning is logged and
    # `self.general` is left unset.
    try:
        self.general = get_ucd(uids[0], 'gc')
    except KeyError:
        self.logger.log('USV %04X not defined; no properties known' % uids[0], 'W')

    # Feature tags affecting this sequence.
    self.feats = set()
    # Language tags affecting this sequence.
    self.langs = set()
    # Alternate glyph names.
    self.altnames = {}
def intersperse(main, *extras):
    """Insert extra combining strings into every cluster of each string.

    main   -- iterable of strings.
    extras -- zero or more (str, combiningorder) tuples.  Each one is merged
              into the marks of every cluster, ordered by combining order.
              The order values must be comparable with whatever
              get_ucd(c, 'ccc') returns.

    Returns a list with one rewritten string per element of `main`.

    A cluster is a letter (general category L*) followed by any marks (M*).
    Runs of any other category are kept unchanged as separators.  Sorting
    is stable, so marks with equal combining order keep their relative
    position and extras follow existing marks of the same order.
    """
    extras = list(extras)
    res = []
    for m in main:
        # `groups` alternates cluster, separator, cluster, separator, ...
        groups = []
        base = ""
        for k, run in groupby(m, lambda x: get_ucd(x, 'gc')[0]):
            d = "".join(run)
            if k == "L":
                if base:
                    groups.extend((base, ""))
                # Every letter but the last is a complete, mark-less cluster.
                for c in d[:-1]:
                    groups.extend((c, ""))
                base = d[-1]
            elif k == "M":
                base = base + d
            else:
                groups.extend((base, d))
                base = ""
        if base:
            groups.extend((base, ""))

        for i in range(0, len(groups), 2):
            cluster = groups[i]
            if not cluster:
                # Separator with no preceding base (e.g. leading or
                # consecutive punctuation): nothing to attach marks to.
                continue
            marks = [(c, get_ucd(c, 'ccc')) for c in cluster[1:]]
            # Python 3 has no cmp= argument; a key on the combining order
            # gives the same stable ordering.
            ordered = sorted(marks + extras, key=lambda pair: pair[1])
            groups[i] = cluster[0] + "".join(text for text, _ in ordered)
        res.append("".join(groups))
    return res
def __init__(self, uid, basename, logger):
    """ Record a glyph that is keyed by a single codepoint.

    uid      -- the codepoint (int).
    basename -- glyph name stored as-is.
    logger   -- object with a log(message, level) method.
    """
    self.logger, self.uid, self.basename = logger, uid, basename

    # NOTE: when get_ucd raises KeyError a warning is logged and
    # `self.general` is left unset.
    try:
        self.general = get_ucd(uid, 'gc')
    except KeyError:
        self.logger.log('USV %04X not defined; no properties known' % uid, 'W')

    # Feature tags affecting this character.
    self.feats = set()
    # Language tags affecting this character.
    self.langs = set()
    self.aps = set()
    # Alternate glyph names, keyed by either
    #   a lang tag, e.g. 'ur', or
    #   a feature tag and value, e.g. 'cv24=3';
    # each value is the glyph name of that alternate.
    self.altnames = {}

    # Additional info from the UFO; all start out False.
    self.isBase = False
    self.isMark = False
    self.takesMarks = False
def render(self, uids, ftml, keyUID = 0, addBreaks = True, rtl = None, dualJoinMode = 3, label = None, comment = None):
    """ Add test data for a sequence of codepoints to an FTML document.

    uids         -- sequence of codepoints (ints); never modified.
    ftml         -- object providing addToTest() and closeTest().
    keyUID       -- key passed to addToTest(); 0 means "use the first uid".
    addBreaks    -- when true, contextual-form and mirrored samples are
                    wrapped in closeTest() calls.
    rtl          -- forwarded unchanged to addToTest().
    dualJoinMode -- bit mask used when the sequence joins on both sides:
                    1 = initial/medial/final separated by spaces,
                    2 = three joined copies in a row.
    label        -- label text; built from the uids when None.
    comment      -- comment text; built from glyph names when None.

    Returns None.  Nothing is emitted for an empty sequence.
    """
    if not len(uids):
        return

    # Private copy: the insert/append below must not leak to the caller.
    uids = list(uids)
    startUID, uidLen = uids[0], len(uids)

    # keyUID == 0 is the "not supplied" marker.
    keyUID = startUID if keyUID == 0 else keyUID

    if label is None:
        label = '\n'.join('U+{0:04X}'.format(u) for u in uids)
    if comment is None:
        comment = ' '.join(self._charFromUID[u].basename for u in uids)

    # Does the sequence include any mirrored character?
    hasMirrored = any([get_ucd(u, 'Bidi_M') for u in uids])

    def _wantsZwj(uid, ltrType, rtlType):
        # Dual-joining always needs a zwj; otherwise the joining type that
        # matters depends on direction.  Any char whose bc is not 'L' is
        # assumed to be a right-to-left character of some sort.
        jt = get_ucd(uid, 'jt')
        if jt == 'D':
            return True
        return jt == (ltrType if get_ucd(uid, 'bc') == 'L' else rtlType)

    # Only the first and last non-TRANSPARENT characters decide whether
    # zwj examples are needed.
    joiningChars = [u for u in uids if get_ucd(u, 'jt') != 'T']
    if joiningChars:
        zwjBefore = _wantsZwj(joiningChars[0], 'L', 'R')
        zwjAfter = _wantsZwj(joiningChars[-1], 'R', 'L')
    else:
        zwjBefore = zwjAfter = False

    if get_ucd(startUID, 'gc') == 'Mn':
        # Sequence starts with a non-spacing mark: prefix a suitable base,
        # after which a leading zwj is no longer wanted.
        uids[:0] = [self.diacBase]
        zwjBefore = False
    elif get_ucd(startUID, 'WSpace'):
        # Sequence starts with whitespace: open with the baseline bracket.
        uids[:0] = [0xF130]

    nonMarks = [u for u in uids if get_ucd(u, 'gc') != 'Mn']
    if get_ucd(nonMarks[-1], 'WSpace'):
        # Last non-mark is whitespace: close with the baseline bracket.
        uids.append(0xF131)

    s = ''.join(map(chr, uids))

    if zwjBefore or zwjAfter:
        # Contextual forms, always starting with the isolate.
        pieces = [u'{0} '.format(s)]
        if zwjBefore and zwjAfter:
            # Dual-joining behaviour: what is shown depends on dualJoinMode.
            if dualJoinMode & 1:
                # initial, medial, final separated by space
                pieces.append(u'{0}\u200D \u200D{0}\u200D \u200D{0} '.format(s))
            if dualJoinMode & 2:
                # three joined forms in sequence
                pieces.append(u'{0}{0}{0} '.format(s))
        elif zwjAfter:
            pieces.append(u'{0}\u200D '.format(s))
        elif zwjBefore:
            pieces.append(u'\u200D{0} '.format(s))
        sample = ''.join(pieces)
    elif hasMirrored and self.rtlEnable:
        # Mirrored characters with rtl enabled: show neutral, LTR and RTL.
        sample = u'{0} LTR: \u202A{0}\u202C RTL: \u202B{0}\u202C'.format(s)
    elif uidLen > 1:
        ftml.addToTest(keyUID, s, label = label, comment = comment, rtl = rtl)
        return
    else:
        # Single character: no label is passed to addToTest.
        ftml.addToTest(keyUID, s, comment = comment, rtl = rtl)
        return

    # Contextual / mirrored samples are optionally isolated in their own test.
    if addBreaks:
        ftml.closeTest()
    ftml.addToTest(keyUID, sample, label = label, comment = comment, rtl = rtl)
    if addBreaks:
        ftml.closeTest()
def doit(args):
    """ Generate the FTML test document selected by args.test and write it.

    Reads from `args`: logger, input, fontcode, ifont, ap, rtl, langs, test,
    width, fontsrc, norendercheck, scale, xsl and output.

    The (case-insensitive) prefix of the test name selects what is
    generated: "allchars", "diac", "matras" or "nuktas".  The "nuktas" test
    reads the template file tests/<first word of test name>.template.
    The document is always written to args.output.
    """
    logger = args.logger

    # Read input csv
    builder = FB.FTMLBuilder(logger, incsv=args.input, fontcode=args.fontcode, font=args.ifont, ap=args.ap,
                             rtlenable=args.rtl, langs=args.langs)

    # Override default base (25CC) for displaying combining marks:
    builder.diacBase = 0x0B95  # ka

    # Specify blocks of primary and secondary scripts
    comb = range(0x0300, 0x036F+1)
    taml = range(0x0B80, 0x0BFF+1)
    deva = range(0x0900, 0x0097F+1)
    vedic = range(0x1CD0, 0x1CFF+1)
    gran = range(0x11300, 0x1137F+1)
    block = list(comb) + list(taml) + list(deva) + list(vedic) + list(gran)

    # Useful ranges of codepoints
    uids = sorted(builder.uids())
    vowels = [uid for uid in uids if get_ucd(uid, 'InSC') == 'Vowel_Independent']
    consonants = [uid for uid in uids if get_ucd(uid, 'InSC') == 'Consonant']
    matras = [uid for uid in uids if 'VOWEL SIGN' in get_ucd(uid, 'na')]
    digits = [uid for uid in uids if builder.char(uid).general == 'Nd' and uid in block]

    # Initialize FTML document:
    # Default name for test: AllChars or something based on the csvdata file:
    test = args.test or 'AllChars (NG)'
    widths = None
    if args.width:
        # The requested width is split evenly between the font sources.
        # Parsing is best-effort: on failure a warning is logged and no
        # width is set.  `except Exception` (rather than a bare except)
        # lets KeyboardInterrupt / SystemExit propagate.
        try:
            width, units = re.match(r'(\d+)(.*)$', args.width).groups()
            if len(args.fontsrc):
                width = int(round(int(width)/len(args.fontsrc)))
            widths = {'string': f'{width}{units}'}
            logger.log(f'width: {args.width} --> {widths["string"]}', 'I')
        except Exception:
            logger.log(f'Unable to parse width argument "{args.width}"', 'W')

    # split labels from fontsource parameter ("source=label"; label optional)
    fontsrc = []
    labels = []
    for sl in args.fontsrc:
        try:
            s, l = sl.split('=', 1)
            fontsrc.append(s)
            labels.append(l)
        except ValueError:
            # No '=' present: the whole value is the source, with no label.
            fontsrc.append(sl)
            labels.append(None)

    ftml = FB.FTML(test, logger, rendercheck=not args.norendercheck, fontscale=args.scale,
                   widths=widths, xslfn=args.xsl, fontsrc=fontsrc, fontlabel=labels, defaultrtl=args.rtl)

    if test.lower().startswith("allchars"):
        # all chars that should be in the font:
        ftml.startTestGroup('Encoded characters')
        for uid in uids:
            if uid < 32:
                continue
            c = builder.char(uid)
            # iterate over all permutations of feature settings that might affect this character:
            for featlist in builder.permuteFeatures(uids = (uid,)):
                ftml.setFeatures(featlist)
                builder.render((uid,), ftml)
                # Don't close test -- collect consecutive encoded chars in a single row
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid,), ftml)
                ftml.clearLang()

        # Add unencoded specials and ligatures -- i.e., things with a sequence of USVs in the glyph_data:
        ftml.startTestGroup('Specials & ligatures from glyph_data')
        for basename in builder.specials():
            special = builder.special(basename)
            # iterate over all permutations of feature settings that might affect this special
            for featlist in builder.permuteFeatures(uids = special.uids):
                ftml.setFeatures(featlist)
                builder.render(special.uids, ftml)
                # close test so each special is on its own row:
                ftml.closeTest()
            ftml.clearFeatures()
            if len(special.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render(special.uids, ftml)
                    ftml.closeTest()
                ftml.clearLang()

        # Characters used to create SILE test data
        ftml.startTestGroup('Proof')
        for uid in vowels:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in matras:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in consonants:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in digits:
            builder.render((uid,), ftml)
        ftml.closeTest()

    below_marks = (0x0323, 0x1133B, 0x1133C)  # 0x1CDC, 0x1CDD, 0x1CDE, 0x1CDF
    above_marks = (0x0307, 0x0B82, 0x0BCD)  # 0x1CDA
    marks = below_marks + above_marks
    if test.lower().startswith("diac"):
        # Diac attachment:

        # Representative base and diac chars (only those present in the font data):
        repDiac = list(filter(lambda x: x in builder.uids(), marks))
        repBase = list(filter(lambda x: x in builder.uids(), (0x0B95, 0x0B85)))

        ftml.startTestGroup('Representative diacritics on all bases that take diacritics')
        for uid in uids:
            # ignore bases outside of the primary script:
            if uid not in block:
                continue
            c = builder.char(uid)
            # Always process Lo, but others only if they take marks:
            if c.general == 'Lo' or c.isBase:
                for diac in repDiac:
                    for featlist in builder.permuteFeatures(uids = (uid,diac)):
                        ftml.setFeatures(featlist)
                        # Don't automatically separate connecting or mirrored forms into separate lines:
                        builder.render((uid,diac), ftml, addBreaks = False)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('All diacritics on representative bases')
        for uid in uids:
            # ignore bases outside of the primary and Latin scripts:
            if uid < 0x0300 or uid in range(0xFE00, 0xFE10):
                continue
            c = builder.char(uid)
            if c.general == 'Mn':
                for base in repBase:
                    for featlist in builder.permuteFeatures(uids = (uid,base)):
                        ftml.setFeatures(featlist)
                        builder.render((base,uid), ftml, keyUID = uid, addBreaks = False)
                    ftml.clearFeatures()
                ftml.closeTest()

    if test.lower().startswith("matras"):
        # Combinations with matras:
        ftml.startTestGroup('Consonants with matras')
        for c in consonants:
            for m in matras:
                builder.render((c,m), ftml, label=f'{c:04X}', comment=builder.char(c).basename)
            ftml.closeTest()

    if test.lower().startswith("nuktas"):
        # Nuktas:
        ftml.startTestGroup('Nuktas')
        test_name = test.lower().split()[0]
        with open(f'tests/{test_name}.template') as nuktas:
            line_number = 0
            for line in nuktas:
                line = line.strip()
                line_number += 1
                if line == '':
                    continue
                # Expand the 'N' and 'V' placeholders with every
                # below-mark / above-mark combination.
                for n in below_marks:
                    for v in above_marks:
                        text = line.replace('N', chr(n))
                        text = text.replace('V', chr(v))
                        ftml.addToTest(None, text, label=f'line {line_number}', comment=f'n={n:04X} v={v:04X}')
                ftml.closeTest()

    # Write the output ftml file
    ftml.writeFile(args.output)
def isbase(char):
    """Tell whether `char` has a general category beginning with "L"."""
    category = get_ucd(char, 'gc')
    return category.startswith("L")
def doit(args):
    """ Generate the FTML test document selected by args.test and write it.

    Reads from `args`: logger, input, fontcode, ifont, ap, langs, test,
    norendercheck, scale, xsl, fontsrc, rtl and output.

    The (case-insensitive) prefix of the test name selects what is
    generated: "allchars", "al sorted", "diac" (with "short" anywhere in the
    name selecting the reduced set), "subtending", "showinv", "daggeralef",
    "kern" (with "data" / "with marks" variants), "chadian", "yehbar" or
    "classes".  The document is always written to args.output.
    """
    logger = args.logger

    # Read input csv
    builder = FB.FTMLBuilder(logger, incsv=args.input, fontcode=args.fontcode, font=args.ifont, ap=args.ap,
                             rtlenable=True, langs=args.langs)

    # Override default base (25CC) for displaying combining marks
    builder.diacBase = 0x0628  # beh

    def basenameSortKey(uid: int):
        # Case-insensitive ordering by glyph name.
        return builder.char(uid).basename.lower()

    # Initialize FTML document:
    test = args.test or "AllChars (NG)"  # Default to AllChars
    ftml = FB.FTML(test, logger, rendercheck=not args.norendercheck, fontscale=args.scale,
                   xslfn=args.xsl, fontsrc=args.fontsrc, defaultrtl=args.rtl)

    if test.lower().startswith("allchars"):
        # all chars that should be in the font:
        ftml.startTestGroup('Encoded characters')
        for uid in sorted(builder.uids()):
            if uid < 32:
                continue
            c = builder.char(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                builder.render((uid, ), ftml)
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid, ), ftml)
                ftml.clearLang()

        # Add specials and ligatures that were in the glyph_data:
        ftml.startTestGroup('Specials & ligatures from glyph_data')
        for basename in sorted(builder.specials()):
            special = builder.special(basename)
            for featlist in builder.permuteFeatures(uids=special.uids, feats=special.feats):
                ftml.setFeatures(featlist)
                builder.render(special.uids, ftml)
                ftml.closeTest()
            ftml.clearFeatures()
            if len(special.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render(special.uids, ftml)
                    ftml.closeTest()
                ftml.clearLang()

        # Add Lam-Alef data manually
        ftml.startTestGroup('Lam-Alef')
        lamlist = list(
            filter(lambda x: x in builder.uids(),
                   (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
        aleflist = list(
            filter(lambda x: x in builder.uids(),
                   (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673, 0x0675, 0x0773, 0x0774)))
        for lam in lamlist:
            for alef in aleflist:
                for featlist in builder.permuteFeatures(uids=(lam, alef)):
                    ftml.setFeatures(featlist)
                    builder.render((lam, alef), ftml)
                    ftml.closeTest()
                ftml.clearFeatures()
                if lam == 0x0644 and 'cv02' in builder.features:
                    # Also test lam with hamza above for warsh variants
                    for featlist in builder.permuteFeatures(uids=(lam, 0x0654, alef), feats=('cv02', )):
                        ftml.setFeatures(featlist)
                        builder.render((lam, 0x0654, alef), ftml)
                        ftml.closeTest()
                    ftml.clearFeatures()

        # Add Allah data manually.
        # These are raw strings on purpose: the \uXXXX escapes are passed
        # through to addToTest() as literal text.
        ftml.startTestGroup('Allah ligatures')
        ftml.addToTest(0xFDF2, r"\uFDF2", comment="Rule 1")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0641\u0644\u0644\u0647", label="f-l-l-h", comment="shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0651\u0670\u0647", label="a-l-l-s-da-hf", comment="Rule 2 (daggeralef)")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0670\u0651\u0647", label="a-l-l-da-s-hf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0651\u0670\u06C1", label="a-l-l-s-da-hgf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0670\u0651\u06C1", label="a-l-l-da-s-hgf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0651\u064E\u0647", label="a-l-l-s-f-hf", comment="Rule 2 (fatha)")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u064E\u0651\u0647", label="a-l-l-f-s-hf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0651\u064E\u06C1", label="a-l-l-s-f-hgf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u064E\u0651\u06C1", label="a-l-l-f-s-hgf")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u06EB\u0644\u064E\u0651\u06C1", label="a-l-M-l-s-da-hgf", comment="Rule 2c: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0641\u0644\u0644\u064E\u0651\u06C1", label="f-l-l-s-da-hgf", comment="Rule 2d: non-alef")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0641\u0627\u0644\u0644\u064E\u0651\u06C1", label="f-a-l-l-s-da-hgf", comment="Rule 2d: not isolate alef")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u06EB\u0644\u0644\u064E\u0651\u06C1", label="a-M-l-l-s-da-hgf", comment="Rule 2d: Mark")
        ftml.closeTest()
        ftml.addToTest(None, r" \u0644\u0644\u0651\u064E\u0647", label="space-l-l-s-da-hf", comment="Rule 2d: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u0644\u0647", label="a-l-l-h", comment="Rule 3")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0622\u0644\u0644\u0647", label="aM-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0623\u0644\u0644\u0647", label="aH-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0671\u0644\u0644\u0647", label="aW-l-l-h", comment="won't work")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u06EB\u0644\u0644\u0647", label="a-M-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0641\u0627\u0644\u0644\u0647", label="f-a-l-l-h", comment="Rule 3a: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u06EB\u0644\u0647", label="a-l-M-l-h", comment="Rule 3d: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u200D\u0644\u0647", label="a-l-zwj-l-h", comment="Rule 4a: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0627\u0644\u200D\u0644\u0651\u0670\u0647", label="a-l-zwj-l-s-da-h", comment="Rule 4a: shouldn't match")
        ftml.closeTest()

    if test.lower().startswith("al sorted"):
        # all AL chars, sorted by shape:
        ftml.startTestGroup('Arabic Letters')
        for uid in sorted(filter(lambda u: get_ucd(u, 'bc') == 'AL', builder.uids()), key=joinGoupSortKey):
            c = builder.char(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                builder.render((uid, ), ftml)
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid, ), ftml)
                ftml.clearLang()

    if test.lower().startswith("diac"):
        # Diac attachment:
        doLongTest = 'short' not in test.lower()

        # Representative base and diac chars:
        if doLongTest:
            repDiac = list(
                filter(lambda x: x in builder.uids(),
                       (0x064E, 0x0650, 0x065E, 0x0670, 0x0616, 0x06E3, 0x08F0, 0x08F2)))
            repBase = list(
                filter(lambda x: x in builder.uids(),
                       (0x0627, 0x0628, 0x062B, 0x0647, 0x064A, 0x77F, 0x08AC)))
            lamlist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
            aleflist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673, 0x0675, 0x0773, 0x0774)))
        else:
            repDiac = list(
                filter(lambda x: x in builder.uids(), (0x064E, 0x0650, 0x0670)))
            repBase = list(
                filter(lambda x: x in builder.uids(), (0x0627, 0x0628)))
            lamlist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
            aleflist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673, 0x0675, 0x0773, 0x0774)))

        ftml.startTestGroup(
            'Representative diacritics on all bases that take diacritics')
        for uid in sorted(builder.uids()):
            if uid < 32 or uid in (0xAA, 0xBA):
                continue
            c = builder.char(uid)
            # Always process Lo, but others only if they take marks:
            if c.general == 'Lo' or c.isBase:
                for diac in repDiac:
                    for featlist in builder.permuteFeatures(uids=(uid, diac)):
                        ftml.setFeatures(featlist)
                        builder.render((uid, diac), ftml, addBreaks=False, dualJoinMode=2)
                        if doLongTest:
                            if diac != 0x0651:  # If not shadda
                                # include shadda, in either order:
                                builder.render((uid, diac, 0x0651), ftml, addBreaks=False, dualJoinMode=2)
                                builder.render((uid, 0x0651, diac), ftml, addBreaks=False, dualJoinMode=2)
                            if diac != 0x0654:  # If not hamza above
                                # include hamza above, in either order:
                                builder.render((uid, diac, 0x0654), ftml, addBreaks=False, dualJoinMode=2)
                                builder.render((uid, 0x0654, diac), ftml, addBreaks=False, dualJoinMode=2)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('All Arabic diacritics on representative bases')
        for uid in sorted(builder.uids()):
            # ignore non-ABS marks
            if uid < 0x600 or uid in range(0xFE00, 0xFE10):
                continue
            c = builder.char(uid)
            if c.general == 'Mn':
                for base in repBase:
                    for featlist in builder.permuteFeatures(uids=(uid, base)):
                        ftml.setFeatures(featlist)
                        builder.render((base, uid), ftml, keyUID=uid, addBreaks=False, dualJoinMode=2)
                        if doLongTest:
                            if uid != 0x0651:  # if not shadda
                                # include shadda, in either order:
                                builder.render((base, uid, 0x0651), ftml, keyUID=uid, addBreaks=False, dualJoinMode=2)
                                builder.render((base, 0x0651, uid), ftml, keyUID=uid, addBreaks=False, dualJoinMode=2)
                            # In this loop the mark under test is `uid` and
                            # it sits on `base`.  `diac` belongs to the
                            # previous loop and must not be used here (it
                            # would be stale, or undefined if that loop
                            # never ran).
                            if uid != 0x0670:  # if not superscript alef
                                # include superscript alef, in either order:
                                builder.render((base, uid, 0x0670), ftml, keyUID=uid, addBreaks=False, dualJoinMode=2)
                                builder.render((base, 0x0670, uid), ftml, keyUID=uid, addBreaks=False, dualJoinMode=2)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('Special cases')
        builder.render((0x064A, 0x064E), ftml)  # Yeh + Fatha should keep dots
        builder.render((0x064A, 0x0654), ftml)  # Yeh + Hamza should lose dots
        ftml.closeTest()

        ftml.startTestGroup('LamAlef ligatures')
        diaB = 0x064D
        diaA = 0x064B
        for lam in lamlist:
            for alef in aleflist:
                for featlist in builder.permuteFeatures(uids=(lam, alef)):
                    ftml.setFeatures(featlist)
                    builder.render((lam, alef), ftml, addBreaks=False)
                    builder.render((lam, diaA, alef, diaA), ftml, addBreaks=False)
                    builder.render((lam, diaB, alef), ftml, addBreaks=False)
                    builder.render((lam, alef, diaB), ftml, addBreaks=False)
                    builder.render((lam, diaB, alef, diaB), ftml, addBreaks=False)
                ftml.clearFeatures()
                ftml.closeTest()

    if test.lower().startswith("subtending"):
        # Generates sample data for all subtending marks. Data includes sequences of 0 to n+1
        # digits, where n is the maximum expected to be supported on the mark. Latin, Arabic-Indic,
        # and Extended Arabic-Indic digits are included.
        for digitSample in filter(lambda x: x in builder.uids(), (0x0032, 0x0668, 0x06F8)):
            # Digit "one" of the same digit set as the sample digit.
            digitOne = (digitSample & 0xFFF0) + 1
            for uid, lgt in filter(lambda x: x[0] in builder.uids(),
                                   ([0x600, 3], [0x0601, 4], [0x0602, 2], [0x0603, 4],
                                    [0x0604, 4], [0x0605, 4], [0x06DD, 3])):
                c = chr(uid)
                label = "U+{0:04X} {1}".format(
                    uid, 'latn' if digitOne == 0x0031 else 'arab' if digitOne == 0x0661 else 'urdu')
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, "\u0628" + c + "\u0645", label, comment)
                    for ln in range(1, lgt + 1):
                        ftml.addToTest(uid, c + chr(digitSample) * ln)
                    ftml.addToTest(uid, c + chr(digitOne) + chr(digitOne + 1))
                ftml.clearFeatures()
                ftml.closeTest()
                if uid == 0x06DD and digitOne == 0x06F1:
                    # Extra items for Eastern digits
                    for featlist in builder.permuteFeatures(uids=(uid, 0x06F7)):
                        ftml.setFeatures(featlist)
                        ftml.addToTest(uid, c + "\u06F4\u06F6\u06F7", label, "4 6 7")
                    ftml.clearFeatures()
                    for langID in builder.allLangs:
                        ftml.setLang(langID)
                        for featlist in ((None, ), (['cv80', '1'], ), (['cv80', '2'], )):
                            ftml.setFeatures(featlist)
                            ftml.addToTest(uid, c + "\u06F4\u06F6\u06F7", label, "4 6 7")
                        ftml.clearFeatures()
                    ftml.clearLang()
                    ftml.closeTest()

    if test.lower().startswith("showinv"):
        # Sample data for chars that have a "show invisible" feature
        # The 'r', 'a', 'ra' indicates whether this is standard in Roman fonts, Arabic fonts, or both.
        invlist = [(0x034F, 'r'), (0x061C, 'a'), (0x200B, 'r'), (0x200C, 'ra'), (0x200D, 'ra'),
                   (0x200E, 'ra'), (0x200F, 'ra'), (0x202A, 'ra'), (0x202B, 'ra'), (0x202C, 'ra'),
                   (0x202D, 'ra'), (0x202E, 'ra'), (0x202E, 'r'), (0x2060, 'r'), (0x2061, 'r'),
                   (0x2062, 'r'), (0x2063, 'r'), (0x2066, 'a'), (0x2067, 'a'), (0x2068, 'a'),
                   (0x2069, 'a'), (0xFE00, 'ra'), (0xFE01, 'ra'), (0xFE02, 'ra'), (0xFE03, 'ra'),
                   (0xFE04, 'ra'), (0xFE05, 'ra'), (0xFE06, 'ra'), (0xFE07, 'ra'), (0xFE08, 'ra'),
                   (0xFE09, 'ra'), (0xFE0A, 'ra'), (0xFE0B, 'ra'), (0xFE0C, 'ra'), (0xFE0D, 'ra'),
                   (0xFE0E, 'ra'), (0xFE0F, 'ra')]
        featlist = (('invs', '1'), ('ss06', '1'))
        ftml.setFeatures(featlist)
        for inv in invlist:
            uid = inv[0]
            c = chr(uid)
            label = 'U+{0:04X} ({1})'.format(uid, inv[1])
            comment = builder.char(uid).basename if uid in builder.uids() else ""
            ftml.addToTest(uid, " " + c + " ", label, comment)
            ftml.closeTest()
        ftml.clearFeatures()

    if test.lower().startswith('daggeralef'):
        for uid in sorted(builder.uids(), key=joinGoupSortKey):
            if get_ucd(uid, 'jg') not in ('Sad', 'Seen', 'Yeh'):
                # If not Yeh, Sad or seen joining group we're not interested
                continue
            for featlist in builder.permuteFeatures(uids=(uid, 0x0670)):
                ftml.setFeatures(featlist)
                builder.render((uid, 0x0670), ftml)
            ftml.clearFeatures()
            ftml.closeTest()

    if test.lower().startswith('kern'):
        rehs = sorted(
            filter(lambda uid: get_ucd(uid, 'jg') == 'Reh', builder.uids()))
        waws = sorted(
            filter(lambda uid: get_ucd(uid, 'jg') == 'Waw', builder.uids()))
        uids = sorted(filter(
            lambda uid: get_ucd(uid, 'jt') in ('D', 'R') or uid == 0xFD3E,
            builder.uids()),
                      key=joinGoupSortKey)
        # NB: I wondered about including punctuation, i.e., get_ucd(uid, 'gc').startswith('P'), but the default
        # spacing is pretty good and graphite collision avoidance makes it worse, so the only one we need is
        # U+FD3E (the value the filter above actually admits).
        dbehf = chr(0x066E) + chr(0x200D)  # dotless beh final
        alef = chr(0x0627)  # alef
        zwj = chr(0x200D)  # Zero width joiner
        ma = 0x064B  # Mark above (fathatan)
        mb = 0x064D  # Mark below (kasratan)

        if "data" not in test.lower():
            ftml.startTestGroup('All the rehs')
            for uid in rehs:
                c = chr(uid)
                label = 'U+{0:04X}'.format(uid)
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, c + dbehf + ' ' + zwj + c + dbehf, label, comment)
                ftml.clearFeatures()
                ftml.closeTest()

            ftml.startTestGroup('All the waws')
            for uid in waws:
                c = chr(uid)
                label = 'U+{0:04X}'.format(uid)
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, c + dbehf + ' ' + zwj + c + dbehf, label, comment)
                ftml.clearFeatures()
                ftml.closeTest()

            # reh or waw plus the others
            for uid1 in (0x631, 0x648):  # (reh, waw)
                ftml.startTestGroup('{} + all the others'.format(
                    get_ucd(uid1, 'jg')))
                c1 = chr(uid1)
                for uid2 in uids:
                    c2 = chr(uid2)
                    comment = builder.char(uid2).basename
                    label = 'U+{:04X}'.format(uid2)
                    for featlist in builder.permuteFeatures(uids=(uid1, uid2)):
                        ftml.setFeatures(featlist)
                        if get_ucd(uid2, 'jt') == 'D':
                            ftml.addToTest(uid2, zwj + c1 + c2 + zwj, label, comment)
                            ftml.addToTest(uid2, c1 + c2 + zwj)
                        ftml.addToTest(uid2, zwj + c1 + c2, label, comment)
                        ftml.addToTest(uid2, c1 + c2)
                    ftml.clearFeatures()
                    ftml.closeTest()
        else:
            # exhaustive test for kerning data extraction
            ftml.defaultRTL = True
            addMarks = "with marks" in test.lower()
            for uid1 in rehs:  # (rehs[0],)
                for uid2 in uids:
                    for featlist in builder.permuteFeatures(uids=(uid1, uid2)):
                        ftml.setFeatures(featlist)
                        builder.render([uid1, uid2], ftml, addBreaks=False, rtl=True, dualJoinMode=1)
                        if addMarks:
                            # Same sequences, in the same order, as the
                            # original hand-written list of render() calls.
                            markedSeqs = (
                                [uid1, uid2, mb],
                                [uid1, uid2, ma],
                                [uid1, uid2, mb, ma],
                                [uid1, uid2, ma, mb],
                                [uid1, ma, uid2],
                                [uid1, ma, uid2, mb],
                                [uid1, ma, uid2, ma],
                                [uid1, ma, uid2, mb, ma],
                                [uid1, ma, uid2, ma, mb],
                                [uid1, mb, uid2],
                                [uid1, mb, uid2, mb],
                                [uid1, mb, uid2, ma],
                                [uid1, mb, uid2, mb, ma],
                                [uid1, mb, uid2, ma, mb],
                            )
                            for seq in markedSeqs:
                                builder.render(seq, ftml, addBreaks=False, rtl=True, dualJoinMode=1)
                    ftml.clearFeatures()
                ftml.closeTest()

    if test.lower().startswith('chadian'):
        # Build regex character classes from the characters in the font data.
        rehs = '[' + ''.join(
            map(chr,
                filter(lambda uid: get_ucd(uid, 'jg') == 'Reh', builder.uids()))) + ']'
        uids = '[' + ''.join(
            map(chr,
                filter(lambda uid: get_ucd(uid, 'jt') in ('D', 'R') or uid == 0xFD3E,
                       builder.uids()))) + ']'
        marks = '[' + ''.join(
            map(chr,
                filter(lambda uid: get_ucd(uid, 'gc').startswith('M'), builder.uids()))) + ']'
        rehwordsRE = re.compile(f'({rehs}{marks}{uids}{marks}*)')
        with open('/SRC/ABS Text Samples/Chad/Chadian Arabic AS word list.txt', encoding="utf8") as f:
            for line_no, line in enumerate(f):
                res = ''
                matches = ''
                lastEnd = 0
                for m in rehwordsRE.finditer(line):
                    if m.start() > 0:
                        res += line[lastEnd:m.start()]
                    # I wish I could output <em> around the kerned pair, something like:
                    #     res += f'<em>{m.group()}</em>'
                    # but apparently ftml.py doesn't support this :-(
                    # So just append
                    res += m.group()
                    # Keep track of all matched strings for feature permutations
                    matches += m.group()
                    lastEnd = m.end()
                if len(res) > 0:
                    # Add tail to result
                    res += line[lastEnd:]
                    # figure features based only on what matched
                    matchedUids = map(ord, list(matches))
                    for featlist in builder.permuteFeatures(uids=matchedUids):
                        ftml.setFeatures(featlist)
                        # Add to test:
                        ftml.addToTest(None, res, f'line {line_no}')
                    ftml.clearFeatures()
                    ftml.closeTest()

    if test.lower().startswith('yehbar'):
        # Yehbarree tail interacting with diacs below previous char
        uids = sorted(filter(lambda uid: get_ucd(uid, 'jt') in ('D', ), builder.uids()),
                      key=basenameSortKey)
        # Raw strings: the \uXXXX escapes are emitted as literal text.
        markbelow = r'\u064D'  # kasratan
        markabove = r'\u06EC'  # dotStopabove-ar
        zwj = r'\u200D'  # Zero width joiner

        ftml.startTestGroup('U+06D2 yehbarree')
        yehbarree = r'\u06D2'
        for uid in uids:
            if uid < 32:
                continue
            c = r'\u{:04X}'.format(uid)
            label = 'U+{:04X}'.format(uid)
            comment = builder.char(uid).basename
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                ftml.addToTest(
                    uid,
                    f"{c}{markabove}{yehbarree} {zwj}{c}{markabove}{yehbarree} {c}{markbelow}{markabove}{yehbarree} {zwj}{c}{markbelow}{markabove}{yehbarree}",
                    label, comment)
                ftml.closeTest()
            ftml.clearFeatures()

        # Also test other forms of yehbarree (yehbarreeHamzaabove-ar, yehbarreeTwoabove, yehbarreeThreeabove-ar)
        ftml.startTestGroup('yehbarree-like')
        for yehbarree in filter(lambda x: x in builder.uids(), (0x06D3, 0x077A, 0x077B)):
            for uid in filter(lambda x: x in builder.uids(), (0x06A0, 0x08B3)):
                c = r'\u{:04X}'.format(uid)
                yb = r'\u{:04X}'.format(yehbarree)
                label = 'U+{:04X} U+{:04X}'.format(uid, yehbarree)
                comment = builder.char(uid).basename + ' ' + builder.char(yehbarree).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(
                        uid,
                        f"{c}{markabove}{yb} {zwj}{c}{markabove}{yb} {c}{markbelow}{markabove}{yb} {zwj}{c}{markbelow}{markabove}{yb}",
                        label, comment)
                    ftml.closeTest()
                ftml.clearFeatures()

    if test.lower().startswith('classes'):
        zwj = chr(0x200D)
        lsb = ''  # chr(0xF130)
        rsb = ''  # chr(0xF131)

        # Glyph names already emitted, so each glyph is shown only once.
        glyphsSeen = set()

        uids = sorted(
            filter(lambda uid: builder.char(uid).general == 'Lo' and uid > 255,
                   builder.uids()))
        uids = sorted(uids, key=joinGoupSortKey)
        for uid in uids:
            c = chr(uid)
            thischar = builder.char(uid)
            label = 'U+{:04X}'.format(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                gname = thischar.basename
                if len(featlist) == 1 and featlist[0] is not None:
                    # See if we can find an alternate glyph name:
                    feat = '{}={}'.format(featlist[0][0], featlist[0][1])
                    gname = thischar.altnames.get(feat, gname)
                if gname not in glyphsSeen:
                    glyphsSeen.add(gname)
                    comment = gname
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, lsb + c + rsb, label, comment)  # isolate
                    if get_ucd(uid, 'jt') == 'D':
                        ftml.addToTest(uid, lsb + c + zwj + rsb)  # initial
                        ftml.addToTest(uid, lsb + zwj + c + zwj + rsb)  # medial
                    if get_ucd(uid, 'jt') in ('R', 'D'):
                        ftml.addToTest(uid, lsb + zwj + c + rsb)  # final
                    ftml.clearFeatures()
                    ftml.closeTest()

    ftml.writeFile(args.output)
def joinGoupSortKey(uid: int):
    """Sort key that orders codepoints by joining group, then by value.

    The joining group of `uid` is looked up in `joinGroupKeys`; groups not
    listed there get rank 99.  The rank is scaled by 65536 and the
    codepoint added.
    """
    group = get_ucd(uid, 'jg')
    rank = joinGroupKeys.get(group, 99)
    return rank * 65536 + uid