Python get_ucd примеры использования

Язык программирования: Python

Пространство имен/Пакет: palaso.unicode.ucd

Метод/Функция: get_ucd

Примеров на hotexamples.com: 9

Python get_ucd — найдено 9 примеров. Это лучшие примеры Python-кода для palaso.unicode.ucd.get_ucd, полученные из проектов с открытым исходным кодом. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

 def render(self, uids, ftml, keyUID=0, descUIDs=None):
     """Add one character sequence to an FTML test (optional general-purpose helper).

     Nothing happens for an empty ``uids``. Otherwise the sequence is turned
     into a string and handed to ``ftml.addToTest``:

     * ``keyUID`` of 0 means "use the first uid of the sequence".
     * ``descUIDs`` (falling back to ``uids`` when empty or None) supplies
       the code points used to build the label and the comment.
     * A leading ``Mn`` character gets ``self.diacBase`` placed in front of it;
       leading whitespace gets U+F130 in front, and a trailing (non-mark)
       whitespace gets U+F131 appended.
     * The label is passed on only when the caller supplied more than one uid.
     """
     if not len(uids):
         return
     # Work on a private list so the caller's sequence is never modified.
     sequence = list(uids)
     first = sequence[0]
     originalCount = len(sequence)
     if keyUID == 0:
         keyUID = first
     described = descUIDs if descUIDs else sequence
     # Label lists the code points, one per line; comment lists the glyph names.
     label = '\n'.join('U+{0:04X}'.format(u) for u in described)
     comment = ' '.join(self._charFromUID[u].basename for u in described)
     if get_ucd(first, 'gc') == 'Mn':
         # A non-spacing mark needs a base to sit on.
         sequence.insert(0, self.diacBase)
     elif get_ucd(first, 'WSpace'):
         # Opening baseline bracket in front of leading whitespace.
         sequence.insert(0, 0xF130)
     nonMarks = [u for u in sequence if get_ucd(u, 'gc') != 'Mn']
     if get_ucd(nonMarks[-1], 'WSpace'):
         # Closing baseline bracket after trailing whitespace.
         sequence.append(0xF131)
     text = ''.join(map(chr, sequence))
     if originalCount > 1:
         ftml.addToTest(keyUID, text, label=label, comment=comment)
     else:
         # Single character: no label is passed, so it is left to addToTest.
         ftml.addToTest(keyUID, text, comment=comment)

Пример #2

Показать файл

 def __init__(self, uids, basename, logger):
     """Store a code point sequence, its glyph name and empty tag collections.

     Args:
         uids: indexable sequence of code points (USVs); ``uids[0]`` is the
             one looked up for the general category.
         basename: glyph name associated with the sequence.
         logger: object whose ``log(message, severity)`` method receives the
             warning emitted when the first code point is not defined.
     """
     self.logger = logger
     self.uids = uids
     self.basename = basename
     # General category of the first uid. Default to None up front so the
     # attribute always exists, even when the lookup below fails.
     self.general = None
     try:
         self.general = get_ucd(uids[0],'gc')
     except KeyError:
         self.logger.log('USV %04X not defined; no properties known' % uids[0], 'W')
     self.feats = set()  # feat tags that affect this char
     self.langs = set()  # lang tags that affect this char
     self.altnames = {}  # alternate glyph names.

Пример #3

Показать файл

Файл: syllable.py Проект: silnrsi/palaso-python

def intersperse(main, *extras):
    """Insert extra combining substrings into every cluster of each string.

    Each string in ``main`` is split into clusters (a run of letters, each
    letter starting its own cluster, followed by any marks) alternating with
    separator runs made of all other characters. For every non-empty cluster
    the characters after its first one are paired with their ``ccc`` value,
    merged with ``extras`` and stably sorted by that order value, then
    re-attached after the cluster's first character.

    Args:
        main: iterable of strings to process.
        *extras: ``(substring, combining_order)`` tuples added to every
            cluster. The order values must be comparable with whatever
            ``get_ucd(c, 'ccc')`` returns.

    Returns:
        A list with one rebuilt string per entry of ``main``.
    """
    res = []
    extras = list(extras)
    for m in main:
        groups = []
        base = ""
        # Group consecutive characters by the first letter of their general
        # category ("L" letters, "M" marks, anything else is a separator).
        for k, chars in groupby(m, lambda x: get_ucd(x, 'gc')[0]):
            d = "".join(chars)
            if k == "L":
                if base: groups.extend((base, ""))
                # Every letter but the last is a cluster of its own; the last
                # one stays open so that following marks can join it.
                for c in d[:-1]:
                    groups.extend((c, ""))
                base = d[-1]
            elif k == "M":
                base = base + d
            else:
                groups.extend((base, d))
                base = ""
        if base: groups.extend((base, ""))
        # groups is now a 2n list: even slots hold base+marks clusters, odd
        # slots hold the separator runs between them.
        for i in range(0, len(groups), 2):
            cluster = groups[i]
            if not cluster:
                # Empty cluster (text starts with a separator, or two
                # separator runs are adjacent): nothing to attach marks to.
                continue
            dias = list(cluster[1:])
            orders = [get_ucd(c, 'ccc') for c in dias]
            bases = list(zip(dias, orders))
            # key= works on Python 2 and 3; sorted() is stable, so items with
            # equal order keep their relative position (existing marks first).
            new = sorted(bases + extras, key=lambda item: item[1])
            groups[i] = cluster[0] + "".join(item[0] for item in new)
        res.append("".join(groups))
    return res

Пример #4

Показать файл

 def __init__(self, uid, basename, logger):
     """Store one code point, its glyph name and empty tag collections.

     Args:
         uid: code point (USV) of the character.
         basename: glyph name associated with the character.
         logger: object whose ``log(message, severity)`` method receives the
             warning emitted when ``uid`` is not defined.
     """
     self.logger = logger
     self.uid = uid
     self.basename = basename
     # General category of the uid. Default to None up front so the attribute
     # always exists, even when the lookup below fails.
     self.general = None
     try:
         self.general = get_ucd(uid,'gc')
     except KeyError:
         self.logger.log('USV %04X not defined; no properties known' % uid, 'W')
     self.feats = set()  # feat tags that affect this char
     self.langs = set()  # lang tags that affect this char
     self.aps = set()
     self.altnames = {}  # alternate glyph names.
         # the above is a dict keyed by either:
         #   lang tag e.g., 'ur', or
         #   feat tag and value, e.g., 'cv24=3'
         # and returns the glyphname for that alternate.
     # Additional info from UFO:
     self.takesMarks = self.isMark = self.isBase = False

Пример #5

Показать файл

 def render(self, uids, ftml, keyUID = 0, addBreaks = True, rtl = None, dualJoinMode = 3, label = None, comment = None):
     """Add one character sequence to an FTML test (optional general-purpose helper).

     Nothing happens for an empty ``uids``. Otherwise:

     * ``keyUID`` of 0 means "use the first uid"; ``label`` and ``comment``
       left as None are built from the code points and glyph names.
     * If the first or last character whose joining type is not 'T' joins
       on its outer side, contextual forms are emitted using U+200D. When it
       joins on both sides, ``dualJoinMode`` bit 1 selects the
       space-separated initial/medial/final forms and bit 2 selects three
       copies joined together.
     * Otherwise, if any character is mirrored and ``self.rtlEnable`` is set,
       the text is emitted plainly and inside LTR and RTL embeddings.
     * Otherwise the plain text is emitted; the label is passed on only when
       the caller supplied more than one uid.

     ``addBreaks`` wraps the contextual and mirrored outputs in
     ``ftml.closeTest()`` calls; ``rtl`` is forwarded to ``ftml.addToTest``.
     """
     if not len(uids):
         return
     # Work on a private list so the caller's sequence is never modified.
     seq = list(uids)
     first = seq[0]
     originalCount = len(seq)
     if keyUID == 0:
         keyUID = first
     if label is None:
         # One "U+XXXX" per line.
         label = '\n'.join('U+{0:04X}'.format(u) for u in seq)
     if comment is None:
         # Glyph names separated by spaces.
         comment = ' '.join(self._charFromUID[u].basename for u in seq)
     hasMirrored = bool([u for u in seq if get_ucd(u, 'Bidi_M')])

     # Decide whether zwj is needed before/after, looking only at the first
     # and last characters that are not TRANSPARENT. Anything whose bidi
     # class is not 'L' is assumed to be some sort of rtl character.
     zwjBefore = zwjAfter = False
     joiners = [u for u in seq if get_ucd(u, 'jt') != 'T']
     if len(joiners):
         head = joiners[0]
         headJt = get_ucd(head, 'jt')
         zwjBefore = (headJt == 'D'
                      or headJt == ('L' if get_ucd(head, 'bc') == 'L' else 'R'))
         tail = joiners[-1]
         tailJt = get_ucd(tail, 'jt')
         zwjAfter = (tailJt == 'D'
                     or tailJt == ('R' if get_ucd(tail, 'bc') == 'L' else 'L'))

     if get_ucd(first, 'gc') == 'Mn':
         # A non-spacing mark needs a base; with a base in front there is no
         # longer any point in a zwj before the sequence.
         seq.insert(0, self.diacBase)
         zwjBefore = False
     elif get_ucd(first, 'WSpace'):
         # Opening baseline bracket in front of leading whitespace.
         seq.insert(0, 0xF130)
     nonMarks = [u for u in seq if get_ucd(u, 'gc') != 'Mn']
     if get_ucd(nonMarks[-1], 'WSpace'):
         # Closing baseline bracket after trailing whitespace.
         seq.append(0xF131)
     text = ''.join(map(chr, seq))

     if zwjBefore or zwjAfter:
         # Contextual forms, always starting with the isolate.
         pieces = [u'{0} '.format(text)]
         if zwjBefore and zwjAfter:
             if dualJoinMode & 1:
                 # initial, medial, final separated by space
                 pieces.append(u'{0}\u200D \u200D{0}\u200D \u200D{0} '.format(text))
             if dualJoinMode & 2:
                 # three joined forms in sequence
                 pieces.append(u'{0}{0}{0} '.format(text))
         elif zwjAfter:
             pieces.append(u'{0}\u200D '.format(text))
         else:
             pieces.append(u'\u200D{0} '.format(text))
         output = u''.join(pieces)
         breakAround = addBreaks
         passLabel = True
     elif hasMirrored and self.rtlEnable:
         # Mirrored character with rtl enabled: show plain, LTR and RTL.
         output = u'{0} LTR: \u202A{0}\u202C RTL: \u202B{0}\u202C'.format(text)
         breakAround = addBreaks
         passLabel = True
     else:
         # (LRE/RLE/PDF and LRI/RLI/FSI/PDI get no special treatment.)
         output = text
         breakAround = False
         passLabel = originalCount > 1

     if breakAround: ftml.closeTest()
     if passLabel:
         ftml.addToTest(keyUID, output, label = label, comment = comment, rtl = rtl)
     else:
         ftml.addToTest(keyUID, output, comment = comment, rtl = rtl)
     if breakAround: ftml.closeTest()

Пример #6

Показать файл

def doit(args):
    """Build an FTML test document from glyph data and write it to ``args.output``.

    Which test groups are generated depends on how the test name
    (``args.test``, defaulting to ``'AllChars (NG)'``) starts, compared
    case-insensitively: ``allchars``, ``diac``, ``matras`` or ``nuktas``.

    Args:
        args: parsed command-line namespace. This function reads ``logger``,
            ``input``, ``fontcode``, ``ifont``, ``ap``, ``rtl``, ``langs``,
            ``test``, ``width``, ``fontsrc``, ``norendercheck``, ``scale``,
            ``xsl`` and ``output`` from it.
    """
    logger = args.logger

    # Read input csv
    builder = FB.FTMLBuilder(logger, incsv=args.input, fontcode=args.fontcode, font=args.ifont, ap=args.ap,
                             rtlenable=args.rtl, langs=args.langs)

    # Override default base (25CC) for displaying combining marks:
    builder.diacBase = 0x0B95   # ka

    # Specify blocks of primary and secondary scripts
    comb = range(0x0300, 0x036F+1)
    taml = range(0x0B80, 0x0BFF+1)
    deva = range(0x0900, 0x097F+1)
    vedic = range(0x1CD0, 0x1CFF+1)
    gran = range(0x11300, 0x1137F+1)
    # Only ever used for membership tests, so keep it as a set.
    block = set().union(comb, taml, deva, vedic, gran)

    # Useful ranges of codepoints
    uids = sorted(builder.uids())
    vowels = [uid for uid in uids if get_ucd(uid, 'InSC') == 'Vowel_Independent']
    consonants = [uid for uid in uids if get_ucd(uid, 'InSC') == 'Consonant']
    matras = [uid for uid in uids if 'VOWEL SIGN' in get_ucd(uid, 'na')]
    digits = [uid for uid in uids if builder.char(uid).general == 'Nd' and uid in block]

    # Initialize FTML document:
    # Default name for test: AllChars or something based on the csvdata file:
    test = args.test or 'AllChars (NG)'
    widths = None
    if args.width:
        try:
            width, units = re.match(r'(\d+)(.*)$', args.width).groups()
            if len(args.fontsrc):
                # Share the requested width between the font sources.
                width = int(round(int(width)/len(args.fontsrc)))
            widths = {'string': f'{width}{units}'}
            logger.log(f'width: {args.width} --> {widths["string"]}', 'I')
        except (AttributeError, TypeError, ValueError):
            # AttributeError: the pattern did not match (re.match returned None).
            # Anything unrelated to parsing is deliberately left to propagate.
            logger.log(f'Unable to parse width argument "{args.width}"', 'W')
    # split labels from fontsource parameter
    fontsrc = []
    labels = []
    for sl in args.fontsrc:
        try:
            s, l = sl.split('=',1)
            fontsrc.append(s)
            labels.append(l)
        except ValueError:
            # No '=' present: the whole value is the source, with no label.
            fontsrc.append(sl)
            labels.append(None)
    ftml = FB.FTML(test, logger, rendercheck=not args.norendercheck, fontscale=args.scale,
                   widths=widths, xslfn=args.xsl, fontsrc=fontsrc, fontlabel=labels, defaultrtl=args.rtl)

    if test.lower().startswith("allchars"):
        # all chars that should be in the font:
        ftml.startTestGroup('Encoded characters')
        for uid in uids:
            if uid < 32: continue
            c = builder.char(uid)
            # iterate over all permutations of feature settings that might affect this character:
            for featlist in builder.permuteFeatures(uids = (uid,)):
                ftml.setFeatures(featlist)
                builder.render((uid,), ftml)
                # Don't close test -- collect consecutive encoded chars in a single row
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid,), ftml)
                ftml.clearLang()

        # Add unencoded specials and ligatures -- i.e., things with a sequence of USVs in the glyph_data:
        ftml.startTestGroup('Specials & ligatures from glyph_data')
        for basename in builder.specials():
            special = builder.special(basename)
            # iterate over all permutations of feature settings that might affect this special
            for featlist in builder.permuteFeatures(uids = special.uids):
                ftml.setFeatures(featlist)
                builder.render(special.uids, ftml)
                # close test so each special is on its own row:
                ftml.closeTest()
            ftml.clearFeatures()
            if len(special.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render(special.uids, ftml)
                    ftml.closeTest()
                ftml.clearLang()

        # Characters used to create SILE test data
        ftml.startTestGroup('Proof')
        for uid in vowels:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in matras:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in consonants:
            builder.render((uid,), ftml)
        ftml.closeTest()
        for uid in digits:
            builder.render((uid,), ftml)
        ftml.closeTest()

    below_marks = (0x0323, 0x1133B, 0x1133C)  # 0x1CDC, 0x1CDD, 0x1CDE, 0x1CDF
    above_marks = (0x0307, 0x0B82, 0x0BCD)  # 0x1CDA
    marks = below_marks + above_marks

    if test.lower().startswith("diac"):
        # Diac attachment:

        # Representative base and diac chars:
        repDiac = list(filter(lambda x: x in builder.uids(), marks))
        repBase = list(filter(lambda x: x in builder.uids(), (0x0B95, 0x0B85)))

        ftml.startTestGroup('Representative diacritics on all bases that take diacritics')
        for uid in uids:
            # ignore bases outside of the primary script:
            if uid not in block: continue
            c = builder.char(uid)
            # Always process Lo, but others only if they take marks:
            if c.general == 'Lo' or c.isBase:
                for diac in repDiac:
                    for featlist in builder.permuteFeatures(uids = (uid,diac)):
                        ftml.setFeatures(featlist)
                        # Don't automatically separate connecting or mirrored forms into separate lines:
                        builder.render((uid,diac), ftml, addBreaks = False)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('All diacritics on representative bases')
        for uid in uids:
            # ignore bases outside of the primary and Latin scripts:
            if uid < 0x0300 or uid in range(0xFE00, 0xFE10): continue
            c = builder.char(uid)
            if c.general == 'Mn':
                for base in repBase:
                    for featlist in builder.permuteFeatures(uids = (uid,base)):
                        ftml.setFeatures(featlist)
                        builder.render((base,uid), ftml, keyUID = uid, addBreaks = False)
                    ftml.clearFeatures()
                ftml.closeTest()

    if test.lower().startswith("matras"):
        # Combinations with matras:

        ftml.startTestGroup('Consonants with matras')
        for c in consonants:
            for m in matras:
                builder.render((c,m), ftml, label=f'{c:04X}', comment=builder.char(c).basename)
            ftml.closeTest()

    if test.lower().startswith("nuktas"):
        # Nuktas:
        ftml.startTestGroup('Nuktas')
        test_name = test.lower().split()[0]
        # NOTE(review): the template is opened with the platform default
        # encoding -- confirm that is intended, or pass encoding= explicitly.
        with open(f'tests/{test_name}.template') as nuktas:
            line_number = 0
            for line in nuktas:
                line = line.strip()
                line_number += 1
                if line == '':
                    continue
                # Every 'N' in the line becomes a below mark and every 'V' an
                # above mark, for each combination of the two.
                for n in below_marks:
                    for v in above_marks:
                        text = line.replace('N', chr(n))
                        text = text.replace('V', chr(v))
                        ftml.addToTest(None, text, label=f'line {line_number}', comment=f'n={n:04X} v={v:04X}')
                        ftml.closeTest()

    # Write the output ftml file
    ftml.writeFile(args.output)

Пример #7

Показать файл

Файл: syllable.py Проект: silnrsi/palaso-python

 def isbase(char):
     """Tell whether the general category of ``char`` starts with "L"."""
     category = get_ucd(char, 'gc')
     return category.startswith("L")

Пример #8

Показать файл

def doit(args):
    logger = args.logger

    # Read input csv
    builder = FB.FTMLBuilder(logger,
                             incsv=args.input,
                             fontcode=args.fontcode,
                             font=args.ifont,
                             ap=args.ap,
                             rtlenable=True,
                             langs=args.langs)

    # Override default base (25CC) for displaying combining marks
    builder.diacBase = 0x0628  # beh

    def basenameSortKey(uid: int):
        return builder.char(uid).basename.lower()

    # Initialize FTML document:
    test = args.test or "AllChars (NG)"  # Default to AllChars
    ftml = FB.FTML(test,
                   logger,
                   rendercheck=not args.norendercheck,
                   fontscale=args.scale,
                   xslfn=args.xsl,
                   fontsrc=args.fontsrc,
                   defaultrtl=args.rtl)

    if test.lower().startswith("allchars"):
        # all chars that should be in the font:
        ftml.startTestGroup('Encoded characters')
        for uid in sorted(builder.uids()):
            if uid < 32: continue
            c = builder.char(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                builder.render((uid, ), ftml)
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid, ), ftml)
                ftml.clearLang()

        # Add specials and ligatures that were in the glyph_data:
        ftml.startTestGroup('Specials & ligatures from glyph_data')
        for basename in sorted(builder.specials()):
            special = builder.special(basename)
            for featlist in builder.permuteFeatures(uids=special.uids,
                                                    feats=special.feats):
                ftml.setFeatures(featlist)
                builder.render(special.uids, ftml)
                ftml.closeTest()
            ftml.clearFeatures()
            if len(special.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render(special.uids, ftml)
                    ftml.closeTest()
                ftml.clearLang()

        # Add Lam-Alef data manually
        ftml.startTestGroup('Lam-Alef')
        lamlist = list(
            filter(lambda x: x in builder.uids(),
                   (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
        aleflist = list(
            filter(lambda x: x in builder.uids(),
                   (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673,
                    0x0675, 0x0773, 0x0774)))
        for lam in lamlist:
            for alef in aleflist:
                for featlist in builder.permuteFeatures(uids=(lam, alef)):
                    ftml.setFeatures(featlist)
                    builder.render((lam, alef), ftml)
                    ftml.closeTest()
                ftml.clearFeatures()
                if lam == 0x0644 and 'cv02' in builder.features:
                    # Also test lam with hamza above for warsh variants
                    for featlist in builder.permuteFeatures(uids=(lam, 0x0654,
                                                                  alef),
                                                            feats=('cv02', )):
                        ftml.setFeatures(featlist)
                        builder.render((lam, 0x0654, alef), ftml)
                        ftml.closeTest()
                    ftml.clearFeatures()

        # Add Allah data manually
        ftml.startTestGroup('Allah ligatures')
        ftml.addToTest(0xFDF2, r"\uFDF2", comment="Rule 1")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0641\u0644\u0644\u0647",
                       label="f-l-l-h",
                       comment="shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0651\u0670\u0647",
                       label="a-l-l-s-da-hf",
                       comment="Rule 2 (daggeralef)")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0670\u0651\u0647",
                       label="a-l-l-da-s-hf")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0651\u0670\u06C1",
                       label="a-l-l-s-da-hgf")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0670\u0651\u06C1",
                       label="a-l-l-da-s-hgf")
        ftml.closeTest()

        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0651\u064E\u0647",
                       label="a-l-l-s-f-hf",
                       comment="Rule 2 (fatha)")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u064E\u0651\u0647",
                       label="a-l-l-f-s-hf")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0651\u064E\u06C1",
                       label="a-l-l-s-f-hgf")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u064E\u0651\u06C1",
                       label="a-l-l-f-s-hgf")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u06EB\u0644\u064E\u0651\u06C1",
                       label="a-l-M-l-s-da-hgf",
                       comment="Rule 2c: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0641\u0644\u0644\u064E\u0651\u06C1",
                       label="f-l-l-s-da-hgf",
                       comment="Rule 2d: non-alef")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0641\u0627\u0644\u0644\u064E\u0651\u06C1",
                       label="f-a-l-l-s-da-hgf",
                       comment="Rule 2d: not isolate alef")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u06EB\u0644\u0644\u064E\u0651\u06C1",
                       label="a-M-l-l-s-da-hgf",
                       comment="Rule 2d: Mark")
        ftml.closeTest()
        ftml.addToTest(None,
                       r" \u0644\u0644\u0651\u064E\u0647",
                       label="space-l-l-s-da-hf",
                       comment="Rule 2d: shouldn't match")
        ftml.closeTest()

        ftml.addToTest(None,
                       r"\u0627\u0644\u0644\u0647",
                       label="a-l-l-h",
                       comment="Rule 3")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0622\u0644\u0644\u0647", label="aM-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None, r"\u0623\u0644\u0644\u0647", label="aH-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0671\u0644\u0644\u0647",
                       label="aW-l-l-h",
                       comment="won't work")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u06EB\u0644\u0644\u0647",
                       label="a-M-l-l-h")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0641\u0627\u0644\u0644\u0647",
                       label="f-a-l-l-h",
                       comment="Rule 3a: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u06EB\u0644\u0647",
                       label="a-l-M-l-h",
                       comment="Rule 3d: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u200D\u0644\u0647",
                       label="a-l-zwj-l-h",
                       comment="Rule 4a: shouldn't match")
        ftml.closeTest()
        ftml.addToTest(None,
                       r"\u0627\u0644\u200D\u0644\u0651\u0670\u0647",
                       label="a-l-zwj-l-s-da-h",
                       comment="Rule 4a: shouldn't match")
        ftml.closeTest()

    if test.lower().startswith("al sorted"):
        # all AL chars, sorted by shape:
        ftml.startTestGroup('Arabic Letters')
        for uid in sorted(filter(lambda u: get_ucd(u, 'bc') == 'AL',
                                 builder.uids()),
                          key=joinGoupSortKey):
            c = builder.char(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                builder.render((uid, ), ftml)
            ftml.clearFeatures()
            if len(c.langs):
                for langID in builder.allLangs:
                    ftml.setLang(langID)
                    builder.render((uid, ), ftml)
                ftml.clearLang()

    if test.lower().startswith("diac"):
        # Diac attachment:

        doLongTest = 'short' not in test.lower()

        # Representative base and diac chars:
        if doLongTest:
            repDiac = list(
                filter(lambda x: x in builder.uids(),
                       (0x064E, 0x0650, 0x065E, 0x0670, 0x0616, 0x06E3, 0x08F0,
                        0x08F2)))
            repBase = list(
                filter(
                    lambda x: x in builder.uids(),
                    (0x0627, 0x0628, 0x062B, 0x0647, 0x064A, 0x77F, 0x08AC)))
            lamlist = list(
                filter(
                    lambda x: x in builder.uids(),
                    (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
            aleflist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673,
                        0x0675, 0x0773, 0x0774)))
        else:
            repDiac = list(
                filter(lambda x: x in builder.uids(),
                       (0x064E, 0x0650, 0x0670)))
            repBase = list(
                filter(lambda x: x in builder.uids(), (0x0627, 0x0628)))
            lamlist = list(
                filter(
                    lambda x: x in builder.uids(),
                    (0x0644, 0x06B5, 0x06B6, 0x06B7, 0x06B8, 0x076A, 0x08A6)))
            aleflist = list(
                filter(lambda x: x in builder.uids(),
                       (0x0627, 0x0622, 0x0623, 0x0625, 0x0671, 0x0672, 0x0673,
                        0x0675, 0x0773, 0x0774)))

        ftml.startTestGroup(
            'Representative diacritics on all bases that take diacritics')
        for uid in sorted(builder.uids()):
            if uid < 32 or uid in (0xAA, 0xBA): continue
            c = builder.char(uid)
            # Always process Lo, but others only if that take marks:
            if c.general == 'Lo' or c.isBase:
                for diac in repDiac:
                    for featlist in builder.permuteFeatures(uids=(uid, diac)):
                        ftml.setFeatures(featlist)
                        builder.render((uid, diac),
                                       ftml,
                                       addBreaks=False,
                                       dualJoinMode=2)
                        if doLongTest:
                            if diac != 0x0651:  # If not shadda
                                # include shadda, in either order:
                                builder.render((uid, diac, 0x0651),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                                builder.render((uid, 0x0651, diac),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                            if diac != 0x0654:  # If not hamza above
                                # include hamza above, in either order:
                                builder.render((uid, diac, 0x0654),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                                builder.render((uid, 0x0654, diac),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('All Arabic diacritics on representative bases')
        for uid in sorted(builder.uids()):
            # ignore non-ABS marks
            if uid < 0x600 or uid in range(0xFE00, 0xFE10): continue
            c = builder.char(uid)
            if c.general == 'Mn':
                for base in repBase:
                    for featlist in builder.permuteFeatures(uids=(uid, base)):
                        ftml.setFeatures(featlist)
                        builder.render((base, uid),
                                       ftml,
                                       keyUID=uid,
                                       addBreaks=False,
                                       dualJoinMode=2)
                        if doLongTest:
                            if uid != 0x0651:  # if not shadda
                                # include shadda, in either order:
                                builder.render((base, uid, 0x0651),
                                               ftml,
                                               keyUID=uid,
                                               addBreaks=False,
                                               dualJoinMode=2)
                                builder.render((base, 0x0651, uid),
                                               ftml,
                                               keyUID=uid,
                                               addBreaks=False,
                                               dualJoinMode=2)
                            if diac != 0x0670:  # If not superscript alef
                                # include superscript alef, in either order:
                                builder.render((uid, diac, 0x0670),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                                builder.render((uid, 0x0670, diac),
                                               ftml,
                                               addBreaks=False,
                                               dualJoinMode=2)
                    ftml.clearFeatures()
                ftml.closeTest()

        ftml.startTestGroup('Special cases')
        builder.render((0x064A, 0x064E), ftml)  # Yeh + Fatha should keep dots
        builder.render((0x064A, 0x0654), ftml)  # Yeh + Hamza should loose dots
        ftml.closeTest()

        ftml.startTestGroup('LamAlef ligatures')
        diaB = 0x064D
        diaA = 0x064B
        for lam in lamlist:
            for alef in aleflist:
                for featlist in builder.permuteFeatures(uids=(lam, alef)):
                    ftml.setFeatures(featlist)
                    builder.render((lam, alef), ftml, addBreaks=False)
                    builder.render((lam, diaA, alef, diaA),
                                   ftml,
                                   addBreaks=False)
                    builder.render((lam, diaB, alef), ftml, addBreaks=False)
                    builder.render((lam, alef, diaB), ftml, addBreaks=False)
                    builder.render((lam, diaB, alef, diaB),
                                   ftml,
                                   addBreaks=False)
                    ftml.clearFeatures()
                ftml.closeTest()

    if test.lower().startswith("subtending"):
        # Generates sample data for all subtending marks. Data includes sequences of 0 to n+1
        # digits, where n is the maximum expected to be supported on the mark. Latin, Arabic-Indic,
        # and Extended Arabic-Indic digits are included.
        for digitSample in filter(lambda x: x in builder.uids(),
                                  (0x0032, 0x0668, 0x06F8)):
            digitOne = (digitSample & 0xFFF0) + 1
            for uid, lgt in filter(lambda x: x[0] in builder.uids(),
                                   ([0x600, 3], [0x0601, 4], [0x0602, 2], [
                                       0x0603, 4
                                   ], [0x0604, 4], [0x0605, 4], [0x06DD, 3])):
                c = chr(uid)
                label = "U+{0:04X} {1}".format(
                    uid, 'latn' if digitOne == 0x0031 else
                    'arab' if digitOne == 0x0661 else 'urdu')
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, "\u0628" + c + "\u0645", label,
                                   comment)
                    for ln in range(1, lgt + 1):
                        ftml.addToTest(uid, c + chr(digitSample) * ln)
                    ftml.addToTest(uid, c + chr(digitOne) + chr(digitOne + 1))
                ftml.clearFeatures()
                ftml.closeTest()

                if uid == 0x06DD and digitOne == 0x06F1:
                    # Extra items for Eastern digits
                    for featlist in builder.permuteFeatures(uids=(uid,
                                                                  0x06F7)):
                        ftml.setFeatures(featlist)
                        ftml.addToTest(uid, c + "\u06F4\u06F6\u06F7", label,
                                       "4 6 7")
                    ftml.clearFeatures()
                    for langID in builder.allLangs:
                        ftml.setLang(langID)
                        for featlist in ((None, ), (['cv80',
                                                     '1'], ), (['cv80',
                                                                '2'], )):
                            ftml.setFeatures(featlist)
                            ftml.addToTest(uid, c + "\u06F4\u06F6\u06F7",
                                           label, "4 6 7")
                        ftml.clearFeatures()
                    ftml.clearLang()
                    ftml.closeTest()

    if test.lower().startswith("showinv"):
        # Sample data for chars that have a "show invisible" feature
        # The 'r', 'a', 'ra' indicates whether this is standard in Roman fonts, Arabic fonts, or both.
        invlist = [(0x034F, 'r'), (0x061C, 'a'), (0x200B, 'r'), (0x200C, 'ra'),
                   (0x200D, 'ra'), (0x200E, 'ra'), (0x200F, 'ra'),
                   (0x202A, 'ra'), (0x202B, 'ra'), (0x202C, 'ra'),
                   (0x202D, 'ra'), (0x202E, 'ra'), (0x202E, 'r'),
                   (0x2060, 'r'), (0x2061, 'r'), (0x2062, 'r'), (0x2063, 'r'),
                   (0x2066, 'a'), (0x2067, 'a'), (0x2068, 'a'), (0x2069, 'a'),
                   (0xFE00, 'ra'), (0xFE01, 'ra'), (0xFE02, 'ra'),
                   (0xFE03, 'ra'), (0xFE04, 'ra'), (0xFE05, 'ra'),
                   (0xFE06, 'ra'), (0xFE07, 'ra'), (0xFE08, 'ra'),
                   (0xFE09, 'ra'), (0xFE0A, 'ra'), (0xFE0B, 'ra'),
                   (0xFE0C, 'ra'), (0xFE0D, 'ra'), (0xFE0E, 'ra'),
                   (0xFE0F, 'ra')]
        featlist = (('invs', '1'), ('ss06', '1'))
        ftml.setFeatures(featlist)
        for inv in invlist:
            uid = inv[0]
            c = chr(uid)
            label = 'U+{0:04X} ({1})'.format(uid, inv[1])
            comment = builder.char(
                uid).basename if uid in builder.uids() else ""
            ftml.addToTest(uid, " " + c + " ", label, comment)
            ftml.closeTest()
        ftml.clearFeatures()

    if test.lower().startswith('daggeralef'):
        for uid in sorted(builder.uids(), key=joinGoupSortKey):
            if get_ucd(uid, 'jg') not in ('Sad', 'Seen', 'Yeh'):
                # If not Yeh, Sad or seen joining group we're not interested
                continue
            for featlist in builder.permuteFeatures(uids=(uid, 0x0670)):
                ftml.setFeatures(featlist)
                builder.render((uid, 0x0670), ftml)
            ftml.clearFeatures()
            ftml.closeTest()

    if test.lower().startswith('kern'):
        rehs = sorted(
            filter(lambda uid: get_ucd(uid, 'jg') == 'Reh', builder.uids()))
        waws = sorted(
            filter(lambda uid: get_ucd(uid, 'jg') == 'Waw', builder.uids()))
        uids = sorted(filter(
            lambda uid: get_ucd(uid, 'jt') in ('D', 'R') or uid == 0xFD3E,
            builder.uids()),
                      key=joinGoupSortKey)
        # NB: I wondered about including punctuation, i.e.,  get_ucd(uid, 'gc').startswith('P'), but the default
        #     spacing is pretty good and graphite collision avoidance makes it worse, so the only one we need is FDFE

        dbehf = chr(0x066E) + chr(0x200D)  # dotless beh final
        alef = chr(0x0627)  # alef
        zwj = chr(0x200D)  # Zero width joiner
        ma = 0x064B  # Mark above (fathatan)
        mb = 0x064D  # chr(0x064D)     # Mark below (kasratan)

        if "data" not in test.lower():
            ftml.startTestGroup('All the rehs')
            for uid in rehs:
                c = chr(uid)
                label = 'U+{0:04X}'.format(uid)
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, c + dbehf + ' ' + zwj + c + dbehf,
                                   label, comment)
                ftml.clearFeatures()
                ftml.closeTest()

            ftml.startTestGroup('All the waws')
            for uid in waws:
                c = chr(uid)
                label = 'U+{0:04X}'.format(uid)
                comment = builder.char(uid).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, c + dbehf + ' ' + zwj + c + dbehf,
                                   label, comment)
                ftml.clearFeatures()
                ftml.closeTest()

            # reh or waw plus the others
            for uid1 in (0x631, 0x648):  # (reh, waw)
                ftml.startTestGroup('{} + all the others'.format(
                    get_ucd(uid1, 'jg')))
                c1 = chr(uid1)
                for uid2 in uids:
                    c2 = chr(uid2)
                    comment = builder.char(uid2).basename
                    label = 'U+{:04X}'.format(uid2)
                    for featlist in builder.permuteFeatures(uids=(uid1, uid2)):
                        ftml.setFeatures(featlist)
                        if get_ucd(uid2, 'jt') == 'D':
                            ftml.addToTest(uid2, zwj + c1 + c2 + zwj, label,
                                           comment)
                            ftml.addToTest(uid2, c1 + c2 + zwj)
                        ftml.addToTest(uid2, zwj + c1 + c2, label, comment)
                        ftml.addToTest(uid2, c1 + c2)
                    ftml.clearFeatures()
                    ftml.closeTest()

        else:
            # exhaustive test for kerning data extraction
            ftml.defaultRTL = True
            addMarks = "with marks" in test.lower()
            for uid1 in rehs:  # (rehs[0],)
                for uid2 in uids:
                    for featlist in builder.permuteFeatures(uids=(uid1, uid2)):
                        ftml.setFeatures(featlist)
                        builder.render([uid1, uid2],
                                       ftml,
                                       addBreaks=False,
                                       rtl=True,
                                       dualJoinMode=1)
                        if addMarks:
                            builder.render([uid1, uid2, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, uid2, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, uid2, mb, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, uid2, ma, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, ma, uid2],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, ma, uid2, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, ma, uid2, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, ma, uid2, mb, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, ma, uid2, ma, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, mb, uid2],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, mb, uid2, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, mb, uid2, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, mb, uid2, mb, ma],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                            builder.render([uid1, mb, uid2, ma, mb],
                                           ftml,
                                           addBreaks=False,
                                           rtl=True,
                                           dualJoinMode=1)
                    ftml.clearFeatures()
                    ftml.closeTest()

    if test.lower().startswith('chadian'):
        rehs = '[' + ''.join(
            map(
                chr,
                filter(lambda uid: get_ucd(uid, 'jg') == 'Reh',
                       builder.uids()))) + ']'
        uids = '[' + ''.join(
            map(
                chr,
                filter(
                    lambda uid: get_ucd(uid, 'jt') in
                    ('D', 'R') or uid == 0xFD3E, builder.uids()))) + ']'
        marks = '[' + ''.join(
            map(
                chr,
                filter(lambda uid: get_ucd(uid, 'gc').startswith('M'),
                       builder.uids()))) + ']'
        rehwordsRE = re.compile(f'({rehs}{marks}{uids}{marks}*)')
        with open('/SRC/ABS Text Samples/Chad/Chadian Arabic AS word list.txt',
                  encoding="utf8") as f:
            for line_no, line in enumerate(f):
                res = ''
                matches = ''
                lastEnd = 0
                for m in rehwordsRE.finditer(line):
                    if m.start() > 0:
                        res += line[lastEnd:m.start()]
                    # I wish I could output <em> around the kerned pair, something like:
                    #     res += f'<em>{m.group()}</em>'
                    # but apparently ftml.py doesn't support this :-(
                    # So just append
                    res += m.group()
                    # Keep track of all matched strings for feature permutations
                    matches += m.group()
                    lastEnd = m.end()
                if len(res) > 0:
                    # Add tail to result
                    res += line[lastEnd:]
                    # figure features based only on what matched
                    matchedUids = map(ord, list(matches))
                    for featlist in builder.permuteFeatures(uids=matchedUids):
                        ftml.setFeatures(featlist)
                        # Add to test:
                        ftml.addToTest(None, res, f'line {line_no}')
                        ftml.clearFeatures()
                        ftml.closeTest()

    if test.lower().startswith('yehbar'):
        # Yehbarree tail interacting with diacs below previous char
        uids = sorted(filter(lambda uid: get_ucd(uid, 'jt') in ('D', ),
                             builder.uids()),
                      key=basenameSortKey)
        markbelow = r'\u064D'  # kasratan
        markabove = r'\u06EC'  # dotStopabove-ar
        zwj = r'\u200D'  # Zero width joiner

        ftml.startTestGroup('U+06D2 yehbarree')
        yehbarree = r'\u06D2'
        for uid in uids:
            if uid < 32: continue
            c = r'\u{:04X}'.format(uid)
            label = 'U+{:04X}'.format(uid)
            comment = builder.char(uid).basename
            for featlist in builder.permuteFeatures(uids=(uid, )):
                ftml.setFeatures(featlist)
                ftml.addToTest(
                    uid,
                    f"{c}{markabove}{yehbarree} {zwj}{c}{markabove}{yehbarree} {c}{markbelow}{markabove}{yehbarree} {zwj}{c}{markbelow}{markabove}{yehbarree}",
                    label, comment)
                ftml.closeTest()
            ftml.clearFeatures()

        # Also test other forms of yehbarree (yehbarreeHamzaabove-ar, yehbarreeTwoabove, yehbarreeThreeabove-ar)
        ftml.startTestGroup('yehbarree-like')
        for yehbarree in filter(lambda x: x in builder.uids(),
                                (0x06D3, 0x077A, 0x077B)):
            for uid in filter(lambda x: x in builder.uids(), (0x06A0, 0x08B3)):
                c = r'\u{:04X}'.format(uid)
                yb = r'\u{:04X}'.format(yehbarree)
                label = 'U+{:04X} U+{:04X}'.format(uid, yehbarree)
                comment = builder.char(uid).basename + ' ' + builder.char(
                    yehbarree).basename
                for featlist in builder.permuteFeatures(uids=(uid, )):
                    ftml.setFeatures(featlist)
                    ftml.addToTest(
                        uid,
                        f"{c}{markabove}{yb} {zwj}{c}{markabove}{yb} {c}{markbelow}{markabove}{yb} {zwj}{c}{markbelow}{markabove}{yb}",
                        label, comment)
                    ftml.closeTest()
                ftml.clearFeatures()

    if test.lower().startswith('classes'):
        zwj = chr(0x200D)
        lsb = ''  # chr(0xF130)
        rsb = ''  # chr(0xF131)

        glyphsSeen = set()

        uids = sorted(
            filter(lambda uid: builder.char(uid).general == 'Lo' and uid > 255,
                   builder.uids()))
        uids = sorted(uids, key=joinGoupSortKey)
        for uid in uids:
            c = chr(uid)
            thischar = builder.char(uid)
            label = 'U+{:04X}'.format(uid)
            for featlist in builder.permuteFeatures(uids=(uid, )):
                gname = thischar.basename
                if len(featlist) == 1 and featlist[0] is not None:
                    # See if we can find an alternate glyph name:
                    feat = '{}={}'.format(featlist[0][0], featlist[0][1])
                    gname = thischar.altnames.get(feat, gname)
                if gname not in glyphsSeen:
                    glyphsSeen.add(gname)
                    comment = gname
                    ftml.setFeatures(featlist)
                    ftml.addToTest(uid, lsb + c + rsb, label,
                                   comment)  #isolate
                    if get_ucd(uid, 'jt') == 'D':
                        ftml.addToTest(uid, lsb + c + zwj + rsb)  # initial
                        ftml.addToTest(uid,
                                       lsb + zwj + c + zwj + rsb)  # medial
                    if get_ucd(uid, 'jt') in ('R', 'D'):
                        ftml.addToTest(uid, lsb + zwj + c + rsb)  # final
            ftml.clearFeatures()
            ftml.closeTest()

    ftml.writeFile(args.output)

Пример #9

Показать файл

def joinGoupSortKey(uid: int) -> int:
    """Return a sort key ordering characters by joining group, then code point.

    The rank of the character's joining group (UCD property ``jg``) is looked
    up in ``joinGroupKeys``; a group missing from that table gets rank 99.

    The rank is scaled by 0x110000 (one more than the largest Unicode code
    point) instead of 65536, so that a code point above U+FFFF cannot spill
    over into the key range of the next rank and be sorted into the wrong
    group.
    """
    groupRank = joinGroupKeys.get(get_ucd(uid, 'jg'), 99)
    return groupRank * 0x110000 + uid