def __init__(self, uid, basename, logger):
    """Set up the per-character record for one USV.

    uid      -- Unicode scalar value (integer) of the character
    basename -- stored unchanged as ``self.basename``
    logger   -- object whose ``log(message, level)`` method receives warnings

    NOTE(review): when ICU does not define ``uid`` a 'W' message is logged
    and the ICU-derived attributes (general, cc, icuGC, icuJT, icuJG) are
    never assigned, so reading them later raises AttributeError.
    """
    self.logger = logger
    self.uid = uid
    self.basename = basename

    if not Char.isdefined(uid):
        self.logger.log('USV %04X not in ICU; no properties known' % uid, 'W')
    else:
        # general and icuGC both hold the ICU general category (charType).
        self.general = Char.charType(uid)
        self.cc = Char.getCombiningClass(uid)
        self.icuGC = Char.charType(uid)
        self.icuJT = Char.getIntPropertyValue(uid, UProperty.JOINING_TYPE)
        self.icuJG = Char.getIntPropertyValue(uid, UProperty.JOINING_GROUP)

    # Feature tags and language tags that affect this char; filled in later.
    self.feats = set()
    self.langs = set()

    # Additional info from UFO:
    self.takesMarks = False
    self.isMark = False
    self.isBase = False
def is_exemplar_wordbreak(char):
    """Return True if char's Word_Break property is Katakana, ALetter, or MidLetter.

    Any other Word_Break value yields False.
    """
    # PyICU ought to expose names for these values but does not seem to, so
    # the numeric codes are spelled out here. Other Word_Break values exist;
    # only the ones this function needs are listed.
    WB_ALETTER = 1
    WB_KATAKANA = 3
    WB_MIDLETTER = 4

    accepted = (WB_KATAKANA, WB_ALETTER, WB_MIDLETTER)
    return Char.getIntPropertyValue(char, UProperty.WORD_BREAK) in accepted
def is_exemplar_wordbreak(char):
    """Return True if char's Word_Break property is Katakana or ALetter.

    MidLetter (numeric value 4) is not accepted by this version: its
    constant and its comparison are commented out.
    """
    # PyICU ought to expose names for these values but does not seem to, so
    # the numeric codes are spelled out here. Other Word_Break values exist;
    # only the ones this function needs are listed.
    WB_ALETTER = 1
    WB_KATAKANA = 3
    # WB_MIDLETTER = 4  (disabled, see docstring)

    accepted = (WB_KATAKANA, WB_ALETTER)
    return Char.getIntPropertyValue(char, UProperty.WORD_BREAK) in accepted
def render(self, uids, ftml, keyUID=0, addBreaks=True, rtl=None):
    """Generate ftml for a character sequence (general purpose, use is optional).

    uids      -- sequence of USVs; the caller's sequence is not modified
    ftml      -- object receiving the closeTest() / addToTest() calls
    keyUID    -- first argument given to addToTest(); 0 means "use uids[0]"
    addBreaks -- when true, closeTest() is called before and after the test
                 in the contextual-forms and mirrored cases
    rtl       -- passed through unchanged to addToTest()

    Returns None. An empty sequence produces no output at all.
    """
    if len(uids) == 0:
        return

    def _wantsZwj(uid, typeIfLTR, typeOtherwise):
        # True when uid is dual joining, or when its joining type equals the
        # one given for its direction (LEFT_TO_RIGHT versus anything else).
        if Char.getIntPropertyValue(uid, UProperty.JOINING_TYPE) == DUAL_JOINING:
            return True
        if Char.charDirection(uid) == UCharDirection.LEFT_TO_RIGHT:
            expected = typeIfLTR
        else:
            expected = typeOtherwise
        return Char.getIntPropertyValue(uid, UProperty.JOINING_TYPE) == expected

    # Work on a private copy; remember the first uid and the original length
    # because the copy may gain extra characters below.
    seq = list(uids)
    startUID = seq[0]
    originalLength = len(seq)

    # keyUID defaults to the first uid of the sequence.
    if keyUID == 0:
        keyUID = startUID

    # Label lists the USVs, comment lists the glyph names.
    label = '\n'.join('U+{0:04X}'.format(u) for u in seq)
    comment = ' '.join(self._charFromUID[u].basename for u in seq)

    # Does the sequence include a mirrored char?
    hasMirrored = any(Char.isMirrored(u) for u in seq)

    # Look at the first and last non-TRANSPARENT chars: if either is a
    # joining char, examples with zwj need to be emitted.
    joiners = [
        u for u in seq
        if Char.getIntPropertyValue(u, UProperty.JOINING_TYPE) != TRANSPARENT
    ]
    zwjBefore = zwjAfter = False
    if joiners:
        zwjBefore = _wantsZwj(joiners[0], LEFT_JOINING, RIGHT_JOINING)
        zwjAfter = _wantsZwj(joiners[-1], RIGHT_JOINING, LEFT_JOINING)

    if Char.charType(startUID) == UCharCategory.NON_SPACING_MARK:
        # Leading non-spacing mark: give it a suitable base, after which a
        # zwj in front is no longer needed.
        seq.insert(0, self.diacBase)
        zwjBefore = False
    elif Char.isUWhiteSpace(startUID):
        # Leading whitespace: prefix with baseline bracket.
        seq.insert(0, 0xF130)

    nonMarks = [u for u in seq if Char.charType(u) != UCharCategory.NON_SPACING_MARK]
    if Char.isUWhiteSpace(nonMarks[-1]):
        # Last non-mark is whitespace: append baseline bracket.
        seq.append(0xF131)

    s = ''.join(map(chr, seq))

    # Decide what text to emit, whether it carries a label, and whether it
    # is surrounded by closeTest() calls (subject to addBreaks).
    if zwjBefore or zwjAfter:
        # Contextual forms: plain, then zwj after, zwj on both sides,
        # zwj before, and finally the sequence three times in a row.
        forms = [u'{0} '.format(s)]
        if zwjAfter:
            forms.append(u'{0}\u200D '.format(s))
            if zwjBefore:
                forms.append(u'\u200D{0}\u200D '.format(s))
        if zwjBefore:
            forms.append(u'\u200D{0} '.format(s))
            if zwjAfter:
                forms.append(u'{0}{0}{0}'.format(s))
        text = ''.join(forms)
        labelled = True
        standalone = True
    elif hasMirrored and self.rtlEnable:
        # Mirrored char with rtl enabled: show plain, forced-LTR and
        # forced-RTL renderings side by side.
        text = u'{0} LTR: \u202A{0}\u202C RTL: \u202B{0}\u202C'.format(s)
        labelled = True
        standalone = True
    else:
        # (Commented-out placeholders in the original: LRE/RLE/PDF and
        # LRI/RLI/FSI/PDI get no special handling.)
        text = s
        labelled = originalLength > 1  # single chars are emitted without a label
        standalone = False

    if standalone and addBreaks:
        ftml.closeTest()
    if labelled:
        ftml.addToTest(keyUID, text, label=label, comment=comment, rtl=rtl)
    else:
        ftml.addToTest(keyUID, text, comment=comment, rtl=rtl)
    if standalone and addBreaks:
        ftml.closeTest()