Exemplo n.º 1
0
    def normalize_unicode_buffer(self):
        """Decompose the buffer to NFD, then recompose adjacent pairs.

        Every codepoint in ``self.buffer.items`` is expanded to its canonical
        decomposition.  The expanded list is then scanned left to right:

        * a codepoint whose General_Category starts with ``"M"`` (a mark) is
          emitted unchanged;
        * a non-mark codepoint that canonically composes with its immediate
          successor is replaced in place by the composed character, which is
          then re-examined against the next codepoint;
        * anything else is emitted unchanged.

        The resulting string is written back with
        ``self.buffer.store_unicode``.
        """
        decomposed = []
        for item in self.buffer.items:
            decomposed.extend(
                ord(ch)
                for ch in unicodedata.normalize("NFD", chr(item.codepoint))
            )

        # Now recompose
        output = []
        ix = 0
        while ix < len(decomposed):
            a = decomposed[ix]
            if ix + 1 == len(decomposed):
                # Last codepoint: there is nothing left to pair it with.
                output.append(chr(a))
                break

            if ucd_data(a)["General_Category"][0] == "M":
                # Marks never start a composition; pass them through.
                output.append(chr(a))
                ix += 1
                continue

            pair = chr(a) + chr(decomposed[ix + 1])
            composed = unicodedata.normalize("NFC", pair)
            if len(composed) == 1:
                # The pair collapsed into a single precomposed character.
                # Store it in place of ``a`` and drop ``b``, then loop again
                # without advancing so it can combine with what follows.
                # (The previous code asserted on the length of the
                # two-character pair and called ord() on an undefined name,
                # so this branch could never succeed.)
                decomposed[ix] = ord(composed)
                del decomposed[ix + 1]
                continue

            output.append(chr(a))
            ix += 1

        self.buffer.store_unicode("".join(output))
Exemplo n.º 2
0
 def substitute_default(self):
     """Assign a joining form to every buffer item and set feature masks.

     After running the parent class's ``substitute_default``, each item in
     ``self.buffer.items`` gets an ``arabic_joining`` value:

     * The joining type comes from the UCD ``Joining_Type`` property.  When
       that is missing, non-spacing marks (``Mn``), enclosing marks (``Me``)
       and format characters (``Cf``) are treated as transparent (``"T"``)
       and everything else as non-joining (``"U"``).
     * Transparent items are skipped and keep ``"NONE"``.
     * The remaining items drive ``state_table``; each step yields the form
       for the previous non-transparent item, the form for the current item
       and the next state.

     Buffers whose script is ``"Mongolian"`` additionally go through
     ``self.mongolian_variation_selectors()``.  Finally, for every feature
     in ``arabic_features`` that the plan's font features define, each
     item's ``feature_masks[feature]`` is set to ``True`` when the item's
     joining form differs from that feature and ``False`` when it matches.
     """
     super().substitute_default()
     state = 0
     prev_item = None
     for item in self.buffer.items:
         item.arabic_joining = "NONE"
         ucd = ucd_data(item.codepoint)
         joining = ucd.get("Joining_Type")
         if not joining:
             # "Me" (Enclosing Mark) is the real General_Category value; the
             # previous "Em" matched nothing, so enclosing marks were never
             # treated as transparent.
             if ucd.get("General_Category") in ("Mn", "Cf", "Me"):
                 joining = "T"
             else:
                 joining = "U"
         if joining == "T":
             continue
         if joining == "C":
             joining = "D"  # Mongolian
         # Two joining groups get their own columns in the state table.
         joining_group = ucd.get("Joining_Group")
         if joining_group == "ALAPH":
             joining = "ALAPH"
         if joining_group == "DALATH RISH":
             joining = "DALATH_RISH"
         prev, this, state = state_table[state][jts[joining]]
         if prev_item is not None:
             prev_item.arabic_joining = prev
         item.arabic_joining = this
         prev_item = item
     if self.buffer.script == "Mongolian":
         self.mongolian_variation_selectors()
     self.plan.msg("Assigned Arabic joining",
                   self.buffer,
                   serialize_options=["arabic_joining"])
     for f in arabic_features:
         if f not in self.plan.fontfeatures.features:
             continue
         for item in self.buffer.items:
             item.feature_masks[f] = item.arabic_joining != f
Exemplo n.º 3
0
def set_matra_position(item):
    """Refine ``item.syllabic_position`` using script-specific rules.

    Items positioned ``PRE_C``, ``POST_C``, ``ABOVE_C`` or ``BELOW_C`` are
    remapped through the matching ``matra_pos_*`` table, keyed by the item's
    UCD ``Script`` with a ``"Default"`` fallback.  Telugu and Kannada
    ``POST_C`` items are instead decided by codepoint range and become
    ``BEFORE_SUB`` or ``AFTER_SUB``.  Any other position is left untouched.
    """
    script = ucd_data(item.codepoint)["Script"]
    codepoint = item.codepoint
    current = item.syllabic_position

    if current == IndicPosition.PRE_C:
        table = matra_pos_left
    elif current == IndicPosition.POST_C:
        table = matra_pos_right
        if script == "Telugu":
            # Telugu: codepoints up to U+0C42 go before the subjoined forms.
            item.syllabic_position = (
                IndicPosition.BEFORE_SUB
                if codepoint <= 0x0C42
                else IndicPosition.AFTER_SUB
            )
            return
        if script == "Kannada":
            # Kannada: only U+0CC3..U+0CD6 go after the subjoined forms.
            if 0x0CC3 <= codepoint <= 0xCD6:
                item.syllabic_position = IndicPosition.AFTER_SUB
            else:
                item.syllabic_position = IndicPosition.BEFORE_SUB
            return
    elif current == IndicPosition.ABOVE_C:
        table = matra_pos_top
    elif current == IndicPosition.BELOW_C:
        table = matra_pos_bottom
    else:
        return

    fallback = table["Default"]
    item.syllabic_position = table.get(script, fallback)
Exemplo n.º 4
0
 def assign_category(self, item):
     """Fill in the syllabic and positional fields of ``item`` from the UCD.

     ``syllabic_category`` is the UCD ``Indic_Syllabic_Category`` (default
     ``"Other"``) translated through ``syllabic_category_map`` (default
     ``"X"``).  ``positional_category`` is the UCD
     ``Indic_Positional_Category`` (default ``"x"``), and
     ``syllabic_position`` is derived from it.  ``self.reassign_category``
     is called last with the populated item.
     """
     # Base behavior is Indic
     properties = ucd_data(item.codepoint)

     indic_category = properties.get("Indic_Syllabic_Category", "Other")
     item.syllabic_category = syllabic_category_map.get(indic_category, "X")

     item.positional_category = properties.get(
         "Indic_Positional_Category", "x")
     item.syllabic_position = IndicPositionalCategory2IndicPosition(
         item.positional_category)

     self.reassign_category(item)
Exemplo n.º 5
0
 def guess_segment_properties(self):
     """Fill in ``self.script`` and ``self.direction`` when they are unset.

     The script becomes the UCD ``Script`` of the first item whose script is
     not ``Common``, ``Unknown`` or ``Inherited``.  The direction is then
     looked up from the script via ``_script_direction``.  Values that are
     already set are left alone.
     """
     neutral_scripts = ("Common", "Unknown", "Inherited")
     for entry in self.items:
         # Guess segment properties
         if self.script:
             continue
         candidate = ucd_data(entry.codepoint)["Script"]
         if candidate in neutral_scripts:
             continue
         self.script = candidate

     if self.direction:
         return
     # Imported here, as in the original, rather than at module level.
     from fontFeatures.shaperLib.Shaper import _script_direction
     self.direction = _script_direction(self.script)
Exemplo n.º 6
0
    def normalize_unicode_buffer(self):
        """Store the NFC form of the buffer, then apply two fallbacks.

        The buffer's codepoints are joined into a string, normalized to NFC
        and written back with ``self.buffer.store_unicode``.  Afterwards:

        * any item whose General_Category is ``Zs`` becomes U+0020 when
          ``self.font.glyphForCodepoint(0x20, False)`` is truthy;
        * U+2011 becomes U+2010 when
          ``self.font.glyphForCodepoint(0x2010, False)`` is truthy.
        """
        text = "".join(chr(entry.codepoint) for entry in self.buffer.items)
        normalized = unicodedata.normalize("NFC", text)
        self.buffer.store_unicode(normalized)

        # Some fix-ups from hb-ot-shape-normalize
        for entry in self.buffer.items:
            category = ucd_data(entry.codepoint)["General_Category"]
            if category == "Zs" and self.font.glyphForCodepoint(0x20, False):
                # Harfbuzz adjusts the width here, in
                # _hb_ot_shape_fallback_spaces
                entry.codepoint = 0x20
            if entry.codepoint == 0x2011 and self.font.glyphForCodepoint(
                    0x2010, False):
                entry.codepoint = 0x2010
Exemplo n.º 7
0
 def normalize_to_glyphs(self, font):
     """Build ``self.info`` with one ``BufferInfo`` per input character.

     Each record holds the character's index, the character itself, its
     codepoint, its UCD data and the glyph that ``font.mapping`` gives for
     the codepoint, together with zeroed ``unicode_props``/``mask`` fields
     and ``isMark`` set to ``False``.  A codepoint absent from
     ``font.mapping`` raises ``KeyError``.
     """
     cmap = font.mapping
     records = []
     for position, character in enumerate(self.characters):
         codepoint = ord(character)
         record = {
             "position": position,
             "original": character,
             "unicode_props": 0,
             "ucd_data": youseedee.ucd_data(codepoint),
             "codepoint": codepoint,
             "glyph": cmap[codepoint],
             "isMark": False,
             "mask": 0,
         }
         records.append(BufferInfo(record))
     # Assign only once everything has been built, so a failure part-way
     # through leaves any previous ``self.info`` untouched.
     self.info = records
Exemplo n.º 8
0
 def _fallback_categorize(self):
     """Set ``self.category`` from the codepoint's Unicode General_Category.

     The category is a ``(kind, None)`` tuple.  A falsy codepoint gives
     ``"unknown"``.  Otherwise marks (``M*``) give ``"mark"``, ``Ll`` gives
     ``"ligature"``, any other letter (``L*``) gives ``"base"`` and
     everything else gives ``"unknown"``.  A missing General_Category is
     treated as ``"L"``.
     """
     if not self.codepoint:
         # Now what?
         self.category = ("unknown", None)
         return

     general_category = ucd_data(self.codepoint).get("General_Category", "L")
     major_class = general_category[0]
     if major_class == "M":
         kind = "mark"
     elif general_category == "Ll":
         # NOTE(review): lowercase letters are reported as ligatures here;
         # confirm that this is intended.
         kind = "ligature"
     elif major_class == "L":
         kind = "base"
     else:
         kind = "unknown"
     self.category = (kind, None)
Exemplo n.º 9
0
    def normalize_unicode_buffer(self):
        """Decompose the buffer with Indic-specific exceptions.

        Decomposition, per codepoint:

        * U+0931, U+09DC, U+09DD and U+0B94 are kept as they are;
        * the Sinhala split matras U+0DDA, U+0DDC, U+0DDD and U+0DDE are
          prefixed with U+0DD9 when ``self.would_substitute("pstf", ...)``
          accepts the matra's glyph, and kept as they are otherwise;
        * everything else is replaced by its NFD decomposition.

        Recomposition is limited to one pair: U+09AF followed by U+09BC
        becomes U+09DF.  The result is written back with
        ``self.buffer.store_unicode``.
        """
        kept_composed = (0x0931, 0x09DC, 0x09DD, 0x0B94)
        sinhala_split_matras = (0x0DDA, 0x0DDC, 0x0DDD, 0x0DDE)

        decomposed = []
        for item in self.buffer.items:
            cp = item.codepoint
            if cp in kept_composed:
                decomposed.append(cp)
            elif cp in sinhala_split_matras:
                glyph = BufferItem.new_unicode(cp)
                glyph.map_to_glyph(self.buffer.font)
                if self.would_substitute("pstf", [glyph]):
                    decomposed.extend([0x0DD9, cp])
                else:
                    decomposed.append(cp)
            else:
                decomposed.extend(
                    ord(ch) for ch in unicodedata.normalize("NFD", chr(cp)))

        # Now recompose
        # NOTE: general canonical recomposition of pairs is not performed in
        # this method; only the explicit U+09AF U+09BC pair is handled.  The
        # per-pair NFC computation that used to live here was never read and
        # has been removed.
        output = []
        ix = 0
        while ix < len(decomposed):
            a = decomposed[ix]
            if ix + 1 == len(decomposed):
                # Last codepoint: there is nothing left to pair it with.
                output.append(chr(a))
                break

            b = decomposed[ix + 1]
            if ucd_data(a)["General_Category"][0] == "M":
                # Marks never start a composition; pass them through.
                output.append(chr(a))
                ix += 1
            elif a == 0x9AF and b == 0x9BC:
                output.append(chr(0x9DF))
                ix += 2
            else:
                output.append(chr(a))
                ix += 1

        self.buffer.store_unicode("".join(output))
Exemplo n.º 10
0
 def assign_category(self, item):
     """Set ``item``'s categories from the UCD ``USE_Category`` property.

     ``syllabic_category`` falls back to ``"X"`` when the property is
     missing; ``positional_category`` is always ``"x"``.
     """
     properties = ucd_data(item.codepoint)
     item.syllabic_category = properties.get("USE_Category", "X")
     # Separate positional categories are not used, it's all in the
     # syllabic_category
     item.positional_category = "x"
Exemplo n.º 11
0
    def final_reordering_syllable(self, start, end):
        """Apply final-reordering fix-ups to the syllable ``items[start:end]``.

        The steps, in order:

        1. Items whose glyph equals the configured virama's glyph and that
           are flagged both ``ligated`` and ``multiplied`` are re-tagged as
           category ``"H"`` and have both flags cleared.
        2. The base is located: the first item at or after ``BASE_C``,
           adjusted for a pending ``pref`` feature, for Malayalam below-base
           consonants, for a trailing ZWJ and for trailing ``"N"``/``"H"``
           items.
        3. Matra reordering is not implemented yet (see the TODO below).
        4. ``feature_masks["init"]`` is set to ``True`` on every item of the
           syllable, then to ``False`` on the first item when it is at
           ``PRE_M`` and is either at the start of the buffer or preceded by
           a glyph whose General_Category is outside the listed set.

        NOTE(review): the structure appears to follow HarfBuzz's Indic
        shaper; confirm against that implementation when finishing the TODO.

        Args:
            start: index of the first buffer item of the syllable.
            end: index one past the last buffer item of the syllable.
        """
        items = self.buffer.items

        def cat(i):
            return items[i].syllabic_category

        def pos(i):
            return items[i].syllabic_position

        def swap(a, b):
            # Not called yet; kept for the unimplemented matra reordering.
            items[b], items[a] = items[a], items[b]

        def is_joiner(n):
            return cat(n) in ("ZWJ", "ZWNJ")

        def is_halant(n):
            return cat(n) == "H"

        def is_consonant(n):
            return cat(n) in (
                "C", "CS", "Ra", "V", "PLACEHOLDER", "DOTTEDCIRCLE", "CM"
            )

        # Step 1: restore the halant category on virama glyphs that were both
        # ligated and multiplied.
        virama_item = BufferItem.new_unicode(self.config["virama"])
        virama_item.map_to_glyph(self.buffer.font)
        if virama_item.glyph != ".notdef":
            for i in range(start, end):
                item = items[i]
                if (item.glyph == virama_item.glyph and item.ligated
                        and item.multiplied):
                    item.syllabic_category = "H"
                    item.ligated = False
                    item.multiplied = False

        # A ``pref`` mask of False anywhere in the buffer means the base
        # search must look for a pre-base-reordering form.
        try_pref = any(
            not item.feature_masks.get("pref", True) for item in items
        )

        # Step 2: find the base.
        base = start
        while base < end:
            if pos(base) >= IndicPosition.BASE_C:
                if try_pref and base + 1 < end:
                    for i in range(base + 1, end):
                        item = items[i]
                        if not item.feature_masks.get("pref", True):
                            if not (item.substituted and
                                    (item.ligated and not item.multiplied)):
                                base = i
                                while base < end and is_halant(base):
                                    base = base + 1
                                # Skipping halants can run off the end of the
                                # syllable; only tag a base that is inside it.
                                if base < end:
                                    # pos() reads ``syllabic_position``, so
                                    # that is the attribute to update.
                                    items[base].syllabic_position = \
                                        IndicPosition.BASE_C
                                try_pref = False
                            break
                if self.buffer.script == "Malayalam":
                    # Walk halant (+ optional joiners) + consonant sequences
                    # and move the base onto below-base consonants.
                    i = base + 1
                    while i < end:
                        while i < end and is_joiner(i):
                            i = i + 1
                        if i == end or not is_halant(i):
                            break
                        i = i + 1
                        while i < end and is_joiner(i):
                            i = i + 1
                        if (i < end and is_consonant(i)
                                and pos(i) == IndicPosition.BELOW_C):
                            base = i
                            items[base].syllabic_position = \
                                IndicPosition.BASE_C
                        i = i + 1
                if start < base and pos(base) > IndicPosition.BASE_C:
                    base = base - 1
                break
            base = base + 1

        # No base found and the syllable ends in ZWJ: step back onto it.
        # (Previously indexed with a stale loop variable instead of 1.)
        if base == end and start < base and cat(base - 1) == "ZWJ":
            base = base - 1
        if base < end:
            while start < base and cat(base) in ("N", "H"):
                base = base - 1

        # Reorder matras
        # TODO: not implemented; only the target position is computed so far.
        if start + 1 < end and start < base:
            new_pos = base - 1
            if base == end:
                new_pos = base - 2
            # XXX

        # Step 4: ``init`` feature mask.
        for i in range(start, end):
            items[i].feature_masks["init"] = True
        if pos(start) == IndicPosition.PRE_M:
            # NOTE(review): despite its name this is a plain copy of
            # ``unicode_map``, not an inversion, yet it is looked up by glyph
            # below.  Confirm which direction ``unicode_map`` maps before
            # changing it.
            reverse_map = {
                k: v
                for k, v in self.buffer.font.unicode_map.items()
            }
            if start == 0 or ucd_data(
                    reverse_map.get(items[start - 1].glyph,
                                    0))["General_Category"] not in [
                                        "Cf", "Cn", "Co", "Cs", "Ll", "Lm",
                                        "Lo", "Lt", "Lu", "Mc", "Me", "Mn"
                                    ]:
                items[start].feature_masks["init"] = False
Exemplo n.º 12
0
from youseedee import ucd_data
from pprint import pprint
import sys

# Pretty-print the UCD record for the character given on the command line.
# The argument is either a single literal character or a hexadecimal
# codepoint of two or more characters, optionally prefixed with "U+".
if len(sys.argv) < 2 or not sys.argv[1]:
    # Exit with a usage message instead of an IndexError/TypeError traceback.
    sys.exit("usage: %s <character | hex codepoint>" % sys.argv[0])

char = sys.argv[1]
if len(char) > 1:
    # int(..., 16) already accepts a "0x" prefix; strip "U+" ourselves.
    hex_digits = char[2:] if char[:2].upper() == "U+" else char
    pprint(ucd_data(int(hex_digits, 16)))
else:
    pprint(ucd_data(ord(char)))