示例#1
0
    def init_cangjie(self):
        """Create the Cangjie engine described by the current settings.

        The integer "version" setting selects ``cangjie.versions.CANGJIE<n>``.
        The BIG5, HKSCS and PUNCTUATION filters are always enabled; each
        "include-*" boolean setting that is true adds its own filter(s).
        The resulting engine is stored on ``self.cangjie``.
        """
        version_number = self.settings.get_int("version")
        engine_version = getattr(cangjie.versions, "CANGJIE%d" % version_number)

        # Filters that are active regardless of the optional settings.
        active_filters = (
            cangjie.filters.BIG5
            | cangjie.filters.HKSCS
            | cangjie.filters.PUNCTUATION
        )

        # Boolean setting -> names of the filters it switches on. Names are
        # resolved lazily so a filter attribute is only touched when its
        # setting is enabled, and the settings are queried in a fixed order.
        optional_filters = (
            ("include-allzh", ("CHINESE",)),
            ("include-jp", ("KANJI", "HIRAGANA", "KATAKANA")),
            ("include-zhuyin", ("ZHUYIN",)),
            ("include-symbols", ("SYMBOLS",)),
        )
        for setting_key, filter_names in optional_filters:
            if not self.settings.get_boolean(setting_key):
                continue
            for filter_name in filter_names:
                active_filters |= getattr(cangjie.filters, filter_name)

        self.cangjie = cangjie.Cangjie(engine_version, active_filters)
import cangjie
import numpy as np

"""
Reverse character lookup, i.e. looking up the Cangjie code for a given character.
This will take 3-10 minutes.
Using Debian's pycangjie library:
https://salsa.debian.org/input-method-team/pycangjie
"""

# The library exposes an object-oriented API: build one module-level engine
# for Cangjie version 5, restricted to the CHINESE filter.
cj = cangjie.Cangjie(
    cangjie.versions.CANGJIE5,
    cangjie.filters.CHINESE,
)

# ASCII codes of the lowercase letters 'a'..'z' (97..122). Each code is turned
# into a letter when building the string used to look up a character, so one
# entry here stands for a single letter.
lookup_list = list(range(ord("a"), ord("z") + 1))

# Indices into lookup_list, one per letter of the current lookup string.
# Translating every index through lookup_list and converting the codes to
# letters yields the string used to find characters. Starts as the single
# letter 'a' (index 0).
combine_list = [0]

# Chinese characters span code points 19968 (U+4E00) to 195103 (U+2FA1F).
# Cangjie may not cover the entire range, but the tables are sized for it.
# NOTE(review): the inclusive range holds 175136 code points while the tables
# have 175135 slots. That is harmless if they are filled sequentially, but one
# slot short if they are indexed by code-point offset -- confirm against the
# code that fills them.
_CJK_FIRST_CODEPOINT = 19968
_CJK_LAST_CODEPOINT = 195103
_TABLE_SIZE = _CJK_LAST_CODEPOINT - _CJK_FIRST_CODEPOINT  # 175135

# NumPy arrays are used to save space: fixed-width strings of up to five
# letters for the codes and an integer per entry for the frequencies.
# The arrays are zero-initialised so that any slot never written reads as
# "" / 0 instead of whatever happened to be in the allocated memory.
lookup_table = np.zeros(_TABLE_SIZE, dtype="<U5")
lookup_freqs = np.zeros(_TABLE_SIZE, dtype=int)

# Presumably the number of table entries filled so far -- verify against the
# code that populates the tables.
character_count = 0
示例#3
0
 def setUp(self):
     """Create the Cangjie engine under test and store it on ``self.cj``.

     The engine is built from this object's ``version`` and ``language``
     attributes. Presumably this is the unittest per-test fixture hook --
     confirm against the enclosing class.
     """
     engine_factory = cangjie.Cangjie
     self.cj = engine_factory(self.version, self.language)