def sortingvalue(self):
    """
        DStringANG.sortingvalue

        Build and return the SortingValue object of this string.

        With the "default" sorting method two lists are appended to the
        result, each of them holding one entry per character of <self> :
            (1) a tuple (unknown_char flag, rank of base_char); the rank is
                read from SORTING_ORDER, or is -1 if base_char is not a key
                of SORTING_ORDER.
            (2) the makron flag (0 or 1).

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    # Pylint can't know that <self> has an 'options' member
    # created when <self> has been initialized by new_dstring() :
    # pylint: disable=E1101
    # -> "Instance of 'DStringANG' has no 'options' member"
    method = self.options["sorting method"]

    if method != "default":
        err_msg = "unknown sorting method '{0}'."
        raise DCharsError(context="DStringANG.sortingvalue",
                          message=err_msg.format(method))

    flag_rank = {False: 0, True: 1}

    # base character :
    res.append([
        (flag_rank[dchar.unknown_char],
         SORTING_ORDER[dchar.base_char] if dchar.base_char in SORTING_ORDER else -1)
        for dchar in self
    ])

    # makron :
    res.append([flag_rank[dchar.makron] for dchar in self])

    return res
def sortingvalue(self):
    """
        DStringLAT.sortingvalue

        Build and return the SortingValue object of this string.

        With the "default" sorting method two lists are appended to the
        result, each of them holding one entry per character of <self> :
            (1) a tuple (unknown_char flag, ord(base_char)).
            (2) the rank of the length : None < "short" < "long".

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    # Pylint can't know that <self> has an 'options' member
    # created when <self> has been initialized by new_dstring() :
    # pylint: disable=E1101
    # -> "Instance of 'DStringLAT' has no 'options' member"
    method = self.options["sorting method"]

    if method != "default":
        err_msg = "unknown sorting method '{0}'."
        raise DCharsError(context="DStringLAT.sortingvalue",
                          message=err_msg.format(method))

    flag_rank = {False: 0, True: 1}
    length_rank = {None: 0, "short": 1, "long": 2}

    # base character :
    res.append([(flag_rank[dchar.unknown_char], ord(dchar.base_char))
                for dchar in self])

    # length :
    res.append([length_rank[dchar.length] for dchar in self])

    return res
def sortingvalue(self):
    """
        DCharacterFRO.sortingvalue

        Build and return the SortingValue object of this single character.

        NB : to compare two strings, use DStringFRO.sortingvalue(); this
             method is only meant to compare two isolated characters.

        With the "default" sorting method the result contains :
            o  [1]        for an unknown character
            o  [0, rank]  for a known character, <rank> being read from
                          SORTING_ORDER (-1 if base_char is not a key of
                          SORTING_ORDER).

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError(
            context="DCharacterFRO.sortingvalue",
            message="unknown sorting method =" + str(method),
        )

    # unknown char : nothing else to add.
    if self.unknown_char:
        res.append(1)
        return res

    # known char :
    res.append(0)

    # (the capital_letter attribute is not taken into account.)

    # base_char :
    rank = SORTING_ORDER[self.base_char] if self.base_char in SORTING_ORDER else -1
    res.append(rank)

    return res
def sortingvalue(self):
    """
        DCharacterLAT.sortingvalue

        Build and return the SortingValue object of this single character.

        NB : to compare two strings, use DStringLAT.sortingvalue(); this
             method is only meant to compare two isolated characters.

        With the "default" sorting method the result contains :
            o  [1]                             for an unknown character
            o  [0, ord(base_char), length]     for a known character

        A DCharsError is raised if the sorting method or the value of
        <self.length> is unknown.
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError( context = "DCharacterLAT.sortingvalue",
                           message = "unknown sorting method ="+\
                                     str(method) )

    # unknown char : nothing else to add.
    if self.unknown_char:
        res.append(1)
        return res

    # known char :
    res.append(0)

    # (the capital_letter attribute is not taken into account.)

    # base_char :
    res.append(ord(self.base_char))

    # length :
    # NOTE(review): None and "short" share the same rank here, whereas
    # DStringLAT.sortingvalue() ranks them 0 and 1 -- confirm it's intended.
    if self.length is None or self.length == "short":
        length_rank = 0
    elif self.length == "long":
        length_rank = 1
    else:
        raise DCharsError( context = "DCharacterLAT.sortingvalue",
                           message = "unknown value for length ="+\
                                     str(self.length) )
    res.append(length_rank)

    # (the stress and diaeresis attributes are not taken into account.)

    return res
def sortingvalue(self):
    """
        DStringJPN.sortingvalue

        Build and return the SortingValue object of this string.

        sorting methods :

        o   "default" : cf Kanji & Kana, Hadamitzky and Spahn, p. 22

        With the "default" sorting method four lists are appended to the
        result, each of them holding one entry per character of <self> :
            (1) base character : a tuple (unknown flag, rank)
            (2) small size     : 0 or 1
            (3) diacritic      : None < "dakuten" < "handakuten"
            (4) kind of char   : hiragana(0) < katakana, choonpu(1) < other(2)

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    # Pylint can't know that <self> has an 'options' member
    # created when <self> has been initialized by new_dstring() :
    # pylint: disable=E1101
    # -> "Instance of 'DStringJPN' has no 'options' member"
    method = self.options["sorting method"]

    if method != "default":
        err_msg = "unknown sorting method '{0}'."
        raise DCharsError(context="DStringJPN.sortingvalue",
                          message=err_msg.format(method))

    # (1) base character :
    base_ranks = []
    previous_char = None
    for index, dchar in enumerate(self):

        if dchar.unknown_char:
            entry = (1, 999)

        elif dchar.chartype == 'choonpu':
            # the choonpu symbol is treated as if it was the last vowel
            # cf Kanji & Kana, Hadamitzky and Spahn, p. 22
            if index == 0:
                # problem : no preceding vowel
                entry = (0, 0)
            else:
                vowel = VOWEL_IN_HIRAGANA[previous_char.base_char]
                # normal case   : e.g. after 'か', vowel = 'あ'
                # abnormal case : after 'ん' there's no vowel (None), so
                #                 the order of 'ん' is used instead.
                entry = (0, HIRAGANA_ORDER['ん' if vowel is None else vowel])

        elif dchar.chartype in ('hiragana', 'katakana'):
            entry = (0, HIRAGANA_ORDER[dchar.base_char])

        else:
            # other cases : kanji or unknown symbol.
            entry = (0, ord(dchar.base_char))

        base_ranks.append(entry)
        previous_char = dchar

    res.append(base_ranks)

    # (2) small size :
    smallsize_rank = {False: 0, True: 1}
    res.append([smallsize_rank[dchar.smallsize] for dchar in self])

    # (3) diacritic :
    diacritic_rank = {None: 0, "dakuten": 1, "handakuten": 2}
    res.append([diacritic_rank[dchar.diacritic] for dchar in self])

    # (4) hiragana < katakana :
    chartype_ranks = []
    for dchar in self:
        if dchar.chartype == 'hiragana':
            chartype_ranks.append(0)
        elif dchar.chartype in ('choonpu', 'katakana'):
            # the choonpu symbol is treated as a katakana
            # cf Kanji & Kana, Hadamitzky and Spahn, p. 22
            chartype_ranks.append(1)
        else:
            chartype_ranks.append(2)
    res.append(chartype_ranks)

    return res
def sortingvalue(self):
    """
        DCharacterSAN.sortingvalue

        Build and return the SortingValue object of this single character.

        Output : a list of numbers :
                 [(un)known_char,
                  base_char,
                  nukta,
                  virama/dependent vowel,
                  anusvara/candrabindu,
                  anudatta,
                  accent]
                 ... or [1] alone for an unknown character.

        A DCharsError is raised if the sorting method, the dependent vowel,
        the anusvara/candrabindu or the accent has an unknown value.

        !!! BEWARE : the number #4 (anusvara/candrabindu) will be equal to 999 if
        !!! this character is followed by a visarga : see DStringSAN.sortingvalue()

        !!! BEWARE : if you modify this function, don't forget to change the
        !!! attribute DCharacterSAN.sortingvalue_for_visarga
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError(
            context="DCharacterSAN.sortingvalue",
            message="unknown sorting method =" + str(method),
        )

    # unknown char : nothing else to add.
    if self.unknown_char:
        res.append(1)
        return res

    # known char :
    res.append(0)

    # base_char :
    #   independent vowels are numbered from 0, the visarga and the
    #   consonants from 100; anything else (e.g. "1", ...) gets
    #   1000 + a number computed from its characters.
    independent_vowels = (
        "SHORT A", "A", "AA", "I", "II", "U", "UU",
        "VOCALIC R", "VOCALIC RR", "VOCALIC L", "VOCALIC LL",
        "SHORT E", "E", "AI", "SHORT O", "O", "AU",
    )
    visarga_and_consonants = (
        "DEVANAGARI SIGN VISARGA",
        "KA", "KHA", "GA", "GHA", "NGA",
        "CA", "CHA", "JA", "JHA", "NYA",
        "TTA", "TTHA", "DDA", "DDHA", "NNA",
        "TA", "THA", "DA", "DHA", "NA",
        "PA", "PHA", "BA", "BHA", "MA",
        "YA", "RA", "LA", "LLA", "VA",
        "SHA", "SSA", "SA", "HA",
    )
    base_char_rank = {}
    for rank, name in enumerate(independent_vowels):
        base_char_rank[name] = rank
    for rank, name in enumerate(visarga_and_consonants, 100):
        base_char_rank[name] = rank

    if self.base_char in base_char_rank:
        res.append(base_char_rank[self.base_char])
    else:
        res.append(1000 + sum(ord(char) << index_char
                              for index_char, char in enumerate(self.base_char)))

    # nukta :
    res.append(1 if self.nukta else 0)

    # virama / dependent vowel :
    #   virama -> 0, no dependent vowel -> 1, dependent vowels -> 2, 3, ...
    if self.virama:
        res.append(0)
    else:
        dependent_vowels = (
            None, "A", "AA", "I", "II", "U", "UU",
            "VOCALIC R", "VOCALIC RR", "VOCALIC L", "VOCALIC LL",
            "CANDRA E", "SHORT E", "E", "AI",
            "CANDRA O", "SHORT O", "O", "AU",
        )
        dependentvowel_rank = {}
        for rank, name in enumerate(dependent_vowels, 1):
            dependentvowel_rank[name] = rank

        if self.dependentvowel not in dependentvowel_rank:
            raise DCharsError(
                context="DCharacterSAN.sortingvalue",
                message="unknown value for dependentvowel =" + str(self.dependentvowel),
            )
        res.append(dependentvowel_rank[self.dependentvowel])

    # anusvara / candrabindu :
    anusvara_candrabindu_rank = {
        None: 0,
        "DEVANAGARI SIGN ANUSVARA": 1,
        "DEVANAGARI SIGN CANDRABINDU": 2,
        "DEVANAGARI SIGN INVERTED CANDRABINDU": 3,
    }
    if self.anusvara_candrabindu not in anusvara_candrabindu_rank:
        raise DCharsError(
            context="DCharacterSAN.sortingvalue",
            message="unknown value for anusvara_candrabindu ="
            + str(self.anusvara_candrabindu),
        )
    res.append(anusvara_candrabindu_rank[self.anusvara_candrabindu])

    # anudatta :
    res.append(1 if self.anudatta else 0)

    # accent :
    accent_rank = {
        None: 0,
        "DEVANAGARI STRESS SIGN UDATTA": 1,
        "DEVANAGARI GRAVE ACCENT": 2,
        "DEVANAGARI ACUTE ACCENT": 3,
    }
    if self.accent not in accent_rank:
        raise DCharsError(
            context="DCharacterSAN.sortingvalue",
            message="unknown value for accent =" + str(self.accent)
        )
    res.append(accent_rank[self.accent])

    return res
def sortingvalue(self):
    """
        DCharacterJPN.sortingvalue

        Build and return the SortingValue object of this single character.

        NB : to compare two strings, use DStringJPN.sortingvalue(); this
             method is only meant to compare two isolated characters.

        With the "default" sorting method the result contains :
            o  [1, ord(base_char)]                  for an unknown character
            o  [0, rank, small size, kind of char]  for a known character,
               <rank> being read from HIRAGANA_ORDER.

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError( context = "DCharacterJPN.sortingvalue",
                           message = "unknown sorting method ="+\
                                     str(method) )

    # unknown char :
    if self.unknown_char:
        res.append(1)
        res.append(ord(self.base_char))
        return res

    # known char :
    res.append(0)

    # base_char :
    res.append(HIRAGANA_ORDER[self.base_char])

    # small size ?
    # NOTE(review): this mapping is the opposite of the one used by
    # DStringJPN.sortingvalue() ({False:0, True:1}) -- confirm it's intended.
    smallsize_rank = {True: 0, False: 1}
    res.append(smallsize_rank[self.smallsize])

    # hiragana < katakana < anything else :
    if self.chartype == 'hiragana':
        chartype_rank = 0
    elif self.chartype == 'katakana':
        chartype_rank = 1
    else:
        chartype_rank = 2
    res.append(chartype_rank)

    return res
def sortingvalue(self):
    """
        DStringGRC.sortingvalue

        Build and return the SortingValue object of this string.

        With the "default" sorting method five lists are appended to the
        result, each of them holding one entry per character of <self> :
            (1) base character : a tuple (unknown_char flag, base_char)
            (2) pneuma         : None < "ψιλὸν" < "δασὺ"
            (3) tonos          : None < "ὀξεῖα" < "βαρεῖα" < "περισπωμένη"
            (4) hypogegrammene : 0 or 1
            (5) mekos          : None < "βραχύ" < "μακρόν"

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    # Pylint can't know that <self> has an 'options' member
    # created when <self> has been initialized by new_dstring() :
    # pylint: disable=E1101
    # -> "Instance of 'DStringGRC' has no 'options' member"
    method = self.options["sorting method"]

    if method != "default":
        err_msg = "unknown sorting method '{0}'."
        raise DCharsError(context="DStringGRC.sortingvalue",
                          message=err_msg.format(method))

    flag_rank = {False: 0, True: 1}
    pneuma_rank = {None: 0, "ψιλὸν": 1, "δασὺ": 2}
    tonos_rank = {None: 0, "ὀξεῖα": 1, "βαρεῖα": 2, "περισπωμένη": 3}
    mekos_rank = {None: 0, "βραχύ": 1, "μακρόν": 2}

    # base character :
    res.append([(flag_rank[dchar.unknown_char], dchar.base_char)
                for dchar in self])

    # pneuma :
    res.append([pneuma_rank[dchar.pneuma] for dchar in self])

    # tonos :
    res.append([tonos_rank[dchar.tonos] for dchar in self])

    # hypogegrammene :
    res.append([flag_rank[dchar.hypogegrammene] for dchar in self])

    # mekos :
    res.append([mekos_rank[dchar.mekos] for dchar in self])

    return res
def sortingvalue(self):
    """
        DCharacterGRC.sortingvalue

        Build and return the SortingValue object of this single character.

        NB : to compare two strings, use DStringGRC.sortingvalue(); this
             method is only meant to compare two isolated characters.

        With the "default" sorting method the result contains :
            o  [1, base_char number]  for an unknown character
            o  [0, base_char number, pneuma, tonos, hypogegrammene, mekos]
               for a known character.

        A DCharsError is raised if the sorting method, the pneuma, the
        tonos or the mekos has an unknown value.
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError( context = "DCharacterGRC.sortingvalue",
                           message = "unknown sorting method ="+\
                                     str(method) )

    # (un)known char :
    is_unknown = bool(self.unknown_char)
    res.append(1 if is_unknown else 0)

    # base_char :
    #
    #   Some base_char may contain more than one character, like "β2" :
    #   every character is shifted according to its position.
    #
    res.append(sum(ord(char) << index_char
                   for index_char, char in enumerate(self.base_char)))

    # unknown char : nothing else to add.
    if is_unknown:
        return res

    # pneuma :
    pneuma_rank = {None: 0, "ψιλὸν": 1, "δασὺ": 2}
    if self.pneuma not in pneuma_rank:
        raise DCharsError( context = "DCharacterGRC.sortingvalue",
                           message = "unknown value for pneuma ="+\
                                     str(self.pneuma) )
    res.append(pneuma_rank[self.pneuma])

    # tonos :
    tonos_rank = {None: 0, "ὀξεῖα": 1, "βαρεῖα": 2, "περισπωμένη": 3}
    if self.tonos not in tonos_rank:
        raise DCharsError( context = "DCharacterGRC.sortingvalue",
                           message = "unknown value for tonos ="+\
                                     str(self.tonos) )
    res.append(tonos_rank[self.tonos])

    # hypogegrammene :
    res.append(1 if self.hypogegrammene else 0)

    # mekos :
    # NOTE(review): None and "βραχύ" share the same rank here, whereas
    # DStringGRC.sortingvalue() ranks them 0 and 1 -- confirm it's intended.
    mekos_rank = {None: 0, "βραχύ": 0, "μακρόν": 1}
    if self.mekos not in mekos_rank:
        raise DCharsError( context = "DCharacterGRC.sortingvalue",
                           message = "unknown value for mekos ="+\
                                     str(self.mekos) )
    res.append(mekos_rank[self.mekos])

    return res
def sortingvalue(self):
    """
        DStringHBO.sortingvalue

        Build and return the SortingValue object of this string.

        With the "default" sorting method seven lists are appended to the
        result, each of them holding one entry per character of <self> :
            (1) base character : a tuple (unknown_char flag, base_char)
            (2) shin_sin_dot   : None < shin dot < sin dot
            (3) vowel          : None (0), then the vowel points (1 to 13)
            (4) daghesh_mapiq  : 0 or 1
            (5) methegh        : 0 or 1
            (6) raphe          : 0 or 1
            (7) special point  : None < upper dot < lower dot

        Any other sorting method raises a DCharsError.
    """
    res = SortingValue()

    # Pylint can't know that <self> has an 'options' member
    # created when <self> has been initialized by new_dstring() :
    # pylint: disable=E1101
    # -> "Instance of 'DStringHBO' has no 'options' member"
    method = self.options["sorting method"]

    if method != "default":
        err_msg = "unknown sorting method '{0}'."
        raise DCharsError(context="DStringHBO.sortingvalue",
                          message=err_msg.format(method))

    flag_rank = {False: 0, True: 1}

    shin_sin_dot_rank = {
        None: 0,
        "HEBREW POINT SHIN DOT": 1,
        "HEBREW POINT SIN DOT": 2,
    }

    vowel_rank = {
        None: 0,
        "HEBREW POINT PATAH": 1,
        "HEBREW POINT SEGOL": 2,
        "HEBREW POINT HIRIQ": 3,
        "HEBREW POINT QUBUTS": 4,
        "HEBREW POINT QAMATS": 5,
        "HEBREW POINT QAMATS QATAN": 6,
        "HEBREW POINT TSERE": 7,
        "HEBREW POINT HOLAM": 8,
        "HEBREW POINT HOLAM HASER FOR VAV": 9,
        "HEBREW POINT HATAF SEGOL": 10,
        "HEBREW POINT HATAF PATAH": 11,
        "HEBREW POINT HATAF QAMATS": 12,
        "HEBREW POINT SHEVA": 13,
    }

    specialpoint_rank = {
        None: 0,
        "HEBREW MARK UPPER DOT": 1,
        "HEBREW MARK LOWER DOT": 2,
    }

    # base character :
    res.append([(flag_rank[dchar.unknown_char], dchar.base_char)
                for dchar in self])

    # shin_sin_dot :
    res.append([shin_sin_dot_rank[dchar.shin_sin_dot] for dchar in self])

    # vowel :
    res.append([vowel_rank[dchar.vowel] for dchar in self])

    # daghesh_mapiq :
    res.append([flag_rank[dchar.daghesh_mapiq] for dchar in self])

    # methegh :
    res.append([flag_rank[dchar.methegh] for dchar in self])

    # raphe :
    res.append([flag_rank[dchar.raphe] for dchar in self])

    # special point :
    res.append([specialpoint_rank[dchar.specialpoint] for dchar in self])

    return res
def sortingvalue(self):
    """
        DCharacterHBO.sortingvalue

        Build and return the SortingValue object of this single character.

        NB : to compare two strings, use DStringHBO.sortingvalue(); this
             method is only meant to compare two isolated characters.

        With the "default" sorting method the result contains :
            o  [1]  for an unknown character
            o  [0, ord(base_char), shin_sin_dot, vowel, daghesh_mapiq,
                methegh, raphe, special point]  for a known character.

        A DCharsError is raised if the sorting method, the shin_sin_dot,
        the vowel or the special point has an unknown value.
    """
    res = SortingValue()

    method = self.dstring_object.options["sorting method"]
    if method != "default":
        raise DCharsError( context = "DCharacterHBO.sortingvalue",
                           message = "unknown sorting method ="+\
                                     str(method) )

    # unknown char : nothing else to add.
    if self.unknown_char:
        res.append(1)
        return res

    # known char :
    res.append(0)

    # base_char :
    res.append(ord(self.base_char))

    # shin_sin_dot :
    shin_sin_dot_rank = {
        None: 0,
        "HEBREW POINT SHIN DOT": 1,
        "HEBREW POINT SIN DOT": 2,
    }
    if self.shin_sin_dot not in shin_sin_dot_rank:
        raise DCharsError( context = "DCharacterHBO.sortingvalue",
                           message = "unknown value for shin_sin_dot ="+\
                                     str(self.shin_sin_dot) )
    res.append(shin_sin_dot_rank[self.shin_sin_dot])

    # vowel :
    vowel_rank = {
        None: 0,
        "HEBREW POINT PATAH": 1,
        "HEBREW POINT SEGOL": 2,
        "HEBREW POINT HIRIQ": 3,
        "HEBREW POINT QUBUTS": 4,
        "HEBREW POINT QAMATS": 5,
        "HEBREW POINT QAMATS QATAN": 6,
        "HEBREW POINT TSERE": 7,
        "HEBREW POINT HOLAM": 8,
        "HEBREW POINT HOLAM HASER FOR VAV": 9,
        "HEBREW POINT HATAF SEGOL": 10,
        "HEBREW POINT HATAF PATAH": 11,
        "HEBREW POINT HATAF QAMATS": 12,
        "HEBREW POINT SHEVA": 13,
    }
    if self.vowel not in vowel_rank:
        raise DCharsError( context = "DCharacterHBO.sortingvalue",
                           message = "unknown vowel ="+\
                                     str(self.vowel) )
    res.append(vowel_rank[self.vowel])

    # daghesh_mapiq :
    res.append(1 if self.daghesh_mapiq else 0)

    # methegh :
    res.append(1 if self.methegh else 0)

    # raphe :
    res.append(1 if self.raphe else 0)

    # special point :
    specialpoint_rank = {
        None: 0,
        "HEBREW MARK UPPER DOT": 1,
        "HEBREW MARK LOWER DOT": 2,
    }
    if self.specialpoint not in specialpoint_rank:
        raise DCharsError( context = "DCharacterHBO.sortingvalue",
                           message = "unknown value for special_point ="+\
                                     str(self.specialpoint) )
    res.append(specialpoint_rank[self.specialpoint])

    return res