示例#1
0
 def test_1(self):
     """
             TESTSDCharacterStringGRC.test_1

             Invert an empty dictionary, invert the result a second time
             and check that both results compare equal.
     """
     inverted_once = invertdict({})
     inverted_twice = invertdict(inverted_once)
     self.assertEqual(inverted_once, inverted_twice)
示例#2
0
    ";": "?",
    "!": "!",
    "·": ";",
    '"': '"',
    "'": "'",
    "—": "_",
    ":": ":",
    "\n": "\n",
    "\r": "\r",
    "\t": "\t",
    "‘": "<2018>",
    "’": "<2019>",
    "᾽": "<1FBD>",
}

# Reverse lookup tables, one per forward table, built with invertdict()
# (presumably transliteration -> original character; confirm against the
# definition of invertdict).
# Only LOWER_CASE is inverted with accept_duplicated_values=True, which
# suggests that several of its keys share a value -- TODO confirm.
LOWER_CASE_INVERSED = invertdict(LOWER_CASE, accept_duplicated_values=True)
# Explicitly choose the character that "b" and "s" map back to, replacing
# whatever the inversion stored for these two keys.
LOWER_CASE_INVERSED["b"] = "β"
LOWER_CASE_INVERSED["s"] = "σ"
UPPER_CASE_INVERSED = invertdict(UPPER_CASE)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)

DIACRITICS = {
    "βαρεῖα": "\\",
    "ὀξεῖα": "/",
    "περισπωμένη": "/\\",
    "μακρόν": "_",
    "βραχύ": "-",
    "ψιλὸν": ")",
    "δασὺ": "(",
    "ὑπογεγραμμένη": "+i",
示例#3
0
                 'MARK GUG RTAGS GYON'                  : "<",
                 'MARK GUG RTAGS GYAS'                  : ">",
                 'MARK ANG KHANG GYON'                  : "(",
                 'MARK ANG KHANG GYAS'                  : ")",

                 # = Sanskrit avagraha (अवग्रह) = ऽ
                 'MARK PALUTA'                          : "&",
              }

# DIACRITICS[sign name] = transliterated string
DIACRITICS = {
    # counterpart of the Sanskrit visarga
    'SIGN RNAM BCAD': 'H',
    # "srog med", counterpart of the Sanskrit virama
    'MARK HALANTA': '?',
    # counterpart of the Sanskrit anusvara
    'SIGN RJES SU NGA RO': 'M',
    # the two counterparts of the Sanskrit candrabindu
    'SIGN NYI ZLA NAA DA': '~M`',
    'SIGN SNA LDAN': '~M',
}

# Reverse lookup tables: each forward table is passed through invertdict()
# (presumably giving transliteration -> name; confirm against the definition
# of invertdict).
CONSONANTS_INVERSED = invertdict(CONSONANTS)
VOWELS_INVERSED = invertdict(VOWELS)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)
DIACRITICS_INVERSED = invertdict(DIACRITICS)

示例#4
0
              ' '                                       : ' ',
              '.'                                       : '<.>',
              '\n'                                      : '\n',
              '\r'                                      : '\r',
              '\t'                                      : '\t',
              }

# DIACRITICS[sign name] = transliterated string
DIACRITICS = {
    # stress signs, rendered with the code points U+0301 and U+0331
    'DEVANAGARI STRESS SIGN UDATTA': "\u0301",
    'DEVANAGARI STRESS SIGN ANUDATTA': "\u0331",
    # nasalisation signs
    'DEVANAGARI SIGN CANDRABINDU': 'm̐',  # 006D 0310
    'DEVANAGARI SIGN ANUSVARA': 'ṁ',  # 1E41
}

# Reverse lookup tables: each forward table is passed through invertdict()
# (presumably giving transliteration -> name; confirm against the definition
# of invertdict).
CONSONANTS_INVERSED = invertdict(CONSONANTS)
CONSONANTS_WITH_NUKTA_INVERSED = invertdict(CONSONANTS_WITH_NUKTA)
CONSONANTS_WITH_NUKTA_TO_CONSONANT_INVERSED = invertdict(CONSONANTS_WITH_NUKTA_TO_CONSONANT)
VOWELS_INVERSED = invertdict(VOWELS)
VOWELS_IN_HIATUS_INVERSED = invertdict(VOWELS_IN_HIATUS)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)
DIACRITICS_INVERSED = invertdict(DIACRITICS)

################################################################################
# transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut a run of complex characters into a list of complex characters.
################################################################################

# in order to build the pattern strings for the regexes we have to SORT the
示例#5
0
           }

# OTHER_SYMBOLS[base_char] = transliterated character
# Each of the ten ASCII digits is transliterated as itself.
OTHER_SYMBOLS = {digit: digit for digit in "0123456789"}
# Reverse table for OTHER_SYMBOLS, built with invertdict() (presumably
# transliterated character -> base_char; confirm against invertdict).
OTHER_SYMBOLS_INVERSED = invertdict( OTHER_SYMBOLS )

# PUNCTUATION[base_char] = transliterated character
#
# ABOUT 'weird characters' : some characters are defined in this table only in order to pass tests.
PUNCTUATION = {
     ')'        : ')',
     '('        : '(',
     '['        : '[',
     ']'        : ']',
     '{'        : '{',
     '}'        : '}',
     ' '        : ' ',
     '\n'       : '\n',
     '\r'       : '\r',
     '\t'       : '\t',
示例#6
0
                # 'DIGIT HALF HEIGHT'            : "\\u0F33",
                # 'DIGIT HALF NINE'              : "\\u0F34",

                # = Sanskrit avagraha (अवग्रह) = ऽ
                'MARK PALUTA'                   : "ऽ",
    }

# PUNCTUATION[mark name] = transliterated character
PUNCTUATION = {
    'MARK INTERSYLLABIC TSHEG': " ",
    # rendered as U+0964, the Sanskrit danda
    'MARK SHAD': "\u0964",
}

# DIACRITICS[sign name] = transliterated character
DIACRITICS = {
    'SIGN RNAM BCAD': "\u0903",
    'MARK HALANTA': "\u094D",
    'SIGN RJES SU NGA RO': "\u0902",
    # NOTE: 'SIGN NYI ZLA NAA DA' has no entry; its placeholder value
    # was '???' and the line was left disabled.
    'SIGN SNA LDAN': "\u0901",
}


# Reverse lookup tables: each forward table is passed through invertdict()
# (presumably giving transliteration -> name; confirm against the definition
# of invertdict).
CONSONANTS_INVERSED = invertdict(CONSONANTS)
DEPENDENT_VOWELS_INVERSED = invertdict(DEPENDENT_VOWELS)
INDEPENDENT_VOWELS_INVERSED = invertdict(INDEPENDENT_VOWELS)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)
DIACRITICS_INVERSED = invertdict(DIACRITICS)

示例#7
0
               '\n'                                      : '\n',                 # (???) see above
               '\r'                                      : '\r',                 # (???) see above
               '\t'                                      : '\t',                 # (???) see above
               }

# DIACRITICS[sign name] = transliterated string
DIACRITICS = {
    # nasalisation signs
    'DEVANAGARI SIGN INVERTED CANDRABINDU': '~.N',  # (???) see above
    'DEVANAGARI SIGN CANDRABINDU': '.N',
    'DEVANAGARI SIGN ANUSVARA': 'M',
    # stress signs: a backslash followed by ' or _
    'DEVANAGARI STRESS SIGN UDATTA': "\\'",
    'DEVANAGARI STRESS SIGN ANUDATTA': '\\_',
    # accents, written as bracketed placeholders
    'DEVANAGARI GRAVE ACCENT': "<GRAVE ACCENT>",  # (???) see above
    'DEVANAGARI ACUTE ACCENT': "<ACUTE ACCENT>",  # (???) see above
}

# Reverse lookup tables: each forward table is passed through invertdict()
# (presumably giving transliteration -> name; confirm against the definition
# of invertdict).
CONSONANTS_INVERSED = invertdict(CONSONANTS)
CONSONANTS_URDU_INVERSED = invertdict(CONSONANTS_URDU)
VOWELS_INVERSED = invertdict(VOWELS)
VOWELS_IN_HIATUS_INVERSED = invertdict(VOWELS_IN_HIATUS)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)
DIACRITICS_INVERSED = invertdict(DIACRITICS)
URDU_CONSONANT_2_CONSONANT_INVERSED = invertdict( URDU_CONSONANT_2_CONSONANT )

################################################################################
# transliteration patterns:
# PATTERN  is used to cut one complex character into its elements.
# PATTERN2 is used to cut a run of complex characters into a list of complex characters.
################################################################################

# in order to build the pattern strings for the regexes we have to SORT the
示例#8
0
               ' '       : ' ',
               '.'       : '.',
               ','       : ',',
               ';'       : '?',
               '!'       : '!',
               '·'       : ';',
               '"'       : '"',
               "'"       : "'",
               "—"       : "_",
               ":"       : ":",
               '\n'      : '\n',
               '\r'      : '\r',
               '\t'      : '\t',
              }

# Reverse lookup tables: each forward table is passed through invertdict()
# (presumably giving transliteration -> original character; confirm against
# the definition of invertdict).
CHOONPU_INVERSED = invertdict(CHOONPU)
HIRAGANA_INVERSED = invertdict(HIRAGANA)
HIRAGANA_DAKUTEN_INVERSED = invertdict(HIRAGANA_DAKUTEN)
HIRAGANA_HANDAKUTEN_INVERSED = invertdict(HIRAGANA_HANDAKUTEN)
KATAKANA_INVERSED = invertdict(KATAKANA)
KATAKANA_DAKUTEN_INVERSED = invertdict(KATAKANA_DAKUTEN)
KATAKANA_HANDAKUTEN_INVERSED = invertdict(KATAKANA_HANDAKUTEN)
OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS)
PUNCTUATION_INVERSED = invertdict(PUNCTUATION)

# Be careful: order matters, hence the use of an OrderedDict object.
COMPOSED_TRANSCRIPTIONS = OrderedDict((

        ("shi[-]ya"      , "sha"),
        ("shi[-]yu"      , "shu"),
        ("shi[-]yo"      , "sho"),