class PinyinICUTest(NeedsDatabaseTest, unittest.TestCase):
    """
    Test Pinyin tonemark conversion on ICU transformation rule.

    The conversion between Pinyin with tone marks and Pinyin with tone
    numbers done by the ``ReadingFactory`` is compared against the result of
    ICU's ``Latin-NumericPinyin`` transliterator. The comparison needs the
    optional ``PyICU`` module and is skipped if it can't be imported.
    """
    CONVERSION_DIRECTION = ('Pinyin', 'Pinyin')

    def setUp(self):
        """
        Create the reading factory and, if PyICU is available, the ICU
        transliterators for both directions.
        """
        NeedsDatabaseTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

        try:
            import PyICU

            self.toNumeric = PyICU.Transliterator.createInstance(
                "Latin-NumericPinyin", PyICU.UTransDirection.UTRANS_FORWARD)
            self.fromNumeric = self.toNumeric.createInverse()
        except ImportError:
            # PyICU is optional; testToneMarkPlacement() checks for the
            # missing attributes and skips itself
            pass

    def testToneMarkPlacement(self):
        """Test Pinyin tonemark conversion on ICU transformation rule."""
        if not hasattr(self, 'toNumeric'):
            # report the test as skipped instead of silently passing
            self.skipTest("PyICU is not available")

        numberOptions = {'toneMarkType': 'numbers',
            'missingToneMark': 'fifth'}

        # Entities left out of the comparison.
        # NOTE(review): presumably the ICU rule doesn't handle these
        #   syllables (tone marked "hng"/"ng", numbered "ê") -- confirm
        skippedDiacritics = (u'hn\u0304g', u'h\u0144g', u'h\u0148g',
            u'h\u01f9g', u'n\u0304g', u'\u0144g', u'\u0148g', u'\u01f9g')
        skippedNumbers = ('hng1', 'hng2', 'hng3', 'hng4', 'ng1', 'ng2',
            'ng3', 'ng4', u'ê1', u'ê2', u'ê3', u'ê4')

        # tone marks -> tone numbers
        for readingEntity in self.f.getReadingEntities('Pinyin'):
            if readingEntity in skippedDiacritics:
                continue
            targetEntity = self.f.convert(readingEntity, 'Pinyin', 'Pinyin',
                targetOptions=numberOptions)
            self.assertEqual(targetEntity,
                self.toNumeric.transliterate(readingEntity))

        # tone numbers -> tone marks
        for readingEntity in self.f.getReadingEntities('Pinyin',
            toneMarkType='numbers', missingToneMark='fifth'):
            if readingEntity in skippedNumbers:
                continue
            targetEntity = self.f.convert(readingEntity, 'Pinyin', 'Pinyin',
                sourceOptions=numberOptions)
            self.assertEqual(targetEntity,
                self.fromNumeric.transliterate(readingEntity))
class CharacterLookupReadingMethodsTest(CharacterLookupTest,
    unittest.TestCase):
    """
    Runs consistency checks on the reading methods of the
    :class:`~cjklib.characterlookup.CharacterLookup` class.

    .. todo::
        * Impl: include script table from Unicode 5.2.0 to get character
          ranges for Hangul and Kana
    """
    # reading name -> list of additional dialect option dicts to check
    DIALECTS = {}

    # reading name -> entities to use for readings whose operator offers no
    # getReadingEntities()
    SPECIAL_ENTITY_LIST = {}

    def setUp(self):
        """Set up the character lookup fixture and a reading factory."""
        CharacterLookupTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

    def testReadingMappingAvailability(self):
        """
        Test if the readings under
        ``CharacterLookup.CHARARACTER_READING_MAPPING`` are available for
        conversion.
        """
        lookup = self.characterLookup
        mapping = lookup.CHARARACTER_READING_MAPPING

        # mock to simulate availability of all tables in
        #   characterLookup.CHARARACTER_READING_MAPPING
        mockedTables = [tableName for tableName, _ in mapping.values()]
        lookup.db.engine = EngineMock(lookup.db.engine,
            mockTables=mockedTables)

        # with all tables present every mapped reading has to be reported
        for reading in mapping:
            self.assertTrue(lookup.hasMappingForReadingToCharacter(reading))
            self.assertTrue(lookup.hasMappingForCharacterToReading(reading))

        # test proper checking for all known readings
        for reading in self.f.getSupportedReadings():
            for hasMapping in (lookup.hasMappingForReadingToCharacter,
                lookup.hasMappingForCharacterToReading):
                self.assertTrue(hasMapping(reading) in [True, False])

    @attr('slow')
    def testGetCharactersForReadingAcceptsAllEntities(self):
        """Test if ``getCharactersForReading`` accepts all reading entities."""
        lookup = self.characterLookup
        ignoredErrors = (exception.UnsupportedError,
            exception.ConversionError)

        for reading in self.f.getSupportedReadings():
            if not lookup.hasMappingForReadingToCharacter(reading):
                continue

            # always check the default dialect, then the registered ones
            dialects = [{}] + list(self.DIALECTS.get(reading, ()))

            for dialect in dialects:
                operatorClass = self.f.getReadingOperatorClass(reading)
                if hasattr(operatorClass, 'getReadingEntities'):
                    entities = self.f.getReadingEntities(reading, **dialect)
                elif reading in self.SPECIAL_ENTITY_LIST:
                    entities = self.SPECIAL_ENTITY_LIST[reading]
                else:
                    # no source of entities for this reading
                    continue

                context = ' (reading %s, dialect %s)' % (reading, dialect)
                for entity in entities:
                    try:
                        results = lookup.getCharactersForReading(entity,
                            reading, **dialect)

                        self.assertEqual(type(results), list,
                            "Method getCharactersForReading() doesn't return"
                                + " a list for entity %s " % repr(entity)
                                + context)

                        for entry in results:
                            self.assertEqual(len(entry), 1,
                                "Entry %s in result for %s has length != 1"
                                    % (repr(entry), repr(entity))
                                    + context)
                    except ignoredErrors:
                        pass
class CharacterLookupReadingMethodsTest(CharacterLookupTest, unittest.TestCase):
    """
    Runs consistency checks on the reading methods of the
    :class:`~cjklib.characterlookup.CharacterLookup` class.

    .. todo::
        * Impl: include script table from Unicode 5.2.0 to get character
          ranges for Hangul and Kana
    """
    # reading name -> list of additional dialect option dicts to check
    DIALECTS = {}

    # reading name -> entities to use for readings whose operator offers no
    # getReadingEntities()
    SPECIAL_ENTITY_LIST = {}

    def setUp(self):
        """Set up the character lookup fixture and a reading factory."""
        CharacterLookupTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

    def testReadingMappingAvailability(self):
        """
        Test if the readings under
        ``CharacterLookup.CHARARACTER_READING_MAPPING`` are available for
        conversion.
        """
        # mock to simulate availability of all tables in
        #   characterLookup.CHARARACTER_READING_MAPPING
        tables = [table for table, _ \
            in self.characterLookup.CHARARACTER_READING_MAPPING.values()]
        self.characterLookup.db.engine = EngineMock(
            self.characterLookup.db.engine, mockTables=tables)

        # with all tables mocked every mapped reading has to be reported
        for reading in self.characterLookup.CHARARACTER_READING_MAPPING:
            self.assertTrue(
                self.characterLookup.hasMappingForReadingToCharacter(reading))
            self.assertTrue(
                self.characterLookup.hasMappingForCharacterToReading(reading))

        # test proper checking for all known readings
        for reading in self.f.getSupportedReadings():
            self.assertTrue(
                self.characterLookup.hasMappingForReadingToCharacter(reading) \
                    in [True, False])
            self.assertTrue(
                self.characterLookup.hasMappingForCharacterToReading(reading) \
                    in [True, False])

    @attr('slow')
    def testGetCharactersForReadingAcceptsAllEntities(self):
        """Test if ``getCharactersForReading`` accepts all reading entities."""
        for reading in self.f.getSupportedReadings():
            if not self.characterLookup.hasMappingForReadingToCharacter(
                reading):
                continue

            # the default dialect is always checked
            dialects = [{}]
            if reading in self.DIALECTS:
                dialects.extend(self.DIALECTS[reading])

            for dialect in dialects:
                if hasattr(self.f.getReadingOperatorClass(reading),
                    'getReadingEntities'):
                    entities = self.f.getReadingEntities(reading, **dialect)
                elif reading in self.SPECIAL_ENTITY_LIST:
                    entities = self.SPECIAL_ENTITY_LIST[reading]
                else:
                    # no source of entities for this reading
                    continue

                for entity in entities:
                    try:
                        results = self.characterLookup.getCharactersForReading(
                            entity, reading, **dialect)

                        self.assertEqual(type(results), type([]),
                            "Method getCharactersForReading() doesn't return" \
                                + " a list for entity %s " % repr(entity) \
                                + ' (reading %s, dialect %s)' % (reading, dialect))

                        for entry in results:
                            self.assertEqual(len(entry), 1,
                                "Entry %s in result for %s has length != 1" \
                                    % (repr(entry), repr(entity)) \
                                + ' (reading %s, dialect %s)' \
                                    % (reading, dialect))
                    except exception.UnsupportedError:
                        pass
                    except exception.ConversionError:
                        pass
def main(): language, output_encoding = locale.getdefaultlocale() if len(sys.argv) == 2: modus = sys.argv[1] if modus not in modi: print "invalid modus, choose one out of: " + ", ".join(modi.keys()) sys.exit(1) else: print "give a modus, choose one out of: " + ", ".join(modi.keys()) sys.exit(1) fromReading, toReading, entryFunc, readingOpt = modi[modus] initialRules = INITIAL_RULES[(fromReading, toReading)] finialRules = FINAL_RULES[(fromReading, toReading)] extraSyllables = EXTRA_SYLLABLES[(fromReading, toReading)] # entry set global entrySet entrySet = set() # build table and use scheme with almost perfect grouping according to # pronunciation, then use headers to get the initial's and final's # pronunciation. op = ReadingFactory().createReadingOperator(fromReading, **readingOpt) # get splitted syllables, finals in first row, initials in first column for syllable in op.getReadingEntities(): initial, final = op.getOnsetRhyme(syllable) # only apply rules if syllable isn't given an extra mapping in # EXTRA_SYLLABLES if not syllable in extraSyllables: # check if we have rules if initialRules[initial] != None and finialRules[final] != None: # check for ambiguous mappings if type(initialRules[initial]) == type({}): initialFeatures = initialRules[initial].keys() else: initialFeatures = [None] if type(finialRules[final]) == type({}): finalFeatures = finialRules[final].keys() else: finalFeatures = [None] # go through all mappings for initialFeature in initialFeatures: for finalFeature in finalFeatures: if initialFeature: targetInitial \ = initialRules[initial][initialFeature] else: targetInitial = initialRules[initial] if finalFeature: targetFinal = finialRules[final][finalFeature] else: targetFinal = finialRules[final] entry = entryFunc(syllable, targetInitial, targetFinal, initialFeature, finalFeature) if entry != None: entrySet.add(entry) else: print >> sys.stderr, ("missing rule(s) for syllable '" \ + syllable + "' with initial/final '" + initial + "'/'" \ + final + 
"'").encode(output_encoding) # print extra syllables for syllable in extraSyllables: if extraSyllables[syllable]: initialRule, finalRule = extraSyllables[syllable] # check for ambiguous mappings if type(initialRule) == type({}): initialFeatures = initialRule.keys() else: initialFeatures = [None] if type(finalRule) == type({}): finalFeatures = finalRule.keys() else: finalFeatures = [None] # go through all mappings for initialFeature in initialFeatures: for finalFeature in finalFeatures: if initialFeature: targetInitial = initialRule[initialFeature] else: targetInitial = initialRule if finalFeature: targetFinal = finalRule[finalFeature] else: targetFinal = finalRule entry = entryFunc(syllable, targetInitial, targetFinal, initialFeature, finalFeature) if entry != None: entrySet.add(entry) notIncludedSyllables = [syllable for syllable in extraSyllables \ if not extraSyllables[syllable]] if notIncludedSyllables: print >> sys.stderr, ("Syllables not included in table: '" \ + "', '".join(sorted(notIncludedSyllables)) + "'")\ .encode(output_encoding) entryList = list(entrySet) entryList.sort() print "\n".join(entryList).encode(output_encoding)