def pinyin_indexes(content):
    """Build the set of pinyin index strings for ``content``.

    The pinyin text is obtained from ``find_pinyin(content)`` and split on
    ``,`` and ``;`` into alternative pronunciations.  For every alternative
    two entries, each terminated by a dot, are added to the result: the
    extracted syllables joined by spaces, and the same syllables with the
    tone marks stripped and the tone number appended.

    Returns an empty tuple when ``find_pinyin`` yields a falsy value or
    ``"_"``, or as soon as one alternative contains no pinyin ranges;
    otherwise returns a set of strings.
    """
    reading = find_pinyin(content)
    if not reading or reading == "_":
        return ()

    found = set()
    for variant in re.split(r",|;", reading):
        # Cut every detected pinyin range out of this pronunciation.
        syllables = []
        for span in color.ranges_of_pinyin_in_string(variant):
            syllables.append(span._slice(variant))

        # NOTE(review): this abandons entries collected from earlier
        # variants as well -- confirm that is intended.
        if not syllables:
            return ()

        # Form 1: syllables as written (with diacritics), space separated.
        found.add("%s." % color.utf(" ".join(syllables)))

        # Form 2: diacritics removed, tone number appended to each syllable.
        numbered = []
        for syllable in syllables:
            numbered.append(
                "%s%d" % (
                    color.lowercase_string_by_removing_pinyin_tones(syllable),
                    color.determine_tone(syllable),
                )
            )
        found.add("%s." % color.utf(" ".join(numbered)))

    return found
def pinyin_indexes(content):
    """Return the dot-terminated pinyin index strings derived from ``content``.

    ``find_pinyin(content)`` supplies the raw pinyin, which may list several
    pronunciations separated by ``,`` or ``;``.  Each pronunciation adds two
    strings to the result set: its syllables joined by single spaces, and
    the tone-numbered form of those syllables.

    An empty tuple is returned when there is no pinyin (falsy or ``'_'``)
    or when any pronunciation yields no pinyin ranges.
    """

    def _tone_numbered(syllable):
        # Syllable without tone marks, followed by its tone number.
        return '%s%d' % (
            color.lowercase_string_by_removing_pinyin_tones(syllable),
            color.determine_tone(syllable),
        )

    raw = find_pinyin(content)
    if not raw or raw == '_':
        return ()

    result = set()
    for pronunciation in re.split(r',|;', raw):
        ranges = color.ranges_of_pinyin_in_string(pronunciation)
        parts = [rng._slice(pronunciation) for rng in ranges]
        if not parts:
            # NOTE(review): exits the whole function, not just this
            # pronunciation -- verify against callers.
            return ()

        with_diacritics = ' '.join(parts)
        result.add('%s.' % color.utf(with_diacritics))

        with_numbers = ' '.join([_tone_numbered(part) for part in parts])
        result.add('%s.' % color.utf(with_numbers))

    return result
def pinyin_indexes(content):
    """Collect index strings for every pronunciation found in ``content``.

    The text returned by ``find_pinyin(content)`` is split on ``,`` / ``;``.
    For each piece the pinyin ranges are sliced out and two dot-terminated
    strings are stored: the syllables joined with spaces, and the syllables
    with tones removed and the tone number appended.

    Returns ``()`` if no pinyin is available (falsy or ``'_'``) or if a
    piece contains no pinyin ranges; otherwise returns a set.
    """
    source = find_pinyin(content)
    if not source or source == '_':
        return ()

    entries = set()
    for piece in re.split(r',|;', source):
        # Slice each pinyin range out of the current piece.
        words = [
            found._slice(piece)
            for found in color.ranges_of_pinyin_in_string(piece)
        ]
        if not words:
            # NOTE(review): entries gathered so far are dropped here --
            # confirm that an empty piece should void the whole result.
            return ()

        # Plain form: diacritics kept, words separated by whitespace.
        plain = u' '.join(words)
        entries.add('%s.' % color.utf(plain))

        # Numbered form: one "<toneless word><tone>" item per sub word.
        numbered_words = [
            '%s%d' % (
                color.lowercase_string_by_removing_pinyin_tones(word),
                color.determine_tone(word),
            )
            for word in words
        ]
        entries.add('%s.' % color.utf(u' '.join(numbered_words)))

    return entries
def testFristTone(self):
    # Both samples carry a macron and must be reported as tone 1.
    for syllable in ('fāng', 'yī'):
        self.assertEqual(1, colorize_pinyin.determine_tone(syllable))
def testMixedPinyin(self):
    # The sample contains two different tone marks; the expected result
    # is 3 (presumably the first mark decides -- confirm in determine_tone).
    detected = colorize_pinyin.determine_tone('bǎiwén')
    self.assertEqual(3, detected)
def testNonPinyin(self):
    # Cyrillic text is not pinyin; the expected tone is 0.
    detected = colorize_pinyin.determine_tone('бурда')
    self.assertEqual(0, detected)
def testZeroTone(self):
    # Syllables written without any tone mark are expected to give 0.
    for syllable in ('de', 'ning'):
        self.assertEqual(0, colorize_pinyin.determine_tone(syllable))
def testFourthTone(self):
    # Grave-accented samples (one and two syllables) must give tone 4.
    for text in ('àn', 'dìnggòu'):
        self.assertEqual(4, colorize_pinyin.determine_tone(text))
def testThirdTone(self):
    # Caron-marked samples, including one on the letter ü, must give tone 3.
    for syllable in ('fǎn', 'lǚ'):
        self.assertEqual(3, colorize_pinyin.determine_tone(syllable))
def testSecondTone(self):
    # Acute-accented samples must be reported as tone 2.
    for syllable in ('gán', 'xún'):
        self.assertEqual(2, colorize_pinyin.determine_tone(syllable))