示例#1
0
class KoelnerTestCases(unittest.TestCase):
    """Unit tests for the Koelner phonetic encoder.

    Exercises abydos.phonetic.Koelner through its ``encode`` and
    ``encode_alpha`` methods, using one shared encoder instance.
    """

    pa = Koelner()

    def test_koelner_phonetik(self):
        """Test abydos.phonetic.Koelner."""
        # (input, expected numeric code) pairs, checked in order.
        numeric_cases = (
            ('', ''),
            # https://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik
            ('Müller-Lüdenscheidt', '65752682'),
            ('Wikipedia', '3412'),
            ('Breschnew', '17863'),
            # http://search.cpan.org/~maros/Text-Phonetic/lib/Text/Phonetic/Koeln.pm
            ('Müller', '657'),
            ('schmidt', '862'),
            ('schneider', '8627'),
            ('fischer', '387'),
            ('weber', '317'),
            ('meyer', '67'),
            ('wagner', '3467'),
            ('schulz', '858'),
            ('becker', '147'),
            ('hoffmann', '0366'),
            ('schäfer', '837'),
            ('cater', '427'),
            ('axel', '0485'),
            # etc. (for code coverage)
            ('Akxel', '0485'),
            ('Adz', '08'),
            ('Alpharades', '053728'),
            ('Cent', '862'),
            ('Acre', '087'),
            ('H', ''),
        )
        for word, expected in numeric_cases:
            self.assertEqual(self.pa.encode(word), expected)

    def test_koelner_phonetik_alpha(self):
        """Test abydos.phonetic.Koelner.encode_alpha."""
        # (input, expected alphabetic code) pairs, checked in order.
        alpha_cases = (
            ('Müller-Lüdenscheidt', 'NLRLTNST'),
            ('Wikipedia', 'FKPT'),
            ('Breschnew', 'PRSNF'),
            ('Müller', 'NLR'),
            ('schmidt', 'SNT'),
            ('schneider', 'SNTR'),
            ('fischer', 'FSR'),
            ('weber', 'FPR'),
            ('meyer', 'NR'),
            ('wagner', 'FKNR'),
            ('schulz', 'SLS'),
            ('becker', 'PKR'),
            ('hoffmann', 'AFNN'),
            ('schäfer', 'SFR'),
            ('cater', 'KTR'),
            ('axel', 'AKSL'),
        )
        for word, expected in alpha_cases:
            self.assertEqual(self.pa.encode_alpha(word), expected)
示例#2
0
 'caverphone_2': Caverphone().encode,
 'daitch_mokotoff_soundex': DaitchMokotoff().encode,
 'davidson': Davidson().encode,
 'dolby': Dolby().encode,
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
 'nysiis': NYSIIS().encode,
 'nysiis_modified': NYSIIS(modified=True).encode,
 'nysiis_ml_inf': NYSIIS(max_length=-1).encode,
 'onca': ONCA().encode,
 'onca_nopad_ml8': ONCA(max_length=8, zero_pad=False).encode,
 'parmar_kumbharana': ParmarKumbharana().encode,
示例#3
0
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
    Waahlin,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Encoder instances constructed once, when this module is imported.
# ``alpha_sis`` is used by the ``algorithms`` table that follows (inside a
# lambda); the remaining names are presumably referenced by later entries of
# that table or by tests further down -- TODO confirm against the full file.
alpha_sis = AlphaSIS()
daitch_mokotoff = DaitchMokotoff()
double_metaphone = DoubleMetaphone()
haase = Haase()
haase_primary = Haase(primary_only=True)  # same encoder class, primary_only on
koelner = Koelner()
russell = RussellIndex()
sfinxbis = SfinxBis()
sfinxbis_6 = SfinxBis(max_length=6)  # same encoder class, max_length=6
soundex_census = Soundex(var='Census')  # Soundex constructed with var='Census'
spfc = SPFC()

algorithms = {
    'ainsworth':
    Ainsworth().encode,
    'alpha_sis':
    lambda _: ', '.join(alpha_sis.encode(_)),
    'bmpm':
    BeiderMorse().encode,
    'bmpm_german':
    BeiderMorse(language_arg='german').encode,
示例#4
0
    RethSchek,
    RogerRoot,
    RussellIndex,
    SPFC,
    SfinxBis,
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Encoder instances constructed once, when this module is imported.
# ``russell`` backs several entries of the ``algorithms`` table that follows;
# uses of ``koelner`` and ``spfc`` are not visible in this part of the file --
# presumably later table entries; verify against the full module.
russell = RussellIndex()
koelner = Koelner()
spfc = SPFC()

algorithms = {
    'russell_index': russell.encode,
    'russell_index_num_to_alpha': lambda _: russell._to_alpha(  # noqa: SF01
        russell.encode(_)
    ),
    'russell_index_alpha': russell.encode_alpha,
    'soundex': Soundex().encode,
    'reverse_soundex': Soundex(reverse=True).encode,
    'soundex_0pad_ml6': Soundex(zero_pad=True, max_length=6).encode,
    'soundex_special': Soundex(var='special').encode,
    'soundex_census': Soundex(var='Census').encode,
    'refined_soundex': RefinedSoundex().encode,
    'refined_soundex_vowels': RefinedSoundex(retain_vowels=True).encode,
示例#5
0
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
    Waahlin,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Encoder instances constructed once, when this module is imported.
# ``alpha_sis`` is used by the ``algorithms`` table that follows (inside a
# lambda); the remaining names are presumably referenced by later entries of
# that table or by tests further down -- TODO confirm against the full file.
alpha_sis = AlphaSIS()
daitch_mokotoff = DaitchMokotoff()
double_metaphone = DoubleMetaphone()
haase = Haase()
haase_primary = Haase(primary_only=True)  # same encoder class, primary_only on
koelner = Koelner()
russell = RussellIndex()
sfinxbis = SfinxBis()
sfinxbis_6 = SfinxBis(max_length=6)  # same encoder class, max_length=6
soundex_census = Soundex(var='Census')  # Soundex constructed with var='Census'
spfc = SPFC()

algorithms = {
    'ainsworth':
    Ainsworth().encode,
    'alpha_sis':
    lambda _: ', '.join(alpha_sis.encode(_)),
    'bmpm':
    BeiderMorse().encode,
    'bmpm_german':
    BeiderMorse(language_arg='german').encode,
示例#6
0
from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# One instance of each phonetic encoder, each constructed with no arguments
# when this module is imported. How the instances are consumed is not visible
# in this part of the file -- presumably by the tests / lookup tables that
# follow; verify against the full module.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()
phonix = Phonix()
pshp_soundex_first = PSHPSoundexFirst()