示例#1
0
class MetaSoundexTestCases(unittest.TestCase):
    """Check MetaSoundex encodings against known reference values.

    test cases for abydos.phonetic.MetaSoundex
    """

    pa = MetaSoundex()

    def test_metasoundex(self):
        """Test abydos.phonetic.MetaSoundex."""
        encode = self.pa.encode

        # Base cases: empty input, with and without an explicit language.
        for kwargs, expected in (
            ({}, '0000'),
            ({'lang': 'en'}, '0000'),
            ({'lang': 'es'}, ''),
        ):
            self.assertEqual(encode('', **kwargs), expected)

        # Each row is (surname, expected 'en' code, expected 'es' code).
        # Top 10 Anglo surnames in US
        anglo_surnames = (
            ('Smith', '4500', '4632'),
            ('Johnson', '1525', '82646'),
            ('Williams', '7452', '564'),
            ('Brown', '7650', '196'),
            ('Jones', '1520', '864'),
            ('Miller', '6460', '659'),
            ('Davis', '3120', '314'),
            ('Wilson', '7425', '546'),
            ('Anderson', '0536', '63946'),
            ('Thomas', '6200', '364'),
        )
        # Top 10 Mexican surnames
        mexican_surnames = (
            ('Hernández', '5653', '96634'),
            ('García', '5620', '894'),
            ('Lòpez', '8120', '504'),
            ('Martìnez', '6635', '69364'),
            ('Rodrìguez', '9362', '93984'),
            ('González', '5524', '86454'),
            ('Pérez', '7620', '094'),
            ('Sánchez', '4520', '4644'),
            ('Gómez', '5520', '864'),
            ('Flores', '7462', '2594'),
        )

        # Within each group, every English expectation is checked before any
        # Spanish one.
        for surnames in (anglo_surnames, mexican_surnames):
            for surname, en_code, _ in surnames:
                self.assertEqual(encode(surname, lang='en'), en_code)
            for surname, _, es_code in surnames:
                self.assertEqual(encode(surname, lang='es'), es_code)

        # Test wrapper
        for surname, lang, expected in (
            ('Smith', 'en', '4500'),
            ('Hernández', 'es', '96634'),
        ):
            self.assertEqual(metasoundex(surname, lang=lang), expected)
示例#2
0
 HenryEarly().encode,
 'henry_early_ml8':
 HenryEarly(max_length=8).encode,
 'koelner_phonetik':
 koelner.encode,
 'koelner_phonetik_num_to_alpha': (
     lambda _: koelner._to_alpha(koelner.encode(_))  # noqa: SF01
 ),
 'koelner_phonetik_alpha':
 koelner.encode_alpha,
 'lein':
 LEIN().encode,
 'lein_nopad_ml8':
 LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex':
 MetaSoundex().encode,
 'metasoundex_es':
 MetaSoundex(lang='es').encode,
 'metaphone':
 Metaphone().encode,
 'mra':
 MRA().encode,
 'norphone':
 Norphone().encode,
 'nrl':
 NRL().encode,
 'nysiis':
 NYSIIS().encode,
 'nysiis_modified':
 NYSIIS(modified=True).encode,
 'nysiis_ml_inf':
示例#3
0
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
 'nysiis': NYSIIS().encode,
 'nysiis_modified': NYSIIS(modified=True).encode,
 'nysiis_ml_inf': NYSIIS(max_length=-1).encode,
 'onca': ONCA().encode,
 'onca_nopad_ml8': ONCA(max_length=8, zero_pad=False).encode,
 'parmar_kumbharana': ParmarKumbharana().encode,
 'phonem': Phonem().encode,
 'phonet_1': Phonet().encode,
 'phonet_2': Phonet(mode=2).encode,
 'phonet_1_none': Phonet(lang='none').encode,
示例#4
0
# One instance of each phonetic algorithm class, bound to a module-level
# snake_case name. Every constructor is called with no arguments, so each
# object uses whatever defaults its class defines.
# NOTE(review): the classes are not defined in this excerpt -- presumably
# imported from abydos.phonetic; confirm against the file's import block.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()
phonix = Phonix()
pshp_soundex_first = PSHPSoundexFirst()
pshp_soundex_last = PSHPSoundexLast()
refined_soundex = RefinedSoundex()
reth_schek = RethSchek()