def test_punctuated_input(self):
    # ensure that the syllabifier can syllabify delimited and punctuated
    # input; the expected text keeps the punctuation, spacing and line breaks
    # of the input and only adds syllable boundaries inside the words
    expected_output = (
        u'Ei ol.ko kai.puum.me kuin haa.ve nai.sen.tai sai.raan nä.ky,\n'
        u'hou.re hu.ma.lai.sen.\n\n'
        u'Nuo ää.net on kuo.ro.na rin.nas.sas.\n'
        u'ja vil.li on lei.ma.us kat.sees.sas.--\n'
        u'pe.ru päi.vil.tä mui.nais.il.ta se lie\n'
        u'kun käy.tiin kat.ke.ra kos.ton.tie.\n\n'
        u'hy.po_lem.ma'
    )

    raw_text = (
        u'Ei olko kaipuumme kuin haave naisentai sairaan näky,\n'
        u'houre humalaisen.\n\n'
        u'Nuo äänet on kuorona rinnassas.\n'
        u'ja villi on leimaus katseessas.--\n'
        u'peru päiviltä muinaisilta se lie\n'
        u'kun käytiin katkera kostontie.\n\n'
        u'hypo_lemma'  # hypothetical lemma
    )

    # single-output mode: no variants, no rule names, no stress marking
    syllabifier = FinnSyll(
        split=True,
        variation=False,
        rules=False,
        stress=False,
    )

    self.assertEqual(syllabifier.syllabify(raw_text), expected_output)
def test_variant_ordering_no_stress(self):
    # ensure that the syllabifier returns variants in order from most
    # preferred to least preferred
    #
    # Every word in the fixture is checked; mismatches are printed as they
    # are found and the test fails once at the end with the mismatch count,
    # so a single run reports all failing words.
    F = FinnSyll(split=True, variation=True, rules=False, stress=False)

    # NOTE(review): unpickling executes arbitrary code; acceptable only as
    # long as this fixture is a trusted file shipped with the test suite.
    with open('tests/ranked_sylls.pickle', 'rb') as f:
        pairs = pickle.load(f)

    errors = 0

    for i, expected in pairs.items():
        # Normalise the key to text. Only the conversion is guarded, so that
        # a TypeError raised inside the syllabifier itself is not swallowed.
        try:
            word = unicode(i, 'utf-8')  # Python 2 byte string
        except (TypeError, NameError):
            # TypeError: already unicode; NameError: Python 3 (no `unicode`)
            word = i

        test = F.syllabify(word.lower())

        try:
            self.assertEqual(test, expected)

        except AssertionError as e:
            errors += 1

            # `e.message` only exists on Python 2; the first element of
            # `args` carries the same text on both Python 2 and Python 3.
            details = e.args[0] if e.args else ''

            # keep only the removed/added lines of the diff, one per line
            diff_lines = [
                line for line in details.split('\n')
                if line.startswith(('-', '+'))
            ]
            print('\n'.join(diff_lines) + '\n')

    if errors:
        raise AssertionError(errors)
def test_edge_cases(self):
    # ensure that the syllabifier can handle edge cases not included in
    # the Aamulehti corpus
    F = FinnSyll(split=True, variation=False, rules=False, stress=False)

    # word -> expected syllabification
    cases = {
        'nauumme': u'nau.um.me',
        'leuun': u'leu.un',
        'riuun': u'riu.un',
    }

    # Known gap: 'ruoon' is not currently syllabified as 'ruo.on', so it is
    # kept out of `cases` and the mismatch is pinned here instead. When this
    # check starts failing, the word can be moved into `cases`.
    # assertNotEqual is used rather than a bare `assert` wrapped in
    # assertRaises, because bare asserts are stripped under `python -O`.
    self.assertNotEqual(F.syllabify('ruoon'), u'ruo.on')

    # presumably runs F.syllabify over every key of `cases` and reports
    # mismatches against the values -- helper is defined outside this view
    error_helper(self, F.syllabify, cases)