def test_smash_words_together(): mst = textToFlex.MultiSylT('tests/dict.yaml', lang='es') line = "Tú el alfarero, yo el barro soy." result = textToFlex.syllabizeLine(line, 9, mst, lang='es') assert "Tú~el al -- fa -- re -- ro, yo~el ba -- rro soy." in result result = textToFlex.syllabizeLine(line, 8, mst, lang='es') assert [] == result
def test_alternates(): mst = textToFlex.MultiSylT('tests/dict.yaml') line = "offering offering" assert "off -- ering off -- ering" in textToFlex.syllabizeLine( line, 4, mst) off5 = textToFlex.syllabizeLine(line, 5, mst) assert "of -- fer -- ing off -- ering" in off5 assert "off -- ering of -- fer -- ing" in off5
def test_multiTokenize_plural_bug(): mst = textToFlex.MultiSylT('tests/dict.yaml') off = list(mst.multiTokenize('offerings')) assert ['off', 'erings'] in off assert ['of', 'fer', 'ings'] in off off = list(mst.multiTokenize('offerings')) assert ['off', 'erings'] in off assert ['of', 'fer', 'ings'] in off
def test_reformat_singular(): mst = textToFlex.MultiSylT('tests/dict.yaml') line = "All? all." syllabized = textToFlex.syllabizeLine(line, 2, mst) assert [line] == syllabized line2 = "all, All!" syllabized2 = textToFlex.syllabizeLine(line2, 2, mst) assert [line2] == syllabized2
def test_reformat_function(): mst = textToFlex.MultiSylT('tests/dict.yaml') assert ["All?"] == mst.reformat(["all"], "All?") assert ["ALL?"] == mst.reformat(["all"], "ALL?") assert ["Cal", "v\u2019ry\u2019s"] == mst.reformat(["cal", "v'ry's"], "Calv\u2019ry\u2019s") assert ["Cal", "v'ry\u2019s"] == mst.reformat(["cal", "v'ry's"], "Calv'ry\u2019s") ordered = "\u2018'\u2019" assert [ordered] == mst.reformat(["'''"], ordered) assert ["'Tis"] == mst.reformat(["'tis"], "'Tis")
def test_spanishTokenize(): mst = textToFlex.MultiSylT('tests/dict.yaml', lang='es') # Two strong vowels (see TODO for double-l) assert len(["to", "a", "lla"]) == len(mst._spanishTokenize("toalla")) # Weak + Strong vowel assert ["i", "gua", "na"] == mst._spanishTokenize("iguana") # Two weak vowels assert ["rei", "na"] == mst._spanishTokenize("reina") # Accented vowel assert ["tí", "o"] == mst._spanishTokenize("tío") # Ending consonant assert ["com", "pre", "sar"] == mst._spanishTokenize("compresar")
def test_cjk_syllabize(): mst = textToFlex.MultiSylT('tests/dict.yaml', lang='zh') c = '聖哉,聖哉,聖哉,慈悲全能主宰,' result = textToFlex.syllabizeLine(c, 12, mst, lang='zh') assert '聖 -- 哉, 聖 -- 哉, 聖 -- 哉, 慈 -- 悲 -- 全 -- 能 -- 主 -- 宰,' \ in result j = "よろずのくにびと、 " result = textToFlex.syllabizeLine(j, 8, mst, lang='ja') assert "よ -- ろ -- ず -- の -- く -- に -- び -- と、" in result k = "너희는 먼저- 추의 나라위" result = textToFlex.syllabizeLine(k, 10, mst, lang='ko') assert "너 -- 희 -- 는 먼 -- 저- 추 -- 의 나 -- 라 -- 위" in result
def guessMeter(paragraph, lang, mst=None): meter = '' if mst is None: mst = ttf.MultiSylT(lang=lang) for line in paragraph.split("\n"): count = 0 words = [x.strip() for x in line.split(' ') if x.strip() != ''] for word in words: if word.strip(wordSyl.puncs) != '': # Assuming the first word from the multiTokenizer is the best! count += len(mst.multiTokenize(word)[0]) meter = meter + str(count) + "." return meter.strip('.')
def main(argv): """ For a paragraph of text, will guess the syllables in each line, separated by a period, like "8.6.8.6". """ lines = "" mst = ttf.MultiSylT(lang=argv[1]) for line in fileinput.input(argv[2:]): if line.strip() != '': lines = lines + line elif lines.strip(): print(guessMeter(lines.strip(), argv[1], mst)) lines = '' if lines.strip(): print(guessMeter(lines.strip(), argv[1], mst))
def test_deformat_function(): mst = textToFlex.MultiSylT('tests/dict.yaml') assert "all" == mst.deformat('All?') assert "calv'ry's" == mst.deformat("Calv\u2019ry\u2019s") assert "'tis" == mst.deformat("\u2018tis")
def test_smash_words_ignore_character(): mst = textToFlex.MultiSylT('tests/dict.yaml', lang='es') line = "te adorará todo hombre" result = textToFlex.syllabizeLine(line, 7, mst, lang='es') assert "te~a -- do -- ra -- rá to -- do~hom -- bre" in result
def test_spanish_syllabize(): mst = textToFlex.MultiSylT('tests/dict.yaml', lang='es') line = "Santificado sea tu nombre" result = textToFlex.syllabizeLine(line, 10, mst) assert 'es' == mst.lang assert "San -- ti -- fi -- ca -- do se -- a tu nom -- bre" in result