def process_word(self, word, root, mb, flag):
    """Build the candidate pronunciations of ``word`` from its ``root``.

    The pronunciation(s) of the root are resolved in this order:

    1. ``self.pronunce_dict[root]`` when the root is a key of that dict;
    2. ``self.lookup_substrings(root)`` unless it returns ``-1``;
    3. ``word_to_sampa.map_one_to_one(root)`` unless it returns ``[]``;
    4. ``word_to_sampa.map_garbage(root)`` as the last resort.

    Each root pronunciation is then handed to
    ``word_to_sampa.unite_root_and_suffix_seq`` together with a flag that is
    ``False`` when the root is listed in ``self.out_of_rules`` or ends in
    one of the consonant clusters below, and ``True`` otherwise.

    Args:
        word: Passed through to ``unite_root_and_suffix_seq``.
        root: Root used for all lookups and for the suffix check.
        mb: Passed through unchanged to ``unite_root_and_suffix_seq``.
        flag: Not read by this method; kept so existing callers keep working.

    Returns:
        A list holding one ``unite_root_and_suffix_seq`` result per root
        pronunciation.
    """
    # Root endings that switch the soften flag off.
    non_softening_endings = (
        u"lç", u"lk", u"lp", u"lt",
        u"nç", u"nt",
        u"rç", u"rk", u"rp", u"rs",
        u"st", "rt", "nk",
    )
    soften_flag = not (
        root in self.out_of_rules or root.endswith(non_softening_endings)
    )

    if root in self.pronunce_dict:
        root_pronunciations = self.pronunce_dict[root]
    else:
        root_pronunciations = self.lookup_substrings(root)
        if root_pronunciations == -1:
            # -1 means the substring lookup found nothing.
            one_to_one = word_to_sampa.map_one_to_one(root)
            if one_to_one == []:
                # map_one_to_one gives [] when it cannot handle the root;
                # compare with [] explicitly so that an empty string result
                # is not mistaken for that failure marker.
                root_pronunciations = [word_to_sampa.map_garbage(root)]
            else:
                root_pronunciations = [one_to_one]

    return [
        word_to_sampa.unite_root_and_suffix_seq(
            word, root, mb, pronunciation, soften_flag)
        for pronunciation in root_pronunciations
    ]
def map_one_to_one(word):
    """Transcribe ``word`` letter by letter, one syllable at a time.

    The word is split with ``syllabifier.syllabify``. Within a syllable every
    letter is looked up in ``word_to_sampa.phonemap``, except ``k``, ``g``
    and ``l``, whose symbol depends on whether
    ``word_to_sampa.is_a_front_vowel`` holds for the syllable's vowels.
    When a syllable starts with two consonants, an extra ``i`` (front) or
    ``1`` (otherwise) symbol is placed right after the first consonant.

    Args:
        word: The word to transcribe.

    Returns:
        All symbols of the word joined by single spaces, or an empty list
        when ``syllabifier.syllabify`` returns ``-1``.
    """
    syllables = syllabifier.syllabify(word)
    if syllables == -1:
        # Callers detect this case by comparing against [], so the list
        # (rather than an empty string) is kept as the failure marker.
        return []

    # letter -> (symbol in a front-vowel syllable, symbol otherwise)
    vowel_dependent = {
        u"k": ("c", "k"),
        u"g": ("gj", "g"),
        u"l": ("l", "5"),
    }

    transcribed = []
    for syl in syllables:
        # Build a real string instead of using filter(): on Python 3 filter()
        # yields a lazy iterator, not the vowel string Python 2 produced.
        # NOTE(review): assumes each syllable is a string - confirm against
        # syllabifier.syllabify.
        syl_vowels = "".join(
            ch for ch in syl if word_to_sampa.is_a_vowel(ch))
        is_front = word_to_sampa.is_a_front_vowel(syl_vowels)
        starts_with_cluster = (
            len(syl) > 1
            and word_to_sampa.is_a_consonant(syl[0])
            and word_to_sampa.is_a_consonant(syl[1])
        )

        symbols = []
        for position, ch in enumerate(syl):
            if ch in vowel_dependent:
                front_symbol, other_symbol = vowel_dependent[ch]
                symbols.append(front_symbol if is_front else other_symbol)
            else:
                symbols.append(word_to_sampa.phonemap[ch])
            if starts_with_cluster and position == 0:
                # Split the leading consonant pair with an extra vowel symbol.
                symbols.append("i" if is_front else "1")
        transcribed.append(" ".join(symbols))

    return " ".join(transcribed)
def map_one_to_one(word):
    """Transcribe ``word`` letter by letter, one syllable at a time.

    The word is split with ``syllabifier.syllabify``. Within a syllable every
    letter is looked up in ``word_to_sampa.phonemap``, except ``k``, ``g``
    and ``l``, whose symbol depends on whether
    ``word_to_sampa.is_a_front_vowel`` holds for the syllable's vowels.
    When a syllable starts with two consonants, an extra ``i`` (front) or
    ``1`` (otherwise) symbol is placed right after the first consonant.

    Args:
        word: The word to transcribe.

    Returns:
        All symbols of the word joined by single spaces, or an empty list
        when ``syllabifier.syllabify`` returns ``-1``.
    """
    syllables = syllabifier.syllabify(word)
    if syllables == -1:
        # Callers detect this case by comparing against [], so the list
        # (rather than an empty string) is kept as the failure marker.
        return []

    # letter -> (symbol in a front-vowel syllable, symbol otherwise)
    vowel_dependent = {
        u"k": ("c", "k"),
        u"g": ("gj", "g"),
        u"l": ("l", "5"),
    }

    transcribed = []
    for syl in syllables:
        # Build a real string instead of using filter(): on Python 3 filter()
        # yields a lazy iterator, not the vowel string Python 2 produced.
        # NOTE(review): assumes each syllable is a string - confirm against
        # syllabifier.syllabify.
        syl_vowels = "".join(
            ch for ch in syl if word_to_sampa.is_a_vowel(ch))
        is_front = word_to_sampa.is_a_front_vowel(syl_vowels)
        starts_with_cluster = (
            len(syl) > 1
            and word_to_sampa.is_a_consonant(syl[0])
            and word_to_sampa.is_a_consonant(syl[1])
        )

        symbols = []
        for position, ch in enumerate(syl):
            if ch in vowel_dependent:
                front_symbol, other_symbol = vowel_dependent[ch]
                symbols.append(front_symbol if is_front else other_symbol)
            else:
                symbols.append(word_to_sampa.phonemap[ch])
            if starts_with_cluster and position == 0:
                # Split the leading consonant pair with an extra vowel symbol.
                symbols.append("i" if is_front else "1")
        transcribed.append(" ".join(symbols))

    return " ".join(transcribed)
def process_word(self, word, root, mb, flag):
    """Build the candidate pronunciations of ``word`` from its ``root``.

    The pronunciation(s) of the root are resolved in this order:

    1. ``self.pronunce_dict[root]`` when the root is a key of that dict;
    2. ``self.lookup_substrings(root)`` unless it returns ``-1``;
    3. ``word_to_sampa.map_one_to_one(root)`` unless it returns ``[]``;
    4. ``word_to_sampa.map_garbage(root)`` as the last resort.

    Each root pronunciation is then handed to
    ``word_to_sampa.unite_root_and_suffix_seq`` together with a flag that is
    ``False`` when the root is listed in ``self.out_of_rules`` or ends in
    one of the consonant clusters below, and ``True`` otherwise.

    Args:
        word: Passed through to ``unite_root_and_suffix_seq``.
        root: Root used for all lookups and for the suffix check.
        mb: Passed through unchanged to ``unite_root_and_suffix_seq``.
        flag: Not read by this method; kept so existing callers keep working.

    Returns:
        A list holding one ``unite_root_and_suffix_seq`` result per root
        pronunciation.
    """
    # Root endings that switch the soften flag off.
    non_softening_endings = (
        u"lç", u"lk", u"lp", u"lt",
        u"nç", u"nt",
        u"rç", u"rk", u"rp", u"rs",
        u"st", "rt", "nk",
    )
    soften_flag = not (
        root in self.out_of_rules or root.endswith(non_softening_endings)
    )

    if root in self.pronunce_dict:
        root_pronunciations = self.pronunce_dict[root]
    else:
        root_pronunciations = self.lookup_substrings(root)
        if root_pronunciations == -1:
            # -1 means the substring lookup found nothing.
            one_to_one = word_to_sampa.map_one_to_one(root)
            if one_to_one == []:
                # map_one_to_one gives [] when it cannot handle the root;
                # compare with [] explicitly so that an empty string result
                # is not mistaken for that failure marker.
                root_pronunciations = [word_to_sampa.map_garbage(root)]
            else:
                root_pronunciations = [one_to_one]

    return [
        word_to_sampa.unite_root_and_suffix_seq(
            word, root, mb, pronunciation, soften_flag)
        for pronunciation in root_pronunciations
    ]