def test_complex_data(self):
    """
        class TestIPA2IPA.test_complex_data

        Round-trip check on the file tests/ipa2ipa.data.complex.txt :
        every non-empty (stripped) line is used as an IPA string to build
        a PhoSegObject; the object must report initialization_ok == True
        and get_ipa_representation(insert_dot=False) must give back the
        very same string.
    """
    with open('tests/ipa2ipa.data.complex.txt', 'r') as data_file:
        for raw_line in data_file:
            expected = raw_line.strip()

            # blank lines carry no data :
            if not expected:
                continue

            phoseg = PhoSegObject(ipa=expected)
            obtained = phoseg.get_ipa_representation(insert_dot=False)

            # trace written for every line, whatever the result of the
            # comparison is :
            print("[TestIPA2IPA.test_complex_data]",
                  phoseg.error_msg,
                  expected,
                  return_an_analysis_of_a_string(expected),
                  " != ",
                  obtained,
                  return_an_analysis_of_a_string(obtained))

            self.assertEqual(phoseg.initialization_ok, True)
            self.assertEqual(expected, obtained)
def init(self, _sipa_str):
    """
        SIPA.init

        Fill <self> with the syllables read from the SIPA string <_sipa_str>.

        ENTRY VALUE :
            * _sipa_str : (str) SIPA string

        The string first goes through IPATonalCharsToInternalChar(). Every
        match of NAMED_SIPA_SYLLABLE_PATTERN is then appended to <self> as
        a dict with the keys "onset", "nucleus" and "coda", each value being
        an IPA object (built from "" if the group did not match).

        A PhoSegError is raised :
            * if a character belongs to none of IPA_PREART_KEYS,
              IPA_MAINART_KEYS, IPA_POSTART_KEYS, SIPA_CHARACTERS;
            * if the number of "(" in the string differs from len(self).
    """
    sipa_str = IPATonalCharsToInternalChar(_sipa_str)

    # is every character in <sipa_str> a known character ?
    for character in sipa_str:
        is_known = character in IPA_PREART_KEYS or \
                   character in IPA_MAINART_KEYS or \
                   character in IPA_POSTART_KEYS or \
                   character in SIPA_CHARACTERS
        if not is_known:
            template = "SIPA.init : unknwon character '{0}'({1}) in '{2}'."
            raise PhoSegError(template.format(character,
                                              return_an_analysis_of_a_string(character),
                                              sipa_str))

    # one dict per syllable; a group that did not match gives an empty string :
    part_names = ("onset", "nucleus", "coda")
    for match in re.finditer(NAMED_SIPA_SYLLABLE_PATTERN, sipa_str):
        texts = [match.group(part_name) or "" for part_name in part_names]
        self.append({part_name: IPA(text)
                     for part_name, text in zip(part_names, texts)})

    # The number of opening parenthesis in <sipa_str> has to be equal to
    # the number of syllables stored in <self>.
    nbr_parenthesis = sipa_str.count('(')
    nbr_syllables = len(self)
    if nbr_parenthesis == nbr_syllables:
        return

    raise PhoSegError("".join((
        "(SIPA.init) Wrong initialization : ",
        "the number of parenthesis differs from the number of syllables;",
        "nbr of parenthesis = " + str(nbr_parenthesis) + "; ",
        "sipa_str='{0}'; ".format(sipa_str),
        "len(self)=" + str(nbr_syllables),
    )))
def test(self):
    """
        class TestSIPA2SIPA.test

        Round-trip check on the file tests/sipa2sipa.data.txt : every
        non-empty (stripped) line is used as a SIPA string to build a
        PhoSegObject; the object must report initialization_ok == True and
        get_sipa_representation() must give back the very same string.
    """
    with open('tests/sipa2sipa.data.txt', 'r') as data_file:
        for raw_line in data_file:
            expected = raw_line.strip()

            # blank lines carry no data :
            if not expected:
                continue

            phoseg = PhoSegObject(sipa=expected)
            obtained = phoseg.get_sipa_representation()

            # trace written for every line, whatever the result of the
            # comparison is :
            print("[TestSIPA2SIPA.test]",
                  expected,
                  return_an_analysis_of_a_string(expected),
                  " != ",
                  obtained,
                  return_an_analysis_of_a_string(obtained))

            self.assertEqual(phoseg.initialization_ok, True)
            self.assertEqual(expected, obtained)
def init_the_names(self, _ipa_str):
    """
        IPA.init_the_names

        Initialization with the keywords found in the IPA string <_ipa_str>.

        ENTRY VALUE :
            * _ipa_str : (str) IPA string

        STEPS :
            (1) <_ipa_str> goes through IPATonalCharsToInternalChar(); each
                ligature 0x0361 is dropped and 0x032F is added after the
                character following the ligature (t͡s -> ts̯).
            (2) every character has to belong to IPA_PREART_KEYS,
                IPA_MAINART_KEYS or IPA_POSTART_KEYS, otherwise a
                PhoSegError is raised.
            (3) each match of NAMED_PHONEME_PATTERN is given to
                self.add_ipanames_as_keywords() as a dict with the keys
                "predata", "maindata" and "postdata".
            (4) tone keywords (keywords made of digits) stored in a
                "notindependant" element are moved to the last independant
                element placed before it.
    """
    #.......................................................................
    # replacements : 0x0361 (ligature in "t͡s" is replaced by 0x032F applied
    # on the next character) : t͡s -> ts̯
    #.......................................................................
    ipa_chars = []
    nextchar_willbe_notindependant = False
    for char in IPATonalCharsToInternalChar(_ipa_str):
        if nextchar_willbe_notindependant:
            ipa_chars.append(char)
            ipa_chars.append(chr(0x032F))
            nextchar_willbe_notindependant = False
        elif char == chr(0x0361):
            nextchar_willbe_notindependant = True
        else:
            ipa_chars.append(char)

    #.......................................................................
    # (list)ipa_chars -> (string)ipa_str
    #.......................................................................
    ipa_str = "".join(ipa_chars)

    #.......................................................................
    # we check if every character in <ipa_str> is a known character :
    #.......................................................................
    for char in ipa_str:
        if char not in IPA_PREART_KEYS and \
           char not in IPA_MAINART_KEYS and \
           char not in IPA_POSTART_KEYS:
            error_msg = "IPA.init_the_names : unknown character '{0}' (={1}) in '{2}'."
            raise PhoSegError(error_msg.format(char,
                                               return_an_analysis_of_a_string(char),
                                               ipa_str))

    #.......................................................................
    # main loop
    #.......................................................................
    for phoneme in re.finditer(NAMED_PHONEME_PATTERN, ipa_str):

        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        # new_element : a dictionary with three keys :
        #     * predata     : [ list of strings ]
        #     * maindata    : string
        #     * postdata    : [ list of strings ]
        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        predata = phoneme.group("predata")
        postdata = phoneme.group("postdata")

        new_element = dict()
        if predata is None:
            new_element["predata"] = []
        else:
            new_element["predata"] = [IPA_PREART[char] for char in predata]

        new_element["maindata"] = IPA_MAINART[phoneme.group("maindata")]

        if postdata is None:
            new_element["postdata"] = []
        else:
            new_element["postdata"] = [IPA_POSTART[char] for char in postdata]

        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        # we add to <self> the dict by converting it to keywords :
        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        self.add_ipanames_as_keywords(new_element)

    #.......................................................................
    # special case : if we find in self[x] (x>0) a keyword relative
    # to tones AND the keyword "notindependant", we move these keywords to
    # the last index being independant.
    #
    # E.g. : "ai̯˧˥" = (a) + ("notindependant" i, ˧˥ )
    #        becomes (a, ˧˥) + ("notindependant" i)
    #
    # self[x] are strings of characters separated by spaces.
    #
    # The scan starts at index 0 so that the first element can be recorded
    # as the last independant one : a scan starting at index 1 would leave
    # <last_independant_index> to -1 in the example above and the tone
    # would be written in self[-1], i.e. in the last element of <self>.
    #.......................................................................
    last_independant_index = -1
    for i in range(len(self)):

        if "notindependant" not in self[i]:
            # independant index :
            last_independant_index = i
            continue

        if last_independant_index == -1:
            # dependant index without any independant index before it :
            # there is no place where the tone keywords could be moved.
            continue

        # dependant index, with maybe a tone keyword to be moved backward.
        # If we have digits, we have a tone keyword (like '123') :
        keywords = self[i].split(" ")
        tone_keywords = [keyword for keyword in keywords if keyword.isdigit()]

        if tone_keywords:
            # removing the tone keywords from self[i] : whole keywords are
            # blanked (the surrounding spaces are kept) so that another
            # keyword containing the same digits can't be damaged.
            self[i] = " ".join("" if keyword.isdigit() else keyword
                               for keyword in keywords)

            # adding the tone keywords to self[last_independant_index] :
            self[last_independant_index] += "".join(" " + keyword
                                                    for keyword in tone_keywords)