Пример #1
0
    def process_word(self, word, root, mb, flag):
        """Build one pronunciation of *word* per known pronunciation of *root*.

        The root pronunciations are taken from the first source that yields
        a result:

        1. ``self.pronunce_dict[root]`` when *root* is a key there;
        2. ``self.lookup_substrings(root)`` unless it returns ``-1``;
        3. ``word_to_sampa.map_one_to_one(root)``, wrapped in a list;
        4. ``word_to_sampa.map_garbage(root)``, wrapped in a list, when
           step 3 produced ``[]``.

        Args:
            word: Passed through to ``unite_root_and_suffix_seq``.
            root: Root used for every lookup and for the ending check.
            mb: Passed through to ``unite_root_and_suffix_seq`` unchanged.
            flag: Accepted for interface compatibility; not read here.

        Returns:
            A list with one ``unite_root_and_suffix_seq`` result per root
            pronunciation, in the same order as the root pronunciations.
        """
        # The return value was never used by this method.  The call is kept
        # because the helper is opaque from here and may raise or have side
        # effects -- NOTE(review): drop it once that is ruled out.
        syllabifier.syllabify(root)

        # Roots with one of these endings get soften_flag=False, exactly
        # like roots listed in self.out_of_rules.
        non_softening_endings = (
            u"lç", u"lk", u"lp", u"lt", u"nç", u"nt", u"rç",
            u"rk", u"rp", u"rs", u"st", "rt", "nk",
        )
        soften_flag = not (root in self.out_of_rules
                           or root.endswith(non_softening_endings))

        if root in self.pronunce_dict:
            root_pronunciations = self.pronunce_dict[root]
        else:
            from_substrings = self.lookup_substrings(root)
            if from_substrings != -1:
                root_pronunciations = from_substrings
            else:
                mapped = word_to_sampa.map_one_to_one(root)
                # Only the literal empty list triggers the fallback; an
                # empty string result is kept as-is.
                if mapped == []:
                    mapped = word_to_sampa.map_garbage(root)
                root_pronunciations = [mapped]

        return [
            word_to_sampa.unite_root_and_suffix_seq(
                word, root, mb, root_pro, soften_flag)
            for root_pro in root_pronunciations
        ]
  def map_one_to_one(word):
    """Convert *word* to a space-separated symbol string, syllable by syllable.

    The word is split with ``syllabifier.syllabify``.  Within a syllable,
    ``k``, ``g`` and ``l`` are written as ``c``/``gj``/``l`` when
    ``is_a_front_vowel`` reports true for the syllable's vowels, and as
    ``k``/``g``/``5`` otherwise.  Every other character is looked up in
    ``word_to_sampa.phonemap``.

    When a syllable has more than one character and its first two
    characters are both consonants (per ``is_a_consonant``), an extra
    ``i`` (front) or ``1`` (otherwise) is emitted right after the first
    character -- presumably a vowel inserted to break the cluster.

    Returns:
        ``[]`` when ``syllabify`` returns ``-1``; otherwise one string
        holding all symbols of all syllables separated by single spaces.
    """
    syllables = syllabifier.syllabify(word)
    if syllables == -1:
      return []

    # character -> (symbol in a front syllable, symbol otherwise)
    front_back = {u"k": ("c", "k"), u"g": ("gj", "g"), u"l": ("l", "5")}

    per_syllable = []
    for syl in syllables:
      # NOTE(review): on Python 3 ``filter`` returns a lazy iterator, not a
      # string/list as on Python 2 -- confirm is_a_front_vowel copes.
      vowels = filter(word_to_sampa.is_a_vowel, syl)
      is_front = word_to_sampa.is_a_front_vowel(vowels)
      starts_with_cluster = (len(syl) > 1
                             and word_to_sampa.is_a_consonant(syl[0])
                             and word_to_sampa.is_a_consonant(syl[1]))

      symbols = []
      for pos, ch in enumerate(syl):
        if ch in front_back:
          front_sym, back_sym = front_back[ch]
          symbols.append(front_sym if is_front else back_sym)
        else:
          symbols.append(word_to_sampa.phonemap[ch])
        if starts_with_cluster and pos == 0:
          symbols.append("i" if is_front else "1")
      per_syllable.append(" ".join(symbols))

    return " ".join(per_syllable)
Пример #3
0
    def map_one_to_one(word):
        """Convert *word* to a space-separated symbol string, per syllable.

        The word is split with ``syllabifier.syllabify``.  Within a
        syllable, ``k``, ``g`` and ``l`` are written as ``c``/``gj``/``l``
        when ``is_a_front_vowel`` reports true for the syllable's vowels,
        and as ``k``/``g``/``5`` otherwise.  Every other character is
        looked up in ``word_to_sampa.phonemap``.

        When a syllable has more than one character and its first two
        characters are both consonants (per ``is_a_consonant``), an extra
        ``i`` (front) or ``1`` (otherwise) is emitted right after the
        first character -- presumably a vowel inserted to break the
        cluster.

        Returns:
            ``[]`` when ``syllabify`` returns ``-1``; otherwise one string
            holding all symbols of all syllables separated by single
            spaces.
        """
        syllables = syllabifier.syllabify(word)
        if syllables == -1:
            return []

        # character -> (symbol in a front syllable, symbol otherwise)
        front_back = {u"k": ("c", "k"), u"g": ("gj", "g"), u"l": ("l", "5")}

        per_syllable = []
        for syl in syllables:
            # NOTE(review): on Python 3 ``filter`` returns a lazy iterator,
            # not a string/list as on Python 2 -- confirm
            # is_a_front_vowel copes.
            vowels = filter(word_to_sampa.is_a_vowel, syl)
            is_front = word_to_sampa.is_a_front_vowel(vowels)
            starts_with_cluster = (len(syl) > 1
                                   and word_to_sampa.is_a_consonant(syl[0])
                                   and word_to_sampa.is_a_consonant(syl[1]))

            symbols = []
            for pos, ch in enumerate(syl):
                if ch in front_back:
                    front_sym, back_sym = front_back[ch]
                    symbols.append(front_sym if is_front else back_sym)
                else:
                    symbols.append(word_to_sampa.phonemap[ch])
                if starts_with_cluster and pos == 0:
                    symbols.append("i" if is_front else "1")
            per_syllable.append(" ".join(symbols))

        return " ".join(per_syllable)
Пример #4
0
  def process_word(self, word, root, mb, flag):
    """Build one pronunciation of *word* per known pronunciation of *root*.

    The root pronunciations are taken from the first source that yields a
    result:

    1. ``self.pronunce_dict[root]`` when *root* is a key there;
    2. ``self.lookup_substrings(root)`` unless it returns ``-1``;
    3. ``word_to_sampa.map_one_to_one(root)``, wrapped in a list;
    4. ``word_to_sampa.map_garbage(root)``, wrapped in a list, when step 3
       produced ``[]``.

    Args:
      word: Passed through to ``unite_root_and_suffix_seq``.
      root: Root used for every lookup and for the ending check.
      mb: Passed through to ``unite_root_and_suffix_seq`` unchanged.
      flag: Accepted for interface compatibility; not read here.

    Returns:
      A list with one ``unite_root_and_suffix_seq`` result per root
      pronunciation, in the same order as the root pronunciations.
    """
    # The return value was never used by this method.  The call is kept
    # because the helper is opaque from here and may raise or have side
    # effects -- NOTE(review): drop it once that is ruled out.
    syllabifier.syllabify(root)

    # Roots with one of these endings get soften_flag=False, exactly like
    # roots listed in self.out_of_rules.
    non_softening_endings = (
      u"lç", u"lk", u"lp", u"lt", u"nç", u"nt", u"rç",
      u"rk", u"rp", u"rs", u"st", "rt", "nk",
    )
    soften_flag = not (root in self.out_of_rules
                       or root.endswith(non_softening_endings))

    if root in self.pronunce_dict:
      root_pronunciations = self.pronunce_dict[root]
    else:
      from_substrings = self.lookup_substrings(root)
      if from_substrings != -1:
        root_pronunciations = from_substrings
      else:
        mapped = word_to_sampa.map_one_to_one(root)
        # Only the literal empty list triggers the fallback; an empty
        # string result is kept as-is.
        if mapped == []:
          mapped = word_to_sampa.map_garbage(root)
        root_pronunciations = [mapped]

    return [
      word_to_sampa.unite_root_and_suffix_seq(
        word, root, mb, root_pro, soften_flag)
      for root_pro in root_pronunciations
    ]