示例#1
0
def GetTestSents(request):
    """Return a random sample of test sentences as a JSON HTTP response.

    Reads the ``nbrTestSents`` POST field, draws that many random ids in the
    range 1..36111 and fetches the matching ``sent`` values from the
    ``sents_test`` table of ``data/model.db``. Ids are drawn independently,
    so duplicates are possible and fewer rows than requested may come back.

    The JSON object in the response has two keys, each holding one sentence
    per line:

    * ``sents_diac`` -- every fetched sentence with the first and last item
      returned by ``MyToolKit.words`` dropped, re-joined with single spaces;
    * ``sents_whitout_diac`` -- that same text passed through
      ``MyToolKit.DeleteDiacritic`` (the key's spelling is kept as-is because
      it is part of the response format).

    The body is JSON ``null`` when the field is missing, blank, not an
    integer, or lower than 1.
    """
    # .get() so that a request without the field is answered with ``null``
    # instead of failing with a KeyError.
    nbr = request.POST.get('nbrTestSents', '').strip()
    result = None
    if nbr != "":
        # The whole computation stays inside the ``try`` so that a ValueError
        # raised by any step keeps producing a ``null`` body, as before.
        try:
            # Validate the count first: nothing is opened for invalid input.
            count = int(nbr)
            if count >= 1:
                tool = MyToolKit()
                # NOTE(review): the handler is never closed in this view; the
                # DBHandler API for releasing it is not visible from here.
                hdb = DBHandler('data/model.db')
                hdb.connect()

                # Same condition text as before (" id=A or id=B or ..."),
                # built in a single join.
                cond = ' ' + " or ".join(
                    "id=" + str(random.randint(1, 36111))
                    for _ in range(count))
                DataSents = hdb.getFromTable(
                    'sents_test', attribute='sent', condition=cond)

                lines = []
                for sent in DataSents:
                    tokens = tool.words(sent[0])
                    # Drop the first and the last token of each sentence.
                    lines.append(' '.join(tokens[1:-1]) + '\n')
                sents_diac = ''.join(lines)

                result = {
                    'sents_diac': sents_diac,
                    'sents_whitout_diac': tool.DeleteDiacritic(sents_diac),
                }
        except ValueError:
            result = None
    return HttpResponse(json.dumps(result), content_type="application/json")
示例#2
0
    def LettersVocaliser(self,sents,smooth_const):
        """Vocalise, letter by letter, the strings of ``sents`` still lacking diacritics.

        For every string reported by ``self.getNotVocalised(sents)`` a lattice
        (``matrice``) holding one column per character is built from the
        ``letters_dictionary`` table of ``data/model.db``. The lattice is
        decoded by ``self.ViterbiLetter`` and the decoded path is mapped back
        onto the string by ``self.alignLetter``. The outcome replaces the
        original string inside its sentence.

        Each lattice cell is a three-item list ``[int, symbol, int]``.
        NOTE(review): the meaning of the two integers is defined by
        ``ViterbiLetter``, which is not visible from this method.

        Args:
            sents: mutable, indexable sequence of sentence strings; it is
                modified in place.
            smooth_const: forwarded unchanged to ``self.ViterbiLetter``.

        Returns:
            The ``sents`` object that was passed in, after substitution.

        Raises:
            KeyError: if a character has no entry in ``letters_dictionary``.
        """
        tool = MyToolKit()
        # NOTE(review): the handler is never closed here; the DBHandler API
        # for releasing it is not visible from this method.
        hdb = DBHandler('data/model.db')
        hdb.connect()
        rows = hdb.getFromTable('letters_dictionary',attribute='type,vocabularies')
        # ``type`` column -> ``vocabularies`` column, looked up by character
        # below. Renamed from ``dict`` so the builtin is no longer shadowed.
        vocab_by_letter = {row[0]: row[1] for row in rows}

        not_vocalised = self.getNotVocalised(sents)
        # iterate over the sentences
        for i in range(len(sents)):
            # iterate over the non-vocalised strings of the sentence
            for chunk in not_vocalised[i]:
                char = tool.LettersDiac(chunk)
                # The first three columns are fixed single-cell columns: the
                # first two items of LettersDiac(chunk), then a '#'.
                matrice = [
                    [[-1, tool.HideChar(char[0], expect=['#', ' ']), 1]],
                    [[0, tool.HideChar(char[1], expect=['#', ' ']), 1]],
                    [[0, "#", 1]],
                ]

                string = tool.DeleteDiacritic(chunk)
                length = len(string)

                # iterate over the remaining characters
                k = 3
                while k < length:
                    # '#' followed by 'ا' then 'ل' (presumably the definite
                    # article -- confirm): both letters get one fixed cell.
                    # The bounds check keeps a string ending in '#ا' from
                    # raising IndexError on string[k+1].
                    if (k + 1 < length and string[k-1] == "#"
                            and string[k] == 'ا' and string[k+1] == 'ل'):
                        matrice.append([[0, '_', 1]])
                        matrice.append([[0, '_ْ', 1]])
                        k += 2
                    else:
                        # iterate over the possibilities listed for this letter
                        column = []
                        for possib in tool.words(vocab_by_letter[string[k]]):
                            if possib == "#":
                                column.append([-1, possib, 1])
                            else:
                                column.append([-1, tool.HideChar(possib, expect=['#', ' ']), 0])
                        matrice.append(column)
                        k += 1

                v = self.ViterbiLetter(matrice, smooth_const)
                vocalised = self.alignLetter(v, chunk)
                # '#' stands for a blank inside the chunks; convert both sides
                # back to plain text before substituting in the sentence.
                sents[i] = sents[i].replace(
                    chunk.replace('#', ' ').strip(),
                    vocalised.replace('#', ' ').strip())

        return sents
示例#3
0
    def LettersVocaliser(self, sents, smooth_const):
        """Fill in diacritics, character by character, for unvocalised strings.

        ``self.getNotVocalised(sents)`` supplies, per sentence, the strings
        that still lack diacritics. For each of them this method:

        1. builds ``matrice``, a list with one column of candidate cells per
           character, using the ``letters_dictionary`` table of
           ``data/model.db``;
        2. decodes it with ``self.ViterbiLetter(matrice, smooth_const)``;
        3. maps the decoded result onto the string with ``self.alignLetter``;
        4. replaces the string by that result inside ``sents[i]``.

        A cell is a three-item list ``[int, symbol, int]``.
        NOTE(review): how ``ViterbiLetter`` reads the two integers cannot be
        told from this method -- confirm before changing them.

        Args:
            sents: mutable, indexable sequence of sentence strings, updated
                in place.
            smooth_const: handed to ``self.ViterbiLetter`` untouched.

        Returns:
            The very ``sents`` object received, once updated.

        Raises:
            KeyError: when a character is absent from ``letters_dictionary``.
        """
        tool = MyToolKit()
        # NOTE(review): nothing closes this handler; DBHandler's API for
        # doing so is not visible from this method.
        hdb = DBHandler('data/model.db')
        hdb.connect()
        res = hdb.getFromTable('letters_dictionary',
                               attribute='type,vocabularies')
        # Maps the ``type`` column to the ``vocabularies`` column; indexed by
        # character further down. No longer named ``dict``, which hid the
        # builtin.
        candidates_of = {r[0]: r[1] for r in res}

        not_vocalised = self.getNotVocalised(sents)
        # iterate over the sentences
        for i in range(len(sents)):
            # iterate over the non-vocalised strings of the sentence
            for pending in not_vocalised[i]:
                char = tool.LettersDiac(pending)
                head_0 = tool.HideChar(char[0], expect=['#', ' '])
                head_1 = tool.HideChar(char[1], expect=['#', ' '])
                # Three fixed, single-cell columns open the lattice.
                matrice = [[[-1, head_0, 1]], [[0, head_1, 1]], [[0, "#", 1]]]

                string = tool.DeleteDiacritic(pending)
                end = len(string)

                # iterate over the characters that follow the fixed columns
                k = 3
                while k < end:
                    # Special case: '#', 'ا', 'ل' in a row (presumably the
                    # definite article -- confirm). ``k + 1 < end`` avoids an
                    # IndexError when the string stops right after '#ا'.
                    starts_alif_lam = (k + 1 < end
                                       and string[k - 1] == "#"
                                       and string[k] == 'ا'
                                       and string[k + 1] == 'ل')
                    if starts_alif_lam:
                        # One fixed cell for each of the two letters.
                        matrice.append([[0, '_', 1]])
                        matrice.append([[0, '_ْ', 1]])
                        k += 2
                        continue

                    # iterate over the possibilities known for this letter
                    matrice.append([
                        [-1, possib, 1] if possib == "#" else
                        [-1, tool.HideChar(possib, expect=['#', ' ']), 0]
                        for possib in tool.words(candidates_of[string[k]])
                    ])
                    k += 1

                v = self.ViterbiLetter(matrice, smooth_const)
                aligned = self.alignLetter(v, pending)
                # Both sides have their '#' turned into blanks and are trimmed
                # so they match the plain text of the sentence.
                old_text = pending.replace('#', ' ').strip()
                new_text = aligned.replace('#', ' ').strip()
                sents[i] = sents[i].replace(old_text, new_text)

        return sents