Exemplo n.º 1
0
def getCategoryDict():
    """Build a zero-initialised counter keyed by category.

    The categories are obtained from ``encod.get_categories`` applied to the
    ``voc_etats`` file located under ``data``.

    Returns:
        dict: one entry per category, each mapped to ``0``.
    """
    categories = encod.get_categories(data + "/voc_etats")
    return dict.fromkeys(categories, 0)
Exemplo n.º 2
0
def getCategoryDict():
    """Create a dictionary holding a zero count for every category.

    Returns:
        dict: each category returned by ``encod.get_categories`` for the
        ``voc_etats`` file (under ``data``), mapped to ``0``.
    """
    # The category table.
    category_names = encod.get_categories(data + "/voc_etats")
    return {name: 0 for name in category_names}
Exemplo n.º 3
0
	def initHmm(self, listObservables):
		"""Initialise the model with uniform probabilities.

		Loads the states from the ``voc_etats`` file under ``data`` and stores
		on the instance:

		* ``self.Pi[state]``: initial probability of each state,
		* ``self.transitions[state][next_state]``: transition probabilities,
		* ``self.emissions[observable][state]``: emission probabilities,

		every entry being ``1.0 / len(self.states)``.

		Args:
			listObservables: iterable of observables used as the outer keys
				of ``self.emissions``.

		Returns:
			list: ``[self.states, self.transitions, self.emissions]``.
		"""
		self.states = encod.get_categories(data+"/voc_etats")

		nb_states = len(self.states)
		# With no state at all, no entry is ever created, so the value is
		# unused; the guard only avoids dividing by zero in that case.
		uniform = 1.0/nb_states if nb_states else 0.0

		self.Pi = {state: uniform for state in self.states}

		self.transitions = {
			origin: {target: uniform for target in self.states}
			for origin in self.states
		}

		self.emissions = {
			symbol: {state: uniform for state in self.states}
			for symbol in listObservables
		}

		return [self.states, self.transitions, self.emissions]
Exemplo n.º 4
0
Arquivo: BW.py Projeto: tomtom2/HMM
	def initHmm(self, listObservables):
		"""Initialise the starting HMM (lambda_0) with equal probabilities everywhere.

		The states are read from the ``voc_etats`` file under ``data``. The
		initial (``self.Pi``), transition (``self.transitions``) and emission
		(``self.emissions``) tables are then filled with the same value,
		``1.0 / len(self.states)``.

		Args:
			listObservables: iterable of observables; each one becomes a key
				of ``self.emissions`` mapping to a per-state dictionary.

		Returns:
			list: ``[self.states, self.transitions, self.emissions]``.
		"""
		self.states = encod.get_categories(data+"/voc_etats")

		count = len(self.states)
		# An empty state list produces only empty tables, so the fallback
		# value is never stored; it just prevents a division by zero.
		equal_share = (1.0/count) if count else 0.0

		# Initial probabilities.
		self.Pi = dict.fromkeys(self.states, equal_share)

		# One independent row per source state.
		self.transitions = dict(
			(source, dict.fromkeys(self.states, equal_share))
			for source in self.states
		)

		# One independent row per observable.
		self.emissions = dict(
			(obs, dict.fromkeys(self.states, equal_share))
			for obs in listObservables
		)

		return [self.states, self.transitions, self.emissions]
Exemplo n.º 5
0
def _ratio(count, total):
    """Return ``count / total`` as a float, or ``0.0`` when ``total`` is zero."""
    return float(count) / total if total else 0.0


def get_Pi_T_E():
    """Estimate HMM parameters by relative frequency from the training file.

    Reads, under ``data``, the observable vocabulary (``voc_observables``),
    the category vocabulary (``voc_etats``) and the tagged corpus (``train``).
    Every non-blank corpus line must hold an observable and its category
    separated by a tab; a line containing only a newline ends the sentence.

    Returns:
        list: ``[I, T, E]`` where

        * ``I[c]`` is the number of sentences starting with category ``c``
          divided by the number of sentences,
        * ``T[c1][c2]`` is the number of times ``c2`` directly follows ``c1``
          inside a sentence divided by the number of occurrences of ``c1``,
        * ``E[m][c]`` is the number of times observable ``m`` is tagged ``c``
          divided by the number of occurrences of ``c``.

        A ratio whose denominator is zero (category absent from the corpus,
        or corpus without any sentence) is reported as ``0.0``.

    Raises:
        KeyError: if the corpus contains an observable or a category that is
            missing from the vocabularies.
        IndexError: if a non-blank corpus line contains no tab.
    """
    # Observable and category vocabularies.
    Tab = encod.encode(data + "/voc_observables")
    Cat = encod.get_categories(data + "/voc_etats")

    # Sentence-initial counts and total counts per category.
    I = dict.fromkeys(Cat, 0)
    occur_cat = dict.fromkeys(Cat, 0)

    # Bigram counts: T[previous][current]. Rows are built from the category
    # list already loaded above, which gives the same zeroed dictionary as
    # getCategoryDict() without re-reading the vocabulary file for every row.
    T = {c: dict.fromkeys(Cat, 0) for c in Cat}

    # Emission counts: E[observable][category].
    E = {o: dict.fromkeys(Cat, 0) for o in Tab}

    start_sentence = True
    categorie_before = ""
    # The context manager closes the corpus even if a line is malformed.
    with open(data + "/train") as file_app:
        for line in file_app:
            if line == "\n":
                # Blank line: the next token opens a new sentence.
                start_sentence = True
                continue

            fields = line.split("\t")
            observable = fields[0]
            categorie = fields[1].replace("\n", "")

            E[observable][categorie] += 1
            occur_cat[categorie] += 1

            if start_sentence:
                I[categorie] += 1
                start_sentence = False
            else:
                T[categorie_before][categorie] += 1
            categorie_before = categorie

    # Normalisation: turn the raw counts into relative frequencies.

    # Pi(c)
    N = sum(I.values())
    for c in I:
        I[c] = _ratio(I[c], N)

    # T(c1, c2)
    for c1 in T:
        for c2 in T[c1]:
            T[c1][c2] = _ratio(T[c1][c2], occur_cat[c1])

    # E(m, c)
    for m in E:
        for c in E[m]:
            E[m][c] = _ratio(E[m][c], occur_cat[c])

    return [I, T, E]
Exemplo n.º 6
0
    
    f = open(data+"/BWtest")
    for line in f:
        l = []
        if line != "\n":
            l = line.replace("\n", "").split("\t")
        else:
            l = ["", ""]
        t = [l[0], l[1], ""]
        T.append(t)

    return T



# State inventory, with the empty string added as an extra state.
listState = encod.get_categories(data+"/voc_etats")
listState.append("")

test_table = encodeTestAsMatrix3n()

# First field of every test row. Built as a real list: assigning into the
# result of range() only works on Python 2, where range() returns a list.
listObservables = [row[0] for row in test_table]

### Build an HMM from the training file ###
S = app.get_Pi_T_E()
I, T, E = S[0], S[1], S[2]
if perturbation:
    for obs in listObservables: