import time as tm

import gurobipy as grb
import numpy as np

import outils as ot


def iteration_de_la_valeur(g, p, gamma, epsilon):
    # Value iteration: returns the greedy policy, the state values,
    # the number of sweeps and the elapsed time.
    global liste_actions
    start = tm.time()
    # Two value tables: val_etats for step t, vt for step t-1.
    val_etats = np.zeros((len(g), len(g[0])))
    vt = np.zeros((len(g), len(g[0])))
    q = np.zeros((len(g), len(g[0]), len(liste_actions)))
    t = 0
    # Stopping criterion: sup-norm of the value change below epsilon.
    while np.max(np.abs(vt - val_etats)) >= epsilon or t == 0:
        t += 1
        # Keep the previous values.
        vt = np.copy(val_etats)
        for i in range(len(val_etats)):  # TODO: cleaner to use the row count?
            for j in range(len(val_etats[0])):  # same for the column count
                for action in liste_actions:
                    r = -g[i][j][1]
                    q[i][j][action] = r + gamma * ot.sum_p_v(
                        g, val_etats, i, j, p, action)
                val_etats[i][j] = max(q[i][j])
    # Optimal policy: greedy action in each state.
    d = np.zeros((len(g), len(g[0])))
    for i in range(len(val_etats)):
        for j in range(len(val_etats[0])):
            d[i][j] = np.argmax(q[i][j])
    end = tm.time()
    time = end - start
    return d, val_etats, t, time
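# All three solvers delegate the expectation over successor states to
# ot.sum_p_v(g, val_etats, i, j, p, action), whose real implementation
# lives in outils. The sketch below is only an assumption about its
# behaviour (chosen action succeeds with probability p, another action
# is taken uniformly otherwise, bumping the border keeps the agent in
# place); the move table 0=up, 1=down, 2=left, 3=right is hypothetical.
def sum_p_v_sketch(g, val_etats, i, j, p, action):
    global liste_actions
    autres = [a for a in liste_actions if a != action]
    total = 0.0
    for a in liste_actions:
        proba = p if a == action else (1 - p) / len(autres)
        di, dj = [(-1, 0), (1, 0), (0, -1), (0, 1)][a]
        ni, nj = i + di, j + dj
        if not (0 <= ni < len(g) and 0 <= nj < len(g[0])):
            ni, nj = i, j  # off-grid move: stay in place
        total += proba * val_etats[ni][nj]
    return total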
def iteration_de_la_politique(g, p, gamma, epsilon):
    # Policy iteration: returns the greedy policy, the state values,
    # the number of iterations and the elapsed time.
    global liste_actions
    start = tm.time()
    # Two value tables: val_etats for step t, vt for step t-1.
    val_etats = np.zeros((len(g), len(g[0])))
    d = np.zeros((len(g), len(g[0])))
    t = 0
    vt = np.zeros((len(g), len(g[0])))
    # Stopping criterion: sup-norm of the value change below epsilon.
    while np.max(np.abs(vt - val_etats)) >= epsilon or t == 0:
        t += 1
        # Values of the current policy.
        vt = np.copy(val_etats)
        # Evaluation of the current policy (one sweep).
        for i in range(len(val_etats)):
            for j in range(len(val_etats[0])):
                # Immediate reward (negative cost) of the cell.
                r = -g[i][j][1]
                val_etats[i][j] = r + gamma * ot.sum_p_v(
                    g, val_etats, i, j, p, d[i][j])
        # Policy improvement: greedy action in each state.
        for i in range(len(val_etats)):
            for j in range(len(val_etats[0])):
                arg = []
                for action in liste_actions:
                    r = -g[i][j][1]
                    arg.append(r + gamma * ot.sum_p_v(
                        g, val_etats, i, j, p, action))
                d[i][j] = np.argmax(np.array(arg))
    end = tm.time()
    time = end - start
    return d, val_etats, t, time
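# Sanity check, not part of the original code: a policy d is optimal
# exactly when it is greedy with respect to its own values. This helper
# recomputes the greedy action per state and compares it to d; it reuses
# ot.sum_p_v and the same reward convention as the solvers above.
def politique_est_gloutonne(g, val_etats, d, p, gamma):
    global liste_actions
    for i in range(len(g)):
        for j in range(len(g[0])):
            q = [-g[i][j][1] + gamma * ot.sum_p_v(g, val_etats, i, j, p, a)
                 for a in liste_actions]
            if np.argmax(q) != d[i][j]:
                return False
    return True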
def pl(g, p, gamma):
    # Linear programming: solve the MDP as an LP with Gurobi and return
    # the greedy policy, the state values, the simplex iteration count
    # and the elapsed time.
    global liste_actions
    start = tm.time()
    # Create a new model.
    m = grb.Model("test")
    val_etats = []
    # Create one continuous variable per state.
    for i in range(len(g)):
        for j in range(len(g[0])):
            n = "v[" + str(i) + "][" + str(j) + "]"
            x = m.addVar(vtype=grb.GRB.CONTINUOUS, name=n)
            val_etats.append(x)
    val_etats = np.reshape(np.array(val_etats), (len(g), len(g[0])))
    # Minimize the sum of state values subject to the Bellman inequalities.
    m.setObjective(np.sum(val_etats), grb.GRB.MINIMIZE)
    for i in range(len(g)):
        for j in range(len(g[0])):
            for action in liste_actions:
                r = -g[i][j][1]
                m.addConstr(val_etats[i][j] >= r + gamma * ot.sum_p_v(
                    g, val_etats, i, j, p, action))
    m.optimize()
    val_etats = np.reshape(np.array([v.x for v in m.getVars()]),
                           (len(g), len(g[0])))
    # Extract the greedy policy from the optimal values.
    d = np.zeros((len(g), len(g[0])))
    for i in range(len(val_etats)):
        for j in range(len(val_etats[0])):
            val_par_action = np.zeros(len(liste_actions))
            for action in liste_actions:
                # Recompute the reward for this state.
                r = -g[i][j][1]
                val_par_action[action] = r + gamma * ot.sum_p_v(
                    g, val_etats, i, j, p, action)
            d[i][j] = np.argmax(val_par_action)
    end = tm.time()
    time = end - start
    # Number of simplex iterations.
    t = m.getAttr('IterCount')
    return d, val_etats, t, time
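# Note on the LP above: minimizing sum_s v(s) subject to
#     v(s) >= r(s) + gamma * sum_s' P(s' | s, a) v(s')   for every action a
# forces each v(s) down onto the max over actions, so the LP optimum is
# exactly the fixed point of the Bellman optimality equation that value
# iteration converges to. The np.sum call on the array of Gurobi variables
# works through operator overloading; grb.quicksum is the more idiomatic
# way to build the same objective:
#     m.setObjective(grb.quicksum(val_etats.flatten()), grb.GRB.MINIMIZE)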
def pl_rewards(g, nbLignes, nbCol, liste_actions, rewards, p, gamma, epsilon):
    # Variant of pl taking explicit grid dimensions, an action list and a
    # reward table; the bottom-right cell is treated as the goal state.
    # Returns the greedy policy and the state values.
    # Create a new model.
    m = grb.Model("test")
    val_etats = []
    # Create one continuous variable per state.
    for i in range(nbLignes):
        for j in range(nbCol):
            n = "v[" + str(i) + "][" + str(j) + "]"
            x = m.addVar(vtype=grb.GRB.CONTINUOUS, name=n)
            val_etats.append(x)
    val_etats = np.reshape(np.array(val_etats), (nbLignes, nbCol))
    # Minimize the sum of state values subject to the Bellman inequalities;
    # reward_action returns a falsy value for actions that are not
    # applicable in (i, j), which are skipped.
    m.setObjective(np.sum(val_etats), grb.GRB.MINIMIZE)
    for i in range(nbLignes):
        for j in range(nbCol):
            for action in liste_actions:
                r = ot.reward_action(g, i, j, action, rewards)
                if r:
                    if i == nbLignes - 1 and j == nbCol - 1:
                        r = rewards[-1]  # goal cell gets the final reward
                    m.addConstr(val_etats[i][j] >= r + gamma * ot.sum_p_v(
                        g, val_etats, i, j, p, action))
    m.optimize()
    val_etats = np.reshape(np.array([v.x for v in m.getVars()]),
                           (nbLignes, nbCol))
    # Write the LP to a file for debugging:
    # m.write('debug.lp')
    # Extract the greedy policy from the optimal values.
    d = np.zeros((nbLignes, nbCol))
    for i in range(len(val_etats)):
        for j in range(len(val_etats[0])):
            val_par_action = np.zeros(len(liste_actions))
            for action in liste_actions:
                # Recompute the reward for each action.
                r = ot.reward_action(g, i, j, action, rewards)
                if not r:
                    val_par_action[action] = -np.inf  # inapplicable action
                    continue
                if i == nbLignes - 1 and j == nbCol - 1:
                    r = rewards[-1]
                val_par_action[action] = r + gamma * ot.sum_p_v(
                    g, val_etats, i, j, p, action)
            d[i][j] = np.argmax(val_par_action)
    return d, val_etats
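# Minimal usage sketch, not part of the original code. Assumptions: grid
# cells are (type, cost) pairs so that -g[i][j][1] is the immediate
# reward, liste_actions holds integer action indices, p is the success
# probability of an action, and outils.sum_p_v accepts this grid format.
if __name__ == "__main__":
    liste_actions = [0, 1, 2, 3]  # hypothetical action encoding
    g_demo = [[(0, 1), (0, 1)],   # 2 x 2 toy grid: each step costs 1,
              [(0, 1), (0, 0)]]   # the bottom-right cell is free
    d_vi, v_vi, t_vi, _ = iteration_de_la_valeur(g_demo, 0.9, 0.95, 1e-4)
    d_pi, v_pi, t_pi, _ = iteration_de_la_politique(g_demo, 0.9, 0.95, 1e-4)
    # d_lp, v_lp, t_lp, _ = pl(g_demo, 0.9, 0.95)  # requires a Gurobi license
    # All solvers should agree on the optimal values (up to epsilon).
    print(np.allclose(v_vi, v_pi, atol=1e-2))
    print(t_vi, "value-iteration sweeps,", t_pi, "policy-iteration steps")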