def main():
    sizeS = 9
    sizeA = 4
    tau = getTau()
    rho = getRho()
    gamma = 0.75

    V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)
    V1, Q1 = valueIteration(sizeS, sizeA, tau, rho, gamma, 1)
    V2, Q2 = valueIteration(sizeS, sizeA, tau, rho, gamma, 2)
    V3, Q3 = valueIteration(sizeS, sizeA, tau, rho, gamma, 3)
    V4, Q4 = valueIteration(sizeS, sizeA, tau, rho, gamma, 4)

    # print(V)
    # print(V.reshape((3, 3)))

    plt.figure()
    plt.subplot(3, 2, 6)
    plt.imshow(V.reshape((3, 3)), cmap='hot')
    plt.subplot(3, 2, 1)
    plt.imshow(V1.reshape((3, 3)), cmap='hot')
    plt.subplot(3, 2, 2)
    plt.imshow(V2.reshape((3, 3)), cmap='hot')
    plt.subplot(3, 2, 3)
    plt.imshow(V3.reshape((3, 3)), cmap='hot')
    plt.subplot(3, 2, 4)
    plt.imshow(V4.reshape((3, 3)), cmap='hot')
    plt.show()
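# The valueIteration routine called above is not shown in these snippets. Below is a
# minimal sketch of a tabular version consistent with the call sites: the signature
# (sizeS, sizeA, tau, rho, gamma, maxIters) and the (V, Q) return value are
# assumptions, not the actual module. It assumes tau[s, a, s'] holds transition
# probabilities and rho[s, a] holds immediate rewards.
import numpy as np

def valueIterationSketch(sizeS, sizeA, tau, rho, gamma, maxIters=None, tol=1e-6):
    V = np.zeros(sizeS)
    Q = np.zeros((sizeS, sizeA))
    iteration = 0
    while True:
        # Bellman backup: Q(s, a) = rho(s, a) + gamma * sum_s' tau(s, a, s') * V(s')
        Q = rho + gamma * tau.dot(V)
        V_new = Q.max(axis=1)  # greedy value of each state
        iteration += 1
        converged = np.max(np.abs(V_new - V)) < tol
        V = V_new
        if converged or (maxIters is not None and iteration >= maxIters):
            return V, Q

# e.g. valueIterationSketch(9, 4, tau, rho, 0.75, 3) would give the values after
# three sweeps, matching how the per-iteration heat maps above are generated.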
def main():
    print("~~~~~~~~~~ Value iteration rewards mean stop ~~~~~~~~~~\n\n")
    temp = valueIteration.valueIteration()
    valueIteration.STOP = 0
    print("\n\n~~~~~~~~~~ Value iteration rewards mean nothing ~~~~~~~~~~\n\n")
    valueIteration.valueIteration()
    print("\n\n~~~~~~~~~~ Reinforcement Learning ~~~~~~~~~~\n\n")
    reinforcementLearning(temp)
def run_val_iter(tau, rho, gamma=GAMMA):
    V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)
    print('Converged V:')
    print(V.reshape((3, 3)))

    plt.subplots(2, 2)
    plt.suptitle('Value Iteration -- Maze')
    for iters in range(1, 5):
        plt.subplot(2, 2, iters)
        V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma, iters)
        plt.imshow(V.reshape((3, 3)), cmap='hot')
        plt.title(str(iters) + ' iterations')
        plt.axis('off')
    plt.show()
def plot_graph(tau, rho):
    gammas = np.arange(0.5, 0.99, 0.01)
    V = np.zeros(gammas.shape)
    for i, gamma in enumerate(gammas):
        V[i] = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)[-1]
    plt.figure()
    plt.plot(gammas, V)
    plt.title(r'$s_0$ for different $\gamma$ values')
def q3():
    gamma = 0.5
    gamma_values = []
    s_0 = []
    for i in range(50):
        gamma_values.append(gamma)
        s_0.append(valueIteration.valueIteration(5, 2, tau, rho, gamma)[0])
        gamma += 0.01
    plt.plot(gamma_values, s_0, 'r-')
    plt.ylabel('value of s_0')
    plt.xlabel('gamma')
    plt.show()
def main():
    sizeS = 5
    sizeA = 2
    tau = getTau()
    rho = getRho()
    gamma = 0.75

    V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)
    V1, Q1 = valueIteration(sizeS, sizeA, tau, rho, 0.5)
    V2, Q2 = valueIteration(sizeS, sizeA, tau, rho, 0.75)
    V3, Q3 = valueIteration(sizeS, sizeA, tau, rho, 0.85)

    all_gamma = [0.5 + (x / 100.0) for x in range(49)]
    v_s0 = []
    v_send = []
    for gamma in all_gamma:
        V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)
        v_s0.append(V[4])
        v_send.append(V[0])
    # print(v_s0)

    plt.figure()
    plt.plot(all_gamma, v_s0, 'b', all_gamma, v_send, 'r')
    plt.show()
def run_val_iter(tau, rho, gamma=GAMMA):
    # V = valueIteration(States.COUNT, Action.COUNT,
    #                    tau, rho, gamma)
    # print('Converged V:')
    # print(V)
    plt.subplots(3, 1)
    plt.suptitle('Value Iteration -- Patience, dear')
    for i, gamma in enumerate([0.5, 0.75, 0.85]):
        plt.subplot(3, 1, i + 1)
        V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)
        print('V for gamma = %f:' % gamma)
        print(V)
        plt.imshow(V.reshape((1, -1)), cmap='hot')
        plt.title(r'$\gamma = %f$' % gamma)
        plt.axis('off')
# print model reward function R(s, a)
print("\nREWARD FUNCTION :")
for s in game.states:
    for a in game.actions:
        print("start state = " + s + ", action = " + a +
              ", reward = " + str(game.rewards(s, a)))

print("\n----------------------------")
print("END MDP model")
print("----------------------------\n")

# Run Value Iteration
print("\n----------------------------")
print("ITERATIONS OF MDP VALUE ITERATION")
print("----------------------------\n")
VI = valueIteration.valueIteration(game.states, game.actions, game.transitions,
                                   game.rewards, epsilon, gamma)

# Run a complete episode from the initial state to an end state,
# following the optimal policy
print("\n----------------------------")
print("OPTIMAL POLICY STARTING FROM S0")
print("----------------------------\n")
valueIteration.playEpisode("s0", game.isEnd, VI, game.actions,
                           game.transitions, game.rewards, gamma)

print("\n----------------------------")
print("OPTIMAL POLICY STARTING FROM S2")
print("----------------------------\n")
valueIteration.playEpisode("s2", game.isEnd, VI, game.actions,
                           game.transitions, game.rewards, gamma)

# Run Q-Learning iteration
print("\n----------------------------")
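# The playEpisode call above follows the policy that is greedy with respect to the
# converged values VI. As a minimal sketch of just that greedy-action step -- under
# the assumptions that VI maps states to values, transitions(s, a) yields
# (next_state, probability) pairs, and rewards(s, a) returns a number; none of
# those helpers are shown here, so treat the signatures as hypothetical:
def greedyAction(s, VI, actions, transitions, rewards, gamma):
    # argmax over a of  R(s, a) + gamma * sum_s' P(s' | s, a) * VI[s']
    return max(actions,
               key=lambda a: rewards(s, a)
               + gamma * sum(p * VI[s2] for s2, p in transitions(s, a)))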
def plotHeapMap(iterations):
    for i in range(1, iterations + 1):
        V = valueIteration.valueIteration(9, 4, tau, rho, 0.75, i)
        # print(V)
        plt.imshow(V.reshape((3, 3)), cmap="hot")
        plt.show()
def q1():
    V = valueIteration.valueIteration(9, 4, tau, rho, 0.75)
    print(V)
rho[i, j] = -6.0
# prob/reward of staying in same state
if i == k:
    # if there's a wall/barrier
    if s.actions[a] == -1:
        tau[i, j, k] = 1.0
    else:
        tau[i, j, k] = 0.2
else:
    if s.actions[a] == k:
        tau[i, j, k] = 0.8
        rho[i, j] = -2

V = valueIteration.valueIteration(9, 4, tau, rho, 0.75)
plt.imshow(V.reshape((3, 3)), cmap="hot")
plt.show()


def plotHeapMap(iterations):
    for i in range(1, iterations + 1):
        V = valueIteration.valueIteration(9, 4, tau, rho, 0.75, i)
        plt.imshow(V.reshape((3, 3)), cmap="hot")
        plt.show()

# plotHeapMap(9)
    action_tm = {0: transM.tmJun2Jul, 1: transM.stmJun2Jul}  # transition matrices
else:
    action_tm = {0: transM.tmJul2Aug, 1: transM.stmJul2Aug}  # transition matrices

policy = []
if method_name == 'v':
    print("\nValue Iteration")
    value_iteration = valueIteration(states, rewards, actions, action_tm,
                                     DISCOUNT_FACTOR, display_process=show_round)
    policy = getActionNames(value_iteration.generate_policy(), actions_names)
elif method_name == 'p':
    print("\nPolicy Iteration")
    policy_iteration = policyIteration(states, rewards, actions, action_tm,
                                       DISCOUNT_FACTOR, display_process=show_round)
    policy = getActionNames(policy_iteration.generate_policy(), actions_names)
else:
def q2():
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.5))
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.75))
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.85))
def q1():
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.75))