Example No. 1
import matplotlib.pyplot as plt

# valueIteration, getTau, and getRho come from the accompanying module (not shown).
def main():
    sizeS = 9
    sizeA = 4
    tau = getTau()
    rho = getRho()
    gamma = 0.75
    V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)       # run to convergence
    V1, Q1 = valueIteration(sizeS, sizeA, tau, rho, gamma, 1)  # stop after 1 iteration
    V2, Q2 = valueIteration(sizeS, sizeA, tau, rho, gamma, 2)  # ... 2 iterations
    V3, Q3 = valueIteration(sizeS, sizeA, tau, rho, gamma, 3)
    V4, Q4 = valueIteration(sizeS, sizeA, tau, rho, gamma, 4)

    # print(V)
    # print(V.reshape((3,3)))

    plt.figure()
    plt.subplot(3, 2, 6)
    plt.imshow(V.reshape((3, 3)), cmap='hot')

    plt.subplot(3, 2, 1)
    plt.imshow(V1.reshape((3, 3)), cmap='hot')

    plt.subplot(3, 2, 2)
    plt.imshow(V2.reshape((3, 3)), cmap='hot')

    plt.subplot(3, 2, 3)
    plt.imshow(V3.reshape((3, 3)), cmap='hot')

    plt.subplot(3, 2, 4)
    plt.imshow(V4.reshape((3, 3)), cmap='hot')

    plt.show()
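
Most of these examples call a valueIteration helper whose source is not shown. As a point of reference, here is a minimal sketch of the array-based variant used here (the one returning both V and Q); the signature is inferred from the calls above, and the convergence tolerance tol is an added assumption, not the authors' actual code.

import numpy as np

def valueIteration(sizeS, sizeA, tau, rho, gamma, nIter=None, tol=1e-8):
    # tau[s, a, s2]: transition probabilities; rho[s, a]: immediate rewards.
    # sizeA is implied by rho's shape but kept to match the observed signature.
    V = np.zeros(sizeS)
    it = 0
    while nIter is None or it < nIter:
        # Bellman backup: Q[s, a] = rho[s, a] + gamma * sum_s2 tau[s, a, s2] * V[s2]
        Q = rho + gamma * tau.dot(V)
        newV = Q.max(axis=1)
        if nIter is None and np.abs(newV - V).max() < tol:
            return newV, Q
        V = newV
        it += 1
    return V, Q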
Example No. 2
def main():
    print "~~~~~~~~~~ Value iteration rewards mean stop ~~~~~~~~~~\n\n"
    temp = valueIteration.valueIteration()
    valueIteration.STOP = 0
    print "\n\n~~~~~~~~~~ Value iteration rewards mean nothing ~~~~~~~~~~\n\n"
    valueIteration.valueIteration()

    print "\n\n~~~~~~~~~~ Reinforcement Learning ~~~~~~~~~~\n\n"
    reinforcementLearning(temp)
Example No. 4
def run_val_iter(tau, rho, gamma=GAMMA):
    V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)
    print('Converged V:')
    print(V.reshape((3, 3)))
    plt.subplots(2, 2)
    plt.suptitle('Value Iteration -- Maze')
    for iters in range(1, 5):
        plt.subplot(2, 2, iters)
        V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma, iters)
        plt.imshow(V.reshape((3, 3)), cmap='hot')
        plt.title(str(iters) + ' iterations')
        plt.axis('off')
    plt.show()
Example No. 5
def plot_graph(tau, rho):
    gammas = np.arange(0.5, 0.99, 0.01)
    V = np.zeros(gammas.shape)
    for i, gamma in enumerate(gammas):
        V[i] = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)[-1]  # value of the last state (s_0 in this ordering)
    plt.figure()
    plt.plot(gammas, V)
    plt.title(r'$s_0$ for different $\gamma$ values')
Example No. 6
def q3():
    gamma = 0.5
    gamma_values = []
    s_0 = []
    for i in range(50):
        gamma_values.append(gamma)
        s_0.append(valueIteration.valueIteration(5, 2, tau, rho, gamma)[0])
        gamma += 0.01
    plt.plot(gamma_values, s_0, 'r-')
    plt.ylabel('value of s_0')
    plt.xlabel('gamma')
    plt.show()
Example No. 7
def main():
    sizeS = 5
    sizeA = 2
    tau = getTau()
    rho = getRho()
    gamma = 0.75
    V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)
    V1, Q1 = valueIteration(sizeS, sizeA, tau, rho, 0.5)
    V2, Q2 = valueIteration(sizeS, sizeA, tau, rho, 0.75)
    V3, Q3 = valueIteration(sizeS, sizeA, tau, rho, 0.85)

    all_gamma = [0.5 + (x / 100.0) for x in range(49)]
    v_s0 = []
    v_send = []
    for gamma in all_gamma:
        V, Q = valueIteration(sizeS, sizeA, tau, rho, gamma)
        v_s0.append(V[4])
        v_send.append(V[0])

    # print(v_s0)

    plt.figure()
    plt.plot(all_gamma, v_s0, 'b', all_gamma, v_send, 'r')
    plt.show()
Example No. 8
def run_val_iter(tau, rho, gamma=GAMMA):
    # V = valueIteration(States.COUNT, Action.COUNT,
    #                    tau, rho, gamma)
    # print('Converged V:')
    # print(V)
    plt.subplots(3, 1)
    plt.suptitle('Value Iteration -- Patience, dear')
    for i, gamma in enumerate([0.5, 0.75, 0.85]):
        plt.subplot(3, 1, i + 1)
        V = valueIteration(States.COUNT, Action.COUNT, tau, rho, gamma)
        print('V for gamma = %f:' % gamma)
        print(V)
        plt.imshow(V.reshape((1, -1)), cmap='hot')
        plt.title(r'$\gamma = %f$' % gamma)
        plt.axis('off')
Example No. 9
# print model reward function R(s,a)
print("\nREWARD FUNCTION :")
for s in game.states:
    for a in game.actions:
        print("start state = " + s + ", action = " + a + ", reward = " +
              str(game.rewards(s, a)))

print("\n----------------------------")
print("END MDP model")
print("----------------------------\n")

# Run Value Iteration
print("\n----------------------------")
print("ITERATIONS OF MDP VALUE ITERATION")
print("----------------------------\n")
VI = valueIteration.valueIteration(game.states, game.actions, game.transitions,
                                   game.rewards, epsilon, gamma)

# Run a complete episode from initial state to end state following the optimal policy
print("\n----------------------------")
print("OPTIMAL POLICY À PARTIR DE S0")
print("----------------------------\n")
valueIteration.playEpisode("s0", game.isEnd, VI, game.actions,
                           game.transitions, game.rewards, gamma)
print("\n----------------------------")
print("OPTIMAL POLICY À PARTIR DE S2")
print("----------------------------\n")
valueIteration.playEpisode("s2", game.isEnd, VI, game.actions,
                           game.transitions, game.rewards, gamma)

# Run Q Learning Iteration
print("\n----------------------------")
Example No. 10
def plotHeapMap(iterations):
    for i in range(1, iterations + 1):
        V = valueIteration.valueIteration(9, 4, tau, rho, 0.75, i)
        # print(V)
        plt.imshow(V.reshape((3, 3)), cmap="hot")
        plt.show()
Example No. 11
def q1():
    V = valueIteration.valueIteration(9, 4, tau, rho, 0.75)
    print(V)
Example No. 12
# NOTE: this example begins mid-loop in the source; the enclosing loops
# over states i, actions j (with action key a), and successors k are cut off.
                    rho[i, j] = -6.0
                # prob/reward of staying in the same state
                if i == k:
                    # if there's a wall/barrier
                    if s.actions[a] == -1:
                        tau[i, j, k] = 1.0
                    else:
                        tau[i, j, k] = 0.2
                else:
                    if s.actions[a] == k:
                        tau[i, j, k] = 0.8
                        rho[i, j] = -2


V = valueIteration.valueIteration(9, 4, tau, rho, 0.75)
plt.imshow(V.reshape((3, 3)), cmap="hot")
plt.show()

def plotHeapMap(iterations):
    for i in range(1, iterations + 1):
        V = valueIteration.valueIteration(9, 4, tau, rho, 0.75, i)
        plt.imshow(V.reshape((3, 3)), cmap="hot")
        plt.show()


#plotHeapMap(9)
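
The fragment above begins mid-loop. Purely as an assumption, the enclosing setup for the 3x3 maze (9 states, 4 actions) could look like the sketch below; the grid layout, the State class, the meaning of s.actions (successor state index, or -1 at a wall), and the condition behind the initial -6.0 reward are all guesses, not the source's code.

import numpy as np

class State:
    def __init__(self, actions):
        self.actions = actions  # per action: successor state index, or -1 at a wall

# Hypothetical 3x3 grid, row-major, actions ordered (up, down, left, right).
states = [State([i - 3 if i >= 3 else -1,       # up
                 i + 3 if i < 6 else -1,        # down
                 i - 1 if i % 3 != 0 else -1,   # left
                 i + 1 if i % 3 != 2 else -1])  # right
          for i in range(9)]

tau = np.zeros((9, 4, 9))    # tau[i, j, k] = P(next state k | state i, action j)
rho = np.full((9, 4), -6.0)  # the fragment's -6.0 reward, applied here as a default

for i, s in enumerate(states):
    for j in range(4):          # j indexes the four moves; the source's action
        a = j                   # key `a` is assumed to coincide with j
        for k in range(9):
            if i == k:
                # staying put: certain when a wall blocks the move, else prob 0.2
                tau[i, j, k] = 1.0 if s.actions[a] == -1 else 0.2
            elif s.actions[a] == k:
                tau[i, j, k] = 0.8  # intended move succeeds with prob 0.8
                rho[i, j] = -2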



Example No. 13
# NOTE: this fragment begins mid-function in the source; the enclosing
# function definition and conditional are cut off.
            action_tm = {
                0: transM.tmJun2Jul,
                1: transM.stmJun2Jul
            }  # transition matrices
        else:
            action_tm = {
                0: transM.tmJul2Aug,
                1: transM.stmJul2Aug
            }  # transition matrices

        policy = []
        if method_name == 'v':
            print("\nValue Iteration")
            value_iteration = valueIteration(states,
                                             rewards,
                                             actions,
                                             action_tm,
                                             DISCOUNT_FACTOR,
                                             display_process=show_round)
            policy = getActionNames(value_iteration.generate_policy(),
                                    actions_names)
        elif method_name == 'p':
            print("\nPolicy Iteration")
            policy_iteration = policyIteration(states,
                                               rewards,
                                               actions,
                                               action_tm,
                                               DISCOUNT_FACTOR,
                                               display_process=show_round)
            policy = getActionNames(policy_iteration.generate_policy(),
                                    actions_names)
        else:
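
Unlike the earlier function-style examples, this one uses valueIteration as a class with a generate_policy() method. Only the constructor arguments and that method name appear in the source; the internals sketched below (a per-state reward vector, numpy transition matrices keyed by action index, a fixed number of sweeps) are assumptions.

import numpy as np

class valueIteration:
    def __init__(self, states, rewards, actions, action_tm,
                 discount_factor, display_process=False, n_sweeps=100):
        self.states = states
        self.rewards = np.asarray(rewards, dtype=float)  # one reward per state (assumed)
        self.actions = actions                           # e.g. [0, 1]
        self.action_tm = action_tm                       # {action index: transition matrix}
        self.gamma = discount_factor
        self.display_process = display_process
        self.n_sweeps = n_sweeps

    def generate_policy(self):
        V = np.zeros(len(self.states))
        for sweep in range(self.n_sweeps):
            # One row of Q per action: immediate reward plus discounted successor value.
            Q = np.stack([self.rewards + self.gamma * self.action_tm[a].dot(V)
                          for a in self.actions])
            V = Q.max(axis=0)
            if self.display_process:
                print('sweep %d: V = %s' % (sweep, V))
        return Q.argmax(axis=0)  # greedy action index per state, fed to getActionNames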
Example No. 14
def q2():
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.5))
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.75))
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.85))
Example No. 15
def q1():
    print(valueIteration.valueIteration(5, 2, tau, rho, 0.75))