Exemplo n.º 1
0
# Roll out the trained agent from the start state `s1` and check whether the
# cells it visits add up to the expected shortest-path length.
# NOTE: the trailing comma is the Python 2 idiom for suppressing the newline;
# under Python 3 it merely builds a throwaway tuple and has no effect.
print('Testing whether optimal path is learned ... '),
shortest_path = 5
max_steps = 20  # hard cap so a policy that never terminates cannot hang the test
state = s1
terminal = 0
# Accumulates every visited state slice, starting with the initial one.
# Assumes state[0, 0] is a 5x5 grid marking the agent's position - TODO confirm.
path = np.zeros((5, 5))
path += state[0, 0, :, :]
i = 0
while terminal == 0:

    # Second argument is presumably the exploration rate (0 = greedy) - verify
    # against agent.choose_action.
    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime

    path += state[0, 0, :, :]

    i += 1
    if i == max_steps or reward == -1:
        print('fail :(')
        # Stop the rollout: previously the loop kept running after reporting
        # the failure and could spin forever if `terminal` never became
        # non-zero. The summary check below still runs on the partial path.
        break

if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')

print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)
Exemplo n.º 2
0
# Roll out the trained agent from the start state `s1` and check whether the
# cells it visits add up to the expected shortest-path length.
# NOTE: the trailing comma is the Python 2 idiom for suppressing the newline;
# under Python 3 it merely builds a throwaway tuple and has no effect.
print('Testing whether optimal path is learned ... '),
shortest_path = 5
max_steps = 20  # hard cap so a policy that never terminates cannot hang the test
state = s1
terminal = 0
# Accumulates every visited state slice, starting with the initial one.
# Assumes state[0, 0] is a 5x5 grid marking the agent's position - TODO confirm.
path = np.zeros((5, 5))
path += state[0, 0, :, :]
i = 0
while terminal == 0:

    # Second argument is presumably the exploration rate (0 = greedy) - verify
    # against agent.choose_action.
    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime

    path += state[0, 0, :, :]

    i += 1
    if i == max_steps or reward == -1:
        print('fail :(')
        # Stop the rollout: previously the loop kept running after reporting
        # the failure and could spin forever if `terminal` never became
        # non-zero. The summary check below still runs on the partial path.
        break

if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')

print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)