# Evaluate whether the trained agent has learned the optimal (shortest) path
# through the 5x5 grid world by running one greedy episode and summing the
# visited cells.
print('Testing whether optimal path is learned ... ')
shortest_path = 5          # number of cells on the optimal route
state = s1                 # start state; s1 is defined elsewhere in this file
terminal = 0
path = np.zeros((5, 5))    # accumulates a mark for every cell the agent visits
path += state[0, 0, :, :]  # mark the starting position
i = 0
while terminal == 0:
    # epsilon = 0 -> purely greedy (exploitation-only) action selection
    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime
    path += state[0, 0, :, :]
    i += 1
    if i == 20 or reward == -1:
        print('fail :(')
        # Bug fix: without this break, an episode that never reaches a
        # terminal state (terminal stays 0) would loop forever.
        break
# The episode took the shortest route iff exactly `shortest_path` cells were
# marked (each visited once, no revisits).
if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')
print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)
# Duplicate of the evaluation snippet above: run one greedy episode and check
# that the agent follows the shortest 5-cell path through the 5x5 grid.
print('Testing whether optimal path is learned ... ')
shortest_path = 5              # cell count of the optimal route
state = s1                     # initial state, provided elsewhere in the file
terminal = 0
path = np.zeros((5, 5))        # running map of every cell visited
path += state[0, 0, :, :]      # record the start cell
step = 0
while terminal == 0:
    # Choose greedily (epsilon = 0) so the learned policy is tested directly.
    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime
    path += state[0, 0, :, :]
    step += 1
    if step == 20 or reward == -1:
        print('fail :(')
        # Bug fix: terminate the episode here; otherwise a non-terminating
        # environment would keep this loop running indefinitely.
        break
# A shortest run marks exactly `shortest_path` distinct cells once each.
if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')
print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)