from time import sleep

import numpy as np

# mp_lock is assumed to be a module-level multiprocessing.Lock shared with the
# process that updates self.last_state.


def test_agent(self, agent, input_height, input_width):
    max_test_iter = 12
    shortest_path = 4
    j = 0
    mp_lock.acquire()
    state = self.last_state.get_last_state()
    mp_lock.release()
    paths = np.zeros((max_test_iter + 1, 1, 1, input_height, input_width), dtype='int32')
    paths[j] = state
    rewards = []

    # Begin test phase: act greedily (epsilon = 0) until the goal is reached
    # or the move budget is exhausted
    while True:
        action = agent.choose_action(state, 0)
        self.world.act(action)
        sleep(0.2)
        mp_lock.acquire()
        state_prime = self.last_state.get_last_state()
        mp_lock.release()
        reward, terminal = self.calculate_reward_and_terminal(state_prime)
        state = state_prime
        j += 1
        paths[j] = state
        rewards.append(reward)
        if j == max_test_iter and reward < 10:
            print('not successful, no reward found after {} moves'.format(max_test_iter))
            break
        elif terminal == 1:
            print('path found.')
            break

    # Dump every visited state and sum the collected rewards
    reward_total = 0
    for i in range(j + 1):
        print(paths[i])
    for num in rewards:
        reward_total += num
    print('Total Reward: {}'.format(reward_total))

    # Success means reaching the goal reward in (near-)optimal length
    if j <= shortest_path + 1 and reward_total >= 10:
        print('success!')
    else:
        print('fail :(')

    # visualize the weights for each of the action nodes
    weights = agent.get_weights()
    plot_weights(weights)
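# plot_weights is not defined in this snippet. Below is a minimal sketch of
# what it could look like, assuming agent.get_weights() returns a 2-D array
# with one row of input weights per action node; the square-image reshape and
# matplotlib layout are assumptions, not the project's actual implementation.
def plot_weights(weights):
    import matplotlib.pyplot as plt

    n_actions = weights.shape[0]
    fig, axes = plt.subplots(1, n_actions, figsize=(3 * n_actions, 3))
    for idx, ax in enumerate(np.atleast_1d(axes)):
        # Render each action node's weight vector as a square heat map
        side = int(np.sqrt(weights[idx].size))
        ax.imshow(weights[idx].reshape(side, side), cmap='viridis')
        ax.set_title('action {}'.format(idx))
    plt.show()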
print('Testing whether optimal path is learned ... ')
shortest_path = 5
state = s1
terminal = 0
path = np.zeros((5, 5))
path += state[0, 0, :, :]
i = 0
while terminal == 0:
    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime
    path += state[0, 0, :, :]
    i += 1
    if i == 20 or reward == -1:
        print('fail :(')
        break  # stop once the move budget is spent or the agent hits a penalty

# The accumulated occupancy map should mark exactly shortest_path cells
if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')
print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)
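# Both test routines call agent.choose_action(state, 0): an epsilon of 0
# disables exploration, so the agent follows its learned policy greedily.
# Below is a minimal sketch of such an epsilon-greedy method, assuming the
# agent exposes a q_values(state) helper returning one estimate per action
# (the names num_actions and q_values are illustrative, not the actual API):
def choose_action(self, state, epsilon):
    if np.random.rand() < epsilon:
        # explore: take a uniformly random action
        return np.random.randint(self.num_actions)
    # exploit: take the action with the highest estimated Q-value
    return int(np.argmax(self.q_values(state)))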