def longest_path_to(self, des):
    """Find the longest path from the snake's head to the destination.

    The search starts from the result of ``shortest_path_to(des)`` and
    stretches it greedily: a single step is replaced by a three-step detour
    (sideways, the original step, back) whenever the two cells lying beside
    that step pass ``_is_valid``.

    Args:
        des (snake.base.pos.Pos): The destination position on the map.

    Returns:
        An empty collections.deque when no shortest path exists. Otherwise
        the sequence returned by ``shortest_path_to`` (extended in place),
        holding snake.base.direc.Direc path directions.
    """
    path = self.shortest_path_to(des)
    if not path:
        return deque()

    self._reset_table()
    head = self.snake.head()

    # Mark the head and every cell on the shortest path as visited so that
    # detours cannot be routed through cells the path already uses.
    pos = head
    self._table[pos.x][pos.y].visit = True
    for step in path:
        pos = pos.adj(step)
        self._table[pos.x][pos.y].visit = True

    horizontal = (Direc.LEFT, Direc.RIGHT)
    vertical = (Direc.UP, Direc.DOWN)

    # Walk the path from the head, trying to widen each step in turn.
    i = 0
    pos = head
    while i < len(path):
        step = path[i]
        after = pos.adj(step)

        # A detour leaves the path at a right angle to the current step.
        if step in horizontal:
            sideways = vertical
        elif step in vertical:
            sideways = horizontal

        for side in sideways:
            beside_pos = pos.adj(side)
            beside_after = after.adj(side)
            if self._is_valid(beside_pos) and self._is_valid(beside_after):
                self._table[beside_pos.x][beside_pos.y].visit = True
                self._table[beside_after.x][beside_after.y].visit = True
                # Replace `step` by `side, step, opposite(side)`. The index
                # is left unchanged so the new first step is retried next.
                path.insert(i, side)
                path.insert(i + 2, Direc.opposite(side))
                break
        else:
            # No detour fits around this step; move on to the next one.
            pos = after
            i += 1

    return path
def _choose_action(self, e_greedy=True):
    """Pick the index of the next action, never reversing the snake.

    Args:
        e_greedy (bool): When true, a uniform random draw below
            ``self._epsilon`` makes the method return a random action
            instead of the one with the largest q value.

    Returns:
        The index into ``self._SNAKE_ACTIONS`` of the chosen action. The
        action opposite to ``self.snake.direc`` is never returned.
    """
    if e_greedy and np.random.uniform() < self._epsilon:
        # Explore: keep redrawing until the action is not a reversal.
        chosen = np.random.randint(0, self._NUM_ACTIONS)
        while Direc.opposite(self.snake.direc) == self._SNAKE_ACTIONS[chosen]:
            chosen = np.random.randint(0, self._NUM_ACTIONS)
        return chosen

    # Exploit: evaluate the q values of all actions for the current state.
    state_batch = self._state()[np.newaxis, :]
    q_values = self._sess.run(
        self._q_eval_all,
        feed_dict={self._state_eval: state_batch},
    )[0]

    # Partitioning around position size - 2 places the indices of the two
    # largest q values in the last two slots, largest last.
    ranked = np.argpartition(q_values, q_values.size - 2)
    best = ranked[-1]

    # Fall back to the runner-up when the best action would reverse the snake.
    if Direc.opposite(self.snake.direc) == self._SNAKE_ACTIONS[best]:
        return ranked[-2]
    return best
def _choose_action(self, e_greedy=True):
    """Return the index of the action to take next.

    Args:
        e_greedy (bool): Enables exploration. When true and a uniform random
            draw is below ``self._epsilon``, a random action is returned;
            otherwise the action with the largest q value is used.

    Returns:
        An index into ``self._SNAKE_ACTIONS``. The returned action is never
        the opposite of the snake's current direction.
    """
    take_random = e_greedy and np.random.uniform() < self._epsilon

    if not take_random:
        feed = {self._state_eval: self._state()[np.newaxis, :]}
        scores = self._sess.run(self._q_eval_all, feed_dict=feed)[0]

        # After partitioning, the last two entries are the indices of the
        # 2nd largest and the largest q value respectively.
        order = np.argpartition(scores, scores.size - 2)
        top = order[-1]

        # Use the 2nd largest q value if the largest one means turning back.
        top_is_reverse = (
            Direc.opposite(self.snake.direc) == self._SNAKE_ACTIONS[top]
        )
        return order[-2] if top_is_reverse else top

    # Random exploration: reject draws that would turn the snake back.
    while True:
        pick = np.random.randint(0, self._NUM_ACTIONS)
        if Direc.opposite(self.snake.direc) == self._SNAKE_ACTIONS[pick]:
            continue
        return pick
def __update_direc(self, new_direc):
    """Queue a new direction for the snake unless it is a direct reversal.

    Args:
        new_direc: The requested direction. It is ignored when its opposite
            equals the snake's current direction.
    """
    if Direc.opposite(new_direc) == self.__snake.direc:
        # A reversal is rejected: nothing is queued and the snake stays put.
        return

    self.__snake.direc_next = new_direc

    # While paused, an accepted direction change also moves the snake once.
    if self.__pause:
        self.__snake.move()
def test_opposite():
    """Check that Direc.opposite maps each direction to its reverse."""
    expected_pairs = (
        (Direc.UP, Direc.DOWN),
        (Direc.DOWN, Direc.UP),
        (Direc.LEFT, Direc.RIGHT),
        (Direc.RIGHT, Direc.LEFT),
    )
    for direc, reverse in expected_pairs:
        assert Direc.opposite(direc) == reverse