def get_actions(self, s):
    """Return the Action list of legal space moves in state *s*.

    A shift is legal when it keeps the empty cell inside the board.
    """
    col, row = self._find_space(s)
    width, height = len(s[0]), len(s)
    # Pair each candidate shift with its legality test; order matters
    # so the result matches the historical action ordering.
    candidates = (
        ((-1, 0), col > 0),
        ((1, 0), col < width - 1),
        ((0, -1), row > 0),
        ((0, 1), row < height - 1),
    )
    return [Action(shift) for shift, legal in candidates if legal]
def test_predict(self):
    """predict() returns the expected follow-up states for two moves."""
    # Fix: assertEquals is a long-deprecated alias; use assertEqual.
    new_s = self.env.predict(self.s, Action((-1, 0)))
    self.assertEqual((
        tuple("1234"),
        tuple("5678"),
        tuple("9a~b"),
    ), new_s)
    new_s = self.env.predict(self.s, Action((0, -1)))
    self.assertEqual((
        tuple("1234"),
        tuple("567~"),
        tuple("9ab8"),
    ), new_s)
def _action_push(maze, pos, new_pos, behind_new_pos):
    """Build the Action for pushing a box one step.

    The command pairs *pos* with the empty marks, *new_pos* with the
    player marks, and *behind_new_pos* with the box marks; the cost
    combines a plain move with a push.
    """
    effects = (
        _effect(maze, pos, TARGETED_EMPTY_MARKS),
        _effect(maze, new_pos, TARGETED_PLAYER_MARKS),
        _effect(maze, behind_new_pos, TARGETED_BOX_MARKS),
    )
    return Action(effects, MOVE_COST + PUSH_COST)
def get_actions(self, hyper_s):
    """Return the list of hyper actions reachable from *hyper_s*.

    Periphery states already covered by an earlier action are skipped,
    so the result carries no redundant actions.
    """
    actions = []
    for periphery_s in self._get_periphery(hyper_s):
        # _is_covered inspects the actions gathered so far, so the
        # list has to be built incrementally (no comprehension here).
        if self._is_covered(actions, periphery_s):
            continue
        actions.append(Action(self._get_hyper_s(periphery_s)))
    return actions
def get_actions(self, s):
    """Return the list of macro actions available from state *s*.

    Each macro action's cmd stores the end state of the macro; the
    cost comes from the partition's macro edge.
    """
    return [Action(end_s, cost)
            for cost, end_s in self.partition.get_macro_edges(s)]
def _prepare_action(s, next_s, cost=1):
    """Return an Action whose cmd rewrites state *s* into *next_s*.

    The cmd is a list of ((x, y), new_cell) pairs, one for every grid
    cell that differs between the two states, scanned row by row.
    """
    changes = [
        ((x, y), next_cell)
        for y, (row, next_row) in enumerate(zip(s, next_s))
        for x, (cell, next_cell) in enumerate(zip(row, next_row))
        if cell != next_cell
    ]
    return Action(changes, cost)
def get_actions(self, s):
    """Return the Actions available to every in-play unit in state *s*."""
    # Load the state and publish the models to the rules engine before
    # asking for moves; calc_move relies on both.
    self._use_state(s)
    self.rules.set_models(self.models)
    actions = []
    for unit in self.units:
        # Units that are out of play contribute no actions.
        if unit.is_out():
            continue
        for shift in ruling.ACTION_SHIFTS:
            positions, cost = self.rules.calc_move(unit, shift)
            if positions is None:
                continue  # this shift is not a legal move for the unit
            actions.append(
                Action(_create_state(self.models, positions), cost))
    return actions
def _action_jump(pos, new_pos, frog_mark):
    """Return the jump Action pairing *pos* with *frog_mark* and
    *new_pos* with SPACE_MARK.

    NOTE(review): the pairing looks inverted for a frog moving from
    *pos* to *new_pos* — confirm the cmd convention with callers.
    """
    return Action((
        (pos, frog_mark),
        (new_pos, SPACE_MARK),
    ))
def get_near_transitions(self, pos):
    """Returns a list of near (next_s, a) pairs.

    Each neighbor of *pos* (per self.action_shifts) is paired with the
    Action built from the corresponding entry of COMMANDS.
    """
    x, y = pos
    neighbors = [(x + sx, y + sy) for sx, sy in self.action_shifts]
    # Fix: zip() is a lazy iterator on Python 3; materialize it so the
    # function returns the list its docstring promises (re-iterable,
    # indexable, len()-able).
    return list(zip(neighbors, [Action(cmd) for cmd in COMMANDS]))
def test_estim_cost(self):
    """The fixture state estimates at 0; one move away estimates at 1."""
    # Fix: assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(0, self.env.estim_cost(self.s))
    new_s = self.env.predict(self.s, Action((-1, 0)))
    # Fix: assertNotEqual reports both values on failure, unlike
    # assertTrue(a != b).
    self.assertNotEqual(new_s, self.s)
    self.assertEqual(1, self.env.estim_cost(new_s))
def _action_move(maze, pos, new_pos):
    """Build the Action for a plain one-step move of the player.

    *pos* is paired with the empty marks and *new_pos* with the player
    marks; the cost is a single move.
    """
    effects = (
        _effect(maze, pos, TARGETED_EMPTY_MARKS),
        _effect(maze, new_pos, TARGETED_PLAYER_MARKS),
    )
    return Action(effects, cost=MOVE_COST)
def test_predit(self):
    """Predicting a RIGHT move updates the player position and boxes."""
    # NOTE(review): the method name has a typo ("predit"); kept as-is
    # so the test id stays stable for any tooling referencing it.
    predicted = self.env.predict(self.initial_state, Action(RIGHT))
    pos, boxes = predicted
    self.assertEqual((2, 1), pos)
    # Box order is not significant, so compare sorted sequences.
    expected_boxes = sorted(((3, 1), (2, 2), (3, 2)))
    self.assertEqual(expected_boxes, sorted(boxes))
def decode_action(self, bits):
    """Translate an action bit vector into its Action.

    Raises:
        ValueError: if *bits* does not map to a known command.
    """
    cmd = self.ACTION_BITS_TO_CMD.get(tuple(bits))
    if cmd is not None:
        return Action(cmd)
    raise ValueError("Invalid action bits: %s" % bits)