def test_winner_correct_when_board_full():
    """Check the winner reported before and after the whole board is filled.

    A freshly created game must report no winner; after layers 0 through 3
    have all been filled via ``fill_layer`` the test expects player 1 to be
    reported as the winner.
    """
    board = Pylos()
    assert_equal(board.get_winner(), None)

    # Fill every layer in index order (0 is the 4x4 layer, 3 the single top spot).
    for layer_index in range(4):
        fill_layer(board, layer_index)

    assert_equal(board.get_winner(), 1)
def test_winner_correct_when_current_player_no_balls():
    """Check that a player who must move with an empty reserve loses.

    The game is driven to a position where the reserve is ``[1, 0]`` and
    player 1 is to move; the test then expects player 0 to be the winner from
    both ``determine_winner()`` and ``get_winner()``.
    """
    game = Pylos()
    assert game.get_winner() is None

    # Positional arguments passed to play_turn after (game, None), one entry
    # per turn.  Locations are (layer, row, column).
    opening_turns = [
        ((0, 0, 0),),
        ((0, 3, 0),),
        ((0, 0, 1),),
        ((0, 3, 1),),
        ((0, 1, 0),),
        ((0, 3, 2),),
        # Placement followed by two extra locations -- presumably the two
        # retractions; the rendered board below shows both spots empty again.
        ((0, 1, 1), (0, 1, 0), (0, 1, 1)),
        ((0, 3, 3),),
        ((0, 0, 2),),
        ((0, 0, 3),),
    ]
    for turn_args in opening_turns:
        play_turn(game, None, *turn_args)
    assert_equal(game.render(), "0001/..../..../1111#.../.../...#../..#.")

    # fill rows 2 and 3 (row indices 1 and 2), one column at a time
    for row in (1, 2):
        for column in range(4):
            play_turn(game, None, (0, row, column))
    assert_equal(game.reserve, [8, 6])
    assert_equal(game.render(), "0001/0101/0101/1111#.../.../...#../..#.")

    # fill next layers
    for layer_index in (1, 2):
        fill_layer(game, layer_index)
    assert_equal(game.reserve, [1, 0])
    assert_equal(game.current_player, 1)
    assert_equal(game.render(), "0001/0101/0101/1111#010/101/010#10/10#.")

    assert_equal(game.determine_winner(), 0)
    assert_equal(game.get_winner(), 0)
class PylosEnv(gym.Env):
    """Gym environment that exposes a single ``Pylos`` game to an agent.

    Every call to :meth:`step` performs one *phase* of a turn (the phases are
    the ``pylos.PHASE_*`` constants), not necessarily a whole turn.  Both
    players are driven through the same environment; the observation encodes
    whose move it is.
    """

    # True once a move has produced a winner; cleared again by reset().
    done: bool
    metadata = {}

    def __init__(self):
        self.pylos = Pylos()
        self.done = False
        initial_state = self.reset()

        # each move consists of 31 possible values (0-29 meaning one of the board positions,
        # 30 meaning 'none' or 'reserve' depending on the current phase)
        # note that for the 'target' phase, 30 is never a valid value
        self.action_space = spaces.Discrete(31)

        # Observations are flat 0/1 vectors (see state_from_pylos); size the
        # space from an actual observation so it always matches the encoding.
        self.observation_space = spaces.MultiBinary(len(initial_state))

    def state_from_pylos(self):
        """Encode the current game as a flat 0/1 numpy vector.

        Layout, in order:

        * 2 entries: one-hot of the current player,
        * 4 entries: one-hot of the current phase (source, target,
          retract1, retract2),
        * 3 entries per board position: ``[empty, owned by 0, owned by 1]``.
        """
        game = self.pylos
        player_onehot = np.array(
            [1 - game.current_player, game.current_player])

        # game phase
        phase_onehot = np.array([
            1 if game.phase == candidate else 0
            for candidate in (
                pylos.PHASE_SOURCE_LOCATION,
                pylos.PHASE_TARGET_LOCATION,
                pylos.PHASE_RETRACT1,
                pylos.PHASE_RETRACT2,
            )
        ])

        # board (3 nodes per board position); flatten the board once and
        # reuse it for all three indicator rows
        owners = self.flat_board()
        board_vector = np.array([
            [1 if owner is None else 0 for owner in owners],
            [1 if owner == 0 else 0 for owner in owners],
            [1 if owner == 1 else 0 for owner in owners],
        ]).flatten('F')  # column-major: the 3 indicators of a position stay adjacent

        return np.concatenate([player_onehot, phase_onehot, board_vector])

    def flat_board(self):
        """Return every board cell as one list, layer by layer, row by row."""
        cells = []
        for layer in self.pylos.layers:
            for row in layer:
                cells.extend(row)
        return cells

    def reset(self):
        """Start a fresh game and return its initial observation."""
        self.done = False
        self.pylos = Pylos()
        return self.state_from_pylos()

    def step(self, action):
        """Apply ``action`` for the current player and phase.

        Returns the classic gym 4-tuple ``(observation, reward, done, info)``.

        Reward:

        * ``0`` (and a ``warning_step_after_done`` info flag) when called
          after the game has ended,
        * ``-10`` for an invalid move; the game state is left untouched,
        * otherwise ``1`` plus the change in the moving player's reserve
          caused by the move, plus ``10`` if the move produced a winner.
        """
        if self.done:
            print("WARNING: step called after done")
            return self.state_from_pylos(), 0, self.done, {
                'warning_step_after_done': True
            }

        location = map_action_to_location(action)
        if not self.pylos.is_valid_move(location):
            return self.create_invalid_move_step_response(location)

        # Remember who is moving: current_player may change inside move().
        player = self.pylos.current_player
        reserve_before = self.pylos.reserve[player]
        self.pylos.move(location)
        reserve_after = self.pylos.reserve[player]
        winner = self.pylos.get_winner()

        reward = 1
        reward += reserve_after - reserve_before
        if winner is not None:
            # NOTE(review): the bonus is granted whenever the game ends,
            # without checking that `winner == player` -- confirm intended.
            reward += 10
            self.done = True

        return self.state_from_pylos(), reward, self.done, {
            'phase': self.pylos.phase
        }

    def render(self, mode='human'):
        """Print the winner (if any), current player, phase and board.

        ``mode`` is accepted so that calls following the classic
        ``gym.Env.render(mode='human')`` convention (for example from
        wrappers) do not fail; it is otherwise ignored and the output always
        goes to stdout.
        """
        # NOTE(review): reads the `winner` attribute directly, whereas step()
        # goes through get_winner() -- presumably equivalent; confirm.
        if self.pylos.winner is not None:
            print(" *** Winner *** ", self.pylos.winner)
        print("Player: ", self.pylos.current_player)
        print("Phase: ", PHASE_NAMES[self.pylos.phase])
        print("\n".join([self._render_row(r) for r in range(4)]))

    def _render_row(self, row):
        """Return row ``row`` of every layer, side by side, as one string.

        Layers that have no such row contribute blanks instead.
        """
        layers = self.pylos.render().split("#")
        split_layers = [layer.split("/") for layer in layers]
        return " ".join(([
            layer[row] if row < len(layer) else " " * (4 - row)
            for layer in split_layers
        ]))

    def create_invalid_move_step_response(self, location):
        """Build the step() result for a rejected move (reward ``-10``)."""
        return self.state_from_pylos(), -10, self.done, {'invalid': location}