def test_render_in_place(height):
    for i in range(20):
        print(i)
    util.print_up(height)
    print("Move it on up!", end="")
    state = State(height, 7, 6, 4)
    state.step(2, 3)
    state.step(4, 2)
    state.render(in_place=True)
    print("hey!")
    state.render()
    print("wohou!")
def test_field_to_int(height):
    tsu_rules = (height == 13)
    state = State(height, 3, 3, 1, tsu_rules=tsu_rules)
    for _ in range(10):
        state.step(*random.choice(state.actions))
    state.render()
    stack = state.field.to_list()
    n = state.field_to_int()
    print(n)
    state.reset()
    state.field_from_int(n)
    state.render()
    assert (state.field.to_list() == stack)
def test_mirror(height):
    state = State(height, 5, 3, 5)
    twin = state.clone()
    for i in range(state.num_deals):
        x = np.random.randint(0, state.width - 1)
        orientation = np.random.randint(0, 4)
        state.step(x, orientation)
        # Play the mirrored move on the twin. Horizontal placements span
        # columns (x, x + 1), so the mirrored column shifts by one.
        x = state.width - x - 1
        if orientation % 2 == 0:
            x -= 1
        orientation = (orientation + 2) % 4
        twin.step(x, orientation)
    state.render()
    twin.render()
    state.mirror()
    state.render()
    assert (state.field.to_list() == twin.field.to_list())
def test_state_encoding(height):
    state = State(height, 5, 4, 4)
    for _ in range(25):
        state.step(*random.choice(state.actions))
    state.num_deals = None
    state.render()
    deals = state.deals[:]
    field = state.field.to_list()
    encoded = state_encode(state)
    print(encoded)
    state.field.reset()
    state.deals = []
    decoded = state_decode(state, encoded)
    decoded.render()
    assert (deals == decoded.deals)
    assert (field == decoded.field.to_list())
def benchmark(depth, threshold, factor):
    state = State(16, 8, 5, 3, tsu_rules=False)
    total_reward = 0
    for i in range(1000):
        if i % 100 == 0:
            print(i, "/ 1000")
        actions = tree_search_actions(
            state, depth, occupation_threshold=threshold, factor=factor)
        action = random.choice(actions)
        reward = state.step(*state.actions[action])
        total_reward += reward
        if reward < 0:
            return total_reward, True
    return total_reward, False
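
# Hedged usage sketch for benchmark() above, assuming tree_search_actions
# and State are importable in this module's context. The depth, threshold
# and factor values below are illustrative guesses, not tuned settings
# from the original code.
if __name__ == "__main__":
    total, lost = benchmark(depth=2, threshold=0.7, factor=0.9)
    print("total reward:", total, "topped out early:", lost)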
class PuyoPuyoEndlessEnv(gym.Env):
    """
    Puyo Puyo environment. Single player endless mode.
    """
    TESTING = False

    metadata = {"render.modes": ["human", "console", "ansi"]}

    def __init__(self, height, width, num_colors, num_deals, tsu_rules=False):
        self.state = State(height, width, num_colors, num_deals, tsu_rules=tsu_rules)
        self.reward_range = (-1, self.state.max_score)

        self.action_space = spaces.Discrete(len(self.state.actions))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, 1, (self.state.num_colors, self.state.num_deals, 2), dtype=np.int8),
            spaces.Box(0, 1, (self.state.num_colors, self.state.height, self.state.width), dtype=np.int8),
        ))
        self.seed()

        self.viewer = None
        self.anim_state = None
        self.last_action = None

    def seed(self, seed=None):
        return [self.state.seed(seed)]

    def reset(self):
        self.state.reset()
        if self.viewer:
            self.anim_state = None
        self.last_action = None
        return self.state.encode()

    def close(self):
        if self.viewer:
            self.viewer.close()

    def render(self, mode="console"):
        if self.TESTING and mode == "human":
            mode = "console"
        if mode == "human":
            from time import sleep
            from gym_puyopuyo.rendering import ImageViewer, AnimationState

            if self.anim_state:
                self.anim_state.state.deals[1:] = self.state.deals[:-1]
            else:
                self.anim_state = AnimationState(self.state.clone())
            if not self.viewer:
                self.viewer = ImageViewer(
                    width=self.anim_state.width + 4,
                    height=self.anim_state.height)
            if self.last_action is not None:
                self.anim_state.state.play_deal(*self.state.actions[self.last_action])
                self.anim_state.state.deals.pop()
            self.anim_state.infer_entities()
            for frame in self.anim_state.resolve():
                self.viewer.render_state(frame)
                sleep(0.05)
            return
        outfile = StringIO() if mode == "ansi" else sys.stdout
        self.state.render(outfile)
        if mode == "ansi":
            return outfile

    def _step_state(self, state, action, include_observations=True):
        action = self.state.actions[action]
        reward = state.step(*action)
        if include_observations:
            return state.encode(), reward
        return reward

    def step(self, action):
        self.last_action = action
        observation, reward = self._step_state(self.state, action)
        return observation, reward, (reward < 0), {"state": self.state}

    def get_action_mask(self):
        return self.state.get_action_mask()

    def get_root(self):
        return self.state.clone()

    def read_record(self, file, include_last=False):
        """
        Reads a record and yields observations like step does.

        The actions played are available under the info element.
        """
        initial_state = self.state.clone()
        initial_state.reset()
        for state, action, reward in read_record(file, initial_state, include_last=include_last):
            info = {
                "state": state,
                "action": state.actions.index(action) if action else None,
            }
            done = True if reward is None else (reward < 0)
            yield state.encode(), reward, done, info
            if done:
                return

    @classmethod
    def permute_observation(cls, observation):
        """
        Permute the observation into an equivalent copy without affecting
        which action is optimal.
        """
        deals, colors = observation
        deals = np.copy(deals)
        colors = np.copy(colors)

        # Flip deals other than the first one, as flipping the first would
        # affect the next action.
        for i in range(1, len(deals[0])):
            if random.random() < 0.5:
                for color in range(len(deals)):
                    deals[color][i][0], deals[color][i][1] = deals[color][i][1], deals[color][i][0]

        # Shuffle the color channels consistently across deals and the field.
        perm = list(range(len(colors)))
        random.shuffle(perm)
        permute(deals, perm)
        permute(colors, perm)
        return (deals, colors)
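
# Hedged usage sketch (not part of the original module): drive the endless
# environment with uniformly random legal actions until the episode ends.
# Assumes get_action_mask() returns a per-action boolean/0-1 mask, matching
# how it wraps State.get_action_mask() above; the constructor arguments here
# are illustrative.
def _random_rollout_demo():
    import random

    env = PuyoPuyoEndlessEnv(height=8, width=7, num_colors=4, num_deals=3)
    env.reset()
    done = False
    total_reward = 0
    while not done:
        mask = env.get_action_mask()
        legal_actions = [i for i, legal in enumerate(mask) if legal]
        _, reward, done, info = env.step(random.choice(legal_actions))
        total_reward += reward
    env.render(mode="console")
    print("Episode reward:", total_reward)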
def test_resolve():
    state = State(8, 7, 2, 1)
    state.deals[0] = (0, 0)
    stack = [
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, G, G, G, _, _, _, _,
        _, R, R, R, G, G, G, _,
    ]
    state.field = BottomField.from_list(stack, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    state.render()
    assert (reward == 4)
def test_garbage():
    state = State(8, 5, 3, 1, has_garbage=True)
    state.step(0, 0)
    state.add_garbage(9)
    state.render()
    O = state.field.num_colors  # noqa
    stack = state.field.to_list()
    expected = [
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        O, O, _, _, _, _, _, _,
        O, O, O, O, _, _, _, _,
        0, 0, O, O, O, _, _, _,  # 0 marks the dealt puyos; only garbage positions must match
    ]
    for p1, p2 in zip(stack, expected):
        if p1 == O:
            assert (p2 == O)
        else:
            assert (p2 != O)
def test_resolve_large():
    state = State(16, 7, 2, 1)
    state.deals[0] = (0, 0)
    stack = [
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, G, G, G, _, _, _, _,
        _, R, R, R, G, G, G, _,
    ]
    state.field = TallField.from_list(stack, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    assert (reward == 8500 + 760)