def test_make_move(height):
    """Playing a known deal must be reflected in the encoded field observation."""
    state = State(height, 5, 2, 1)
    state.deals[0] = (0, 1)
    state.render()
    state.play_deal(2, 3)
    state.render()
    _, encoded_field = state.encode()
    print(encoded_field)
    assert encoded_field[0][1][2]
    assert encoded_field[1][0][2]
def benchmark(depth, threshold, factor):
    """Play up to 1000 tree-searched moves on a fresh state.

    Returns a pair ``(total_reward, died)`` where ``died`` is True when a
    move produced a negative reward (game over) before 1000 moves.
    """
    state = State(16, 8, 5, 3, tsu_rules=False)
    score = 0
    for move_number in range(1000):
        if move_number % 100 == 0:
            print(move_number, "/ 1000")
        candidates = tree_search_actions(
            state, depth, occupation_threshold=threshold, factor=factor)
        chosen = random.choice(candidates)
        gained = state.step(*state.actions[chosen])
        score += gained
        if gained < 0:
            # Negative reward signals a lost game.
            return score, True
    return score, False
def test_render_in_place(height):
    """Exercise in-place console rendering after scrolling output onto screen."""
    for line_number in range(20):
        print(line_number)
    util.print_up(height)
    print("Move it on up!", end="")
    state = State(height, 7, 6, 4)
    state.step(2, 3)
    state.step(4, 2)
    state.render(in_place=True)
    print("hey!")
    state.render()
    print("wohou!")
def test_write_record():
    """A written record must decode as JSON with one stack per action."""
    state = State(13, 6, 4, 3, True)
    actions = list(state.actions)
    buffer = StringIO()
    write_record(buffer, state, actions)
    buffer.seek(0)
    stacks = json.load(buffer)
    assert len(stacks) == len(actions)
    assert stacks[0][0]
    assert any(stacks[-1])
def tree_search_actions(state: State, depth: int, factor=0.22, occupation_threshold=0.0):
    """Return indices of the best-scoring actions found by a native tree search.

    Falls back to a single uniformly random action index when no candidate
    produced a usable score.

    NOTE(review): `factor` and `depth - 1` are forwarded to the native search
    (core.bottom_tree_search / core.tall_tree_search); their exact semantics
    live in that extension — confirm there.
    """
    # Colors of the upcoming deals (excluding the current one), flattened
    # into a single list for the native search.
    colors: List[int] = []
    for deal in state.deals[1:]:
        colors.extend(deal)
    # Bitmask of the currently legal actions, indexed by the position of
    # each action in the state's validation-action ordering.
    action_mask: int = 0
    for action in state.actions:
        action_mask |= 1 << state._validation_actions.index(action)
    # Positional argument list for the native search function; order matters.
    search_args = [
        state.num_layers,
        state.has_garbage,
        action_mask,
        colors,
        depth - 1,
        factor,
    ]
    search_fun = core.bottom_tree_search
    if isinstance(state.field, TallField):
        # Tall fields take two extra arguments spliced in after num_layers:
        # width first, then tsu_rules (the two inserts at index 1 reverse).
        search_args.insert(1, state.tsu_rules)
        search_args.insert(1, state.width)
        search_fun = core.tall_tree_search
    base_popcount: int = state.field.popcount
    # Below the occupation threshold, prefer moves that do NOT clear puyos
    # (i.e. keep building instead of firing premature chains).
    prevent_chains: bool = (base_popcount < occupation_threshold * state.width * state.height)
    best_indices = []
    best_score: float = float("-inf")
    # "possible" tracks chain-triggering moves, kept only as a fallback
    # while prevent_chains is active.
    possible_indices = []
    possible_score = float("-inf")
    for index, (child, score) in enumerate(state.get_children(True)):
        if not child:
            # Illegal / unavailable action slot.
            continue
        args = [child.field.data] + search_args
        tree_score: float = search_fun(*args)
        # Immediate reward plus discounted search estimate.
        child_score = score + GAMMA * tree_score
        if prevent_chains and child.field.popcount < base_popcount:
            # This move cleared puyos; demote it to the fallback bucket.
            if child_score > possible_score:
                possible_indices = [index]
                possible_score = child_score
            elif child_score == possible_score:
                possible_indices.append(index)
        else:
            if child_score > best_score:
                best_indices = [index]
                best_score = child_score
            elif child_score == best_score:
                best_indices.append(index)
    # Prefer non-clearing winners, then clearing fallbacks, then random.
    return best_indices or possible_indices or [np.random.randint(0, len(state.actions))]
def test_action_mask_tsu():
    """The tsu-rules action mask must only allow placements that fit."""
    state = State(13, 6, 2, 1, tsu_rules=True)
    stack = [_, _, _, _, _, _, _, _] * state.field.offset
    stack += [
        R, _, R, G, _, G, _, _,
    ]
    state.field = TallField.from_list(
        stack, num_layers=state.num_layers, tsu_rules=state.tsu_rules)
    state.render()
    mask = state.get_action_mask()
    print(mask)
    assert len(mask) == 5 + 5 + 6 + 6
    for index, (column, orientation) in enumerate(state.actions):
        allowed = column in (1, 4) or (orientation in (0, 2) and column in (0, 3))
        if allowed:
            assert mask[index]
        else:
            assert not mask[index]
def __init__(self, height, width, num_colors, num_deals, tsu_rules=False):
    """Create the wrapped game state and the matching gym spaces."""
    state = State(height, width, num_colors, num_deals, tsu_rules=tsu_rules)
    self.state: State = state
    self.reward_range: Tuple[int, int] = (-1, state.max_score)
    self.action_space: spaces.Discrete = spaces.Discrete(len(state.actions))
    # Observation is a pair: upcoming deals and the playing field,
    # both one-hot over colors.
    deal_space = spaces.Box(
        0, 1, (state.num_colors, state.num_deals, 2), dtype=np.int8)
    field_space = spaces.Box(
        0, 1, (state.num_colors, state.height, state.width), dtype=np.int8)
    self.observation_space = spaces.Tuple((deal_space, field_space))
    self.seed()
    self.viewer = None
    self.anim_state = None
    self.last_action: Optional[int] = None
def test_state_encoding(height):
    """Encoding then decoding a played state must round-trip deals and field."""
    state = State(height, 5, 4, 4)
    for _ in range(25):
        state.step(*random.choice(state.actions))
    state.num_deals = None
    state.render()
    deals_before = state.deals[:]
    field_before = state.field.to_list()
    blob = state_encode(state)
    print(blob)
    # Wipe the state before decoding back into it.
    state.field.reset()
    state.deals = []
    restored = state_decode(state, blob)
    restored.render()
    assert deals_before == restored.deals
    assert field_before == restored.field.to_list()
def test_gravity():
    """A single gravity step must drop floating puyos exactly one row."""
    _ = None
    before = [_] * 8 * 6 + [
        1, 2, _, 1, _, _, _, _,
        3, _, _, _, _, _, _, _,
    ]
    state = State(8, 4, 4, deals=[])
    state.field = BottomField.from_list(before)
    state = AnimationState(state)
    assert state.step_gravity()
    after = [_] * 4 * 6 + [
        1, _, _, _,
        3, 2, _, 1,
    ]
    assert state.to_list() == after
    # Everything has settled; a further step must report no movement.
    assert not state.step_gravity()
def test_action_mask():
    """The bottom-field action mask must only allow placements that fit."""
    state = State(8, 7, 2, 1)
    stack = [
        R, _, R, _, _, _, G, _,
    ]
    state.field = BottomField.from_list(stack, num_layers=state.num_layers)
    state.render()
    mask = state.get_action_mask()
    print(mask)
    assert len(mask) == 6 + 6 + 7 + 7
    for index, (column, orientation) in enumerate(state.actions):
        allowed = column in (3, 4) or (orientation in (1, 3) and column in (1, 5))
        if allowed:
            assert mask[index]
        else:
            assert not mask[index]
print("Loading deals from {}".format(args.infile)) with open(args.infile) as f: dealss = json.load(f) else: print("Recalculating deals") dealss = unique_deals(args.depth, args.num_colors) print("Processing {} unique sequences".format(len(dealss))) result = [] for deals in dealss: if not deals_have_potential(deals): print("Provably cannot clear", deals) continue base_state = State(8, args.width, args.num_colors, deals=deals) base_state.render() values = {} def collect(state): key = state_key(state) if key in values: return values[key] = None for child, reward in state.get_children(): collect(child) collect(base_state) print("Number of unique positions", len(values))
args = parser.parse_args() deals_depth = args.surface_depth + args.num_deals log("Recalculating deals") dealss = unique_deals(deals_depth, args.num_colors) log("Processing {} unique sequences".format(len(dealss))) if args.infile: log("Loading partially calculated data from {}".format(args.infile)) with open(args.infile) as f: data = json.load(f) values = data["scores"] base_state = State(args.height, args.width, args.num_colors, deals=[], tsu_rules=(args.height == 13)) else: values = {} for deals in dealss: base_state = State(args.height, args.width, args.num_colors, deals=deals, tsu_rules=(args.height == 13)) def collect(state): if len(state.deals) >= args.num_deals: for child, reward in state.get_children(): collect(child)
def test_has_moves_tsu():
    """Under tsu rules a full-looking narrow field must still offer moves."""
    state = State(13, 2, 4, 1, tsu_rules=True)
    cells = [_, _, _, _, _, _, _, _] * state.field.offset
    cells.extend([
        _, R, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        Y, B, _, _, _, _, _, _,
        G, B, _, _, _, _, _, _,
        G, R, _, _, _, _, _, _,
        Y, R, _, _, _, _, _, _,
        B, G, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        Y, B, _, _, _, _, _, _,
        G, B, _, _, _, _, _, _,
        G, R, _, _, _, _, _, _,
        Y, R, _, _, _, _, _, _,
    ])
    state.field = TallField.from_list(
        cells, num_layers=state.num_layers, tsu_rules=state.tsu_rules)
    state.render()
    assert state.get_children()
class PuyoPuyoEndlessEnv(gym.Env):
    """
    Puyo Puyo environment. Single player endless mode.
    """
    # When True, "human" render mode is downgraded to "console" (no GUI).
    TESTING = False
    metadata = {"render.modes": ["human", "console", "ansi"]}

    def __init__(self, height, width, num_colors, num_deals, tsu_rules=False):
        # Underlying game state; the env is a thin gym adapter around it.
        self.state = State(height, width, num_colors, num_deals, tsu_rules=tsu_rules)
        self.reward_range = (-1, self.state.max_score)
        self.action_space = spaces.Discrete(len(self.state.actions))
        # Observation: (deals one-hot over colors, field one-hot over colors).
        self.observation_space = spaces.Tuple((
            spaces.Box(0, 1, (self.state.num_colors, self.state.num_deals, 2), dtype=np.int8),
            spaces.Box(
                0, 1, (self.state.num_colors, self.state.height, self.state.width),
                dtype=np.int8),
        ))
        self.seed()
        # Lazily-created rendering helpers (only used in "human" mode).
        self.viewer = None
        self.anim_state = None
        self.last_action = None

    def seed(self, seed=None):
        # Gym convention: return a list of the seeds actually used.
        return [self.state.seed(seed)]

    def reset(self):
        """Reset the game and return the initial observation."""
        self.state.reset()
        if self.viewer:
            # Drop stale animation bookkeeping so the next render starts fresh.
            self.anim_state = None
            self.last_action = None
        return self.state.encode()

    def close(self):
        if self.viewer:
            self.viewer.close()

    def render(self, mode="console"):
        """Render to a GUI ("human"), stdout ("console") or a StringIO ("ansi")."""
        if self.TESTING and mode == "human":
            mode = "console"
        if mode == "human":
            # Imported lazily so headless use never needs the rendering deps.
            from time import sleep
            from gym_puyopuyo.rendering import ImageViewer, AnimationState
            if self.anim_state:
                # Keep the animation's upcoming deals in sync with the state.
                self.anim_state.state.deals[1:] = self.state.deals[:-1]
            else:
                self.anim_state = AnimationState(self.state.clone())
            if not self.viewer:
                self.viewer = ImageViewer(width=self.anim_state.width + 4,
                                          height=self.anim_state.height)
            if self.last_action is not None:
                # Replay the last action on the animation copy so the
                # resolution (gravity/chains) can be animated frame by frame.
                self.anim_state.state.play_deal(
                    *self.state.actions[self.last_action])
                self.anim_state.state.deals.pop()
            self.anim_state.infer_entities()
            for frame in self.anim_state.resolve():
                self.viewer.render_state(frame)
                sleep(0.05)
            return
        outfile = StringIO() if mode == "ansi" else sys.stdout
        self.state.render(outfile)
        if mode == "ansi":
            return outfile

    def _step_state(self, state, action, include_observations=True):
        # NOTE(review): the `state` parameter is unused; self.state is
        # stepped regardless — confirm whether that is intentional.
        action = self.state.actions[action]
        reward = self.state.step(*action)
        if include_observations:
            return self.state.encode(), reward
        return reward

    def step(self, action):
        """Gym step: returns (observation, reward, done, info)."""
        self.last_action = action
        observation, reward = self._step_state(self.state, action)
        # A negative reward means the game is over.
        return observation, reward, (reward < 0), {"state": self.state}

    def get_action_mask(self):
        return self.state.get_action_mask()

    def get_root(self):
        return self.state.clone()

    def read_record(self, file, include_last=False):
        """
        Reads a record and yields observations like step does.
        The actions played are available under the info element.
        """
        initial_state = self.state.clone()
        initial_state.reset()
        for state, action, reward in read_record(file, initial_state,
                                                 include_last=include_last):
            info = {
                "state": state,
                "action": state.actions.index(action) if action else None,
            }
            # A missing reward (final entry) is also treated as terminal.
            done = True if reward is None else (reward < 0)
            yield state.encode(), reward, done, info
            if done:
                return

    @classmethod
    def permute_observation(cls, observation):
        """
        Permute the observation in-place without affecting which action is optimal
        """
        deals, colors = observation
        deals = np.copy(deals)
        colors = np.copy(colors)
        # Flip deals other than the first one as it affects next action
        for i in range(1, len(deals[0])):
            if random.random() < 0.5:
                for color in range(len(deals)):
                    deals[color][i][0], deals[color][i][1] = deals[color][i][
                        1], deals[color][i][0]
        # Shuffle the color planes consistently across deals and field.
        perm = list(range(len(colors)))
        random.shuffle(perm)
        permute(deals, perm)
        permute(colors, perm)
        return (deals, colors)
def test_field_to_int(height):
    """Packing the field into an int and back must reproduce the stack."""
    tsu_rules = height == 13
    state = State(height, 3, 3, 1, tsu_rules=tsu_rules)
    for _ in range(10):
        state.step(*random.choice(state.actions))
    state.render()
    expected_stack = state.field.to_list()
    packed = state.field_to_int()
    print(packed)
    state.reset()
    state.field_from_int(packed)
    state.render()
    assert state.field.to_list() == expected_stack
def test_mirror(height):
    """Mirroring a field must equal playing the mirrored moves on a twin."""
    state = State(height, 5, 3, 5)
    twin = state.clone()
    for _ in range(state.num_deals):
        x = np.random.randint(0, state.width - 1)
        orientation = np.random.randint(0, 4)
        state.step(x, orientation)
        # Mirror the column and rotate the piece half a turn; horizontal
        # placements (even orientations) shift one extra column left.
        mirrored_x = state.width - x - 1
        if orientation % 2 == 0:
            mirrored_x -= 1
        twin.step(mirrored_x, (orientation + 2) % 4)
    state.render()
    twin.render()
    state.mirror()
    state.render()
    assert state.field.to_list() == twin.field.to_list()
def test_garbage_tsu():
    """Adding garbage under tsu rules must land the expected number of puyos."""
    state = State(13, 6, 5, 1, tsu_rules=True, has_garbage=True)
    cells = [_, _, _, _, _, _, _, _] * state.field.offset
    cells.extend([
        _, R, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        Y, B, _, _, _, _, _, _,
        G, B, _, _, _, _, _, _,
        G, R, _, _, _, _, _, _,
        Y, R, _, _, _, _, _, _,
        B, G, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        Y, B, _, _, _, _, _, _,
        G, B, _, _, _, _, _, _,
        G, R, _, _, _, _, _, _,
        Y, R, _, _, _, _, _, _,
    ])
    state.field = TallField.from_list(
        cells, num_layers=state.num_layers, tsu_rules=state.tsu_rules,
        has_garbage=state.has_garbage)
    state.render()
    state.add_garbage(39)
    state.render()
    state.field.resolve()
    assert state.field.popcount == 51
def test_garbage():
    """Garbage must pile up in the expected cells around the played puyos."""
    state = State(8, 5, 3, 1, has_garbage=True)
    state.step(0, 0)
    state.add_garbage(9)
    state.render()
    O = state.field.num_colors  # noqa
    stack = state.field.to_list()
    # The literal 0s mark the two played (non-garbage) puyos; only the
    # garbage/non-garbage distinction is verified below.
    expected = [
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        _, _, _, _, _, _, _, _,
        O, O, _, _, _, _, _, _,
        O, O, O, O, _, _, _, _,
        0, 0, O, O, O, _, _, _,
    ]
    for actual, wanted in zip(stack, expected):
        if actual == O:
            assert wanted == O
        else:
            assert wanted != O
def test_read_record():
    """Reading a record must replay the recorded actions and total reward."""
    record = """[[ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],[ 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0 ],[ 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],[ 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0 ],[ 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],[ 0, 0, 0, 2, 0, 0, 0, 0, 0, 4, 0, 0 ],[ 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, 0, 0 ],[ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0 ],[ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0 ],[ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2 ],[ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1 ],[ 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]]"""
    expected_actions = [
        (0, 0), (2, 1), (1, 0), (0, 1), (1, 0), (3, 1),
        (3, 1), (4, 1), (4, 1), (5, 1), (5, 1), (0, 0),
    ]
    stream = StringIO()
    stream.write(record)
    stream.seek(0)
    base_state = State(13, 6, 4, 3, True)
    entries = list(read_record(stream, base_state))
    total_reward = 0
    for (state, action, reward), expected in zip(entries, expected_actions):
        state.render()
        total_reward += reward
        assert action[0] == expected[0]
        # Orientations are equivalent modulo a half turn in the record.
        assert action[1] % 2 == expected[1]
    assert total_reward == 4840
def test_no_moves():
    """A completely filled two-wide field must yield no children."""
    state = State(8, 2, 4, 1)
    cells = [
        _, R, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
        Y, B, _, _, _, _, _, _,
        G, B, _, _, _, _, _, _,
        G, R, _, _, _, _, _, _,
        Y, R, _, _, _, _, _, _,
        B, G, _, _, _, _, _, _,
        B, R, _, _, _, _, _, _,
    ]
    state.field = BottomField.from_list(cells, num_layers=state.num_layers)
    state.render()
    assert not state.get_children()
def test_resolve():
    """Completing a group must clear it and award the expected reward."""
    state = State(8, 7, 2, 1)
    state.deals[0] = (0, 0)
    # Six empty rows followed by two rows that are one move from clearing.
    cells = [_] * 8 * 6 + [
        _, G, G, G, _, _, _, _,
        _, R, R, R, G, G, G, _,
    ]
    state.field = BottomField.from_list(cells, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    state.render()
    assert reward == 4
def test_resolve_large():
    """Resolution on a tall field must award the large-field reward."""
    state = State(16, 7, 2, 1)
    state.deals[0] = (0, 0)
    # Fourteen empty rows followed by two rows one move from clearing.
    cells = [_] * 8 * 14 + [
        _, G, G, G, _, _, _, _,
        _, R, R, R, G, G, G, _,
    ]
    state.field = TallField.from_list(cells, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    assert reward == 8500 + 760