示例#1
0
def test_make_move(height):
    """Play one fixed deal and check that both of its colors show up in the encoded field.

    The deal is forced to colors (0, 1) and played with ``play_deal(2, 3)``;
    the second element of ``state.encode()`` is then indexed as
    ``[color][row][column]``.
    """
    state = State(height, 5, 2, 1)
    state.deals[0] = (0, 1)
    state.render()
    state.play_deal(2, 3)
    state.render()
    _deals, field_planes = state.encode()
    print(field_planes)
    # Color 0 is expected at row 1 and color 1 at row 0, both in column 2.
    for color, row in ((0, 1), (1, 0)):
        assert field_planes[color][row][2]
示例#2
0
def benchmark(depth, threshold, factor):
    """Play up to 1000 tree-search-guided moves on a 16x8 board and sum the rewards.

    Each move is picked at random among the actions returned by
    ``tree_search_actions``.

    Returns:
        A ``(total_reward, died)`` tuple. ``died`` is True when a step
        returned a negative reward, which ends the run early.
    """
    game = State(16, 8, 5, 3, tsu_rules=False)
    score = 0
    for move_number in range(1000):
        # Progress report every hundred moves.
        if not move_number % 100:
            print(move_number, "/ 1000")
        candidates = tree_search_actions(
            game,
            depth,
            occupation_threshold=threshold,
            factor=factor,
        )
        chosen = random.choice(candidates)
        gained = game.step(*game.actions[chosen])
        score += gained
        if gained < 0:
            return score, True
    return score, False
示例#3
0
def test_render_in_place(height):
    """Smoke-test in-place rendering: print filler lines, move the cursor up, then render."""
    for line_number in range(20):
        print(line_number)
    util.print_up(height)
    print("Move it on up!", end="")
    state = State(height, 7, 6, 4)
    for move in ((2, 3), (4, 2)):
        state.step(*move)
    state.render(in_place=True)
    print("hey!")
    state.render()
    print("wohou!")
示例#4
0
def test_write_record():
    """Write a record of every available action and check the JSON that comes back."""
    state = State(13, 6, 4, 3, True)
    actions = state.actions[:]
    buffer = StringIO()
    write_record(buffer, state, actions)
    # Parse everything that was written to the in-memory stream.
    stacks = json.loads(buffer.getvalue())
    assert len(stacks) == len(actions)
    assert stacks[0][0]
    assert any(stacks[-1])
示例#5
0
def tree_search_actions(state: State, depth: int, factor=0.22, occupation_threshold=0.0):
    """Return the indices of the children of ``state`` with the best searched score.

    Every non-empty child from ``state.get_children(True)`` is scored as its
    immediate score plus ``GAMMA`` times the result of the native tree search
    (``core.tall_tree_search`` for a ``TallField``, ``core.bottom_tree_search``
    otherwise).

    While the field holds fewer puyos than
    ``occupation_threshold * width * height``, children whose popcount dropped
    below the current one are only used as a fallback, i.e. when no other
    child was scored.

    Returns:
        A list of tied best indices (positions in the children enumeration).
        If no child was scored at all, a single random index into
        ``state.actions``.
    """
    # Colors of every deal after the current one, flattened.
    upcoming_colors: List[int] = [color for deal in state.deals[1:] for color in deal]

    # One bit per available action, positioned by its index among the validation actions.
    mask: int = 0
    for action in state.actions:
        mask |= 1 << state._validation_actions.index(action)

    # Arguments shared by every child; the child's field data is prepended per call.
    if isinstance(state.field, TallField):
        solver = core.tall_tree_search
        shared_args = [
            state.num_layers,
            state.width,
            state.tsu_rules,
            state.has_garbage,
            mask,
            upcoming_colors,
            depth - 1,
            factor,
        ]
    else:
        solver = core.bottom_tree_search
        shared_args = [
            state.num_layers,
            state.has_garbage,
            mask,
            upcoming_colors,
            depth - 1,
            factor,
        ]

    occupied_before: int = state.field.popcount
    board_area = state.width * state.height
    avoid_popping: bool = occupied_before < occupation_threshold * board_area

    preferred = []
    preferred_score: float = float("-inf")
    fallback = []
    fallback_score = float("-inf")

    for index, (child, score) in enumerate(state.get_children(True)):
        if not child:
            continue

        searched: float = solver(child.field.data, *shared_args)
        total = score + GAMMA * searched

        if avoid_popping and child.field.popcount < occupied_before:
            # This move cleared puyos while the board is still sparse: fallback only.
            if total > fallback_score:
                fallback, fallback_score = [index], total
            elif total == fallback_score:
                fallback.append(index)
        elif total > preferred_score:
            preferred, preferred_score = [index], total
        elif total == preferred_score:
            preferred.append(index)

    if preferred:
        return preferred
    if fallback:
        return fallback
    return [np.random.randint(0, len(state.actions))]
示例#6
0
def test_action_mask_tsu():
    """Check the action mask under tsu rules for a single partly filled row."""
    state = State(13, 6, 2, 1, tsu_rules=True)
    stack = [_] * (8 * state.field.offset)
    stack.extend([R, _, R, G, _, G, _, _])
    state.field = TallField.from_list(
        stack,
        num_layers=state.num_layers,
        tsu_rules=state.tsu_rules,
    )
    state.render()
    mask = state.get_action_mask()
    print(mask)
    assert len(mask) == 5 + 5 + 6 + 6
    for index, (x, orientation) in enumerate(state.actions):
        # Moves at x = 1 or 4 are always expected; at x = 0 or 3 only for orientations 0 and 2.
        allowed = x in (1, 4) or (orientation in (0, 2) and x in (0, 3))
        if allowed:
            assert mask[index]
        else:
            assert not mask[index]
示例#7
0
    def __init__(self, height, width, num_colors, num_deals, tsu_rules=False):
        """Create the game state and derive the reward range and gym spaces from it.

        The observation is a tuple of two binary boxes: one shaped
        (num_colors, num_deals, 2) and one shaped (num_colors, height, width).
        """
        game = State(height, width, num_colors, num_deals, tsu_rules=tsu_rules)
        self.state: State = game
        self.reward_range: Tuple[int, int] = (-1, game.max_score)

        self.action_space: spaces.Discrete = spaces.Discrete(len(game.actions))
        deals_box = spaces.Box(0, 1, (game.num_colors, game.num_deals, 2), dtype=np.int8)
        field_box = spaces.Box(0, 1, (game.num_colors, game.height, game.width), dtype=np.int8)
        self.observation_space = spaces.Tuple((deals_box, field_box))
        self.seed()

        # Rendering helpers; the human viewer is created elsewhere when needed.
        self.viewer = None
        self.anim_state = None
        self.last_action: Optional[int] = None
示例#8
0
def test_state_encoding(height):
    """Round-trip a randomly played state through state_encode / state_decode."""
    state = State(height, 5, 4, 4)

    for _move in range(25):
        state.step(*random.choice(state.actions))

    state.num_deals = None
    state.render()

    # Snapshot what the decoded state must reproduce.
    expected_deals = list(state.deals)
    expected_field = state.field.to_list()
    encoded = state_encode(state)
    print(encoded)

    # Wipe the state so decoding has to restore everything from the encoding.
    state.field.reset()
    state.deals = []
    decoded = state_decode(state, encoded)
    decoded.render()

    assert expected_deals == decoded.deals
    assert expected_field == decoded.field.to_list()
示例#9
0
def test_gravity():
    """Check that one animated gravity step drops the floating puyos and that a second step does nothing."""
    empty = None
    initial = [empty] * (8 * 6)
    initial.extend([1, 2, empty, 1] + [empty] * 4)
    initial.extend([3] + [empty] * 7)

    game = State(8, 4, 4, deals=[])
    game.field = BottomField.from_list(initial)

    anim = AnimationState(game)

    assert anim.step_gravity()

    settled = [empty] * (4 * 6)
    settled.extend([1, empty, empty, empty])
    settled.extend([3, 2, empty, 1])

    assert anim.to_list() == settled

    # Everything has landed, so another gravity step reports no movement.
    assert not anim.step_gravity()
示例#10
0
def test_action_mask():
    """Check the action mask for a bottom field with a single partly filled row."""
    state = State(8, 7, 2, 1)
    top_row = [R, _, R, _, _, _, G, _]
    state.field = BottomField.from_list(top_row, num_layers=state.num_layers)
    state.render()
    mask = state.get_action_mask()
    print(mask)
    assert len(mask) == 6 + 6 + 7 + 7
    for index, (x, orientation) in enumerate(state.actions):
        # Moves at x = 3 or 4 are always expected; at x = 1 or 5 only for orientations 1 and 3.
        allowed = x in (3, 4) or (orientation in (1, 3) and x in (1, 5))
        if allowed:
            assert mask[index]
        else:
            assert not mask[index]
示例#11
0
        print("Loading deals from {}".format(args.infile))
        with open(args.infile) as f:
            dealss = json.load(f)
    else:
        print("Recalculating deals")
        dealss = unique_deals(args.depth, args.num_colors)
    print("Processing {} unique sequences".format(len(dealss)))

    result = []

    for deals in dealss:
        if not deals_have_potential(deals):
            print("Provably cannot clear", deals)
            continue

        base_state = State(8, args.width, args.num_colors, deals=deals)
        base_state.render()

        values = {}

        def collect(state):
            """Recursively register every position reachable from ``state`` in ``values``."""
            key = state_key(state)
            if key not in values:
                # Mark as seen before recursing so each position is expanded only once.
                values[key] = None
                for child, _reward in state.get_children():
                    collect(child)

        collect(base_state)
        print("Number of unique positions", len(values))
示例#12
0
    args = parser.parse_args()

    deals_depth = args.surface_depth + args.num_deals

    log("Recalculating deals")
    dealss = unique_deals(deals_depth, args.num_colors)
    log("Processing {} unique sequences".format(len(dealss)))

    if args.infile:
        log("Loading partially calculated data from {}".format(args.infile))
        with open(args.infile) as f:
            data = json.load(f)
            values = data["scores"]
        base_state = State(args.height,
                           args.width,
                           args.num_colors,
                           deals=[],
                           tsu_rules=(args.height == 13))
    else:
        values = {}
        for deals in dealss:
            base_state = State(args.height,
                               args.width,
                               args.num_colors,
                               deals=deals,
                               tsu_rules=(args.height == 13))

            def collect(state):
                if len(state.deals) >= args.num_deals:
                    for child, reward in state.get_children():
                        collect(child)
示例#13
0
def test_has_moves_tsu():
    """Under tsu rules a width-2 board filled through 13 stored rows must still offer moves."""
    state = State(13, 2, 4, 1, tsu_rules=True)
    row_width = 8
    # Contents of the two playable columns, top row first.
    column_pairs = [
        (_, R),
        (B, R),
        (Y, B),
        (G, B),
        (G, R),
        (Y, R),
        (B, G),
        (B, R),
        (B, R),
        (Y, B),
        (G, B),
        (G, R),
        (Y, R),
    ]
    stack = [_] * (row_width * state.field.offset)
    for left, right in column_pairs:
        stack.extend([left, right] + [_] * (row_width - 2))
    state.field = TallField.from_list(
        stack,
        num_layers=state.num_layers,
        tsu_rules=state.tsu_rules,
    )
    state.render()
    assert state.get_children()
示例#14
0
class PuyoPuyoEndlessEnv(gym.Env):
    """
    Puyo Puyo environment. Single player endless mode.
    """

    # When set, "human" rendering falls back to console rendering.
    TESTING = False

    metadata = {"render.modes": ["human", "console", "ansi"]}

    def __init__(self, height, width, num_colors, num_deals, tsu_rules=False):
        """Create the game state and derive the reward range and gym spaces from it."""
        self.state = State(height,
                           width,
                           num_colors,
                           num_deals,
                           tsu_rules=tsu_rules)
        self.reward_range = (-1, self.state.max_score)

        self.action_space = spaces.Discrete(len(self.state.actions))
        # Observation: (deals, field), both binary boxes with one plane per color.
        self.observation_space = spaces.Tuple((
            spaces.Box(0,
                       1, (self.state.num_colors, self.state.num_deals, 2),
                       dtype=np.int8),
            spaces.Box(
                0,
                1,
                (self.state.num_colors, self.state.height, self.state.width),
                dtype=np.int8),
        ))
        self.seed()

        self.viewer = None
        self.anim_state = None
        self.last_action = None

    def seed(self, seed=None):
        """Seed the underlying state and return the result wrapped in a list."""
        return [self.state.seed(seed)]

    def reset(self):
        """Reset the game state and return the initial observation."""
        self.state.reset()
        if self.viewer:
            # Drop animation bookkeeping so the next human render starts fresh.
            self.anim_state = None
            self.last_action = None
        return self.state.encode()

    def close(self):
        """Close the human-mode viewer, if one was created."""
        if self.viewer:
            self.viewer.close()

    def render(self, mode="console"):
        """Render the current state.

        "human" animates the last action in an image viewer and returns None.
        "console" writes to stdout. "ansi" writes to a StringIO, which is
        returned.
        """
        if self.TESTING and mode == "human":
            mode = "console"

        if mode == "human":
            # Imported lazily so the rendering module is only needed for human mode.
            from time import sleep
            from gym_puyopuyo.rendering import ImageViewer, AnimationState

            if self.anim_state:
                self.anim_state.state.deals[1:] = self.state.deals[:-1]
            else:
                self.anim_state = AnimationState(self.state.clone())

            if not self.viewer:
                self.viewer = ImageViewer(width=self.anim_state.width + 4,
                                          height=self.anim_state.height)

            if self.last_action is not None:
                self.anim_state.state.play_deal(
                    *self.state.actions[self.last_action])
                self.anim_state.state.deals.pop()
                self.anim_state.infer_entities()

            for frame in self.anim_state.resolve():
                self.viewer.render_state(frame)
                sleep(0.05)
            return

        outfile = StringIO() if mode == "ansi" else sys.stdout
        self.state.render(outfile)
        if mode == "ansi":
            return outfile

    def _step_state(self, state, action, include_observations=True):
        """Apply the action with the given index to ``state``.

        The state that was passed in is stepped. Previously the argument was
        ignored and ``self.state`` was always stepped, which only happened to
        be correct because ``step`` passes ``self.state``.

        Returns:
            ``(observation, reward)`` when ``include_observations`` is true,
            otherwise just ``reward``.
        """
        move = state.actions[action]
        reward = state.step(*move)
        if include_observations:
            return state.encode(), reward
        return reward

    def step(self, action):
        """Play ``action`` and return ``(observation, reward, done, info)``.

        The episode is done when the reward is negative.
        """
        self.last_action = action
        observation, reward = self._step_state(self.state, action)
        return observation, reward, (reward < 0), {"state": self.state}

    def get_action_mask(self):
        """Return the action mask of the current state."""
        return self.state.get_action_mask()

    def get_root(self):
        """Return a clone of the current state."""
        return self.state.clone()

    def read_record(self, file, include_last=False):
        """
        Reads a record and yields observations like step does.

        The actions played are available under the info element.
        """
        initial_state = self.state.clone()
        initial_state.reset()
        for state, action, reward in read_record(file,
                                                 initial_state,
                                                 include_last=include_last):
            info = {
                "state": state,
                "action": state.actions.index(action) if action else None,
            }
            # A missing reward also ends the replay.
            done = True if reward is None else (reward < 0)
            yield state.encode(), reward, done, info
            if done:
                return

    @classmethod
    def permute_observation(cls, observation):
        """
        Return a permuted copy of the observation without affecting which action is optimal.

        The input arrays are copied first, so the caller's observation is left untouched.
        """
        deals, colors = observation
        deals = np.copy(deals)
        colors = np.copy(colors)

        # Flip deals other than the first one as it affects next action
        for i in range(1, len(deals[0])):
            if random.random() < 0.5:
                for color in range(len(deals)):
                    deals[color][i][0], deals[color][i][1] = deals[color][i][
                        1], deals[color][i][0]

        # Apply the same random color permutation to both arrays.
        perm = list(range(len(colors)))
        random.shuffle(perm)
        permute(deals, perm)
        permute(colors, perm)
        return (deals, colors)
示例#15
0
def test_field_to_int(height):
    """Round-trip a randomly played field through field_to_int / field_from_int."""
    state = State(height, 3, 3, 1, tsu_rules=(height == 13))
    for _move in range(10):
        state.step(*random.choice(state.actions))
    state.render()
    expected = state.field.to_list()
    packed = state.field_to_int()
    print(packed)
    # Clear the state, then restore the field from its integer form.
    state.reset()
    state.field_from_int(packed)
    state.render()
    assert state.field.to_list() == expected
示例#16
0
def test_mirror(height):
    """Play random moves on one state and their mirror images on a clone, then compare after mirror()."""
    state = State(height, 5, 3, 5)
    twin = state.clone()
    for _move in range(state.num_deals):
        x = np.random.randint(0, state.width - 1)
        orientation = np.random.randint(0, 4)
        state.step(x, orientation)

        # Reflect the move for the twin.
        mirrored_x = state.width - x - 1
        mirrored_orientation = orientation
        if orientation % 2 == 0:
            # Even orientations shift one extra column and turn by two steps.
            mirrored_x -= 1
            mirrored_orientation = (orientation + 2) % 4
        twin.step(mirrored_x, mirrored_orientation)
    state.render()
    twin.render()
    state.mirror()
    state.render()
    assert state.field.to_list() == twin.field.to_list()
示例#17
0
def test_garbage_tsu():
    """Add 39 garbage to a tsu-rules field with two tall columns and check the resolved popcount."""
    state = State(13, 6, 5, 1, tsu_rules=True, has_garbage=True)
    row_width = 8
    # Contents of the first two columns, top row first.
    column_pairs = [
        (_, R),
        (B, R),
        (Y, B),
        (G, B),
        (G, R),
        (Y, R),
        (B, G),
        (B, R),
        (B, R),
        (Y, B),
        (G, B),
        (G, R),
        (Y, R),
    ]
    stack = [_] * (row_width * state.field.offset)
    for left, right in column_pairs:
        stack.extend([left, right] + [_] * (row_width - 2))
    state.field = TallField.from_list(
        stack,
        num_layers=state.num_layers,
        tsu_rules=state.tsu_rules,
        has_garbage=state.has_garbage,
    )
    state.render()
    state.add_garbage(39)
    state.render()
    state.field.resolve()
    assert state.field.popcount == 51
示例#18
0
def test_garbage():
    """Drop 9 garbage after one move and check that garbage sits exactly where expected."""
    state = State(8, 5, 3, 1, has_garbage=True)
    state.step(0, 0)
    state.add_garbage(9)
    state.render()
    # Value that marks a garbage cell in the field's list form.
    O = state.field.num_colors  # noqa
    actual = state.field.to_list()
    expected = [_] * (8 * 5)
    expected += [O, O, _, _, _, _, _, _]
    expected += [O, O, O, O, _, _, _, _]
    expected += [0, 0, O, O, O, _, _, _]
    # Only garbage placement is compared; the colors of other cells are ignored.
    for got, wanted in zip(actual, expected):
        if got == O:
            assert wanted == O
        else:
            assert wanted != O
示例#19
0
def test_read_record():
    """Replay a 12-move record and check the recovered actions and the total reward."""
    record = """[[
    1, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    0, 0, 1, 0, 0, 0,
    0, 0, 2, 0, 0, 0
    ],[
    0, 3, 2, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    3, 0, 0, 0, 0, 0,
    3, 0, 0, 0, 0, 0
    ],[
    0, 1, 2, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    0, 0, 0, 2, 0, 0,
    0, 0, 0, 4, 0, 0
    ],[
    0, 0, 0, 4, 0, 0,
    0, 0, 0, 4, 0, 0
    ],[
    0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 1, 0
    ],[
    0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 4, 0
    ],[
    0, 0, 0, 0, 0, 2,
    0, 0, 0, 0, 0, 2
    ],[
    0, 0, 0, 0, 0, 2,
    0, 0, 0, 0, 0, 1
    ],[
    3, 4, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ]]"""
    # Expected (x, orientation parity) for each recorded move.
    actions = [
        (0, 0), (2, 1), (1, 0), (0, 1),
        (1, 0), (3, 1), (3, 1), (4, 1),
        (4, 1), (5, 1), (5, 1), (0, 0),
    ]
    stream = StringIO(record)
    base_state = State(13, 6, 4, 3, True)
    replay = list(read_record(stream, base_state))
    total_reward = 0
    for (state, action, reward), (expected_x, expected_parity) in zip(replay, actions):
        state.render()
        total_reward += reward
        assert action[0] == expected_x
        # Only the orientation's parity is compared.
        assert action[1] % 2 == expected_parity
    assert total_reward == 4840
示例#20
0
def test_no_moves():
    """A width-2 bottom field with both columns almost full must have no children."""
    state = State(8, 2, 4, 1)
    row_width = 8
    # Contents of the two playable columns, top row first.
    column_pairs = [
        (_, R),
        (B, R),
        (Y, B),
        (G, B),
        (G, R),
        (Y, R),
        (B, G),
        (B, R),
    ]
    stack = []
    for left, right in column_pairs:
        stack.extend([left, right] + [_] * (row_width - 2))
    state.field = BottomField.from_list(stack, num_layers=state.num_layers)
    state.render()
    assert not state.get_children()
示例#21
0
def test_resolve():
    """Play a same-colored deal onto a prepared bottom field and check the reward is 4."""
    state = State(8, 7, 2, 1)
    state.deals[0] = (0, 0)
    # Six empty rows followed by the two prepared bottom rows.
    stack = [_] * (8 * 6)
    stack += [_, G, G, G, _, _, _, _]
    stack += [_, R, R, R, G, G, G, _]
    state.field = BottomField.from_list(stack, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    state.render()
    assert reward == 4
示例#22
0
def test_resolve_large():
    """Same setup as the small resolve test on a 16-row tall field; checks the reward is 8500 + 760."""
    state = State(16, 7, 2, 1)
    state.deals[0] = (0, 0)
    # Fourteen empty rows followed by the two prepared bottom rows.
    stack = [_] * (8 * 14)
    stack += [_, G, G, G, _, _, _, _]
    stack += [_, R, R, R, G, G, G, _]
    state.field = TallField.from_list(stack, num_layers=state.num_layers)
    state.render()
    reward = state.step(0, 1)
    assert reward == 8500 + 760