def read_data(data_path, max_games=None):
    """Build training arrays from the serialized games found under `data_path`.

    Every `.json` file returned by `list_files` is loaded, deserialized with
    `AlphaConnectSerializer` and replayed from `State.empty()`. From each game,
    `min(number of pre-move states, number of augmentations)` distinct states
    are drawn at random, and each drawn state is paired with one augmentation.

    Note: augmentations are consumed in their iteration order, so a game with
    fewer states than there are augmentations only uses the leading ones.

    Args:
        data_path: Location handed to `list_files` to discover the game files.
        max_games: If given, only the last `max_games` files (in sorted order)
            are used. A value of zero or less selects no files at all.

    Returns:
        A tuple `(x, y_policy, y_reward)` of numpy arrays built from,
        respectively, the augmented state encodings, the per-action policy
        values listed in augmented action order (0.0 for actions missing from
        the stored policy), and `winner_value` of the game's final winner
        evaluated for the sampled state.
    """
    game_files = sorted(list_files(data_path, '.json'))
    augmentations = list(Augmentation.iter_augmentations())

    if max_games is not None:
        # `game_files[-max_games:]` would return *every* file for
        # max_games == 0 (because -0 == 0), so compute the start index
        # explicitly instead.
        game_files = game_files[max(len(game_files) - max_games, 0):]
        # Guarded: indexing an empty selection would raise IndexError.
        if game_files:
            print('Using game files from %s to %s' % (game_files[0], game_files[-1]))

    # The action ordering depends only on the augmentation, so compute it once
    # per augmentation instead of once per sample of every game.
    action_orders = [
        sorted(Action.iter_actions(),
               key=lambda a, aug=augmentation: a.augment(aug).to_int())
        for augmentation in augmentations
    ]

    x = []
    y_policy = []
    y_reward = []

    for game_path in tqdm(game_files):
        with open(game_path, 'r') as fin:
            game_data = json.load(fin)

        # The stored winner and starter are not needed here: the outcome is
        # read from the replayed final state below.
        _winner, _starter, actions, policies = AlphaConnectSerializer.deserialize(game_data)

        # Replay the game to recover the state that preceded every move.
        state = State.empty()
        states = [state]
        for action in actions:
            state = state.take_action(action)
            states.append(state)
        states, final_state = states[:-1], states[-1]

        n_samples = min(len(states), len(augmentations))
        game_samples = sample(range(len(states)), n_samples)
        for augmentation, action_order, i in zip(augmentations, action_orders, game_samples):
            x.append(states[i].to_numpy(augmentation))
            y_policy.append([policies[i].get(action, 0.0) for action in action_order])
            y_reward.append(winner_value(final_state.winner, states[i]))

    return np.array(x), np.array(y_policy), np.array(y_reward)
Пример #2
0
    def simulate(self, node: 'AlphaConnectNode', callback):
        """Evaluate `node`, immediately if terminal, otherwise via a batched prediction.

        A node whose state is at the end of the game is scored right away with
        `self.evaluate_final_state` and reported as `callback(value, None)`.
        Any other node is queued together with its callback. Once the queue
        holds at least `self.batch_size` entries, all queued states are passed
        to `self.model.predict` in one call and every queued callback is
        invoked as `callback(value, action_probs)`.

        Args:
            node: Node to evaluate; only `node.state` is read here.
            callback: Callable taking `(state_value, action_probs)`, where
                `action_probs` is `None` for terminal nodes and otherwise a
                dict mapping each action to its predicted value.
        """
        if node.state.is_end_of_game():
            state_value = self.evaluate_final_state(node)
            callback(state_value, None)
        else:
            self.queue.append((node, callback))

        # Nothing to flush yet. The emptiness check also keeps the unpacking
        # of zip(*...) below from failing when batch_size is zero or negative.
        if not self.queue or len(self.queue) < self.batch_size:
            return

        nodes, callbacks = zip(*self.queue)
        array = np.concatenate(
            [queued.state.to_numpy(batch=True) for queued in nodes])
        pred_actions, pred_value = self.model.predict(array)

        # Start a fresh queue *before* dispatching: a callback that re-enters
        # simulate() must not see (and re-flush) the batch being processed,
        # and whatever it queues must not be wiped afterwards. Resetting only
        # after a successful predict keeps the pending entries if it raises.
        self.queue = []

        # Same for every entry of the batch, so enumerate the actions once.
        actions = tuple(Action.iter_actions())
        for i, queued_callback in enumerate(callbacks):
            state_value = pred_value[i].item()
            action_probs = dict(zip(actions, pred_actions[i]))
            queued_callback(state_value, action_probs)
Пример #3
0
    def decide(self, state: State):
        """Ask the user on stdin for an action until an allowed one is entered.

        Each round prints a grid of all actions, reads one line of input and
        parses it with `Action.from_hex`. Input that raises `ValueError`, or
        an action not contained in `state.allowed_actions`, is reported and
        the prompt is repeated.

        Args:
            state: Current game state; only `state.allowed_actions` is read.

        Returns:
            The first entered action that is contained in
            `state.allowed_actions`.
        """
        while True:
            print('Possible actions:')
            labels = {candidate: str(candidate)
                      for candidate in Action.iter_actions()}
            grid = format_in_action_grid(labels,
                                         cell_format='{:.2s}',
                                         default_value='  ')
            print(grid)
            raw = input('Choose your action: ')

            try:
                chosen = Action.from_hex(raw)
                if chosen not in state.allowed_actions:
                    print('Action %s not allowed' % chosen)
                    continue
                # Blank line separates the prompt from whatever is printed next.
                print()
                return chosen
            except ValueError:
                print('User input is not an action')