def read_data(data_path, max_games=None):
    """Build training arrays from the serialized games found under ``data_path``.

    Every ``.json`` file returned by ``list_files`` is deserialized and its
    action list is replayed from ``State.empty()``.  For each game, up to
    ``len(augmentations)`` distinct positions are drawn at random (the final
    position is never drawn) and each drawn position is paired with one
    augmentation, in augmentation order.

    Args:
        data_path: Location handed to ``list_files`` to discover game files.
        max_games: When not ``None``, only the last ``max_games`` files (in
            sorted order) are used.  A non-positive value selects no games.

    Returns:
        A tuple ``(x, y_policy, y_reward)`` of numpy arrays with one entry per
        sample: the augmented state array, the policy values listed in the
        augmented action order (0.0 for actions missing from the stored
        policy), and ``winner_value`` of the game's final winner for the
        sampled state.  All three are empty when no game file is selected.
    """
    game_files = sorted(list_files(data_path, '.json'))
    augmentations = list(Augmentation.iter_augmentations())

    if max_games is not None:
        # ``game_files[-0:]`` would select *every* file, so a non-positive
        # limit has to be handled explicitly.
        game_files = game_files[-max_games:] if max_games > 0 else []

    # Guarded: indexing an empty list here used to raise IndexError.
    if game_files:
        print('Using game files from %s to %s' % (game_files[0], game_files[-1]))

    # The augmented action order depends only on the augmentation, so it is
    # computed once up front instead of once per sample of every game.
    augmented_orders = []
    for augmentation in augmentations:
        action_order = sorted(
            Action.iter_actions(),
            key=lambda a: a.augment(augmentation).to_int())
        augmented_orders.append((augmentation, action_order))

    x = []
    y_policy = []
    y_reward = []

    for game_path in tqdm(game_files):
        with open(game_path, 'r') as fin:
            game_data = json.load(fin)

        _winner, _starter, actions, policies = AlphaConnectSerializer.deserialize(game_data)

        # Replay the game to recover every intermediate state.
        state = State.empty()
        states = [state]
        for action in actions:
            state = state.take_action(action)
            states.append(state)

        # The last state only supplies the winner; it is never sampled.
        states, final_state = states[:-1], states[-1]

        n_samples = min(len(states), len(augmentations))
        game_samples = sample(range(len(states)), n_samples)

        # zip stops at the shorter input, so only the first ``n_samples``
        # augmentations are paired with the drawn state indices.
        for (augmentation, action_order), i in zip(augmented_orders, game_samples):
            x.append(states[i].to_numpy(augmentation))
            y_policy.append([policies[i].get(action, 0.0) for action in action_order])
            y_reward.append(winner_value(final_state.winner, states[i]))

    return np.array(x), np.array(y_policy), np.array(y_reward)
def simulate(self, node: 'AlphaConnectNode', callback):
    """Evaluate ``node`` and report the result through ``callback``.

    A node whose state is end-of-game is scored immediately with
    ``self.evaluate_final_state`` and reported as ``callback(value, None)``.
    Any other node is appended to ``self.queue`` together with its callback;
    once the queue holds at least ``self.batch_size`` entries, all queued
    states are passed to ``self.model.predict`` in one call, every queued
    callback receives ``(state_value, action_probs)``, and the queue is reset.
    """
    if node.state.is_end_of_game():
        callback(self.evaluate_final_state(node), None)
        return

    self.queue.append((node, callback))
    if len(self.queue) < self.batch_size:
        # Not enough pending requests yet; wait for more before predicting.
        return

    queued_nodes = [entry[0] for entry in self.queue]
    queued_callbacks = [entry[1] for entry in self.queue]

    batch = np.concatenate(
        [queued.state.to_numpy(batch=True) for queued in queued_nodes])
    pred_actions, pred_value = self.model.predict(batch)

    for idx, notify in enumerate(queued_callbacks):
        value = pred_value[idx].item()
        probabilities = dict(zip(Action.iter_actions(), pred_actions[idx]))
        notify(value, probabilities)

    self.queue = []
def decide(self, state: State):
    """Ask the user for an action until an allowed one is entered.

    Each round prints the grid of all actions, reads one line from stdin and
    parses it with ``Action.from_hex``.  Input that raises ``ValueError`` and
    actions not contained in ``state.allowed_actions`` are reported and the
    prompt repeats.

    Returns:
        The first parsed action that is in ``state.allowed_actions``.
    """
    while True:
        print('Possible actions:')
        labels = {}
        for candidate in Action.iter_actions():
            labels[candidate] = str(candidate)
        grid = format_in_action_grid(
            labels, cell_format='{:.2s}', default_value=' ')
        print(grid)

        user_input = input('Choose your action: ')
        try:
            action = Action.from_hex(user_input)
            if action not in state.allowed_actions:
                print('Action %s not allowed' % action)
                continue
            # Blank line separates the prompt from whatever is printed next.
            print()
            return action
        except ValueError:
            print('User input is not an action')