Пример #1
0
    def extract_episodes(self):
        """Collect episodes and pack each one into a dict of tensors.

        Episodes are gathered by ``collect_episodes`` using a
        ``ReinforcementAI`` built around ``self.judge``. ``episode_cnt`` and
        ``episode_length`` are not parameters of this method; they are
        resolved from an enclosing scope.

        Returns:
            A list with one dict per collected episode, with keys:

            * ``'obs'``  - encoded boards concatenated along dim 0.
            * ``'idx'``  - 1-D ``torch.long`` tensor of encoded move indices.
            * ``'prob'`` - for each row ``i``, the entry at column ``idx[i]``
              of the first element returned by ``self.judge.forward(obs)``
              (presumably the probability assigned to the move that was
              played - confirm against the judge's output contract).
            * ``'ext'``  - the episode's ``'extrinsic'`` value, passed through.
            * ``'over'`` - ``is_game_over()`` of the episode's last board.
        """
        raw_episodes = collect_episodes(
            ReinforcementAI(self.judge), episode_cnt, episode_length)

        packed = []

        for record in raw_episodes:
            boards = record['boards']

            # Give every encoded board a leading axis of size 1 so that
            # concatenating along dim 0 yields one row per board.
            encoded_boards = [
                Translator.encode_board(board).unsqueeze(0)
                for board in boards]
            obs = torch.cat(encoded_boards, 0)

            move_indices = [
                Translator.encode_move_idx(move) for move in record['moves']]
            idx = torch.tensor(move_indices, dtype=torch.long)

            # Pair row i with column idx[i] in the first forward() output.
            # NOTE(review): assumes there is one move per board so the row
            # and column index tensors line up - confirm with the collector.
            first_output = self.judge.forward(obs)[0]
            rows = torch.arange(idx.size()[0], dtype=torch.long)
            prob = first_output[rows, idx]

            packed.append({
                'obs': obs,
                'idx': idx,
                'prob': prob,
                'ext': record['extrinsic'],
                'over': boards[-1].is_game_over(),
            })

        return packed