Exemplo n.º 1
0
def train(board: Board,
          striga: Striga,
          witcher: Witcher,
          initial_state: QState,
          epochs=1000):
    """Play ``epochs`` games in sequence and gather per-game statistics.

    Every game is a fresh ``Env(board, initial_state, striga, witcher)``
    that is advanced until its ``game_res`` is no longer ``None``.  The
    same ``striga`` and ``witcher`` objects are handed to every game
    (presumably so that whatever they learn carries over between games --
    confirm against ``Env.advance``).

    Args:
        board: Board passed to every ``Env``; its ``to_numpy()`` result
            seeds both heatmaps.
        striga: Striga agent passed to every ``Env``.
        witcher: Witcher agent passed to every ``Env``.
        initial_state: Starting state passed to every ``Env``.
        epochs: Number of games to play.

    Returns:
        A 7-tuple ``(hits, win_p, lifetime, rewards, time, heatmap,
        heatmap_start)``:

        * ``hits`` -- ``float(game.hits)`` for each game.
        * ``win_p`` -- running fraction of games whose result was
          ``GameResult.WITCHER``.
        * ``lifetime`` -- ``game.turn`` at the end of each game.
        * ``rewards`` -- ``game.rewards`` of each game.
        * ``time`` -- 1-based game numbers ``1..epochs``.
        * ``heatmap`` -- ``board.to_numpy()`` plus the accumulated
          ``witcher_positions`` of every game.
        * ``heatmap_start`` -- copy of ``heatmap`` taken just before the
          game with 0-based index ``epochs // 100`` is added; if no game
          is played it is simply ``board.to_numpy()``.
    """
    # Two independent calls on purpose: the accumulator is modified in
    # place below and must not share storage with the early snapshot.
    heatmap_start = board.to_numpy()
    heatmap = board.to_numpy()

    # 0-based index of the game at which the "early" heatmap is frozen.
    snapshot_index = epochs // 100

    witcher_wins = 0
    game_numbers, hits, win_p, lifetime, rewards = [], [], [], [], []

    for played in range(1, epochs + 1):
        episode = Env(board, initial_state, striga, witcher)
        while episode.game_res is None:
            episode.advance()

        if episode.game_res == GameResult.WITCHER:
            witcher_wins += 1

        # Freeze the snapshot before this game's positions are counted.
        if played - 1 == snapshot_index:
            heatmap_start = heatmap.copy()

        heatmap += episode.witcher_positions
        game_numbers.append(played)
        hits.append(float(episode.hits))
        win_p.append(witcher_wins / played)
        lifetime.append(episode.turn)
        rewards.append(episode.rewards)

    return (hits, win_p, lifetime, rewards,
            game_numbers, heatmap, heatmap_start)