Example no. 1
0
def generate_episode_from_Q(env: BlackjackEnv, Q, epsilon,
                            action_count) -> List[tuple]:
    """
    Generate one episode by acting epsilon-greedily with respect to Q.

    @param env: the Blackjack environment to interact with
    @param Q: action-value table mapping state -> array of per-action values
    @param epsilon: exploration rate used when the state is present in Q
    @param action_count: number of discrete actions available
    @return: list of (state, action, reward) tuples, one per time step
    """
    episode = []
    # stores the initial state.  [sum of player cards, open dealer card, has usable Ace]
    state = env.reset()
    while True:
        if state in Q:
            # choose the action epsilon-greedily using the Q table
            action = np.random.choice(np.arange(action_count),
                                      p=get_probs(Q[state], epsilon,
                                                  action_count))
        else:
            # if we have never visited this state before, just throw the dice
            action = env.action_space.sample()

        next_state, reward, done, info = env.step(action)
        episode.append((state, action, reward))
        state = next_state
        if done:
            break
    return episode
Example no. 2
0
def gen_episode_data(policy: DeterministicPolicy,
                     env: BlackjackEnv) -> List[Tuple[State, Action, Reward]]:
    """Roll out one full episode under *policy*, returning its (state, action, reward) transitions."""
    trajectory = []
    current = env.reset()
    finished = False
    while not finished:
        chosen = policy(current)
        successor, gain, finished, _ = env.step(chosen)
        trajectory.append((current, chosen, gain))
        current = successor
    return trajectory
Example no. 3
0
def gen_custom_s0_stochastic_episode(policy: Policy, env: BlackjackEnv, initial_state: State) \
        -> List[Tuple[State, Action, Reward]]:
    """Play out an episode starting from *initial_state*, sampling each action from the stochastic policy.

    NOTE(review): env is not reset here — presumably the caller has already
    placed the environment in *initial_state*; verify against callers.
    """
    trajectory = []
    current = initial_state
    finished = False
    while not finished:
        weights: ActionValue = policy[current]
        # normalise the per-action weights into a probability distribution
        chosen = np.random.choice([0, 1], p=weights / sum(weights))
        successor, gain, finished, _ = env.step(chosen)
        trajectory.append((current, chosen, gain))
        current = successor
    return trajectory