Example #1
def generate_episode_from_Q(env: BlackjackEnv, Q, epsilon,
                            action_count) -> List[tuple]:
    """
    Generates an episode by following an epsilon-greedy policy derived from Q.
    @param env: the Blackjack environment
    @param Q: action-value table mapping state -> array of action values
    @param epsilon: exploration rate of the epsilon-greedy policy
    @param action_count: number of available actions
    Returns a list of (state, action, reward) tuples.
    """
    episode = []
    # initial state: (player card sum, dealer's face-up card, usable-ace flag)
    state = env.reset()
    while True:
        if state in Q:
            # choose the action epsilon-greedily from the Q table
            action = np.random.choice(np.arange(action_count),
                                      p=get_probs(Q[state], epsilon,
                                                  action_count))
        else:
            # if this state has never been visited, pick a random action
            action = env.action_space.sample()

        next_state, reward, done, info = env.step(action)
        episode.append((state, action, reward))
        state = next_state
        if done:
            break
    return episode
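
The get_probs helper used above is not shown in this example; a minimal epsilon-greedy sketch consistent with how it is called (Q-values for one state in, one probability per action out) might look like this:

def get_probs(q_values, epsilon, action_count):
    # Assumed helper: every action receives epsilon / action_count,
    # and the greedy action gets the remaining probability mass.
    probs = np.ones(action_count) * epsilon / action_count
    probs[np.argmax(q_values)] += 1.0 - epsilon
    return probs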
Example #2
def gen_episode_data(policy: DeterministicPolicy,
                     env: BlackjackEnv) -> List[Tuple[State, Action, Reward]]:
    episode_history = []
    state = env.reset()
    done = False
    while not done:
        action = policy(state)
        next_state, reward, done, _ = env.step(action)
        episode_history.append((state, action, reward))
        state = next_state
    return episode_history
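
A DeterministicPolicy here is simply a callable mapping a state to an action; a hypothetical example is the common "stick on 20 or 21" rule (0 = stick, 1 = hit in Gym's Blackjack):

def stick_on_20(state: State) -> Action:
    # Illustrative policy, not part of the original example.
    player_sum, dealer_card, usable_ace = state
    return 0 if player_sum >= 20 else 1

episode = gen_episode_data(stick_on_20, BlackjackEnv())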
Example #3
def gen_stochastic_episode(
        policy: Policy,
        env: BlackjackEnv) -> List[Tuple[State, Action, Reward]]:
    episode_history = []
    state = env.reset()
    done = False
    while not done:
        # sample an action in proportion to the policy's weights for this state
        A: ActionValue = policy[state]
        action = np.random.choice([0, 1], p=A / sum(A))
        next_state, reward, done, _ = env.step(action)
        episode_history.append((state, action, reward))
        state = next_state
    return episode_history
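
For instance, a uniform random policy can be supplied as a defaultdict of equal action weights (illustrative usage, not part of the original example):

uniform_policy: Policy = defaultdict(lambda: np.ones(2) / 2)
episode = gen_stochastic_episode(uniform_policy, BlackjackEnv())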
Example #4
def reset_env_with_s0(env: BlackjackEnv, s0: State) -> BlackjackEnv:
    env.reset()
    player_sum = s0[0]
    oppo_sum = s0[1]
    has_usable = s0[2]

    # overwrite the dealt cards so the hands match the requested starting state
    env.dealer[0] = oppo_sum
    if has_usable:
        # an ace counts as 11, so pair it with a card worth player_sum - 11
        env.player[0] = 1
        env.player[1] = player_sum - 11
    else:
        # no usable ace: split the sum across two non-ace cards
        if player_sum > 11:
            env.player[0] = 10
            env.player[1] = player_sum - 10
        else:
            env.player[0] = 2
            env.player[1] = player_sum - 2
    return env
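
For example, to force an episode to start with a player sum of 15 against a dealer 10 and no usable ace (illustrative usage only):

env = reset_env_with_s0(BlackjackEnv(), (15, 10, False))
# the player's two cards now sum to 15 and the dealer shows a 10
print(env.player, env.dealer[0])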
Example #5
def mc_control_exploring_starts_state(env: BlackjackEnv, s_0: State, num_episodes, discount_factor=1.0) \
        -> Tuple[ActionValue, Policy]:
    states = list(product(range(10, 22), range(1, 11), (True, False)))
    policy = {
        s: np.ones(env.action_space.n) * 1.0 / env.action_space.n
        for s in states
    }
    Q = defaultdict(lambda: np.zeros(env.action_space.n))

    returns_sum = defaultdict(float)
    returns_count = defaultdict(float)

    for episode_i in range(1, num_episodes + 1):
        player_sum = s_0[0]
        oppo_sum = s_0[1]
        has_usable = s_0[2]

        # force the exploring start: rebuild the hands so the episode begins in s_0
        env.reset()
        env.dealer[0] = oppo_sum
        if has_usable:
            env.player[0] = 1
            env.player[1] = player_sum - 11
        else:
            if player_sum > 11:
                env.player[0] = 10
                env.player[1] = player_sum - 10
            else:
                env.player[0] = 2
                env.player[1] = player_sum - 2

        episode_history = gen_custom_s0_stochastic_episode(policy, env, s_0)

        # first-visit Monte Carlo update for the forced start pair (s_0, a)
        a = episode_history[0][1]
        G = 0.0
        for t, s_a_r in enumerate(episode_history):
            G += (discount_factor ** t) * s_a_r[2]
        returns_sum[s_0, a] += G
        returns_count[s_0, a] += 1.0
        Q[s_0][a] = returns_sum[s_0, a] / returns_count[s_0, a]
        # greedy policy improvement for the starting state
        best_a = np.argmax(Q[s_0])
        policy[s_0][best_a] = 1.0
        policy[s_0][1 - best_a] = 0.0

    return Q, policy
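
gen_custom_s0_stochastic_episode is referenced above but not defined in this listing. A plausible sketch, assuming the environment has already been forced into s_0 and the rollout simply follows the stochastic policy as in Example #3:

def gen_custom_s0_stochastic_episode(
        policy: Policy, env: BlackjackEnv,
        s_0: State) -> List[Tuple[State, Action, Reward]]:
    # Assumed helper: env is already in s_0, so start the rollout from there.
    episode_history = []
    state = s_0
    done = False
    while not done:
        A = policy[state]
        action = np.random.choice([0, 1], p=A / sum(A))
        next_state, reward, done, _ = env.step(action)
        episode_history.append((state, action, reward))
        state = next_state
    return episode_history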
Example #6
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'),
                      num_episodes=2)

        # state: discrete, action: box
        # self.unittest(environment=dict(environment='gym', level='GuessingGame'), num_episodes=2)

        # state: discrete, action: tuple(discrete)
        # from gym.envs.algorithmic import ReverseEnv
        # self.unittest(environment=ReverseEnv, num_episodes=2)

        # state: discrete, action: discrete
        from gym.envs.toy_text import FrozenLakeEnv
        self.unittest(environment=FrozenLakeEnv, num_episodes=2)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv(), num_episodes=2)

        # Classic control
        self.unittest(environment='CartPole-v1', num_episodes=2)
        self.unittest(environment='MountainCar-v0', num_episodes=2)
        self.unittest(environment='MountainCarContinuous-v0', num_episodes=2)
        self.unittest(environment='Pendulum-v1', num_episodes=2)
        self.unittest(environment='Acrobot-v1', num_episodes=2)

        # Box2d
        self.unittest(environment='LunarLander-v2', num_episodes=2)
        self.unittest(environment='LunarLanderContinuous-v2', num_episodes=2)
        self.unittest(environment='BipedalWalker-v3', num_episodes=2)
        self.unittest(environment='BipedalWalkerHardcore-v3', num_episodes=2)
        # below: self.unittest(environment='CarRacing-v0', num_episodes=2)

        # Toy text
        # above: self.unittest(environment='Blackjack-v1', num_episodes=2)
        self.unittest(environment='FrozenLake-v1', num_episodes=2)
        self.unittest(environment='FrozenLake8x8-v1', num_episodes=2)
        self.unittest(environment='CliffWalking-v0', num_episodes=2)
        self.unittest(environment='Taxi-v3', num_episodes=2)

        # Unit test
        self.unittest(environment='CubeCrash-v0', num_episodes=2)
        self.unittest(environment='CubeCrashSparse-v0', num_episodes=2)
        self.unittest(environment='CubeCrashScreenBecomesBlack-v0',
                      num_episodes=2)
        self.unittest(environment='MemorizeDigits-v0', num_episodes=2)
Example #7
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'))

        # state: discrete, action: box
        self.unittest(environment=dict(
            environment='gym', level='GuessingGame', max_episode_steps=False))

        # state: discrete, action: tuple(discrete)
        from gym.envs.algorithmic import ReverseEnv
        self.unittest(environment=ReverseEnv)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv())
Example #8
        # Find all states that we've visited in this episode
        # We convert each state to a tuple so that we can use it as a dict key
        states_in_episode = set([tuple(x[0]) for x in episode])
        for state in states_in_episode:
            # Find the first occurrence of the state in the episode
            first_occurrence_idx = next(i for i, x in enumerate(episode)
                                        if x[0] == state)
            # Sum up all rewards since the first occurrence
            G = sum([
                x[2] * (discount_factor**i)
                for i, x in enumerate(episode[first_occurrence_idx:])
            ])
            # Calculate average return for this state over all sampled episodes
            returns_sum[state] += G
            returns_count[state] += 1.0
            V[state] = returns_sum[state] / returns_count[state]

    return V


if __name__ == "__main__":
    # matplotlib.style.use('ggplot')

    env = BlackjackEnv()
    V_10k = mc_prediction(sample_policy, env, num_episodes=10000)
    print(V_10k)
    plot_value_function(V_10k, title="10,000 Steps")

    # V_500k = mc_prediction(sample_policy, env, num_episodes=500000)
    # plotting.plot_value_function(V_500k, title="500,000 Steps")
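
sample_policy is referenced above but not shown; a minimal sketch of the usual evaluation policy ("stick on 20 or 21, otherwise hit") would be:

def sample_policy(observation):
    # Illustrative policy: stick (0) on 20 or 21, otherwise hit (1).
    score, dealer_score, usable_ace = observation
    return 0 if score >= 20 else 1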
Example #9
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'),
                      num_episodes=2)

        # state: discrete, action: box
        self.unittest(environment=dict(environment='gym',
                                       level='GuessingGame'),
                      num_episodes=2)

        # state: discrete, action: tuple(discrete)
        from gym.envs.algorithmic import ReverseEnv
        self.unittest(environment=ReverseEnv, num_episodes=2)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv(), num_episodes=2)

        # Classic control
        # above: self.unittest(environment='CartPole-v1', num_episodes=2)
        self.unittest(environment='MountainCar-v0', num_episodes=2)
        self.unittest(environment='MountainCarContinuous-v0', num_episodes=2)
        self.unittest(environment='Pendulum-v0', num_episodes=2)
        self.unittest(environment='Acrobot-v1', num_episodes=2)

        # Box2d
        self.unittest(environment='LunarLander-v2', num_episodes=2)
        self.unittest(environment='LunarLanderContinuous-v2', num_episodes=2)
        self.unittest(environment='BipedalWalker-v3', num_episodes=2)
        self.unittest(environment='BipedalWalkerHardcore-v3', num_episodes=2)
        # below: self.unittest(environment='CarRacing-v0', num_episodes=2)

        # Toy text
        # above: self.unittest(environment='Blackjack-v0', num_episodes=2)
        self.unittest(environment='KellyCoinflip-v0', num_episodes=2)
        # TODO: out-of-bounds problems!
        # self.unittest(environment=dict(
        #     environment='KellyCoinflipGeneralized-v0', clip_distributions=True
        # ), num_episodes=2)
        self.unittest(environment='FrozenLake-v0', num_episodes=2)
        self.unittest(environment='FrozenLake8x8-v0', num_episodes=2)
        self.unittest(environment='CliffWalking-v0', num_episodes=2)
        self.unittest(environment='NChain-v0', num_episodes=2)
        self.unittest(environment='Roulette-v0', num_episodes=2)
        self.unittest(environment='Taxi-v3', num_episodes=2)
        # above: self.unittest(environment='GuessingGame-v0', num_episodes=2)
        self.unittest(environment='HotterColder-v0', num_episodes=2)

        # Algorithmic
        self.unittest(environment='Copy-v0', num_episodes=2)
        self.unittest(environment='RepeatCopy-v0', num_episodes=2)
        self.unittest(environment='ReversedAddition-v0', num_episodes=2)
        self.unittest(environment='ReversedAddition3-v0', num_episodes=2)
        self.unittest(environment='DuplicatedInput-v0', num_episodes=2)
        # above: self.unittest(environment='Reverse-v0', num_episodes=2)

        # Unit test
        self.unittest(environment='CubeCrash-v0', num_episodes=2)
        self.unittest(environment='CubeCrashSparse-v0', num_episodes=2)
        self.unittest(environment='CubeCrashScreenBecomesBlack-v0',
                      num_episodes=2)
        self.unittest(environment='MemorizeDigits-v0', num_episodes=2)
Example #10
    def __init__(self):
        env = BlackjackEnv()
        super().__init__(env)
        self.observation_space = spaces.Discrete(704)
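
Discrete(704) covers every (player sum, dealer card, usable ace) tuple, since 32 * 11 * 2 = 704. A hedged sketch of a matching observation() method, assuming the class is a gym.ObservationWrapper (both that assumption and the flattening scheme below are illustrative, not part of the original snippet):

    def observation(self, obs):
        # Assumed flattening: player sum in [0, 31], dealer card in [0, 10], usable ace as 0/1.
        player_sum, dealer_card, usable_ace = obs
        return (player_sum * 11 + dealer_card) * 2 + int(usable_ace)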