Example #1
    def on_episode_begin(self, episode, qfunction):
        # Rebuild the MDP from a fresh random Connect 4 position at the start
        # of every episode and point the learner and its policy at the new
        # environment (qlearning, egreedy and seed are defined by the
        # surrounding training script, e.g. as in Example #2).
        mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
        env = Environment(mdp)
        qlearning.env = env
        egreedy.action_space = env.actions
        qlearning.policy.provider = env.actions
        if episode % 50 == 0:
            print('Episode {}'.format(episode))
Example #2
from capstone.datasets.ucic4 import get_random_game, get_random_loss_game
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

seed = 383
random.seed(seed)
np.random.seed(seed)

mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
egreedy = EGreedy(action_space=env.actions,
                  qfunction=c4dn,
                  epsilon=1.0,
                  selfplay=False,
                  random_state=seed)
qlearning = ApproxQLearning(env=env,
                            qfunction=c4dn,
                            policy=egreedy,
                            discount_factor=0.99,
                            selfplay=False,
                            experience_replay=True,
                            replay_memory_size=20000,
                            batch_size=32)
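Example #2 stops before training is launched, and the on_episode_begin hook in Example #1 refers to exactly the names defined here (qlearning, egreedy, seed). The sketch below is not part of the original listing: it assumes the hook is meant to be wrapped in a subclass of the imported Callback class (NewGameCallback is a hypothetical name) and that training is started with the train(callbacks=[...]) entry point used in the later examples.

class NewGameCallback(Callback):

    def on_episode_begin(self, episode, qfunction):
        # Start every episode from a fresh random Connect 4 position,
        # reusing the hook body from Example #1.
        mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
        env = Environment(mdp)
        qlearning.env = env
        egreedy.action_space = env.actions
        qlearning.policy.provider = env.actions

# The number of training episodes is assumed to default or to be set in the
# truncated part of the original example.
qlearning.train(callbacks=[NewGameCallback()])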
Example #3
from capstone.game.games import Connect4
from capstone.game.players import AlphaBeta
from capstone.game.utils import c42pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import QValuesPlotter
from capstone.rl.value_functions import TabularQ

seed = 23
board = [['X', 'O', 'O', ' ', 'O', ' ', ' '],
         ['X', 'O', 'X', ' ', 'X', ' ', ' '],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X']]
game = Connect4(board)
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)
qlearning = QLearning(env=env,
                      qfunction=TabularQ(random_state=seed),
                      policy=RandomPolicy(env.actions, random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0,
                      n_episodes=1000)
qlearning.train(callbacks=[
    QValuesPlotter(state=game,
                   actions=game.legal_moves(),
                   filepath='figures/c4_ql_tab_qvalues.pdf')
])

####################
# Generate figures #
####################
Example #4
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=5000,
            # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
            filepath='figures/test88.pdf'
        )
    ]
)
Example #5
'''
Q-Learning is used to estimate the state-action values for all
Tic-Tac-Toe positions against a Random opponent.
'''
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import tic2pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter
from capstone.rl.value_functions import TabularQ

seed = 23
game = TicTacToe()
mdp = FixedGameMDP(game, RandPlayer(random_state=seed), 1)
env = Environment(mdp)
qlearning = QLearning(
    env=env,
    qfunction=TabularQ(random_state=seed),
    policy=RandomPolicy(env.actions, random_state=seed),
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=65000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            # the remaining EpisodicWLDPlotter arguments and the closing
            # brackets are truncated in the original listing
        )
    ]
)
Example #6
'''
The Q-learning algorithm is used to learn a function approximator
for the state-action values of Connect-4 positions.
'''
from capstone.game.games import Connect4, TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions import MLP, QNetwork

# game = Connect4()
game = TicTacToe()
# mdp = GameMDP(game)
mdp = FixedGameMDP(game, RandPlayer(), 1)
env = Environment(mdp)
# qnetwork = QNetwork(n_input_units=42, n_output_units=7)
qnetwork = QNetwork(n_input_units=9,
                    n_hidden_layers=3,
                    n_output_units=9,
                    n_hidden_units=100)
# qnetwork = QNetwork(n_input_units=42, n_hidden_layers=3, n_output_units=7, n_hidden_units=100)
egreedy = EGreedy(env.actions, qnetwork, 1.0)
qlearning = ApproximateQLearning(
    env=env,
    qfunction=qnetwork,
    policy=EGreedy(env.actions, qnetwork, 0.3),
    discount_factor=0.99,  # could be 1.0 instead, since the game MDP is deterministic
    n_episodes=100000,
    experience_replay=False)
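Example #6 also ends before training begins. A possible continuation, mirroring the train(callbacks=[...]) pattern of Examples #3 to #5, is sketched below; the n_matches, period and filepath values are illustrative assumptions rather than part of the original.

qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(),
            n_matches=1000,   # assumed evaluation size
            period=5000,      # assumed evaluation interval
            filepath='figures/example6_wld.pdf'  # hypothetical output path
        )
    ]
)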