Example #1
    def on_episode_begin(self, episode, qfunction):
        # Rebuild the MDP from a fresh random Connect 4 position at the start
        # of every episode and point the learner and its policy at the new
        # environment (qlearning, egreedy and seed are defined by the
        # surrounding training script, e.g. as in Example #2).
        mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
        env = Environment(mdp)
        qlearning.env = env
        egreedy.action_space = env.actions
        qlearning.policy.provider = env.actions
        if episode % 50 == 0:
            print('Episode {}'.format(episode))
Example #2
from capstone.datasets.ucic4 import get_random_game, get_random_loss_game
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

seed = 383
random.seed(seed)
np.random.seed(seed)

mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
egreedy = EGreedy(action_space=env.actions,
                  qfunction=c4dn,
                  epsilon=1.0,
                  selfplay=False,
                  random_state=seed)
qlearning = ApproxQLearning(env=env,
                            qfunction=c4dn,
                            policy=egreedy,
                            discount_factor=0.99,
                            selfplay=False,
                            experience_replay=True,
                            replay_memory_size=20000,
                            batch_size=32)
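Example #2 stops before training is launched, and the on_episode_begin hook in Example #1 refers to exactly the names defined here (qlearning, egreedy, seed). The sketch below is not part of the original listing: it assumes the hook is meant to be wrapped in a subclass of the imported Callback class (NewGameCallback is a hypothetical name) and that training is started with the train(callbacks=[...]) entry point used in the later examples.

class NewGameCallback(Callback):

    def on_episode_begin(self, episode, qfunction):
        # Start every episode from a fresh random Connect 4 position,
        # reusing the hook body from Example #1.
        mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
        env = Environment(mdp)
        qlearning.env = env
        egreedy.action_space = env.actions
        qlearning.policy.provider = env.actions

# The number of training episodes is assumed to default or to be set in the
# truncated part of the original example.
qlearning.train(callbacks=[NewGameCallback()])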
Example #3
from capstone.game.games import Connect4
from capstone.game.players import AlphaBeta
from capstone.game.utils import c42pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import QValuesPlotter
from capstone.rl.value_functions import TabularQ

seed = 23
board = [['X', 'O', 'O', ' ', 'O', ' ', ' '],
         ['X', 'O', 'X', ' ', 'X', ' ', ' '],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X']]
game = Connect4(board)
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)
qlearning = QLearning(env=env,
                      qfunction=TabularQ(random_state=seed),
                      policy=RandomPolicy(env.actions, random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0,
                      n_episodes=1000)
qlearning.train(callbacks=[
    QValuesPlotter(state=game,
                   actions=game.legal_moves(),
                   filepath='figures/c4_ql_tab_qvalues.pdf')
])

####################
# Generate figures #
####################
Example #4
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=5000,
            # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
            filepath='figures/test88.pdf'
        )
    ]
)
Example #5
'''
Q-Learning is used to estimate the state-action values for all
Tic-Tac-Toe positions against a Random opponent.
'''
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import tic2pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter
from capstone.rl.value_functions import TabularQ

seed = 23
game = TicTacToe()
mdp = FixedGameMDP(game, RandPlayer(random_state=seed), 1)
env = Environment(mdp)
qlearning = QLearning(
    env=env,
    qfunction=TabularQ(random_state=seed),
    policy=RandomPolicy(env.actions, random_state=seed),
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=65000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            # the remaining EpisodicWLDPlotter arguments and the closing
            # brackets are truncated in the original listing
        )
    ]
)
Example #6
'''
The Q-learning algorithm is used to learn a function approximator
for the state-action values of Connect-4 positions.
'''
from capstone.game.games import Connect4, TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions import MLP, QNetwork

# game = Connect4()
game = TicTacToe()
# mdp = GameMDP(game)
mdp = FixedGameMDP(game, RandPlayer(), 1)
env = Environment(mdp)
# qnetwork = QNetwork(n_input_units=42, n_output_units=7)
qnetwork = QNetwork(n_input_units=9,
                    n_hidden_layers=3,
                    n_output_units=9,
                    n_hidden_units=100)
# qnetwork = QNetwork(n_input_units=42, n_hidden_layers=3, n_output_units=7, n_hidden_units=100)
egreedy = EGreedy(env.actions, qnetwork, 1.0)
qlearning = ApproximateQLearning(
    env=env,
    qfunction=qnetwork,
    policy=EGreedy(env.actions, qnetwork, 0.3),
    discount_factor=0.99,  # could be 1.0 instead, since the game MDP is deterministic
    n_episodes=100000,
    experience_replay=False)
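Example #6 also ends before training begins. A possible continuation, mirroring the train(callbacks=[...]) pattern of Examples #3 to #5, is sketched below; the n_matches, period and filepath values are illustrative assumptions rather than part of the original.

qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(),
            n_matches=1000,   # assumed evaluation size
            period=5000,      # assumed evaluation interval
            filepath='figures/example6_wld.pdf'  # hypothetical output path
        )
    ]
)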