Example #1
# Callback hook that starts every episode from a freshly sampled random game;
# `qlearning`, `egreedy` and `seed` come from the surrounding training script
# (the wrapping class name below is only illustrative).
class NewGamePerEpisode(Callback):
    def on_episode_begin(self, episode, qfunction):
        mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
        env = Environment(mdp)
        qlearning.env = env
        egreedy.action_space = env.actions
        qlearning.policy.provider = env.actions
        if episode % 50 == 0:
            print('Episode {}'.format(episode))
Example #2
from capstone.game.games import TicTacToe
from capstone.game.players import KerasPlayer, RandPlayer
from capstone.game.utils import play_series

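# Play 1000 TicTacToe matches between a saved Keras model and a random player.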
players = [KerasPlayer('models/qltic.h5'), RandPlayer()]
game = TicTacToe()
play_series(game, players, n_matches=1000)
Example #3
from capstone.datasets.ucic4 import get_random_game, get_random_loss_game
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

seed = 383
random.seed(seed)
np.random.seed(seed)

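# Build an MDP around a random Connect 4 position from the UCI dataset,
# with a random player as the fixed opponent.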
mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
egreedy = EGreedy(action_space=env.actions,
                  qfunction=c4dn,
                  epsilon=1.0,
                  selfplay=False,
                  random_state=seed)
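# DQN-style approximate Q-learning: a deep network Q-function trained with
# an experience replay buffer and epsilon-greedy exploration.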
qlearning = ApproxQLearning(env=env,
                            qfunction=c4dn,
                            policy=egreedy,
                            discount_factor=0.99,
                            selfplay=False,
                            experience_replay=True,
                            replay_memory_size=20000,
                            batch_size=32)
Example #4
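# Tabular Q-learning trained by self-play under a random behaviour policy;
# `game` and `seed` are defined earlier in the surrounding script.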
mdp = GameMDP(game)
env = Environment(mdp)
qlearning = QLearning(env=env,
                      qfunction=TabularVF(random_state=seed),
                      policy=RandomPolicy(action_space=env.action_space,
                                          random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0,
                      selfplay=True)


class Monitor(Callback):
    def on_episode_begin(self, episode, qfunction):
        if episode % 100 == 0:
            print('Episode {}'.format(episode))


qlearning.train(
    n_episodes=70000,
    callbacks=[
        Monitor(),
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=1000,
            filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
        )
    ])
Example #5
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.game.utils import play_series

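# Baseline: two random players against each other.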
game = TicTacToe()
players = [RandPlayer(), RandPlayer()]
play_series(game, players)
Example #6
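# The snippet opens mid-way through what appears to be an
# ApproximateQLearning(...) call, with self-play and experience replay enabled.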
                                 selfplay=True,
                                 experience_replay=True,
                                 replay_memory_size=10000,
                                 batch_size=32)


class Monitor(Callback):
    def on_episode_begin(self, episode, qfunction):
        if episode % 50 == 0:
            print('Episode {}'.format(episode))


qlearning.train(
    n_episodes=1750,
    callbacks=[
        EpisodicWLDPlotter(game=game,
                           opp_player=RandPlayer(),
                           n_matches=1000,
                           period=250,
                           filepath='figures/c4_dqn_simple.pdf'),
        # LinearAnnealing(egreedy, 'epsilon', init=1.0, final=0.1, n_episodes=1000),
        Monitor()
    ])

from capstone.game.players import GreedyQ

# Play greedily with respect to the learned Q-network.
g = GreedyQ(qnetwork)
print('Move:', g.choose_move(game))

# IMPORTANT: when picking the highest-valued move, ignore illegal moves.
Example #7
from capstone.game.games import Connect4
from capstone.game.players import RandPlayer
from capstone.game.utils import play_series


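# Minimal custom player: always picks the first legal move.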
class MyPlayer(object):
    def choose_move(self, game):
        return game.legal_moves()[0]


my = MyPlayer()

game = Connect4()
players = [my, RandPlayer()]
play_series(game, players, n_matches=1000)
Example #8
from capstone.game.games import TicTacToe
from capstone.game.players import MonteCarlo, RandPlayer
from capstone.game.utils import play_series

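# Monte Carlo player against a random player for 10 matches,
# then the same series with the sides swapped.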
game = TicTacToe()
players = [MonteCarlo(), RandPlayer()]
n_matches = 10
play_series(game, players, n_matches)
print('')
players.reverse()
play_series(game, players, n_matches)
Example #9
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

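# Approximate Q-learning with an MLP Q-function against a fixed random opponent;
# win/draw/loss rates over 1000 matches are plotted every 5000 episodes.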
seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=5000,
            # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
            filepath='figures/test88.pdf'
        )
    ]
)
Example #10
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import play_series

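# Alpha-beta search player against a random player, then the same
# series with the sides swapped.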
game = TicTacToe()
players = [AlphaBeta(), RandPlayer()]
print('Players: {}\n'.format(players))
n_matches = 10
play_series(game, players, n_matches)
players.reverse()
print('\nPlayers: {}\n'.format(players))
play_series(game, players, n_matches)
Example #11
# result = model.predict(np.array([xx]), batch_size=1)
# print('result', result)
# import pdb; pdb.set_trace()

# history = model.fit(X_train, Y_train, batch_size=batchSize, nb_epoch=epochs, verbose=1, validation_data=(X_test, Y_test), callbacks=[])

# # Report results
# score = model.evaluate(X_test, Y_test, verbose=0)
# print 'Test score:', score[0]
# print 'Test accuracy:', score[1]

from capstone.rl.value_functions import MLP
from capstone.game.players import GreedyQ, RandPlayer
from capstone.game.utils import play_series
from capstone.game.games import Connect4
mlp = MLP()
mlp.model = model
n_matches = 1000

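# Wrap the trained Keras model in the MLP value function and evaluate it
# greedily against a random player, reporting win/draw/loss rates.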
results = play_series(
    # game=get_random_game,
    game=Connect4(),
    players=[GreedyQ(mlp), RandPlayer()],
    # players=[RandPlayer(), RandPlayer()],
    n_matches=n_matches,
    verbose=True)

print('Win:', results['W'] / float(n_matches))
print('Draw:', results['D'] / float(n_matches))
print('Loss:', results['L'] / float(n_matches))
Example #12
'''
The Q-learning algorithm is used to learn a function approximator
for the state-action values of Connect-4 positions.
'''
from capstone.game.games import Connect4, TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions import MLP, QNetwork

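# The commented-out lines switch between Connect 4 (42 inputs, 7 actions) and
# TicTacToe (9 inputs, 9 actions), and between a full-game MDP and one with a
# fixed opponent.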
# game = Connect4()
game = TicTacToe()
# mdp = GameMDP(game)
mdp = FixedGameMDP(game, RandPlayer(), 1)
env = Environment(mdp)
# qnetwork = QNetwork(n_input_units=42, n_output_units=7)
qnetwork = QNetwork(n_input_units=9,
                    n_hidden_layers=3,
                    n_output_units=9,
                    n_hidden_units=100)
# qnetwork = QNetwork(n_input_units=42, n_hidden_layers=3, n_output_units=7, n_hidden_units=100)
egreedy = EGreedy(env.actions, qnetwork, 1.0)
qlearning = ApproximateQLearning(
    env=env,
    qfunction=qnetwork,
    policy=EGreedy(env.actions, qnetwork, 0.3),
    discount_factor=0.99,  # consider 1.0 instead, since the game is deterministic
    n_episodes=100000,
    experience_replay=False)
Example #13
from keras.models import load_model
from capstone.game.games import Connect4
from capstone.game.players import AlphaBeta, GreedyQ, KerasPlayer, RandPlayer
from capstone.game.players.kerasplayer import KerasStatePlayer
from capstone.game.utils import play_match, play_series
from capstone.rl.value_functions import QNetwork

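# Evaluate a saved Keras player against a random player over 100 Connect 4
# games; the commented-out player lists swap sides or use two random players.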
results = play_series(
    game=Connect4(),
    # players=[KerasStatePlayer('models/episode-14500-winpct-0.942'), RandPlayer()],
    players=[
        RandPlayer(),
        KerasStatePlayer('models/episode-14500-winpct-0.942')
    ],
    # players=[RandPlayer(), RandPlayer()],
    n_matches=100,
    verbose=True)
Example #14
import numpy as np
from capstone.datasets.ucic4 import get_random_win_game, get_random_loss_game
from capstone.game.players.kerasplayer import KerasStatePlayer
from capstone.game.players import RandPlayer
from capstone.game.utils import play_match, play_series
from capstone.utils import print_aec, str_aec

keras = KerasStatePlayer('models/episode-14500-winpct-0.942')
rnd = RandPlayer()

N_EVALUATIONS = 100
N_MATCHES_PER_EVALUATION = 100


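# Play N_EVALUATIONS series of N_MATCHES_PER_EVALUATION matches each,
# starting from positions produced by `generator`, and average how often
# the `expected` outcome ('W', 'D' or 'L') occurs.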
def run_evaluation(generator, players, expected):
    '''
    Returns the accuracy of the prediction.
    '''
    print('Running experiment for %s' % expected)
    outcomes = []
    for i in range(N_EVALUATIONS):
        print('Episode %d' % i)
        results = play_series(
            game=generator(),
            players=players,
            n_matches=N_MATCHES_PER_EVALUATION,
            verbose=False
        )
        outcomes.append(results[expected] / float(N_MATCHES_PER_EVALUATION))
    return np.mean(outcomes)