Example No. 1
    def get_next_move(self, m: mc.Mancala) -> int:

        # Snapshot the current board and replay each candidate move on a
        # scratch game so the real game state is never mutated.
        state = m.get_board_status()

        test_m = mc.Mancala()
        test_m.set_board_status(self.p, state)

        best_score = float("-inf")  # any real minimax score beats this
        best_move = -1

        moves = test_m.get_valid_moves()
        for i in moves:
            # Check the minimax value of each valid move from a fresh
            # copy of the saved state.
            test_m.set_board_status(self.p, state)
            test_m.play_turn(i)
            test_score = self.minimax(test_m, self._depth)
            if test_score > best_score:
                best_score = test_score
                best_move = i
            print("top level: move %i, score %i" % (i, test_score))

        return best_move
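
The `minimax` helper this method calls isn't part of the snippet. Below is a minimal sketch of one plausible shape, assuming the `mc.Mancala` methods seen elsewhere on this page (`is_game_over`, `current_player`, `get_board_status`, `set_board_status`, `get_valid_moves`, `play_turn`) and a simple bank-difference evaluation; the project's real scoring function may differ.

    def minimax(self, m: mc.Mancala, depth: int) -> int:
        # Leaf: score the board as our bank minus the opponent's bank.
        if depth == 0 or m.is_game_over():
            state = m.get_board_status()
            diff = state[mc.PLAYER1_BANK] - state[mc.PLAYER2_BANK]
            return diff if self.p == mc.PLAYER1 else -diff

        state = m.get_board_status()
        scores = []
        for i in m.get_valid_moves():
            # Replay each move on a fresh child game, then recurse.
            child = mc.Mancala()
            child.set_board_status(m.current_player(), state)
            child.play_turn(i)
            scores.append(self.minimax(child, depth - 1))

        # Maximize on our turns, minimize on the opponent's.
        return max(scores) if m.current_player() == self.p else min(scores)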
Example No. 2
import random
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import mancala

game = mancala.Mancala()
retrain = False

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(14, )),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dense(14)
])

learning_rate = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate)

model.compile(loss=tf.keras.losses.mean_squared_error,
              optimizer=optimizer)

num_episodes = 2000
num_turns_per_episode = 500

epsilon = 1.0
epsilon_min = 0.5
epsilon_decay = 0.995

gamma = 0.95

if retrain:
    fig = plt.figure()
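
The script is truncated right after `plt.figure()`. Below is a minimal sketch of the epsilon-greedy Q-learning loop that the hyperparameters above suggest (`epsilon`, `epsilon_decay`, `gamma`, the replay `deque`); `game.reset()` and `game.step(action)` are hypothetical stand-ins, not the real `mancala.Mancala` interface.

    # (continuing the `if retrain:` block; game API below is hypothetical)
    replay = deque(maxlen=2000)
    for episode in range(num_episodes):
        state = np.array(game.reset(), dtype=np.float32)       # assumed API
        for _ in range(num_turns_per_episode):
            # Epsilon-greedy: explore at random, otherwise take the
            # highest predicted Q-value among the 14 outputs.
            if random.random() < epsilon:
                action = random.randrange(14)
            else:
                q = model.predict(state[np.newaxis], verbose=0)[0]
                action = int(np.argmax(q))
            next_state, reward, done = game.step(action)       # assumed API
            next_state = np.array(next_state, dtype=np.float32)
            replay.append((state, action, reward, next_state, done))
            state = next_state
            if done:
                break

        # Replay a small batch and fit toward the Bellman target.
        for s, a, r, s2, done in random.sample(replay, min(32, len(replay))):
            target = model.predict(s[np.newaxis], verbose=0)[0]
            target[a] = r if done else r + gamma * np.max(
                model.predict(s2[np.newaxis], verbose=0)[0])
            model.fit(s[np.newaxis], target[np.newaxis], verbose=0)

        epsilon = max(epsilon_min, epsilon * epsilon_decay)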
Example No. 3
import reinforcement_learning_model as avm
import mancala

import matplotlib.pyplot as plt


if __name__ == "__main__":

    manc = mancala.Mancala()

    # Create an instance of our RL agent without any discounting factor
    # (we don't care if we win in 5 steps or in 10).
    rl_agent = avm.ActionValueModel(epsilon=0.5, discounting_factor=lambda a: 1)

    # Map out valid actions for each player.
    actions_bottom = [(1, "bottom"),
                      (2, "bottom"),
                      (3, "bottom"),
                      (4, "bottom"),
                      (5, "bottom"),
                      (6, "bottom")]

    actions_top = [(7, "top"),
                   (8, "top"),
                   (9, "top"),
                   (10, "top"),
                   (11, "top"),
                   (12, "top")]
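
The script is cut off at this point. Below is a hypothetical self-play loop over the two action lists; `choose_action` and `update` are placeholder names for whatever `ActionValueModel` actually exposes, and the game methods are borrowed from Example No. 9's `mancala` module.

    # Hypothetical training episodes (placeholder method names throughout).
    for episode in range(1000):
        manc = mancala.Mancala()
        while not manc.is_game_over():
            side_actions = (actions_bottom
                            if manc.current_player() == mancala.PLAYER1
                            else actions_top)
            pit, side = rl_agent.choose_action(manc, side_actions)
            manc.play_turn(pit)
        rl_agent.update(manc)  # learn from the finished game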
Example No. 4
# ---------------------------------------------------------------------------

# EVOLUTION LOOP:

for gen in range(start_gen, end_gen):

    # Resets agent score so old agents don't get an advantage.
    for agent in population:
        agent.score = 0

    # Matches up every ordered pair of agents (including self-play).
    matchups = [(a, b) for a in population for b in population]

    for players in matchups:
        # Creates the mancala game object for each matchup.
        game = mncl.Mancala()

        # Agents take turns choosing stones.
        while game.is_active:
            # Reshapes the game board into a column vector for the agents to process.
            board_input = np.array(game.board).reshape((len(game.board), 1))

            # Calculates output of the neural network for the given boardstate input.
            output = players[game.player].choose(board_input)

            # Sorts the outputs into a list of choices, best (highest) first.
            choices = np.argsort(-output.reshape((1, 6)))

            # Tests each choice in turn until it finds a valid move.
            n = 0
            while not game.valid_choice(choices[0][n]):
                n += 1
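
The matchup loop is truncated here. A plausible continuation is sketched below: play the first valid choice and credit the winner. `take_turn` and `winner` are guesses at the interface; only `agent.score` appears in the original.

            # Hypothetical continuation: play the first valid choice.
            game.take_turn(choices[0][n])   # assumed method name

        # Hypothetical fitness update once the game ends; only
        # agent.score is known from the snippet above.
        if game.winner is not None:         # assumed attribute
            players[game.winner].score += 1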
Example No. 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 13 11:20:54 2020

@author: johanna
"""

import numpy as np
import random
import mancala as m
import matplotlib.pyplot as plt

ma = m.Mancala()
ma.name = 'testeinfach1'
print("Start")
print(ma.spielfeld)
ma.print_spielfeld()
#ma.train_net(1, 10, 0.1, 5)

print("trained")
print('play against Random')
ma.net.generate_random_network([14, 10, 6])
ma.print_spielfeld()
Spieler1 = np.array([0])
Example No. 6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 15 14:15:22 2020

@author: johanna
"""

import numpy as np
import mancala as m

ma = m.Mancala(exploration_rate=0.2)
print("Start")
print(ma.net.biases)
print(ma.spielfeld[0:12])
#ma.train_net(500, 25, 1)
print("trained")
print('play against Random')
matest = m.Mancala(exploration_rate=0.8)
matest.net.load_network_from_files("Test")

Spieler1gewonnen = 0
Spieler2gewonnen = 0
unentschieden = 0
for i in range(1, 2):  # a single test game

    while not (
            np.array_equal(matest.spielfeld[0:6], [0, 0, 0, 0, 0, 0])
            or np.array_equal(matest.spielfeld[6:12], [0, 0, 0, 0, 0, 0])
            or matest.spielfeld[12] > 36 or matest.spielfeld[13] > 36):
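
The loop body is cut off. Below is a hypothetical body: the two `matest` methods are the ones Example No. 7 actually uses, while the random opponent's move is a guess.

        # Player 1: the net picks a pit (method names from Example No. 7).
        feld = matest.get_next_action(matest.spielfeld)
        matest.spielfeld, reward = matest.get_spielfeld_and_reward_after_action(
            matest.spielfeld, feld)

        # Hypothetical random opponent: pick any non-empty pit on the top row.
        zug = int(np.random.choice(np.nonzero(matest.spielfeld[6:12])[0])) + 6
        matest.spielfeld, _ = matest.get_spielfeld_and_reward_after_action(
            matest.spielfeld, zug)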
Example No. 7
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 13 11:20:54 2020

@author: johanna
"""

import numpy as np
import random
import mancala as m

matest = m.Mancala(exploration_rate=0.3, name="Test", network_layers=4)

# Let the net play as player 1 against Random.
Spieler1gewonnen = 0
Spieler2gewonnen = 0
unentschieden = 0

k = 50000
for i in range(1, k):
    spielfelder = []

    # Play until one side's row is empty or a bank exceeds 36 stones.
    while not (np.array_equal(matest.spielfeld[0:6], [0, 0, 0, 0, 0, 0])
               or np.array_equal(matest.spielfeld[6:12], [0, 0, 0, 0, 0, 0])
               or matest.spielfeld[12] > 36
               or matest.spielfeld[13] > 36):
        # Player 1: the net picks a pit and the board/reward are updated.
        feld = matest.get_next_action(matest.spielfeld)
        matest.spielfeld, reward = matest.get_spielfeld_and_reward_after_action(
            matest.spielfeld, feld)

        spielfelder.append(matest.spielfeld)
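
The snippet is truncated before the finished game is scored. A plausible continuation is sketched below, tallying the winner from the two banks (indices 12 and 13, as in the loop condition above); how the recorded `spielfelder` feed back into training is not shown and is left out here.

    # Tally the finished game from the two banks.
    if matest.spielfeld[12] > matest.spielfeld[13]:
        Spieler1gewonnen += 1
    elif matest.spielfeld[13] > matest.spielfeld[12]:
        Spieler2gewonnen += 1
    else:
        unentschieden += 1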
Example No. 8
import mancala as man
import numpy as np

a = man.Mancala(exploration_rate=1.0)

a.name = "Net"
a.name2 = "Net2"
a.net.generate_random_network([14, 14, 14, 14, 14, 6])
a.net2.generate_random_network([14, 14, 14, 14, 14, 6])

rate = []

size = 1000
for i in range(size):
    print("Training")
    a.train_dq(100, 10, 0.2, 10)
    # Anneal exploration toward zero over the run.
    a.exploration_rate -= 1 / (size + size / 10)
    # Track the win rate over 1000 evaluation games.
    rate.append(a.get_win_rate(1000))

np.savetxt("rate.csv", rate, delimiter=',')
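
Not part of the original script, but a quick way to inspect the learning curve saved above, reusing the `rate.csv` file it writes:

import matplotlib.pyplot as plt

rates = np.loadtxt("rate.csv", delimiter=',')
plt.plot(rates)                        # win rate after each training round
plt.xlabel("training round")
plt.ylabel("win rate over 1000 games")
plt.show()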
Example No. 9
import mancala
import minimax

m = mancala.Mancala("Human", "CPU")
ai = minimax.MiniMax(mancala.PLAYER2, depth=5)

while not m.is_game_over():
    try:
        print(m)
        if m.current_player() == mancala.PLAYER1:
            move = int(
                input("current player: %s" % m.get_current_player_name()))
        else:
            print("getting move from AI")
            move = ai.get_next_move(m)
            print("AI chose %i" % move)
        m.play_turn(move)

    except Exception as e:
        # Invalid input or an illegal move: report it and retry the turn.
        print(e)

print(m.get_winner() + " wins!")