Example #1
    def reset(self):
        # Reset the environment to an initial state
        self.game = Game(self.board_width, self.board_height)
        self.steps = 0
        if self.game.current_player == 1:
            # This way the RL agent always moves first in step() and cannot make
            # an illegal move by picking the same column the opponent just chose.
            self.game.play(find_best_move(minmax(self.game, 4)), self.game.current_player)
        return np.array(self.game.board)
Example #2
class C4Env(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, board_width=7, board_height=6):
        super(C4Env, self).__init__()
        self.board_width = board_width
        self.board_height = board_height
        
        self.reset()
        # The agent picks one of board_width columns to drop a piece into
        self.action_space = spaces.Discrete(self.board_width)
        # Observations are the raw board: one int8 per cell (-1, 0 or 1)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(self.board_width, self.board_height), dtype=np.int8)

    def step(self, action):
        # Execute one time step within the environment (each player makes a move)
        self.game.play(action, self.game.current_player)
        if not self.game.get_status():
            # Opponent replies with a depth-4 minimax move
            self.game.play(find_best_move(minmax(self.game, 4)), self.game.current_player)
            self.game.get_status()
        self.steps += 1
        # The RL agent plays as -1, so winning (winner == -1) gives a positive
        # reward that shrinks the longer the game took
        reward = float(-self.game.winner) / self.steps

        done = self.game.get_status()

        obs = self.game.board
        return obs, reward, done, {}  # the empty dict is gym's info payload

    def reset(self):
        # Reset the state of the environment to an initial state
        self.game = Game(self.board_width, self.board_height)
        self.steps = 0
        if self.game.current_player == 1:
            # This way the RL agent always moves first in step() and cannot make
            # an illegal move by picking the same column the opponent just chose.
            self.game.play(find_best_move(minmax(self.game, 4)), self.game.current_player)
        return np.array(self.game.board)

    def render(self, mode='human', close=False):
        # Render the board to the terminal; rotate and flip so the top row prints first
        grid = np.fliplr(np.rot90(self.game.board, axes=(1, 0)))

        print(Back.BLUE)
        for i in range(len(grid)):
            for j in range(len(grid[i])):
                if grid[i][j] == -1:
                    print('\033[31m' + "\u25CF", end=" ")  # red disc: player -1 (the agent)
                elif grid[i][j] == 1:
                    print('\033[33m' + "\u25CF", end=" ")  # yellow disc: player 1 (the opponent)
                else:
                    print('\033[30m' + "\u25CF", end=" ")  # black disc: empty cell
            print(Style.RESET_ALL + '\x1b[K')
            print(Back.BLUE, end="")
        print(Style.RESET_ALL + '\x1b[K')

    def get_legal_moves(self):
        return self.game.get_legal_moves()
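
A minimal rollout sketch for C4Env, assuming the same imports as Example #8 (Game, minmax, find_best_move, numpy) and that get_legal_moves returns the playable column indices:

import numpy as np

env = C4Env()
obs = env.reset()
done = False
while not done:
    # pick a random action among the columns that are not yet full,
    # so the agent never plays an illegal move
    action = int(np.random.choice(env.get_legal_moves()))
    obs, reward, done, info = env.step(action)
env.render()
print('final reward:', reward)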
Example #3
    def test_next_move_gives_the_best_defence_move(self):
        minimax = Minimax(1)
        game = Game(4, 4)

        game.board = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, -1, -1, -1],
            [0, 0, 1, 1],
        ]

        self.assertEqual(minimax.next_move(game), 2)
Example #4
    def test_next_move_gives_the_best_move_with_diagonals(self):
        minimax = Minimax(1)
        game = Game(4, 4)

        game.board = [
            [0, -1, 1, -1],
            [0, 1, -1, -1],
            [0, -1, 1, -1],
            [0, 0, 0, 1],
        ]

        self.assertEqual(minimax.next_move(game), 0)
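
The two tests above also pin down the board convention: game.board[c] is column c, and next_move returns a column index. A sketch of the same defensive check outside the test runner, assuming Minimax and Game are importable as in the tests:

minimax = Minimax(1)
game = Game(4, 4)

# board is indexed board[column][row]; column 2 holds three opponent discs
game.board = [
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, -1, -1, -1],
    [0, 0, 1, 1],
]

print(minimax.next_move(game))  # 2: drop into column 2 to block the threat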
Example #5
class RandomEnv(C4Env):
    def step(self, action):
        # Execute one time step within the environment (each player makes a move)
        self.game.play(action, self.game.current_player)
        if not self.game.get_status():
            # Opponent replies with a uniformly random legal move
            self.game.play(self.game.random_action(), self.game.current_player)
            self.game.get_status()
        self.steps += 1

        # Shaped reward from streak counts instead of the terminal win signal
        reward = self.boardchecker()
        # alternative: reward = float(-self.game.winner) / self.steps

        done = self.game.get_status()

        obs = self.game.board
        return obs, reward, done, {}  # the empty dict is gym's info payload

    def reset(self):
        # Reset the environment to an initial state
        self.game = Game(self.board_width, self.board_height)
        self.steps = 0
        if self.game.current_player == 1:
            # This way the RL agent always moves first in step() and cannot make
            # an illegal move by picking the same column the opponent just chose.
            self.game.play(self.game.random_action(), self.game.current_player)
        return np.array(self.game.board)

    def boardchecker(self):
        # Net streak score: the players' 2-, 3- and 4-in-a-row counts are
        # weighted 1/10/1000 and the opponent's (player 1) total is subtracted
        return (self.game.check_for_streak(-1, 2) +
                (self.game.check_for_streak(-1, 3) * 10) +
                (self.game.check_for_streak(-1, 4) * 1000)) - (
                    self.game.check_for_streak(1, 2) +
                    (self.game.check_for_streak(1, 3) * 10) +
                    (self.game.check_for_streak(1, 4) * 1000))
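
To make the weighting in boardchecker concrete, here is the same scoring applied to standalone streak counts; shaped_reward and its made-up inputs are illustrative only, not part of the project:

STREAK_WEIGHTS = {2: 1, 3: 10, 4: 1000}

def shaped_reward(agent_streaks, opponent_streaks):
    # each dict maps streak length -> number of such streaks on the board
    def score(streaks):
        return sum(STREAK_WEIGHTS[n] * streaks.get(n, 0) for n in STREAK_WEIGHTS)
    return score(agent_streaks) - score(opponent_streaks)

# agent (-1) has two 2-streaks and one 3-streak, opponent (1) a four-in-a-row:
print(shaped_reward({2: 2, 3: 1}, {4: 1}))  # 2*1 + 1*10 - 1*1000 = -988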
Example #6
import getopt
import sys
from random import random

import numpy as np

import ai.factory
from connectfour.game import Game, GAME_STATUS

# printv and handle_stats are project-local helpers defined elsewhere in this module


def main(argv):
    first_player = -1
    second_player = 1

    p1_type = 'neural_network'
    p2_type = 'neural_network'
    number_of_games = 1
    randomness = 0.25
    board_width = 4
    board_height = 4

    try:
        opts, args = getopt.getopt(argv, "hv", [
            "p1=", "p2=", "board_width=", "board_height=", "iterations=",
            "randomness="
        ])
    except getopt.GetoptError:
        print('train.py --p1 <1st player type> --p2 <2nd player type>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('train.py --p1 <1st player type> --p2 <2nd player type>')
            sys.exit()
        if opt == '-v':
            global verbose
            verbose = True
        elif opt == '--p1':
            p1_type = arg
        elif opt == '--p2':
            p2_type = arg
        elif opt == '--board_width':
            board_width = int(arg)
        elif opt == '--board_height':
            board_height = int(arg)
        elif opt == '--iterations':
            number_of_games = int(arg)
        elif opt == '--randomness':
            randomness = float(arg)

    game = Game(board_width, board_height)

    batch_statuses = {'draw': 0, 'won_p1': 0, 'won_p2': 0}

    game_statuses = {'draw': 0, 'won_p1': 0, 'won_p2': 0}

    batch_number = 1

    with ai.factory.create(p1_type, first_player) as p1_ai:
        with ai.factory.create(p2_type, second_player) as p2_ai:
            for game_number in range(number_of_games):
                status = game.get_status()

                while status == GAME_STATUS['PLAYING']:
                    current_player = game.current_player
                    current_ai = p1_ai if current_player == first_player else p2_ai

                    action = current_ai.next_move(game)

                    if random() < randomness and current_player == first_player:
                        printv('random triggered')
                        action = game.random_action()
                    printv(str(current_player) + ' plays ' + str(action))
                    game.play(action, current_player)
                    printv(np.matrix(game.board).transpose())

                    status = game.get_status()

                    p1_ai.turn_feedback(current_player, action)
                    p2_ai.turn_feedback(current_player, action)

                p1_ai.game_feedback(game, status, game.winner)
                p2_ai.game_feedback(game, status, game.winner)

                if game.winner == first_player:
                    batch_statuses['won_p1'] += 1
                elif game.winner == second_player:
                    batch_statuses['won_p2'] += 1
                else:
                    batch_statuses['draw'] += 1

                game.reset()
                if game_number > 1 and game_number % 100 == 0:
                    print("### BATCH N " + str(batch_number) + " ###")
                    batch_number += 1
                    handle_stats(game_statuses, batch_statuses, p1_type,
                                 p2_type)
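
A sketch invocation of main() with every option spelled out; the values mirror the defaults above, and the player type strings are assumed to be names registered with ai.factory:

if __name__ == '__main__':
    main(['--p1', 'neural_network',
          '--p2', 'neural_network',
          '--board_width', '4',
          '--board_height', '4',
          '--iterations', '200',
          '--randomness', '0.25'])

Passing sys.argv[1:] instead reproduces the command-line usage shown in the -h message.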
Example #7
    def test_change_turn(self):
        game = Game(self.player_one, self.player_two, self.height, self.width)

        assert game.current_player == self.player_one
        game.change_turn()
        assert game.current_player == self.player_two
Example #8
from connectfour.game import Game, GAME_STATUS
from minmax import minmax, find_best_move
import numpy as np
from colorama import init, Fore, Back, Style
import datetime

BOARD_WIDTH = 7
BOARD_HEIGHT = 6
game = Game(BOARD_WIDTH, BOARD_HEIGHT)
print(game.board)
# game.current_player= 1
# game.play(2, 1)
# game.play(2, -1)
# game.play(4, 1)
# game.play(4, -1)

# before = datetime.datetime.now()
# minmax(game,7)
# after = datetime.datetime.now()
# print(after-before)
print(find_best_move(minmax(game, 4)))

grid = np.fliplr(np.rot90(game.board, axes=(1, 0)))

print(Back.BLUE, end="")