def reset(self):
    """Re-initialise the environment and return the starting board observation."""
    self.game = Game(self.board_width, self.board_height)
    self.steps = 0
    # If the opponent (player 1) holds the first turn, let it move now.
    # This guarantees the RL agent is always the one to move in step(),
    # so it cannot make an illegal move by choosing the same column the
    # opponent just played.
    if self.game.current_player == 1:
        opponent_move = find_best_move(minmax(self.game, 4))
        self.game.play(opponent_move, self.game.current_player)
    return np.array(self.game.board)
class C4Env(gym.Env):
    """Custom Connect-Four environment that follows the gym interface.

    The learning agent plays against a fixed minimax opponent (search
    depth 4).  Board cells hold -1, 0 or 1.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, board_width=7, board_height=6):
        super(C4Env, self).__init__()
        self.board_width = board_width
        self.board_height = board_height
        self.reset()
        # One discrete action per column the player may drop a piece into.
        self.action_space = spaces.Discrete(self.board_width)
        # Observation is the raw board matrix; each cell is -1, 0 or 1.
        self.observation_space = spaces.Box(
            low=-1, high=1,
            shape=(self.board_width, self.board_height),
            dtype=np.int8)

    def step(self, action):
        # Execute one time step within the environment: the agent moves,
        # then (if the game is not over) the minimax opponent answers.
        self.game.play(action, self.game.current_player)
        if not self.game.get_status():
            self.game.play(find_best_move(minmax(self.game, 4)),
                           self.game.current_player)
            # Called for its side effects (refreshes game status/winner).
            self.game.get_status()
        self.steps += 1
        # Shorter games score higher; the winner's sign is flipped because
        # the RL agent appears to play as player -1 (see reset()) —
        # NOTE(review): confirm the sign convention against Game.winner.
        reward = float(-self.game.winner) / self.steps
        done = self.game.get_status()
        obs = self.game.board
        # The trailing dict is gym's "info" channel for debug data; unused here.
        return obs, reward, done, {}

    def reset(self):
        # Reset the state of the environment to an initial state.
        self.game = Game(self.board_width, self.board_height)
        self.steps = 0
        if self.game.current_player == 1:
            # Ensure the RL agent always moves first in step(); this way it
            # cannot make an illegal move by picking the same column the
            # opponent just chose.
            self.game.play(find_best_move(minmax(self.game, 4)),
                           self.game.current_player)
        return np.array(self.game.board)

    def render(self, mode='human', close=False):
        # Render the board to the terminal with colorama + unicode discs.
        # Renamed local from `input` to avoid shadowing the builtin.
        grid = np.fliplr(np.rot90(self.game.board, axes=(1, 0)))
        print(Back.BLUE)
        for i in range(len(grid)):
            for j in range(len(grid[i])):
                if grid[i][j] == -1:
                    print('\033[31m' + "\u25CF", end=" ")
                elif grid[i][j] == 1:
                    print('\033[33m' + "\u25CF", end=" ")
                else:
                    print('\033[30m' + "\u25CF", end=" ")
            print(Style.RESET_ALL + '\x1b[K')
        print(Back.BLUE, end="")
        print(Style.RESET_ALL + '\x1b[K')

    def get_legal_moves(self):
        # Delegate to the underlying game: columns that are not full.
        return self.game.get_legal_moves()
def test_next_move_gives_the_best_defence_move(self):
    """Minimax must block the opponent's three-in-a-row threat in column 2."""
    minimax = Minimax(1)
    game = Game(4, 4)
    game.board = [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, -1, -1, -1],
        [0, 0, 1, 1],
    ]
    self.assertEqual(2, minimax.next_move(game))
def test_next_move_gives_the_best_move_with_diagonals(self):
    """Minimax must pick column 0, exploiting a diagonal opportunity."""
    minimax = Minimax(1)
    game = Game(4, 4)
    game.board = [
        [0, -1, 1, -1],
        [0, 1, -1, -1],
        [0, -1, 1, -1],
        [0, 0, 0, 1],
    ]
    self.assertEqual(0, minimax.next_move(game))
class RandomEnv(C4Env):
    """Connect-Four gym environment whose opponent plays random moves.

    Rewards are shaped with weighted streak counts instead of only the
    final win/lose signal.
    """

    def step(self, action):
        # Execute one time step: agent moves, then the random opponent.
        self.game.play(action, self.game.current_player)
        if not self.game.get_status():
            self.game.play(self.game.random_action(), self.game.current_player)
            # Called for its side effects (refreshes game status/winner).
            self.game.get_status()
        self.steps += 1
        reward = self.boardchecker()
        done = self.game.get_status()
        obs = self.game.board
        # The trailing dict is gym's "info" channel for debug data; unused here.
        return obs, reward, done, {}

    def reset(self):
        # Reset the state of the environment to an initial state.
        self.game = Game(self.board_width, self.board_height)
        self.steps = 0
        if self.game.current_player == 1:
            # Ensure the RL agent always moves first in step(); this way it
            # cannot make an illegal move by picking the same column the
            # opponent just chose.
            self.game.play(self.game.random_action(), self.game.current_player)
        return np.array(self.game.board)

    def _streak_score(self, player):
        # Weighted count of the player's 2-, 3- and 4-in-a-row streaks.
        return (self.game.check_for_streak(player, 2)
                + self.game.check_for_streak(player, 3) * 10
                + self.game.check_for_streak(player, 4) * 1000)

    def boardchecker(self):
        # Reward shaping: player -1's streak score minus player 1's.
        return self._streak_score(-1) - self._streak_score(1)
def main(argv):
    """Play a series of games between two configurable connect-four AIs.

    Command line options:
      --p1 / --p2          AI type for each player (default: neural_network)
      --board_width / --board_height
                           board dimensions (default: 4x4)
      --iterations         number of games to play (default: 1)
      --randomness         probability player 1's move is randomised (default: 0.25)
      -v                   verbose output
      -h                   print usage and exit
    """
    first_player = -1
    second_player = 1
    p1_type = 'neural_network'
    p2_type = 'neural_network'
    number_of_games = 1
    randomness = 0.25
    board_width = 4
    board_height = 4
    try:
        opts, args = getopt.getopt(argv, "hv", [
            "p1=", "p2=", "board_width=", "board_height=",
            "iterations=", "randomness="
        ])
    except getopt.GetoptError:
        # print() with a single argument works identically in Python 2 and 3.
        print('train.py --p1 <1st player type> --p2 <2nd player type>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('train.py --p1 <1st player type> --p2 <2nd player type>')
            sys.exit()
        if opt == '-v':
            global verbose
            verbose = True
        # `==` replaces the former `opt in ("--p1")`, which was a substring
        # test on a plain string, not a tuple membership test.
        elif opt == "--p1":
            p1_type = arg
        elif opt == "--p2":
            p2_type = arg
        elif opt == "--board_width":
            # int()/float() replace string.atoi/atof, removed in Python 3.
            board_width = int(arg)
        elif opt == "--board_height":
            board_height = int(arg)
        elif opt == "--iterations":
            number_of_games = int(arg)
        elif opt == "--randomness":
            randomness = float(arg)
    game = Game(board_width, board_height)
    batch_statuses = {'draw': 0, 'won_p1': 0, 'won_p2': 0}
    game_statuses = {'draw': 0, 'won_p1': 0, 'won_p2': 0}
    batch_number = 1
    with ai.factory.create(p1_type, first_player) as p1_ai:
        with ai.factory.create(p2_type, second_player) as p2_ai:
            for game_number in range(number_of_games):
                status = game.get_status()
                while status == GAME_STATUS['PLAYING']:
                    current_player = game.current_player
                    current_ai = p1_ai if current_player == first_player \
                        else p2_ai
                    action = current_ai.next_move(game)
                    # With probability `randomness`, player 1 plays a random
                    # move to add exploration.
                    if random() < randomness and current_player == first_player:
                        printv('random triggered')
                        action = game.random_action()
                    printv(str(current_player) + ' plays ' + str(action))
                    game.play(action, current_player)
                    printv(np.matrix(game.board).transpose())
                    status = game.get_status()
                    p1_ai.turn_feedback(current_player, action)
                    p2_ai.turn_feedback(current_player, action)
                p1_ai.game_feedback(game, status, game.winner)
                p2_ai.game_feedback(game, status, game.winner)
                if game.winner == first_player:
                    batch_statuses['won_p1'] += 1
                elif game.winner == second_player:
                    batch_statuses['won_p2'] += 1
                else:
                    batch_statuses['draw'] += 1
                game.reset()
                if game_number > 1 and game_number % 100 == 0:
                    print("### BATCH N " + str(batch_number) + " ###")
                    batch_number += 1
                    handle_stats(game_statuses, batch_statuses,
                                 p1_type, p2_type)
def test_change_turn(self):
    """change_turn() must hand the move over to the other player."""
    game = Game(self.player_one, self.player_two, self.height, self.width)
    assert self.player_one == game.current_player
    game.change_turn()
    assert self.player_two == game.current_player
from connectfour.game import Game, GAME_STATUS
from minmax import minmax, find_best_move
import numpy as np
from colorama import init, Fore, Back, Style
import datetime

# Default dimensions for a standard Connect Four board.
BOARD_WIDTH = 7
BOARD_HIGHT = 6

game = Game(BOARD_WIDTH, BOARD_HIGHT)
print(game.board)

# Scratch/benchmark code kept for reference:
# game.current_player= 1
# game.play(2, 1)
# game.play(2, -1)
# game.play(4, 1)
# game.play(4, -1)
# before = datetime.datetime.now()
# minmax(game,7)
# after = datetime.datetime.now()
# print(after-before)

# Print the best first move suggested by a depth-4 minimax search.
print(find_best_move(minmax(game, 4)))

# Rotate/flip the board so it prints in display orientation.
# NOTE(review): `input` shadows the builtin; renaming is unsafe here because
# the script may continue beyond this chunk and reuse the name.
input = np.fliplr(np.rot90(game.board, axes=(1, 0)))
print(Back.BLUE, end="")