Example #1
def get_newgame():
    """returns the game scene"""

    scene = Scene()
    background_layer = BackgroundLayer()

    # model
    model = GameModel(background_layer)

    # controller
    ctrl = GameCtrl(model)

    # view
    hud = HUD()
    view = GameView(model, hud)

    # set controller in model
    model.set_controller(ctrl)

    # add controller
    scene.add(ctrl, z=1, name="controller")

    # add view
    scene.add(hud, z=3, name="hud")

    scene.add(background_layer, z=0, name="background")
    scene.add(view, z=2, name="view")

    return scene
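The Scene/Layer objects and the z/name keywords in this factory match cocos2d's API. Purely as an illustrative sketch (assuming cocos2d is installed and the project's GameModel, GameCtrl, GameView, HUD and BackgroundLayer classes are importable), the returned scene could be run like this; the window size and caption are assumptions:

# Hypothetical launcher for get_newgame(); cocos2d and the window parameters are assumptions.
from cocos.director import director

def main():
    director.init(width=800, height=600, caption="Game")
    director.run(get_newgame())

if __name__ == "__main__":
    main()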
Example #2
 def __init__(self):
     """
     Constructor
     """
     self.Dialog = QtWidgets.QMainWindow()
     self.view = GameView()
     self.model = GameModel()
     self.view.setupUi(self.Dialog)
Example #3
def new_game():
    scene = Scene()
    model = GameModel()
    ctrl = GameCtrl(model)
    view = GameView(model)

    model.set_controller(ctrl)

    scene.add(BackgroundLayer(), z=0, name="background")
    scene.add(ctrl, z=1, name="controller")
    scene.add(view, z=2, name="view")

    return scene
Example #4
    def __init__(self,
                 board_size,
                 save_model=True,
                 model_dir=None,
                 debug=True,
                 learning_rate=0.01,
                 max_experience_history=600000):
        self.save_model = save_model
        self.model_dir = model_dir
        self.experience_history_path = self.model_dir + "exp_history.p"
        self.max_experience_history = max_experience_history
        self.debug = debug
        self.learning_rate = learning_rate

        self.q_network = GameModel(board_size,
                                   model_name='q_network',
                                   model_dir=self.model_dir,
                                   learning_rate=learning_rate)
        print(self.q_network.model.summary())
        self.q_hat = GameModel(board_size,
                               model_name='q_hat',
                               model_dir=self.model_dir,
                               learning_rate=learning_rate)
Example #5
class GameController:
    def __init__(self):
        self.message = ''
        self.game_model = GameModel()
        self.winner_check = 0

    def start_game(self):
        self.get_updated_vars()
        return {
            'previous_player': self.previous_player,
            'current_player': self.current_player,
            'current_game_state': self.current_game_state,
            'message': self.message,
            'available_columns': self.available_columns
        }

    def player_move(self, move):
        self.col = move
        self.row = self.game_model.get_column_capacity(self.col)
        self.update_vars()
        self.get_updated_vars()
        if self.winner_check:
            is_a_winning_move = pattern_finder(
                self.game_model.get_current_game_state(),
                self.row, self.col,
                self.game_model.get_sequence_length())
            if is_a_winning_move:
                self.message = self.game_model.get_winning_message()
            elif self.game_model.is_game_over():
                self.message = self.game_model.get_tie_message()
        return {
            'previous_player': self.previous_player,
            'current_player': self.current_player,
            'row': self.row,
            'col': self.col,
            'message': self.message,
            'available_columns': self.available_columns,
            'number_of_moves': self.number_of_moves
        }

    def update_vars(self):
        self.game_model.update_game_state(self.row, self.col)
        if self.game_model.get_column_capacity(self.col) <= 0:
            self.game_model.update_available_colums(self.col)
        self.game_model.update_column_capacity(self.col)
        self.game_model.update_game_state(self.row, self.col)
        self.game_model.update_number_of_moves()
        self.game_model.update_current_player()
        self.check_for_winner()

    def get_updated_vars(self):
        self.previous_player = self.game_model.get_previous_player()
        self.current_player = self.game_model.get_current_player()
        self.current_game_state = self.game_model.get_current_game_state()
        self.number_of_moves = self.game_model.get_number_of_moves()
        self.available_columns = self.game_model.get_available_columns()

    def check_for_winner(self):
        # A win is only possible after players * sequence_length - players + 1 moves
        if self.game_model.get_number_of_moves() >= (
                self.game_model.get_number_of_players()
                * self.game_model.get_sequence_length()
                - self.game_model.get_number_of_players() + 1):
            self.winner_check = 1
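A minimal console driver for the controller above, given as a hedged sketch: only start_game() and player_move() come from the example, while the input prompt, loop structure and end-of-game handling are assumptions.

# Illustrative console loop; GameController and pattern_finder are assumed importable.
if __name__ == "__main__":
    ctrl = GameController()
    state = ctrl.start_game()
    while not state['message']:
        col = int(input("Player {}: choose a column: ".format(state['current_player'])))
        state = ctrl.player_move(col)
    print(state['message'])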
Example #6
	def __init__(self):
		self.message = ''
		self.game_model = GameModel()
		self.winner_check = 0
Example #7
class Game:
    gameModel = None
    __MAX_TURN = 0
    __MIN_TURN = 1
    __lastMove = None

    # Returns whether it is MAX's turn
    def isMaxTurn(self):
        return self.gameModel.getTurn() == self.__MAX_TURN

    # Returns whether it is MIN's turn
    def isMinTurn(self):
        return self.gameModel.getTurn() == self.__MIN_TURN

    # Resets the game to the initial state
    def reset(self):
        self.gameModel = GameModel()

    # Move all remaining beans into the players' bases (terminal state)
    def doTerminalBeanMovement(self):
        # Collect every bean on player 1's side into player 1's base
        newValue = self.gameModel.getFieldValue(self.gameModel.PLAYER1_BASE)
        for field in self.getPlayerOneMoves():
            newValue += self.gameModel.getFieldValue(field)
            self.gameModel.changeFieldValue(field, 0)
        self.gameModel.changeFieldValue(self.gameModel.PLAYER1_BASE, newValue)

        # Collect every bean on player 2's side into player 2's base
        newValue = self.gameModel.getFieldValue(self.gameModel.PLAYER2_BASE)
        for field in self.getPlayerTwoMoves():
            newValue += self.gameModel.getFieldValue(field)
            self.gameModel.changeFieldValue(field, 0)
        self.gameModel.changeFieldValue(self.gameModel.PLAYER2_BASE, newValue)

    # Apply a move to the current game state
    def doMove(self, move):
        # Check whether the game has already reached a terminal state
        if self.isTerminal():
            return
        # Check whether the move is on the current player's side
        if not self.isFieldOnCurrentPlayerSide(move):
            print("Move " + str(move) + " is not legal")
            return
        # Get the number of beans in the chosen field
        beans = self.gameModel.getFieldValue(move)

        # If there are no beans, the move cannot be executed (illegal move)
        if beans == 0:
            print("Move " + str(move) + " no beans to move")
            return

        # Remember the last move
        self.__lastMove = move

        # Remove all beans from the chosen field
        self.gameModel.changeFieldValue(move, 0)
        currentField = move + 1
        while beans > 0:
            # Player 1 (MAX) never drops a bean into player 2's base; wrap around instead
            if (self.isMaxTurn()
                    and currentField == self.gameModel.PLAYER2_BASE):
                currentField = 0
                continue
            # Player 2 (MIN) never drops a bean into player 1's base; skip it instead
            if (self.isMinTurn()
                    and currentField == self.gameModel.PLAYER1_BASE):
                currentField = currentField + 1
                continue

            # Drop one bean into the current field
            currentValue = self.gameModel.getFieldValue(currentField)
            newValue = currentValue + 1
            self.gameModel.changeFieldValue(currentField, newValue)
            # One bean was placed, so decrease the number of remaining beans
            beans = beans - 1

            # If the last bean lands in an empty field on the current player's side and the
            # opposing field also holds beans, the player moves both fields' beans into their base
            if newValue == 1 and beans == 0 and self.isFieldOnCurrentPlayerSide(
                    currentField):
                self.handleLastBeanOnOwnEmptyField(currentField)

            # Advance to the next field
            currentField = currentField + 1
            if currentField == self.gameModel.PLAYER2_BASE + 1:
                currentField = 0
        # Hand the turn over to the other player
        self.gameModel.switchTurn()
        if self.isTerminal():
            self.doTerminalBeanMovement()

    # Returns the last move leading to this state
    def getLastMove(self):
        return self.__lastMove

    # Returns all possible moves of this state
    def getPossibleMoves(self):
        i = self.gameModel.PLAYER1_1
        minI = i
        maxI = self.gameModel.PLAYER1_6
        if self.isMinTurn():
            i = self.gameModel.PLAYER2_1
            minI = i
            maxI = self.gameModel.PLAYER2_6
        possibleMoves = []
        for move in range(minI, maxI + 1):
            if self.gameModel.getFieldValue(move) != 0:
                possibleMoves.append(move)
        return possibleMoves

    # Returns all successor states
    def expand(self):
        raise NotImplementedError("You should have implemented this")

    # Returns whether the current state is terminal
    def isTerminal(self):
        fieldsToCheck = self.getPlayerOneMoves()
        playerOneSideEmpty = self.areAllFieldsEmpty(fieldsToCheck)
        fieldsToCheck = self.getPlayerTwoMoves()
        playerTwoSideEmpty = self.areAllFieldsEmpty(fieldsToCheck)
        if (playerOneSideEmpty or playerTwoSideEmpty):
            self.doTerminalBeanMovement()
        return (playerOneSideEmpty or playerTwoSideEmpty)

    # Returns the evaluation of a state related to MAX
    def evalValueForMax(self):
        raise NotImplementedError("You should have implemented this")

    # Returns the evaluation for a terminal state (for MAX)
    def utility(self):
        raise NotImplementedError("You should have implemented this")

    def __init__(self):
        self.gameModel = GameModel()

    ############################################################################
    ############################ Helper methods ###############################
    ############################################################################

    def areAllFieldsEmpty(self, fields):
        allEmpty = True
        for field in fields:
            if self.gameModel.getFieldValue(field) != 0:
                allEmpty = False
        return allEmpty

    def getPlayerOneMoves(self):
        return [
            self.gameModel.PLAYER1_1, self.gameModel.PLAYER1_2,
            self.gameModel.PLAYER1_3, self.gameModel.PLAYER1_4,
            self.gameModel.PLAYER1_5, self.gameModel.PLAYER1_6
        ]

    def getPlayerTwoMoves(self):
        return [
            self.gameModel.PLAYER2_1, self.gameModel.PLAYER2_2,
            self.gameModel.PLAYER2_3, self.gameModel.PLAYER2_4,
            self.gameModel.PLAYER2_5, self.gameModel.PLAYER2_6
        ]

    def isFieldOnCurrentPlayerSide(self, move):
        if self.isMinTurn():
            return move in self.getPlayerTwoMoves()
        if self.isMaxTurn():
            return move in self.getPlayerOneMoves()

    def getCurrentPlayerBase(self):
        if self.isMinTurn():
            return self.gameModel.PLAYER2_BASE
        if self.isMaxTurn():
            return self.gameModel.PLAYER1_BASE

    def handleLastBeanOnOwnEmptyField(self, curField):
        opposingField = self.getOpposingField(curField)
        beansOnOpposingField = self.gameModel.getFieldValue(opposingField)
        if beansOnOpposingField == 0:
            # in this case, nothing happens
            return
        playerBase = self.getCurrentPlayerBase()
        beanCountInBase = self.gameModel.getFieldValue(playerBase)
        self.gameModel.changeFieldValue(
            playerBase, beanCountInBase + beansOnOpposingField + 1)
        self.gameModel.changeFieldValue(opposingField, 0)
        self.gameModel.changeFieldValue(curField, 0)

    def getOpposingField(self, field):
        if field == self.gameModel.PLAYER1_1:
            return self.gameModel.PLAYER2_6
        if field == self.gameModel.PLAYER1_2:
            return self.gameModel.PLAYER2_5
        if field == self.gameModel.PLAYER1_3:
            return self.gameModel.PLAYER2_4
        if field == self.gameModel.PLAYER1_4:
            return self.gameModel.PLAYER2_3
        if field == self.gameModel.PLAYER1_5:
            return self.gameModel.PLAYER2_2
        if field == self.gameModel.PLAYER1_6:
            return self.gameModel.PLAYER2_1
        if field == self.gameModel.PLAYER2_1:
            return self.gameModel.PLAYER1_6
        if field == self.gameModel.PLAYER2_2:
            return self.gameModel.PLAYER1_5
        if field == self.gameModel.PLAYER2_3:
            return self.gameModel.PLAYER1_4
        if field == self.gameModel.PLAYER2_4:
            return self.gameModel.PLAYER1_3
        if field == self.gameModel.PLAYER2_5:
            return self.gameModel.PLAYER1_2
        if field == self.gameModel.PLAYER2_6:
            return self.gameModel.PLAYER1_1
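A quick way to exercise the Game class above is a random self-play loop; this sketch uses only the methods shown (isTerminal, getPossibleMoves, doMove) and assumes the GameModel class is importable from the same project.

# Random self-play sketch; prints the final bean counts in both bases.
import random

def random_playout():
    game = Game()
    while not game.isTerminal():
        moves = game.getPossibleMoves()
        if not moves:
            break
        game.doMove(random.choice(moves))
    return (game.gameModel.getFieldValue(game.gameModel.PLAYER1_BASE),
            game.gameModel.getFieldValue(game.gameModel.PLAYER2_BASE))

print(random_playout())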
Example #8
 def reset(self):
     self.gameModel = GameModel()
Example #9
 def __init__(self):
     self.gameModel = GameModel()
Example #10
 def __init__(self):
     self.running = True
     self.gameModel = GameModel()
     self.view = GameView()
     self.runGame()
Example #11
class GameController(object):
    """
        GameController
        Das Spiel wird mit der Funktion :func:`__init__` gestartet
            - **Methoden**:
                * :func:`GameController.show`: Zeigt das GUI an
                * :func:`GameController.game_playing`: bekommt button und schaut ob es der richtige Button ist
                * :func:`GameController.updateStatistik`: aktualisiert die Statistik
                * :func:`GameController.reshuffle`: Zufällige Zahlen werden den Buttons zugewiesen
                * :func:`GameController.push_button_new_click`: neues Spiel wird gestarten
                * :func:`GameController.push_button_end_click`: Spiel wird geschlossen
        """
    def __init__(self):
        """
        Constructor
        """
        self.Dialog = QtWidgets.QMainWindow()
        self.view = GameView()
        self.model = GameModel()
        self.view.setupUi(self.Dialog)

    def show(self):
        """
        Displays the game's GUI
        :return: nothing
        """
        self.Dialog.show()
        self.reshuffle()
        self.view.pushButton_Neu.clicked.connect(
            partial(self.push_button_new_click))
        self.view.pushButton_End.clicked.connect(
            partial(self.push_button_end_click))

        for button in self.view.buttons:
            button.clicked.connect(partial(self.game_playing, button))

    def game_playing(self, p):
        """
        Receives a button and checks whether it is the correct one
        :param p: the button that was clicked
        :type p: `QPushButton`
        :return: nothing
        """
        button = p
        if int(button.text()) == self.model.nextValue:
            button.setEnabled(False)
            self.model.nextValue += 1
            self.model.isCorrect += 1
            self.model.isOpen -= 1
        else:
            self.model.isWrong += 1
        self.model.isTotal += 1
        self.updateStatistik()

    def updateStatistik(self):
        """
        Updates the statistics
        :return: nothing
        """
        self.view.lineEdit_0.setText(str(self.model.isOpen))
        self.view.lineEdit_1.setText(str(self.model.isCorrect))
        self.view.lineEdit_2.setText(str(self.model.isWrong))
        self.view.lineEdit_3.setText(str(self.model.isTotal))
        self.view.lineEdit_4.setText(str(self.model.Ngame))

    def reshuffle(self):
        """
        Random numbers (0 - 14) are assigned to the buttons
        :return: nothing
        """
        for button in self.view.buttons:
            button.setEnabled(True)
        i = 0
        buttonValues = []
        while i < 15:
            randnum = random.randrange(0, 15)
            if not (randnum in buttonValues):
                buttonValues.append(randnum)
                i += 1

        i = 0
        for button in self.view.buttons:
            button.setText(str(buttonValues[i]))
            i += 1

    def push_button_new_click(self):
        """
        Starts a new game when the New button is clicked
        :return: nothing
        """
        self.model.new_game()
        self.updateStatistik()
        self.reshuffle()

    def push_button_end_click(self):
        """
        Ends the game when the End button is clicked
        :return: nothing
        """
        sys.exit()
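An application entry point is not shown in the example; a minimal sketch, assuming PyQt5 and the project's GameView/GameModel imports are available, might look like this:

# Hypothetical application entry point for the controller above.
import sys
from PyQt5 import QtWidgets

if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    controller = GameController()
    controller.show()
    sys.exit(app.exec_())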
Example #12
class GameTrainer():
    def __init__(self,
                 board_size,
                 save_model=True,
                 model_dir=None,
                 debug=True,
                 learning_rate=0.01,
                 max_experience_history=600000):
        self.save_model = save_model
        self.model_dir = model_dir
        self.experience_history_path = self.model_dir + "exp_history.p"
        self.max_experience_history = max_experience_history
        self.debug = debug
        self.learning_rate = learning_rate

        self.q_network = GameModel(board_size,
                                   model_name='q_network',
                                   model_dir=self.model_dir,
                                   learning_rate=learning_rate)
        print(self.q_network.model.summary())
        self.q_hat = GameModel(board_size,
                               model_name='q_hat',
                               model_dir=self.model_dir,
                               learning_rate=learning_rate)
        #self.q_hat = self.q_network.copy_weights_to(GameModel(board_size, model_name='q_hat', model_dir=self.model_dir, learning_rate=learning_rate))

    def calc_reward(self, oldboard, newboard):
        # The rewards should be normalized so that the max tile value can be changed without having to retrain
        # if newboard.game_state == GameStates.LOSE: return -1
        # nom_reward = np.clip((newboard.score - oldboard.score) // 2 , 1, None)
        # reward = np.log2(nom_reward) / (np.log2(newboard.max_tile) - 1)
        # return np.clip(reward, -1., 1.)
        # if newboard.game_state == GameStates.LOSE: return -1
        # if newboard.game_state == GameStates.WIN: return 1
        # return 0
        #max_reward = np.log2(oldboard.max_tile)
        # Mean of the score awarded for new tile placement - either a 2 or a 4 are equally likely so mean = 3
        if newboard.game_state == GameStates.LOSE: return -1
        if newboard.game_state == GameStates.WIN: return 1
        tile_placement_mean_score = np.mean(oldboard.bonus_mask * 3)
        reward = np.clip(
            2 * (newboard.score - oldboard.score - tile_placement_mean_score) /
            oldboard.max_tile, -1, 1)
        #print("Score: {0}".format(score))
        return reward

    def exec_action(self, gameboard, gameaction):
        oldboard = copy.deepcopy(gameboard)
        gameboard.make_move(gameaction)
        return (oldboard, self.calc_reward(oldboard, gameboard))

    def preprocess_state(self, gameboard):
        #return gameboard.board.astype(np.float32) / gameboard.max_tile
        return np.log2(np.clip(gameboard.board, 1, gameboard.max_tile)) / np.log2(gameboard.max_tile)

    def get_action_probabilities(self, gameboard):
        return np.ravel(self.q_network(self.preprocess_state(gameboard)))

    def select_action(self, gameboard, epsilon):
        if len(gameboard.action_set) <= 0: return None

        # Return a random action with probability epsilon, otherwise return the model's recommendation
        if np.random.rand() < epsilon:
            return random.sample(gameboard.action_set, 1)[0]

        # Return the action with highest probability that is actually possible on the board, as predicted by the Q network
        action_probs = self.get_action_probabilities(gameboard)
        best_actions = [GameActions(i) for i in np.argsort(action_probs)[::-1]]
        return next(a for a in best_actions if a in gameboard.action_set)

    def calculate_y_target(self, newboards, actions_oh, rewards, gamestates,
                           gamma):
        # Determine best actions for the future board states from the current Q-network
        # (this is the DDQN approach for faster training)
        newboards_arr = np.array(newboards)
        best_actions = np.argmax(self.q_network(newboards_arr), axis=1)
        q_hat_output = self.q_hat(
            newboards_arr, Utils.one_hot(best_actions, len(GameActions)))
        #q_hat_output = self.q_hat(np.array(newboards))
        # print("Q-hat output values: ", q_hat_output)
        q_hat_pred = np.max(q_hat_output, axis=1)
        q_values = q_hat_pred * np.array([
            0 if s != GameStates.IN_PROGRESS.value else gamma
            for s in gamestates
        ])
        total_reward = np.array(rewards) + q_values
        return actions_oh * total_reward.reshape((-1, 1))

    def save_experience_history(self, D):
        if os.path.exists(self.experience_history_path):
            os.remove(self.experience_history_path)
        saved = False
        f_hist = None
        while not saved:
            try:
                f_hist = open(self.experience_history_path, "wb")
                dill.dump(D, f_hist)
                saved = True
                print("Saved gameplay experience to " +
                      self.experience_history_path)
            except Exception as e:
                traceback.print_exc()
                print(e)
                print(
                    "WARNING: failed to save experience replay history.  Will try again in 5 seconds..."
                )
                time.sleep(5)
            finally:
                if f_hist is not None and 'close' in dir(f_hist):
                    f_hist.close()
        if os.path.getsize(self.experience_history_path) > 0:
            shutil.copy2(
                self.experience_history_path,
                self.experience_history_path.replace('.p', '_BACKUP.p'))

    def restore_experience_history(self):
        f_hist = open(self.experience_history_path, "rb") if os.path.exists(
            self.experience_history_path) and os.path.getsize(
                self.experience_history_path) > 0 else None
        if f_hist is None: return RingBuf(self.max_experience_history)
        D = dill.load(f_hist)
        f_hist.close()
        if isinstance(D, RingBuf) and len(D) > 0:
            print("Restored gameplay experience from " +
                  self.experience_history_path)
            return D
        return RingBuf(self.max_experience_history)

    def train_model(self,
                    episodes=10,
                    max_tile=2048,
                    max_game_history=500,
                    max_epsilon=1.0,
                    min_epsilon=0.1,
                    mini_batch_size=32,
                    gamma=0.99,
                    update_qhat_weights_steps=10000):
        # Training variables
        D = self.restore_experience_history()  # experience replay queue
        gamehistory = deque(
            maxlen=max_game_history)  # history of completed games
        epsilon = max_epsilon  # probability of selecting a random action.  This is annealed from 1.0 to 0.1 over time
        update_frequency = 4  # Number of actions selected before the Q-network is updated again
        globalstep = 0

        # approx_steps_per_episode = 200
        # episodes_per_tb_output = 100
        # steps_per_tb_output = approx_steps_per_episode * episodes_per_tb_output     # MUST BE A MULTIPLE OF update_frequency

        # # Prepare a callback to write TensorBoard debugging output
        # tbCallback = tf.keras.callbacks.TensorBoard(log_dir=self.model_dir, histogram_freq=1,
        #                                             batch_size=mini_batch_size, write_graph=False,
        #                                             write_images=False, write_grads=True)

        q_hat_group = 1
        f_log = None
        if self.debug:
            f_log = open(self.model_dir + "training_log.csv", "w")
            f_log.write(
                "Q_hat_group,Avg_Pred,Avg_Target,Avg_Diff,Won_Lost_Games,My_Loss,Actual_Loss\n"
            )

        # Loop over requested number of games (episodes)
        loss = 0
        for episode in range(episodes):
            # New game
            gameboard = GameBoard(self.q_network.board_size, max_tile=max_tile)

            # Play the game
            stepcount = 0
            while gameboard.game_state == GameStates.IN_PROGRESS:
                stepcount += 1
                globalstep += 1

                # Select an action to perform.  It will be a random action with probability epsilon, otherwise
                # the action with highest probability from the Q-network will be chosen
                action = self.select_action(gameboard, epsilon)
                oldboard, reward = self.exec_action(gameboard, action)

                # Append the (preprocessed) original board, selected action, reward and new board to the history
                # This is to implement experience replay for reinforcement learning
                # Ensure history size is capped at max_history by randomly replacing an experience in the queue if necessary
                experience = (self.preprocess_state(oldboard), action.value,
                              reward, self.preprocess_state(gameboard),
                              gameboard.game_state.value)
                D.append(experience)

                # Perform a gradient descent step on the Q-network when a game is finished or every so often
                #if globalstep % update_frequency == 0 and len(D) >= mini_batch_size:
                if len(D) >= max(mini_batch_size, self.max_experience_history
                                 ) and globalstep % update_frequency == 0:
                    # Randomly sample from the experience history and unpack into separate arrays
                    batch = [
                        D[i]
                        for i in np.random.randint(0, len(D), mini_batch_size)
                    ]
                    oldboards, actions, rewards, newboards, gamestates = [
                        list(k) for k in zip(*batch)
                    ]

                    # One-hot encode the actions for each of these boards as this will form the basis of the
                    # loss calculation
                    actions_one_hot = Utils.one_hot(actions, len(GameActions))

                    # Compute the target network output using the Q-hat network, actions and rewards for each
                    # sampled history item
                    y_target = self.calculate_y_target(newboards,
                                                       actions_one_hot,
                                                       rewards, gamestates,
                                                       gamma)

                    #print("Rewards:{0}".format(" ".join(["{:.2f}".format(r) for r in rewards])))

                    # Perform a single gradient descent update step on the Q-network
                    # callbacks = []
                    # if self.debug and (globalstep % steps_per_tb_output == 0): callbacks.append(tbCallback)
                    X = [
                        np.array(oldboards).reshape(
                            (-1, self.q_network.board_size,
                             self.q_network.board_size, 1)), actions_one_hot
                    ]

                    # if len(callbacks) > 0:
                    #     self.q_network.model.fit(x=X, y=y_target, validation_split=0.16, epochs=1, verbose=False, callbacks=callbacks)
                    # else:
                    #loss += self.q_network.model.train_on_batch(x=X, y=y_target)

                    write_log_entry = (globalstep %
                                       (update_frequency * 10) == 0) and f_log
                    if write_log_entry:
                        y_pred = self.q_network(oldboards, actions_one_hot)
                        avg_pred = np.mean(np.abs(np.sum(y_pred, axis=1)))
                        avg_target = np.mean(np.abs(np.sum(y_target, axis=1)))
                        diff = np.sum(y_target - y_pred, axis=1)
                        avg_diff = np.mean(np.abs(diff))
                        my_loss = np.mean(np.square(diff))
                        won_lost_games = np.sum(
                            np.array(gamestates) < GameStates.IN_PROGRESS.value
                        )

                    curloss = self.q_network.model.train_on_batch(x=X,
                                                                  y=y_target)
                    loss += curloss

                    if write_log_entry:
                        f_log.write(
                            "{:d},{:.7f},{:.7f},{:.7f},{:d},{:.7f},{:.7f}\n".
                            format(q_hat_group, avg_pred, avg_target, avg_diff,
                                   won_lost_games, my_loss, curloss))

                # Every so often, copy the network weights over from the Q-network to the Q-hat network
                # (this is required for network weight convergence)
                if globalstep % update_qhat_weights_steps == 0:

                    def test_weight_update(testboard):
                        board_processed = self.preprocess_state(testboard)
                        q_res = np.ravel(self.q_network(board_processed))
                        q_hat_res = np.ravel(self.q_hat(board_processed))
                        print("\nQ-network result on testboard: {0}".format(
                            q_res))
                        print(
                            "Q-hat result on testboard: {0}".format(q_hat_res))
                        print("Difference: {0}\n".format(q_res - q_hat_res))

                    test_weight_update(oldboard)

                    self.q_network.copy_weights_to(self.q_hat)
                    print("Weights copied to Q-hat network")
                    self.q_network.save_to_file()

                    test_weight_update(oldboard)

                    lr_new = self.learning_rate / (1 + episode /
                                                   (episodes / 2))
                    #lr_new = self.learning_rate / np.sqrt(episode)
                    self.q_network.compile(learning_rate=lr_new)
                    print("Q-network learning rate updated to {:.6f}".format(
                        lr_new))

                    q_hat_group += 1

                # Perform annealing on epsilon
                epsilon = max(
                    min_epsilon,
                    min_epsilon + ((max_epsilon - min_epsilon) *
                                   (episodes - episode) / episodes))
                #epsilon = max_epsilon
                #if self.debug: print("epsilon: {:.4f}".format(epsilon))

            # Append metrics for each completed game to the game history list
            gameresult = (gameboard.game_state.value, gameboard.score,
                          stepcount, gameboard.largest_tile_placed)
            if len(gamehistory) >= max_game_history: gamehistory.popleft()
            gamehistory.append(gameresult)
            print('result: {0}, score: {1}, steps: {2}, max tile: {3}'.format(
                GameStates(gameresult[0]).name, gameresult[1], gameresult[2],
                gameresult[3]))
            if (episode + 1) % 20 == 0:
                games_to_retrieve = min(len(gamehistory), 100)
                last_x_games = list(
                    itertools.islice(gamehistory,
                                     len(gamehistory) - games_to_retrieve,
                                     None))
                last_x_results = list(zip(*last_x_games))[0]
                games_won = np.sum([
                    1 if r == GameStates.WIN.value else 0
                    for r in last_x_results
                ])
                print("\nEpisode {0}/{1}".format(episode + 1, episodes))
                print("History queue {0}/{1}".format(
                    len(D), self.max_experience_history))
                print("Game win % (for last {:d} games): {:.1f}%".format(
                    games_to_retrieve, 100. * (games_won / games_to_retrieve)))
                print("Epsilon = {:.3f}".format(epsilon))
                print("Training loss: {:.5f}\n".format(loss))
                loss = 0

            # Save experience history to disk periodically
            if self.save_model and (episode + 1) % 2000 == 0:
                self.save_experience_history(D)
                # Output garbage-collector diagnostics
                print("GC.isenabled() = {0}".format(gc.isenabled()))
                print("Garbage:", gc.garbage)
                print("Counts:", gc.get_count())
                print("globals() = ", sorted(list(globals().keys())))

        # Perform one final model weight save for next run
        if self.save_model:
            self.q_network.save_to_file()
            self.save_experience_history(D)

        if f_log: f_log.close()

        return gamehistory

    @staticmethod
    def display_training_history(gamehistory):
        results, scores, stepcounts, max_tiles = list(zip(*gamehistory))
        resultpercentages = np.cumsum(
            [1 if r == GameStates.WIN.value else 0 for r in results]
        ) / range(1, len(results) + 1)
        print("Final win %: {:2f}".format(resultpercentages[-1] * 100.))

        x = range(1, len(results) + 1)
        fig, ax_arr = plt.subplots(nrows=4,
                                   ncols=1,
                                   sharex=True,
                                   figsize=(5, 10))
        ax_arr[0].plot(x, resultpercentages)
        ax_arr[0].set_ylim(bottom=0)
        ax_arr[0].set_title('Win % (cumulative) = {:.2f}%'.format(
            resultpercentages[-1] * 100.))

        ax_arr[1].plot(x, scores)
        ax_arr[1].set_title('Score')

        ax_arr[2].plot(x, stepcounts)
        ax_arr[2].set_title('Actions per Game')

        ax_arr[3].plot(x, max_tiles)
        ax_arr[3].set_title('Max Tile Placed')

        ax_arr[3].set_xlabel("Game #")
        ax_arr[3].xaxis.set_major_locator(MaxNLocator(integer=True))

        fig.canvas.set_window_title("2048 Game Training Results")
        plt.get_current_fig_manager().window.state('zoomed')
        plt.show()

        return gamehistory
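How the trainer is constructed and invoked is not part of the example; as a hedged sketch, with placeholder values for the board size, model directory and training parameters, and assuming GameModel, GameBoard, Utils and RingBuf come from the same project:

# Illustrative training run; every parameter value here is a placeholder assumption.
if __name__ == "__main__":
    trainer = GameTrainer(board_size=4,
                          model_dir="./models/",  # hypothetical directory
                          learning_rate=0.01)
    history = trainer.train_model(episodes=1000,  # placeholder episode count
                                  max_tile=2048,
                                  mini_batch_size=32,
                                  gamma=0.99)
    GameTrainer.display_training_history(history)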