def action(self, state, player=1):
    """Pick a move via epsilon-greedy selection.

    With probability self.epsilon a random valid move is explored;
    otherwise self.greedy is exploited.  The move is applied for
    `player`, the resulting board is cached in self.previous_state
    for a later learning update, and the chosen column is returned.
    """
    if random.random() < self.epsilon:
        move = c4.getRandomMove(state)
    else:
        # NOTE(review): greedy is called without `player`; if this class's
        # greedy defaults to evaluating for player 1, moves are scored from
        # player 1's perspective even when player == -1 — confirm intended.
        move = self.greedy(state)
    new_state = c4.makeMove(state, player, move)
    self.previous_state = new_state
    return move
def action(self, state):
    """Epsilon-greedy move selection for self.player.

    Explores via self.random with probability self.epsilon, otherwise
    exploits self.greedy.  Records the string form and value estimate
    of the resulting position (self.prevstate / self.prevscore) for the
    next learning step, then returns the chosen column.
    """
    explore = random.random() < self.epsilon
    move = self.random(state) if explore else self.greedy(state)
    successor = c4.makeMove(state, self.player, move)
    self.prevstate = self.state_string(successor)
    self.prevscore = self.lookup(successor)
    return move
def greedy(self, state):
    """Return the valid column whose successor has the highest
    looked-up value for self.player (ties broken by lowest column).

    Side effect: feeds the best value found into self.backup — even
    when no column is playable (value stays -inf, move stays None).
    """
    best_value, best_move = float('-inf'), None
    for column in range(7):
        if not c4.isValidMove(state, column):
            continue
        value = self.lookup(c4.makeMove(state, self.player, column))
        if value > best_value:
            best_value, best_move = value, column
    self.backup(best_value)
    return best_move
def greedy(self, state, player=1):
    """Pick the column whose resulting position scores highest under
    the value network (ties go to the lowest column index).

    Side effect: passes the best score found to self.backup.  Returns
    None (after backing up -inf) when no column is playable.
    """
    # TODO: implement get_possible_moves in c4 instead of scanning 0-6
    best_score = float("-inf")
    chosen = None
    for column in range(7):
        if not c4.isValidMove(state, column):
            continue
        successor = c4.makeMove(state, player, column)
        features = c4.getNeuralInput(successor).reshape(1, 126)
        score = self.net.predict(features, batch_size=1)
        if score > best_score:
            best_score = score
            chosen = column
    self.backup(best_score)
    return chosen
def play(self):
    """Play one game: the agent (player 1) versus a uniformly random
    opponent (player -1), starting from an empty board.

    Returns 1 or -1 for the winning side, or 0.5 on a draw.
    """
    board = c4.getNewBoard()
    current = 1
    while not c4.isBoardFull(board):
        if current == 1:
            column = self.action(board, current)
        else:
            column = c4.getRandomMove(board)
        board = c4.makeMove(board, current, column)
        if c4.isWinner(board, current):
            return current
        current = -current
    # Board filled with no four-in-a-row.
    return 0.5
def play(agent1, agent2):
    """Play agent1 (player 1) against agent2 (player -1) from an empty
    board until a side wins or the board fills.

    Returns the winner reported by game_over, or EMPTY on a draw.
    """
    state = empty_state()
    player = 1
    # Fix: `winner` was only assigned inside the loop, so a board that
    # starts full would raise NameError at the final return.
    winner = EMPTY
    while not c4.isBoardFull(state):
        if player > 0:
            move = agent1.action(state)
        else:
            move = agent2.action(state)
        state = c4.makeMove(state, player, move)
        winner = game_over(state)
        player *= -1
        if winner != EMPTY:
            return winner
    return winner  # loop exhausted: board full with no winner -> EMPTY
def greedy(self, state, player=1):
    """Greedily select the move whose successor the value network rates
    highest for `player` (lowest column wins ties); back up that value.

    Returns None (and backs up -inf) when every column is full.
    """
    # TODO: implement get_possible_moves in c4 so we need not test all 7
    best = (float("-inf"), None)
    for col in (c for c in range(7) if c4.isValidMove(state, c)):
        after = c4.makeMove(state, player, col)
        rating = self.net.predict(
            c4.getNeuralInput(after).reshape(1, 126), batch_size=1)
        if rating > best[0]:
            best = (rating, col)
    self.backup(best[0])
    return best[1]