def policy(self, game):
    with self._graph.as_default():
        predictions = self._model.predict(
            -game.board().reshape(1, 15, 15, 1))
    # Give every empty cell a small additive bonus so legal moves are
    # preferred over occupied ones.
    available = numpy.zeros((225, 1))
    positions = util.list_positions(game.board(), renju.Player.NONE)
    for pos in positions:
        available[pos[0] * 15 + pos[1]] = 1
    arr = predictions.T + available * 0.1
    code_move = numpy.argmax(arr)
    print(self._name + ':', util.to_move([code_move // 15, code_move % 15]))
    return arr
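# Hedged helper sketch (not part of the original agents): converting a flat
# (1, 225) policy vector, as returned by the `policy` methods in this file,
# into a human-readable move. `numpy`, `util.to_move`, and the 15x15 flat
# indexing are taken from the surrounding code.
def policy_to_move(arr):
    code_move = int(numpy.argmax(arr))           # flat index in [0, 225)
    return util.to_move([code_move // 15, code_move % 15])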
def process_move(self, move):
    # Validate the move, draw it on the canvas, and apply it to the game;
    # return False if the move is invalid or the game has finished.
    if not self.game.is_posible_move(move):
        self.board_canvas.print_message("Move is invalid!")
        return False
    self.board_canvas.place_move(move, self.game._player.__repr__())
    self.board_canvas.print_message(self.game._player.__repr__()
                                    + " move is: " + util.to_move(move))
    if not self.game.move(move):
        self.finish_game("Player " + self.game._player.__repr__() + " wins")
        return False
    return True
def choose_move(game):
    # Loading both models from disk on every call is expensive; a cached
    # variant is sketched below.
    model = load_model('modelusual.h5')
    model_graph = tf.get_default_graph()
    model_2 = load_model('modelsuper.h5')
    model_graph_2 = tf.get_default_graph()
    SUMEKENOV = CnnAgent(color='black', name='SUMEKENOV',
                         model=(model, model_graph))
    SUMEKENOV_2 = CnnAgent(color='black', name='SUMEKENOV_2',
                           model=(model_2, model_graph_2))
    # Ensemble: sum the two policy vectors and take the best cell.
    arr_final = SUMEKENOV.policy(game) + SUMEKENOV_2.policy(game)
    code_move = int(numpy.argmax(arr_final))
    return util.to_move([code_move // 15, code_move % 15])
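# A minimal caching sketch (assumption: the same Keras `load_model` and TF1
# `tf.get_default_graph` setup as in `choose_move` above; `_ENSEMBLE` and
# `get_ensemble` are names invented here, not part of the original code).
_ENSEMBLE = None

def get_ensemble():
    # Load both models once and reuse them across calls.
    global _ENSEMBLE
    if _ENSEMBLE is None:
        model = load_model('modelusual.h5')
        graph = tf.get_default_graph()
        model_2 = load_model('modelsuper.h5')
        graph_2 = tf.get_default_graph()
        _ENSEMBLE = ((model, graph), (model_2, graph_2))
    return _ENSEMBLE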
def policy(self, game):
    if self.flag:
        # Re-encode the board for the network: opponent stones (-1) become
        # 0.5, own stones (1) stay 1, empty cells stay 0.
        newBoard = numpy.zeros((1, 15, 15, 1))
        for i in range(15):
            for j in range(15):
                if game.board()[i][j] == -1:
                    newBoard[0][i][j][0] = 0.5
                if game.board()[i][j] == 1:
                    newBoard[0][i][j][0] = 1
        with self._graph.as_default():
            predictions = self._model.predict(newBoard)
        available = numpy.zeros((225, 1))
        positions = util.list_positions(game.board(), renju.Player.NONE)
        for pos in positions:
            available[pos[0] * 15 + pos[1]] = 1
        # Zero out occupied cells, then take the most probable legal move.
        arr = predictions.T * available
        code_move = numpy.argmax(arr)
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]))
        return arr
    else:
        # Manual mode: read a move (e.g. 'h8') from stdin and return a
        # one-hot probability board for it.
        move = input()
        pos = util.to_pos(move)
        probs = numpy.zeros(game.shape)
        probs[pos] = 1.0
        return probs
def policy(self, game):
    if (self._color == 'black' and
            len(util.list_positions(game.board(), renju.Player.NONE)) == 225):
        res = numpy.zeros((225, 1))
        res[142] = 1
        return res.reshape((1, 225))
    predictions = numpy.zeros((1, 225))
    for model in self._models:
        with model[1].as_default():
            predictions += model[0].predict(
                -game.board().reshape(1, 15, 15, 1))
    available = numpy.zeros((225, 1))
    positions = util.list_positions(game.board(), renju.Player.NONE)
    for pos in positions:
        available[pos[0] * 15 + pos[1]] = 1
    arr = predictions.T + available
    code_move = numpy.argmax(arr)
    if self._verbose:
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]))
    return arr
def policy(self, game):
    from time import time
    # Opening move for black on an empty board: the centre cell (index 112).
    if (self._color == 'black' and
            len(util.list_positions(game.board(), renju.Player.NONE)) == 225):
        res = numpy.zeros((225, 1))
        res[112] = 1
        return res.reshape((1, 225))
    board = numpy.copy(-game.board())
    available = numpy.zeros(225)
    checker = numpy.zeros(225)
    # For every empty cell, count whether placing either colour there
    # completes five in a row (an immediate win or an immediate block).
    for parsed_pos in util.list_positions(game.board(), renju.Player.NONE):
        pos = parsed_pos[0] * 15 + parsed_pos[1]
        parsed_pos = tuple(parsed_pos)
        board[parsed_pos] = 1
        if util.check(board, parsed_pos):
            checker[pos] += 1
        board[parsed_pos] = -1
        if util.check(board, parsed_pos):
            checker[pos] += 1
        board[parsed_pos] = 0
        available[pos] = 1
    start = time()
    with self._graph.as_default():
        predictions = self._model.predict(board.reshape(1, 15, 15, 1))[0]
    # Mask illegal moves and boost cells that win or block a win.
    arr = (predictions * available) * (1 + checker)
    code_move = numpy.argmax(arr)
    if self._verbose:
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]),
              time() - start)
    return arr.reshape(1, 225)
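# Hedged numeric illustration of the masking/boost above (toy numbers, not
# from the original): a 0.2-probability move that both wins and blocks
# (checker == 2) outranks a 0.5-probability quiet move, since
# 0.2 * (1 + 2) = 0.6 > 0.5; occupied cells are zeroed out by `available`.
def _demo_checker_boost():
    predictions = numpy.array([0.5, 0.2])   # network probabilities
    available = numpy.array([1.0, 1.0])     # both cells empty
    checker = numpy.array([0.0, 2.0])       # second cell wins and blocks
    return numpy.argmax((predictions * available) * (1 + checker))  # -> 1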
def policy(self, game):
    start = time()
    if (self._color == 'black' and
            len(util.list_positions(game.board(), renju.Player.NONE)) == 225):
        res = numpy.zeros((225, 1))
        res[112] = 1
        return res.reshape((1, 225))
    if self._color == 'black':
        with self._black_graph.as_default():
            predictions = self._black_model.predict(
                -game.board().reshape(1, 15, 15, 1),
                batch_size=1, verbose=0)[0]
    else:
        with self._white_graph.as_default():
            predictions = self._white_model.predict(
                -game.board().reshape(1, 15, 15, 1),
                batch_size=1, verbose=0)[0]
    res = numpy.zeros(225)
    n = numpy.zeros(225)

    def rollout(board, temp_high, color, checking_pos,
                max_high=self._high, gamma=self._gamma):
        # Sample one continuation move from the network of `color` and
        # credit the root move `checking_pos` with a discounted reward.
        if color == 'black':
            with self._black_graph.as_default():
                temp_predictions = self._black_model.predict(
                    board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
        else:
            with self._white_graph.as_default():
                temp_predictions = self._white_model.predict(
                    board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
        temp_pos = numpy.random.choice(225, p=temp_predictions)
        temp_parsed_pos = numpy.unravel_index(temp_pos, (15, 15))
        if board[temp_parsed_pos] != 0:
            # Sampled an occupied cell: reward or penalise the root move
            # depending on depth parity.
            if temp_high % 2:
                res[checking_pos] += gamma ** (temp_high - 1)
            else:
                res[checking_pos] -= gamma ** (temp_high - 1)
            return
        # Place the stone first, then test for five in a row.
        board[temp_parsed_pos] = 1
        next_color = 'white'
        if color == 'white':
            next_color = 'black'
            board[temp_parsed_pos] = -1
        if util.check(board, temp_parsed_pos):
            if temp_high % 2:
                res[checking_pos] -= gamma ** (temp_high - 1)
            else:
                res[checking_pos] += gamma ** (temp_high - 1)
            return
        if temp_high < max_high:
            rollout(board, temp_high + 1, next_color, checking_pos)

    i = 0
    while time() - start < self._timeout and i < self._samples:
        i += 1
        pos = numpy.random.choice(225, p=predictions)
        parsed_pos = numpy.unravel_index(pos, (15, 15))
        board = numpy.copy(-game.board())
        n[pos] += 1
        if board[parsed_pos] != 0:
            res[pos] -= 1
            continue
        board[parsed_pos] = 1
        color = 'white'
        if self._color == 'white':
            color = 'black'
            board[parsed_pos] = -1
        if util.check(board, parsed_pos):
            # Immediate win: make this move dominate and stop sampling.
            res[pos] = res.max() + 1
            break
        rollout(board, 1, color, pos)
    values = (res + 10 * predictions) / (1 + n) + (res > 0.5 * n)
    if self._verbose:
        code_move = numpy.argmax(values.reshape(1, 225))
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]),
              'working time:', time() - start, 'iterations:', i)
    return values.reshape(1, 225)
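# Hedged reading of the value formula above (an interpretation with toy
# numbers, not original documentation): (res + 10 * predictions) / (1 + n)
# blends rollout reward with a prior pull toward the network policy, and
# (res > 0.5 * n) adds a unit bonus where accumulated reward exceeds half
# the visit count.
def _demo_value_formula():
    predictions = numpy.array([0.30, 0.05])
    res = numpy.array([0.0, 4.0])   # rollout reward totals
    n = numpy.array([0.0, 6.0])     # sample counts
    # Unsampled move: prior only, (0 + 3.0) / 1 = 3.0, no bonus.
    # Sampled move: (4 + 0.5) / 7 ~ 0.64, plus 1.0 bonus since 4 > 3.
    return (res + 10 * predictions) / (1 + n) + (res > 0.5 * n)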
def policy(self, game):
    if (self._color == 'black' and
            len(util.list_positions(game.board(), renju.Player.NONE)) == 225):
        res = numpy.zeros((225, 1))
        res[142] = 1
        return res.reshape((1, 225))
    temp_high = 0
    if self._color == 'black':
        with self._black_graph.as_default():
            predictions = self._black_model.predict(
                -game.board().reshape(1, 15, 15, 1),
                batch_size=1, verbose=0)[0]
    else:
        with self._white_graph.as_default():
            predictions = self._white_model.predict(
                -game.board().reshape(1, 15, 15, 1),
                batch_size=1, verbose=0)[0]
    results = dict()
    danger = set()

    def check_state_white(board, pos, checking_pos, temp_prob, temp_high,
                          results):
        # Play `pos` on the board, query the white network for replies, and
        # recurse into the `self._width` most probable continuations,
        # accumulating the sum of log-probabilities along the line.
        temp_high += 1
        parsed_pos = numpy.unravel_index(pos, (15, 15))
        if temp_high > self._high:
            if checking_pos not in danger:
                if results[checking_pos] < temp_prob:
                    results[checking_pos] = temp_prob
            return
        board[parsed_pos] = 1
        with self._white_graph.as_default():
            white_predict = self._white_model.predict(
                board.reshape(1, 15, 15, 1))[0]
        top_args_white = numpy.argsort(white_predict)[::-1][:self._width]
        for pos in top_args_white:
            if util.check(-board, parsed_pos):
                # The placed stone completes five in a row: flag the root
                # move as dangerous when we are black, and stop this line.
                if self._color == 'black':
                    danger.add(checking_pos)
                continue
            if checking_pos not in danger:
                check_state_black(
                    numpy.copy(board), pos, checking_pos,
                    temp_prob + numpy.log(white_predict[pos]),
                    temp_high, results)

    def check_state_black(board, pos, checking_pos, temp_prob, temp_high,
                          results):
        # Mirror of check_state_white for black's replies.
        temp_high += 1
        parsed_pos = numpy.unravel_index(pos, (15, 15))
        if temp_high > self._high:
            if checking_pos not in danger:
                if results[checking_pos] < temp_prob:
                    results[checking_pos] = temp_prob
            return
        board[parsed_pos] = -1
        with self._black_graph.as_default():
            black_predict = self._black_model.predict(
                board.reshape(1, 15, 15, 1))[0]
        top_args_black = numpy.argsort(black_predict)[::-1][:self._width]
        for pos in top_args_black:
            if util.check(-board, parsed_pos):
                if self._color == 'white':
                    danger.add(checking_pos)
                continue
            if checking_pos not in danger:
                check_state_white(
                    numpy.copy(board), pos, checking_pos,
                    temp_prob + numpy.log(black_predict[pos]),
                    temp_high, results)

    # Beam search over the `self._width` most probable root moves.
    top_args = numpy.argsort(predictions)[::-1][:self._width]
    for pos in top_args:
        results[pos] = -100000000000
        if self._verbose:
            print(numpy.unravel_index(pos, (15, 15)), end=' - ')
        if self._color == 'black':
            check_state_white(numpy.copy(-game.board()), pos, pos,
                              numpy.log(predictions[pos]), temp_high, results)
        else:
            check_state_black(numpy.copy(-game.board()), pos, pos,
                              numpy.log(predictions[pos]), temp_high, results)
    # Pick the safe root move with the best log-probability line.
    max_sum_log, best_move = -100000000, -1
    for move in results:
        if move not in danger:
            if max_sum_log < results[move]:
                max_sum_log = results[move]
                best_move = move
    res = numpy.zeros((225, 1))
    res[best_move] = 1
    code_move = numpy.argmax(res.reshape(1, 225))
    if self._verbose:
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]))
    return res.reshape(1, 225)
def policy(self, game):
    start = time()
    if (self._color == 'black' and
            len(util.list_positions(game.board(), renju.Player.NONE)) == 225):
        res = numpy.zeros((225, 1))
        res[112] = 1
        return res.reshape((1, 225))
    # For every empty cell, count whether placing either colour there
    # completes five in a row (immediate win or immediate block).
    checker = numpy.zeros(225)
    available = numpy.zeros(225)
    board = numpy.copy(-game.board())
    for parsed_pos in util.list_positions(game.board(), renju.Player.NONE):
        pos = parsed_pos[0] * 15 + parsed_pos[1]
        parsed_pos = tuple(parsed_pos)
        board[parsed_pos] = 1
        if util.check(board, parsed_pos):
            checker[pos] += 1
        board[parsed_pos] = -1
        if util.check(board, parsed_pos):
            checker[pos] += 1
        board[parsed_pos] = 0
        available[pos] = 1
    if self._color == 'black':
        with self._black_graph.as_default():
            predictions = self._black_model.predict(
                board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
    else:
        with self._white_graph.as_default():
            predictions = self._white_model.predict(
                board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
    logs = numpy.zeros(225, dtype=numpy.float32)
    res = numpy.zeros(225, dtype=numpy.float32)
    n = numpy.zeros(225, dtype=numpy.float32)

    def rollout(board, temp_high, color, checking_pos, sum_logs,
                max_high=self._high, gamma=self._gamma,
                fine=self._fine, bonus=self._bonus):
        # Sample one continuation from the network of `color`; credit the
        # root move with a discounted bonus/fine and remember the best
        # log-probability of a full-depth line.
        if color == 'black':
            with self._black_graph.as_default():
                temp_predictions = self._black_model.predict(
                    board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
        else:
            with self._white_graph.as_default():
                temp_predictions = self._white_model.predict(
                    board.reshape(1, 15, 15, 1), batch_size=1, verbose=0)[0]
        temp_pos = numpy.random.choice(225, p=temp_predictions)
        temp_parsed_pos = numpy.unravel_index(temp_pos, (15, 15))
        if board[temp_parsed_pos] != 0:
            if temp_high % 2 == 0:
                res[checking_pos] += fine * gamma ** (temp_high - 1)
            else:
                res[checking_pos] -= bonus * gamma ** (temp_high - 1)
            return
        board[temp_parsed_pos] = 1
        next_color = 'white'
        if color == 'white':
            next_color = 'black'
            board[temp_parsed_pos] = -1
        if util.check(board, temp_parsed_pos):
            if temp_high % 2 == 0:
                res[checking_pos] -= fine * gamma ** (temp_high - 1)
            else:
                res[checking_pos] += bonus * gamma ** (temp_high - 1)
            return
        if temp_high < max_high:
            rollout(board, temp_high + 1, next_color, checking_pos,
                    sum_logs + numpy.log(temp_predictions[temp_pos]))
        else:
            if logs[checking_pos]:
                logs[checking_pos] = max(
                    logs[checking_pos],
                    sum_logs + numpy.log(temp_predictions[temp_pos]))
            else:
                logs[checking_pos] = sum_logs + numpy.log(
                    temp_predictions[temp_pos])

    i = 0
    while time() - start < self._timeout and i < self._samples:
        i += 1
        pos = numpy.random.choice(225, p=predictions)
        parsed_pos = numpy.unravel_index(pos, (15, 15))
        n[pos] += 1
        if board[parsed_pos] != 0:
            res[pos] -= 1
            continue
        board[parsed_pos] = 1
        color = 'white'
        if self._color == 'white':
            color = 'black'
            board[parsed_pos] = -1
        if util.check(board, parsed_pos):
            res[pos] += res.max() + 1000
            break
        rollout(numpy.copy(board), 1, color, pos, numpy.log(predictions[pos]))
        board[parsed_pos] = 0
    # Cells never reached at full depth get a strong log penalty.
    unseen = numpy.where(logs == 0.0)[0]
    logs[unseen] = logs.min() - 100.0
    value = logs * available * (1 + checker)
    value /= value.sum()
    if numpy.max(value) > 0.8:
        code_move = numpy.argmax(value)
    else:
        if self._verbose:
            print('----!!!----')
        code_move = numpy.argmax((n + res) * available * (1 + checker))
    ans = numpy.zeros(225)
    ans[code_move] = 1
    if self._verbose:
        for elem in numpy.where(res != 0)[0]:
            print(util.to_move(numpy.unravel_index(int(elem), (15, 15))),
                  'R:', res[int(elem)], 'V:', value[int(elem)],
                  'N:', n[int(elem)])
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]),
              'working time:', time() - start, 'iterations:', i)
    return ans.reshape(1, 225)
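# Hedged numeric illustration of the geometric discounting used by `rollout`
# above (toy numbers; assumes gamma = 0.9 and bonus == fine == 1, which need
# not match the agent's real settings): an outcome found at depth d changes
# the root move's score by gamma ** (d - 1), so shallow outcomes dominate.
def _demo_discount(gamma=0.9):
    return [gamma ** (d - 1) for d in (1, 2, 3)]   # [1.0, 0.9, 0.81]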
def policy(self, game):
    self._start_time = time()
    if not self._color:
        # Infer our colour from the number of stones already played.
        if (225 - len(util.list_positions(game.board(),
                                          renju.Player.NONE))) % 2 == 1:
            self._color = 'white'
        else:
            self._color = 'black'
    if len(util.list_positions(game.board(), renju.Player.NONE)) == 225:
        res = numpy.zeros((225, 1))
        res[112] = 1
        return res.reshape((1, 225))
    # Try to reuse the search tree by replaying the last two moves into it.
    done = True
    if (self._root and
            len(util.list_positions(game.board(), renju.Player.NONE)) < 224):
        to_do_moves = game._positions[-2:]
        for move in to_do_moves:
            if not self.make_move(move[0] * 15 + move[1]):
                done = False
    else:
        done = False
    if self._verbose:
        print(done)
    checker = numpy.zeros(225)
    available = numpy.zeros(225)
    self._board = numpy.copy(-game.board())
    for parsed_pos in util.list_positions(self._board, renju.Player.NONE):
        pos = parsed_pos[0] * 15 + parsed_pos[1]
        parsed_pos = tuple(parsed_pos)
        self._board[parsed_pos] = 1
        if util.check(self._board, parsed_pos):
            checker[pos] += 1
        self._board[parsed_pos] = -1
        if util.check(self._board, parsed_pos):
            checker[pos] += 1
        self._board[parsed_pos] = 0
        available[pos] = 1
    if not done:
        # Tree reuse failed: rebuild the root from the current position.
        self._root = Node(self._board, self._color,
                          self._node_model_black, self._node_model_white)
    self.tree_search()
    # Mean reward per visit over sufficiently visited legal moves, strongly
    # boosted where `checker` found an immediate win or block.
    values = ((self._root._N > self._iters / 5)
              * self._root._R / (1 + self._root._N)
              * available * (1 + 10 * checker))
    if numpy.max(values) > self._min_prob:
        code_move = numpy.argmax(values)
    else:
        # Fall back to the most visited move.
        code_move = numpy.argmax(self._root._N * available * (1 + 10 * checker))
    res = numpy.zeros(225)
    res[code_move] = 1
    if self._verbose:
        for elem in numpy.where(self._root._R != 0)[0]:
            try:
                print(util.to_move(numpy.unravel_index(int(elem), (15, 15))),
                      'R:', self._root._R[int(elem)],
                      'V:', values[int(elem)])
            except Exception:
                print(numpy.where(self._root._R != 0))
                continue
        print(self._name + ':', util.to_move([code_move // 15, code_move % 15]),
              'working time:', time() - self._start_time,
              'iterations:', self._iters)
    return res.reshape(1, 225)
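# Hedged sketch of the `Node` interface the MCTS policy above relies on (the
# real class is not shown in this file; field meanings are inferred from how
# `self._root._N` and `self._root._R` are read): both are length-225 arrays,
# one slot per board cell, filled in by `tree_search`.
class NodeSketch:
    def __init__(self):
        self._N = numpy.zeros(225)   # visit count per candidate move
        self._R = numpy.zeros(225)   # accumulated reward per candidate move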