def _go_to_child(self, parent, child_index):
    if debug:
        print "go_to @", self._stack_string()
    move = naf.policy_index_point(self.game.turn, child_index)
    subnode = parent.subnodes[child_index]
    self.game.play(move)
    parent.Ns[child_index] += 1
    if subnode:
        gc_index = self._pick_expand_index(subnode)
        if gc_index == "wait":
            # Nothing expandable right now; roll back the visit count.
            parent.Ns[child_index] -= 1
            r = 0
        elif gc_index == "lose":
            # Every reply loses for the player to move at subnode.
            subnode.proven = True
            subnode.winning_index = None
            subnode.score = -1
            self.finished_leaves.append(subnode)
            r = 1
        else:
            assert gc_index >= 0
            r = self._go_to_child(subnode, gc_index)
    else:
        parent.subnodes[child_index] = self._expand_leaf(parent, child_index)
        r = 1
    self.game.undo()
    return r
def pick_move(self, game):
    if self.use_swap and len(game.history) < 2:
        if len(game.history) == 0:
            self.report = "swapmodel"
            return swapmodel.choose_first_move()
        elif swapmodel.want_swap(game.history[0]):
            self.report = "swapmodel"
            return "swap"
        # else didn't want swap so compute a regular move

    N = self.nm.mcts(game, self.num_trials)
    self.report = self.nm.report

    # When a forcing win or forcing draw move is found, there's no policy
    # array returned
    if isinstance(N, (str, twixt.Point)):
        return N

    if self.temperature == 0.0:
        mx = N.max()
        weights = numpy.where(N == mx, 1.0, 0.0)
    elif self.temperature == 1.0:
        weights = N
    else:
        # General case: exponent 1/T; temperature 0.5 gives N**2.
        weights = N ** (1.0 / self.temperature)

    if self.verbosity >= 2:
        print "weights=", weights

    index = numpy.random.choice(numpy.arange(len(weights)),
                                p=weights / weights.sum())
    return naf.policy_index_point(game, index), N
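# Illustrative sketch, not engine code: how the temperature exponent used in
# pick_move above shapes move sampling. Assumes numpy is imported as in this
# module; the function name is hypothetical. With toy visit counts
# [10, 30, 60], temperature 1.0 samples proportionally to visits
# (p = [0.1, 0.3, 0.6]), temperature 0.5 squares the counts and sharpens
# toward the best move (p ~= [0.022, 0.196, 0.783]), and 0.0 is a
# deterministic argmax over the most-visited moves.
def _demo_temperature_weights(N, temperature):
    if temperature == 0.0:
        weights = numpy.where(N == N.max(), 1.0, 0.0)
    else:
        weights = N ** (1.0 / temperature)
    return weights / weights.sum()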
def _top_moves_str(self):
    # Names of the three highest-prior legal moves at the root.
    indices = numpy.argsort(self.root.P[self.root.LMnz])
    pts = [
        str(naf.policy_index_point(self.game, self.root.LMnz[0][index]))
        for index in indices[-3:]
    ]
    return ":" + ",".join(pts)
def show_game_with_p(game, P):
    # Render the policy array P as a heat map over the board.
    fig, ax = plt.subplots()
    D = numpy.zeros((SIZE, SIZE))
    for i, p in enumerate(P):
        point = naf.policy_index_point(game, i)
        print point, p
        D[point.y, point.x] = p
    ax.imshow(D)
    game_to_axes(game, ax)
    plt.show()
def _node_to_pv(self, node, color):
    # Build the principal variation as a list of move strings by
    # descending the most-visited child chain, tie-broken by Q.
    descend_index = None
    if node.winning_index is not None:
        descend_index = node.winning_index
    else:
        nmax = node.Nf.max()
        if nmax < 2:
            return []
        Nfnz = (node.Nf == nmax).nonzero()
        i1 = node.Q[Nfnz].argmax()
        i2 = Nfnz[0][i1]
        descend_index = i2
    move = naf.policy_index_point(color, descend_index)
    sub = self._node_to_pv(node.subnodes[descend_index], 1 - color)
    sub.insert(0, str(move))
    return sub
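# Hedged sketch, an assumption rather than the actual implementation:
# _principal_var_str is referenced by pick_move below but defined elsewhere;
# one plausible body simply joins the move strings that _node_to_pv collects
# starting from the root.
def _principal_var_str(self):
    return " ".join(self._node_to_pv(self.root, self.game.turn))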
def mcts(self, game, trials):
    """ Using the neural net, compute the move visit count vector """
    self.compute_root(game)
    if self.root is None:
        self.root = self.expand_leaf(game)
        self.history_at_root = list(game.history)

    if self.verbosity >= 1:
        top_ixs = numpy.argsort(self.root.P)[-5:]
        print "eval=%.3f %s" % (self.root.score, " ".join([
            "%s:%d" % (naf.policy_index_point(game, ix),
                       int(self.root.P[ix] * 10000 + 0.5))
            for ix in top_ixs
        ]))

    if not self.root.proven:
        for i in range(trials):
            assert not self.root.proven
            self.visit_node(game, self.root, True, trials - i)
            if self.root.proven:
                break

    if self.root.proven:
        if self.root.winning_move:
            self.report = "fwin" + self.top_moves_str(game)
            return self.root.winning_move
        elif self.root.drawing_move:
            self.report = "fdraw" + self.top_moves_str(game)
            return self.root.drawing_move
        else:
            self.report = "flose" + self.top_moves_str(game)
            return "resign"

    if self.verbosity >= 2:
        print "N=", self.root.N
        print "Q=", self.root.Q

    self.report = ("%6.3f" % (self.root.Q[numpy.argmax(self.root.N)])
                   + self.top_moves_str(game))
    return self.root.N
                    type=str, nargs='+',
                    help='a file with selfplay binary logs')
args = parser.parse_args()

for p in args.positions:
    colon = p.find(':')
    if colon == -1:
        raise Exception("Required position format <file>:<index>")
    filename = p[:colon]
    index = int(p[colon + 1:])

    with open(filename, "rb") as f:
        f.seek(index * naf.LearningState.NUM_BYTES)
        b = f.read(naf.LearningState.NUM_BYTES)
    ts = naf.LearningState.from_bytes(b)

    print "evaluation: %d" % (ts.z),
    for i in range(len(ts.N)):
        if i % 8 == 0:
            print
        print "%3s:%-4d" % (naf.policy_index_point(twixt.Game.WHITE, i),
                            ts.N[i]),
    print

    if args.display:
        import ui
        tb = ui.TwixtBoardWindow(p)
        tb.set_naf(ts.naf)
        tb.win.getMouse()
def pick_move(self, game):
    if len(game.history) == 0:
        self.report = "swapmodel"
        return swapmodel.choose_first_move()
    elif len(game.history) == 1 and self.use_swap:
        if swapmodel.want_swap(game.history[0]):
            self.report = "swapmodel"
            return "swap"
    # If we get here, it is move 2+ and either we cannot or do not swap.

    self.game = game
    self._preload_root()
    self.finished_leaves = []
    self.leaves_waiting = 0
    self.num_evals = 0

    if self.root is None:
        self.root = self._expand_leaf(None, None)
        self.history_at_root = list(self.game.history)
        self._block_until_reply()
        self.num_evals = 1

    # Let's start thinkin'!
    request_block = False
    while not self.root.proven and self.num_evals < self.trials:
        self._process_incoming_data(request_block)
        request_block = False
        self._process_finished_leaves()
        if self.root.proven:
            break
        ei = self._pick_root_expand_index(self.trials - self.num_evals)
        if ei >= 0:
            r = self._go_to_child(self.root, ei)
            if r == 0:
                assert self.leaves_waiting > 0
                request_block = True
        elif self.leaves_waiting == 0:
            # no moves worth evaluating; means we lost.
            self.root.proven = True
        else:
            request_block = True

    # Okay, done thinkin'.  flush out any waiting kids
    while self.leaves_waiting > 0:
        self._block_until_reply()
        self._process_finished_leaves()

    if self.root.proven:
        if self.root.winning_index is not None:
            self.report = "fwin"
            return naf.policy_index_point(game, self.root.winning_index)
        else:
            self.report = "flose"
            return "resign"

    assert numpy.array_equal(self.root.Nf, self.root.Ns), \
        (self.root.Nf.sum(), self.root.Ns.sum())
    good_moves = numpy.where(self.root.Nf == self.root.Nf.max())
    index = random.choice(good_moves[0])
    move = naf.policy_index_point(game, index)
    self.report = "%6.3f %s" % (self.root.Q[index],
                                self._principal_var_str())
    return move
nips.rotate(ROT)
pw, ml = ne.eval_one(nips)
ml = naf.rotate_policy_array(ml[0, :], ROT)
print "pwin=", pw

# Softmax over the legal-move logits, with the standard max-shift for
# numerical stability; illegal moves get probability zero.
LM = naf.legal_move_policy_array(game)
LMnz = LM.nonzero()
max_ml = ml[LMnz].max()
el = numpy.exp(ml - max_ml)
divisor = el[LMnz].sum()
P = el / divisor
# print P
P[(1 - LM).nonzero()] = 0

inds = numpy.argsort(P)
for idx, i in enumerate(reversed(inds[-40:])):
    coord = naf.policy_index_point(game, i)
    possible_moves.append(coord)
    if idx < 10:
        print "%3s %6.2f" % (str(coord), P[i] * 100.0)

# import mpui
# mpui.show_game_with_p(game, P)

if args.thinker:
    thinker = twixt.get_thinker(args.thinker, resources)
    tup = thinker.pick_move(game)
    if isinstance(tup, tuple):
        m, n = tup
    else:
        m = tup
def visit_node(self, game, node, top=False, trials_left=-1):
    """ Visit a node, return the evaluation from the point of view
        of the player currently to play. """
    assert not game.just_won()

    if not node.LM.any():
        # No legal moves left to try: the position is proven.
        node.proven = True
        if node.drawing_move:
            node.score = 0
        else:
            # all moves lose. very sad.
            node.score = -1
        return node.score

    if top and self.smart_root:
        # Only consider moves that could still end up with the most
        # visits given the remaining trials ("winnables").
        maxn = node.N[node.LMnz].max()
        winnables = (node.N > maxn - trials_left) & node.LM
        num_winnables = numpy.count_nonzero(winnables)
        assert num_winnables > 0, (maxn, trials_left,
                                   numpy.array_str(node.N),
                                   numpy.array_str(node.LM))
        if num_winnables == 1:
            index = winnables.argmax()
        else:
            maxes = (node.N == maxn)
            num_maxes = numpy.count_nonzero(maxes)
            assert num_maxes > 0
            if num_maxes == 1:
                # The sole leader needs no more visits; explore challengers.
                winnables[maxes.argmax()] = 0
            nsum = node.N.sum()  # don't need to filter since all are 0
            stv = math.sqrt(nsum + 1.0)
            U = node.Q + node.P * stv / (1.0 + node.N)
            wnz = numpy.nonzero(winnables)
            nz_index = U[wnz].argmax()
            index = wnz[0][nz_index]
    else:
        # At least one node worth visiting. Figure out which one to visit...
        nsum = node.N.sum()  # don't need to filter since all are 0
        stv = math.sqrt(nsum + 1.0)
        U = node.Q + node.P * stv / (1.0 + node.N)
        nz_index = U[node.LMnz].argmax()
        index = node.LMnz[0][nz_index]

    move = naf.policy_index_point(game.turn, index)

    if top and self.verbosity >= 3:
        # Debug dump of the per-move selection statistics.
        for i in range(len(U)):
            m = naf.policy_index_point(game.turn, i)
            print "%3s U=%.3f Q=%.3f P=%.5f N=%d" % (
                str(m), U[i], node.Q[i], node.P[i], node.N[i])

    if top and self.verbosity > 1:
        print "selecting index=%d move=%s Q=%.3f P=%.5f N=%d" % (
            index, str(move), node.Q[index], node.P[index], node.N[index])

    subnode = node.subnodes[index]
    game.play(move)
    if subnode:
        subscore = -self.visit_node(game, subnode)
    else:
        subnode = self.expand_leaf(game)
        node.subnodes[index] = subnode
        subscore = -subnode.score
    game.undo()

    node.N[index] += 1
    if subnode.proven:
        node.Q[index] = subscore
        node.LM[index] = 0
        node.LMnz = node.LM.nonzero()
        if subscore == 1:
            node.proven = True
            node.winning_move = move
        elif subscore == 0:
            node.drawing_move = move
    else:
        # Incremental running mean of the subtree evaluations.
        node.Q[index] += (subscore - node.Q[index]) / node.N[index]

    return subscore
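# Illustrative sketch, not engine code: the PUCT-style selection rule from
# visit_node above on a toy 3-move node. Assumes math and numpy are imported
# as in this module; the values are made up. U = Q + P * sqrt(sum(N) + 1) /
# (1 + N), so an unvisited move with a decent prior gets a large exploration
# bonus and is selected here despite its neutral Q.
def _demo_puct_selection():
    Q = numpy.array([0.10, -0.05, 0.00])   # running mean values
    P = numpy.array([0.50, 0.30, 0.20])    # network priors
    N = numpy.array([8.0, 2.0, 0.0])       # visit counts
    U = Q + P * math.sqrt(N.sum() + 1.0) / (1.0 + N)
    # U ~= [0.284, 0.282, 0.663] -> selects index 2, the unvisited move.
    print "U=", U, "-> select index", U.argmax()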