def run_sim(turn):  # simulate game forward
    arch.sess.run(arch.session_backup)
    pu.session_backup()

    for sim in range(N_SIM):
        # backup then make next move
        for turn_sim in range(turn, N_TURNS + 1):
            for player in [0, 1]:
                # get valid moves, network policy and value estimates:
                valid_mv_map, pol, val = arch.sess.run(
                    [arch.valid_mv_map, arch.pol, arch.val],
                    feed_dict=ret_d(player))

                # backup visit Q values
                if turn_sim != turn:
                    pu.backup_visit(player, val)

                pu.add_valid_mvs(player, valid_mv_map)  # register valid moves in tree

                # choose moves based on policy and Q values (the latter already stored in tree)
                to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                pu.register_mv(player, to_coords)  # register move in tree

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: player,
                                  arch.to_coords_input: to_coords
                              })  # move network (update GPU vars)

        # backup terminal state
        for player in [0, 1]:
            winner = arch.sess.run(arch.winner, feed_dict=ret_d(player))
            pu.backup_visit(player, winner)

        # return back to previous node in tree
        arch.sess.run(arch.session_restore)
        pu.session_restore()
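# Note: ret_d() is defined elsewhere in this codebase. Judging purely from how
# it is called here, it presumably builds the feed dict telling the graph which
# player is to move -- a sketch (an assumption, not the actual source):
#
# def ret_d(player):
#     return {arch.moving_player: player}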
def run_sim(turn, starting_player, scopes=('main', 'main')):
    arch.sess.run(arch.session_backup)
    pu.session_backup()

    for sim in range(N_SIM):
        # backup then make next move
        for turn_sim in range(turn, N_TURNS + 1):
            for player, s in zip([0, 1], scopes):
                if turn_sim == turn and starting_player == 1 and player == 0:
                    # skip player 0, who has already moved
                    continue

                # get valid moves, network policy and value estimates:
                valid_mv_map, pol, val = arch.sess.run(
                    [arch.valid_mv_map, arch.pol[s], arch.val[s]],
                    feed_dict=ret_d(player))

                # backup visit Q values
                if turn_sim != turn:
                    pu.backup_visit(player, np.array(val, dtype='single'))

                pu.add_valid_mvs(player, valid_mv_map)  # register valid moves in tree

                # choose moves based on policy and Q values (the latter already stored in tree)
                to_coords = pu.choose_moves(
                    player, np.array(pol, dtype='single'), CPUCT)[0]

                pu.register_mv(player, np.array(to_coords, dtype='int32'))  # register move in tree

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: player,
                                  arch.to_coords_input: to_coords
                              })  # move network (update GPU vars)

        # backup terminal state: update tree with the values (outcomes) of each game
        winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)),
                          dtype='single')
        pu.backup_visit(0, winner)
        pu.backup_visit(1, -winner)

        # return back to previous node in tree
        arch.sess.run(arch.session_restore)  # reset gpu game state
        pu.session_restore()  # reset cpu tree state
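# Illustrative sketch only (not the actual implementation): pu.choose_moves()
# lives in the external tree module. The PUCT selection rule it presumably
# applies (AlphaGo-Zero style) is, per move a:
#
#   U(a) = Q(a) + CPUCT * P(a) * sqrt(sum_b N(b)) / (1 + N(a))
#
# where Q is the mean backed-up value, P the network prior, and N the visit
# count. A minimal numpy version for a single game (all names here are
# hypothetical except CPUCT; assumes numpy is imported as np, as elsewhere
# in this file):
def puct_select(Q, P, N, valid_mask, c_puct=CPUCT):
    # Q, P, N: float arrays over moves; valid_mask: boolean mask of legal moves
    u = Q + c_puct * P * np.sqrt(N.sum()) / (1. + N)
    u[~valid_mask] = -np.inf  # never select an illegal move
    return u.argmax()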
def nn_mv():
    global Q_map, P_map, visit_count_map
    global Q_map_next, P_map_next, visit_count_map_next

    t_start = time.time()
    arch.sess.run(arch.session_backup)
    pu.init_tree()
    pu.session_backup()

    if run_net:
        # no tree search: move directly from the network policy
        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs, feed_dict=ret_d(0))
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs, feed_dict=ret_d(0))
        Q_map, P_map, visit_count_map = ret_stats(0)
    else:
        for sim in range(N_SIM):
            # initial moves
            for player in [0, 1]:
                valid_mv_map, pol = arch.sess.run(
                    [arch.valid_mv_map, arch.pol], feed_dict=ret_d(player))

                pu.add_valid_mvs(player, valid_mv_map)
                to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                pu.register_mv(player, to_coords)

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: player,
                                  arch.to_coords_input: to_coords
                              })

            # backup then make next move
            for turn_sim in range(turn, N_TURNS):
                for player in [0, 1]:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol, arch.val],
                        feed_dict=ret_d(player))

                    pu.backup_visit(player, val)
                    pu.add_valid_mvs(player, valid_mv_map)
                    to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                    pu.register_mv(player, to_coords)

                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={
                                      arch.moving_player: player,
                                      arch.to_coords_input: to_coords
                                  })

            # backup terminal state
            for player in [0, 1]:
                winner = arch.sess.run(arch.winner, feed_dict=ret_d(player))
                pu.backup_visit(player, winner)

            arch.sess.run(arch.session_restore)
            pu.session_restore()

            if sim % 20 == 0:
                '''Q_map, P_map, visit_count_map = ret_stats(0)
                arch.sess.run(arch.tree_det_move_unit, feed_dict=ret_d(0))
                Q_map_next, P_map_next, visit_count_map_next = ret_stats(1)

                arch.sess.run(arch.session_restore)
                pu.session_restore()

                draw(True)
                pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))
                '''
                print 'simulation', sim, 'total elapsed time', time.time() - t_start

        ### make move
        Q_map, P_map, visit_count_map = ret_stats(0)

        # also runs arch.imgs to refresh the board image; keep only the last two outputs
        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol], feed_dict=ret_d(0))[1:]

        pu.add_valid_mvs(0, valid_mv_map)  # the network moves as player 0
        visit_count_map = pu.choose_moves(0, pol, CPUCT)[-1]

        to_coords = arch.sess.run(
            [arch.tree_det_visit_coord, arch.tree_det_move_unit],
            feed_dict={
                arch.moving_player: 0,
                arch.visit_count_map: visit_count_map,
                arch.dir_pre: dir_pre,
                arch.dir_a: DIR_A
            })[0]

        pu.register_mv(0, to_coords)
        pu.prune_tree()

    print time.time() - t_start
    return arch.sess.run(arch.gm_vars['board'])[0]
def session_restore():
    # restore tree state
    arch.sess.run(arch.session_restore)
    pu.session_restore()
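# Sketch of the matching backup helper (an assumption -- it is not present in
# this file, but the two calls below appear paired throughout it):
def session_backup():
    # snapshot tf game state and cpu tree state
    arch.sess.run(arch.session_backup)
    pu.session_backup()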
def nn_mv():
    global Q_map, P_map, visit_count_map, valid_mv_map, pol
    global Q_map_next, P_map_next, visit_count_map_next, to_coords

    t_start = time.time()
    arch.sess.run(arch.session_backup)

    #### make most probable move; do not use tree search
    if run_one_pass_only:
        # the movement ops were not defined for 'eval32', so get the policy
        # from that network, then feed it into the ops defined under 'eval'
        d = ret_d(NET_PLAYER)
        imgs = arch.sess.run(arch.imgs, feed_dict=d)
        d[arch.imgs32] = np.asarray(imgs, dtype='float')
        pol = arch.sess.run(arch.pol[net], feed_dict=d)

        d = ret_d(NET_PLAYER)
        d[arch.pol['eval']] = pol
        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs['eval'], feed_dict=d)
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs['eval'], feed_dict=d)

        #Q_map, P_map, visit_count_map = ret_stats(0)

    ##### use tree search
    else:
        #pu.init_tree()
        pu.session_backup()

        sim = 0
        # each loop is one simulation
        while True:
            if ((time.time() - t_start) > TIME_MIN) and (sim >= SIM_MIN):
                break

            # backup then make next move
            # (this loop iterates over one full game-play from the present turn)
            for turn_sim in range(turn, max(N_TURNS + 1, turn + TURN_MIN)):
                for player in [0, 1]:
                    if turn_sim == turn and human_player() == 0 and player == 0:
                        # skip player 0 (human), who has already moved
                        continue

                    # get valid moves, network policy and value estimates:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol[net], arch.val[net]],
                        feed_dict=ret_d(player))

                    # backup visit Q values
                    if turn_sim != turn:
                        pu.backup_visit(player, np.array(val, dtype='single'))

                    pu.add_valid_mvs(player, valid_mv_map)  # register valid moves in tree

                    # choose moves based on policy and Q values (the latter already stored in tree)
                    to_coords = pu.choose_moves(
                        player, np.array(pol, dtype='single'), CPUCT)[0]

                    pu.register_mv(player, np.array(to_coords, dtype='int32'))  # register move in tree

                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={
                                      arch.moving_player: player,
                                      arch.to_coords_input: to_coords
                                  })  # move network (update GPU vars)

            # backup terminal state
            winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)),
                              dtype='single')
            pu.backup_visit(0, winner)
            pu.backup_visit(1, -winner)

            # return to previous node in tree
            arch.sess.run(arch.session_restore)  # reset gpu game state
            pu.session_restore()  # reset cpu tree state

            ######################
            # print stats from tree
            if sim % 20 == 0:
                # get valid moves at the current board position:
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(NET_PLAYER))[1]

                pu.add_valid_mvs(NET_PLAYER, valid_mv_map)  # register valid moves in tree

                # to feed back into tf (entries for all 128 games, not just 1)
                visit_count_map_128 = pu.choose_moves(
                    NET_PLAYER, np.array(pol, dtype='single'), CPUCT)[-1]

                Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)  # stats we will show on screen

                # move network to where it estimates its best move is
                to_coords = arch.sess.run(
                    [
                        arch.nn_max_prob_to_coords_valid_mvs[net],
                        arch.nn_max_prob_move_unit_valid_mvs[net]
                    ],
                    feed_dict={
                        arch.moving_player: NET_PLAYER,
                        arch.pol[net]: visit_count_map_128
                    })[0]

                pu.register_mv(NET_PLAYER, np.asarray(to_coords, dtype='int32'))  # register move in tree

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: NET_PLAYER,
                                  arch.to_coords_input: to_coords
                              })  # move network (update GPU vars)

                # get the network's tree estimates of where it thinks you will
                # move after it moves
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(human_player()))[1]

                pu.add_valid_mvs(human_player(), valid_mv_map)  # register valid moves in tree
                Q_map_next, P_map_next, visit_count_map_next = ret_stats(human_player())

                arch.sess.run(arch.session_restore)  # restore prior tf game state
                pu.session_restore()  # restore prior tree state

                draw(True)
                pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))
                print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start)

            sim += 1

        ### make move
        # first get valid moves and the current policy at the board position
        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol[net]],
            feed_dict=ret_d(NET_PLAYER))[1:]

        pu.add_valid_mvs(NET_PLAYER, valid_mv_map)  # set in tree

        # to feed back into tf (entries for all 128 games, not just 1)
        visit_count_map_128 = pu.choose_moves(
            NET_PLAYER, np.array(pol, dtype='single'), CPUCT)[-1]

        Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)

        # make moves as if this were still part of the self-play (max visit count):
        #to_coords = arch.sess.run([arch.tree_det_visit_coord, arch.tree_det_move_unit],
        #                          feed_dict={arch.moving_player: 0,
        #                                     arch.visit_count_map: visit_count_map})[0]

        # move to max visited node:
        #if turn != 0:
        to_coords = arch.sess.run([
            arch.nn_max_prob_to_coords_valid_mvs[net],
            arch.nn_max_prob_move_unit_valid_mvs[net]
        ],
                                  feed_dict={
                                      arch.moving_player: NET_PLAYER,
                                      arch.pol[net]: visit_count_map_128
                                  })[0]

        # randomly move in proportion to visit counts:
        #else:
        #    to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit],
        #                              feed_dict={arch.moving_player: 0,
        #                                         arch.visit_count_map: visit_count_map})[0]

        pu.register_mv(NET_PLAYER, np.array(to_coords, dtype='int32'))

        print 'pruning...'
        pu.prune_tree(1)  # 0: prune all games in batch, 1: prune only first game
        print time.time() - t_start

    print 'finished'
    return arch.sess.run(arch.gm_vars['board'])[0]
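# Usage sketch (an assumption, not from the original source): nn_mv() reads the
# module-level `turn`, searches (or does a single forward pass when
# run_one_pass_only is set), plays the network's move, and returns the first
# game's board. A driving loop might look like:
#
# while turn < N_TURNS:
#     ...                # apply the human's move when human_player() == 0
#     board = nn_mv()    # network thinks, moves, and prunes the tree
#     draw(True)
#     turn += 1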