Example #1
def run_sim(turn): # simulate game forward
	arch.sess.run(arch.session_backup)
	pu.session_backup()

	for sim in range(N_SIM):
		# backup then make next move
		for turn_sim in range(turn, N_TURNS+1):
			for player in [0,1]:
				# get valid moves, network policy and value estimates:
				valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol, arch.val], feed_dict=ret_d(player))

				# backup visit Q values
				if turn_sim != turn:
					pu.backup_visit(player, val)

				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
				to_coords = pu.choose_moves(player, pol, CPUCT)[0] # choose moves based on policy and Q values (the latter are already stored in the tree)
				pu.register_mv(player, to_coords) # register move in tree

				arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU vars)
		
		# backup terminal state
		for player in [0,1]:
			winner = arch.sess.run(arch.winner, feed_dict=ret_d(player))
			pu.backup_visit(player, winner)
		
		# return move back to previous node in tree
		arch.sess.run(arch.session_restore)
		pu.session_restore()
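
The ret_d() helper used throughout these examples is not shown. A minimal sketch of what it is assumed to do, namely build the feed dict naming the moving player (the body below is an assumption, not code from the examples):

def ret_d(player):
    # assumed: map the graph's moving-player placeholder to the given player
    return {arch.moving_player: player}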
Example #2
def run_sim(turn, starting_player, scopes=['main', 'main']):
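    # 'scopes' selects the variable scope ('main' or 'eval') whose policy/value ops each player uses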
    arch.sess.run(arch.session_backup)
    pu.session_backup()

    for sim in range(N_SIM):
        # backup then make next move
        for turn_sim in range(turn, N_TURNS + 1):
            for player, s in zip([0, 1], scopes):
                if turn_sim == turn and starting_player == 1 and player == 0:  # skip player 0, has already moved
                    continue

                # get valid moves, network policy and value estimates:
                valid_mv_map, pol, val = arch.sess.run(
                    [arch.valid_mv_map, arch.pol[s], arch.val[s]],
                    feed_dict=ret_d(player))

                # backup visit Q values
                if turn_sim != turn:
                    pu.backup_visit(player, np.array(val, dtype='single'))

                pu.add_valid_mvs(player, valid_mv_map)  # register valid moves in tree
                # choose moves based on policy and Q values (the latter are already stored in the tree)
                to_coords = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[0]
                pu.register_mv(player, np.array(to_coords, dtype='int32'))  # register move in tree

                # move network (update GPU vars)
                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={arch.moving_player: player,
                                         arch.to_coords_input: to_coords})

        ############ backup terminal state
        winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)),
                          dtype='single')

        # update tree with the values (outcomes) of each game
        pu.backup_visit(0, winner)
        pu.backup_visit(1, -winner)

        # return move back to previous node in tree
        arch.sess.run(arch.session_restore)  # reset gpu game state
        pu.session_restore()  # reset cpu tree state
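
pu.choose_moves() combines the network policy with the Q values and visit counts already stored in the tree. A self-contained sketch of the PUCT scoring rule it is assumed to implement (Q, P, N, and puct_choose are illustrative names, not part of this codebase):

import numpy as np

def puct_choose(Q, P, N, c_puct):
    # PUCT score per move: exploitation (Q) plus an exploration bonus that
    # favors high-prior, rarely visited moves
    scores = Q + c_puct * P * np.sqrt(N.sum()) / (1.0 + N)
    return int(np.argmax(scores))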
Example #3
def nn_mv():
    global Q_map, P_map, visit_count_map
    global Q_map_next, P_map_next, visit_count_map_next

    t_start = time.time()
    arch.sess.run(arch.session_backup)
    pu.init_tree()
    pu.session_backup()

    if run_net:
        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs, feed_dict=ret_d(0))
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs,
                          feed_dict=ret_d(0))

        Q_map, P_map, visit_count_map = ret_stats(0)
    else:
        for sim in range(N_SIM):
            # initial moves
            for player in [0, 1]:
                valid_mv_map, pol = arch.sess.run(
                    [arch.valid_mv_map, arch.pol], feed_dict=ret_d(player))

                pu.add_valid_mvs(player, valid_mv_map)
                to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                pu.register_mv(player, to_coords)

                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={
                                  arch.moving_player: player,
                                  arch.to_coords_input: to_coords
                              })
            # backup then make next move
            for turn_sim in range(turn, N_TURNS):
                for player in [0, 1]:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol, arch.val],
                        feed_dict=ret_d(player))

                    pu.backup_visit(player, val)

                    pu.add_valid_mvs(player, valid_mv_map)
                    to_coords = pu.choose_moves(player, pol, CPUCT)[0]
                    pu.register_mv(player, to_coords)

                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={
                                      arch.moving_player: player,
                                      arch.to_coords_input: to_coords
                                  })

            # backup terminal state
            for player in [0, 1]:
                winner = arch.sess.run(arch.winner, feed_dict=ret_d(player))
                pu.backup_visit(player, winner)

            arch.sess.run(arch.session_restore)
            pu.session_restore()

            if sim % 20 == 0:
                '''
                Q_map, P_map, visit_count_map = ret_stats(0)
                arch.sess.run(arch.tree_det_move_unit, feed_dict=ret_d(0))
                Q_map_next, P_map_next, visit_count_map_next = ret_stats(1)

                arch.sess.run(arch.session_restore)
                pu.session_restore()

                draw(True)
                pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))
                '''
                print 'simulation', sim, 'total elapsed time', time.time() - t_start

        ### make move
        Q_map, P_map, visit_count_map = ret_stats(0)

        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol], feed_dict=ret_d(0))[1:]

        pu.add_valid_mvs(0, valid_mv_map)  # player 0 is to move here, not the stale loop variable
        visit_count_map = pu.choose_moves(0, pol, CPUCT)[-1]

        to_coords = arch.sess.run(
            [arch.tree_det_visit_coord, arch.tree_det_move_unit],
            feed_dict={
                arch.moving_player: 0,
                arch.visit_count_map: visit_count_map,
                arch.dir_pre: dir_pre,
                arch.dir_a: DIR_A
            })[0]

        pu.register_mv(0, to_coords)

        pu.prune_tree()
        print time.time() - t_start

    return arch.sess.run(arch.gm_vars['board'])[0]
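
The dir_pre / DIR_A feeds in the move step above presumably control AlphaZero-style Dirichlet exploration noise on the root priors. A hedged, self-contained sketch of that mixing (add_dirichlet_noise, eps, and alpha are illustrative, not values from this code):

import numpy as np

def add_dirichlet_noise(priors, eps=0.25, alpha=0.03, rng=np.random):
    # mix the network priors with Dirichlet noise to encourage exploration at the root
    noise = rng.dirichlet([alpha] * len(priors))
    return (1.0 - eps) * np.asarray(priors) + eps * noise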
Example #4
def nn_mv():
    global Q_map, P_map, visit_count_map, valid_mv_map, pol
    global Q_map_next, P_map_next, visit_count_map_next, to_coords

    t_start = time.time()
    arch.sess.run(arch.session_backup)

    #### make most probable mv, do not use tree search
    if run_one_pass_only:
        # the 'eval32' movement ops were not defined, so get the policy from the network and then use the ops in 'eval' (where they were defined)
        d = ret_d(NET_PLAYER)
        imgs = arch.sess.run(arch.imgs, feed_dict=d)
        d[arch.imgs32] = np.asarray(imgs, dtype='float')
        pol = arch.sess.run(arch.pol[net], feed_dict=d)
        d = ret_d(NET_PLAYER)
        d[arch.pol['eval']] = pol

        if turn == 0:
            arch.sess.run(arch.nn_prob_move_unit_valid_mvs['eval'],
                          feed_dict=d)
        else:
            arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs['eval'],
                          feed_dict=d)

        #Q_map, P_map, visit_count_map = ret_stats(0)

    ##### use tree search
    else:
        #pu.init_tree()
        pu.session_backup()

        sim = 0
        # each loop is one simulation
        while True:
            if ((time.time() - t_start) > TIME_MIN) and (sim >= SIM_MIN):
                break

            # backup then make next move
            # (this loop iterates over one full game play-out from the present turn)
            for turn_sim in range(turn, max(N_TURNS + 1, turn + TURN_MIN)):
                for player in [0, 1]:
                    # skip player 0 (the human) on the first simulated turn: they have already moved
                    if turn_sim == turn and human_player() == 0 and player == 0:
                        continue

                    # get valid moves, network policy and value estimates:
                    valid_mv_map, pol, val = arch.sess.run(
                        [arch.valid_mv_map, arch.pol[net], arch.val[net]],
                        feed_dict=ret_d(player))

                    # backup visit Q values
                    if turn_sim != turn:
                        pu.backup_visit(player, np.array(val, dtype='single'))

                    pu.add_valid_mvs(player, valid_mv_map)  # register valid moves in tree
                    # choose moves based on policy and Q values (the latter are already stored in the tree)
                    to_coords = pu.choose_moves(player, np.array(pol, dtype='float32'), CPUCT)[0]

                    pu.register_mv(player, np.array(to_coords, dtype='int32'))  # register move in tree
                    # move network (update GPU vars)
                    arch.sess.run(arch.move_frm_inputs,
                                  feed_dict={arch.moving_player: player,
                                             arch.to_coords_input: to_coords})

            # backup terminal state
            winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)),
                              dtype='single')
            pu.backup_visit(0, winner)
            pu.backup_visit(1, -winner)

            # return move to previous node in tree
            arch.sess.run(arch.session_restore)  # reset gpu game state
            pu.session_restore()  # reset cpu tree state

            ######################
            # print stats from tree
            if sim % 20 == 0:
                # get valid moves, network policy and value estimates:
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(NET_PLAYER))[1]
                pu.add_valid_mvs(NET_PLAYER, valid_mv_map)  # register valid moves in tree

                # visit counts to feed back into tf (entries for all 128 games, not just 1)
                visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1]
                Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)  # stats we will show on screen

                # move the network to where it estimates its best move is
                to_coords = arch.sess.run(
                    [arch.nn_max_prob_to_coords_valid_mvs[net],
                     arch.nn_max_prob_move_unit_valid_mvs[net]],
                    feed_dict={arch.moving_player: NET_PLAYER,
                               arch.pol[net]: visit_count_map_128})[0]

                pu.register_mv(NET_PLAYER, np.asarray(to_coords, dtype='int32'))  # register move in tree
                # move network (update GPU vars)
                arch.sess.run(arch.move_frm_inputs,
                              feed_dict={arch.moving_player: NET_PLAYER,
                                         arch.to_coords_input: to_coords})

                # get the network's tree estimates of where it thinks you will move after it moves
                valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map],
                                             feed_dict=ret_d(human_player()))[1]
                pu.add_valid_mvs(human_player(), valid_mv_map)  # register valid moves in tree

                Q_map_next, P_map_next, visit_count_map_next = ret_stats(human_player())

                arch.sess.run(arch.session_restore)  # restore prior tf game state
                pu.session_restore()  # restore prior tree

                draw(True)
                pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))

                print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start)

            sim += 1

        ### make move

        # first get valid moves and current policy at board position
        valid_mv_map, pol = arch.sess.run(
            [arch.imgs, arch.valid_mv_map, arch.pol[net]],
            feed_dict=ret_d(NET_PLAYER))[1:]
        pu.add_valid_mvs(NET_PLAYER, valid_mv_map)  # set in tree

        # visit counts to feed back into tf (entries for all 128 games, not just 1)
        visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1]
        Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)

        # make the move as if this were still part of self-play (max visit count)
        #to_coords = arch.sess.run([arch.tree_det_visit_coord, arch.tree_det_move_unit], feed_dict={arch.moving_player: 0,
        #				arch.visit_count_map: visit_count_map})[0]

        # move to max visited node:
        #if turn != 0:
        to_coords = arch.sess.run(
            [arch.nn_max_prob_to_coords_valid_mvs[net],
             arch.nn_max_prob_move_unit_valid_mvs[net]],
            feed_dict={arch.moving_player: NET_PLAYER,
                       arch.pol[net]: visit_count_map_128})[0]

        # randomly move proportionately to visit counts
        #else:
        #	to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: 0,
        #			arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts

        pu.register_mv(NET_PLAYER, np.array(to_coords, dtype='int32'))

        print 'pruning...'
        pu.prune_tree(1)  # 0: prune all games in batch, 1: prune only first game
        print time.time() - t_start

    print 'finished'
    return arch.sess.run(arch.gm_vars['board'])[0]
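
The commented-out branch above ("randomly move proportionately to visit counts") corresponds to AlphaZero-style temperature sampling over visit counts. A self-contained sketch (select_from_visits and tau are illustrative, not part of this codebase):

import numpy as np

def select_from_visits(visit_counts, tau=1.0, rng=np.random):
    # tau -> 0 recovers the deterministic max-visit move used above;
    # tau = 1 samples in direct proportion to visit counts
    counts = np.asarray(visit_counts, dtype='float64')
    if tau == 0:
        return int(counts.argmax())
    probs = counts ** (1.0 / tau)
    probs /= probs.sum()
    return int(rng.choice(len(counts), p=probs))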