Пример #1
0
def main(unused_argv):
    """Play ``FLAGS.num_eval`` Go episodes between two ``RandomAgent``s.

    On every step the agent indexed by the time step's
    ``"current_player"`` observation picks an action, the action is applied
    to the environment, and the first entry of the resulting
    ``"info_state"`` observation is printed.  When an episode ends, every
    agent is stepped once more with the terminal time step, and the rewards
    plus the final board are printed.  The total wall-clock time is printed
    at the end.

    Args:
        unused_argv: Ignored.
    """
    begin = time.time()
    env = Go()
    agents = [RandomAgent(idx) for idx in range(2)]

    for _ in range(FLAGS.num_eval):
        time_step = env.reset()
        while not time_step.last():
            # Both players are handled identically, so no per-player branch
            # is needed: just dispatch to the agent whose turn it is.
            player_id = time_step.observations["current_player"]
            agent_output = agents[player_id].step(time_step)
            time_step = env.step(agent_output.action)
            print(time_step.observations["info_state"][0])

        # Episode is over, step all agents with final info state.
        for agent in agents:
            agent.step(time_step)
        print(time_step.rewards, env.get_current_board())

    print('Time elapsed:', time.time() - begin)
Пример #2
0
import random

# import os
#
# os.environ['BOARD_SIZE'] = '7'
from environment.GoEnv import Go
import time

def sample_legal_move(legal_moves):
    """Draw a random move index with probability proportional to its weight.

    Args:
        legal_moves: 1-D NumPy array of non-negative weights, one per move;
            a zero entry marks a move that must never be selected.

    Returns:
        The index (NumPy integer scalar) of the selected move.  The entry of
        ``legal_moves`` at that index is always strictly positive.

    Raises:
        ValueError: If ``legal_moves`` is empty or all of its weights are
            zero, i.e. there is nothing to sample from.
    """
    cumulative = legal_moves.cumsum()
    if cumulative.size == 0 or cumulative[-1] <= 0:
        raise ValueError("no legal moves to sample from")
    # Scale the draw by the total instead of normalising the weights: this
    # keeps the threshold strictly below ``cumulative[-1]`` so the result can
    # never fall past the end of the array because of rounding.
    threshold = random.random() * cumulative[-1]
    # side='right' returns the first index whose cumulative weight is
    # strictly greater than the threshold, which skips zero-weight moves
    # even when the draw is exactly 0.0.
    return cumulative.searchsorted(threshold, side='right')


if __name__ == '__main__':
    # Play ten games of uniformly random legal moves and time the whole run.
    begin = time.time()
    env = Go()
    for _ in range(10):
        env.reset()
        while True:
            fcoord = sample_legal_move(env.get_all_legal_moves())
            # Only the ``done`` flag of the step result is needed here.
            _, done, _, _ = env.step(fcoord)
            print(fcoord, env.get_current_board())
            if done:
                break
    print('Time elapsed:', time.time() - begin)