def do_self_play(board_size, agent1_filename, agent2_filename, num_games, experience_filename, gpu_frac):
    """Play self-play games between two saved agents, recording agent 1's experience.

    Agent 1 alternates colors each game and its episodes are rewarded +1 for a
    win, -1 for a loss; the combined buffer is serialized to an HDF5 file.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)
    # Seed per process so parallel workers do not replay identical games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent2 = load_agent(agent2_filename)

    # Only agent 1's moves are collected; agent 2 serves as the opponent.
    collector1 = rl.ExperienceCollector()

    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)

        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
        else:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-1)
        # Swap sides for the next game.
        color1 = color1.other

    experience = rl.combine_experience([collector1])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def play_games(args):
    """Worker entry point: play evaluation games between two saved policy agents.

    ``args`` is a tuple ``(agent1_fname, agent2_fname, num_games, board_size,
    gpu_frac)``.  Returns ``(wins, losses)`` from agent 1's point of view.
    """
    agent1_fname, agent2_fname, num_games, board_size, gpu_frac = args

    kerasutil.set_gpu_memory_target(gpu_frac)
    # Per-process seeding keeps parallel workers from mirroring each other.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_fname, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2_fname, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    wins = 0
    losses = 0
    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        # Alternate which agent plays black.
        color1 = color1.other
    return wins, losses
def play_games(args):
    """Worker entry point: evaluate two agents head-to-head at a given temperature.

    ``args`` is a tuple ``(agent1_fname, agent2_fname, num_games, board_size,
    gpu_frac, temperature)``.  Both agents play eps-greedy at the supplied
    temperature.  Returns ``(wins, losses)`` from agent 1's point of view.
    """
    agent1_fname, agent2_fname, num_games, board_size, gpu_frac, temperature = args

    kerasutil.set_gpu_memory_target(gpu_frac)
    # Per-process seeding keeps parallel workers from mirroring each other.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_fname)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_fname)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    wins = 0
    losses = 0
    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        # Alternate which agent plays black.
        color1 = color1.other
    return (wins, losses)
def play_games(agent1_fname, agent2_fname, num_games=480, board_size=19, gpu_frac=0.95, temperature=0.0):
    """Evaluate two saved agents head-to-head, logging per-game timestamps.

    Both agents play eps-greedy at ``temperature``; agent 1 alternates colors.
    Returns ``(wins, losses)`` from agent 1's point of view.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    agent1 = load_agent(agent1_fname)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_fname)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    wins = 0
    losses = 0
    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins , time is %s' % (datetime.datetime.now()))
            wins += 1
        else:
            print('Agent 2 wins, time is %s' % (datetime.datetime.now()))
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        # Alternate which agent plays black.
        color1 = color1.other
    return wins, losses
def do_self_play(board_size, agent1_filename, agent2_filename, num_games, temperature, experience_filename, chunk=100, gpu_frac=0.95):
    """Run self-play in chunks of ``chunk`` games, saving one experience file per chunk.

    Each chunk is written to ``<experience_filename>_<games_so_far>.h5``.
    Agent 1 alternates colors and earns reward +1/-1 per win/loss; both agents
    play eps-greedy at ``temperature``.  Leftover games (``num_games % chunk``)
    are not played, matching the original behavior.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)
    # Seed per process so parallel workers generate distinct games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_filename)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    color1 = Player.black
    num_chunks = num_games // chunk  # floor division; remainder games are skipped
    for current_chunk in range(num_chunks):
        print('Текущая порция %d' % current_chunk)
        collector1 = rl.ExperienceCollector()
        for i in range(chunk):
            print('Симуляция игры %d/%d...' % (i + 1, chunk))
            collector1.begin_episode()
            agent1.set_collector(collector1)

            if color1 == Player.black:
                black_player, white_player = agent1, agent2
            else:
                white_player, black_player = agent1, agent2
            game_record = simulate_game(black_player, white_player, board_size)
            cnt_moves = len(game_record.moves)
            if game_record.winner == color1:
                print('Агент 1 выигрывает, время: %s' % (datetime.datetime.now()))
                collector1.complete_episode(reward=1)
            else:
                print('Агент 2 выигрывает, время: %s' % (datetime.datetime.now()))
                collector1.complete_episode(reward=-1)
            print('Количество ходов в игре = ', cnt_moves)
            color1 = color1.other

        experience = rl.combine_experience([collector1])
        # BUG FIX: the logged path previously omitted the '_' separator that the
        # actual output file used; build the path once and use it for both.
        out_path = experience_filename + '_' + str((current_chunk + 1) * chunk) + '.h5'
        print('Saving experience buffer to %s\n' % out_path)
        with h5py.File(out_path, 'w') as experience_outf:
            experience.serialize(experience_outf)
def do_self_play(args):
    """Worker entry point: self-play with margin-based rewards for both agents.

    ``args`` is a tuple ``(work_dir, board_size, agent1_num, agent2_num,
    num_games, experience_filename, gpu_frac, load_args)``.  Experience from
    both agents is collected (rewards are the signed game margin, 0 on draws),
    combined, and serialized to HDF5.  Returns ``(ag1b, ag2b)``: black's score
    for the games where agent 1 (resp. agent 2) held black.
    """
    (work_dir, board_size, agent1_num, agent2_num, num_games,
     experience_filename, gpu_frac, load_args) = args

    kerasutil.set_gpu_memory_target(gpu_frac)
    # Per-process seeding keeps parallel workers from mirroring each other.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_num, work_dir, load_args)
    agent2 = load_agent(agent2_num, work_dir, load_args)

    collector1 = ExperienceCollector()
    collector2 = ExperienceCollector()
    ag1b = []
    ag2b = []

    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)

        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=game_record.margin)
            collector2.complete_episode(reward=-game_record.margin)
        elif game_record.winner == color1.other:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-game_record.margin)
            collector2.complete_episode(reward=game_record.margin)
        else:
            print('Agents play a draw.')
            collector1.complete_episode(reward=0)
            collector2.complete_episode(reward=0)

        # Record the game score from black's perspective, bucketed by which
        # agent held black this game.
        if game_record.winner == Player.black:
            black_score = game_record.margin
        elif game_record.winner == Player.white:
            black_score = -game_record.margin
        else:
            black_score = 0
        if color1 == Player.black:
            ag1b.append(black_score)
        else:
            ag2b.append(black_score)

        color1 = color1.other

    experience = combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
    return (ag1b, ag2b)
def play_games(args):
    """Worker entry point: evaluation games with margin-based scoring.

    ``args`` is a tuple ``(num_games, board_size, gpu_frac, load_args)``;
    agent identities and directories are read from ``load_args``.  Returns
    ``(ag1b, ag2b)``: black's score (signed margin, 0 on draws) for the games
    where agent 1 (resp. agent 2) held black.
    """
    num_games, board_size, gpu_frac, load_args = args

    kerasutil.set_gpu_memory_target(gpu_frac)
    # Per-process seeding keeps parallel workers from mirroring each other.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(1, load_args.agent1_num, load_args.agent1_dir, load_args)
    agent2 = load_agent(2, load_args.agent2_num, load_args.agent2_dir, load_args)

    ag1b = []
    ag2b = []

    color1 = Player.black
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)

        if game_record.winner == color1:
            print('Agent 1 wins.')
        elif game_record.winner == color1.other:
            print('Agent 2 wins.')
        else:
            print('Agents play a draw.')

        # Record the game score from black's perspective, bucketed by which
        # agent held black this game.
        if game_record.winner == Player.black:
            black_score = game_record.margin
        elif game_record.winner == Player.white:
            black_score = -game_record.margin
        else:
            black_score = 0
        if color1 == Player.black:
            ag1b.append(black_score)
        else:
            ag2b.append(black_score)

        color1 = color1.other
    return (ag1b, ag2b)