def main():
    """Fan self-play out over multiple worker processes, then merge each
    worker's experience buffer into a single file at --experience-out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', '-o', required=True)
    parser.add_argument('--num-workers', '-w', type=int, default=1)
    parser.add_argument('--temperature', '-t', type=float, default=0.0)
    parser.add_argument('--board-size', '-b', type=int, default=19)
    args = parser.parse_args()

    experience_files = []
    workers = []
    # Split GPU memory evenly across workers, leaving a little headroom.
    gpu_frac = 0.95 / float(args.num_workers)
    games_per_worker = args.num_games // args.num_workers
    print('Starting workers...')
    for i in range(args.num_workers):
        filename = get_temp_file()
        experience_files.append(filename)
        worker = multiprocessing.Process(
            target=do_self_play,
            args=(
                args.board_size,
                args.learning_agent,
                games_per_worker,
                args.temperature,
                filename,
                gpu_frac,
            )
        )
        worker.start()
        workers.append(worker)

    # Wait for all workers to finish.
    print('Waiting for workers...')
    for worker in workers:
        worker.join()

    # Merge experience buffers.
    print('Merging experience buffers...')
    # BUG FIX: the original read `h5py.File(filename)` where `filename` was
    # the stale loop variable from the spawn loop (the LAST worker's file),
    # so one worker's buffer was dropped and another double-counted. Use the
    # first file, as the commented-out line intended. Files are also now
    # opened read-only and closed promptly.
    first_filename = experience_files[0]
    other_filenames = experience_files[1:]
    with h5py.File(first_filename, 'r') as expf:
        combined_buffer = rl.load_experience(expf)
    for filename in other_filenames:
        with h5py.File(filename, 'r') as expf:
            next_buffer = rl.load_experience(expf)
        combined_buffer = rl.combine_experience([combined_buffer, next_buffer])

    print('Saving into %s...' % args.experience_out)
    with h5py.File(args.experience_out, 'w') as experience_outf:
        combined_buffer.serialize(experience_outf)

    # Clean up the per-worker temp files.
    for fname in experience_files:
        os.unlink(fname)
def main():
    """Self-play with an actor-critic agent playing both sides.

    Alternates which copy plays black each game and serializes the combined
    experience of both collectors to --experience-out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', required=True)
    parser.add_argument('--temperature', type=float, default=0.0)
    args = parser.parse_args()

    global BOARD_SIZE
    BOARD_SIZE = args.board_size

    # FIX: open the model file read-only and close it after each load; the
    # original leaked two h5py handles (and relied on h5py's historical
    # default append mode, which can lock the file for other readers).
    with h5py.File(args.learning_agent, 'r') as agentf:
        agent1 = rl.load_ac_agent(agentf)
    with h5py.File(args.learning_agent, 'r') as agentf:
        agent2 = rl.load_ac_agent(agentf)
    agent1.set_temperature(args.temperature)
    agent2.set_temperature(args.temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black  # which color agent1 plays; swapped every game
    for i in range(args.num_games):
        print('Simulating game %d/%d...' % (i + 1, args.num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(args.experience_out, 'w') as experience_outf:
        experience.serialize(experience_outf)
def do_self_play(board_size, agent1_filename, agent2_filename, num_games, temperature, experience_filename, chunk=100, gpu_frac=0.95):
    """Play num_games between two saved eps-greedy agents, saving agent 1's
    experience in chunks of `chunk` games to numbered .h5 files.

    Args:
        board_size: size of the Go board passed to simulate_game.
        agent1_filename: path of the learning agent's model file.
        agent2_filename: path of the opponent agent's model file.
        num_games: total number of games to simulate.
        temperature: exploration temperature for both agents.
        experience_filename: base path; '_<chunk index>.h5' is appended.
        chunk: games per saved buffer (default 100).
        gpu_frac: fraction of GPU memory to claim.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)
    # Re-seed per process so parallel workers do not replay identical games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_filename)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    color1 = Player.black  # which color agent1 plays; swapped every game
    # NOTE: integer division drops the final num_games % chunk games.
    times = num_games // chunk
    for current_chunk in range(times):
        print('Текущая порция %d' % current_chunk)
        collector1 = rl.ExperienceCollector()
        for i in range(chunk):
            print('Симуляция игры %d/%d...' % (i + 1, chunk))
            collector1.begin_episode()
            agent1.set_collector(collector1)
            if color1 == Player.black:
                black_player, white_player = agent1, agent2
            else:
                white_player, black_player = agent1, agent2
            game_record = simulate_game(black_player, white_player, board_size)
            if game_record.winner == color1:
                print('Agent 1 wins, time is %s' % (datetime.datetime.now()))
                collector1.complete_episode(reward=1)
            else:
                print('Agent 2 wins, time is %s' % (datetime.datetime.now()))
                collector1.complete_episode(reward=-1)
            color1 = color1.other
        experience = rl.combine_experience([collector1])
        # BUG FIX: the progress message used to report a different filename
        # (suffix current_chunk * chunk) than the file actually written
        # (suffix current_chunk). Build the path once and use it for both.
        out_path = experience_filename + '_' + str(current_chunk) + '.h5'
        print('Saving experience buffer to %s\n' % out_path)
        with h5py.File(out_path, 'w') as experience_outf:
            experience.serialize(experience_outf)
def do_self_play(board_size, agent1_filename, agent2_filename, num_games, temperature, experience_filename, gpu_frac):
    """Simulate num_games between two saved policy agents and serialize
    agent 1's experience buffer to experience_filename.
    """
    visible_gpus = tf.config.experimental.list_physical_devices('GPU')
    if visible_gpus:
        # Restrict TensorFlow to only use the first GPU.
        try:
            tf.config.experimental.set_visible_devices(visible_gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(visible_gpus[0], True)
        except RuntimeError as e:
            print(e)

    # Seed per process so concurrent workers generate distinct games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    # Only the learning agent (agent 1) gets the exploration temperature.
    with h5py.File(agent1_filename, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
        agent1.set_temperature(temperature)
    with h5py.File(agent2_filename, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    collector1 = rl.ExperienceCollector()
    color1 = Player.black  # the color agent 1 plays; alternates each game
    for game_no in range(num_games):
        print('Simulating game %d/%d...' % (game_no + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        # Map each color to its player for this game.
        by_color = {color1: agent1, color1.other: agent2}
        game_record = simulate_game(by_color[Player.black], by_color[Player.white], board_size)
        agent1_won = game_record.winner == color1
        if agent1_won:
            print('Agent 1 wins.')
        else:
            print('Agent 2 wins.')
        collector1.complete_episode(reward=1 if agent1_won else -1)
        color1 = color1.other

    experience = rl.combine_experience([collector1])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def generate_experience(learning_agent, reference_agent, exp_file, num_games, board_size, num_workers, temperature):
    """Spread num_games of self-play across num_workers processes and merge
    every worker's experience buffer into a single file at exp_file.
    """
    per_worker_games = num_games // num_workers
    memory_share = 0.95 / float(num_workers)  # split GPU memory across workers

    experience_files = []
    workers = []
    for _ in range(num_workers):
        temp_name = get_temp_file()
        experience_files.append(temp_name)
        proc = multiprocessing.Process(
            target=do_self_play,
            args=(
                board_size,
                learning_agent,
                reference_agent,
                per_worker_games,
                temperature,
                temp_name,
                memory_share,
            )
        )
        proc.start()
        workers.append(proc)

    # Block until every worker has written its buffer.
    print('Waiting for workers...')
    for proc in workers:
        proc.join()

    # Fold the per-worker buffers together one file at a time.
    print('Merging experience buffers...')
    combined_buffer = None
    for temp_name in experience_files:
        with h5py.File(temp_name, 'r') as expf:
            next_buffer = rl.load_experience(expf)
        if combined_buffer is None:
            combined_buffer = next_buffer
        else:
            combined_buffer = rl.combine_experience([combined_buffer, next_buffer])

    print('Saving into %s...' % exp_file)
    with h5py.File(exp_file, 'w') as experience_outf:
        combined_buffer.serialize(experience_outf)

    # Remove the temporary per-worker files.
    for temp_name in experience_files:
        os.unlink(temp_name)
def do_self_play(board_size, agent_filename, num_games, temperature, experience_filename, gpu_frac):
    """Worker entry point: play num_games of self-play between two copies of
    the agent stored at agent_filename, collecting experience for both sides
    and serializing the combined buffer to experience_filename.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    # Re-seed per process so parallel workers do not replay identical games.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    # FIX: open the model file read-only and close it after each load; the
    # original passed bare h5py.File handles that were never closed, leaking
    # them for the lifetime of the worker process.
    with h5py.File(agent_filename, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    agent1.set_temperature(temperature)
    with h5py.File(agent_filename, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    agent2.set_temperature(temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    color1 = Player.black  # which color agent1 plays; swapped every game
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Self-play driver: pit two copies of the learning agent against each
    other and serialize the combined experience buffer.
    """
    # BUG FIX: the original referenced agent_filename, num_games, and
    # experience_filename without ever defining them (the argparse block was
    # commented out), so the function raised NameError immediately. Restore
    # the argument parsing, matching the sibling main() in this file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', required=True)
    args = parser.parse_args()
    agent_filename = args.learning_agent
    num_games = args.num_games
    experience_filename = args.experience_out

    # 9.19: load two independent copies of the model so each can own a
    # collector; open the files read-only and close them after loading.
    with h5py.File(agent_filename, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    with h5py.File(agent_filename, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    # 9.20: agent1 always plays black in this variant.
    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()
        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)  # Agent1 won the game, so +reward
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)  # Agent2 won the game
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Generate self-play experience from two copies of the same policy agent."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--board-size', type=int, required=True)
    arg_parser.add_argument('--learning-agent', required=True)
    arg_parser.add_argument('--num-games', '-n', type=int, default=10)
    arg_parser.add_argument('--experience-out', required=True)
    args = arg_parser.parse_args()
    agent_filename = args.learning_agent
    experience_filename = args.experience_out
    num_games = args.num_games

    global BOARD_SIZE
    BOARD_SIZE = args.board_size

    # Two independent copies of the same model, each feeding its own collector.
    agent1 = agent.load_policy_agent(h5py.File(agent_filename))
    agent2 = agent.load_policy_agent(h5py.File(agent_filename))
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    # agent1 always plays black in this variant.
    for _ in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()
        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Interactive chunked self-play driver (Russian prompts).

    Reads the agent name, temperature, log/output names, and game counts from
    stdin, then simulates games between two copies of the agent, writing a
    log file and an experience .h5 file per chunk of chunk_size games.
    """
    learning_agent = input("Бот: ")
    temperature = float(input('Температура = '))
    game_log = input('game_log: ')
    experience_out = input('experience_out: ')
    num_games = int(input('Количество игр = '))
    try:
        chunk_size = int(input('Количество игр в "порции" ='))
    except:
        # Any bad input (including empty) falls back to 100 games per chunk.
        chunk_size = 100
    # Hard-coded machine-specific storage locations for models and output.
    pth = '//media//nail//SSD_Disk//Experience//'
    learning_agent = '//media//nail//SSD_Disk//Models//' + learning_agent + '.h5'
    game_log = pth + game_log + '_' + str(num_games)
    experience_out = pth + experience_out + '_' + str(num_games) + '_'  #+'.h5'
    #args = parser.parse_args()
    # ==================================================
    # TF1-compat session config: claim 95% of GPU memory, allow growth.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================
    # Two copies of the same model; NOTE(review): the h5py handles are never
    # closed.
    agent1 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent2 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent1.set_temperature(temperature)
    agent2.set_temperature(temperature)
    k = 0  # index of the current chunk
    j = 0  # games played in the current chunk; 0 means "chunk just started"
    for i in range(num_games + 1):
        if j == 0:
            # Start a new per-chunk log file.
            game_log_out = game_log + '_' + str((k + 1) * chunk_size) + ".txt"
            logf = open(game_log_out, 'a')
            logf.write('Начало игр в %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.write(str((k + 1) * chunk_size) + ' из количества игр: ' + str(num_games) + '\n')
        print('Моделируемая игра %d/%d...' % (i + 1, num_games))
        # NOTE(review): fresh collectors are created for EVERY game, so the
        # buffer combined at a chunk boundary appears to hold only the final
        # game of the chunk; these two lines may have been intended inside the
        # `if j == 0` chunk-start branch — confirm against original formatting.
        collector1 = rl.ExperienceCollector()
        collector2 = rl.ExperienceCollector()
        # NOTE(review): color1 is reset to black each iteration, so the
        # trailing `color1 = color1.other` never takes effect and agent1
        # always plays black — likely also intended for the chunk-start branch.
        color1 = Player.black
        j += 1
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent2, agent1
        game_record = simulate_game(black_player, white_player)
        print(" № игры : ", i + 1)
        if game_record.winner == color1:
            print('Агент 1 выигрывает.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Агент 2 выигрывает.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other
        # At each chunk boundary, save the combined buffer and close the log.
        # NOTE(review): games after the last full chunk are never saved, and
        # the final log file is left open if num_games is not a multiple of
        # chunk_size — confirm whether that is acceptable.
        if i >= chunk_size and i % chunk_size == 0:
            experience = rl.combine_experience([collector1, collector2])
            experience_out_file = experience_out + str((k + 1) * chunk_size) + ".h5"
            logf.write('Сохранение буфера в файл %s\n' % experience_out_file)
            logf.write('Завершение игр %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.close()
            with h5py.File(experience_out_file, 'w') as experience_outf:
                experience.serialize(experience_outf)
            print('Записано игр: ', (k + 1) * chunk_size, ' из ', num_games, ' игр.')
            k += 1
            j = 0