def __init__(self, player, coordinator, max_time_for_training=60 * 60 * 24,
             max_time_per_one_opponent=60 * 60 * 2,
             max_frames_per_episode=22.4 * 60 * 15,
             max_frames=22.4 * 60 * 60 * 24,
             max_episodes=MAX_EPISODES):
    self.player = player
    self.player.add_actor(self)
    self.teacher = get_supervised_agent(player.race, model_type="sl")

    # The environment is not created here, because it can only be built once the
    # opponent's information (e.g., race) is known.
    # AlphaStar: self.environment = SC2Environment()

    self.coordinator = coordinator
    self.max_time_for_training = max_time_for_training
    self.max_time_per_one_opponent = max_time_per_one_opponent
    self.max_frames_per_episode = max_frames_per_episode
    self.max_frames = max_frames
    self.max_episodes = max_episodes

    self.thread = threading.Thread(target=self.run, args=())
    self.thread.daemon = True  # daemonize the thread so it exits with the main process

    self.is_running = True
    self.is_start = False
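# The sketch below isolates the daemon-thread actor pattern this constructor sets
# up, assuming only the standard library. ToyActor and its fields are hypothetical
# stand-ins, not the repo's classes: the point is that the thread is created in
# __init__, daemonized so it dies with the main process, and only runs once
# start() is called (as the test() drivers below do).
import threading
import time


class ToyActor:
    def __init__(self, max_episodes=3):
        self.max_episodes = max_episodes
        self.is_running = True
        # Created here, but not started until start() is called.
        self.thread = threading.Thread(target=self.run, args=())
        self.thread.daemon = True  # daemon thread: exits with the main process

    def start(self):
        self.thread.start()

    def run(self):
        for episode in range(self.max_episodes):
            time.sleep(0.1)  # stand-in for playing one episode
            print("finished episode", episode)
        self.is_running = False


if __name__ == '__main__':
    actor = ToyActor()
    actor.start()
    actor.thread.join()  # without the join, the daemon thread would be killed early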
def test(on_server=False, replay_path=None):
    device = DEVICE

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE,
                                       restore=RESTORE, device=device)
            for race in [Race.protoss]
        },
        main_players=1,
        main_exploiters=0,
        league_exploiters=0)

    coordinator = Coordinator(league, output_file=OUTPUT_FILE, winrate_scale=2)

    learners = []
    actors = []

    rank = 0
    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)

        # Evaluation only: no learner is created.
        # learner = Learner(player, rank, v_steps, device,
        #                   max_time_for_training=60 * 60 * 24, is_training=IS_TRAINING)
        learner = None
        learners.append(learner)

        actors.extend([ActorEval(player, coordinator, j + 1) for j in range(ACTOR_NUMS)])

    threads = []

    # Learners are not started in evaluation mode.
    # for l in learners:
    #     l.start()
    #     threads.append(l.thread)
    #     sleep(1)

    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try:
        # Wait for the evaluation actors to finish, then write the results.
        for t in threads:
            t.join()

        coordinator.write_eval_results()
    except Exception as e:
        print("Exception handled in main, details of the exception:", e)
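# A minimal stand-in for the coordinator's role in this evaluation setup: several
# actor threads report outcomes to one shared object, and results are written out
# once every thread has joined. ToyCoordinator and its methods are hypothetical,
# not the repo's Coordinator API; the lock-guarded shared list is the one detail
# the sketch is meant to show.
import threading


class ToyCoordinator:
    def __init__(self, output_file="eval_results.txt"):
        self.output_file = output_file
        self.results = []
        self.lock = threading.Lock()

    def send_outcome(self, actor_id, outcome):
        with self.lock:  # actors run concurrently, so guard the shared list
            self.results.append((actor_id, outcome))

    def write_eval_results(self):
        with open(self.output_file, "w") as f:
            for actor_id, outcome in sorted(self.results):
                f.write("actor %d outcome %d\n" % (actor_id, outcome))


if __name__ == '__main__':
    coordinator = ToyCoordinator()
    threads = [threading.Thread(target=coordinator.send_outcome, args=(j + 1, 1))
               for j in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    coordinator.write_eval_results()  # called after the joins, as in test() above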
def __init__(self, player, q_winloss, q_points, device, global_model, coordinator,
             teacher, idx, buffer_lock=None, results_lock=None, writer=None,
             max_time_for_training=MAX_TIME_FOR_TRAINING,
             max_time_per_one_opponent=MAX_TIME_FOR_TRAINING,
             max_frames_per_episode=22.4 * MAX_TIME_FOR_TRAINING,
             max_frames=MAX_FRAMES, max_episodes=MAX_EPISODES,
             is_training=IS_TRAINING, replay_dir="./added_simple64_replays/",
             update_params_interval=UPDATE_PARAMS_INTERVAL,
             need_save_result=NEED_SAVE_RESULT):
    self.player = player
    self.player.add_actor(self)

    self.idx = idx
    self.name = 'agent_' + str(self.idx)

    self.teacher = teacher
    self.q_winloss = q_winloss
    self.q_points = q_points
    self.global_model = global_model
    self.coordinator = coordinator

    # Each actor holds its own copy of the agent instead of sharing player.agent.
    # self.agent = self.player.agent
    self.agent = get_supervised_agent(player.race, path=MODEL_PATH, model_type=MODEL_TYPE,
                                      restore=RESTORE, device=device)

    # if ON_GPU:
    #     self.agent.agent_nn.to(device)

    self.max_time_for_training = max_time_for_training
    self.max_time_per_one_opponent = max_time_per_one_opponent
    self.max_frames_per_episode = max_frames_per_episode
    self.max_frames = max_frames
    self.max_episodes = max_episodes
    self.is_training = is_training

    self.thread = threading.Thread(target=self.run, args=())
    self.thread.daemon = True  # daemonize the thread so it exits with the main process

    self.buffer_lock = buffer_lock
    self.results_lock = results_lock

    self.is_running = True
    self.is_start = False

    self.replay_dir = replay_dir
    self.writer = writer
    self.update_params_interval = update_params_interval
    self.need_save_result = need_save_result
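# A hedged sketch of how the q_winloss / q_points handles stored above can be fed:
# actors act as producers pushing one record per finished episode, and a single
# consumer drains both queues to aggregate results. The payloads (an int outcome
# and a float score) are assumptions; the real actor may push richer records.
import multiprocessing as mp

EPISODES = 5


def toy_actor(q_winloss, q_points):
    for episode in range(EPISODES):
        q_winloss.put(1)              # e.g. 1 for a win, 0 for a loss
        q_points.put(float(episode))  # e.g. a per-episode score


if __name__ == '__main__':
    q_winloss = mp.Queue()
    q_points = mp.Queue()
    p = mp.Process(target=toy_actor, args=(q_winloss, q_points))
    p.start()

    # Drain the queues before joining so the producer can flush and exit cleanly.
    wins = [q_winloss.get() for _ in range(EPISODES)]
    points = [q_points.get() for _ in range(EPISODES)]
    p.join()

    print("win rate:", sum(wins) / len(wins))
    print("mean points:", sum(points) / len(points))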
def __init__(self, player, coordinator, max_time_for_training=60 * 60 * 24,
             max_time_per_one_opponent=60 * 60 * 2,
             max_frames_per_episode=22.4 * 60 * 15,
             max_frames=22.4 * 60 * 60 * 24,
             max_episodes=MAX_EPISODES,
             use_replay_expert_reward=True,
             replay_path=REPLAY_PATH,
             replay_version=REPLAY_VERIOSN):
    self.player = player
    self.player.add_actor(self)
    if ON_GPU:
        self.player.agent.agent_nn.to(DEVICE)

    self.teacher = get_supervised_agent(player.race, model_type="sl", restore=RESTORE)
    if ON_GPU:
        self.teacher.agent_nn.to(DEVICE)

    # The environment is not created here, because it can only be built once the
    # opponent's information (e.g., race) is known.
    # AlphaStar: self.environment = SC2Environment()

    self.coordinator = coordinator
    self.max_time_for_training = max_time_for_training
    self.max_time_per_one_opponent = max_time_per_one_opponent
    self.max_frames_per_episode = max_frames_per_episode
    self.max_frames = max_frames
    self.max_episodes = max_episodes

    self.thread = threading.Thread(target=self.run, args=())
    self.thread.daemon = True  # daemonize the thread so it exits with the main process

    self.is_running = True
    self.is_start = False

    self.use_replay_expert_reward = use_replay_expert_reward
    self.replay_path = replay_path
    self.replay_version = replay_version
def test(on_server=False, replay_path=None):
    # model path
    MODEL_TYPE = "sl"
    MODEL_PATH = "./model/"

    ACTOR_NUMS = 1

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE,
                                       restore=RESTORE)
            for race in [Race.protoss]
        },
        main_players=1,
        main_exploiters=0,
        league_exploiters=0)

    coordinator = Coordinator(league)

    learners = []
    actors = []

    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)
        learner = Learner(player, max_time_for_training=60 * 60 * 24)
        learners.append(learner)
        actors.extend([ActorLoopPlusZ(player, coordinator, replay_path=replay_path)
                       for _ in range(ACTOR_NUMS)])

    threads = []
    for l in learners:
        l.start()
        threads.append(l.thread)
        sleep(1)

    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try:
        # Wait for training to finish.
        for t in threads:
            t.join()
    except Exception as e:
        print("Exception handled in main, details of the exception:", e)
def league_train():
    """Trains the AlphaStar league."""
    league = League(
        initial_agents={
            race: get_supervised_agent(race)
            for race in [Race.protoss]
        },
        main_players=1,
        main_exploiters=1,
        league_exploiters=2)

    coordinator = Coordinator(league)

    learners = []
    actors = []

    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)
        learner = Learner(player)
        learners.append(learner)
        actors.extend([ActorLoop(player, coordinator) for _ in range(1)])

    threads = []
    for l in learners:
        l.start()
        threads.append(l.thread)
        sleep(1)

    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try:
        # Wait for training to finish.
        for t in threads:
            t.join()
    except Exception as e:
        print("Exception handled in main, details of the exception:", e)
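# A toy version of the learner/actor pairing that league_train() builds: one
# learner thread per learning player, several actor threads feeding it through a
# shared buffer. toy_actor/toy_learner and the plain queue.Queue buffer are
# hypothetical stand-ins for the repo's Learner and ActorLoop; only the
# start-then-join orchestration mirrors the function above.
import threading
import queue
import time

ACTORS = 2
ROLLOUTS_PER_ACTOR = 3


def toy_actor(buffer):
    for step in range(ROLLOUTS_PER_ACTOR):
        buffer.put(("trajectory", step))  # stand-in for one rollout
        time.sleep(0.05)


def toy_learner(buffer):
    for _ in range(ACTORS * ROLLOUTS_PER_ACTOR):
        item = buffer.get()  # blocks until an actor produces data
        print("learner consumed", item)


if __name__ == '__main__':
    buffer = queue.Queue()
    threads = [threading.Thread(target=toy_learner, args=(buffer,))]
    threads += [threading.Thread(target=toy_actor, args=(buffer,)) for _ in range(ACTORS)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # mirrors the join loop at the end of league_train()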
def test(on_server=False, replay_path=None):
    use_cuda_device = not SIMPLE_TEST  # the quick test configuration runs on CPU

    torch.manual_seed(RANDOM_SEED)
    mp.set_start_method('spawn')

    model_save_type = "rl"
    model_save_path = os.path.join("./model/", model_save_type + "_" +
                                   strftime("%y-%m-%d_%H-%M-%S", localtime()))

    now = datetime.datetime.now()
    log_path = "./log/" + now.strftime("%Y%m%d-%H%M%S") + "/"

    device_learner = torch.device("cuda:0" if use_cuda_device else "cpu")

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE,
                                       restore=RESTORE, device=device_learner)
            for race in [Race.protoss]
        },
        main_players=1,
        main_exploiters=0,
        league_exploiters=0)

    player = league.get_learning_player(0)
    player.agent.set_rl_training(IS_TRAINING)
    if ON_GPU:
        player.agent.agent_nn.to(device_learner)

    # The learner's model lives in shared memory so all worker processes can read
    # and update the same parameters.
    model_learner = player.agent.agent_nn.model
    model_learner.share_memory()

    # Two shared-Adam variants are available; the Ikostrikov one is used here.
    # optimizer = SA.MorvanZhouSharedAdam(model_learner.parameters(), lr=LR,
    #                                     betas=(THP.beta1, THP.beta2),
    #                                     eps=THP.epsilon, weight_decay=WEIGHT_DECAY)
    optimizer = SA.IkostrikovSharedAdam(model_learner.parameters(), lr=LR,
                                        betas=(THP.beta1, THP.beta2),
                                        eps=THP.epsilon, weight_decay=WEIGHT_DECAY)
    optimizer.share_memory()

    synchronizer = mp.Lock()
    processes = []
    q_winloss = mp.Queue(maxsize=TRAIN_ITERS * 24)
    q_points = mp.Queue(maxsize=TRAIN_ITERS * 24)
    v_steps = mp.Value('d', 0.0)

    for rank in range(PARALLEL):
        p = mp.Process(target=Worker, args=(synchronizer, rank, optimizer, q_winloss,
                                            q_points, v_steps, use_cuda_device,
                                            model_learner, device_learner))
        p.start()
        processes.append(p)

    ps = mp.Process(target=Parameter_Server, args=(synchronizer, q_winloss, q_points,
                                                   v_steps, use_cuda_device,
                                                   model_learner, log_path,
                                                   model_save_path))
    ps.start()
    processes.append(ps)

    for p in processes:
        p.join()
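# A small, self-contained sketch of the share_memory() pattern test() relies on,
# assuming only torch: share_memory() moves the parameters into shared memory
# before spawning, so a child process updates the very tensors the parent holds.
# The tiny Linear model and toy_worker are assumptions standing in for the
# learner model and Worker above.
import torch
import torch.multiprocessing as mp


def toy_worker(model):
    # Each process builds its own optimizer, but the parameters it steps are shared.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    x = torch.randn(4, 8)
    loss = model(x).pow(2).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()  # updates the shared parameters in place


if __name__ == '__main__':
    mp.set_start_method('spawn')
    model = torch.nn.Linear(8, 1)
    model.share_memory()  # as in test(): parameters become visible across processes

    before = model.weight.detach().clone()
    p = mp.Process(target=toy_worker, args=(model,))
    p.start()
    p.join()

    # The parent sees the child's update without any explicit copy.
    print("parameters changed in parent:", not torch.allclose(before, model.weight))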
def Worker(synchronizer, rank, optimizer, q_winloss, q_points, v_steps,
           use_cuda_device, model_learner, device_learner,
           model_teacher=None, device_teacher=None):
    torch.manual_seed(RANDOM_SEED + rank)

    # with synchronizer:
    #     print('module name:', "worker")
    #     print('parent process:', os.getppid())
    #     print('process id:', os.getpid())

    # Ranks 0-7 each get their own GPU; higher ranks wrap onto cuda:1..cuda:7
    # (cuda:0 is device_learner in the test() driver).
    if rank < 8:
        cuda_device = "cuda:" + str(rank) if use_cuda_device else 'cpu'
    else:
        new_rank = (rank - 8) % 7 + 1
        cuda_device = "cuda:" + str(new_rank) if use_cuda_device else 'cpu'

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE,
                                       restore=True, device=cuda_device)
            for race in [Race.protoss]
        },
        main_players=1,
        main_exploiters=0,
        league_exploiters=0)

    now = datetime.datetime.now()
    summary_path = "./log/" + now.strftime("%Y%m%d-%H%M%S") + "_" + str(rank) + "/"
    writer = SummaryWriter(summary_path) if NEED_SAVE_RESULT else None

    # results_lock = threading.Lock()
    # coordinator = Coordinator(league, winrate_scale=WINRATE_SCALE, output_file=OUTPUT_FILE,
    #                           results_lock=results_lock, writer=writer)
    # coordinator.set_uninitialed_results(actor_nums=ACTOR_NUMS, episode_nums=MAX_EPISODES)

    learners = []
    actors = []
    process_lock = synchronizer if USE_UPDATE_LOCK else None

    try:
        for idx in range(league.get_learning_players_num()):
            player = league.get_learning_player(idx)

            # player.agent.agent_nn.model = model_learner
            # player.agent.agent_nn.model.load_state_dict(model_learner.state_dict())
            if use_cuda_device:
                player.agent.agent_nn.model.to(cuda_device)
            player.agent.set_rl_training(IS_TRAINING)

            buffer_lock = threading.Lock()
            learner = Learner(player, rank, v_steps, cuda_device, optimizer=optimizer,
                              global_model=model_learner,
                              max_time_for_training=MAX_TIME_FOR_TRAINING,
                              lr=LR, weight_decay=WEIGHT_DECAY,
                              baseline_weight=BASELINE_WEIGHT,
                              is_training=IS_TRAINING, buffer_lock=buffer_lock,
                              writer=writer, use_opponent_state=USE_OPPONENT_STATE,
                              no_replay_learn=NO_REPLAY_LEARN, num_epochs=NUM_EPOCHS,
                              count_of_batches=COUNT_OF_BATCHES, buffer_size=BUFFER_SIZE,
                              use_random_sample=USE_RANDOM_SAMPLE,
                              only_update_baseline=ONLY_UPDATE_BASELINE,
                              need_save_result=NEED_SAVE_RESULT,
                              process_lock=process_lock,
                              update_params_interval=UPDATE_PARAMS_INTERVAL)
            learners.append(learner)

            teacher = get_supervised_agent(player.race, model_type="sl",
                                           restore=True, device=cuda_device)
            teacher.set_rl_training(IS_TRAINING)
            # teacher.agent_nn.model = model_teacher
            if use_cuda_device:
                teacher.agent_nn.model.to(cuda_device)

            for z in range(ACTOR_NUMS):
                device = torch.device(cuda_device if use_cuda_device else "cpu")
                agent_id = rank * ACTOR_NUMS + z
                actor = ActorVSComputer(player, q_winloss, q_points, device,
                                        model_learner, None, teacher, agent_id,
                                        None, None, None)
                actors.append(actor)

        threads = []
        for l in learners:
            l.start()
            threads.append(l.thread)
            sleep(1)

        for a in actors:
            a.start()
            threads.append(a.thread)
            sleep(1)

        # Wait for training to finish.
        for t in threads:
            t.join()

        # coordinator.write_eval_results()
    except Exception as e:
        print("Worker exception caused return, details of the exception:", e)
        print(traceback.format_exc())
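# The rank-to-device mapping above, factored into a small pure function so it can
# be checked in isolation; rank_to_device is a hypothetical helper, not part of
# the repo. Ranks 0-7 map to cuda:0..cuda:7, and higher ranks wrap onto
# cuda:1..cuda:7 (cuda:0 is device_learner in the test() driver).
def rank_to_device(rank, use_cuda_device=True):
    if not use_cuda_device:
        return 'cpu'
    if rank < 8:
        return "cuda:" + str(rank)
    return "cuda:" + str((rank - 8) % 7 + 1)


if __name__ == '__main__':
    for rank in range(18):
        print(rank, rank_to_device(rank))
    # e.g. rank 8 -> cuda:1, rank 14 -> cuda:7, rank 15 -> cuda:1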