Example #1
    def __init__(self,
                 player,
                 coordinator,
                 max_time_for_training=60 * 60 * 24,
                 max_time_per_one_opponent=60 * 60 * 2,
                 max_frames_per_episode=22.4 * 60 * 15,
                 max_frames=22.4 * 60 * 60 * 24,
                 max_episodes=MAX_EPISODES):

        self.player = player
        self.player.add_actor(self)

        self.teacher = get_supervised_agent(player.race, model_type="sl")

        # The code below is not used because we can only create the env once we know the opponent information (e.g., race)
        # AlphaStar: self.environment = SC2Environment()

        self.coordinator = coordinator
        self.max_time_for_training = max_time_for_training
        self.max_time_per_one_opponent = max_time_per_one_opponent
        self.max_frames_per_episode = max_frames_per_episode
        self.max_frames = max_frames
        self.max_episodes = max_episodes

        self.thread = threading.Thread(target=self.run, args=())
        self.thread.daemon = True  # Daemonize thread

        self.is_running = True
        self.is_start = False
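
A minimal usage sketch for a constructor like this, following the league_train() pattern in Example #6; the class name ActorLoop and the surrounding setup are assumptions carried over from that example, not part of this snippet.

# Hedged sketch: assumes the __init__ above belongs to an ActorLoop-style class and that
# League, Coordinator, Race and get_supervised_agent come from the same project.
league = League(
    initial_agents={race: get_supervised_agent(race) for race in [Race.protoss]},
    main_players=1, main_exploiters=1, league_exploiters=2)
coordinator = Coordinator(league)

player = league.get_learning_player(0)
actor = ActorLoop(player, coordinator)  # the __init__ registers the actor with the player via add_actor
actor.start()                           # launches the daemonized run() thread
actor.thread.join()                     # block until the episode loop finishes
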
Example #2
def test(on_server=False, replay_path=None):
    device = DEVICE

    league = League(initial_agents={
        race: get_supervised_agent(race,
                                   path=MODEL_PATH,
                                   model_type=MODEL_TYPE,
                                   restore=RESTORE,
                                   device=device)
        for race in [Race.protoss]
    },
                    main_players=1,
                    main_exploiters=0,
                    league_exploiters=0)

    coordinator = Coordinator(league, output_file=OUTPUT_FILE, winrate_scale=2)
    learners = []
    actors = []

    rank = 0

    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)
        learner = None  # Learner(player, rank, v_steps, device, max_time_for_training=60 * 60 * 24, is_training=IS_TRAINING)
        learners.append(learner)
        actors.extend(
            [ActorEval(player, coordinator, j + 1) for j in range(ACTOR_NUMS)])

    threads = []
    # for l in learners:
    #     l.start()
    #     threads.append(l.thread)
    #     sleep(1)

    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try:
        # Wait for training to finish.
        for t in threads:
            t.join()

        coordinator.write_eval_results()

    except Exception as e:
        print("Exception Handled in Main, Detials of the Exception:", e)
Example #3
    def __init__(self, player, q_winloss, q_points, device, global_model, coordinator, 
                 teacher, idx, buffer_lock=None, results_lock=None, 
                 writer=None, max_time_for_training=MAX_TIME_FOR_TRAINING,
                 max_time_per_one_opponent=MAX_TIME_FOR_TRAINING,
                 max_frames_per_episode=22.4 * MAX_TIME_FOR_TRAINING, max_frames=MAX_FRAMES, 
                 max_episodes=MAX_EPISODES, is_training=IS_TRAINING,
                 replay_dir="./added_simple64_replays/",
                 update_params_interval=UPDATE_PARAMS_INTERVAL,
                 need_save_result=NEED_SAVE_RESULT):
        self.player = player
        self.player.add_actor(self)
        self.idx = idx
        self.name = 'agent_' + str(self.idx)
        self.teacher = teacher

        self.q_winloss = q_winloss
        self.q_points = q_points

        self.global_model = global_model
        self.coordinator = coordinator

        #self.agent = self.player.agent
        self.agent = get_supervised_agent(player.race, path=MODEL_PATH, model_type=MODEL_TYPE, restore=RESTORE, device=device)
        # if ON_GPU:
        #     self.agent.agent_nn.to(device)

        self.max_time_for_training = max_time_for_training
        self.max_time_per_one_opponent = max_time_per_one_opponent
        self.max_frames_per_episode = max_frames_per_episode
        self.max_frames = max_frames
        self.max_episodes = max_episodes
        self.is_training = is_training

        self.thread = threading.Thread(target=self.run, args=())

        self.thread.daemon = True  # Daemonize thread
        self.buffer_lock = buffer_lock
        self.results_lock = results_lock

        self.is_running = True
        self.is_start = False

        self.replay_dir = replay_dir
        self.writer = writer
        self.update_params_interval = update_params_interval
        self.need_save_result = need_save_result
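
This signature matches the positional ActorVSComputer call inside Worker() in Example #8; below is a condensed sketch of that wiring. The class name, the CPU device, and the bare queues are assumptions for illustration.

# Hedged sketch condensed from the Worker() body in Example #8.
import torch
import torch.multiprocessing as mp

q_winloss = mp.Queue()
q_points = mp.Queue()
device = torch.device("cpu")

teacher = get_supervised_agent(player.race, model_type="sl", restore=True, device=device)
actor = ActorVSComputer(player, q_winloss, q_points, device, model_learner,
                        None, teacher, 0, None, None, None)  # coordinator, locks and writer left as None
actor.start()
actor.thread.join()
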
Example #4
    def __init__(self,
                 player,
                 coordinator,
                 max_time_for_training=60 * 60 * 24,
                 max_time_per_one_opponent=60 * 60 * 2,
                 max_frames_per_episode=22.4 * 60 * 15,
                 max_frames=22.4 * 60 * 60 * 24,
                 max_episodes=MAX_EPISODES,
                 use_replay_expert_reward=True,
                 replay_path=REPLAY_PATH,
                 replay_version=REPLAY_VERIOSN):

        self.player = player
        self.player.add_actor(self)
        if ON_GPU:
            self.player.agent.agent_nn.to(DEVICE)

        self.teacher = get_supervised_agent(player.race,
                                            model_type="sl",
                                            restore=RESTORE)
        if ON_GPU:
            self.teacher.agent_nn.to(DEVICE)

        # The code below is not used because we can only create the env once we know the opponent information (e.g., race)
        # AlphaStar: self.environment = SC2Environment()

        self.coordinator = coordinator
        self.max_time_for_training = max_time_for_training
        self.max_time_per_one_opponent = max_time_per_one_opponent
        self.max_frames_per_episode = max_frames_per_episode
        self.max_frames = max_frames
        self.max_episodes = max_episodes

        self.thread = threading.Thread(target=self.run, args=())
        self.thread.daemon = True  # Daemonize thread

        self.is_running = True
        self.is_start = False

        self.use_replay_expert_reward = use_replay_expert_reward
        self.replay_path = replay_path
        self.replay_version = replay_version
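
Example #5 builds an actor with this signature as ActorLoopPlusZ(player, coordinator, replay_path=replay_path); the extra keyword arguments here only override the replay-expert-reward defaults. A minimal sketch, with the class name assumed from Example #5:

# Hedged sketch; ActorLoopPlusZ is assumed from Example #5, and the keyword values simply
# restate the defaults of the __init__ above.
actor = ActorLoopPlusZ(player, coordinator,
                       use_replay_expert_reward=True,
                       replay_path=REPLAY_PATH,
                       replay_version=REPLAY_VERIOSN)
actor.start()
actor.thread.join()
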
Example #5
def test(on_server=False, replay_path=None):
    # model path
    MODEL_TYPE = "sl"
    MODEL_PATH = "./model/"
    ACTOR_NUMS = 1

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE, restore=RESTORE)
            for race in [Race.protoss]
        },
        main_players=1, 
        main_exploiters=0,
        league_exploiters=0)

    coordinator = Coordinator(league)
    learners = []
    actors = []

    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)
        learner = Learner(player, max_time_for_training=60 * 60 * 24)
        learners.append(learner)
        actors.extend([ActorLoopPlusZ(player, coordinator, replay_path=replay_path) for _ in range(ACTOR_NUMS)])

    threads = []
    for l in learners:
        l.start()
        threads.append(l.thread)
        sleep(1)
    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try: 
        # Wait for training to finish.
        for t in threads:
            t.join()
    except Exception as e: 
        print("Exception Handled in Main, Detials of the Exception:", e)
Example #6
def league_train():
    """Trains the AlphaStar league."""
    league = League(
        initial_agents={
            race: get_supervised_agent(race)
            for race in [Race.protoss]
        },
        main_players=1, 
        main_exploiters=1,
        league_exploiters=2)

    coordinator = Coordinator(league)
    learners = []
    actors = []

    for idx in range(league.get_learning_players_num()):
        player = league.get_learning_player(idx)
        learner = Learner(player)
        learners.append(learner)
        actors.extend([ActorLoop(player, coordinator) for _ in range(1)])

    threads = []
    for l in learners:
        l.start()
        threads.append(l.thread)
        sleep(1)
    for a in actors:
        a.start()
        threads.append(a.thread)
        sleep(1)

    try: 
        # Wait for training to finish.
        for t in threads:
            t.join()
    except Exception as e: 
        print("Exception Handled in Main, Detials of the Exception:", e)
Example #7
def test(on_server=False, replay_path=None):
    if SIMPLE_TEST:
        use_cuda_device = False
    else:
        use_cuda_device = True

    torch.manual_seed(RANDOM_SEED)
    mp.set_start_method('spawn')

    model_save_type = "rl"
    model_save_path = os.path.join("./model/", model_save_type + "_" + strftime("%y-%m-%d_%H-%M-%S", localtime()))

    now = datetime.datetime.now()
    log_path = "./log/" + now.strftime("%Y%m%d-%H%M%S") + "/"

    device_learner = torch.device("cuda:0" if use_cuda_device else "cpu")
    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE, 
                                       restore=RESTORE, device=device_learner)
            for race in [Race.protoss]
        },
        main_players=1, 
        main_exploiters=0,
        league_exploiters=0)

    player = league.get_learning_player(0)
    player.agent.set_rl_training(IS_TRAINING)
    if ON_GPU:
        player.agent.agent_nn.to(device_learner)

    model_learner = player.agent.agent_nn.model
    model_learner.share_memory()

    if 0:  # hard-coded switch: set to 1 to use MorvanZhouSharedAdam instead of IkostrikovSharedAdam
        optimizer = SA.MorvanZhouSharedAdam(model_learner.parameters(), lr=LR, betas=(THP.beta1, THP.beta2), 
                                            eps=THP.epsilon, weight_decay=WEIGHT_DECAY)
    else:
        optimizer = SA.IkostrikovSharedAdam(model_learner.parameters(), lr=LR, betas=(THP.beta1, THP.beta2), 
                                            eps=THP.epsilon, weight_decay=WEIGHT_DECAY)
        optimizer.share_memory()

    synchronizer = mp.Lock()
    processes = []

    q_winloss = mp.Queue(maxsize=TRAIN_ITERS * 24)
    q_points = mp.Queue(maxsize=TRAIN_ITERS * 24)
    v_steps = mp.Value('d', 0.0)

    for rank in range(PARALLEL):
        p = mp.Process(target=Worker, args=(synchronizer, rank, optimizer, q_winloss, q_points, v_steps,
                                            use_cuda_device, model_learner, device_learner))
        p.start()
        processes.append(p)

    ps = mp.Process(target=Parameter_Server, args=(synchronizer, q_winloss, q_points, v_steps, 
                                                   use_cuda_device, model_learner, log_path, model_save_path))
    ps.start()
    processes.append(ps)

    for p in processes:
        p.join()
Example #8
def Worker(synchronizer, rank, optimizer, q_winloss, q_points, v_steps, use_cuda_device, model_learner, device_learner, model_teacher=None, device_teacher=None):
    torch.manual_seed(RANDOM_SEED + rank)

    # with synchronizer:
    #     print('module name:', "worker")
    #     print('parent process:', os.getppid())
    #     print('process id:', os.getpid())

    if rank < 8:
        cuda_device = "cuda:" + str(rank) if use_cuda_device else 'cpu'
    else:
        new_rank = (rank - 8) % 7 + 1
        cuda_device = "cuda:" + str(new_rank) if use_cuda_device else 'cpu'

    league = League(
        initial_agents={
            race: get_supervised_agent(race, path=MODEL_PATH, model_type=MODEL_TYPE, 
                                       restore=True, device=cuda_device)
            for race in [Race.protoss]
        },
        main_players=1, 
        main_exploiters=0,
        league_exploiters=0)

    now = datetime.datetime.now()
    summary_path = "./log/" + now.strftime("%Y%m%d-%H%M%S") + "_" + str(rank) + "/"
    writer = SummaryWriter(summary_path) if NEED_SAVE_RESULT else None

    # results_lock = threading.Lock()
    # coordinator = Coordinator(league, winrate_scale=WINRATE_SCALE, output_file=OUTPUT_FILE, results_lock=results_lock, writer=writer)
    # coordinator.set_uninitialed_results(actor_nums=ACTOR_NUMS, episode_nums=MAX_EPISODES)

    learners = []
    actors = []

    process_lock = synchronizer if USE_UPDATE_LOCK else None

    try:

        for idx in range(league.get_learning_players_num()):
            player = league.get_learning_player(idx)

            #player.agent.agent_nn.model = model_learner
            # player.agent.agent_nn.model.load_state_dict(model_learner.state_dict())
            if use_cuda_device:
                player.agent.agent_nn.model.to(cuda_device)

            player.agent.set_rl_training(IS_TRAINING)

            buffer_lock = threading.Lock()
            learner = Learner(player, rank, v_steps, cuda_device, optimizer=optimizer, global_model=model_learner, 
                              max_time_for_training=MAX_TIME_FOR_TRAINING, lr=LR, 
                              weight_decay=WEIGHT_DECAY, baseline_weight=BASELINE_WEIGHT, is_training=IS_TRAINING, 
                              buffer_lock=buffer_lock, writer=writer, use_opponent_state=USE_OPPONENT_STATE,
                              no_replay_learn=NO_REPLAY_LEARN, num_epochs=NUM_EPOCHS,
                              count_of_batches=COUNT_OF_BATCHES, buffer_size=BUFFER_SIZE,
                              use_random_sample=USE_RANDOM_SAMPLE, only_update_baseline=ONLY_UPDATE_BASELINE,
                              need_save_result=NEED_SAVE_RESULT, process_lock=process_lock,
                              update_params_interval=UPDATE_PARAMS_INTERVAL)
            learners.append(learner)

            teacher = get_supervised_agent(player.race, model_type="sl", restore=True, device=cuda_device)
            teacher.set_rl_training(IS_TRAINING)

            # teacher.agent_nn.model = model_teacher
            if use_cuda_device:
                teacher.agent_nn.model.to(cuda_device)

            for z in range(ACTOR_NUMS):
                device = torch.device(cuda_device if use_cuda_device else "cpu")
                agent_id = rank * ACTOR_NUMS + z
                actor = ActorVSComputer(player, q_winloss, q_points, device, model_learner, None, teacher, agent_id, None, None, None)
                actors.append(actor)

        threads = []
        for l in learners:
            l.start()
            threads.append(l.thread)
            sleep(1)
        for a in actors:
            a.start()
            threads.append(a.thread)
            sleep(1)

        # Wait for training to finish.
        for t in threads:
            t.join()

        # coordinator.write_eval_results()

    except Exception as e:
        print("Worker Exception cause return, Detials of the Exception:", e)
        print(traceback.format_exc())
        pass

    finally:
        pass