    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        self.s_dim = 21
        self.a_dim = 6

        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 1.0
        self.eps_min = 0.01
        self.epsilon = 1.0
        self.init_sampling = 4000
        self.target_update_interval = 10

        self.data_file_qnet = 'rlagent_with_vanilla_dqn_qnet'
        self.data_file_qnet_target = 'rlagent_with_vanilla_dqn_qnet_target'

        self.qnetwork = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                  output_dim=self.a_dim,
                                                  num_neurons=[128, 64, 32],
                                                  hidden_act_func='ReLU',
                                                  out_act_func='Identity').to(device)

        self.qnetwork_target = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                         output_dim=self.a_dim,
                                                         num_neurons=[128, 64, 32],
                                                         hidden_act_func='ReLU',
                                                         out_act_func='Identity').to(device)

        #self.qnetwork = CNNFC(output_dim=self.a_dim).to(device)
        #self.qnetwork_target = CNNFC(output_dim=self.a_dim).to(device)

        if os.path.isfile(self.data_file_qnet + '.pt'):
            self.qnetwork.load_state_dict(torch.load(self.data_file_qnet + '.pt'))

        if os.path.isfile(self.data_file_qnet_target + '.pt'):
            self.qnetwork_target.load_state_dict(torch.load(self.data_file_qnet_target + '.pt'))

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        self.dqn = DQN(state_dim=self.s_dim,
                       action_dim=self.a_dim,
                       qnet=self.qnetwork,
                       qnet_target=self.qnetwork_target,
                       lr=self.lr,
                       gamma=self.gamma,
                       epsilon=self.epsilon).to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()
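
Both of the constructors in this section build their Q-networks with a NaiveMultiLayerPerceptron helper that is not included in the snippets. A minimal sketch of what such a class could look like is given below; it assumes the constructor arguments used above and that the activation names ('ReLU', 'Identity') are looked up in torch.nn, so the real implementation in the source project may differ.

import torch.nn as nn

class NaiveMultiLayerPerceptron(nn.Module):
    """Plain fully connected network; activation layers are resolved by name from torch.nn."""

    def __init__(self, input_dim, output_dim, num_neurons,
                 hidden_act_func='ReLU', out_act_func='Identity'):
        super(NaiveMultiLayerPerceptron, self).__init__()
        dims = [input_dim] + list(num_neurons)
        layers = []
        for in_d, out_d in zip(dims[:-1], dims[1:]):
            layers.append(nn.Linear(in_d, out_d))
            layers.append(getattr(nn, hidden_act_func)())  # e.g. nn.ReLU()
        layers.append(nn.Linear(dims[-1], output_dim))
        layers.append(getattr(nn, out_act_func)())          # e.g. nn.Identity()
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
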
    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        self.s_dim = 15
        self.a_dim = 6

        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 0.5
        self.eps_min = 0.02
        self.epsilon = 0.5
        self.init_sampling = 4000
        self.target_update_interval = 10

        self.data_file_qnet = DATA_FILE_QNET
        self.data_file_qnet_target = DATA_FILE_QNET_TARGET
        self.score_file = SCORE_FILE

        self.qnetwork = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                  output_dim=self.a_dim,
                                                  num_neurons=[256, 128, 64],
                                                  hidden_act_func='ReLU',
                                                  out_act_func='Identity').to(device)

        self.qnetwork_target = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                         output_dim=self.a_dim,
                                                         num_neurons=[256, 128, 64],
                                                         hidden_act_func='ReLU',
                                                         out_act_func='Identity').to(device)

        if os.path.isfile(self.data_file_qnet + '.pt'):
            self.qnetwork.load_state_dict(torch.load(self.data_file_qnet + '.pt', map_location=torch.device('cpu')))

        if os.path.isfile(self.data_file_qnet_target + '.pt'):
            self.qnetwork_target.load_state_dict(torch.load(self.data_file_qnet_target + '.pt', map_location=torch.device('cpu')))

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        self.dqn = DQN(state_dim=self.s_dim,
                       action_dim=self.a_dim,
                       qnet=self.qnetwork,
                       qnet_target=self.qnetwork_target,
                       lr=self.lr,
                       gamma=self.gamma,
                       epsilon=self.epsilon).to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()
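
The constructors above also wrap the two networks in a DQN object that later exposes choose_action and learn, but that class is not shown either. The following is a hedged sketch of a vanilla DQN learner consistent with how it is called in these examples: epsilon-greedy action selection, a TD target computed from the frozen target network, and an Adam optimizer. Everything beyond the constructor arguments (loss choice, buffer-style epsilon, optimizer) is an assumption.

import torch
import torch.nn as nn
import torch.nn.functional as F

class DQN(nn.Module):
    """Vanilla DQN learner: epsilon-greedy acting plus one TD-learning step per call to learn()."""

    def __init__(self, state_dim, action_dim, qnet, qnet_target, lr, gamma, epsilon):
        super(DQN, self).__init__()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.qnet = qnet
        self.qnet_target = qnet_target
        self.gamma = gamma
        self.opt = torch.optim.Adam(self.qnet.parameters(), lr=lr)
        # Stored as a buffer so .to(device) moves it along with the networks,
        # and so the agent can overwrite it later (self.dqn.epsilon = torch.tensor(...)).
        self.register_buffer('epsilon', torch.ones(1) * epsilon)

    def choose_action(self, state):
        if torch.rand(1).item() <= float(self.epsilon):       # explore
            return torch.randint(0, self.action_dim, (1,)).item()
        with torch.no_grad():                                  # exploit: greedy w.r.t. current Q
            return self.qnet(state).argmax(dim=-1).item()

    def learn(self, s, a, r, s_next, done):
        # TD target from the frozen target network; (1 - done) masks terminal transitions.
        with torch.no_grad():
            q_max = self.qnet_target(s_next).max(dim=-1, keepdim=True)[0]
            target = r + self.gamma * q_max * (1 - done.float())
        q_sa = self.qnet(s).gather(1, a.long())
        loss = F.smooth_l1_loss(q_sa, target)
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        return loss.item()
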
Example #3
    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        self.s_dim = 21  # number of state features
        self.a_dim = 14  # number of actions

        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 1.0
        self.eps_min = 0.01
        self.epsilon = 1.0
        self.init_sampling = 4000
        self.target_update_interval = 10

        self.data_file_qnet = 'rlagent_with_dueling_dqn_qnet'
        self.data_file_qnet_target = 'rlagent_with_dueling_dqn_qnet_target'

        self.qnetwork = DuelingQNet(input_dim=self.s_dim,
                                    output_dim=self.a_dim).to(device)

        self.qnetwork_target = DuelingQNet(input_dim=self.s_dim,
                                           output_dim=self.a_dim).to(device)

        if os.path.isfile(self.data_file_qnet + '.pt'):
            self.qnetwork.load_state_dict(
                torch.load(self.data_file_qnet + '.pt'))

        if os.path.isfile(self.data_file_qnet_target + '.pt'):
            self.qnetwork_target.load_state_dict(
                torch.load(self.data_file_qnet_target + '.pt'))

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        self.dqn = DQN(state_dim=self.s_dim,
                       action_dim=self.a_dim,
                       qnet=self.qnetwork,
                       qnet_target=self.qnetwork_target,
                       lr=self.lr,
                       gamma=self.gamma,
                       epsilon=self.epsilon).to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()
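
The dueling examples replace the plain MLP with a DuelingQNet, again not defined in these snippets. A minimal sketch of a standard dueling architecture with the same constructor signature (input_dim, output_dim) is shown below; the hidden-layer sizes are assumptions.

import torch.nn as nn

class DuelingQNet(nn.Module):
    """Dueling DQN head: shared trunk, then separate state-value and advantage streams."""

    def __init__(self, input_dim, output_dim):
        super(DuelingQNet, self).__init__()
        self.trunk = nn.Sequential(
            nn.Linear(input_dim, 128), nn.ReLU(),
            nn.Linear(128, 64), nn.ReLU())
        self.value_head = nn.Linear(64, 1)               # V(s)
        self.advantage_head = nn.Linear(64, output_dim)  # A(s, a)

    def forward(self, x):
        h = self.trunk(x)
        v = self.value_head(h)
        a = self.advantage_head(h)
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); mean subtraction keeps the decomposition identifiable.
        return v + a - a.mean(dim=-1, keepdim=True)
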
Example #4
class TerranRLAgentWithRawActsAndRawObs(TerranAgentWithRawActsAndRawObs):
    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        self.s_dim = 21  # number of state features
        self.a_dim = 14  # number of actions

        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 1.0
        self.eps_min = 0.01
        self.epsilon = 1.0
        self.init_sampling = 4000
        self.target_update_interval = 10

        self.data_file_qnet = 'rlagent_with_dueling_dqn_qnet'
        self.data_file_qnet_target = 'rlagent_with_dueling_dqn_qnet_target'

        self.qnetwork = DuelingQNet(input_dim=self.s_dim,
                                    output_dim=self.a_dim).to(device)

        self.qnetwork_target = DuelingQNet(input_dim=self.s_dim,
                                           output_dim=self.a_dim).to(device)

        if os.path.isfile(self.data_file_qnet + '.pt'):
            self.qnetwork.load_state_dict(
                torch.load(self.data_file_qnet + '.pt'))

        if os.path.isfile(self.data_file_qnet_target + '.pt'):
            self.qnetwork_target.load_state_dict(
                torch.load(self.data_file_qnet_target + '.pt'))

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        self.dqn = DQN(state_dim=self.s_dim,
                       action_dim=self.a_dim,
                       qnet=self.qnetwork,
                       qnet_target=self.qnetwork_target,
                       lr=self.lr,
                       gamma=self.gamma,
                       epsilon=self.epsilon).to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()

    def reset(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).reset()
        self.new_game()

    def new_game(self):
        self.base_top_left = None
        self.previous_state = None
        self.previous_action = None
        self.cum_reward = 0
        self.cum_loss = 0

        # epsilon scheduling
        # slowly decaying epsilon
        self.epsilon = max(
            self.eps_min,
            self.eps_max - self.eps_min * (self.episode_count / 50))
        self.dqn.epsilon = torch.tensor(self.epsilon).to(device)

    def get_state(self, obs):
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        idle_scvs = [scv for scv in scvs if scv.order_length == 0]
        command_centers = self.get_my_units_by_type(obs,
                                                    units.Terran.CommandCenter)
        supply_depots = self.get_my_units_by_type(obs,
                                                  units.Terran.SupplyDepot)
        completed_supply_depots = self.get_my_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        completed_barrackses = self.get_my_completed_units_by_type(
            obs, units.Terran.Barracks)
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)

        queued_marines = (completed_barrackses[0].order_length
                          if len(completed_barrackses) > 0 else 0)

        free_supply = (obs.observation.player.food_cap -
                       obs.observation.player.food_used)
        can_afford_supply_depot = obs.observation.player.minerals >= 100
        can_afford_barracks = obs.observation.player.minerals >= 150
        can_afford_marine = obs.observation.player.minerals >= 100

        enemy_scvs = self.get_enemy_units_by_type(obs, units.Terran.SCV)
        enemy_idle_scvs = [scv for scv in enemy_scvs if scv.order_length == 0]
        enemy_command_centers = self.get_enemy_units_by_type(
            obs, units.Terran.CommandCenter)
        enemy_supply_depots = self.get_enemy_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_completed_supply_depots = self.get_enemy_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_barrackses = self.get_enemy_units_by_type(
            obs, units.Terran.Barracks)
        enemy_completed_barrackses = self.get_enemy_completed_units_by_type(
            obs, units.Terran.Barracks)
        enemy_marines = self.get_enemy_units_by_type(obs, units.Terran.Marine)

        return (len(command_centers), len(scvs), len(idle_scvs),
                len(supply_depots), len(completed_supply_depots),
                len(barrackses), len(completed_barrackses), len(marines),
                queued_marines, free_supply, can_afford_supply_depot,
                can_afford_barracks, can_afford_marine,
                len(enemy_command_centers), len(enemy_scvs),
                len(enemy_idle_scvs), len(enemy_supply_depots),
                len(enemy_completed_supply_depots), len(enemy_barrackses),
                len(enemy_completed_barrackses), len(enemy_marines))

    def step(self, obs):
        super(TerranRLAgentWithRawActsAndRawObs, self).step(obs)

        #time.sleep(0.5)

        state = self.get_state(obs)
        state = torch.tensor(state).float().view(1, self.s_dim).to(device)
        action_idx = self.dqn.choose_action(state)
        action = self.actions[action_idx]
        done = obs.last()

        if self.previous_action is not None:
            experience = (self.previous_state.to(device),
                          torch.tensor(
                              self.previous_action).view(1, 1).to(device),
                          torch.tensor(obs.reward).view(1, 1).to(device),
                          state.to(device),
                          torch.tensor(done).view(1, 1).to(device))
            self.memory.push(experience)

        self.cum_reward += obs.reward
        self.previous_state = state
        self.previous_action = action_idx

        if obs.last():
            self.episode_count = self.episode_count + 1

            if len(self.memory) >= self.init_sampling:
                # training dqn
                sampled_exps = self.memory.sample(self.batch_size)
                sampled_exps = prepare_training_inputs(sampled_exps, device)
                self.dqn.learn(*sampled_exps)

            if self.episode_count % self.target_update_interval == 0:
                self.dqn.qnet_target.load_state_dict(
                    self.dqn.qnet.state_dict())

            if self.episode_count % self.print_every == 0:
                msg = (self.episode_count, self.cum_reward, self.epsilon)
                print(
                    "Episode : {:4.0f} | Cumulative Reward : {:4.0f} | Epsilon : {:.3f}"
                    .format(*msg))

            torch.save(self.dqn.qnet.state_dict(), self.data_file_qnet + '.pt')
            torch.save(self.dqn.qnet_target.state_dict(),
                       self.data_file_qnet_target + '.pt')

            #writer.add_scalar("Loss/train", self.cum_loss/obs.observation.game_loop, self.episode_count)
            writer.add_scalar("Score", self.cum_reward, self.episode_count)

        return getattr(self, action)(obs)
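
The step method above pushes (state, action, reward, next_state, done) tuples into ExperienceReplayMemory and converts a sampled minibatch with prepare_training_inputs before calling dqn.learn. Neither helper is shown in these examples; the sketch below is one way they could be written, assuming each stored element is already a tensor of shape (1, dim), as in the experience tuple built above.

import random
from collections import deque

import torch


class ExperienceReplayMemory:
    """Fixed-size FIFO buffer of (s, a, r, s_next, done) tuples."""

    def __init__(self, max_size):
        self.buffer = deque(maxlen=max_size)

    def push(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)


def prepare_training_inputs(sampled_exps, device='cpu'):
    """Stack a list of single-step experiences into batched tensors on the given device."""
    states, actions, rewards, next_states, dones = [], [], [], [], []
    for s, a, r, s_next, done in sampled_exps:
        states.append(s)
        actions.append(a)
        rewards.append(r)
        next_states.append(s_next)
        dones.append(done)
    return (torch.cat(states, dim=0).float().to(device),
            torch.cat(actions, dim=0).to(device),
            torch.cat(rewards, dim=0).float().to(device),
            torch.cat(next_states, dim=0).float().to(device),
            torch.cat(dones, dim=0).to(device))
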
class TerranRLAgentWithRawActsAndRawObs(TerranAgentWithRawActsAndRawObs):
    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        self.s_dim = 15
        self.a_dim = 6

        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 0.5
        self.eps_min = 0.02
        self.epsilon = 0.5
        self.init_sampling = 4000
        self.target_update_interval = 10

        self.data_file_qnet = DATA_FILE_QNET
        self.data_file_qnet_target = DATA_FILE_QNET_TARGET
        self.score_file = SCORE_FILE

        self.qnetwork = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                  output_dim=self.a_dim,
                                                  num_neurons=[256, 128, 64],
                                                  hidden_act_func='ReLU',
                                                  out_act_func='Identity').to(device)

        self.qnetwork_target = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                                         output_dim=self.a_dim,
                                                         num_neurons=[256, 128, 64],
                                                         hidden_act_func='ReLU',
                                                         out_act_func='Identity').to(device)

        if os.path.isfile(self.data_file_qnet + '.pt'):
            self.qnetwork.load_state_dict(torch.load(self.data_file_qnet + '.pt', map_location=torch.device('cpu')))

        if os.path.isfile(self.data_file_qnet_target + '.pt'):
            self.qnetwork_target.load_state_dict(torch.load(self.data_file_qnet_target + '.pt', map_location=torch.device('cpu')))

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        self.dqn = DQN(state_dim=self.s_dim,
                       action_dim=self.a_dim,
                       qnet=self.qnetwork,
                       qnet_target=self.qnetwork_target,
                       lr=self.lr,
                       gamma=self.gamma,
                       epsilon=self.epsilon).to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()

    def reset(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).reset()
        self.new_game()

    def new_game(self):
        self.base_top_left = None
        self.previous_state = None
        self.previous_action = None
        self.cum_reward = 0
        self.cum_loss = 0

        # epsilon scheduling
        # slowly decaying epsilon
        self.epsilon = max(self.eps_min, self.eps_max - self.eps_min * (self.episode_count / 50))
        self.dqn.epsilon = torch.tensor(self.epsilon).to(device)

    def get_state(self, obs):
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        idle_scvs = [scv for scv in scvs if scv.order_length == 0]
        command_centers = self.get_my_units_by_type(obs, units.Terran.CommandCenter)
        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        completed_supply_depots = self.get_my_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        completed_barrackses = self.get_my_completed_units_by_type(
            obs, units.Terran.Barracks)
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)
        queued_marines = (completed_barrackses[0].order_length
                          if len(completed_barrackses) > 0 else 0)
        free_supply = (obs.observation.player.food_cap -
                       obs.observation.player.food_used)
        can_afford_marine = obs.observation.player.minerals >= 100
        too_much_minerals = obs.observation.player.minerals >= 2000
        minerals_size = round(obs.observation.player.minerals / 10, 1)

        enemy_scvs = self.get_enemy_units_by_type(obs, units.Terran.SCV)
        enemy_idle_scvs = [scv for scv in enemy_scvs if scv.order_length == 0]
        enemy_command_centers = self.get_enemy_units_by_type(
            obs, units.Terran.CommandCenter)
        enemy_supply_depots = self.get_enemy_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_completed_supply_depots = self.get_enemy_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_barrackses = self.get_enemy_units_by_type(obs, units.Terran.Barracks)
        enemy_completed_barrackses = self.get_enemy_completed_units_by_type(
            obs, units.Terran.Barracks)
        enemy_Factory = self.get_enemy_units_by_type(obs, units.Terran.Factory)
        enemy_Starport = self.get_enemy_units_by_type(obs, units.Terran.Starport)
        enemy_Bunker = self.get_enemy_units_by_type(obs, units.Terran.Bunker)

        enemy_marines = self.get_enemy_units_by_type(obs, units.Terran.Marine)
        enemy_Marauder = self.get_enemy_units_by_type(obs, units.Terran.Marauder)
        enemy_Reaper = self.get_enemy_units_by_type(obs, units.Terran.Reaper)
        enemy_Hellion = self.get_enemy_units_by_type(obs, units.Terran.Hellion)
        enemy_Hellbat = self.get_enemy_units_by_type(obs, units.Terran.Hellbat)
        enemy_SiegeTank = self.get_enemy_units_by_type(obs, units.Terran.SiegeTank)
        enemy_Cyclone = self.get_enemy_units_by_type(obs, units.Terran.Cyclone)
        enemy_WidowMine = self.get_enemy_units_by_type(obs, units.Terran.WidowMine)
        enemy_Thor = self.get_enemy_units_by_type(obs, units.Terran.Thor)
        enemy_Viking = self.get_enemy_units_by_type(obs, units.Terran.VikingAssault)
        enemy_Medivac = self.get_enemy_units_by_type(obs, units.Terran.Medivac)
        enemy_Liberator = self.get_enemy_units_by_type(obs, units.Terran.Liberator)
        enemy_Raven = self.get_enemy_units_by_type(obs, units.Terran.Raven)
        enemy_Battlecruiser = self.get_enemy_units_by_type(obs, units.Terran.Battlecruiser)
        enemy_land_count = (len(enemy_marines) + len(enemy_Marauder) + len(enemy_Reaper) +
                            len(enemy_Hellion) + len(enemy_Hellbat) + len(enemy_SiegeTank) +
                            len(enemy_Cyclone) + len(enemy_WidowMine) + len(enemy_Thor))
        enemy_air_count = (len(enemy_Viking) + len(enemy_Medivac) + len(enemy_Liberator) +
                           len(enemy_Raven) + len(enemy_Battlecruiser))
        enemy_total_count = enemy_land_count + enemy_air_count

        killed_unit_count = obs.observation.score_cumulative.killed_value_units
        killed_building_count = obs.observation.score_cumulative.killed_value_structures
        collected_minerals = obs.observation.score_cumulative.collected_minerals
        spent_minerals = obs.observation.score_cumulative.spent_minerals
        idle_worker_time = obs.observation.score_cumulative.idle_worker_time
        idle_production_time = obs.observation.score_cumulative.idle_production_time

        return (
            len(scvs),
            len(supply_depots),
            len(barrackses),
            len(marines),
            round(obs.observation.player.minerals / 10, 0),
            round(spent_minerals / 10, 0),
            idle_production_time,
            killed_unit_count,
            killed_building_count,
            len(enemy_scvs),
            len(enemy_supply_depots),
            len(enemy_barrackses),
            len(enemy_Factory),
            len(enemy_Bunker),
            enemy_total_count
        )

    def step(self, obs):
        super(TerranRLAgentWithRawActsAndRawObs, self).step(obs)
        state_org = self.get_state(obs)

        state = torch.tensor(state_org).float().view(1, self.s_dim).to(device)
        action_idx = self.dqn.choose_action(state)
        action = self.actions[action_idx]
        done = obs.last()

        if self.previous_action is not None:
            experience = (self.previous_state.to(device),
                          torch.tensor(self.previous_action).view(1, 1).to(device),
                          torch.tensor(obs.reward).view(1, 1).to(device),
                          state.to(device),
                          torch.tensor(done).view(1, 1).to(device))
            self.memory.push(experience)

        self.previous_state = state
        self.previous_action = action_idx
        self.cum_reward = obs.reward

        if obs.last():
            supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
            marines = self.get_my_units_by_type(obs, units.Terran.Marine)
            barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
            print("barracks : ", len(barrackses), " supply : ", len(supply_depots))
            print("marines : ", len(marines))
            self.episode_count = self.episode_count + 1

            if len(self.memory) >= self.init_sampling:
                # training dqn
                sampled_exps = self.memory.sample(self.batch_size)
                sampled_exps = prepare_training_inputs(sampled_exps, device)
                self.dqn.learn(*sampled_exps)

            if self.episode_count % self.target_update_interval == 0:
                self.dqn.qnet_target.load_state_dict(self.dqn.qnet.state_dict())

            if self.episode_count % self.print_every == 0:
                msg = (self.episode_count, self.cum_reward, self.epsilon)
                print("Episode : {:4.0f} | Cumulative Reward : {:4.0f} | Epsilon : {:.3f}".format(*msg))

            torch.save(self.dqn.qnet.state_dict(), self.data_file_qnet + '.pt')
            torch.save(self.dqn.qnet_target.state_dict(), self.data_file_qnet_target + '.pt')

            scores_window.append(obs.reward)  # save most recent reward
            win_rate = scores_window.count(1) / len(scores_window) * 100
            tie_rate = scores_window.count(0) / len(scores_window) * 100
            lost_rate = scores_window.count(-1) / len(scores_window) * 100

            scores.append([win_rate, tie_rate, lost_rate])  # save most recent score(win_rate, tie_rate, lost_rate)
            with open(self.score_file + '.txt', "wb") as fp:
                pickle.dump(scores, fp)

            # writer.add_scalar("Loss/train", self.cum_loss/obs.observation.game_loop, self.episode_count)
            # writer.add_scalar("Score", self.cum_reward, self.episode_count)

        return getattr(self, action)(obs)
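
None of the examples show how the agent is actually driven. For context, a typical launch script for a PySC2 raw-interface agent looks roughly like the following; the map name, opponent race and difficulty, step_mul, and episode count are placeholder assumptions rather than values taken from the source.

from absl import app
from pysc2.env import run_loop, sc2_env
from pysc2.lib import actions, features


def main(unused_argv):
    agent = TerranRLAgentWithRawActsAndRawObs()
    try:
        with sc2_env.SC2Env(
                map_name="Simple64",                       # assumed map
                players=[sc2_env.Agent(sc2_env.Race.terran),
                         sc2_env.Bot(sc2_env.Race.terran,
                                     sc2_env.Difficulty.very_easy)],
                agent_interface_format=features.AgentInterfaceFormat(
                    action_space=actions.ActionSpace.RAW,  # raw actions/observations, matching the agent
                    use_raw_units=True,
                    raw_resolution=64),
                step_mul=8,
                disable_fog=True) as env:
            run_loop.run_loop([agent], env, max_episodes=1000)
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    app.run(main)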