Example #1
class Botty(base_agent.BaseAgent):

    ## Constructor.
    #  Initializes the RL brain and the game state.
    def __init__(self):
        super(Botty, self).__init__()
        self.strategy_manager = RLBrain(
            smart_actions)  # keeping default rates for now.
        self.state = GameState()

        # If we want predefined initialization actions, we can hard-code them here.
        self.action_list = []
        self.prev_action = None
        self.prev_state = None
        self.prev_killed_units = 0
        self.prev_value_units = 0
        self.prev_mineral_rate = 0
        self.prev_vespene_rate = 0
        self.base = 'right'
        self.building_queue = BuildingQueue()
        self.unit_queue = UnitQueue()
        self.research_queue = ResearchQueue()

    ## Sets the location of the base for use by the AI.
    #  @param self The object pointer calling the function
    #  @param obs The observation maps
    def init_base(self, obs):
        # nonzero() on a 2D feature layer returns (row, col) indices, i.e. (y, x).
        player_y, player_x = (
            obs.observation['minimap'][_PLAYER_RELATIVE] == _PLAYER_SELF).nonzero()

        if player_x.any() and player_x.mean() <= _MAP_SIZE // 2:
            self.base = 'left'
        else:
            self.base = 'right'

    ##  1. Reduce the state.
    #   2. Allow the brain to learn based on the previous action, state, & reward.
    #   3. Choose an action based on the current state.
    #   4. Update the previous action & state.
    #   5. Do the action. My current idea is to store many actions in an action list.
    #      This will allow our abstracted actions to do a lot more per action.
    #   @param self The object pointer calling the function.
    #   @param obs The observation of the current step.
    #   @return An SC2 action (FunctionCall) for the environment to execute.
    def step(self, obs):
        super(Botty, self).step(obs)

        # Determine which side of the map our base is on (left or right). Works for maps with two base positions.
        if not self.prev_state and not self.prev_action:
            self.init_base(obs)

        if self.action_list:
            turn_action = self.action_list.pop()

            if turn_action in obs.observation['available_actions']:
                return turn_action
            else:
                return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])

        self.state.update(obs)
        self.reward_and_learn(obs)

        # Make sure the current state has a row in the Q-table, then choose an action.
        if self.state not in self.strategy_manager.QTable.index:
            self.strategy_manager.add_state(self.state)
        action = self.strategy_manager.choose_action(self.state)

        self.prev_state, self.prev_action = self.state, action

        # Get the abstracted action functions out of the actions.py module (imported as our_actions).

        self.action_list = self.get_action_list(action, obs)
        turn_action = self.action_list.pop()

        if turn_action in obs.observation['available_actions']:
            return turn_action
        else:
            return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])

    ## Takes information about the current game state, creates a 'reward'
    #  based on how good the current state is, and passes the reward to Brain.
    #  @param self The object pointer calling the function
    #  @param obs The observation map.
    def reward_and_learn(self, obs):
        if self.prev_action and self.prev_state:
            # Update the reward; we're going to need to give it to the Brain.
            killed_units = obs.observation['score_cumulative'][5]   # killed_value_units
            value_units = obs.observation['player'][4]              # food_cap in pysc2's player layer
            mineral_rate = obs.observation['score_cumulative'][9]   # collection_rate_minerals
            vespene_rate = obs.observation['score_cumulative'][10]  # collection_rate_vespene

            reward = 0
            if killed_units > self.prev_killed_units:
                reward += 0.25
            if value_units > self.prev_value_units:
                reward += 0.5
            if mineral_rate > self.prev_mineral_rate:
                reward += -.1
            if vespene_rate > self.prev_vespene_rate:
                reward += 0.15

            self.prev_killed_units = killed_units
            self.prev_value_units = value_units
            self.prev_mineral_rate = mineral_rate
            self.prev_vespene_rate = vespene_rate

            # TODO: finish the reward logic.
            self.strategy_manager.learn(self.prev_state, self.state,
                                        self.prev_action, reward)

    ## Takes in an action and, if it is one that needs specific parameters, passes those to it.
    # @param self The object pointer calling the function
    # @param action_str A string containing one of the actions available to the AI.
    # @param obs The observation maps
    def get_action_list(self, action_str, obs):
        """ This function will set up the appropriate args for the various actions."""
        if 'moveview' in action_str:
            funcall, x, y = action_str.split('_')
            action_function = getattr(our_actions, funcall)
            return action_function(int(x), int(y))

        action_function = getattr(our_actions, action_str)

        if action_str == 'no_op':
            return action_function()
        elif action_str == 'build_building':
            building = self.building_queue.dequeue(obs)
            target = self.get_building_target(obs, building)
            return action_function(obs, building, target)
        elif action_str == 'build_units':
            return action_function(self.unit_queue.dequeue(obs))
        elif action_str == 'build_worker':
            return action_function(actions.FUNCTIONS.Train_Drone_quick.id)
        elif action_str == 'research':
            return action_function(self.research_queue.dequeue(obs))
        elif action_str == 'attack':
            return action_function(obs)
        elif action_str == 'defend':
            unit_type = obs.observation['screen'][_UNIT_TYPE]
            # nonzero() returns (row, col) indices, i.e. (y, x).
            hatchery_y, hatchery_x = (unit_type == Zerg.Hatchery).nonzero()
            return action_function(hatchery_x.mean() + 10,
                                   hatchery_y.mean() + 10)
        elif action_str == 'return_to_base':
            unit_type = obs.observation['screen'][_UNIT_TYPE]
            hatchery_y, hatchery_x = (unit_type == Zerg.Hatchery).nonzero()
            return action_function(hatchery_x.mean() + 10, hatchery_y.mean() + 10)

        return [actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])]

    ## Whenever we build a building, we call this function. If it is a
    # building with special requirements, we fulfill those.
    # We also use an offset to make sure the building does not overlap with any other buildings.
    # @param obs The observation maps
    # @param building A macro used to refer to specific buildings.
    # @return The location where we are building.
    @staticmethod
    def get_building_target(obs, building):
        unit_type = obs.observation['screen'][_UNIT_TYPE]
        if building == _BUILD_EXTRACTOR:
            vespene_y, vespene_x = (
                unit_type == _NEUTRAL_VESPENE_GEYSER).nonzero()
            # Two options: use a classifier to group the vespene coordinates,
            # OR choose randomly and hope we don't get a unit.
            # For now I will do the latter (a sketch of the grouping option
            # appears after the class).
            i = random.randint(0, len(vespene_y) - 1)
            return [vespene_x[i], vespene_y[i]]
        else:
            # Building may not pass into dict correctly as a key.
            x_offset, y_offset = building_offsets[building]
            # nonzero() returns (row, col) indices, i.e. (y, x).
            hatchery_y, hatchery_x = (unit_type == Zerg.Hatchery).nonzero()
            return [hatchery_x.mean() + x_offset, hatchery_y.mean() + y_offset]

    ## Called in order to move a set of coordinates by some distance, depending
    # on whether the base is on the right or left side of the map.
    # @param self The object pointer calling the function
    # @param x The initial x
    # @param x_distance The distance between the initial and final x
    # @param y The initial y
    # @param y_distance The distance between the initial and final y
    # @return The transformed x and y.
    def transform_location(self, x, x_distance, y, y_distance):
        if self.base == 'right':
            return [x - x_distance, y - y_distance]

        return [x + x_distance, y + y_distance]
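
The grouping option mentioned in get_building_target could look roughly like the sketch below. It is only an illustration under assumptions: the helper name pick_geyser_target and the use of scipy.ndimage are not part of the original code. It splits the geyser pixels into connected blobs and returns one blob's centroid as a screen target.

# Hypothetical helper sketching the grouping option for vespene coordinates.
# Assumes SciPy is available; not part of the agent above.
import numpy as np
from scipy import ndimage


def pick_geyser_target(unit_type, geyser_id):
    """Group geyser pixels into connected blobs and return one blob's centroid."""
    mask = (unit_type == geyser_id)
    labels, num_blobs = ndimage.label(mask)      # each geyser becomes one labelled blob
    if num_blobs == 0:
        return None                              # no geyser visible on screen
    blob = np.random.randint(1, num_blobs + 1)   # pick one geyser at random
    ys, xs = np.nonzero(labels == blob)          # nonzero() gives (row, col) == (y, x)
    return [int(xs.mean()), int(ys.mean())]      # screen target as [x, y]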
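
For context, here is a minimal sketch of how an agent like Botty might be launched with pysc2's run loop. The map name, races, screen/minimap sizes, and step_mul are assumptions, and the exact SC2Env keyword arguments depend on the (older) pysc2 release whose observation layout this agent uses.

# Hypothetical launcher; the keyword arguments follow the older pysc2 releases
# that expose obs.observation['minimap'] / ['screen'] as used by this agent.
from absl import app
from pysc2.env import run_loop, sc2_env


def main(unused_argv):
    agent = Botty()
    with sc2_env.SC2Env(
            map_name='Simple64',           # assumed map
            agent_race='Z',                # Zerg, to match the Drone/Hatchery logic
            bot_race='R',                  # random-race built-in bot
            difficulty='1',                # very easy
            screen_size_px=(84, 84),
            minimap_size_px=(64, 64),
            step_mul=8,
            visualize=False) as env:
        run_loop.run_loop([agent], env)


if __name__ == '__main__':
    app.run(main)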