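# This section assumes the following are imported or defined earlier in the
# module (the pysc2 pieces are standard; the rest are project-local and their
# exact import paths are not shown here):
#     import random
#     from pysc2.agents import base_agent
#     from pysc2.lib import actions
#     RLBrain, smart_actions, GameState, BuildingQueue, UnitQueue,
#     ResearchQueue, our_actions, building_offsets, Zerg, and the
#     _PLAYER_RELATIVE / _PLAYER_SELF / _MAP_SIZE / _UNIT_TYPE /
#     _BUILD_EXTRACTOR / _NEUTRAL_VESPENE_GEYSER constants.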
class Botty(base_agent.BaseAgent):
    ## Constructor: initializes the RL brain and the game state.
    def __init__(self):
        super(Botty, self).__init__()
        self.strategy_manager = RLBrain(smart_actions)  # keeping default rates for now.
        # If we want predefined initialization actions, we can hard-code values in here.
        self.state = GameState()
        self.action_list = []
        self.prev_action = None
        self.prev_state = None
        self.prev_killed_units = 0
        self.prev_value_units = 0
        self.prev_mineral_rate = 0
        self.prev_vespene_rate = 0
        self.base = 'right'
        self.building_queue = BuildingQueue()
        self.unit_queue = UnitQueue()
        self.research_queue = ResearchQueue()

    ## Sets the location of the base for use by the AI.
    # @param self The object pointer calling the function.
    # @param obs The observation maps.
    def init_base(self, obs):
        x, y = (obs.observation['minimap'][_PLAYER_RELATIVE] == _PLAYER_SELF).nonzero()
        if y.any() and y.mean() <= _MAP_SIZE // 2:
            self.base = 'left'
        else:
            self.base = 'right'

    ## 1. Reduce the state.
    # 2. Allow the brain to learn from the previous action, state, and reward.
    # 3. Choose an action based on the current state.
    # 4. Update the previous action and state.
    # 5. Do the action. The current idea is to store many actions in an action
    #    list, which lets each abstracted action do a lot more per step.
    # @param self The object pointer calling the function.
    # @param obs The observation of the current step.
    # @return A FunctionCall for SC2 to execute.
    def step(self, obs):
        super(Botty, self).step(obs)
        # Tells us which side our base is on, left or right. Works for maps with two base positions.
        if not self.prev_state and not self.prev_action:
            self.init_base(obs)

        if self.action_list:
            turn_action = self.action_list.pop()
            # available_actions holds function ids, so compare against the call's id.
            if turn_action.function in obs.observation['available_actions']:
                return turn_action
            return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])

        self.state.update(obs)
        self.reward_and_learn(obs)

        if self.state not in self.strategy_manager.QTable.index:
            self.strategy_manager.add_state(self.state)
        action = self.strategy_manager.choose_action(self.state)
        self.prev_state, self.prev_action = self.state, action

        # Gets the abstracted action functions out of the actions.py (as our_actions) file.
        self.action_list = self.get_action_list(action, obs)
        turn_action = self.action_list.pop()
        if turn_action.function in obs.observation['available_actions']:
            return turn_action
        return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])

    ## Takes information about the current game state, creates a reward based
    # on how good the current state is, and passes the reward to the Brain.
    # @param self The object pointer calling the function.
    # @param obs The observation maps.
    def reward_and_learn(self, obs):
        if self.prev_action and self.prev_state:
            # Update the reward; we're going to need to give it to the Brain.
            killed_units = obs.observation['score_cumulative'][5]   # killed_value_units
            value_units = obs.observation['player'][4]
            mineral_rate = obs.observation['score_cumulative'][9]   # collection_rate_minerals
            vespene_rate = obs.observation['score_cumulative'][10]  # collection_rate_vespene

            reward = 0
            if killed_units > self.prev_killed_units:
                reward += 0.25
            if value_units > self.prev_value_units:
                reward += 0.5
            if mineral_rate > self.prev_mineral_rate:
                reward += -.1
            if vespene_rate > self.prev_vespene_rate:
                reward += 0.15

            self.prev_killed_units = killed_units
            self.prev_value_units = value_units
            self.prev_mineral_rate = mineral_rate
            self.prev_vespene_rate = vespene_rate
            # TODO: finish reward stuff.
            self.strategy_manager.learn(self.prev_state, self.state, self.prev_action, reward)

    ## Takes in an action and, if it is one that needs specific parameters, passes those to it.
    # @param self The object pointer calling the function.
    # @param action_str A string containing one of the actions available to the AI.
    # @param obs The observation maps.
    def get_action_list(self, action_str, obs):
        """Sets up the appropriate args for the various actions."""
        if 'moveview' in action_str:
            funcall, x, y = action_str.split('_')
            action_function = getattr(our_actions, funcall)
            return action_function(int(x), int(y))

        action_function = getattr(our_actions, action_str)
        if action_str == 'no_op':
            return action_function()
        elif action_str == 'build_building':
            building = self.building_queue.dequeue(obs)
            target = self.get_building_target(obs, building)
            return action_function(obs, building, target)
        elif action_str == 'build_units':
            return action_function(self.unit_queue.dequeue(obs))
        elif action_str == 'build_worker':
            return action_function(actions.FUNCTIONS.Train_Drone_quick.id)
        elif action_str == 'research':
            return action_function(self.research_queue.dequeue(obs))
        elif action_str == 'attack':
            return action_function(obs)
        elif action_str == 'defend':
            unit_type = obs.observation['screen'][_UNIT_TYPE]
            hatchery_x, hatchery_y = (unit_type == Zerg.Hatchery).nonzero()
            return action_function(hatchery_x.mean() + 10, hatchery_y.mean() + 10)
        elif action_str == 'return_to_base':
            unit_type = obs.observation['screen'][_UNIT_TYPE]
            hatchery_x, hatchery_y = (unit_type == Zerg.Hatchery).nonzero()
            return action_function(hatchery_x.mean() + 10, hatchery_y.mean() + 10)
        return [actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])]

    ## Whenever we build a building, we call this function. If it is a building
    # with special requirements, we fulfill those. We also use an offset so the
    # building does not overlap with any other buildings.
    # @param obs The observation maps.
    # @param building A macro used to refer to a specific building.
    # @return The location where we are building.
    @staticmethod
    def get_building_target(obs, building):
        unit_type = obs.observation['screen'][_UNIT_TYPE]
        if building == _BUILD_EXTRACTOR:
            vespene_y, vespene_x = (unit_type == _NEUTRAL_VESPENE_GEYSER).nonzero()
            # Two options: use a classifier to group vespene coordinates,
            # or choose randomly and hope we don't get a unit.
            # For now I will do the latter.
            i = random.randint(0, len(vespene_y) - 1)
            return [vespene_x[i], vespene_y[i]]
        else:
            # Building may not pass into the dict correctly as a key.
            x_offset, y_offset = building_offsets[building]
            hatchery_x, hatchery_y = (unit_type == Zerg.Hatchery).nonzero()
            return [hatchery_x.mean() + x_offset, hatchery_y.mean() + y_offset]

    ## Moves a set of coordinates by some distance, depending on whether the
    # base is on the right or left side of the map.
    # @param self The object pointer calling the function.
    # @param x The initial x.
    # @param x_distance The distance between the initial and final x.
    # @param y The initial y.
    # @param y_distance The distance between the initial and final y.
    # @return The transformed x and y.
    def transform_location(self, x, x_distance, y, y_distance):
        if self.base == 'right':
            return [x - x_distance, y - y_distance]
        return [x + x_distance, y + y_distance]
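# ---------------------------------------------------------------------------
# Minimal runner sketch (not part of the agent itself). It assumes the
# pysc2 1.x-style API that the observation indexing above relies on; newer
# pysc2 releases changed SC2Env's constructor. "Simple64" and the module name
# "botty" in the shell command are only examples.
#
# From the shell, pysc2's own runner can load the agent directly:
#     python -m pysc2.bin.agent --map Simple64 --agent botty.Botty --agent_race Z
def main(unused_argv):
    from pysc2.env import sc2_env, run_loop

    # A two-base-position map, as init_base() assumes; Botty trains Drones,
    # so it has to play Zerg.
    with sc2_env.SC2Env(map_name='Simple64',
                        agent_race='Z',
                        bot_race='T',
                        difficulty='1',
                        step_mul=8,
                        visualize=False) as env:
        run_loop.run_loop([Botty()], env, max_frames=10000)


if __name__ == '__main__':
    from absl import app
    app.run(main)  # absl parses pysc2's flags before the environment starts.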