def get_disallowed_actions(self, obs):
    """Return the action ids the agent must not pick in the current step.

    An action is disallowed when:
      * a refinery already appears to occupy the matching geyser,
      * the number of supply depots built has reached the number of
        known depot positions,
      * there is no free supply left (SCV training actions), or
      * two (or more) command centers are already present.

    Args:
        obs: The current environment observation, forwarded to the
            player-info helpers.

    Returns:
        list: Action ids (as produced by ``get_action_id``) to exclude.
    """
    disallowed_actions = []

    refinery_y, refinery_x = get_refineries(self.unit_type)

    # Build actions listed in the same order as the entries of ``geysers``:
    # top-left, top-right, bottom-left, bottom-right.
    refinery_actions = (
        ACTION_BUILD_REFINERY_TOP_LEFT,
        ACTION_BUILD_REFINERY_TOP_RIGHT,
        ACTION_BUILD_REFINERY_BOTTOM_LEFT,
        ACTION_BUILD_REFINERY_BOTTOM_RIGHT,
    )
    for index, refinery_action in enumerate(refinery_actions):
        geyser = geysers[index]
        # NOTE(review): x and y are tested independently, not as a pair;
        # this matches the original check -- confirm it is precise enough.
        if geyser[0] in refinery_x and geyser[1] in refinery_y:
            disallowed_actions.append(get_action_id(refinery_action))

    if self.supply_depots >= len(depots):
        disallowed_actions.append(get_action_id(ACTION_BUILD_SUPPLY_DEPOT))

    # Use an inequality so the actions stay blocked even if the used supply
    # ever exceeds the cap (a strict equality would let them through).
    if p.get_food_cap(obs) <= p.get_food_used(obs):
        disallowed_actions.append(get_action_id(ACTION_BUILD_SCV_START_CC))
        disallowed_actions.append(get_action_id(ACTION_BUILD_SCV_NEW_CC))

    # ">=" rather than "==" so an over-count still blocks another CC.
    if get_command_center_amount(self.unit_type) >= 2:
        disallowed_actions.append(get_action_id(ACTION_BUILD_CC))

    return disallowed_actions
def handle_last_action(self, obs):
    """Wrap up an episode on its final observation.

    Appends one row of episode statistics to the scores file, feeds the
    terminal reward to the Q-learner, resets all per-episode counters and
    persists the Q-table.

    Args:
        obs: The final observation of the episode.

    Returns:
        A no-op ``actions.FunctionCall``.
    """
    minerals = p.get_minerals(obs)
    vespene = p.get_vespene(obs)
    idle_workers = p.get_idle_worker_count(obs)
    command_centers = get_command_center_amount(self.unit_type)
    food_used = p.get_food_used(obs)
    food_cap = p.get_food_cap(obs)
    score = cs.get_score(obs)

    # Column order of one row in the scores file.
    data = [
        self.build_supply_depot_attempts,
        self.build_supply_depot_attempts_failed,
        self.supply_depots,
        self.build_refinery_attempts,
        self.build_refinery_attempts_failed,
        self.refineries,
        self.build_cc_attempts,
        self.build_cc_attempt_failed,
        minerals,
        vespene,
        idle_workers,
        command_centers,
        str(food_used) + "/" + str(food_cap),
        score,
    ]
    # Every value is rendered as text, left-aligned in a 15-character column.
    row = ''.join('{0:<15}'.format(str(value)) for value in data)

    # NOTE(review): absolute, machine-specific path -- consider making this
    # configurable; kept unchanged here to preserve existing behavior.
    with open('/home/kenn/Development/sc2-bot/CustomAgents/scores.txt', 'a+') as f:
        f.write(row + '\n')

    # If we score less than 4000 we are doing so poorly that we want to
    # learn it was very bad: the terminal reward is the score minus 4000.
    # Skip the update when no action was taken this episode, mirroring the
    # guard used in step().
    if self.previous_action is not None:
        self.qlearn.learn(str(self.previous_state), self.previous_action,
                          int(score) - 4000, 'terminal')

    # Reset all per-episode state.
    self.previous_state = None
    self.previous_action = None
    self.move_number = 0
    self.supply_depots = 0
    self.refineries = 0
    self.builder_iterator = 0
    self.build_supply_depot_attempts = 0
    self.build_supply_depot_attempts_failed = 0
    self.build_refinery_attempts = 0
    self.build_refinery_attempts_failed = 0
    self.build_cc_attempts = 0
    self.build_cc_attempt_failed = 0
    self.initializing = 0

    # Pass the compression by keyword: presumably q_table is a pandas
    # object, and newer pandas deprecates the positional form.
    self.qlearn.q_table.to_pickle(DATA_FILE + '.gz', compression='gzip')

    return actions.FunctionCall(_NOOP, [])
def step(self, obs):
    """Advance the agent by one game step and return the action to issue.

    Episode boundaries and the initialisation phase are delegated to their
    dedicated handlers. Otherwise the agent cycles through three phases
    tracked by ``self.move_number``:

      * phase 0 -- build the state vector, update the Q-learner with a
        zero reward for the previous action, pick a new action and start
        executing it;
      * phases 1 and 2 -- apply the previously chosen action again; after
        phase 2 the cycle restarts at phase 0.

    Args:
        obs: The current environment observation.

    Returns:
        The ``actions.FunctionCall`` to execute this step.
    """
    super(CollectMineralsAndGas, self).step(obs)

    self.unit_type = obs.observation['screen'][_UNIT_TYPE]

    if obs.last():
        return self.handle_last_action(obs)
    if obs.first():
        return self.handle_first_action(obs)
    if self.initializing < 2:
        return self.handle_initial_action(obs)

    phase = self.move_number

    if phase == 0:
        self.update_buildings_built()

        used = p.get_food_used(obs)
        cap = p.get_food_cap(obs)
        left_count = get_approx_scvs_in_rectangle_count(self.unit_type, (0, 0), (41, 83))
        right_count = get_approx_scvs_in_rectangle_count(self.unit_type, (42, 0), (83, 83))

        # Every feature is capped to keep the state space small.
        state = np.zeros(6)
        # Free supply, at most 7.
        state[0] = min(cap - used, 7)
        # Whole hundreds of minerals, at most 4.
        state[1] = min(int(p.get_minerals(obs) / 100.0), 4)
        # Idle workers, at most 3.
        state[2] = min(p.get_idle_worker_count(obs), 3)
        # Left-minus-right SCV difference, clamped to [-7, 8].
        side_delta = left_count - right_count
        state[3] = min(8, max(-7, side_delta))
        # Command center count.
        state[4] = get_command_center_amount(self.unit_type)
        # Supply depot count.
        state[5] = get_supply_depot_amount(self.unit_type)

        state_key = str(state)

        if self.previous_action is not None:
            # Intermediate steps are learned with a reward of 0.
            self.qlearn.learn(str(self.previous_state), self.previous_action, 0, state_key)

        forbidden = self.get_disallowed_actions(obs)
        chosen_id = self.qlearn.choose_action(state_key, forbidden)

        command, outcome = self.apply_action(obs, get_smart_action(int(chosen_id)))
        if outcome is None:
            # The action could not be applied: fall back to doing nothing.
            command = actions.FunctionCall(_NOOP, [])
            chosen_id = get_action_id(ACTION_DO_NOTHING)

        self.previous_state = state
        self.previous_action = chosen_id
        self.move_number += 1

        return command

    if phase in (1, 2):
        command, outcome = self.apply_action(obs, get_smart_action(self.previous_action))
        if outcome is None:
            # Record the fallback so later updates are credited to "do nothing".
            command = actions.FunctionCall(_NOOP, [])
            self.previous_action = get_action_id(ACTION_DO_NOTHING)

        if phase == 1:
            self.move_number += 1
        else:
            # Phase 2 completes the cycle.
            self.move_number = 0

        return command

    return actions.FunctionCall(_NOOP, [])