def transform_action_to_function(action, obs):
    """Decode a flat action index into a screen move relative to the marine.

    The index is read as a cell of a square grid whose side is
    ``2 * _ACTION_RANGE + 1``: the row yields the vertical offset and the
    column yields the horizontal offset. Each offset is shifted by
    ``_ACTION_RANGE + 1``, added to the marine's coordinate, passed through
    ``MoveToBeaconAgent.check_bounds`` and finally handed to ``move_screen``.

    Args:
        action: Flat numeric action index.
        obs: Observation forwarded to ``get_marine`` to locate the marine.

    Returns:
        The result of ``move_screen`` for the bounded target coordinates.
    """
    grid_side = 2 * _ACTION_RANGE + 1
    shift = _ACTION_RANGE + 1

    # NOTE(review): for a non-negative integer action the row/column index
    # lies in 0..2R, so subtracting R + 1 gives offsets in -(R+1)..(R-1),
    # which is not centred on zero — confirm this is intended.
    dy = math.floor(action / grid_side) - shift
    dx = action % grid_side - shift

    unit = get_marine(obs)
    target_x = MoveToBeaconAgent.check_bounds(unit.x + dx)
    target_y = MoveToBeaconAgent.check_bounds(unit.y + dy)
    return move_screen(target_x, target_y)
def step(self, obs):
    """Process one observation and return the next action to issue.

    On the first observation, ``self.ql.descend_epsilon()`` is called and
    the army-select action is returned. On every other observation the
    marine/beacon state is built with ``state_of_marine`` and an action is
    obtained from ``self.ql``: via ``get_action`` (which also receives
    ``self.calc_reward()``) when ``_LEARN`` is truthy, otherwise via
    ``choose_action``.

    Args:
        obs: The current observation (must provide ``first()``).

    Returns:
        The function call built by ``actions.FUNCTIONS.select_army`` on the
        first observation, otherwise by ``Act.action_to_function``.
    """
    super(MoveToBeaconAgent, self).step(obs)

    if obs.first():
        self.ql.descend_epsilon()
        return actions.FUNCTIONS.select_army("select")

    unit = get_marine(obs)
    target = get_beacon(obs)
    current_state = state_of_marine(
        unit, target, _SCREEN, self._DISTANCE_WINDOW
    )

    selected = (
        self.ql.get_action(current_state, self.calc_reward())
        if _LEARN
        else self.ql.choose_action(current_state)
    )

    # NOTE: a guard that set old_reward to -1 and returned no_op when the
    # chosen action was not among the available actions is disabled here.
    return Act.action_to_function(selected, unit, _SCREEN)
def transform_action_to_function(action, obs):
    """Map a discrete action index (0-7) to a directional move function.

    The index selects a member of ``Act.Actions`` in this order:
    down_left, down, down_right, up_left, up, up_right, right, left.
    The member is passed to ``Act.action_to_function`` together with the
    marine found by ``get_marine`` and the screen size read from the second
    dimension of ``obs.observation.feature_screen``.

    Args:
        action: Action index; compared by equality against 0..7.
        obs: Observation providing the feature screen and the marine.

    Returns:
        The result of ``Act.action_to_function`` for the matching
        direction, or ``None`` when ``action`` matches none of 0..7.
    """
    screen = obs.observation.feature_screen.shape[1]
    marine = get_marine(obs)

    # Position in this tuple is the action index it corresponds to.
    direction_names = (
        "down_left",
        "down",
        "down_right",
        "up_left",
        "up",
        "up_right",
        "right",
        "left",
    )
    for index, direction in enumerate(direction_names):
        if action == index:
            # Resolve the enum member only for the matching index.
            member = getattr(Act.Actions, direction)
            return Act.action_to_function(member, marine, screen)
    return None