def calc_reward(self, game_map, cur_pos, new_head_pos):
    """Return the reward for moving this snake's head to ``new_head_pos``.

    Args:
        game_map: map wrapper exposing the raw ``game_map`` dict
            (width, food/obstacle positions, snake infos).
        cur_pos: list of this snake's current translated body coordinates.
        new_head_pos: candidate translated (x, y) coordinate for the head.

    Returns:
        int: +100 for landing on food, -100 for an obstacle, our own body,
        or an enemy snake's body; +20 when nothing notable happened.
    """
    width = game_map.game_map['width']
    reward = 0
    food_coords = util.translate_positions(
        game_map.game_map['foodPositions'], width)
    obstacle_coords = util.translate_positions(
        game_map.game_map['obstaclePositions'], width)
    if new_head_pos in food_coords:
        reward += 100
    if new_head_pos in obstacle_coords or new_head_pos in cur_pos:
        reward -= 100
    enemies = filter(lambda x: x['name'] != self.name,
                     game_map.game_map['snakeInfos'])
    enemy_positions = []
    for enemy in enemies:
        # BUG FIX: the original used append(), building a list of lists, so
        # the membership test below compared a single coordinate against
        # whole coordinate lists and could never match.  extend() flattens
        # every enemy's body into one coordinate list.
        enemy_positions.extend(
            util.translate_positions(enemy['positions'], width))
    if new_head_pos in enemy_positions:
        reward -= 100
    if reward == 0:
        reward += 20
    return reward
def create_state(game_map, player_coords):
    """Build a State describing where the nearest food lies relative to the
    snake's head and which of the four neighbouring tiles are blocked.

    Args:
        game_map: map wrapper exposing the raw ``game_map`` dict plus
            ``is_tile_available_for_movement`` / ``is_coordinate_out_of_bounds``.
        player_coords: translated body coordinates; index 0 is the head.

    Returns:
        State: with ``food_left/right/above/below``, ``on_food`` and
        ``obstacle_left/right/above/below`` flags set as appropriate.
    """
    width = game_map.game_map['width']
    state = State()
    food_coords = util.translate_positions(
        game_map.game_map['foodPositions'], width)
    head_coords = player_coords[0]
    if food_coords:
        # Closest food by Manhattan distance.  Min over (distance, coord)
        # tuples preserves the original tie-breaking on coordinate order.
        food_to_catch = min(
            ((util.get_manhattan_distance(head_coords, food), food)
             for food in food_coords))[1]
        if food_to_catch[0] < head_coords[0]:
            state.food_left = True
        if food_to_catch[0] > head_coords[0]:
            state.food_right = True
        if food_to_catch[1] < head_coords[1]:
            state.food_above = True
        if food_to_catch[1] > head_coords[1]:
            state.food_below = True
        if food_to_catch[0] == head_coords[0] and food_to_catch[1] == head_coords[1]:
            state.on_food = True
    # Probe each neighbouring tile exactly once (the original recomputed
    # every neighbour position twice in copy-pasted conditionals).
    for offset, attr in (((1, 0), 'obstacle_right'),
                         ((-1, 0), 'obstacle_left'),
                         ((0, -1), 'obstacle_above'),
                         ((0, 1), 'obstacle_below')):
        neighbour = calc_new_pos(head_coords, offset)
        if (not game_map.is_tile_available_for_movement(neighbour)
                or game_map.is_coordinate_out_of_bounds(neighbour)):
            setattr(state, attr, True)
    return state
def get_next_move(self, game_map):
    """Return a movable direction with fixed preference LEFT > RIGHT > UP,
    falling back to DOWN when none of those moves is possible.

    Fixes over the original: removed the debug ``print`` of the snake
    object, the can-move-DOWN lookup whose result was never used, and the
    unused position translation that hard-coded a map width of 46.

    Args:
        game_map: map wrapper exposing ``can_snake_move_in_direction``.

    Returns:
        util.Direction: the chosen direction.
    """
    move = util.Direction.DOWN
    if game_map.can_snake_move_in_direction(self.snake_id, util.Direction.UP):
        move = util.Direction.UP
    if game_map.can_snake_move_in_direction(self.snake_id, util.Direction.RIGHT):
        move = util.Direction.RIGHT
    if game_map.can_snake_move_in_direction(self.snake_id, util.Direction.LEFT):
        move = util.Direction.LEFT
    return move
def get_next_move(self, game_map):
    """Perform one Q-learning step: pick an action for the current state,
    then update the Q-table entry with the standard Bellman update.

    Args:
        game_map: map wrapper exposing the raw ``game_map`` dict.

    Returns:
        util.Direction: the chosen direction.
    """
    width = game_map.game_map['width']
    player = next(
        filter(lambda x: x['name'] == self.name,
               game_map.game_map['snakeInfos']), None)
    player_coords = util.translate_positions(player['positions'], width)

    cur_state = self.create_state(game_map, player_coords)
    # Hoist the repeatedly recomputed dict key.
    cur_key = cur_state.get_tuple()
    if cur_key not in self.qtable:
        self.qtable[cur_key] = [0, 0, 0, 0]

    learning_rate = .7
    discount_rate = .9
    # 0 disables exploration entirely (pure exploitation).
    # NOTE(review): confirm this is intended outside of training runs.
    exploration_chance = 0
    if exploration_chance > random.random():
        direction_num = random.randrange(4)
    else:
        q_row = self.qtable[cur_key]
        direction_num = q_row.index(max(q_row))

    # Action index -> direction (0=DOWN, 1=UP, 2=LEFT, 3=RIGHT).  Indexing
    # raises IndexError on an impossible value instead of silently leaving
    # `direction` unbound like the original if/elif chain did.
    directions = (util.Direction.DOWN, util.Direction.UP,
                  util.Direction.LEFT, util.Direction.RIGHT)
    direction = directions[direction_num]

    new_pos = (player_coords[0][0] + direction.value[1][0],
               player_coords[0][1] + direction.value[1][1])
    new_state = self.create_state(game_map, [new_pos])
    new_key = new_state.get_tuple()
    if new_key not in self.qtable:
        self.qtable[new_key] = [0, 0, 0, 0]

    # Bellman update: Q <- (1-a)*Q + a*(reward + g*max Q(s')).
    curr_qvalue = self.qtable[cur_key][direction_num]
    memory = (1 - learning_rate) * curr_qvalue
    reward = self.calc_reward(game_map, player_coords, new_pos)
    max_quality_step = max(self.qtable[new_key])
    self.qtable[cur_key][direction_num] = memory + learning_rate * (
        reward + discount_rate * max_quality_step)
    return direction
def get_next_move(self, game_map):
    """DQN step: let the agent act on the current state, store the
    resulting transition for replay, and return the chosen direction.

    Args:
        game_map: map wrapper exposing the raw ``game_map`` dict.

    Returns:
        util.Direction: the chosen direction.
    """
    width = game_map.game_map['width']
    player = next(filter(lambda x: x['name'] == self.name,
                         game_map.game_map['snakeInfos']), None)
    player_coords = util.translate_positions(player['positions'], width)

    curr_state = state.create_state(game_map, player_coords)
    direction_num = self.agent.act(curr_state.get_array())

    # Action index -> direction (0=DOWN, 1=UP, 2=LEFT, 3=RIGHT).  Indexing
    # raises IndexError on an out-of-range action instead of leaving
    # `direction` unbound like the original if/elif chain did.
    directions = (util.Direction.DOWN, util.Direction.UP,
                  util.Direction.LEFT, util.Direction.RIGHT)
    direction = directions[direction_num]

    new_pos = self.calc_new_pos(player_coords[0], direction.value[1])
    next_state = state.create_state(game_map, [new_pos])
    reward = self.calc_reward(game_map, player_coords[0], new_pos)

    # Remember the (state, action, reward, next_state) transition so the
    # agent can train on it later.
    self.agent.remember(curr_state.get_array(), direction_num, reward,
                        next_state.get_array())
    return direction
def get_next_move(self, game_map):
    """Greedy food chaser: head toward the nearest food by Manhattan
    distance, preferring the axis with the larger gap.

    For each relative food position the original tried a primary direction,
    then a secondary, and otherwise returned a fixed fallback — that
    fallback is deliberately NOT safety-checked, matching the original.
    The four copy-pasted preference chains are collapsed into one
    preference tuple plus a shared safety check.

    Args:
        game_map: map wrapper exposing the raw ``game_map`` dict.

    Returns:
        util.Direction: the chosen direction (DOWN when there is no food
        or the head is already on the food tile).
    """
    width = game_map.game_map['width']
    players = game_map.game_map['snakeInfos']
    player = next(filter(lambda x: x['name'] == self.name, players), None)
    player_pos = util.translate_positions(player['positions'], width)
    head = player_pos[0]

    food_coords = util.translate_positions(
        game_map.game_map['foodPositions'], width)
    if not food_coords:
        return util.Direction.DOWN

    # Closest food by Manhattan distance; min over (distance, coord)
    # tuples preserves the original tie-breaking on coordinate order.
    food_to_catch = min(
        ((util.get_manhattan_distance(head, food), food)
         for food in food_coords))[1]

    # Per-direction availability checks, dispatched instead of copy-pasted.
    available = {
        util.Direction.RIGHT: self.right_available,
        util.Direction.LEFT: self.left_available,
        util.Direction.UP: self.up_available,
        util.Direction.DOWN: self.down_available,
    }

    def is_safe(direction):
        # A move is safe when the tile is free and it is not the snake's
        # own next body segment.
        return (available[direction](head, game_map)
                and not self.next_in_snake(direction, player_pos))

    if food_to_catch[0] > head[0]:
        preferred = (util.Direction.RIGHT, util.Direction.DOWN,
                     util.Direction.UP)
    elif food_to_catch[0] < head[0]:
        preferred = (util.Direction.LEFT, util.Direction.DOWN,
                     util.Direction.UP)
    elif food_to_catch[1] > head[1]:
        preferred = (util.Direction.DOWN, util.Direction.RIGHT,
                     util.Direction.LEFT)
    elif food_to_catch[1] < head[1]:
        preferred = (util.Direction.UP, util.Direction.RIGHT,
                     util.Direction.LEFT)
    else:
        # Head is already on the food tile.
        return util.Direction.DOWN

    # Try primary then secondary with the safety check; the tertiary
    # fallback is returned unchecked (original behaviour).
    for direction in preferred[:2]:
        if is_safe(direction):
            return direction
    return preferred[2]