def __init__(self, state, actions):
    """Decide how many ships to build and rank the yards to spawn at.

    Args:
        state: game state; this method reads ``my_yards``,
            ``my_ship_pos`` and ``dist`` from it.
        actions: pending actions; ``actions.yards`` lists the yards
            that have not been given an action yet.

    After the call ``self.spawn_pos`` holds the positions of those
    yards, most preferred spawn location first.
    """
    # work out how many ships should be built
    self.num_ships(state, actions)

    # positions of the yards we can still issue an action for
    self.spawn_pos = np.array(
        [state.my_yards[yard] for yard in actions.yards], dtype=int)
    candidates = self.spawn_pos

    # per candidate yard: number of our ships at distance <= 3
    ship_yard_pairs = np.ix_(state.my_ship_pos, candidates)
    crowding = np.sum(state.dist[ship_yard_pairs] <= 3, axis=0, initial=0)

    # candidate yards that working_yards(state) does not report
    # NOTE(review): np.in1d is deprecated as of NumPy 2.0 in favour of
    # np.isin - consider switching once the NumPy version is confirmed
    abandoned = ~np.in1d(candidates, working_yards(state))

    # ascending sort: yards with few of our ships nearby come first and
    # yards flagged above are pushed back by a penalty of 10
    penalty = crowding + 10 * abandoned.astype(int)
    self.spawn_pos = candidates[penalty.argsort()]
    return
def __init__(self, state, actions):
    """Decide how many ships to build and rank the yards to spawn at.

    Args:
        state: game state; this method reads ``my_yards``,
            ``my_ship_pos`` and ``dist`` from it.
        actions: pending actions; ``actions.yards`` lists the yards
            that have not been given an action yet.

    After the call ``self.spawn_pos`` holds the positions of those
    yards, most preferred spawn location first.
    """
    # work out how many ships should be built
    self.num_ships(state, actions)

    # positions of the yards we can still issue an action for
    self.spawn_pos = np.array(
        [state.my_yards[yard] for yard in actions.yards], dtype=int)
    candidates = self.spawn_pos

    # per candidate yard: number of our ships at distance <= 3
    ship_yard_pairs = np.ix_(state.my_ship_pos, candidates)
    crowding = np.sum(state.dist[ship_yard_pairs] <= 3, axis=0, initial=0)

    # candidate yards that working_yards(state) does not report
    # NOTE(review): np.in1d is deprecated as of NumPy 2.0 in favour of
    # np.isin - consider switching once the NumPy version is confirmed
    abandoned = ~np.in1d(candidates, working_yards(state))

    # ascending sort: yards with few of our ships nearby come first and
    # yards flagged above are pushed back by a penalty of 10
    penalty = crowding + 10 * abandoned.astype(int)
    self.spawn_pos = candidates[penalty.argsort()]
    return
def matrices(state, actions, targets):
    """Build per-ship move costs, threat masks and threat scores.

    Every ship in ``actions.ships`` gets one row and every one of the
    ``state.map_size ** 2`` sites gets one column.

    Args:
        state: game state (ship/yard positions, cargo, ``dist``,
            ``halite_map``, ``map_size``).
        actions: pending actions; ``actions.ships`` lists the ships
            that still need a move.
        targets: provides ``destinations[ship]`` and the ranked
            candidate moves ``moves[ship]`` for each ship.

    Returns:
        Tuple ``(cost_matrix, threat_matrix, threat_scores)``, each of
        shape ``(len(actions.ships), state.map_size ** 2)``. Costs are
        zero or negative, with ``-inf`` on sites farther than one step
        from the ship.
    """
    num_ships = len(actions.ships)
    shape = (num_ships, state.map_size**2)
    threat_matrix = np.zeros(shape, dtype=bool)
    threat_scores = np.zeros(shape)
    cost_matrix = np.zeros(shape)

    # distance (per state.dist) from every ship to its destination;
    # needed in full before the main loop for the tie-breaking rank
    dists_to_dest = np.zeros(num_ships)
    for row, ship in enumerate(actions.ships):
        pos, _ = state.my_ships[ship]
        dists_to_dest[row] = state.dist[pos, targets.destinations[ship]]

    # fill in one row of each matrix per ship
    for row, ship in enumerate(actions.ships):
        pos, hal = state.my_ships[ship]
        dest = targets.destinations[ship]

        # opponent ships considered dangerous: off a yard those with
        # cargo <= ours (the boolean adds 1 to the bound), on one of
        # our yards only those with strictly less cargo
        off_yard = (pos not in state.my_yard_pos)
        dangerous = state.opp_ship_pos[state.opp_ship_hal < (hal + off_yard)]
        danger_dist = np.amin(state.dist[dangerous, :], axis=0,
                              initial=state.map_size)

        # threatened sites: opponent yards and anything a dangerous
        # ship can reach in one step; working yards are cleared last
        # so they always count as safe
        threat_matrix[row, state.opp_yard_pos] = True
        threat_matrix[row, (danger_dist <= 1)] = True
        threat_matrix[row, working_yards(state)] = False

        # score: number of strictly lighter opponent ships within one
        # step of each site, with opponent yards fixed at 2
        lighter = state.opp_ship_pos[state.opp_ship_hal < hal]
        within_reach = state.dist[lighter, :] <= 1
        threat_scores[row, :] += np.sum(within_reach, axis=0, initial=0)
        threat_scores[row, state.opp_yard_pos] = 2

        # candidate moves cost 0, -10, ..., -40 in the order given
        # by targets.moves
        cost_matrix[row, targets.moves[ship]] = -10 * np.arange(5)

        # staying on a halite cell that is not the destination picks
        # up cargo that makes the ship vulnerable - penalize it unless
        # the ship is heading to one of our yards
        avoid_pickup = (
            (dest not in state.my_yard_pos)
            and (state.halite_map[pos] > 0)
            and (pos != dest)
        )
        if avoid_pickup:
            cost_matrix[row, pos] -= 100

        # heavy penalty for threatened sites
        cost_matrix[row, threat_matrix[row, :]] -= 1000

        # ships with more cargo get more weight, but ships with no
        # cargo at all get the largest weight
        if hal == 0:
            weight = 3
        else:
            cargo_rank = np.sum(hal > state.my_ship_hal)
            weight = 1 + cargo_rank / state.my_ship_hal.size

        # tie-break: a small bonus growing with the number of ships
        # that are farther from their destination than this one
        farther = np.sum(dists_to_dest[row] < dists_to_dest)
        weight += (farther / dists_to_dest.size) / 10

        cost_matrix[row, :] *= weight

        # sites more than one step away are not legal moves
        cost_matrix[row, (state.dist[pos, :] > 1)] = -np.inf

    return cost_matrix, threat_matrix, threat_scores
def __init__(self, state, actions, bounties, spawns):
    """Assign a destination to every pending ship and rank its moves.

    Args:
        state: game state (positions, ``dist``, ``sites``,
            ``map_size``, ``my_yard_pos``).
        actions: pending actions; ``actions.ships`` lists the ships
            that still need a move.
        bounties: passed through unchanged to ``self.rewards``.
        spawns: provides ``spawn_pos`` and ``ships_possible``.

    Sets ``self.num_ships`` and, when there is at least one ship,
    ``self.protected``, ``self.protection_radius``,
    ``self.destinations`` and ``self.values``; ``self.moves[ship]`` is
    reordered in place. With no ships only ``self.num_ships`` is set.
    """
    self.num_ships = len(actions.ships)

    # without ships there is nothing to assign
    if self.num_ships == 0:
        return

    # protect the working yards that are not expected to spawn a ship
    # this turn
    spawning = spawns.spawn_pos[0:spawns.ships_possible]
    guarded = np.setdiff1d(working_yards(state), spawning)

    # distance from each such yard to the closest opponent ship
    nearest_opp = np.amin(
        state.dist[np.ix_(state.opp_ship_pos, guarded)],
        axis=0, initial=state.map_size)

    # distance from each such yard to our own closest ship
    nearest_own = np.amin(
        state.dist[np.ix_(state.my_ship_pos, guarded)],
        axis=0, initial=state.map_size)

    # a yard needs protection once an opponent is at most two steps
    # farther from it than our nearest ship
    at_risk = nearest_opp <= (2 + nearest_own)
    self.protected = guarded[at_risk]
    self.protection_radius = nearest_opp[at_risk]

    # set up candidate moves for every ship and the distances on the
    # weighted graph
    self.geometry(state, actions)

    # the assignment gives each column to at most one ship, but several
    # ships should be able to return to the same yard, so the reward
    # columns start with (num_ships - 1) extra copies of every yard;
    # this lookup maps a column index back to a site
    yard_copies = np.tile(state.my_yard_pos, self.num_ships - 1)
    ind_to_site = np.append(yard_copies, state.sites)

    # one row of rewards per ship
    reward_rows = [self.rewards(ship, state, bounties)
                   for ship in actions.ships]
    cost_matrix = np.vstack(reward_rows)

    # solve the assignment problem, maximizing the total reward; the
    # result pairs ship_inds[i] with site_inds[i]
    # NOTE(review): `assignment` is presumably an alias of
    # scipy.optimize.linear_sum_assignment - confirm against imports
    ship_inds, site_inds = assignment(cost_matrix, maximize=True)

    # record the outcome per ship and sort its candidate moves
    self.destinations = {}
    self.values = {}
    for row, col in zip(ship_inds, site_inds):
        ship = actions.ships[row]
        target = ind_to_site[col]

        # remember where the ship goes and how much that is worth
        self.destinations[ship] = target
        self.values[ship] = cost_matrix[row, col]

        # moves ending closest to the destination come first
        dists_after_move = self.move_dists[ship][:, target]
        self.moves[ship] = self.moves[ship][dists_after_move.argsort()]

    return
def __init__(self, state, actions, bounties, spawns):
    """Assign a destination to every pending ship and rank its moves.

    Args:
        state: game state (positions, ``dist``, ``sites``,
            ``map_size``, ``my_yard_pos``).
        actions: pending actions; ``actions.ships`` lists the ships
            that still need a move.
        bounties: passed through unchanged to ``self.rewards``.
        spawns: provides ``spawn_pos`` and ``ships_possible``.

    Sets ``self.num_ships`` and, when there is at least one ship,
    ``self.protected``, ``self.protection_radius``,
    ``self.destinations`` and ``self.values``; ``self.moves[ship]`` is
    reordered in place. With no ships only ``self.num_ships`` is set.
    """
    self.num_ships = len(actions.ships)

    # without ships there is nothing to assign
    if self.num_ships == 0:
        return

    # protect the working yards that are not expected to spawn a ship
    # this turn
    spawning = spawns.spawn_pos[0:spawns.ships_possible]
    guarded = np.setdiff1d(working_yards(state), spawning)

    # distance from each such yard to the closest opponent ship
    nearest_opp = np.amin(
        state.dist[np.ix_(state.opp_ship_pos, guarded)],
        axis=0, initial=state.map_size)

    # distance from each such yard to our own closest ship
    nearest_own = np.amin(
        state.dist[np.ix_(state.my_ship_pos, guarded)],
        axis=0, initial=state.map_size)

    # a yard needs protection once an opponent is at most two steps
    # farther from it than our nearest ship
    at_risk = nearest_opp <= (2 + nearest_own)
    self.protected = guarded[at_risk]
    self.protection_radius = nearest_opp[at_risk]

    # set up candidate moves for every ship and the distances on the
    # weighted graph
    self.geometry(state, actions)

    # the assignment gives each column to at most one ship, but several
    # ships should be able to return to the same yard, so the reward
    # columns start with (num_ships - 1) extra copies of every yard;
    # this lookup maps a column index back to a site
    yard_copies = np.tile(state.my_yard_pos, self.num_ships - 1)
    ind_to_site = np.append(yard_copies, state.sites)

    # one row of rewards per ship
    reward_rows = [self.rewards(ship, state, bounties)
                   for ship in actions.ships]
    cost_matrix = np.vstack(reward_rows)

    # solve the assignment problem, maximizing the total reward; the
    # result pairs ship_inds[i] with site_inds[i]
    # NOTE(review): `assignment` is presumably an alias of
    # scipy.optimize.linear_sum_assignment - confirm against imports
    ship_inds, site_inds = assignment(cost_matrix, maximize=True)

    # record the outcome per ship and sort its candidate moves
    self.destinations = {}
    self.values = {}
    for row, col in zip(ship_inds, site_inds):
        ship = actions.ships[row]
        target = ind_to_site[col]

        # remember where the ship goes and how much that is worth
        self.destinations[ship] = target
        self.values[ship] = cost_matrix[row, col]

        # moves ending closest to the destination come first
        dists_after_move = self.move_dists[ship][:, target]
        self.moves[ship] = self.moves[ship][dists_after_move.argsort()]

    return
def matrices(state, actions, targets):
    """Build per-ship move costs, threat masks and threat scores.

    Every ship in ``actions.ships`` gets one row and every one of the
    ``state.map_size ** 2`` sites gets one column.

    Args:
        state: game state (ship/yard positions, cargo, ``dist``,
            ``halite_map``, ``map_size``).
        actions: pending actions; ``actions.ships`` lists the ships
            that still need a move.
        targets: provides ``destinations[ship]`` and the ranked
            candidate moves ``moves[ship]`` for each ship.

    Returns:
        Tuple ``(cost_matrix, threat_matrix, threat_scores)``, each of
        shape ``(len(actions.ships), state.map_size ** 2)``. Costs are
        zero or negative, with ``-inf`` on sites farther than one step
        from the ship.
    """
    num_ships = len(actions.ships)
    shape = (num_ships, state.map_size**2)
    threat_matrix = np.zeros(shape, dtype=bool)
    threat_scores = np.zeros(shape)
    cost_matrix = np.zeros(shape)

    # distance (per state.dist) from every ship to its destination;
    # needed in full before the main loop for the tie-breaking rank
    dists_to_dest = np.zeros(num_ships)
    for row, ship in enumerate(actions.ships):
        pos, _ = state.my_ships[ship]
        dists_to_dest[row] = state.dist[pos, targets.destinations[ship]]

    # fill in one row of each matrix per ship
    for row, ship in enumerate(actions.ships):
        pos, hal = state.my_ships[ship]
        dest = targets.destinations[ship]

        # opponent ships considered dangerous: off a yard those with
        # cargo <= ours (the boolean adds 1 to the bound), on one of
        # our yards only those with strictly less cargo
        off_yard = (pos not in state.my_yard_pos)
        dangerous = state.opp_ship_pos[state.opp_ship_hal < (hal + off_yard)]
        danger_dist = np.amin(state.dist[dangerous, :], axis=0,
                              initial=state.map_size)

        # threatened sites: opponent yards and anything a dangerous
        # ship can reach in one step; working yards are cleared last
        # so they always count as safe
        threat_matrix[row, state.opp_yard_pos] = True
        threat_matrix[row, (danger_dist <= 1)] = True
        threat_matrix[row, working_yards(state)] = False

        # score: number of strictly lighter opponent ships within one
        # step of each site, with opponent yards fixed at 2
        lighter = state.opp_ship_pos[state.opp_ship_hal < hal]
        within_reach = state.dist[lighter, :] <= 1
        threat_scores[row, :] += np.sum(within_reach, axis=0, initial=0)
        threat_scores[row, state.opp_yard_pos] = 2

        # candidate moves cost 0, -10, ..., -40 in the order given
        # by targets.moves
        cost_matrix[row, targets.moves[ship]] = -10 * np.arange(5)

        # staying on a halite cell that is not the destination picks
        # up cargo that makes the ship vulnerable - penalize it unless
        # the ship is heading to one of our yards
        avoid_pickup = (
            (dest not in state.my_yard_pos)
            and (state.halite_map[pos] > 0)
            and (pos != dest)
        )
        if avoid_pickup:
            cost_matrix[row, pos] -= 100

        # heavy penalty for threatened sites
        cost_matrix[row, threat_matrix[row, :]] -= 1000

        # ships with more cargo get more weight, but ships with no
        # cargo at all get the largest weight
        if hal == 0:
            weight = 3
        else:
            cargo_rank = np.sum(hal > state.my_ship_hal)
            weight = 1 + cargo_rank / state.my_ship_hal.size

        # tie-break: a small bonus growing with the number of ships
        # that are farther from their destination than this one
        farther = np.sum(dists_to_dest[row] < dists_to_dest)
        weight += (farther / dists_to_dest.size) / 10

        cost_matrix[row, :] *= weight

        # sites more than one step away are not legal moves
        cost_matrix[row, (state.dist[pos, :] > 1)] = -np.inf

    return cost_matrix, threat_matrix, threat_scores